1 //===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This file implements the targeting of the InstructionSelector class for
10 /// AArch64.
11 /// \todo This should be generated by TableGen.
12 //===----------------------------------------------------------------------===//
13 
14 #include "AArch64InstrInfo.h"
15 #include "AArch64MachineFunctionInfo.h"
16 #include "AArch64RegisterBankInfo.h"
17 #include "AArch64RegisterInfo.h"
18 #include "AArch64Subtarget.h"
19 #include "AArch64TargetMachine.h"
20 #include "MCTargetDesc/AArch64AddressingModes.h"
21 #include "llvm/ADT/Optional.h"
22 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
23 #include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
24 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
25 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
26 #include "llvm/CodeGen/GlobalISel/Utils.h"
27 #include "llvm/CodeGen/MachineBasicBlock.h"
28 #include "llvm/CodeGen/MachineConstantPool.h"
29 #include "llvm/CodeGen/MachineFunction.h"
30 #include "llvm/CodeGen/MachineInstr.h"
31 #include "llvm/CodeGen/MachineInstrBuilder.h"
32 #include "llvm/CodeGen/MachineOperand.h"
33 #include "llvm/CodeGen/MachineRegisterInfo.h"
34 #include "llvm/IR/Type.h"
35 #include "llvm/IR/IntrinsicsAArch64.h"
36 #include "llvm/Support/Debug.h"
37 #include "llvm/Support/raw_ostream.h"
38 
39 #define DEBUG_TYPE "aarch64-isel"
40 
41 using namespace llvm;
42 
43 namespace {
44 
45 #define GET_GLOBALISEL_PREDICATE_BITSET
46 #include "AArch64GenGlobalISel.inc"
47 #undef GET_GLOBALISEL_PREDICATE_BITSET
48 
49 class AArch64InstructionSelector : public InstructionSelector {
50 public:
51   AArch64InstructionSelector(const AArch64TargetMachine &TM,
52                              const AArch64Subtarget &STI,
53                              const AArch64RegisterBankInfo &RBI);
54 
55   bool select(MachineInstr &I) override;
56   static const char *getName() { return DEBUG_TYPE; }
57 
58   void setupMF(MachineFunction &MF, GISelKnownBits &KB,
59                CodeGenCoverage &CoverageInfo) override {
60     InstructionSelector::setupMF(MF, KB, CoverageInfo);
61 
62     // hasFnAttribute() is expensive to call on every BRCOND selection, so
63     // cache it here for each run of the selector.
64     ProduceNonFlagSettingCondBr =
65         !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
66   }
67 
68 private:
69   /// tblgen-erated 'select' implementation, used as the initial selector for
70   /// the patterns that don't require complex C++.
71   bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
72 
73   // A lowering phase that runs before any selection attempts.
74 
75   void preISelLower(MachineInstr &I) const;
76 
77   // An early selection function that runs before the selectImpl() call.
78   bool earlySelect(MachineInstr &I) const;
79 
80   bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
81 
82   /// Eliminate same-sized cross-bank copies into stores before selectImpl().
83   void contractCrossBankCopyIntoStore(MachineInstr &I,
84                                       MachineRegisterInfo &MRI) const;
85 
86   bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
87                           MachineRegisterInfo &MRI) const;
88   bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
89                            MachineRegisterInfo &MRI) const;
90 
91   bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
92                            MachineRegisterInfo &MRI) const;
93 
94   bool selectVectorASHR(MachineInstr &I, MachineRegisterInfo &MRI) const;
95   bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
96 
97   // Helper to generate an equivalent of scalar_to_vector into a new register,
98   // returned via 'Dst'.
99   MachineInstr *emitScalarToVector(unsigned EltSize,
100                                    const TargetRegisterClass *DstRC,
101                                    Register Scalar,
102                                    MachineIRBuilder &MIRBuilder) const;
103 
104   /// Emit a lane insert into \p DstReg, or a new vector register if None is
105   /// provided.
106   ///
107   /// The lane inserted into is defined by \p LaneIdx. The vector source
108   /// register is given by \p SrcReg. The register containing the element is
109   /// given by \p EltReg.
110   MachineInstr *emitLaneInsert(Optional<Register> DstReg, Register SrcReg,
111                                Register EltReg, unsigned LaneIdx,
112                                const RegisterBank &RB,
113                                MachineIRBuilder &MIRBuilder) const;
114   bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
115   bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
116   bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
117   bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
118 
119   bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
120   bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
121   bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI) const;
122   bool selectSplitVectorUnmerge(MachineInstr &I,
123                                 MachineRegisterInfo &MRI) const;
124   bool selectIntrinsicWithSideEffects(MachineInstr &I,
125                                       MachineRegisterInfo &MRI) const;
126   bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI) const;
127   bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI) const;
128   bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
129   bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
130   bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI) const;
131   bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI) const;
132   bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI) const;
133 
134   unsigned emitConstantPoolEntry(Constant *CPVal, MachineFunction &MF) const;
135   MachineInstr *emitLoadFromConstantPool(Constant *CPVal,
136                                          MachineIRBuilder &MIRBuilder) const;
137 
138   // Emit a vector concat operation.
139   MachineInstr *emitVectorConcat(Optional<Register> Dst, Register Op1,
140                                  Register Op2,
141                                  MachineIRBuilder &MIRBuilder) const;
142   MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
143                                    MachineOperand &Predicate,
144                                    MachineIRBuilder &MIRBuilder) const;
145   MachineInstr *emitADD(Register DefReg, MachineOperand &LHS, MachineOperand &RHS,
146                         MachineIRBuilder &MIRBuilder) const;
147   MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
148                         MachineIRBuilder &MIRBuilder) const;
149   MachineInstr *emitTST(const Register &LHS, const Register &RHS,
150                         MachineIRBuilder &MIRBuilder) const;
151   MachineInstr *emitExtractVectorElt(Optional<Register> DstReg,
152                                      const RegisterBank &DstRB, LLT ScalarTy,
153                                      Register VecReg, unsigned LaneIdx,
154                                      MachineIRBuilder &MIRBuilder) const;
155 
156   /// Helper function for selecting G_FCONSTANT. If the G_FCONSTANT can be
157   /// materialized using a FMOV instruction, then update MI and return it.
158   /// Otherwise, do nothing and return a nullptr.
159   MachineInstr *emitFMovForFConstant(MachineInstr &MI,
160                                      MachineRegisterInfo &MRI) const;
161 
162   /// Emit a CSet for a compare.
163   MachineInstr *emitCSetForICMP(Register DefReg, unsigned Pred,
164                                 MachineIRBuilder &MIRBuilder) const;
165 
166   // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
167   // We use these manually instead of using the importer since it doesn't
168   // support SDNodeXForm.
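  // Illustrative note (not from the original source): per the i32shift_a /
  // i32shift_b definitions in AArch64InstrInfo.td, an immediate LSL by Shift
  // on a 32-bit value is encoded as UBFM with immr = (32 - Shift) % 32 and
  // imms = 31 - Shift (and analogously modulo 64 for the 64-bit variants).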
169   ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
170   ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
171   ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
172   ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
173 
174   ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
175   ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
176   ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
177 
178   ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
179                                             unsigned Size) const;
180 
181   ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
182     return selectAddrModeUnscaled(Root, 1);
183   }
184   ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
185     return selectAddrModeUnscaled(Root, 2);
186   }
187   ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
188     return selectAddrModeUnscaled(Root, 4);
189   }
190   ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
191     return selectAddrModeUnscaled(Root, 8);
192   }
193   ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
194     return selectAddrModeUnscaled(Root, 16);
195   }
196 
197   ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
198                                            unsigned Size) const;
199   template <int Width>
200   ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
201     return selectAddrModeIndexed(Root, Width / 8);
202   }
203 
204   bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
205                                      const MachineRegisterInfo &MRI) const;
206   ComplexRendererFns
207   selectAddrModeShiftedExtendXReg(MachineOperand &Root,
208                                   unsigned SizeInBytes) const;
209 
210   /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
211   /// or not a shift + extend should be folded into an addressing mode. Returns
212   /// None when this is not profitable or possible.
213   ComplexRendererFns
214   selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
215                     MachineOperand &Offset, unsigned SizeInBytes,
216                     bool WantsExt) const;
217   ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
218   ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
219                                        unsigned SizeInBytes) const;
220   template <int Width>
221   ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
222     return selectAddrModeXRO(Root, Width / 8);
223   }
224 
225   ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
226                                        unsigned SizeInBytes) const;
227   template <int Width>
228   ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
229     return selectAddrModeWRO(Root, Width / 8);
230   }
231 
232   ComplexRendererFns selectShiftedRegister(MachineOperand &Root) const;
233 
234   ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
235     return selectShiftedRegister(Root);
236   }
237 
238   ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
239     // TODO: selectShiftedRegister should allow for rotates on logical shifts.
240     // For now, make them the same. The only difference between the two is that
241     // logical shifts are allowed to fold in rotates. Otherwise, these are
242     // functionally the same.
243     return selectShiftedRegister(Root);
244   }
245 
246   /// Given an extend instruction, determine the correct shift-extend type for
247   /// that instruction.
248   ///
249   /// If the instruction is going to be used in a load or store, pass
250   /// \p IsLoadStore = true.
251   AArch64_AM::ShiftExtendType
252   getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
253                        bool IsLoadStore = false) const;
254 
255   /// Instructions that accept extend modifiers like UXTW expect the register
256   /// being extended to be a GPR32. Narrow ExtReg to a 32-bit register using a
257   /// subregister copy if necessary. Return either ExtReg, or the result of the
258   /// new copy.
259   Register narrowExtendRegIfNeeded(Register ExtReg,
260                                              MachineIRBuilder &MIB) const;
261   ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
262 
263   void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
264                       int OpIdx = -1) const;
265   void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
266                           int OpIdx = -1) const;
267   void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
268                           int OpIdx = -1) const;
269 
270   // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
271   void materializeLargeCMVal(MachineInstr &I, const Value *V,
272                              unsigned OpFlags) const;
273 
274   // Optimization methods.
275   bool tryOptVectorShuffle(MachineInstr &I) const;
276   bool tryOptVectorDup(MachineInstr &MI) const;
277   bool tryOptSelect(MachineInstr &MI) const;
278   MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
279                                       MachineOperand &Predicate,
280                                       MachineIRBuilder &MIRBuilder) const;
281 
282   /// Return true if \p MI is a load or store of \p NumBytes bytes.
283   bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
284 
285   /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
286   /// register zeroed out. In other words, the result of MI has been explicitly
287   /// zero extended.
288   bool isDef32(const MachineInstr &MI) const;
289 
290   const AArch64TargetMachine &TM;
291   const AArch64Subtarget &STI;
292   const AArch64InstrInfo &TII;
293   const AArch64RegisterInfo &TRI;
294   const AArch64RegisterBankInfo &RBI;
295 
296   bool ProduceNonFlagSettingCondBr = false;
297 
298 #define GET_GLOBALISEL_PREDICATES_DECL
299 #include "AArch64GenGlobalISel.inc"
300 #undef GET_GLOBALISEL_PREDICATES_DECL
301 
302 // We declare the temporaries used by selectImpl() in the class to minimize the
303 // cost of constructing placeholder values.
304 #define GET_GLOBALISEL_TEMPORARIES_DECL
305 #include "AArch64GenGlobalISel.inc"
306 #undef GET_GLOBALISEL_TEMPORARIES_DECL
307 };
308 
309 } // end anonymous namespace
310 
311 #define GET_GLOBALISEL_IMPL
312 #include "AArch64GenGlobalISel.inc"
313 #undef GET_GLOBALISEL_IMPL
314 
315 AArch64InstructionSelector::AArch64InstructionSelector(
316     const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
317     const AArch64RegisterBankInfo &RBI)
318     : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()),
319       TRI(*STI.getRegisterInfo()), RBI(RBI),
320 #define GET_GLOBALISEL_PREDICATES_INIT
321 #include "AArch64GenGlobalISel.inc"
322 #undef GET_GLOBALISEL_PREDICATES_INIT
323 #define GET_GLOBALISEL_TEMPORARIES_INIT
324 #include "AArch64GenGlobalISel.inc"
325 #undef GET_GLOBALISEL_TEMPORARIES_INIT
326 {
327 }
328 
329 // FIXME: This should be target-independent, inferred from the types declared
330 // for each class in the bank.
331 static const TargetRegisterClass *
332 getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
333                          const RegisterBankInfo &RBI,
334                          bool GetAllRegSet = false) {
335   if (RB.getID() == AArch64::GPRRegBankID) {
336     if (Ty.getSizeInBits() <= 32)
337       return GetAllRegSet ? &AArch64::GPR32allRegClass
338                           : &AArch64::GPR32RegClass;
339     if (Ty.getSizeInBits() == 64)
340       return GetAllRegSet ? &AArch64::GPR64allRegClass
341                           : &AArch64::GPR64RegClass;
342     return nullptr;
343   }
344 
345   if (RB.getID() == AArch64::FPRRegBankID) {
346     if (Ty.getSizeInBits() <= 16)
347       return &AArch64::FPR16RegClass;
348     if (Ty.getSizeInBits() == 32)
349       return &AArch64::FPR32RegClass;
350     if (Ty.getSizeInBits() == 64)
351       return &AArch64::FPR64RegClass;
352     if (Ty.getSizeInBits() == 128)
353       return &AArch64::FPR128RegClass;
354     return nullptr;
355   }
356 
357   return nullptr;
358 }
359 
360 /// Given a register bank, and size in bits, return the smallest register class
361 /// that can represent that combination.
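/// For example (per the mapping below): (GPR bank, 64 bits) yields
/// GPR64RegClass and (FPR bank, 16 bits) yields FPR16RegClass; unhandled
/// combinations yield nullptr.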
362 static const TargetRegisterClass *
363 getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
364                       bool GetAllRegSet = false) {
365   unsigned RegBankID = RB.getID();
366 
367   if (RegBankID == AArch64::GPRRegBankID) {
368     if (SizeInBits <= 32)
369       return GetAllRegSet ? &AArch64::GPR32allRegClass
370                           : &AArch64::GPR32RegClass;
371     if (SizeInBits == 64)
372       return GetAllRegSet ? &AArch64::GPR64allRegClass
373                           : &AArch64::GPR64RegClass;
374   }
375 
376   if (RegBankID == AArch64::FPRRegBankID) {
377     switch (SizeInBits) {
378     default:
379       return nullptr;
380     case 8:
381       return &AArch64::FPR8RegClass;
382     case 16:
383       return &AArch64::FPR16RegClass;
384     case 32:
385       return &AArch64::FPR32RegClass;
386     case 64:
387       return &AArch64::FPR64RegClass;
388     case 128:
389       return &AArch64::FPR128RegClass;
390     }
391   }
392 
393   return nullptr;
394 }
395 
396 /// Returns the correct subregister to use for a given register class.
397 static bool getSubRegForClass(const TargetRegisterClass *RC,
398                               const TargetRegisterInfo &TRI, unsigned &SubReg) {
399   switch (TRI.getRegSizeInBits(*RC)) {
400   case 8:
401     SubReg = AArch64::bsub;
402     break;
403   case 16:
404     SubReg = AArch64::hsub;
405     break;
406   case 32:
407     if (RC != &AArch64::FPR32RegClass)
408       SubReg = AArch64::sub_32;
409     else
410       SubReg = AArch64::ssub;
411     break;
412   case 64:
413     SubReg = AArch64::dsub;
414     break;
415   default:
416     LLVM_DEBUG(
417         dbgs() << "Couldn't find appropriate subregister for register class.");
418     return false;
419   }
420 
421   return true;
422 }
423 
424 /// Check whether \p I is a currently unsupported binary operation:
425 /// - it has an unsized type
426 /// - an operand is not a vreg
427 /// - not all operands are in the same bank
428 /// These are checks that should someday live in the verifier, but right now,
429 /// these are mostly limitations of the aarch64 selector.
430 static bool unsupportedBinOp(const MachineInstr &I,
431                              const AArch64RegisterBankInfo &RBI,
432                              const MachineRegisterInfo &MRI,
433                              const AArch64RegisterInfo &TRI) {
434   LLT Ty = MRI.getType(I.getOperand(0).getReg());
435   if (!Ty.isValid()) {
436     LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
437     return true;
438   }
439 
440   const RegisterBank *PrevOpBank = nullptr;
441   for (auto &MO : I.operands()) {
442     // FIXME: Support non-register operands.
443     if (!MO.isReg()) {
444       LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
445       return true;
446     }
447 
448     // FIXME: Can generic operations have physical registers operands? If
449     // so, this will need to be taught about that, and we'll need to get the
450     // bank out of the minimal class for the register.
451     // Either way, this needs to be documented (and possibly verified).
452     if (!Register::isVirtualRegister(MO.getReg())) {
453       LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
454       return true;
455     }
456 
457     const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
458     if (!OpBank) {
459       LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
460       return true;
461     }
462 
463     if (PrevOpBank && OpBank != PrevOpBank) {
464       LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
465       return true;
466     }
467     PrevOpBank = OpBank;
468   }
469   return false;
470 }
471 
472 /// Select the AArch64 opcode for the basic binary operation \p GenericOpc
473 /// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
474 /// and of size \p OpSize.
475 /// \returns \p GenericOpc if the combination is unsupported.
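/// For example (per the tables below): G_SHL of a 32-bit GPR value selects
/// LSLVWr, while G_FADD of a 64-bit FPR value selects FADDDrr.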
476 static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
477                                unsigned OpSize) {
478   switch (RegBankID) {
479   case AArch64::GPRRegBankID:
480     if (OpSize == 32) {
481       switch (GenericOpc) {
482       case TargetOpcode::G_SHL:
483         return AArch64::LSLVWr;
484       case TargetOpcode::G_LSHR:
485         return AArch64::LSRVWr;
486       case TargetOpcode::G_ASHR:
487         return AArch64::ASRVWr;
488       default:
489         return GenericOpc;
490       }
491     } else if (OpSize == 64) {
492       switch (GenericOpc) {
493       case TargetOpcode::G_PTR_ADD:
494         return AArch64::ADDXrr;
495       case TargetOpcode::G_SHL:
496         return AArch64::LSLVXr;
497       case TargetOpcode::G_LSHR:
498         return AArch64::LSRVXr;
499       case TargetOpcode::G_ASHR:
500         return AArch64::ASRVXr;
501       default:
502         return GenericOpc;
503       }
504     }
505     break;
506   case AArch64::FPRRegBankID:
507     switch (OpSize) {
508     case 32:
509       switch (GenericOpc) {
510       case TargetOpcode::G_FADD:
511         return AArch64::FADDSrr;
512       case TargetOpcode::G_FSUB:
513         return AArch64::FSUBSrr;
514       case TargetOpcode::G_FMUL:
515         return AArch64::FMULSrr;
516       case TargetOpcode::G_FDIV:
517         return AArch64::FDIVSrr;
518       default:
519         return GenericOpc;
520       }
521     case 64:
522       switch (GenericOpc) {
523       case TargetOpcode::G_FADD:
524         return AArch64::FADDDrr;
525       case TargetOpcode::G_FSUB:
526         return AArch64::FSUBDrr;
527       case TargetOpcode::G_FMUL:
528         return AArch64::FMULDrr;
529       case TargetOpcode::G_FDIV:
530         return AArch64::FDIVDrr;
531       case TargetOpcode::G_OR:
532         return AArch64::ORRv8i8;
533       default:
534         return GenericOpc;
535       }
536     }
537     break;
538   }
539   return GenericOpc;
540 }
541 
542 /// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
543 /// appropriate for the (value) register bank \p RegBankID and of memory access
544 /// size \p OpSize.  This returns the variant with the base+unsigned-immediate
545 /// addressing mode (e.g., LDRXui).
546 /// \returns \p GenericOpc if the combination is unsupported.
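/// For example (per the tables below): a 64-bit G_LOAD on the GPR bank selects
/// LDRXui, and a 32-bit G_STORE on the FPR bank selects STRSui.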
547 static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
548                                     unsigned OpSize) {
549   const bool isStore = GenericOpc == TargetOpcode::G_STORE;
550   switch (RegBankID) {
551   case AArch64::GPRRegBankID:
552     switch (OpSize) {
553     case 8:
554       return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
555     case 16:
556       return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
557     case 32:
558       return isStore ? AArch64::STRWui : AArch64::LDRWui;
559     case 64:
560       return isStore ? AArch64::STRXui : AArch64::LDRXui;
561     }
562     break;
563   case AArch64::FPRRegBankID:
564     switch (OpSize) {
565     case 8:
566       return isStore ? AArch64::STRBui : AArch64::LDRBui;
567     case 16:
568       return isStore ? AArch64::STRHui : AArch64::LDRHui;
569     case 32:
570       return isStore ? AArch64::STRSui : AArch64::LDRSui;
571     case 64:
572       return isStore ? AArch64::STRDui : AArch64::LDRDui;
573     }
574     break;
575   }
576   return GenericOpc;
577 }
578 
579 #ifndef NDEBUG
580 /// Helper function that verifies that we have a valid copy at the end of
581 /// selectCopy. Verifies that the source and dest have the expected sizes and
582 /// then returns true.
583 static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
584                         const MachineRegisterInfo &MRI,
585                         const TargetRegisterInfo &TRI,
586                         const RegisterBankInfo &RBI) {
587   const Register DstReg = I.getOperand(0).getReg();
588   const Register SrcReg = I.getOperand(1).getReg();
589   const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
590   const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
591 
592   // Make sure the sizes of the source and dest line up.
593   assert(
594       (DstSize == SrcSize ||
594        // Copies are a means to set up initial types; the number of
596        // bits may not exactly match.
597        (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
597        // Copies are a means to copy bits around; as long as we are
599        // on the same register class, that's fine. Otherwise, that
600        // means we need some SUBREG_TO_REG or AND & co.
601        (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) &&
602       "Copy with different width?!");
603 
604   // Check the size of the destination.
605   assert((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) &&
606          "GPRs cannot get more than 64-bit width values");
607 
608   return true;
609 }
610 #endif
611 
612 /// Helper function for selectCopy. Inserts a subregister copy from
613 /// \p *From to \p *To, linking it up to \p I.
614 ///
615 /// e.g, given I = "Dst = COPY SrcReg", we'll transform that into
616 ///
617 /// CopyReg (From class) = COPY SrcReg
618 /// SubRegCopy (To class) = COPY CopyReg:SubReg
619 /// Dst = COPY SubRegCopy
620 static bool selectSubregisterCopy(MachineInstr &I, MachineRegisterInfo &MRI,
621                                   const RegisterBankInfo &RBI, Register SrcReg,
622                                   const TargetRegisterClass *From,
623                                   const TargetRegisterClass *To,
624                                   unsigned SubReg) {
625   MachineIRBuilder MIB(I);
626   auto Copy = MIB.buildCopy({From}, {SrcReg});
627   auto SubRegCopy = MIB.buildInstr(TargetOpcode::COPY, {To}, {})
628                         .addReg(Copy.getReg(0), 0, SubReg);
629   MachineOperand &RegOp = I.getOperand(1);
630   RegOp.setReg(SubRegCopy.getReg(0));
631 
632   // It's possible that the destination register won't be constrained. Make
633   // sure that happens.
634   if (!Register::isPhysicalRegister(I.getOperand(0).getReg()))
635     RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
636 
637   return true;
638 }
639 
640 /// Helper function to get the source and destination register classes for a
641 /// copy. Returns a std::pair containing the source register class for the
642 /// copy, and the destination register class for the copy. If a register class
643 /// cannot be determined, then it will be nullptr.
644 static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
645 getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
646                      MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
647                      const RegisterBankInfo &RBI) {
648   Register DstReg = I.getOperand(0).getReg();
649   Register SrcReg = I.getOperand(1).getReg();
650   const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
651   const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
652   unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
653   unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
654 
655   // Special casing for cross-bank copies of s1s. We can technically represent
656   // a 1-bit value with any size of register. The minimum size for a GPR is 32
657   // bits. So, we need to put the FPR on 32 bits as well.
658   //
659   // FIXME: I'm not sure if this case holds true outside of copies. If it does,
660   // then we can pull it into the helpers that get the appropriate class for a
661   // register bank. Or make a new helper that carries along some constraint
662   // information.
663   if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
664     SrcSize = DstSize = 32;
665 
666   return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
667           getMinClassForRegBank(DstRegBank, DstSize, true)};
668 }
669 
670 static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
671                        MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
672                        const RegisterBankInfo &RBI) {
673 
674   Register DstReg = I.getOperand(0).getReg();
675   Register SrcReg = I.getOperand(1).getReg();
676   const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
677   const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
678 
679   // Find the correct register classes for the source and destination registers.
680   const TargetRegisterClass *SrcRC;
681   const TargetRegisterClass *DstRC;
682   std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
683 
684   if (!DstRC) {
685     LLVM_DEBUG(dbgs() << "Unexpected dest size "
686                       << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
687     return false;
688   }
689 
690   // A couple helpers below, for making sure that the copy we produce is valid.
691 
692   // Set to true if we insert a SUBREG_TO_REG. If we do this, then we don't want
693   // to verify that the src and dst are the same size, since that's handled by
694   // the SUBREG_TO_REG.
695   bool KnownValid = false;
696 
697   // Returns true, or asserts if something we don't expect happens. Instead of
698   // returning true, we return isValidCopy() to ensure that we verify the
699   // result.
700   auto CheckCopy = [&]() {
701     // If we have a bitcast or something, we can't have physical registers.
702     assert((I.isCopy() ||
703             (!Register::isPhysicalRegister(I.getOperand(0).getReg()) &&
704              !Register::isPhysicalRegister(I.getOperand(1).getReg()))) &&
705            "No phys reg on generic operator!");
706     assert(KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI));
707     (void)KnownValid;
708     return true;
709   };
710 
711   // Is this a copy? If so, then we may need to insert a subregister copy, or
712   // a SUBREG_TO_REG.
713   if (I.isCopy()) {
714     // Yes. Check if there's anything to fix up.
715     if (!SrcRC) {
716       LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
717       return false;
718     }
719 
720     unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
721     unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
722 
723     // If we're doing a cross-bank copy on different-sized registers, we need
724     // to do a bit more work.
725     if (SrcSize > DstSize) {
726       // We're doing a cross-bank copy into a smaller register. We need a
727       // subregister copy. First, get a register class that's on the same bank
728       // as the destination, but the same size as the source.
729       const TargetRegisterClass *SubregRC =
730           getMinClassForRegBank(DstRegBank, SrcSize, true);
731       assert(SubregRC && "Didn't get a register class for subreg?");
732 
733       // Get the appropriate subregister for the destination.
734       unsigned SubReg = 0;
735       if (!getSubRegForClass(DstRC, TRI, SubReg)) {
736         LLVM_DEBUG(dbgs() << "Couldn't determine subregister for copy.\n");
737         return false;
738       }
739 
740       // Now, insert a subregister copy using the new register class.
741       selectSubregisterCopy(I, MRI, RBI, SrcReg, SubregRC, DstRC, SubReg);
742       return CheckCopy();
743     }
744 
745     // Is this a cross-bank copy?
746     if (DstRegBank.getID() != SrcRegBank.getID()) {
747       if (DstRegBank.getID() == AArch64::GPRRegBankID && DstSize == 32 &&
748           SrcSize == 16) {
749         // Special case for FPR16 to GPR32.
750         // FIXME: This can probably be generalized like the above case.
751         Register PromoteReg =
752             MRI.createVirtualRegister(&AArch64::FPR32RegClass);
753         BuildMI(*I.getParent(), I, I.getDebugLoc(),
754                 TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
755             .addImm(0)
756             .addUse(SrcReg)
757             .addImm(AArch64::hsub);
758         MachineOperand &RegOp = I.getOperand(1);
759         RegOp.setReg(PromoteReg);
760 
761         // Promise that the copy is implicitly validated by the SUBREG_TO_REG.
762         KnownValid = true;
763       }
764     }
765 
766     // If the destination is a physical register, then there's nothing to
767     // change, so we're done.
768     if (Register::isPhysicalRegister(DstReg))
769       return CheckCopy();
770   }
771 
772   // No need to constrain SrcReg. It will get constrained when we hit another
773   // of its uses or defs. Copies do not have constraints.
774   if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
775     LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
776                       << " operand\n");
777     return false;
778   }
779   I.setDesc(TII.get(AArch64::COPY));
780   return CheckCopy();
781 }
782 
783 static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
784   if (!DstTy.isScalar() || !SrcTy.isScalar())
785     return GenericOpc;
786 
787   const unsigned DstSize = DstTy.getSizeInBits();
788   const unsigned SrcSize = SrcTy.getSizeInBits();
789 
790   switch (DstSize) {
791   case 32:
792     switch (SrcSize) {
793     case 32:
794       switch (GenericOpc) {
795       case TargetOpcode::G_SITOFP:
796         return AArch64::SCVTFUWSri;
797       case TargetOpcode::G_UITOFP:
798         return AArch64::UCVTFUWSri;
799       case TargetOpcode::G_FPTOSI:
800         return AArch64::FCVTZSUWSr;
801       case TargetOpcode::G_FPTOUI:
802         return AArch64::FCVTZUUWSr;
803       default:
804         return GenericOpc;
805       }
806     case 64:
807       switch (GenericOpc) {
808       case TargetOpcode::G_SITOFP:
809         return AArch64::SCVTFUXSri;
810       case TargetOpcode::G_UITOFP:
811         return AArch64::UCVTFUXSri;
812       case TargetOpcode::G_FPTOSI:
813         return AArch64::FCVTZSUWDr;
814       case TargetOpcode::G_FPTOUI:
815         return AArch64::FCVTZUUWDr;
816       default:
817         return GenericOpc;
818       }
819     default:
820       return GenericOpc;
821     }
822   case 64:
823     switch (SrcSize) {
824     case 32:
825       switch (GenericOpc) {
826       case TargetOpcode::G_SITOFP:
827         return AArch64::SCVTFUWDri;
828       case TargetOpcode::G_UITOFP:
829         return AArch64::UCVTFUWDri;
830       case TargetOpcode::G_FPTOSI:
831         return AArch64::FCVTZSUXSr;
832       case TargetOpcode::G_FPTOUI:
833         return AArch64::FCVTZUUXSr;
834       default:
835         return GenericOpc;
836       }
837     case 64:
838       switch (GenericOpc) {
839       case TargetOpcode::G_SITOFP:
840         return AArch64::SCVTFUXDri;
841       case TargetOpcode::G_UITOFP:
842         return AArch64::UCVTFUXDri;
843       case TargetOpcode::G_FPTOSI:
844         return AArch64::FCVTZSUXDr;
845       case TargetOpcode::G_FPTOUI:
846         return AArch64::FCVTZUUXDr;
847       default:
848         return GenericOpc;
849       }
850     default:
851       return GenericOpc;
852     }
853   default:
854     return GenericOpc;
855   };
856   return GenericOpc;
857 }
858 
859 static unsigned selectSelectOpc(MachineInstr &I, MachineRegisterInfo &MRI,
860                                 const RegisterBankInfo &RBI) {
861   const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
862   bool IsFP = (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
863                AArch64::GPRRegBankID);
864   LLT Ty = MRI.getType(I.getOperand(0).getReg());
865   if (Ty == LLT::scalar(32))
866     return IsFP ? AArch64::FCSELSrrr : AArch64::CSELWr;
867   else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64))
868     return IsFP ? AArch64::FCSELDrrr : AArch64::CSELXr;
869   return 0;
870 }
871 
872 /// Helper function to select the opcode for a G_FCMP.
873 static unsigned selectFCMPOpc(MachineInstr &I, MachineRegisterInfo &MRI) {
874   // If this is a compare against +0.0, then we don't have to explicitly
875   // materialize a constant.
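  // (FCMPSri / FCMPDri compare the source register against +0.0 directly, so
  // the constant operand can be dropped in that case.)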
876   const ConstantFP *FPImm = getConstantFPVRegVal(I.getOperand(3).getReg(), MRI);
877   bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
878   unsigned OpSize = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
879   if (OpSize != 32 && OpSize != 64)
880     return 0;
881   unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
882                               {AArch64::FCMPSri, AArch64::FCMPDri}};
883   return CmpOpcTbl[ShouldUseImm][OpSize == 64];
884 }
885 
886 /// Returns true if \p P is an unsigned integer comparison predicate.
887 static bool isUnsignedICMPPred(const CmpInst::Predicate P) {
888   switch (P) {
889   default:
890     return false;
891   case CmpInst::ICMP_UGT:
892   case CmpInst::ICMP_UGE:
893   case CmpInst::ICMP_ULT:
894   case CmpInst::ICMP_ULE:
895     return true;
896   }
897 }
898 
899 static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
900   switch (P) {
901   default:
902     llvm_unreachable("Unknown condition code!");
903   case CmpInst::ICMP_NE:
904     return AArch64CC::NE;
905   case CmpInst::ICMP_EQ:
906     return AArch64CC::EQ;
907   case CmpInst::ICMP_SGT:
908     return AArch64CC::GT;
909   case CmpInst::ICMP_SGE:
910     return AArch64CC::GE;
911   case CmpInst::ICMP_SLT:
912     return AArch64CC::LT;
913   case CmpInst::ICMP_SLE:
914     return AArch64CC::LE;
915   case CmpInst::ICMP_UGT:
916     return AArch64CC::HI;
917   case CmpInst::ICMP_UGE:
918     return AArch64CC::HS;
919   case CmpInst::ICMP_ULT:
920     return AArch64CC::LO;
921   case CmpInst::ICMP_ULE:
922     return AArch64CC::LS;
923   }
924 }
925 
926 static void changeFCMPPredToAArch64CC(CmpInst::Predicate P,
927                                       AArch64CC::CondCode &CondCode,
928                                       AArch64CC::CondCode &CondCode2) {
929   CondCode2 = AArch64CC::AL;
930   switch (P) {
931   default:
932     llvm_unreachable("Unknown FP condition!");
933   case CmpInst::FCMP_OEQ:
934     CondCode = AArch64CC::EQ;
935     break;
936   case CmpInst::FCMP_OGT:
937     CondCode = AArch64CC::GT;
938     break;
939   case CmpInst::FCMP_OGE:
940     CondCode = AArch64CC::GE;
941     break;
942   case CmpInst::FCMP_OLT:
943     CondCode = AArch64CC::MI;
944     break;
945   case CmpInst::FCMP_OLE:
946     CondCode = AArch64CC::LS;
947     break;
948   case CmpInst::FCMP_ONE:
949     CondCode = AArch64CC::MI;
950     CondCode2 = AArch64CC::GT;
951     break;
952   case CmpInst::FCMP_ORD:
953     CondCode = AArch64CC::VC;
954     break;
955   case CmpInst::FCMP_UNO:
956     CondCode = AArch64CC::VS;
957     break;
958   case CmpInst::FCMP_UEQ:
959     CondCode = AArch64CC::EQ;
960     CondCode2 = AArch64CC::VS;
961     break;
962   case CmpInst::FCMP_UGT:
963     CondCode = AArch64CC::HI;
964     break;
965   case CmpInst::FCMP_UGE:
966     CondCode = AArch64CC::PL;
967     break;
968   case CmpInst::FCMP_ULT:
969     CondCode = AArch64CC::LT;
970     break;
971   case CmpInst::FCMP_ULE:
972     CondCode = AArch64CC::LE;
973     break;
974   case CmpInst::FCMP_UNE:
975     CondCode = AArch64CC::NE;
976     break;
977   }
978 }
979 
980 bool AArch64InstructionSelector::selectCompareBranch(
981     MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
982 
983   const Register CondReg = I.getOperand(0).getReg();
984   MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
985   MachineInstr *CCMI = MRI.getVRegDef(CondReg);
986   if (CCMI->getOpcode() == TargetOpcode::G_TRUNC)
987     CCMI = MRI.getVRegDef(CCMI->getOperand(1).getReg());
988   if (CCMI->getOpcode() != TargetOpcode::G_ICMP)
989     return false;
990 
991   Register LHS = CCMI->getOperand(2).getReg();
992   Register RHS = CCMI->getOperand(3).getReg();
993   auto VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
994   if (!VRegAndVal)
995     std::swap(RHS, LHS);
996 
997   VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
998   if (!VRegAndVal || VRegAndVal->Value != 0) {
999     MachineIRBuilder MIB(I);
1000     // If we can't select a CBZ then emit a cmp + Bcc.
1001     if (!emitIntegerCompare(CCMI->getOperand(2), CCMI->getOperand(3),
1002                             CCMI->getOperand(1), MIB))
1003       return false;
1004     const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
1005         (CmpInst::Predicate)CCMI->getOperand(1).getPredicate());
1006     MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
1007     I.eraseFromParent();
1008     return true;
1009   }
1010 
1011   const RegisterBank &RB = *RBI.getRegBank(LHS, MRI, TRI);
1012   if (RB.getID() != AArch64::GPRRegBankID)
1013     return false;
1014 
1015   const auto Pred = (CmpInst::Predicate)CCMI->getOperand(1).getPredicate();
1016   if (Pred != CmpInst::ICMP_NE && Pred != CmpInst::ICMP_EQ)
1017     return false;
1018 
1019   const unsigned CmpWidth = MRI.getType(LHS).getSizeInBits();
1020   unsigned CBOpc = 0;
1021   if (CmpWidth <= 32)
1022     CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZW : AArch64::CBNZW);
1023   else if (CmpWidth == 64)
1024     CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZX : AArch64::CBNZX);
1025   else
1026     return false;
1027 
1028   BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(CBOpc))
1029       .addUse(LHS)
1030       .addMBB(DestMBB)
1031       .constrainAllUses(TII, TRI, RBI);
1032 
1033   I.eraseFromParent();
1034   return true;
1035 }
1036 
1037 /// Returns the element immediate value of a vector shift operand if found.
1038 /// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
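/// For example (illustrative GMIR, register names invented), a shift amount
/// built as
///   %c:_(s32) = G_CONSTANT i32 3
///   %amt:_(<4 x s32>) = G_BUILD_VECTOR %c:_(s32), %c:_(s32), %c:_(s32), %c:_(s32)
/// yields an immediate of 3.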
1039 static Optional<int64_t> getVectorShiftImm(Register Reg,
1040                                            MachineRegisterInfo &MRI) {
1041   assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
1042   MachineInstr *OpMI = MRI.getVRegDef(Reg);
1043   assert(OpMI && "Expected to find a vreg def for vector shift operand");
1044   if (OpMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR)
1045     return None;
1046 
1047   // Check all operands are identical immediates.
1048   int64_t ImmVal = 0;
1049   for (unsigned Idx = 1; Idx < OpMI->getNumOperands(); ++Idx) {
1050     auto VRegAndVal = getConstantVRegValWithLookThrough(OpMI->getOperand(Idx).getReg(), MRI);
1051     if (!VRegAndVal)
1052       return None;
1053 
1054     if (Idx == 1)
1055       ImmVal = VRegAndVal->Value;
1056     if (ImmVal != VRegAndVal->Value)
1057       return None;
1058   }
1059 
1060   return ImmVal;
1061 }
1062 
1063 /// Matches and returns the shift immediate value for a SHL instruction given
1064 /// a shift operand.
1065 static Optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg, MachineRegisterInfo &MRI) {
1066   Optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
1067   if (!ShiftImm)
1068     return None;
1069   // Check the immediate is in range for a SHL.
1070   int64_t Imm = *ShiftImm;
1071   if (Imm < 0)
1072     return None;
1073   switch (SrcTy.getElementType().getSizeInBits()) {
1074   default:
1075     LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift");
1076     return None;
1077   case 8:
1078     if (Imm > 7)
1079       return None;
1080     break;
1081   case 16:
1082     if (Imm > 15)
1083       return None;
1084     break;
1085   case 32:
1086     if (Imm > 31)
1087       return None;
1088     break;
1089   case 64:
1090     if (Imm > 63)
1091       return None;
1092     break;
1093   }
1094   return Imm;
1095 }
1096 
1097 bool AArch64InstructionSelector::selectVectorSHL(
1098     MachineInstr &I, MachineRegisterInfo &MRI) const {
1099   assert(I.getOpcode() == TargetOpcode::G_SHL);
1100   Register DstReg = I.getOperand(0).getReg();
1101   const LLT Ty = MRI.getType(DstReg);
1102   Register Src1Reg = I.getOperand(1).getReg();
1103   Register Src2Reg = I.getOperand(2).getReg();
1104 
1105   if (!Ty.isVector())
1106     return false;
1107 
1108   // Check if we have a vector of constants on RHS that we can select as the
1109   // immediate form.
1110   Optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI);
1111 
1112   unsigned Opc = 0;
1113   if (Ty == LLT::vector(2, 64)) {
1114     Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
1115   } else if (Ty == LLT::vector(4, 32)) {
1116     Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
1117   } else if (Ty == LLT::vector(2, 32)) {
1118     Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
1119   } else {
1120     LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
1121     return false;
1122   }
1123 
1124   MachineIRBuilder MIB(I);
1125   auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
1126   if (ImmVal)
1127     Shl.addImm(*ImmVal);
1128   else
1129     Shl.addUse(Src2Reg);
1130   constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI);
1131   I.eraseFromParent();
1132   return true;
1133 }
1134 
1135 bool AArch64InstructionSelector::selectVectorASHR(
1136     MachineInstr &I, MachineRegisterInfo &MRI) const {
1137   assert(I.getOpcode() == TargetOpcode::G_ASHR);
1138   Register DstReg = I.getOperand(0).getReg();
1139   const LLT Ty = MRI.getType(DstReg);
1140   Register Src1Reg = I.getOperand(1).getReg();
1141   Register Src2Reg = I.getOperand(2).getReg();
1142 
1143   if (!Ty.isVector())
1144     return false;
1145 
1146   // There is no shift-right-by-register instruction, but the shift-left-by-
1147   // register instruction takes a signed value, where negative amounts specify
1148   // a right shift.
1149 
1150   unsigned Opc = 0;
1151   unsigned NegOpc = 0;
1152   const TargetRegisterClass *RC = nullptr;
1153   if (Ty == LLT::vector(2, 64)) {
1154     Opc = AArch64::SSHLv2i64;
1155     NegOpc = AArch64::NEGv2i64;
1156     RC = &AArch64::FPR128RegClass;
1157   } else if (Ty == LLT::vector(4, 32)) {
1158     Opc = AArch64::SSHLv4i32;
1159     NegOpc = AArch64::NEGv4i32;
1160     RC = &AArch64::FPR128RegClass;
1161   } else if (Ty == LLT::vector(2, 32)) {
1162     Opc = AArch64::SSHLv2i32;
1163     NegOpc = AArch64::NEGv2i32;
1164     RC = &AArch64::FPR64RegClass;
1165   } else {
1166     LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
1167     return false;
1168   }
1169 
1170   MachineIRBuilder MIB(I);
1171   auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
1172   constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
1173   auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
1174   constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
1175   I.eraseFromParent();
1176   return true;
1177 }
1178 
1179 bool AArch64InstructionSelector::selectVaStartAAPCS(
1180     MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1181   return false;
1182 }
1183 
1184 bool AArch64InstructionSelector::selectVaStartDarwin(
1185     MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1186   AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
1187   Register ListReg = I.getOperand(0).getReg();
1188 
1189   Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1190 
1191   auto MIB =
1192       BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
1193           .addDef(ArgsAddrReg)
1194           .addFrameIndex(FuncInfo->getVarArgsStackIndex())
1195           .addImm(0)
1196           .addImm(0);
1197 
1198   constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1199 
1200   MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
1201             .addUse(ArgsAddrReg)
1202             .addUse(ListReg)
1203             .addImm(0)
1204             .addMemOperand(*I.memoperands_begin());
1205 
1206   constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1207   I.eraseFromParent();
1208   return true;
1209 }
1210 
1211 void AArch64InstructionSelector::materializeLargeCMVal(
1212     MachineInstr &I, const Value *V, unsigned OpFlags) const {
1213   MachineBasicBlock &MBB = *I.getParent();
1214   MachineFunction &MF = *MBB.getParent();
1215   MachineRegisterInfo &MRI = MF.getRegInfo();
1216   MachineIRBuilder MIB(I);
1217 
1218   auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
1219   MovZ->addOperand(MF, I.getOperand(1));
1220   MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
1221                                      AArch64II::MO_NC);
1222   MovZ->addOperand(MF, MachineOperand::CreateImm(0));
1223   constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
1224 
1225   auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
1226                        Register ForceDstReg) {
1227     Register DstReg = ForceDstReg
1228                           ? ForceDstReg
1229                           : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1230     auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
1231     if (auto *GV = dyn_cast<GlobalValue>(V)) {
1232       MovI->addOperand(MF, MachineOperand::CreateGA(
1233                                GV, MovZ->getOperand(1).getOffset(), Flags));
1234     } else {
1235       MovI->addOperand(
1236           MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
1237                                        MovZ->getOperand(1).getOffset(), Flags));
1238     }
1239     MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
1240     constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
1241     return DstReg;
1242   };
1243   Register DstReg = BuildMovK(MovZ.getReg(0),
1244                               AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
1245   DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
1246   BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
1247   return;
1248 }
1249 
1250 void AArch64InstructionSelector::preISelLower(MachineInstr &I) const {
1251   MachineBasicBlock &MBB = *I.getParent();
1252   MachineFunction &MF = *MBB.getParent();
1253   MachineRegisterInfo &MRI = MF.getRegInfo();
1254 
1255   switch (I.getOpcode()) {
1256   case TargetOpcode::G_SHL:
1257   case TargetOpcode::G_ASHR:
1258   case TargetOpcode::G_LSHR: {
1259     // These shifts are legalized to have 64 bit shift amounts because we want
1260     // to take advantage of the existing imported selection patterns that assume
1261     // the immediates are s64s. However, if the shifted type is 32 bits and for
1262     // some reason we receive input GMIR that has an s64 shift amount that's not
1263     // a G_CONSTANT, insert a truncate so that we can still select the s32
1264     // register-register variant.
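    // Illustrative sketch of the rewrite performed below (GMIR, names
    // invented): a non-constant s64 amount %sh feeding a 32-bit shift gets
    //   %t:gpr(s32) = COPY %sh.sub_32
    // inserted, and the shift then uses %t instead of %sh.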
1265     Register SrcReg = I.getOperand(1).getReg();
1266     Register ShiftReg = I.getOperand(2).getReg();
1267     const LLT ShiftTy = MRI.getType(ShiftReg);
1268     const LLT SrcTy = MRI.getType(SrcReg);
1269     if (SrcTy.isVector())
1270       return;
1271     assert(!ShiftTy.isVector() && "unexpected vector shift ty");
1272     if (SrcTy.getSizeInBits() != 32 || ShiftTy.getSizeInBits() != 64)
1273       return;
1274     auto *AmtMI = MRI.getVRegDef(ShiftReg);
1275     assert(AmtMI && "could not find a vreg definition for shift amount");
1276     if (AmtMI->getOpcode() != TargetOpcode::G_CONSTANT) {
1277       // Insert a subregister copy to implement a 64->32 trunc
1278       MachineIRBuilder MIB(I);
1279       auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
1280                        .addReg(ShiftReg, 0, AArch64::sub_32);
1281       MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
1282       I.getOperand(2).setReg(Trunc.getReg(0));
1283     }
1284     return;
1285   }
1286   case TargetOpcode::G_STORE:
1287     contractCrossBankCopyIntoStore(I, MRI);
1288     return;
1289   default:
1290     return;
1291   }
1292 }
1293 
1294 bool AArch64InstructionSelector::earlySelectSHL(
1295     MachineInstr &I, MachineRegisterInfo &MRI) const {
1296   // We try to match the immediate variant of LSL, which is actually an alias
1297   // for a special case of UBFM. Otherwise, we fall back to the imported
1298   // selector which will match the register variant.
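  // For example, "lsl w0, w1, #3" is the alias "ubfm w0, w1, #29, #28"
  // (illustrative encoding; the immediates come from selectShiftA_32 and
  // selectShiftB_32 above).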
1299   assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
1300   const auto &MO = I.getOperand(2);
1301   auto VRegAndVal = getConstantVRegVal(MO.getReg(), MRI);
1302   if (!VRegAndVal)
1303     return false;
1304 
1305   const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1306   if (DstTy.isVector())
1307     return false;
1308   bool Is64Bit = DstTy.getSizeInBits() == 64;
1309   auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
1310   auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
1311   MachineIRBuilder MIB(I);
1312 
1313   if (!Imm1Fn || !Imm2Fn)
1314     return false;
1315 
1316   auto NewI =
1317       MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
1318                      {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
1319 
1320   for (auto &RenderFn : *Imm1Fn)
1321     RenderFn(NewI);
1322   for (auto &RenderFn : *Imm2Fn)
1323     RenderFn(NewI);
1324 
1325   I.eraseFromParent();
1326   return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
1327 }
1328 
1329 void AArch64InstructionSelector::contractCrossBankCopyIntoStore(
1330     MachineInstr &I, MachineRegisterInfo &MRI) const {
1331   assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
1332   // If we're storing a scalar, it doesn't matter what register bank that
1333   // scalar is on. All that matters is the size.
1334   //
1335   // So, if we see something like this (with a 32-bit scalar as an example):
1336   //
1337   // %x:gpr(s32) = ... something ...
1338   // %y:fpr(s32) = COPY %x:gpr(s32)
1339   // G_STORE %y:fpr(s32)
1340   //
1341   // We can fix this up into something like this:
1342   //
1343   // G_STORE %x:gpr(s32)
1344   //
1345   // And then continue the selection process normally.
1346   MachineInstr *Def = getDefIgnoringCopies(I.getOperand(0).getReg(), MRI);
1347   if (!Def)
1348     return;
1349   Register DefDstReg = Def->getOperand(0).getReg();
1350   LLT DefDstTy = MRI.getType(DefDstReg);
1351   Register StoreSrcReg = I.getOperand(0).getReg();
1352   LLT StoreSrcTy = MRI.getType(StoreSrcReg);
1353 
1354   // If we get something strange like a physical register, then we shouldn't
1355   // go any further.
1356   if (!DefDstTy.isValid())
1357     return;
1358 
1359   // Are the source and dst types the same size?
1360   if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
1361     return;
1362 
1363   if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
1364       RBI.getRegBank(DefDstReg, MRI, TRI))
1365     return;
1366 
1367   // We have a cross-bank copy, which is entering a store. Let's fold it.
1368   I.getOperand(0).setReg(DefDstReg);
1369 }
1370 
1371 bool AArch64InstructionSelector::earlySelect(MachineInstr &I) const {
1372   assert(I.getParent() && "Instruction should be in a basic block!");
1373   assert(I.getParent()->getParent() && "Instruction should be in a function!");
1374 
1375   MachineBasicBlock &MBB = *I.getParent();
1376   MachineFunction &MF = *MBB.getParent();
1377   MachineRegisterInfo &MRI = MF.getRegInfo();
1378 
1379   switch (I.getOpcode()) {
1380   case TargetOpcode::G_SHL:
1381     return earlySelectSHL(I, MRI);
1382   case TargetOpcode::G_CONSTANT: {
1383     bool IsZero = false;
1384     if (I.getOperand(1).isCImm())
1385       IsZero = I.getOperand(1).getCImm()->getZExtValue() == 0;
1386     else if (I.getOperand(1).isImm())
1387       IsZero = I.getOperand(1).getImm() == 0;
1388 
1389     if (!IsZero)
1390       return false;
1391 
1392     Register DefReg = I.getOperand(0).getReg();
1393     LLT Ty = MRI.getType(DefReg);
1394     if (Ty != LLT::scalar(64) && Ty != LLT::scalar(32))
1395       return false;
1396 
1397     if (Ty == LLT::scalar(64)) {
1398       I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
1399       RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
1400     } else {
1401       I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
1402       RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
1403     }
1404     I.setDesc(TII.get(TargetOpcode::COPY));
1405     return true;
1406   }
1407   default:
1408     return false;
1409   }
1410 }
1411 
1412 bool AArch64InstructionSelector::select(MachineInstr &I) {
1413   assert(I.getParent() && "Instruction should be in a basic block!");
1414   assert(I.getParent()->getParent() && "Instruction should be in a function!");
1415 
1416   MachineBasicBlock &MBB = *I.getParent();
1417   MachineFunction &MF = *MBB.getParent();
1418   MachineRegisterInfo &MRI = MF.getRegInfo();
1419 
1420   unsigned Opcode = I.getOpcode();
1421   // G_PHI requires same handling as PHI
1422   if (!isPreISelGenericOpcode(Opcode) || Opcode == TargetOpcode::G_PHI) {
1423     // Certain non-generic instructions also need some special handling.
1424 
1425     if (Opcode ==  TargetOpcode::LOAD_STACK_GUARD)
1426       return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1427 
1428     if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
1429       const Register DefReg = I.getOperand(0).getReg();
1430       const LLT DefTy = MRI.getType(DefReg);
1431 
1432       const RegClassOrRegBank &RegClassOrBank =
1433         MRI.getRegClassOrRegBank(DefReg);
1434 
1435       const TargetRegisterClass *DefRC
1436         = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
1437       if (!DefRC) {
1438         if (!DefTy.isValid()) {
1439           LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
1440           return false;
1441         }
1442         const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
1443         DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI);
1444         if (!DefRC) {
1445           LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
1446           return false;
1447         }
1448       }
1449 
1450       I.setDesc(TII.get(TargetOpcode::PHI));
1451 
1452       return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
1453     }
1454 
1455     if (I.isCopy())
1456       return selectCopy(I, TII, MRI, TRI, RBI);
1457 
1458     return true;
1459   }
1460 
1461 
1462   if (I.getNumOperands() != I.getNumExplicitOperands()) {
1463     LLVM_DEBUG(
1464         dbgs() << "Generic instruction has unexpected implicit operands\n");
1465     return false;
1466   }
1467 
1468   // Try to do some lowering before we start instruction selecting. These
1469   // lowerings are purely transformations on the input G_MIR and so selection
1470   // must continue after any modification of the instruction.
1471   preISelLower(I);
1472 
1473   // There may be patterns where the importer can't deal with them optimally,
1474   // but does select it to a suboptimal sequence so our custom C++ selection
1475   // code later never has a chance to work on it. Therefore, we have an early
1476   // selection attempt here to give priority to certain selection routines
1477   // over the imported ones.
1478   if (earlySelect(I))
1479     return true;
1480 
1481   if (selectImpl(I, *CoverageInfo))
1482     return true;
1483 
1484   LLT Ty =
1485       I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
1486 
1487   MachineIRBuilder MIB(I);
1488 
1489   switch (Opcode) {
1490   case TargetOpcode::G_BRCOND: {
1491     if (Ty.getSizeInBits() > 32) {
1492       // We shouldn't need this on AArch64, but it would be implemented as an
1493       // EXTRACT_SUBREG followed by a TBNZW because TBNZX has no encoding if the
1494       // bit being tested is < 32.
1495       LLVM_DEBUG(dbgs() << "G_BRCOND has type: " << Ty
1496                         << ", expected at most 32-bits");
1497       return false;
1498     }
1499 
1500     const Register CondReg = I.getOperand(0).getReg();
1501     MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1502 
1503     // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1504     // instructions will not be produced, as they are conditional branch
1505     // instructions that do not set flags.
1506     bool ProduceNonFlagSettingCondBr =
1507         !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
1508     if (ProduceNonFlagSettingCondBr && selectCompareBranch(I, MF, MRI))
1509       return true;
1510 
1511     if (ProduceNonFlagSettingCondBr) {
1512       auto MIB = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::TBNZW))
1513                      .addUse(CondReg)
1514                      .addImm(/*bit offset=*/0)
1515                      .addMBB(DestMBB);
1516 
1517       I.eraseFromParent();
1518       return constrainSelectedInstRegOperands(*MIB.getInstr(), TII, TRI, RBI);
1519     } else {
1520       auto CMP = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
1521                      .addDef(AArch64::WZR)
1522                      .addUse(CondReg)
1523                      .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
1524       constrainSelectedInstRegOperands(*CMP.getInstr(), TII, TRI, RBI);
1525       auto Bcc =
1526           BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::Bcc))
1527               .addImm(AArch64CC::NE)
1528               .addMBB(DestMBB);
1529 
1530       I.eraseFromParent();
1531       return constrainSelectedInstRegOperands(*Bcc.getInstr(), TII, TRI, RBI);
1532     }
1533   }
1534 
1535   case TargetOpcode::G_BRINDIRECT: {
1536     I.setDesc(TII.get(AArch64::BR));
1537     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1538   }
1539 
1540   case TargetOpcode::G_BRJT:
1541     return selectBrJT(I, MRI);
1542 
1543   case TargetOpcode::G_BSWAP: {
1544     // Handle vector types for G_BSWAP directly.
1545     Register DstReg = I.getOperand(0).getReg();
1546     LLT DstTy = MRI.getType(DstReg);
1547 
1548     // We should only get vector types here; everything else is handled by the
1549     // importer right now.
1550     if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) {
1551       LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n");
1552       return false;
1553     }
1554 
1555     // Only handle 4 and 2 element vectors for now.
1556     // TODO: 16-bit elements.
1557     unsigned NumElts = DstTy.getNumElements();
1558     if (NumElts != 4 && NumElts != 2) {
1559       LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n");
1560       return false;
1561     }
1562 
1563     // Choose the correct opcode for the supported types. Right now, that's
1564     // v2s32, v4s32, and v2s64.
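    // E.g. a v4s32 byte swap becomes "rev32 vD.16b, vS.16b", which reverses
    // the bytes within each 32-bit element (vD/vS are placeholder registers).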
1565     unsigned Opc = 0;
1566     unsigned EltSize = DstTy.getElementType().getSizeInBits();
1567     if (EltSize == 32)
1568       Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8
1569                                           : AArch64::REV32v16i8;
1570     else if (EltSize == 64)
1571       Opc = AArch64::REV64v16i8;
1572 
1573     // We should always get something by the time we get here...
1574     assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?");
1575 
1576     I.setDesc(TII.get(Opc));
1577     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1578   }
1579 
1580   case TargetOpcode::G_FCONSTANT:
1581   case TargetOpcode::G_CONSTANT: {
1582     const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
1583 
1584     const LLT s8 = LLT::scalar(8);
1585     const LLT s16 = LLT::scalar(16);
1586     const LLT s32 = LLT::scalar(32);
1587     const LLT s64 = LLT::scalar(64);
1588     const LLT p0 = LLT::pointer(0, 64);
1589 
1590     const Register DefReg = I.getOperand(0).getReg();
1591     const LLT DefTy = MRI.getType(DefReg);
1592     const unsigned DefSize = DefTy.getSizeInBits();
1593     const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1594 
1595     // FIXME: Redundant check, but even less readable when factored out.
1596     if (isFP) {
1597       if (Ty != s32 && Ty != s64) {
1598         LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
1599                           << " constant, expected: " << s32 << " or " << s64
1600                           << '\n');
1601         return false;
1602       }
1603 
1604       if (RB.getID() != AArch64::FPRRegBankID) {
1605         LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
1606                           << " constant on bank: " << RB
1607                           << ", expected: FPR\n");
1608         return false;
1609       }
1610 
1611       // The case when we have 0.0 is covered by tablegen. Reject it here so we
1612       // can be sure tablegen works correctly and isn't rescued by this code.
1613       if (I.getOperand(1).getFPImm()->getValueAPF().isExactlyValue(0.0))
1614         return false;
1615     } else {
1616       // s32 and s64 are covered by tablegen.
1617       if (Ty != p0 && Ty != s8 && Ty != s16) {
1618         LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
1619                           << " constant, expected: " << s32 << ", " << s64
1620                           << ", or " << p0 << '\n');
1621         return false;
1622       }
1623 
1624       if (RB.getID() != AArch64::GPRRegBankID) {
1625         LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
1626                           << " constant on bank: " << RB
1627                           << ", expected: GPR\n");
1628         return false;
1629       }
1630     }
1631 
1632     // We allow G_CONSTANT of types < 32b.
1633     const unsigned MovOpc =
1634         DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
1635 
1636     if (isFP) {
1637       // Either emit a FMOV, or emit a copy to emit a normal mov.
1638       const TargetRegisterClass &GPRRC =
1639           DefSize == 32 ? AArch64::GPR32RegClass : AArch64::GPR64RegClass;
1640       const TargetRegisterClass &FPRRC =
1641           DefSize == 32 ? AArch64::FPR32RegClass : AArch64::FPR64RegClass;
1642 
1643       // Can we use a FMOV instruction to represent the immediate?
1644       if (emitFMovForFConstant(I, MRI))
1645         return true;
1646 
1647       // Nope. Emit a copy and use a normal mov instead.
1648       const Register DefGPRReg = MRI.createVirtualRegister(&GPRRC);
1649       MachineOperand &RegOp = I.getOperand(0);
1650       RegOp.setReg(DefGPRReg);
1651       MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
1652       MIB.buildCopy({DefReg}, {DefGPRReg});
1653 
1654       if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
1655         LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
1656         return false;
1657       }
1658 
1659       MachineOperand &ImmOp = I.getOperand(1);
1660       // FIXME: Is going through int64_t always correct?
1661       ImmOp.ChangeToImmediate(
1662           ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
1663     } else if (I.getOperand(1).isCImm()) {
1664       uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
1665       I.getOperand(1).ChangeToImmediate(Val);
1666     } else if (I.getOperand(1).isImm()) {
1667       uint64_t Val = I.getOperand(1).getImm();
1668       I.getOperand(1).ChangeToImmediate(Val);
1669     }
1670 
1671     I.setDesc(TII.get(MovOpc));
1672     constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1673     return true;
1674   }
1675   case TargetOpcode::G_EXTRACT: {
1676     Register DstReg = I.getOperand(0).getReg();
1677     Register SrcReg = I.getOperand(1).getReg();
1678     LLT SrcTy = MRI.getType(SrcReg);
1679     LLT DstTy = MRI.getType(DstReg);
1680     (void)DstTy;
1681     unsigned SrcSize = SrcTy.getSizeInBits();
1682 
1683     if (SrcTy.getSizeInBits() > 64) {
1684       // This should be an extract of an s128, which is like a vector extract.
1685       if (SrcTy.getSizeInBits() != 128)
1686         return false;
1687       // Only support extracting 64 bits from an s128 at the moment.
1688       if (DstTy.getSizeInBits() != 64)
1689         return false;
1690 
1691       const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
1692       const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
1693       // Check we have the right regbank always.
1694       assert(SrcRB.getID() == AArch64::FPRRegBankID &&
1695              DstRB.getID() == AArch64::FPRRegBankID &&
1696              "Wrong extract regbank!");
1697       (void)SrcRB;
1698 
1699       // Emit the same code as a vector extract.
1700       // Offset must be a multiple of 64.
1701       unsigned Offset = I.getOperand(2).getImm();
1702       if (Offset % 64 != 0)
1703         return false;
1704       unsigned LaneIdx = Offset / 64;
1705       MachineIRBuilder MIB(I);
1706       MachineInstr *Extract = emitExtractVectorElt(
1707           DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
1708       if (!Extract)
1709         return false;
1710       I.eraseFromParent();
1711       return true;
1712     }
1713 
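    // The scalar extract becomes a UBFM with immr = offset and
    // imms = offset + width - 1. E.g. extracting 16 bits at bit 8 of an s32
    // is "ubfm wD, wS, #8, #23", i.e. the "ubfx wD, wS, #8, #16" alias.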
1714     I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
1715     MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
1716                                       Ty.getSizeInBits() - 1);
1717 
1718     if (SrcSize < 64) {
1719       assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
1720              "unexpected G_EXTRACT types");
1721       return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1722     }
1723 
1724     DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
1725     MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
1726     MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
1727         .addReg(DstReg, 0, AArch64::sub_32);
1728     RBI.constrainGenericRegister(I.getOperand(0).getReg(),
1729                                  AArch64::GPR32RegClass, MRI);
1730     I.getOperand(0).setReg(DstReg);
1731 
1732     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1733   }
1734 
1735   case TargetOpcode::G_INSERT: {
1736     LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
1737     LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1738     unsigned DstSize = DstTy.getSizeInBits();
1739     // Larger inserts are vectors; same-size ones should be something else by
1740     // now (split up or turned into COPYs).
1741     if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
1742       return false;
1743 
1744     I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
1745     unsigned LSB = I.getOperand(3).getImm();
1746     unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
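    // BFM encodes the insert as immr = (RegSize - LSB) % RegSize and
    // imms = Width - 1. E.g. inserting 16 bits at bit 8 of an s32 gives
    // "bfm wD, wS, #24, #15", i.e. the "bfi wD, wS, #8, #16" alias.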
1747     I.getOperand(3).setImm((DstSize - LSB) % DstSize);
1748     MachineInstrBuilder(MF, I).addImm(Width - 1);
1749 
1750     if (DstSize < 64) {
1751       assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
1752              "unexpected G_INSERT types");
1753       return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1754     }
1755 
1756     Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
1757     BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
1758             TII.get(AArch64::SUBREG_TO_REG))
1759         .addDef(SrcReg)
1760         .addImm(0)
1761         .addUse(I.getOperand(2).getReg())
1762         .addImm(AArch64::sub_32);
1763     RBI.constrainGenericRegister(I.getOperand(2).getReg(),
1764                                  AArch64::GPR32RegClass, MRI);
1765     I.getOperand(2).setReg(SrcReg);
1766 
1767     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1768   }
1769   case TargetOpcode::G_FRAME_INDEX: {
1770     // allocas and G_FRAME_INDEX are only supported in addrspace(0).
1771     if (Ty != LLT::pointer(0, 64)) {
1772       LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
1773                         << ", expected: " << LLT::pointer(0, 64) << '\n');
1774       return false;
1775     }
1776     I.setDesc(TII.get(AArch64::ADDXri));
1777 
1778     // MOs for a #0 shifted immediate.
1779     I.addOperand(MachineOperand::CreateImm(0));
1780     I.addOperand(MachineOperand::CreateImm(0));
1781 
1782     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1783   }
1784 
1785   case TargetOpcode::G_GLOBAL_VALUE: {
1786     auto GV = I.getOperand(1).getGlobal();
1787     if (GV->isThreadLocal())
1788       return selectTLSGlobalValue(I, MRI);
1789 
1790     unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
1791     if (OpFlags & AArch64II::MO_GOT) {
1792       I.setDesc(TII.get(AArch64::LOADgot));
1793       I.getOperand(1).setTargetFlags(OpFlags);
1794     } else if (TM.getCodeModel() == CodeModel::Large) {
1795       // Materialize the global using movz/movk instructions.
1796       materializeLargeCMVal(I, GV, OpFlags);
1797       I.eraseFromParent();
1798       return true;
1799     } else if (TM.getCodeModel() == CodeModel::Tiny) {
1800       I.setDesc(TII.get(AArch64::ADR));
1801       I.getOperand(1).setTargetFlags(OpFlags);
1802     } else {
1803       I.setDesc(TII.get(AArch64::MOVaddr));
1804       I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
1805       MachineInstrBuilder MIB(MF, I);
1806       MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
1807                            OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
1808     }
1809     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1810   }
1811 
1812   case TargetOpcode::G_ZEXTLOAD:
1813   case TargetOpcode::G_LOAD:
1814   case TargetOpcode::G_STORE: {
1815     bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
1816     MachineIRBuilder MIB(I);
1817 
1818     LLT PtrTy = MRI.getType(I.getOperand(1).getReg());
1819 
1820     if (PtrTy != LLT::pointer(0, 64)) {
1821       LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
1822                         << ", expected: " << LLT::pointer(0, 64) << '\n');
1823       return false;
1824     }
1825 
1826     auto &MemOp = **I.memoperands_begin();
1827     if (MemOp.isAtomic()) {
1828       // For now we just support s8 acquire loads to be able to compile stack
1829       // protector code.
1830       if (MemOp.getOrdering() == AtomicOrdering::Acquire &&
1831           MemOp.getSize() == 1) {
1832         I.setDesc(TII.get(AArch64::LDARB));
1833         return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1834       }
1835       LLVM_DEBUG(dbgs() << "Atomic load/store not fully supported yet\n");
1836       return false;
1837     }
1838     unsigned MemSizeInBits = MemOp.getSize() * 8;
1839 
1840     const Register PtrReg = I.getOperand(1).getReg();
1841 #ifndef NDEBUG
1842     const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
1843     // Sanity-check the pointer register.
1844     assert(PtrRB.getID() == AArch64::GPRRegBankID &&
1845            "Load/Store pointer operand isn't a GPR");
1846     assert(MRI.getType(PtrReg).isPointer() &&
1847            "Load/Store pointer operand isn't a pointer");
1848 #endif
1849 
1850     const Register ValReg = I.getOperand(0).getReg();
1851     const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
1852 
1853     const unsigned NewOpc =
1854         selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
1855     if (NewOpc == I.getOpcode())
1856       return false;
1857 
1858     I.setDesc(TII.get(NewOpc));
1859 
1860     uint64_t Offset = 0;
1861     auto *PtrMI = MRI.getVRegDef(PtrReg);
1862 
1863     // Try to fold a GEP into our unsigned immediate addressing mode.
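    // The unsigned-offset forms scale the immediate by the access size, so a
    // byte offset folds only if it is a multiple of that size and the scaled
    // value fits in 12 bits (e.g. multiples of 4 in [0, 16380] for a 4-byte
    // access).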
1864     if (PtrMI->getOpcode() == TargetOpcode::G_PTR_ADD) {
1865       if (auto COff = getConstantVRegVal(PtrMI->getOperand(2).getReg(), MRI)) {
1866         int64_t Imm = *COff;
1867         const unsigned Size = MemSizeInBits / 8;
1868         const unsigned Scale = Log2_32(Size);
1869         if ((Imm & (Size - 1)) == 0 && Imm >= 0 && Imm < (0x1000 << Scale)) {
1870           Register Ptr2Reg = PtrMI->getOperand(1).getReg();
1871           I.getOperand(1).setReg(Ptr2Reg);
1872           PtrMI = MRI.getVRegDef(Ptr2Reg);
1873           Offset = Imm / Size;
1874         }
1875       }
1876     }
1877 
1878     // If we haven't folded anything into our addressing mode yet, try to fold
1879     // a frame index into the base+offset.
1880     if (!Offset && PtrMI->getOpcode() == TargetOpcode::G_FRAME_INDEX)
1881       I.getOperand(1).ChangeToFrameIndex(PtrMI->getOperand(1).getIndex());
1882 
1883     I.addOperand(MachineOperand::CreateImm(Offset));
1884 
1885     // If we're storing a 0, use WZR/XZR.
1886     if (auto CVal = getConstantVRegVal(ValReg, MRI)) {
1887       if (*CVal == 0 && Opcode == TargetOpcode::G_STORE) {
1888         if (I.getOpcode() == AArch64::STRWui)
1889           I.getOperand(0).setReg(AArch64::WZR);
1890         else if (I.getOpcode() == AArch64::STRXui)
1891           I.getOperand(0).setReg(AArch64::XZR);
1892       }
1893     }
1894 
1895     if (IsZExtLoad) {
1896       // The zextload from a smaller type to i32 should be handled by the importer.
1897       if (MRI.getType(ValReg).getSizeInBits() != 64)
1898         return false;
1899       // If we have a ZEXTLOAD then change the load's type to be a narrower reg
1900       // and zero_extend with SUBREG_TO_REG.
1901       Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
1902       Register DstReg = I.getOperand(0).getReg();
1903       I.getOperand(0).setReg(LdReg);
1904 
1905       MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
1906       MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
1907           .addImm(0)
1908           .addUse(LdReg)
1909           .addImm(AArch64::sub_32);
1910       constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1911       return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
1912                                           MRI);
1913     }
1914     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1915   }
1916 
1917   case TargetOpcode::G_SMULH:
1918   case TargetOpcode::G_UMULH: {
1919     // Reject the various things we don't support yet.
1920     if (unsupportedBinOp(I, RBI, MRI, TRI))
1921       return false;
1922 
1923     const Register DefReg = I.getOperand(0).getReg();
1924     const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1925 
1926     if (RB.getID() != AArch64::GPRRegBankID) {
1927       LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n");
1928       return false;
1929     }
1930 
1931     if (Ty != LLT::scalar(64)) {
1932       LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty
1933                         << ", expected: " << LLT::scalar(64) << '\n');
1934       return false;
1935     }
1936 
1937     unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
1938                                                              : AArch64::UMULHrr;
1939     I.setDesc(TII.get(NewOpc));
1940 
1941     // Now that we selected an opcode, we need to constrain the register
1942     // operands to use appropriate classes.
1943     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1944   }
1945   case TargetOpcode::G_FADD:
1946   case TargetOpcode::G_FSUB:
1947   case TargetOpcode::G_FMUL:
1948   case TargetOpcode::G_FDIV:
1949 
1950   case TargetOpcode::G_ASHR:
1951     if (MRI.getType(I.getOperand(0).getReg()).isVector())
1952       return selectVectorASHR(I, MRI);
1953     LLVM_FALLTHROUGH;
1954   case TargetOpcode::G_SHL:
1955     if (Opcode == TargetOpcode::G_SHL &&
1956         MRI.getType(I.getOperand(0).getReg()).isVector())
1957       return selectVectorSHL(I, MRI);
1958     LLVM_FALLTHROUGH;
1959   case TargetOpcode::G_OR:
1960   case TargetOpcode::G_LSHR: {
1961     // Reject the various things we don't support yet.
1962     if (unsupportedBinOp(I, RBI, MRI, TRI))
1963       return false;
1964 
1965     const unsigned OpSize = Ty.getSizeInBits();
1966 
1967     const Register DefReg = I.getOperand(0).getReg();
1968     const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1969 
1970     const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
1971     if (NewOpc == I.getOpcode())
1972       return false;
1973 
1974     I.setDesc(TII.get(NewOpc));
1975     // FIXME: Should the type be always reset in setDesc?
1976 
1977     // Now that we selected an opcode, we need to constrain the register
1978     // operands to use appropriate classes.
1979     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1980   }
1981 
1982   case TargetOpcode::G_PTR_ADD: {
1983     MachineIRBuilder MIRBuilder(I);
1984     emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2),
1985             MIRBuilder);
1986     I.eraseFromParent();
1987     return true;
1988   }
1989   case TargetOpcode::G_UADDO: {
1990     // TODO: Support other types.
1991     unsigned OpSize = Ty.getSizeInBits();
1992     if (OpSize != 32 && OpSize != 64) {
1993       LLVM_DEBUG(
1994           dbgs()
1995           << "G_UADDO currently only supported for 32 and 64 b types.\n");
1996       return false;
1997     }
1998 
1999     // TODO: Support vectors.
2000     if (Ty.isVector()) {
2001       LLVM_DEBUG(dbgs() << "G_UADDO currently only supported for scalars.\n");
2002       return false;
2003     }
2004 
2005     // Add and set the condition flags.
2006     unsigned AddsOpc = OpSize == 32 ? AArch64::ADDSWrr : AArch64::ADDSXrr;
2007     MachineIRBuilder MIRBuilder(I);
2008     auto AddsMI = MIRBuilder.buildInstr(
2009         AddsOpc, {I.getOperand(0).getReg()},
2010         {I.getOperand(2).getReg(), I.getOperand(3).getReg()});
2011     constrainSelectedInstRegOperands(*AddsMI, TII, TRI, RBI);
2012 
2013     // Now, put the overflow result in the register given by the first operand
2014     // to the G_UADDO. CSINC increments the result when the predicate is false,
2015     // so to get the increment when it's true, we need to use the inverse. In
2016     // this case, we want to increment when carry is set.
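    // In other words, emit the equivalent of "cset wD, hs", whose expansion
    // is "csinc wD, wzr, wzr, lo" (wD is a placeholder register).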
2017     auto CsetMI = MIRBuilder
2018                       .buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()},
2019                                   {Register(AArch64::WZR), Register(AArch64::WZR)})
2020                       .addImm(getInvertedCondCode(AArch64CC::HS));
2021     constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI);
2022     I.eraseFromParent();
2023     return true;
2024   }
2025 
2026   case TargetOpcode::G_PTR_MASK: {
2027     uint64_t Align = I.getOperand(2).getImm();
2028     if (Align >= 64 || Align == 0)
2029       return false;
2030 
2031     uint64_t Mask = ~((1ULL << Align) - 1);
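    // E.g. Align == 4 yields Mask == ~0xfULL, clearing the low four bits. The
    // mask is a contiguous run of ones, so it is always a valid ANDXri
    // logical immediate.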
2032     I.setDesc(TII.get(AArch64::ANDXri));
2033     I.getOperand(2).setImm(AArch64_AM::encodeLogicalImmediate(Mask, 64));
2034 
2035     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2036   }
2037   case TargetOpcode::G_PTRTOINT:
2038   case TargetOpcode::G_TRUNC: {
2039     const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2040     const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
2041 
2042     const Register DstReg = I.getOperand(0).getReg();
2043     const Register SrcReg = I.getOperand(1).getReg();
2044 
2045     const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2046     const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2047 
2048     if (DstRB.getID() != SrcRB.getID()) {
2049       LLVM_DEBUG(
2050           dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
2051       return false;
2052     }
2053 
2054     if (DstRB.getID() == AArch64::GPRRegBankID) {
2055       const TargetRegisterClass *DstRC =
2056           getRegClassForTypeOnBank(DstTy, DstRB, RBI);
2057       if (!DstRC)
2058         return false;
2059 
2060       const TargetRegisterClass *SrcRC =
2061           getRegClassForTypeOnBank(SrcTy, SrcRB, RBI);
2062       if (!SrcRC)
2063         return false;
2064 
2065       if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
2066           !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
2067         LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
2068         return false;
2069       }
2070 
2071       if (DstRC == SrcRC) {
2072         // Nothing to be done
2073       } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
2074                  SrcTy == LLT::scalar(64)) {
2075         llvm_unreachable("TableGen can import this case");
2076         return false;
2077       } else if (DstRC == &AArch64::GPR32RegClass &&
2078                  SrcRC == &AArch64::GPR64RegClass) {
2079         I.getOperand(1).setSubReg(AArch64::sub_32);
2080       } else {
2081         LLVM_DEBUG(
2082             dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
2083         return false;
2084       }
2085 
2086       I.setDesc(TII.get(TargetOpcode::COPY));
2087       return true;
2088     } else if (DstRB.getID() == AArch64::FPRRegBankID) {
2089       if (DstTy == LLT::vector(4, 16) && SrcTy == LLT::vector(4, 32)) {
2090         I.setDesc(TII.get(AArch64::XTNv4i16));
2091         constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2092         return true;
2093       }
2094 
2095       if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
2096         MachineIRBuilder MIB(I);
2097         MachineInstr *Extract = emitExtractVectorElt(
2098             DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
2099         if (!Extract)
2100           return false;
2101         I.eraseFromParent();
2102         return true;
2103       }
2104     }
2105 
2106     return false;
2107   }
2108 
2109   case TargetOpcode::G_ANYEXT: {
2110     const Register DstReg = I.getOperand(0).getReg();
2111     const Register SrcReg = I.getOperand(1).getReg();
2112 
2113     const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
2114     if (RBDst.getID() != AArch64::GPRRegBankID) {
2115       LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
2116                         << ", expected: GPR\n");
2117       return false;
2118     }
2119 
2120     const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
2121     if (RBSrc.getID() != AArch64::GPRRegBankID) {
2122       LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
2123                         << ", expected: GPR\n");
2124       return false;
2125     }
2126 
2127     const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
2128 
2129     if (DstSize == 0) {
2130       LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
2131       return false;
2132     }
2133 
2134     if (DstSize != 64 && DstSize > 32) {
2135       LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
2136                         << ", expected: 32 or 64\n");
2137       return false;
2138     }
2139     // At this point G_ANYEXT is just like a plain COPY, but we need
2140     // to explicitly form the 64-bit value if any.
2141     if (DstSize > 32) {
2142       Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
2143       BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
2144           .addDef(ExtSrc)
2145           .addImm(0)
2146           .addUse(SrcReg)
2147           .addImm(AArch64::sub_32);
2148       I.getOperand(1).setReg(ExtSrc);
2149     }
2150     return selectCopy(I, TII, MRI, TRI, RBI);
2151   }
2152 
2153   case TargetOpcode::G_ZEXT:
2154   case TargetOpcode::G_SEXT: {
2155     unsigned Opcode = I.getOpcode();
2156     const bool IsSigned = Opcode == TargetOpcode::G_SEXT;
2157     const Register DefReg = I.getOperand(0).getReg();
2158     const Register SrcReg = I.getOperand(1).getReg();
2159     const LLT DstTy = MRI.getType(DefReg);
2160     const LLT SrcTy = MRI.getType(SrcReg);
2161     unsigned DstSize = DstTy.getSizeInBits();
2162     unsigned SrcSize = SrcTy.getSizeInBits();
2163 
2164     if (DstTy.isVector())
2165       return false; // Should be handled by imported patterns.
2166 
2167     assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
2168                AArch64::GPRRegBankID &&
2169            "Unexpected ext regbank");
2170 
2171     MachineIRBuilder MIB(I);
2172     MachineInstr *ExtI;
2173 
2174     // First check if we're extending the result of a load which has a dest type
2175     // smaller than 32 bits; if so, this zext is redundant. GPR32 is the smallest
2176     // GPR register on AArch64 and all loads which are smaller automatically
2177     // zero-extend the upper bits. E.g.
2178     // %v(s8) = G_LOAD %p, :: (load 1)
2179     // %v2(s32) = G_ZEXT %v(s8)
2180     if (!IsSigned) {
2181       auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
2182       if (LoadMI &&
2183           RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID) {
2184         const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
2185         unsigned BytesLoaded = MemOp->getSize();
2186         if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
2187           return selectCopy(I, TII, MRI, TRI, RBI);
2188       }
2189     }
2190 
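    // Otherwise extend explicitly. E.g. an s32 -> s64 G_SEXT becomes a
    // SUBREG_TO_REG followed by "sbfm xD, xS, #0, #31", the "sxtw xD, wS"
    // alias (xD/xS/wS are placeholder registers).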
2191     if (DstSize == 64) {
2192       // FIXME: Can we avoid manually doing this?
2193       if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass, MRI)) {
2194         LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
2195                           << " operand\n");
2196         return false;
2197       }
2198 
2199       auto SubregToReg =
2200           MIB.buildInstr(AArch64::SUBREG_TO_REG, {&AArch64::GPR64RegClass}, {})
2201               .addImm(0)
2202               .addUse(SrcReg)
2203               .addImm(AArch64::sub_32);
2204 
2205       ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
2206                              {DefReg}, {SubregToReg})
2207                   .addImm(0)
2208                   .addImm(SrcSize - 1);
2209     } else if (DstSize <= 32) {
2210       ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
2211                              {DefReg}, {SrcReg})
2212                   .addImm(0)
2213                   .addImm(SrcSize - 1);
2214     } else {
2215       return false;
2216     }
2217 
2218     constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
2219     I.eraseFromParent();
2220     return true;
2221   }
2222 
2223   case TargetOpcode::G_SITOFP:
2224   case TargetOpcode::G_UITOFP:
2225   case TargetOpcode::G_FPTOSI:
2226   case TargetOpcode::G_FPTOUI: {
2227     const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
2228               SrcTy = MRI.getType(I.getOperand(1).getReg());
2229     const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
2230     if (NewOpc == Opcode)
2231       return false;
2232 
2233     I.setDesc(TII.get(NewOpc));
2234     constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2235 
2236     return true;
2237   }
2238 
2239 
2240   case TargetOpcode::G_INTTOPTR:
2241     // The importer is currently unable to import pointer types since they
2242     // didn't exist in SelectionDAG.
2243     return selectCopy(I, TII, MRI, TRI, RBI);
2244 
2245   case TargetOpcode::G_BITCAST:
2246     // Imported SelectionDAG rules can handle every bitcast except those that
2247     // bitcast from a type to the same type. Ideally, these shouldn't occur
2248     // but we might not run an optimizer that deletes them. The other exception
2249     // is bitcasts involving pointer types, as SelectionDAG has no knowledge
2250     // of them.
2251     return selectCopy(I, TII, MRI, TRI, RBI);
2252 
2253   case TargetOpcode::G_SELECT: {
2254     if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) {
2255       LLVM_DEBUG(dbgs() << "G_SELECT cond has type: " << Ty
2256                         << ", expected: " << LLT::scalar(1) << '\n');
2257       return false;
2258     }
2259 
2260     const Register CondReg = I.getOperand(1).getReg();
2261     const Register TReg = I.getOperand(2).getReg();
2262     const Register FReg = I.getOperand(3).getReg();
2263 
2264     if (tryOptSelect(I))
2265       return true;
2266 
2267     unsigned CSelOpc = selectSelectOpc(I, MRI, RBI);
2268     MachineInstr &TstMI =
2269         *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
2270              .addDef(AArch64::WZR)
2271              .addUse(CondReg)
2272              .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2273 
2274     MachineInstr &CSelMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CSelOpc))
2275                                 .addDef(I.getOperand(0).getReg())
2276                                 .addUse(TReg)
2277                                 .addUse(FReg)
2278                                 .addImm(AArch64CC::NE);
2279 
2280     constrainSelectedInstRegOperands(TstMI, TII, TRI, RBI);
2281     constrainSelectedInstRegOperands(CSelMI, TII, TRI, RBI);
2282 
2283     I.eraseFromParent();
2284     return true;
2285   }
2286   case TargetOpcode::G_ICMP: {
2287     if (Ty.isVector())
2288       return selectVectorICmp(I, MRI);
2289 
2290     if (Ty != LLT::scalar(32)) {
2291       LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
2292                         << ", expected: " << LLT::scalar(32) << '\n');
2293       return false;
2294     }
2295 
2296     MachineIRBuilder MIRBuilder(I);
2297     if (!emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1),
2298                             MIRBuilder))
2299       return false;
2300     emitCSetForICMP(I.getOperand(0).getReg(), I.getOperand(1).getPredicate(),
2301                     MIRBuilder);
2302     I.eraseFromParent();
2303     return true;
2304   }
2305 
2306   case TargetOpcode::G_FCMP: {
2307     if (Ty != LLT::scalar(32)) {
2308       LLVM_DEBUG(dbgs() << "G_FCMP result has type: " << Ty
2309                         << ", expected: " << LLT::scalar(32) << '\n');
2310       return false;
2311     }
2312 
2313     unsigned CmpOpc = selectFCMPOpc(I, MRI);
2314     if (!CmpOpc)
2315       return false;
2316 
2317     // FIXME: regbank
2318 
2319     AArch64CC::CondCode CC1, CC2;
2320     changeFCMPPredToAArch64CC(
2321         (CmpInst::Predicate)I.getOperand(1).getPredicate(), CC1, CC2);
2322 
2323     // Partially build the compare. Decide if we need to add a use for the
2324     // third operand based on whether or not we're comparing against 0.0.
2325     auto CmpMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc))
2326                      .addUse(I.getOperand(2).getReg());
2327 
2328     // If we don't have an immediate compare, then we need to add a use of the
2329     // register which wasn't used for the immediate.
2330     // Note that the immediate will always be the last operand.
2331     if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
2332       CmpMI = CmpMI.addUse(I.getOperand(3).getReg());
2333 
2334     const Register DefReg = I.getOperand(0).getReg();
2335     Register Def1Reg = DefReg;
2336     if (CC2 != AArch64CC::AL)
2337       Def1Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2338 
2339     MachineInstr &CSetMI =
2340         *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
2341              .addDef(Def1Reg)
2342              .addUse(AArch64::WZR)
2343              .addUse(AArch64::WZR)
2344              .addImm(getInvertedCondCode(CC1));
2345 
2346     if (CC2 != AArch64CC::AL) {
2347       Register Def2Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2348       MachineInstr &CSet2MI =
2349           *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
2350                .addDef(Def2Reg)
2351                .addUse(AArch64::WZR)
2352                .addUse(AArch64::WZR)
2353                .addImm(getInvertedCondCode(CC2));
2354       MachineInstr &OrMI =
2355           *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ORRWrr))
2356                .addDef(DefReg)
2357                .addUse(Def1Reg)
2358                .addUse(Def2Reg);
2359       constrainSelectedInstRegOperands(OrMI, TII, TRI, RBI);
2360       constrainSelectedInstRegOperands(CSet2MI, TII, TRI, RBI);
2361     }
2362     constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
2363     constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI);
2364 
2365     I.eraseFromParent();
2366     return true;
2367   }
2368   case TargetOpcode::G_VASTART:
2369     return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
2370                                 : selectVaStartAAPCS(I, MF, MRI);
2371   case TargetOpcode::G_INTRINSIC:
2372     return selectIntrinsic(I, MRI);
2373   case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
2374     return selectIntrinsicWithSideEffects(I, MRI);
2375   case TargetOpcode::G_IMPLICIT_DEF: {
2376     I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
2377     const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2378     const Register DstReg = I.getOperand(0).getReg();
2379     const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2380     const TargetRegisterClass *DstRC =
2381         getRegClassForTypeOnBank(DstTy, DstRB, RBI);
2382     RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
2383     return true;
2384   }
2385   case TargetOpcode::G_BLOCK_ADDR: {
2386     if (TM.getCodeModel() == CodeModel::Large) {
2387       materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
2388       I.eraseFromParent();
2389       return true;
2390     } else {
2391       I.setDesc(TII.get(AArch64::MOVaddrBA));
2392       auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
2393                            I.getOperand(0).getReg())
2394                        .addBlockAddress(I.getOperand(1).getBlockAddress(),
2395                                         /* Offset */ 0, AArch64II::MO_PAGE)
2396                        .addBlockAddress(
2397                            I.getOperand(1).getBlockAddress(), /* Offset */ 0,
2398                            AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
2399       I.eraseFromParent();
2400       return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
2401     }
2402   }
2403   case TargetOpcode::G_INTRINSIC_TRUNC:
2404     return selectIntrinsicTrunc(I, MRI);
2405   case TargetOpcode::G_INTRINSIC_ROUND:
2406     return selectIntrinsicRound(I, MRI);
2407   case TargetOpcode::G_BUILD_VECTOR:
2408     return selectBuildVector(I, MRI);
2409   case TargetOpcode::G_MERGE_VALUES:
2410     return selectMergeValues(I, MRI);
2411   case TargetOpcode::G_UNMERGE_VALUES:
2412     return selectUnmergeValues(I, MRI);
2413   case TargetOpcode::G_SHUFFLE_VECTOR:
2414     return selectShuffleVector(I, MRI);
2415   case TargetOpcode::G_EXTRACT_VECTOR_ELT:
2416     return selectExtractElt(I, MRI);
2417   case TargetOpcode::G_INSERT_VECTOR_ELT:
2418     return selectInsertElt(I, MRI);
2419   case TargetOpcode::G_CONCAT_VECTORS:
2420     return selectConcatVectors(I, MRI);
2421   case TargetOpcode::G_JUMP_TABLE:
2422     return selectJumpTable(I, MRI);
2423   }
2424 
2425   return false;
2426 }
2427 
2428 bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
2429                                             MachineRegisterInfo &MRI) const {
2430   assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
2431   Register JTAddr = I.getOperand(0).getReg();
2432   unsigned JTI = I.getOperand(1).getIndex();
2433   Register Index = I.getOperand(2).getReg();
2434   MachineIRBuilder MIB(I);
2435 
2436   Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2437   Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
2438   MIB.buildInstr(AArch64::JumpTableDest32, {TargetReg, ScratchReg},
2439                  {JTAddr, Index})
2440       .addJumpTableIndex(JTI);
2441 
2442   // Build the indirect branch.
2443   MIB.buildInstr(AArch64::BR, {}, {TargetReg});
2444   I.eraseFromParent();
2445   return true;
2446 }
2447 
2448 bool AArch64InstructionSelector::selectJumpTable(
2449     MachineInstr &I, MachineRegisterInfo &MRI) const {
2450   assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
2451   assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
2452 
2453   Register DstReg = I.getOperand(0).getReg();
2454   unsigned JTI = I.getOperand(1).getIndex();
2455   // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
2456   MachineIRBuilder MIB(I);
2457   auto MovMI =
2458     MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
2459           .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
2460           .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
2461   I.eraseFromParent();
2462   return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
2463 }
2464 
2465 bool AArch64InstructionSelector::selectTLSGlobalValue(
2466     MachineInstr &I, MachineRegisterInfo &MRI) const {
2467   if (!STI.isTargetMachO())
2468     return false;
2469   MachineFunction &MF = *I.getParent()->getParent();
2470   MF.getFrameInfo().setAdjustsStack(true);
2471 
2472   const GlobalValue &GV = *I.getOperand(1).getGlobal();
2473   MachineIRBuilder MIB(I);
2474 
2475   MIB.buildInstr(AArch64::LOADgot, {AArch64::X0}, {})
2476       .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
2477 
2478   auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
2479                              {Register(AArch64::X0)})
2480                   .addImm(0);
2481 
2482   // TLS calls preserve all registers except those that absolutely must be
2483   // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
2484   // silly).
2485   MIB.buildInstr(AArch64::BLR, {}, {Load})
2486       .addDef(AArch64::X0, RegState::Implicit)
2487       .addRegMask(TRI.getTLSCallPreservedMask());
2488 
2489   MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
2490   RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
2491                                MRI);
2492   I.eraseFromParent();
2493   return true;
2494 }
2495 
2496 bool AArch64InstructionSelector::selectIntrinsicTrunc(
2497     MachineInstr &I, MachineRegisterInfo &MRI) const {
2498   const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
2499 
2500   // Select the correct opcode.
2501   unsigned Opc = 0;
2502   if (!SrcTy.isVector()) {
2503     switch (SrcTy.getSizeInBits()) {
2504     default:
2505     case 16:
2506       Opc = AArch64::FRINTZHr;
2507       break;
2508     case 32:
2509       Opc = AArch64::FRINTZSr;
2510       break;
2511     case 64:
2512       Opc = AArch64::FRINTZDr;
2513       break;
2514     }
2515   } else {
2516     unsigned NumElts = SrcTy.getNumElements();
2517     switch (SrcTy.getElementType().getSizeInBits()) {
2518     default:
2519       break;
2520     case 16:
2521       if (NumElts == 4)
2522         Opc = AArch64::FRINTZv4f16;
2523       else if (NumElts == 8)
2524         Opc = AArch64::FRINTZv8f16;
2525       break;
2526     case 32:
2527       if (NumElts == 2)
2528         Opc = AArch64::FRINTZv2f32;
2529       else if (NumElts == 4)
2530         Opc = AArch64::FRINTZv4f32;
2531       break;
2532     case 64:
2533       if (NumElts == 2)
2534         Opc = AArch64::FRINTZv2f64;
2535       break;
2536     }
2537   }
2538 
2539   if (!Opc) {
2540     // Didn't get an opcode above, bail.
2541     LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n");
2542     return false;
2543   }
2544 
2545   // Legalization would have set us up perfectly for this; we just need to
2546   // set the opcode and move on.
2547   I.setDesc(TII.get(Opc));
2548   return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2549 }
2550 
2551 bool AArch64InstructionSelector::selectIntrinsicRound(
2552     MachineInstr &I, MachineRegisterInfo &MRI) const {
2553   const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
2554 
2555   // Select the correct opcode.
2556   unsigned Opc = 0;
2557   if (!SrcTy.isVector()) {
2558     switch (SrcTy.getSizeInBits()) {
2559     default:
2560     case 16:
2561       Opc = AArch64::FRINTAHr;
2562       break;
2563     case 32:
2564       Opc = AArch64::FRINTASr;
2565       break;
2566     case 64:
2567       Opc = AArch64::FRINTADr;
2568       break;
2569     }
2570   } else {
2571     unsigned NumElts = SrcTy.getNumElements();
2572     switch (SrcTy.getElementType().getSizeInBits()) {
2573     default:
2574       break;
2575     case 16:
2576       if (NumElts == 4)
2577         Opc = AArch64::FRINTAv4f16;
2578       else if (NumElts == 8)
2579         Opc = AArch64::FRINTAv8f16;
2580       break;
2581     case 32:
2582       if (NumElts == 2)
2583         Opc = AArch64::FRINTAv2f32;
2584       else if (NumElts == 4)
2585         Opc = AArch64::FRINTAv4f32;
2586       break;
2587     case 64:
2588       if (NumElts == 2)
2589         Opc = AArch64::FRINTAv2f64;
2590       break;
2591     }
2592   }
2593 
2594   if (!Opc) {
2595     // Didn't get an opcode above, bail.
2596     LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n");
2597     return false;
2598   }
2599 
2600   // Legalization would have set us up perfectly for this; we just need to
2601   // set the opcode and move on.
2602   I.setDesc(TII.get(Opc));
2603   return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2604 }
2605 
2606 bool AArch64InstructionSelector::selectVectorICmp(
2607     MachineInstr &I, MachineRegisterInfo &MRI) const {
2608   Register DstReg = I.getOperand(0).getReg();
2609   LLT DstTy = MRI.getType(DstReg);
2610   Register SrcReg = I.getOperand(2).getReg();
2611   Register Src2Reg = I.getOperand(3).getReg();
2612   LLT SrcTy = MRI.getType(SrcReg);
2613 
2614   unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
2615   unsigned NumElts = DstTy.getNumElements();
2616 
2617   // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b
2618   // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16
2619   // Third index is cc opcode:
2620   // 0 == eq
2621   // 1 == ugt
2622   // 2 == uge
2623   // 3 == ult
2624   // 4 == ule
2625   // 5 == sgt
2626   // 6 == sge
2627   // 7 == slt
2628   // 8 == sle
2629   // ne is done by negating 'eq' result.
2630 
2631   // This table below assumes that for some comparisons the operands will be
2632   // commuted.
2633   // ult op == commute + ugt op
2634   // ule op == commute + uge op
2635   // slt op == commute + sgt op
2636   // sle op == commute + sge op
2637   unsigned PredIdx = 0;
2638   bool SwapOperands = false;
2639   CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
2640   switch (Pred) {
2641   case CmpInst::ICMP_NE:
2642   case CmpInst::ICMP_EQ:
2643     PredIdx = 0;
2644     break;
2645   case CmpInst::ICMP_UGT:
2646     PredIdx = 1;
2647     break;
2648   case CmpInst::ICMP_UGE:
2649     PredIdx = 2;
2650     break;
2651   case CmpInst::ICMP_ULT:
2652     PredIdx = 3;
2653     SwapOperands = true;
2654     break;
2655   case CmpInst::ICMP_ULE:
2656     PredIdx = 4;
2657     SwapOperands = true;
2658     break;
2659   case CmpInst::ICMP_SGT:
2660     PredIdx = 5;
2661     break;
2662   case CmpInst::ICMP_SGE:
2663     PredIdx = 6;
2664     break;
2665   case CmpInst::ICMP_SLT:
2666     PredIdx = 7;
2667     SwapOperands = true;
2668     break;
2669   case CmpInst::ICMP_SLE:
2670     PredIdx = 8;
2671     SwapOperands = true;
2672     break;
2673   default:
2674     llvm_unreachable("Unhandled icmp predicate");
2675     return false;
2676   }
2677 
2678   // This table obviously should be tablegen'd when we have our GISel native
2679   // tablegen selector.
2680 
2681   static const unsigned OpcTable[4][4][9] = {
2682       {
2683           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2684            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2685            0 /* invalid */},
2686           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2687            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2688            0 /* invalid */},
2689           {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
2690            AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
2691            AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
2692           {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
2693            AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
2694            AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
2695       },
2696       {
2697           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2698            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2699            0 /* invalid */},
2700           {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
2701            AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
2702            AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
2703           {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
2704            AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
2705            AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
2706           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2707            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2708            0 /* invalid */}
2709       },
2710       {
2711           {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
2712            AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
2713            AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
2714           {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
2715            AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
2716            AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
2717           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2718            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2719            0 /* invalid */},
2720           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2721            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2722            0 /* invalid */}
2723       },
2724       {
2725           {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
2726            AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
2727            AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
2728           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2729            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2730            0 /* invalid */},
2731           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2732            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2733            0 /* invalid */},
2734           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2735            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2736            0 /* invalid */}
2737       },
2738   };
2739   unsigned EltIdx = Log2_32(SrcEltSize / 8);
2740   unsigned NumEltsIdx = Log2_32(NumElts / 2);
2741   unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
2742   if (!Opc) {
2743     LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode");
2744     return false;
2745   }
2746 
2747   const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2748   const TargetRegisterClass *SrcRC =
2749       getRegClassForTypeOnBank(SrcTy, VecRB, RBI, true);
2750   if (!SrcRC) {
2751     LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
2752     return false;
2753   }
2754 
2755   unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
2756   if (SrcTy.getSizeInBits() == 128)
2757     NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
2758 
2759   if (SwapOperands)
2760     std::swap(SrcReg, Src2Reg);
2761 
2762   MachineIRBuilder MIB(I);
2763   auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg});
2764   constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
2765 
2766   // Invert if we had a 'ne' cc.
2767   if (NotOpc) {
2768     Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp});
2769     constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
2770   } else {
2771     MIB.buildCopy(DstReg, Cmp.getReg(0));
2772   }
2773   RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
2774   I.eraseFromParent();
2775   return true;
2776 }
2777 
2778 MachineInstr *AArch64InstructionSelector::emitScalarToVector(
2779     unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
2780     MachineIRBuilder &MIRBuilder) const {
2781   auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
2782 
2783   auto BuildFn = [&](unsigned SubregIndex) {
2784     auto Ins =
2785         MIRBuilder
2786             .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
2787             .addImm(SubregIndex);
2788     constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
2789     constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
2790     return &*Ins;
2791   };
2792 
2793   switch (EltSize) {
2794   case 16:
2795     return BuildFn(AArch64::hsub);
2796   case 32:
2797     return BuildFn(AArch64::ssub);
2798   case 64:
2799     return BuildFn(AArch64::dsub);
2800   default:
2801     return nullptr;
2802   }
2803 }
2804 
2805 bool AArch64InstructionSelector::selectMergeValues(
2806     MachineInstr &I, MachineRegisterInfo &MRI) const {
2807   assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
2808   const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2809   const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
2810   assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
2811   const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
2812 
2813   if (I.getNumOperands() != 3)
2814     return false;
2815 
2816   // Merging 2 s64s into an s128.
2817   if (DstTy == LLT::scalar(128)) {
2818     if (SrcTy.getSizeInBits() != 64)
2819       return false;
2820     MachineIRBuilder MIB(I);
2821     Register DstReg = I.getOperand(0).getReg();
2822     Register Src1Reg = I.getOperand(1).getReg();
2823     Register Src2Reg = I.getOperand(2).getReg();
2824     auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
2825     MachineInstr *InsMI =
2826         emitLaneInsert(None, Tmp.getReg(0), Src1Reg, /* LaneIdx */ 0, RB, MIB);
2827     if (!InsMI)
2828       return false;
2829     MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
2830                                           Src2Reg, /* LaneIdx */ 1, RB, MIB);
2831     if (!Ins2MI)
2832       return false;
2833     constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
2834     constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
2835     I.eraseFromParent();
2836     return true;
2837   }
2838 
2839   if (RB.getID() != AArch64::GPRRegBankID)
2840     return false;
2841 
2842   if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
2843     return false;
2844 
2845   auto *DstRC = &AArch64::GPR64RegClass;
2846   Register SubToRegDef = MRI.createVirtualRegister(DstRC);
2847   MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
2848                                     TII.get(TargetOpcode::SUBREG_TO_REG))
2849                                 .addDef(SubToRegDef)
2850                                 .addImm(0)
2851                                 .addUse(I.getOperand(1).getReg())
2852                                 .addImm(AArch64::sub_32);
2853   Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
2854   // Need to anyext the second scalar before we can use bfm
2855   MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
2856                                     TII.get(TargetOpcode::SUBREG_TO_REG))
2857                                 .addDef(SubToRegDef2)
2858                                 .addImm(0)
2859                                 .addUse(I.getOperand(2).getReg())
2860                                 .addImm(AArch64::sub_32);
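  // The BFMXri below is effectively a bitfield insert (BFI): with immr = 32 and
  // imms = 31 it copies bits [31:0] of the second source into bits [63:32] of
  // the destination, while the tied first source supplies the low 32 bits. The
  // result has the first operand in the low half and the second in the high
  // half, matching G_MERGE_VALUES semantics.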
2861   MachineInstr &BFM =
2862       *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
2863            .addDef(I.getOperand(0).getReg())
2864            .addUse(SubToRegDef)
2865            .addUse(SubToRegDef2)
2866            .addImm(32)
2867            .addImm(31);
2868   constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
2869   constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
2870   constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
2871   I.eraseFromParent();
2872   return true;
2873 }
2874 
2875 static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
2876                               const unsigned EltSize) {
2877   // Choose a lane copy opcode and subregister based off of the size of the
2878   // vector's elements.
2879   switch (EltSize) {
2880   case 16:
2881     CopyOpc = AArch64::CPYi16;
2882     ExtractSubReg = AArch64::hsub;
2883     break;
2884   case 32:
2885     CopyOpc = AArch64::CPYi32;
2886     ExtractSubReg = AArch64::ssub;
2887     break;
2888   case 64:
2889     CopyOpc = AArch64::CPYi64;
2890     ExtractSubReg = AArch64::dsub;
2891     break;
2892   default:
2893     // Unknown size, bail out.
2894     LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
2895     return false;
2896   }
2897   return true;
2898 }
2899 
2900 MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
2901     Optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
2902     Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
2903   MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2904   unsigned CopyOpc = 0;
2905   unsigned ExtractSubReg = 0;
2906   if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
2907     LLVM_DEBUG(
2908         dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
2909     return nullptr;
2910   }
2911 
2912   const TargetRegisterClass *DstRC =
2913       getRegClassForTypeOnBank(ScalarTy, DstRB, RBI, true);
2914   if (!DstRC) {
2915     LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
2916     return nullptr;
2917   }
2918 
2919   const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
2920   const LLT &VecTy = MRI.getType(VecReg);
2921   const TargetRegisterClass *VecRC =
2922       getRegClassForTypeOnBank(VecTy, VecRB, RBI, true);
2923   if (!VecRC) {
2924     LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
2925     return nullptr;
2926   }
2927 
2928   // The register that we're going to copy into.
2929   Register InsertReg = VecReg;
2930   if (!DstReg)
2931     DstReg = MRI.createVirtualRegister(DstRC);
2932   // If the lane index is 0, we just use a subregister COPY.
2933   if (LaneIdx == 0) {
2934     auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
2935                     .addReg(VecReg, 0, ExtractSubReg);
2936     RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
2937     return &*Copy;
2938   }
2939 
2940   // Lane copies require 128-bit wide registers. If we're dealing with an
2941   // unpacked vector, then we need to move up to that width. Insert an implicit
2942   // def and a subregister insert to get us there.
2943   if (VecTy.getSizeInBits() != 128) {
2944     MachineInstr *ScalarToVector = emitScalarToVector(
2945         VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
2946     if (!ScalarToVector)
2947       return nullptr;
2948     InsertReg = ScalarToVector->getOperand(0).getReg();
2949   }
2950 
2951   MachineInstr *LaneCopyMI =
2952       MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
2953   constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
2954 
2955   // Make sure that we actually constrain the initial copy.
2956   RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
2957   return LaneCopyMI;
2958 }
2959 
2960 bool AArch64InstructionSelector::selectExtractElt(
2961     MachineInstr &I, MachineRegisterInfo &MRI) const {
2962   assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
2963          "unexpected opcode!");
2964   Register DstReg = I.getOperand(0).getReg();
2965   const LLT NarrowTy = MRI.getType(DstReg);
2966   const Register SrcReg = I.getOperand(1).getReg();
2967   const LLT WideTy = MRI.getType(SrcReg);
2968   (void)WideTy;
2969   assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
2970          "source register size too small!");
2971   assert(NarrowTy.isScalar() && "cannot extract vector into vector!");
2972 
2973   // Need the lane index to determine the correct copy opcode.
2974   MachineOperand &LaneIdxOp = I.getOperand(2);
2975   assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
2976 
2977   if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
2978     LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
2979     return false;
2980   }
2981 
2982   // Find the index to extract from.
2983   auto VRegAndVal = getConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
2984   if (!VRegAndVal)
2985     return false;
2986   unsigned LaneIdx = VRegAndVal->Value;
2987 
2988   MachineIRBuilder MIRBuilder(I);
2989 
2990   const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2991   MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
2992                                                LaneIdx, MIRBuilder);
2993   if (!Extract)
2994     return false;
2995 
2996   I.eraseFromParent();
2997   return true;
2998 }
2999 
3000 bool AArch64InstructionSelector::selectSplitVectorUnmerge(
3001     MachineInstr &I, MachineRegisterInfo &MRI) const {
3002   unsigned NumElts = I.getNumOperands() - 1;
3003   Register SrcReg = I.getOperand(NumElts).getReg();
3004   const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
3005   const LLT SrcTy = MRI.getType(SrcReg);
3006 
3007   assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
3008   if (SrcTy.getSizeInBits() > 128) {
3009     LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
3010     return false;
3011   }
3012 
3013   MachineIRBuilder MIB(I);
3014 
3015   // We implement a split vector operation by treating the sub-vectors as
3016   // scalars and extracting them.
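  // For example (illustrative), unmerging <4 x s32> into two <2 x s32> halves
  // treats each 64-bit half as a "lane" of the 128-bit source, so it becomes
  // two element extracts at lane indices 0 and 1.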
3017   const RegisterBank &DstRB =
3018       *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
3019   for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
3020     Register Dst = I.getOperand(OpIdx).getReg();
3021     MachineInstr *Extract =
3022         emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
3023     if (!Extract)
3024       return false;
3025   }
3026   I.eraseFromParent();
3027   return true;
3028 }
3029 
3030 bool AArch64InstructionSelector::selectUnmergeValues(
3031     MachineInstr &I, MachineRegisterInfo &MRI) const {
3032   assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
3033          "unexpected opcode");
3034 
3035   // TODO: Handle unmerging into GPRs and from scalars to scalars.
3036   if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
3037           AArch64::FPRRegBankID ||
3038       RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
3039           AArch64::FPRRegBankID) {
3040     LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
3041                          "currently unsupported.\n");
3042     return false;
3043   }
3044 
3045   // The last operand is the vector source register, and every other operand is
3046   // a register to unpack into.
3047   unsigned NumElts = I.getNumOperands() - 1;
3048   Register SrcReg = I.getOperand(NumElts).getReg();
3049   const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
3050   const LLT WideTy = MRI.getType(SrcReg);
3051   (void)WideTy;
3052   assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
3053          "can only unmerge from vector or s128 types!");
3054   assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
3055          "source register size too small!");
3056 
3057   if (!NarrowTy.isScalar())
3058     return selectSplitVectorUnmerge(I, MRI);
3059 
3060   MachineIRBuilder MIB(I);
3061 
3062   // Choose a lane copy opcode and subregister based off of the size of the
3063   // vector's elements.
3064   unsigned CopyOpc = 0;
3065   unsigned ExtractSubReg = 0;
3066   if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
3067     return false;
3068 
3069   // Set up for the lane copies.
3070   MachineBasicBlock &MBB = *I.getParent();
3071 
3072   // Stores the registers we'll be copying from.
3073   SmallVector<Register, 4> InsertRegs;
3074 
3075   // We'll use the first register twice, so we only need NumElts-1 registers.
3076   unsigned NumInsertRegs = NumElts - 1;
3077 
3078   // If our elements fit into exactly 128 bits, then we can copy from the source
3079   // directly. Otherwise, we need to do a bit of setup with some subregister
3080   // inserts.
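  // For example (illustrative), unmerging a <4 x s32> source into s32 pieces
  // can feed the lane copies with the 128-bit source register directly,
  // whereas a 64-bit <2 x s32> source must first be widened to an FPR128 via
  // INSERT_SUBREG, since the CPYi* lane-copy instructions read a 128-bit
  // register.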
3081   if (NarrowTy.getSizeInBits() * NumElts == 128) {
3082     InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
3083   } else {
3084     // Otherwise, we have to perform subregister inserts. For each insert, create
3085     // an implicit def and a subregister insert, and save the register we create.
3086     for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
3087       Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
3088       MachineInstr &ImpDefMI =
3089           *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
3090                    ImpDefReg);
3091 
3092       // Now, create the subregister insert from SrcReg.
3093       Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
3094       MachineInstr &InsMI =
3095           *BuildMI(MBB, I, I.getDebugLoc(),
3096                    TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
3097                .addUse(ImpDefReg)
3098                .addUse(SrcReg)
3099                .addImm(AArch64::dsub);
3100 
3101       constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
3102       constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
3103 
3104       // Save the register so that we can copy from it after.
3105       InsertRegs.push_back(InsertReg);
3106     }
3107   }
3108 
3109   // Now that we've created any necessary subregister inserts, we can
3110   // create the copies.
3111   //
3112   // Perform the first copy separately as a subregister copy.
3113   Register CopyTo = I.getOperand(0).getReg();
3114   auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
3115                        .addReg(InsertRegs[0], 0, ExtractSubReg);
3116   constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
3117 
3118   // Now, perform the remaining copies as vector lane copies.
3119   unsigned LaneIdx = 1;
3120   for (Register InsReg : InsertRegs) {
3121     Register CopyTo = I.getOperand(LaneIdx).getReg();
3122     MachineInstr &CopyInst =
3123         *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
3124              .addUse(InsReg)
3125              .addImm(LaneIdx);
3126     constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
3127     ++LaneIdx;
3128   }
3129 
3130   // Separately constrain the first copy's destination. Because of the
3131   // limitation in constrainOperandRegClass, we can't guarantee that this will
3132   // actually be constrained. So, do it ourselves using the second operand.
3133   const TargetRegisterClass *RC =
3134       MRI.getRegClassOrNull(I.getOperand(1).getReg());
3135   if (!RC) {
3136     LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
3137     return false;
3138   }
3139 
3140   RBI.constrainGenericRegister(CopyTo, *RC, MRI);
3141   I.eraseFromParent();
3142   return true;
3143 }
3144 
3145 bool AArch64InstructionSelector::selectConcatVectors(
3146     MachineInstr &I, MachineRegisterInfo &MRI) const {
3147   assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
3148          "Unexpected opcode");
3149   Register Dst = I.getOperand(0).getReg();
3150   Register Op1 = I.getOperand(1).getReg();
3151   Register Op2 = I.getOperand(2).getReg();
3152   MachineIRBuilder MIRBuilder(I);
3153   MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIRBuilder);
3154   if (!ConcatMI)
3155     return false;
3156   I.eraseFromParent();
3157   return true;
3158 }
3159 
3160 unsigned
3161 AArch64InstructionSelector::emitConstantPoolEntry(Constant *CPVal,
3162                                                   MachineFunction &MF) const {
3163   Type *CPTy = CPVal->getType();
3164   unsigned Align = MF.getDataLayout().getPrefTypeAlignment(CPTy);
3165   if (Align == 0)
3166     Align = MF.getDataLayout().getTypeAllocSize(CPTy);
3167 
3168   MachineConstantPool *MCP = MF.getConstantPool();
3169   return MCP->getConstantPoolIndex(CPVal, Align);
3170 }
3171 
3172 MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
3173     Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
3174   unsigned CPIdx = emitConstantPoolEntry(CPVal, MIRBuilder.getMF());
3175 
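  // The constant is addressed PC-relatively: ADRP materializes the 4 KiB page
  // of the constant pool entry (MO_PAGE), and the load below adds the low 12
  // bits through its scaled offset operand (MO_PAGEOFF | MO_NC).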
3176   auto Adrp =
3177       MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
3178           .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
3179 
3180   MachineInstr *LoadMI = nullptr;
3181   switch (MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType())) {
3182   case 16:
3183     LoadMI =
3184         &*MIRBuilder
3185               .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp})
3186               .addConstantPoolIndex(CPIdx, 0,
3187                                     AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3188     break;
3189   case 8:
3190     LoadMI = &*MIRBuilder
3191                  .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
3192                  .addConstantPoolIndex(
3193                      CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3194     break;
3195   default:
3196     LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
3197                       << *CPVal->getType());
3198     return nullptr;
3199   }
3200   constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
3201   constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
3202   return LoadMI;
3203 }
3204 
3205 /// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
3206 /// size and RB.
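/// For example, a 32-bit element coming from the FPR bank maps to
/// {INSvi32lane, ssub}, while the same size on the GPR bank maps to
/// {INSvi32gpr, ssub}.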
3207 static std::pair<unsigned, unsigned>
3208 getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
3209   unsigned Opc, SubregIdx;
3210   if (RB.getID() == AArch64::GPRRegBankID) {
3211     if (EltSize == 32) {
3212       Opc = AArch64::INSvi32gpr;
3213       SubregIdx = AArch64::ssub;
3214     } else if (EltSize == 64) {
3215       Opc = AArch64::INSvi64gpr;
3216       SubregIdx = AArch64::dsub;
3217     } else {
3218       llvm_unreachable("invalid elt size!");
3219     }
3220   } else {
3221     if (EltSize == 8) {
3222       Opc = AArch64::INSvi8lane;
3223       SubregIdx = AArch64::bsub;
3224     } else if (EltSize == 16) {
3225       Opc = AArch64::INSvi16lane;
3226       SubregIdx = AArch64::hsub;
3227     } else if (EltSize == 32) {
3228       Opc = AArch64::INSvi32lane;
3229       SubregIdx = AArch64::ssub;
3230     } else if (EltSize == 64) {
3231       Opc = AArch64::INSvi64lane;
3232       SubregIdx = AArch64::dsub;
3233     } else {
3234       llvm_unreachable("invalid elt size!");
3235     }
3236   }
3237   return std::make_pair(Opc, SubregIdx);
3238 }
3239 
3240 MachineInstr *
3241 AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
3242                                     MachineOperand &RHS,
3243                                     MachineIRBuilder &MIRBuilder) const {
3244   assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
3245   MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3246   static const unsigned OpcTable[2][2]{{AArch64::ADDXrr, AArch64::ADDXri},
3247                                        {AArch64::ADDWrr, AArch64::ADDWri}};
3248   bool Is32Bit = MRI.getType(LHS.getReg()).getSizeInBits() == 32;
3249   auto ImmFns = selectArithImmed(RHS);
3250   unsigned Opc = OpcTable[Is32Bit][ImmFns.hasValue()];
3251   auto AddMI = MIRBuilder.buildInstr(Opc, {DefReg}, {LHS.getReg()});
3252 
3253   // If we matched a valid constant immediate, add those operands.
3254   if (ImmFns) {
3255     for (auto &RenderFn : *ImmFns)
3256       RenderFn(AddMI);
3257   } else {
3258     AddMI.addUse(RHS.getReg());
3259   }
3260 
3261   constrainSelectedInstRegOperands(*AddMI, TII, TRI, RBI);
3262   return &*AddMI;
3263 }
3264 
3265 MachineInstr *
3266 AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
3267                                     MachineIRBuilder &MIRBuilder) const {
3268   assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
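  // CMN is an ADDS whose result is written to the zero register, so only the
  // NZCV flags survive; it sets flags as if comparing LHS with the negation of
  // RHS, which is why callers can use it to fold away a "G_SUB 0, x" feeding a
  // compare.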
3269   MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3270   static const unsigned OpcTable[2][2]{{AArch64::ADDSXrr, AArch64::ADDSXri},
3271                                        {AArch64::ADDSWrr, AArch64::ADDSWri}};
3272   bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
3273   auto ImmFns = selectArithImmed(RHS);
3274   unsigned Opc = OpcTable[Is32Bit][ImmFns.hasValue()];
3275   Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
3276 
3277   auto CmpMI = MIRBuilder.buildInstr(Opc, {ZReg}, {LHS.getReg()});
3278 
3279   // If we matched a valid constant immediate, add those operands.
3280   if (ImmFns) {
3281     for (auto &RenderFn : *ImmFns)
3282       RenderFn(CmpMI);
3283   } else {
3284     CmpMI.addUse(RHS.getReg());
3285   }
3286 
3287   constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
3288   return &*CmpMI;
3289 }
3290 
3291 MachineInstr *
3292 AArch64InstructionSelector::emitTST(const Register &LHS, const Register &RHS,
3293                                     MachineIRBuilder &MIRBuilder) const {
3294   MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3295   unsigned RegSize = MRI.getType(LHS).getSizeInBits();
3296   bool Is32Bit = (RegSize == 32);
3297   static const unsigned OpcTable[2][2]{{AArch64::ANDSXrr, AArch64::ANDSXri},
3298                                        {AArch64::ANDSWrr, AArch64::ANDSWri}};
3299   Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
3300 
3301   // We might be able to fold an immediate into the TST. We need to make sure
3302   // it's a logical immediate though, since ANDS requires that.
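  // Logical (bitmask) immediates are a replicated, rotated run of set bits,
  // e.g. 0x00ff00ff00ff00ff is encodable while most arbitrary constants are
  // not; isLogicalImmediate() below rejects anything ANDS cannot encode.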
3303   auto ValAndVReg = getConstantVRegValWithLookThrough(RHS, MRI);
3304   bool IsImmForm = ValAndVReg.hasValue() &&
3305                    AArch64_AM::isLogicalImmediate(ValAndVReg->Value, RegSize);
3306   unsigned Opc = OpcTable[Is32Bit][IsImmForm];
3307   auto TstMI = MIRBuilder.buildInstr(Opc, {ZReg}, {LHS});
3308 
3309   if (IsImmForm)
3310     TstMI.addImm(
3311         AArch64_AM::encodeLogicalImmediate(ValAndVReg->Value, RegSize));
3312   else
3313     TstMI.addUse(RHS);
3314 
3315   constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
3316   return &*TstMI;
3317 }
3318 
3319 MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
3320     MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
3321     MachineIRBuilder &MIRBuilder) const {
3322   assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
3323   MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3324 
3325   // Fold the compare if possible.
3326   MachineInstr *FoldCmp =
3327       tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder);
3328   if (FoldCmp)
3329     return FoldCmp;
3330 
3331   // Can't fold into a CMN. Just emit a normal compare.
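  // A plain compare is a SUBS whose result is written to WZR/XZR, so only the
  // NZCV flags survive.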
3332   unsigned CmpOpc = 0;
3333   Register ZReg;
3334 
3335   LLT CmpTy = MRI.getType(LHS.getReg());
3336   assert((CmpTy.isScalar() || CmpTy.isPointer()) &&
3337          "Expected scalar or pointer");
3338   if (CmpTy == LLT::scalar(32)) {
3339     CmpOpc = AArch64::SUBSWrr;
3340     ZReg = AArch64::WZR;
3341   } else if (CmpTy == LLT::scalar(64) || CmpTy.isPointer()) {
3342     CmpOpc = AArch64::SUBSXrr;
3343     ZReg = AArch64::XZR;
3344   } else {
3345     return nullptr;
3346   }
3347 
3348   // Try to match immediate forms.
3349   auto ImmFns = selectArithImmed(RHS);
3350   if (ImmFns)
3351     CmpOpc = CmpOpc == AArch64::SUBSWrr ? AArch64::SUBSWri : AArch64::SUBSXri;
3352 
3353   auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addDef(ZReg).addUse(LHS.getReg());
3354   // If we matched a valid constant immediate, add those operands.
3355   if (ImmFns) {
3356     for (auto &RenderFn : *ImmFns)
3357       RenderFn(CmpMI);
3358   } else {
3359     CmpMI.addUse(RHS.getReg());
3360   }
3361 
3362   // Make sure that we can constrain the compare that we emitted.
3363   constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
3364   return &*CmpMI;
3365 }
3366 
3367 MachineInstr *AArch64InstructionSelector::emitVectorConcat(
3368     Optional<Register> Dst, Register Op1, Register Op2,
3369     MachineIRBuilder &MIRBuilder) const {
3370   // We implement a vector concat by:
3371   // 1. Use scalar_to_vector to insert the lower vector into the larger dest
3372   // 2. Insert the upper vector into the destination's upper element
3373   // TODO: some of this code is common with G_BUILD_VECTOR handling.
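  // For example (illustrative), concatenating two <2 x s32> values first
  // widens each 64-bit operand into an FPR128 via scalar_to_vector, then
  // inserts the second operand into the upper 64-bit "lane" of the first with
  // an INSvi64 instruction, producing the <4 x s32> result.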
3374   MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3375 
3376   const LLT Op1Ty = MRI.getType(Op1);
3377   const LLT Op2Ty = MRI.getType(Op2);
3378 
3379   if (Op1Ty != Op2Ty) {
3380     LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
3381     return nullptr;
3382   }
3383   assert(Op1Ty.isVector() && "Expected a vector for vector concat");
3384 
3385   if (Op1Ty.getSizeInBits() >= 128) {
3386     LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
3387     return nullptr;
3388   }
3389 
3390   // At the moment we just support 64 bit vector concats.
3391   if (Op1Ty.getSizeInBits() != 64) {
3392     LLVM_DEBUG(dbgs() << "Vector concat only supported for 64b vectors");
3393     return nullptr;
3394   }
3395 
3396   const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
3397   const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
3398   const TargetRegisterClass *DstRC =
3399       getMinClassForRegBank(FPRBank, Op1Ty.getSizeInBits() * 2);
3400 
3401   MachineInstr *WidenedOp1 =
3402       emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
3403   MachineInstr *WidenedOp2 =
3404       emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
3405   if (!WidenedOp1 || !WidenedOp2) {
3406     LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
3407     return nullptr;
3408   }
3409 
3410   // Now do the insert of the upper element.
3411   unsigned InsertOpc, InsSubRegIdx;
3412   std::tie(InsertOpc, InsSubRegIdx) =
3413       getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
3414 
3415   if (!Dst)
3416     Dst = MRI.createVirtualRegister(DstRC);
3417   auto InsElt =
3418       MIRBuilder
3419           .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
3420           .addImm(1) /* Lane index */
3421           .addUse(WidenedOp2->getOperand(0).getReg())
3422           .addImm(0);
3423   constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
3424   return &*InsElt;
3425 }
3426 
3427 MachineInstr *AArch64InstructionSelector::emitFMovForFConstant(
3428     MachineInstr &I, MachineRegisterInfo &MRI) const {
3429   assert(I.getOpcode() == TargetOpcode::G_FCONSTANT &&
3430          "Expected a G_FCONSTANT!");
3431   MachineOperand &ImmOp = I.getOperand(1);
3432   unsigned DefSize = MRI.getType(I.getOperand(0).getReg()).getSizeInBits();
3433 
3434   // Only handle 32 and 64 bit defs for now.
3435   if (DefSize != 32 && DefSize != 64)
3436     return nullptr;
3437 
3438   // Don't handle null values using FMOV.
3439   if (ImmOp.getFPImm()->isNullValue())
3440     return nullptr;
3441 
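  // FMOV (immediate) can only encode an 8-bit modified immediate (a sign bit,
  // a 3-bit exponent and a 4-bit fraction), so only a small set of FP
  // constants can be materialized this way.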
3442   // Get the immediate representation for the FMOV.
3443   const APFloat &ImmValAPF = ImmOp.getFPImm()->getValueAPF();
3444   int Imm = DefSize == 32 ? AArch64_AM::getFP32Imm(ImmValAPF)
3445                           : AArch64_AM::getFP64Imm(ImmValAPF);
3446 
3447   // If this is -1, it means the immediate can't be represented as the requested
3448   // floating point value. Bail.
3449   if (Imm == -1)
3450     return nullptr;
3451 
3452   // Update MI to represent the new FMOV instruction, constrain it, and return.
3453   ImmOp.ChangeToImmediate(Imm);
3454   unsigned MovOpc = DefSize == 32 ? AArch64::FMOVSi : AArch64::FMOVDi;
3455   I.setDesc(TII.get(MovOpc));
3456   constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3457   return &I;
3458 }
3459 
3460 MachineInstr *
3461 AArch64InstructionSelector::emitCSetForICMP(Register DefReg, unsigned Pred,
3462                                      MachineIRBuilder &MIRBuilder) const {
3463   // CSINC increments the result when the predicate is false. Invert it.
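  // With both sources tied to WZR and the inverted condition, this is the CSET
  // alias: the register is 1 when the original predicate holds and 0
  // otherwise.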
3464   const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(
3465       CmpInst::getInversePredicate((CmpInst::Predicate)Pred));
3466   auto I = MIRBuilder
3467                .buildInstr(AArch64::CSINCWr, {DefReg},
3468                            {Register(AArch64::WZR), Register(AArch64::WZR)})
3469                .addImm(InvCC);
3470   constrainSelectedInstRegOperands(*I, TII, TRI, RBI);
3471   return &*I;
3472 }
3473 
3474 bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
3475   MachineIRBuilder MIB(I);
3476   MachineRegisterInfo &MRI = *MIB.getMRI();
3477   const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
3478 
3479   // We want to recognize this pattern:
3480   //
3481   // $z = G_FCMP pred, $x, $y
3482   // ...
3483   // $w = G_SELECT $z, $a, $b
3484   //
3485   // Where the value of $z is *only* ever used by the G_SELECT (possibly with
3486   // some copies/truncs in between).
3487   //
3488   // If we see this, then we can emit something like this:
3489   //
3490   // fcmp $x, $y
3491   // fcsel $w, $a, $b, pred
3492   //
3493   // Rather than emitting both of the rather long sequences in the standard
3494   // G_FCMP/G_SELECT select methods.
3495 
3496   // First, check if the condition is defined by a compare.
3497   MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
3498   while (CondDef) {
3499     // We can only fold if all of the defs have one use.
3500     if (!MRI.hasOneUse(CondDef->getOperand(0).getReg()))
3501       return false;
3502 
3503     // We can skip over G_TRUNC since the condition is 1-bit.
3504     // Truncating/extending can have no impact on the value.
3505     unsigned Opc = CondDef->getOpcode();
3506     if (Opc != TargetOpcode::COPY && Opc != TargetOpcode::G_TRUNC)
3507       break;
3508 
3509     // Can't see past copies from physregs.
3510     if (Opc == TargetOpcode::COPY &&
3511         Register::isPhysicalRegister(CondDef->getOperand(1).getReg()))
3512       return false;
3513 
3514     CondDef = MRI.getVRegDef(CondDef->getOperand(1).getReg());
3515   }
3516 
3517   // Is the condition defined by a compare?
3518   if (!CondDef)
3519     return false;
3520 
3521   unsigned CondOpc = CondDef->getOpcode();
3522   if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP)
3523     return false;
3524 
3525   AArch64CC::CondCode CondCode;
3526   if (CondOpc == TargetOpcode::G_ICMP) {
3527     CondCode = changeICMPPredToAArch64CC(
3528         (CmpInst::Predicate)CondDef->getOperand(1).getPredicate());
3529     if (!emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
3530                             CondDef->getOperand(1), MIB)) {
3531       LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
3532       return false;
3533     }
3534   } else {
3535     // Get the condition code for the select.
3536     AArch64CC::CondCode CondCode2;
3537     changeFCMPPredToAArch64CC(
3538         (CmpInst::Predicate)CondDef->getOperand(1).getPredicate(), CondCode,
3539         CondCode2);
3540 
3541     // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
3542     // instructions to emit the comparison.
3543     // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
3544     // unnecessary.
3545     if (CondCode2 != AArch64CC::AL)
3546       return false;
3547 
3548     // Make sure we'll be able to select the compare.
3549     unsigned CmpOpc = selectFCMPOpc(*CondDef, MRI);
3550     if (!CmpOpc)
3551       return false;
3552 
3553     // Emit a new compare.
3554     auto Cmp = MIB.buildInstr(CmpOpc, {}, {CondDef->getOperand(2).getReg()});
3555     if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
3556       Cmp.addUse(CondDef->getOperand(3).getReg());
3557     constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3558   }
3559 
3560   // Emit the select.
3561   unsigned CSelOpc = selectSelectOpc(I, MRI, RBI);
3562   auto CSel =
3563       MIB.buildInstr(CSelOpc, {I.getOperand(0).getReg()},
3564                      {I.getOperand(2).getReg(), I.getOperand(3).getReg()})
3565           .addImm(CondCode);
3566   constrainSelectedInstRegOperands(*CSel, TII, TRI, RBI);
3567   I.eraseFromParent();
3568   return true;
3569 }
3570 
3571 MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
3572     MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
3573     MachineIRBuilder &MIRBuilder) const {
3574   assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
3575          "Unexpected MachineOperand");
3576   MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3577   // We want to find this sort of thing:
3578   // x = G_SUB 0, y
3579   // G_ICMP z, x
3580   //
3581   // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
3582   // e.g:
3583   //
3584   // cmn z, y
3585 
3586   // Helper lambda to detect the subtract followed by the compare.
3587   // Takes in the def of the LHS or RHS, and checks if it's a subtract from 0.
3588   auto IsCMN = [&](MachineInstr *DefMI, const AArch64CC::CondCode &CC) {
3589     if (!DefMI || DefMI->getOpcode() != TargetOpcode::G_SUB)
3590       return false;
3591 
3592     // Need to make sure NZCV is the same at the end of the transformation.
3593     if (CC != AArch64CC::EQ && CC != AArch64CC::NE)
3594       return false;
3599 
3600     // Make sure that we're getting
3601     // x = G_SUB 0, y
3602     auto ValAndVReg =
3603         getConstantVRegValWithLookThrough(DefMI->getOperand(1).getReg(), MRI);
3604     if (!ValAndVReg || ValAndVReg->Value != 0)
3605       return false;
3606 
3607     // This can safely be represented as a CMN.
3608     return true;
3609   };
3610 
3611   // Check if the RHS or LHS of the G_ICMP is defined by a SUB
3612   MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
3613   MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
3614   CmpInst::Predicate P = (CmpInst::Predicate)Predicate.getPredicate();
3615   const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(P);
3616 
3617   // Given this:
3618   //
3619   // x = G_SUB 0, y
3620   // G_ICMP x, z
3621   //
3622   // Produce this:
3623   //
3624   // cmn y, z
3625   if (IsCMN(LHSDef, CC))
3626     return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
3627 
3628   // Same idea here, but with the RHS of the compare instead:
3629   //
3630   // Given this:
3631   //
3632   // x = G_SUB 0, y
3633   // G_ICMP z, x
3634   //
3635   // Produce this:
3636   //
3637   // cmn z, y
3638   if (IsCMN(RHSDef, CC))
3639     return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
3640 
3641   // Given this:
3642   //
3643   // z = G_AND x, y
3644   // G_ICMP z, 0
3645   //
3646   // Produce this if the compare is signed:
3647   //
3648   // tst x, y
3649   if (!isUnsignedICMPPred(P) && LHSDef &&
3650       LHSDef->getOpcode() == TargetOpcode::G_AND) {
3651     // Make sure that the RHS is 0.
3652     auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI);
3653     if (!ValAndVReg || ValAndVReg->Value != 0)
3654       return nullptr;
3655 
3656     return emitTST(LHSDef->getOperand(1).getReg(),
3657                    LHSDef->getOperand(2).getReg(), MIRBuilder);
3658   }
3659 
3660   return nullptr;
3661 }
3662 
3663 bool AArch64InstructionSelector::tryOptVectorDup(MachineInstr &I) const {
3664   // Try to match a vector splat operation into a dup instruction.
3665   // We're looking for this pattern:
3666   //    %scalar:gpr(s64) = COPY $x0
3667   //    %undef:fpr(<2 x s64>) = G_IMPLICIT_DEF
3668   //    %cst0:gpr(s32) = G_CONSTANT i32 0
3669   //    %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32)
3670   //    %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64), %cst0(s32)
3671   //    %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef,
3672   //                                             %zerovec(<2 x s32>)
3673   //
3674   // ...into:
3675   // %splat = DUP %scalar
3676   // We use the regbank of the scalar to determine which kind of dup to use.
3677   MachineIRBuilder MIB(I);
3678   MachineRegisterInfo &MRI = *MIB.getMRI();
3679   const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
3680   using namespace TargetOpcode;
3681   using namespace MIPatternMatch;
3682 
3683   // Begin matching the insert.
3684   auto *InsMI =
3685       getOpcodeDef(G_INSERT_VECTOR_ELT, I.getOperand(1).getReg(), MRI);
3686   if (!InsMI)
3687     return false;
3688   // Match the undef vector operand.
3689   auto *UndefMI =
3690       getOpcodeDef(G_IMPLICIT_DEF, InsMI->getOperand(1).getReg(), MRI);
3691   if (!UndefMI)
3692     return false;
3693   // Match the scalar being splatted.
3694   Register ScalarReg = InsMI->getOperand(2).getReg();
3695   const RegisterBank *ScalarRB = RBI.getRegBank(ScalarReg, MRI, TRI);
3696   // Match the index constant 0.
3697   int64_t Index = 0;
3698   if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ICst(Index)) || Index)
3699     return false;
3700 
3701   // The shuffle's second operand doesn't matter if the mask is all zero.
3702   ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();
3703   if (!all_of(Mask, [](int Elem) { return Elem == 0; }))
3704     return false;
3705 
3706   // We're done, now find out what kind of splat we need.
3707   LLT VecTy = MRI.getType(I.getOperand(0).getReg());
3708   LLT EltTy = VecTy.getElementType();
3709   if (EltTy.getSizeInBits() < 32) {
3710     LLVM_DEBUG(dbgs() << "Could not optimize splat pattern < 32b elts yet");
3711     return false;
3712   }
3713   bool IsFP = ScalarRB->getID() == AArch64::FPRRegBankID;
3714   unsigned Opc = 0;
3715   if (IsFP) {
3716     switch (EltTy.getSizeInBits()) {
3717     case 32:
3718       if (VecTy.getNumElements() == 2) {
3719         Opc = AArch64::DUPv2i32lane;
3720       } else {
3721         Opc = AArch64::DUPv4i32lane;
3722         assert(VecTy.getNumElements() == 4);
3723       }
3724       break;
3725     case 64:
3726       assert(VecTy.getNumElements() == 2 && "Unexpected num elts");
3727       Opc = AArch64::DUPv2i64lane;
3728       break;
3729     }
3730   } else {
3731     switch (EltTy.getSizeInBits()) {
3732     case 32:
3733       if (VecTy.getNumElements() == 2) {
3734         Opc = AArch64::DUPv2i32gpr;
3735       } else {
3736         Opc = AArch64::DUPv4i32gpr;
3737         assert(VecTy.getNumElements() == 4);
3738       }
3739       break;
3740     case 64:
3741       assert(VecTy.getNumElements() == 2 && "Unexpected num elts");
3742       Opc = AArch64::DUPv2i64gpr;
3743       break;
3744     }
3745   }
3746   assert(Opc && "Did not compute an opcode for a dup");
3747 
3748   // For FP splats, we need to widen the scalar reg via undef too.
3749   if (IsFP) {
3750     MachineInstr *Widen = emitScalarToVector(
3751         EltTy.getSizeInBits(), &AArch64::FPR128RegClass, ScalarReg, MIB);
3752     if (!Widen)
3753       return false;
3754     ScalarReg = Widen->getOperand(0).getReg();
3755   }
3756   auto Dup = MIB.buildInstr(Opc, {I.getOperand(0).getReg()}, {ScalarReg});
3757   if (IsFP)
3758     Dup.addImm(0);
3759   constrainSelectedInstRegOperands(*Dup, TII, TRI, RBI);
3760   I.eraseFromParent();
3761   return true;
3762 }
3763 
3764 bool AArch64InstructionSelector::tryOptVectorShuffle(MachineInstr &I) const {
3765   if (TM.getOptLevel() == CodeGenOpt::None)
3766     return false;
3767   if (tryOptVectorDup(I))
3768     return true;
3769   return false;
3770 }
3771 
3772 bool AArch64InstructionSelector::selectShuffleVector(
3773     MachineInstr &I, MachineRegisterInfo &MRI) const {
3774   if (tryOptVectorShuffle(I))
3775     return true;
3776   const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3777   Register Src1Reg = I.getOperand(1).getReg();
3778   const LLT Src1Ty = MRI.getType(Src1Reg);
3779   Register Src2Reg = I.getOperand(2).getReg();
3780   const LLT Src2Ty = MRI.getType(Src2Reg);
3781   ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();
3782 
3783   MachineBasicBlock &MBB = *I.getParent();
3784   MachineFunction &MF = *MBB.getParent();
3785   LLVMContext &Ctx = MF.getFunction().getContext();
3786 
3787   // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
3788   // it's originated from a <1 x T> type. Those should have been lowered into
3789   // G_BUILD_VECTOR earlier.
3790   if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
3791     LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
3792     return false;
3793   }
3794 
3795   unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
3796 
3797   SmallVector<Constant *, 64> CstIdxs;
3798   for (int Val : Mask) {
3799     // For now, any undef indexes we'll just assume to be 0. This should be
3800     // optimized in future, e.g. to select DUP etc.
3801     Val = Val < 0 ? 0 : Val;
3802     for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
3803       unsigned Offset = Byte + Val * BytesPerElt;
3804       CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
3805     }
3806   }
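  // For example (illustrative), a <2 x s64> shuffle with mask <1, 0> has
  // BytesPerElt = 8, so the index vector is the bytes 8..15 followed by 0..7,
  // i.e. a byte-level description of the swap for TBL.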
3807 
3808   MachineIRBuilder MIRBuilder(I);
3809 
3810   // Use a constant pool to load the index vector for TBL.
3811   Constant *CPVal = ConstantVector::get(CstIdxs);
3812   MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIRBuilder);
3813   if (!IndexLoad) {
3814     LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
3815     return false;
3816   }
3817 
3818   if (DstTy.getSizeInBits() != 128) {
3819     assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
3820     // This case can be done with TBL1.
3821     MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIRBuilder);
3822     if (!Concat) {
3823       LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
3824       return false;
3825     }
3826 
3827     // The constant pool load will be 64 bits, so need to convert to FPR128 reg.
3828     IndexLoad =
3829         emitScalarToVector(64, &AArch64::FPR128RegClass,
3830                            IndexLoad->getOperand(0).getReg(), MIRBuilder);
3831 
3832     auto TBL1 = MIRBuilder.buildInstr(
3833         AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
3834         {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
3835     constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);
3836 
3837     auto Copy =
3838         MIRBuilder
3839             .buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
3840             .addReg(TBL1.getReg(0), 0, AArch64::dsub);
3841     RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
3842     I.eraseFromParent();
3843     return true;
3844   }
3845 
3846   // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
3847   // Q registers for regalloc.
3848   auto RegSeq = MIRBuilder
3849                     .buildInstr(TargetOpcode::REG_SEQUENCE,
3850                                 {&AArch64::QQRegClass}, {Src1Reg})
3851                     .addImm(AArch64::qsub0)
3852                     .addUse(Src2Reg)
3853                     .addImm(AArch64::qsub1);
3854 
3855   auto TBL2 =
3856       MIRBuilder.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0).getReg()},
3857                             {RegSeq, IndexLoad->getOperand(0).getReg()});
3858   constrainSelectedInstRegOperands(*RegSeq, TII, TRI, RBI);
3859   constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
3860   I.eraseFromParent();
3861   return true;
3862 }
3863 
3864 MachineInstr *AArch64InstructionSelector::emitLaneInsert(
3865     Optional<Register> DstReg, Register SrcReg, Register EltReg,
3866     unsigned LaneIdx, const RegisterBank &RB,
3867     MachineIRBuilder &MIRBuilder) const {
3868   MachineInstr *InsElt = nullptr;
3869   const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
3870   MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3871 
3872   // Create a register to define with the insert if one wasn't passed in.
3873   if (!DstReg)
3874     DstReg = MRI.createVirtualRegister(DstRC);
3875 
3876   unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
3877   unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
3878 
3879   if (RB.getID() == AArch64::FPRRegBankID) {
3880     auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
3881     InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
3882                  .addImm(LaneIdx)
3883                  .addUse(InsSub->getOperand(0).getReg())
3884                  .addImm(0);
3885   } else {
3886     InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
3887                  .addImm(LaneIdx)
3888                  .addUse(EltReg);
3889   }
3890 
3891   constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
3892   return InsElt;
3893 }
3894 
3895 bool AArch64InstructionSelector::selectInsertElt(
3896     MachineInstr &I, MachineRegisterInfo &MRI) const {
3897   assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
3898 
3899   // Get information on the destination.
3900   Register DstReg = I.getOperand(0).getReg();
3901   const LLT DstTy = MRI.getType(DstReg);
3902   unsigned VecSize = DstTy.getSizeInBits();
3903 
3904   // Get information on the element we want to insert into the destination.
3905   Register EltReg = I.getOperand(2).getReg();
3906   const LLT EltTy = MRI.getType(EltReg);
3907   unsigned EltSize = EltTy.getSizeInBits();
3908   if (EltSize < 16 || EltSize > 64)
3909     return false; // Don't support all element types yet.
3910 
3911   // Find the definition of the index. Bail out if it's not defined by a
3912   // G_CONSTANT.
3913   Register IdxReg = I.getOperand(3).getReg();
3914   auto VRegAndVal = getConstantVRegValWithLookThrough(IdxReg, MRI);
3915   if (!VRegAndVal)
3916     return false;
3917   unsigned LaneIdx = VRegAndVal->Value;
3918 
3919   // Perform the lane insert.
3920   Register SrcReg = I.getOperand(1).getReg();
3921   const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
3922   MachineIRBuilder MIRBuilder(I);
3923 
3924   if (VecSize < 128) {
3925     // If the vector we're inserting into is smaller than 128 bits, widen it
3926     // to 128 to do the insert.
3927     MachineInstr *ScalarToVec = emitScalarToVector(
3928         VecSize, &AArch64::FPR128RegClass, SrcReg, MIRBuilder);
3929     if (!ScalarToVec)
3930       return false;
3931     SrcReg = ScalarToVec->getOperand(0).getReg();
3932   }
3933 
3934   // Create an insert into a new FPR128 register.
3935   // Note that if our vector is already 128 bits, we end up emitting an extra
3936   // register.
3937   MachineInstr *InsMI =
3938       emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIRBuilder);
3939 
3940   if (VecSize < 128) {
3941     // If we had to widen to perform the insert, then we have to demote back to
3942     // the original size to get the result we want.
3943     Register DemoteVec = InsMI->getOperand(0).getReg();
3944     const TargetRegisterClass *RC =
3945         getMinClassForRegBank(*RBI.getRegBank(DemoteVec, MRI, TRI), VecSize);
3946     if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
3947       LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
3948       return false;
3949     }
3950     unsigned SubReg = 0;
3951     if (!getSubRegForClass(RC, TRI, SubReg))
3952       return false;
3953     if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
3954       LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSize
3955                         << "\n");
3956       return false;
3957     }
3958     MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3959         .addReg(DemoteVec, 0, SubReg);
3960     RBI.constrainGenericRegister(DstReg, *RC, MRI);
3961   } else {
3962     // No widening needed.
3963     InsMI->getOperand(0).setReg(DstReg);
3964     constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
3965   }
3966 
3967   I.eraseFromParent();
3968   return true;
3969 }
3970 
3971 bool AArch64InstructionSelector::selectBuildVector(
3972     MachineInstr &I, MachineRegisterInfo &MRI) const {
3973   assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
3974   // Until we port more of the optimized selections, just use a vector insert
3975   // sequence.
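  // For example (illustrative), a G_BUILD_VECTOR of four s32 values becomes a
  // scalar-to-vector insert of element 0 into an FPR128, followed by three
  // INSvi32 lane inserts for elements 1..3; destinations smaller than 128 bits
  // then take a subregister copy at the end.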
3976   const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3977   const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
3978   unsigned EltSize = EltTy.getSizeInBits();
3979   if (EltSize < 16 || EltSize > 64)
3980     return false; // Don't support all element types yet.
3981   const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
3982   MachineIRBuilder MIRBuilder(I);
3983 
3984   const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
3985   MachineInstr *ScalarToVec =
3986       emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
3987                          I.getOperand(1).getReg(), MIRBuilder);
3988   if (!ScalarToVec)
3989     return false;
3990 
3991   Register DstVec = ScalarToVec->getOperand(0).getReg();
3992   unsigned DstSize = DstTy.getSizeInBits();
3993 
3994   // Keep track of the last MI we inserted. Later on, we might be able to save
3995   // a copy using it.
3996   MachineInstr *PrevMI = nullptr;
3997   for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
3998     // Note that if we don't do a subregister copy, we can end up making an
3999     // extra register.
4000     PrevMI = &*emitLaneInsert(None, DstVec, I.getOperand(i).getReg(), i - 1, RB,
4001                               MIRBuilder);
4002     DstVec = PrevMI->getOperand(0).getReg();
4003   }
4004 
4005   // If DstTy's size in bits is less than 128, then emit a subregister copy
4006   // from DstVec to the last register we've defined.
4007   if (DstSize < 128) {
4008     // Force this to be FPR using the destination vector.
4009     const TargetRegisterClass *RC =
4010         getMinClassForRegBank(*RBI.getRegBank(DstVec, MRI, TRI), DstSize);
4011     if (!RC)
4012       return false;
4013     if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
4014       LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
4015       return false;
4016     }
4017 
4018     unsigned SubReg = 0;
4019     if (!getSubRegForClass(RC, TRI, SubReg))
4020       return false;
4021     if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
4022       LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
4023                         << ")\n");
4024       return false;
4025     }
4026 
4027     Register Reg = MRI.createVirtualRegister(RC);
4028     Register DstReg = I.getOperand(0).getReg();
4029 
4030     MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
4031         .addReg(DstVec, 0, SubReg);
4032     MachineOperand &RegOp = I.getOperand(1);
4033     RegOp.setReg(Reg);
4034     RBI.constrainGenericRegister(DstReg, *RC, MRI);
4035   } else {
4036     // We don't need a subregister copy. Save a copy by re-using the
4037     // destination register on the final insert.
4038     assert(PrevMI && "PrevMI was null?");
4039     PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
4040     constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
4041   }
4042 
4043   I.eraseFromParent();
4044   return true;
4045 }
4046 
4047 /// Helper function to find an intrinsic ID on a MachineInstr. Returns the
4048 /// ID if it exists, and 0 otherwise.
4049 static unsigned findIntrinsicID(MachineInstr &I) {
4050   auto IntrinOp = find_if(I.operands(), [&](const MachineOperand &Op) {
4051     return Op.isIntrinsicID();
4052   });
4053   if (IntrinOp == I.operands_end())
4054     return 0;
4055   return IntrinOp->getIntrinsicID();
4056 }
4057 
4058 bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
4059     MachineInstr &I, MachineRegisterInfo &MRI) const {
4060   // Find the intrinsic ID.
4061   unsigned IntrinID = findIntrinsicID(I);
4062   if (!IntrinID)
4063     return false;
4064   MachineIRBuilder MIRBuilder(I);
4065 
4066   // Select the instruction.
4067   switch (IntrinID) {
4068   default:
4069     return false;
4070   case Intrinsic::trap:
4071     MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(1);
4072     break;
4073   case Intrinsic::debugtrap:
4074     if (!STI.isTargetWindows())
4075       return false;
4076     MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000);
4077     break;
4078   }
4079 
4080   I.eraseFromParent();
4081   return true;
4082 }
4083 
4084 bool AArch64InstructionSelector::selectIntrinsic(
4085     MachineInstr &I, MachineRegisterInfo &MRI) const {
4086   unsigned IntrinID = findIntrinsicID(I);
4087   if (!IntrinID)
4088     return false;
4089   MachineIRBuilder MIRBuilder(I);
4090 
4091   switch (IntrinID) {
4092   default:
4093     break;
4094   case Intrinsic::aarch64_crypto_sha1h:
4095     Register DstReg = I.getOperand(0).getReg();
4096     Register SrcReg = I.getOperand(2).getReg();
4097 
4098     // FIXME: Should this be an assert?
4099     if (MRI.getType(DstReg).getSizeInBits() != 32 ||
4100         MRI.getType(SrcReg).getSizeInBits() != 32)
4101       return false;
4102 
4103     // The operation has to happen on FPRs. Set up some new FPR registers for
4104     // the source and destination if they are on GPRs.
4105     if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
4106       SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
4107       MIRBuilder.buildCopy({SrcReg}, {I.getOperand(2)});
4108 
4109       // Make sure the copy ends up getting constrained properly.
4110       RBI.constrainGenericRegister(I.getOperand(2).getReg(),
4111                                    AArch64::GPR32RegClass, MRI);
4112     }
4113 
4114     if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
4115       DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
4116 
4117     // Actually insert the instruction.
4118     auto SHA1Inst = MIRBuilder.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
4119     constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI);
4120 
4121     // Did we create a new register for the destination?
4122     if (DstReg != I.getOperand(0).getReg()) {
4123       // Yep. Copy the result of the instruction back into the original
4124       // destination.
4125       MIRBuilder.buildCopy({I.getOperand(0)}, {DstReg});
4126       RBI.constrainGenericRegister(I.getOperand(0).getReg(),
4127                                    AArch64::GPR32RegClass, MRI);
4128     }
4129 
4130     I.eraseFromParent();
4131     return true;
4132   }
4133   return false;
4134 }
4135 
4136 static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
4137   auto &MI = *Root.getParent();
4138   auto &MBB = *MI.getParent();
4139   auto &MF = *MBB.getParent();
4140   auto &MRI = MF.getRegInfo();
4141   uint64_t Immed;
4142   if (Root.isImm())
4143     Immed = Root.getImm();
4144   else if (Root.isCImm())
4145     Immed = Root.getCImm()->getZExtValue();
4146   else if (Root.isReg()) {
4147     auto ValAndVReg =
4148         getConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
4149     if (!ValAndVReg)
4150       return None;
4151     Immed = ValAndVReg->Value;
4152   } else
4153     return None;
4154   return Immed;
4155 }
4156 
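/// The ShiftA/ShiftB renderers below produce the two immediates needed when a
/// constant left shift is selected as a bitfield move: `lsl wD, wN, #imm` is
/// an alias of `ubfm wD, wN, #((32 - imm) & 31), #(31 - imm)`, so e.g.
/// `lsl w0, w1, #3` becomes `ubfm w0, w1, #29, #28`. The _64 variants compute
/// the same fields for the 64-bit form.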
4157 InstructionSelector::ComplexRendererFns
4158 AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
4159   auto MaybeImmed = getImmedFromMO(Root);
4160   if (MaybeImmed == None || *MaybeImmed > 31)
4161     return None;
4162   uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
4163   return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
4164 }
4165 
4166 InstructionSelector::ComplexRendererFns
4167 AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
4168   auto MaybeImmed = getImmedFromMO(Root);
4169   if (MaybeImmed == None || *MaybeImmed > 31)
4170     return None;
4171   uint64_t Enc = 31 - *MaybeImmed;
4172   return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
4173 }
4174 
4175 InstructionSelector::ComplexRendererFns
4176 AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
4177   auto MaybeImmed = getImmedFromMO(Root);
4178   if (MaybeImmed == None || *MaybeImmed > 63)
4179     return None;
4180   uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
4181   return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
4182 }
4183 
4184 InstructionSelector::ComplexRendererFns
4185 AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
4186   auto MaybeImmed = getImmedFromMO(Root);
4187   if (MaybeImmed == None || *MaybeImmed > 63)
4188     return None;
4189   uint64_t Enc = 63 - *MaybeImmed;
4190   return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
4191 }
4192 
4193 /// Helper to select an immediate value that can be represented as a 12-bit
4194 /// value shifted left by either 0 or 12. If it is possible to do so, return
4195 /// the immediate and shift value. If not, return None.
4196 ///
4197 /// Used by selectArithImmed and selectNegArithImmed.
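///
/// For example, 0x123 renders as {imm = 0x123, shift = LSL #0} and 0x123000
/// as {imm = 0x123, shift = LSL #12}, while 0x1234 fits neither form and
/// yields None.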
4198 InstructionSelector::ComplexRendererFns
4199 AArch64InstructionSelector::select12BitValueWithLeftShift(
4200     uint64_t Immed) const {
4201   unsigned ShiftAmt;
4202   if (Immed >> 12 == 0) {
4203     ShiftAmt = 0;
4204   } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
4205     ShiftAmt = 12;
4206     Immed = Immed >> 12;
4207   } else
4208     return None;
4209 
4210   unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
4211   return {{
4212       [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
4213       [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
4214   }};
4215 }
4216 
4217 /// SelectArithImmed - Select an immediate value that can be represented as
4218 /// a 12-bit value shifted left by either 0 or 12.  If so, return true with
4219 /// Val set to the 12-bit value and Shift set to the shifter operand.
4220 InstructionSelector::ComplexRendererFns
4221 AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
4222   // This function is called from the addsub_shifted_imm ComplexPattern,
4223   // which lists [imm] as the list of opcodes it's interested in, however
4224   // we still need to check whether the operand is actually an immediate
4225   // here because the ComplexPattern opcode list is only used in
4226   // root-level opcode matching.
4227   auto MaybeImmed = getImmedFromMO(Root);
4228   if (MaybeImmed == None)
4229     return None;
4230   return select12BitValueWithLeftShift(*MaybeImmed);
4231 }
4232 
4233 /// SelectNegArithImmed - As above, but negates the value before trying to
4234 /// select it.
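///
/// For example, a compare against -5 can then be selected as `cmn wN, #5`
/// (an add of the positive immediate) instead of materializing -5 first.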
4235 InstructionSelector::ComplexRendererFns
4236 AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
4237   // We need a register here, because we need to know if we have a 64 or 32
4238   // bit immediate.
4239   if (!Root.isReg())
4240     return None;
4241   auto MaybeImmed = getImmedFromMO(Root);
4242   if (MaybeImmed == None)
4243     return None;
4244   uint64_t Immed = *MaybeImmed;
4245 
4246   // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
4247   // have the opposite effect on the C flag, so this pattern mustn't match under
4248   // those circumstances.
4249   if (Immed == 0)
4250     return None;
4251 
4252   // Check whether the type on the root operand is 32 or 64 bits wide, so
4253   // that we negate at the correct width.
4254   MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
4255   if (MRI.getType(Root.getReg()).getSizeInBits() == 32)
4256     Immed = ~((uint32_t)Immed) + 1;
4257   else
4258     Immed = ~Immed + 1ULL;
4259 
4260   if (Immed & 0xFFFFFFFFFF000000ULL)
4261     return None;
4262 
4263   Immed &= 0xFFFFFFULL;
4264   return select12BitValueWithLeftShift(Immed);
4265 }
4266 
4267 /// Return true if it is worth folding MI into an extended register. That is,
4268 /// if it's safe to pull it into the addressing mode of a load or store as a
4269 /// shift.
4270 bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
4271     MachineInstr &MI, const MachineRegisterInfo &MRI) const {
4272   // Always fold if there is one use, or if we're optimizing for size.
4273   Register DefReg = MI.getOperand(0).getReg();
4274   if (MRI.hasOneUse(DefReg) ||
4275       MI.getParent()->getParent()->getFunction().hasMinSize())
4276     return true;
4277 
4278   // It's better to avoid folding and recomputing shifts when we don't have a
4279   // fastpath.
4280   if (!STI.hasLSLFast())
4281     return false;
4282 
4283   // We have a fastpath, so folding a shift in and potentially computing it
4284   // many times may be beneficial. Check if this is only used in memory ops.
4285   // If it is, then we should fold.
4286   return all_of(MRI.use_instructions(DefReg),
4287                 [](MachineInstr &Use) { return Use.mayLoadOrStore(); });
4288 }
4289 
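/// Try to fold an offset of the form `G_SHL reg, #log2(SizeInBytes)` (or the
/// equivalent G_MUL by a power of two) into a scaled register-offset
/// addressing mode, e.g. `ldr x0, [x1, x2, lsl #3]` for an 8-byte access.
/// When WantsExt is true, additionally look through an extend of the offset
/// register so the W-register form (e.g. `sxtw #3`) can be used.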
4290 InstructionSelector::ComplexRendererFns
4291 AArch64InstructionSelector::selectExtendedSHL(
4292     MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset,
4293     unsigned SizeInBytes, bool WantsExt) const {
4294   assert(Base.isReg() && "Expected base to be a register operand");
4295   assert(Offset.isReg() && "Expected offset to be a register operand");
4296 
4297   MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
4298   MachineInstr *OffsetInst = MRI.getVRegDef(Offset.getReg());
4299   if (!OffsetInst)
4300     return None;
4301 
4302   unsigned OffsetOpc = OffsetInst->getOpcode();
4303   if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
4304     return None;
4305 
4306   // Make sure that the memory op is a valid size.
4307   int64_t LegalShiftVal = Log2_32(SizeInBytes);
4308   if (LegalShiftVal == 0)
4309     return None;
4310   if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
4311     return None;
4312 
4313   // Now, try to find the specific G_CONSTANT. Start by assuming that the
4314   // register we will offset is the LHS, and the register containing the
4315   // constant is the RHS.
4316   Register OffsetReg = OffsetInst->getOperand(1).getReg();
4317   Register ConstantReg = OffsetInst->getOperand(2).getReg();
4318   auto ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
4319   if (!ValAndVReg) {
4320     // We didn't get a constant on the RHS. If the opcode is a shift, then
4321     // we're done.
4322     if (OffsetOpc == TargetOpcode::G_SHL)
4323       return None;
4324 
4325     // If we have a G_MUL, we can use either register. Try looking at the RHS.
4326     std::swap(OffsetReg, ConstantReg);
4327     ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
4328     if (!ValAndVReg)
4329       return None;
4330   }
4331 
4332   // The value must fit into 3 bits, and must be positive. Make sure that is
4333   // true.
4334   int64_t ImmVal = ValAndVReg->Value;
4335 
4336   // Since we're going to pull this into a shift, the constant value must be
4337   // a power of 2. If we got a multiply, then we need to check this.
4338   if (OffsetOpc == TargetOpcode::G_MUL) {
4339     if (!isPowerOf2_32(ImmVal))
4340       return None;
4341 
4342     // Got a power of 2. So, the amount we'll shift is the log base-2 of that.
4343     ImmVal = Log2_32(ImmVal);
4344   }
4345 
4346   if ((ImmVal & 0x7) != ImmVal)
4347     return None;
4348 
4349   // We are only allowed to shift by LegalShiftVal. This shift value is built
4350   // into the instruction, so we can't just use whatever we want.
4351   if (ImmVal != LegalShiftVal)
4352     return None;
4353 
4354   unsigned SignExtend = 0;
4355   if (WantsExt) {
4356     // Check if the offset is defined by an extend.
4357     MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI);
4358     auto Ext = getExtendTypeForInst(*ExtInst, MRI, true);
4359     if (Ext == AArch64_AM::InvalidShiftExtend)
4360       return None;
4361 
4362     SignExtend = Ext == AArch64_AM::SXTW;
4363 
4364     // Need a 32-bit wide register here.
4365     MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg()));
4366     OffsetReg = ExtInst->getOperand(1).getReg();
4367     OffsetReg = narrowExtendRegIfNeeded(OffsetReg, MIB);
4368   }
4369 
4370   // We can use the LHS of the GEP as the base, and the LHS of the shift as an
4371   // offset. Signify that we are shifting by setting the shift flag to 1.
4372   return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(Base.getReg()); },
4373            [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
4374            [=](MachineInstrBuilder &MIB) {
4375              // Need to add both immediates here to make sure that they are both
4376              // added to the instruction.
4377              MIB.addImm(SignExtend);
4378              MIB.addImm(1);
4379            }}};
4380 }
4381 
4382 /// This is used for computing addresses like this:
4383 ///
4384 /// ldr x1, [x2, x3, lsl #3]
4385 ///
4386 /// Where x2 is the base register, and x3 is an offset register. The shift-left
4387 /// is a constant value specific to this load instruction. That is, we'll never
4388 /// see anything other than a 3 here (which corresponds to the size of the
4389 /// element being loaded.)
4390 InstructionSelector::ComplexRendererFns
4391 AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
4392     MachineOperand &Root, unsigned SizeInBytes) const {
4393   if (!Root.isReg())
4394     return None;
4395   MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
4396 
4397   // We want to find something like this:
4398   //
4399   // val = G_CONSTANT LegalShiftVal
4400   // shift = G_SHL off_reg val
4401   // ptr = G_PTR_ADD base_reg shift
4402   // x = G_LOAD ptr
4403   //
4404   // And fold it into this addressing mode:
4405   //
4406   // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
4407 
4408   // Check if we can find the G_PTR_ADD.
4409   MachineInstr *PtrAdd =
4410       getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
4411   if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
4412     return None;
4413 
4414   // Now, try to match an opcode which will match our specific offset.
4415   // We want a G_SHL or a G_MUL.
4416   MachineInstr *OffsetInst =
4417       getDefIgnoringCopies(PtrAdd->getOperand(2).getReg(), MRI);
4418   return selectExtendedSHL(Root, PtrAdd->getOperand(1),
4419                            OffsetInst->getOperand(0), SizeInBytes,
4420                            /*WantsExt=*/false);
4421 }
4422 
4423 /// This is used for computing addresses like this:
4424 ///
4425 /// ldr x1, [x2, x3]
4426 ///
4427 /// Where x2 is the base register, and x3 is an offset register.
4428 ///
4429 /// If it is possible (and profitable) to fold a G_PTR_ADD into the address
4430 /// calculation, this will do so. Otherwise, it will return None.
4431 InstructionSelector::ComplexRendererFns
4432 AArch64InstructionSelector::selectAddrModeRegisterOffset(
4433     MachineOperand &Root) const {
4434   MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
4435 
4436   // We need a GEP.
4437   MachineInstr *Gep = MRI.getVRegDef(Root.getReg());
4438   if (!Gep || Gep->getOpcode() != TargetOpcode::G_PTR_ADD)
4439     return None;
4440 
4441   // If this is used more than once, let's not bother folding.
4442   // TODO: Check if they are memory ops. If they are, then we can still fold
4443   // without having to recompute anything.
4444   if (!MRI.hasOneUse(Gep->getOperand(0).getReg()))
4445     return None;
4446 
4447   // Base is the GEP's LHS, offset is its RHS.
4448   return {{[=](MachineInstrBuilder &MIB) {
4449              MIB.addUse(Gep->getOperand(1).getReg());
4450            },
4451            [=](MachineInstrBuilder &MIB) {
4452              MIB.addUse(Gep->getOperand(2).getReg());
4453            },
4454            [=](MachineInstrBuilder &MIB) {
4455              // Need to add both immediates here to make sure that they are both
4456              // added to the instruction.
4457              MIB.addImm(0);
4458              MIB.addImm(0);
4459            }}};
4460 }
4461 
4462 /// This is intended to be equivalent to selectAddrModeXRO in
4463 /// AArch64ISelDAGToDAG. It's used for selecting X register offset loads.
4464 InstructionSelector::ComplexRendererFns
4465 AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
4466                                               unsigned SizeInBytes) const {
4467   MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
4468 
4469   // If we have a constant offset, then we probably don't want to match a
4470   // register offset.
4471   if (isBaseWithConstantOffset(Root, MRI))
4472     return None;
4473 
4474   // Try to fold shifts into the addressing mode.
4475   auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
4476   if (AddrModeFns)
4477     return AddrModeFns;
4478 
4479   // If that doesn't work, see if it's possible to fold in registers from
4480   // a GEP.
4481   return selectAddrModeRegisterOffset(Root);
4482 }
4483 
4484 /// This is used for computing addresses like this:
4485 ///
4486 /// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal]
4487 ///
4488 /// Where we have a 64-bit base register, a 32-bit offset register, and an
4489 /// extend (which may or may not be signed).
4490 InstructionSelector::ComplexRendererFns
4491 AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
4492                                               unsigned SizeInBytes) const {
4493   MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
4494 
4495   MachineInstr *PtrAdd =
4496       getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
4497   if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
4498     return None;
4499 
4500   MachineOperand &LHS = PtrAdd->getOperand(1);
4501   MachineOperand &RHS = PtrAdd->getOperand(2);
4502   MachineInstr *OffsetInst = getDefIgnoringCopies(RHS.getReg(), MRI);
4503 
4504   // The first case is the same as selectAddrModeXRO, except we need an extend.
4505   // In this case, we try to find a shift and extend, and fold them into the
4506   // addressing mode.
4507   //
4508   // E.g.
4509   //
4510   // off_reg = G_Z/S/ANYEXT ext_reg
4511   // val = G_CONSTANT LegalShiftVal
4512   // shift = G_SHL off_reg val
4513   // ptr = G_PTR_ADD base_reg shift
4514   // x = G_LOAD ptr
4515   //
4516   // In this case we can get a load like this:
4517   //
4518   // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal]
4519   auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->getOperand(0),
4520                                        SizeInBytes, /*WantsExt=*/true);
4521   if (ExtendedShl)
4522     return ExtendedShl;
4523 
4524   // There was no shift. We can try and fold a G_Z/S/ANYEXT in alone though.
4525   //
4526   // e.g.
4527   // ldr something, [base_reg, ext_reg, sxtw]
4528   if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
4529     return None;
4530 
4531   // Check if this is an extend. We'll get an extend type if it is.
4532   AArch64_AM::ShiftExtendType Ext =
4533       getExtendTypeForInst(*OffsetInst, MRI, /*IsLoadStore=*/true);
4534   if (Ext == AArch64_AM::InvalidShiftExtend)
4535     return None;
4536 
4537   // Need a 32-bit wide register.
4538   MachineIRBuilder MIB(*PtrAdd);
4539   Register ExtReg =
4540       narrowExtendRegIfNeeded(OffsetInst->getOperand(1).getReg(), MIB);
4541   unsigned SignExtend = Ext == AArch64_AM::SXTW;
4542 
4543   // Base is LHS, offset is ExtReg.
4544   return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(LHS.getReg()); },
4545            [=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
4546            [=](MachineInstrBuilder &MIB) {
4547              MIB.addImm(SignExtend);
4548              MIB.addImm(0);
4549            }}};
4550 }
4551 
4552 /// Select a "register plus unscaled signed 9-bit immediate" address.  This
4553 /// should only match when there is an offset that is not valid for a scaled
4554 /// immediate addressing mode.  The "Size" argument is the size in bytes of the
4555 /// memory reference, which is needed here to know what is valid for a scaled
4556 /// immediate.
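///
/// For example, a 4-byte load at offset #3 cannot use the scaled form (the
/// offset is not a multiple of 4) but fits the unscaled form as
/// `ldur w0, [x1, #3]`; offsets outside [-256, 255] are rejected here.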
4557 InstructionSelector::ComplexRendererFns
4558 AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
4559                                                    unsigned Size) const {
4560   MachineRegisterInfo &MRI =
4561       Root.getParent()->getParent()->getParent()->getRegInfo();
4562 
4563   if (!Root.isReg())
4564     return None;
4565 
4566   if (!isBaseWithConstantOffset(Root, MRI))
4567     return None;
4568 
4569   MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
4570   if (!RootDef)
4571     return None;
4572 
4573   MachineOperand &OffImm = RootDef->getOperand(2);
4574   if (!OffImm.isReg())
4575     return None;
4576   MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
4577   if (!RHS || RHS->getOpcode() != TargetOpcode::G_CONSTANT)
4578     return None;
4579   int64_t RHSC;
4580   MachineOperand &RHSOp1 = RHS->getOperand(1);
4581   if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
4582     return None;
4583   RHSC = RHSOp1.getCImm()->getSExtValue();
4584 
4585   // If the offset is valid as a scaled immediate, don't match here.
4586   if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Log2_32(Size)))
4587     return None;
4588   if (RHSC >= -256 && RHSC < 256) {
4589     MachineOperand &Base = RootDef->getOperand(1);
4590     return {{
4591         [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
4592         [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
4593     }};
4594   }
4595   return None;
4596 }
4597 
4598 /// Select a "register plus scaled unsigned 12-bit immediate" address.  The
4599 /// "Size" argument is the size in bytes of the memory reference, which
4600 /// determines the scale.
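///
/// For example, with Size == 8 a constant offset of 32 is rendered as the
/// scaled immediate 4 (the assembler form `ldr x0, [x1, #32]`).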
4601 InstructionSelector::ComplexRendererFns
4602 AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
4603                                                   unsigned Size) const {
4604   MachineRegisterInfo &MRI =
4605       Root.getParent()->getParent()->getParent()->getRegInfo();
4606 
4607   if (!Root.isReg())
4608     return None;
4609 
4610   MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
4611   if (!RootDef)
4612     return None;
4613 
4614   if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
4615     return {{
4616         [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
4617         [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
4618     }};
4619   }
4620 
4621   if (isBaseWithConstantOffset(Root, MRI)) {
4622     MachineOperand &LHS = RootDef->getOperand(1);
4623     MachineOperand &RHS = RootDef->getOperand(2);
4624     MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
4625     MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
4626     if (LHSDef && RHSDef) {
4627       int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
4628       unsigned Scale = Log2_32(Size);
4629       if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
4630         if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
4631           return {{
4632               [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
4633               [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
4634           }};
4635 
4636         return {{
4637             [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
4638             [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
4639         }};
4640       }
4641     }
4642   }
4643 
4644   // Before falling back to our general case, check if the unscaled
4645   // instructions can handle this. If so, that's preferable.
4646   if (selectAddrModeUnscaled(Root, Size).hasValue())
4647     return None;
4648 
4649   return {{
4650       [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
4651       [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
4652   }};
4653 }
4654 
4655 /// Given a shift instruction, return the correct shift type for that
4656 /// instruction.
4657 static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) {
4658   // TODO: Handle AArch64_AM::ROR
4659   switch (MI.getOpcode()) {
4660   default:
4661     return AArch64_AM::InvalidShiftExtend;
4662   case TargetOpcode::G_SHL:
4663     return AArch64_AM::LSL;
4664   case TargetOpcode::G_LSHR:
4665     return AArch64_AM::LSR;
4666   case TargetOpcode::G_ASHR:
4667     return AArch64_AM::ASR;
4668   }
4669 }
4670 
4671 /// Select a "shifted register" operand. If the value is not shifted, set the
4672 /// shift operand to a default value of "lsl 0".
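///
/// For example, a `G_SHL %x, 3` feeding a G_ADD can be folded so the add is
/// selected as `add x0, x1, x2, lsl #3` instead of emitting a separate shift.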
4673 ///
4674 /// TODO: Allow shifted register to be rotated in logical instructions.
4675 InstructionSelector::ComplexRendererFns
4676 AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root) const {
4677   if (!Root.isReg())
4678     return None;
4679   MachineRegisterInfo &MRI =
4680       Root.getParent()->getParent()->getParent()->getRegInfo();
4681 
4682   // Check if the operand is defined by an instruction which corresponds to
4683   // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc.
4684   //
4685   // TODO: Handle AArch64_AM::ROR for logical instructions.
4686   MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg());
4687   if (!ShiftInst)
4688     return None;
4689   AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(*ShiftInst);
4690   if (ShType == AArch64_AM::InvalidShiftExtend)
4691     return None;
4692   if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI))
4693     return None;
4694 
4695   // Need an immediate on the RHS.
4696   MachineOperand &ShiftRHS = ShiftInst->getOperand(2);
4697   auto Immed = getImmedFromMO(ShiftRHS);
4698   if (!Immed)
4699     return None;
4700 
4701   // We have something that we can fold. Fold in the shift's LHS and RHS into
4702   // the instruction.
4703   MachineOperand &ShiftLHS = ShiftInst->getOperand(1);
4704   Register ShiftReg = ShiftLHS.getReg();
4705 
4706   unsigned NumBits = MRI.getType(ShiftReg).getSizeInBits();
4707   unsigned Val = *Immed & (NumBits - 1);
4708   unsigned ShiftVal = AArch64_AM::getShifterImm(ShType, Val);
4709 
4710   return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ShiftReg); },
4711            [=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}};
4712 }
4713 
4714 AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(
4715     MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const {
4716   unsigned Opc = MI.getOpcode();
4717 
4718   // Handle explicit extend instructions first.
4719   if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) {
4720     unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
4721     assert(Size != 64 && "Extend from 64 bits?");
4722     switch (Size) {
4723     case 8:
4724       return AArch64_AM::SXTB;
4725     case 16:
4726       return AArch64_AM::SXTH;
4727     case 32:
4728       return AArch64_AM::SXTW;
4729     default:
4730       return AArch64_AM::InvalidShiftExtend;
4731     }
4732   }
4733 
4734   if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) {
4735     unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
4736     assert(Size != 64 && "Extend from 64 bits?");
4737     switch (Size) {
4738     case 8:
4739       return AArch64_AM::UXTB;
4740     case 16:
4741       return AArch64_AM::UXTH;
4742     case 32:
4743       return AArch64_AM::UXTW;
4744     default:
4745       return AArch64_AM::InvalidShiftExtend;
4746     }
4747   }
4748 
4749   // Don't have an explicit extend. Try to handle a G_AND with a constant mask
4750   // on the RHS.
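  // For example, `%v = G_AND %x, 0xffff` behaves like a zero-extend from 16
  // bits, so it maps to UXTH below.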
4751   if (Opc != TargetOpcode::G_AND)
4752     return AArch64_AM::InvalidShiftExtend;
4753 
4754   Optional<uint64_t> MaybeAndMask = getImmedFromMO(MI.getOperand(2));
4755   if (!MaybeAndMask)
4756     return AArch64_AM::InvalidShiftExtend;
4757   uint64_t AndMask = *MaybeAndMask;
4758   switch (AndMask) {
4759   default:
4760     return AArch64_AM::InvalidShiftExtend;
4761   case 0xFF:
4762     return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
4763   case 0xFFFF:
4764     return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
4765   case 0xFFFFFFFF:
4766     return AArch64_AM::UXTW;
4767   }
4768 }
4769 
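/// If \p ExtReg is not already 32 bits wide, emit a COPY into a fresh GPR32
/// (selected as a subregister copy) so it can be used as the 32-bit source of
/// an extended-register operand; otherwise return \p ExtReg unchanged.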
4770 Register AArch64InstructionSelector::narrowExtendRegIfNeeded(
4771     Register ExtReg, MachineIRBuilder &MIB) const {
4772   MachineRegisterInfo &MRI = *MIB.getMRI();
4773   if (MRI.getType(ExtReg).getSizeInBits() == 32)
4774     return ExtReg;
4775 
4776   // Insert a copy to move ExtReg to GPR32.
4777   Register NarrowReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
4778   auto Copy = MIB.buildCopy({NarrowReg}, {ExtReg});
4779 
4780   // Select the copy into a subregister copy.
4781   selectCopy(*Copy, TII, MRI, TRI, RBI);
4782   return Copy.getReg(0);
4783 }
4784 
4785 /// Select an "extended register" operand. This operand folds in an extend
4786 /// followed by an optional left shift.
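///
/// For example, `%s = G_SEXT %w; %r = G_SHL %s, 2` feeding a G_ADD can be
/// selected as `add x0, x1, w2, sxtw #2`.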
4787 InstructionSelector::ComplexRendererFns
4788 AArch64InstructionSelector::selectArithExtendedRegister(
4789     MachineOperand &Root) const {
4790   if (!Root.isReg())
4791     return None;
4792   MachineRegisterInfo &MRI =
4793       Root.getParent()->getParent()->getParent()->getRegInfo();
4794 
4795   uint64_t ShiftVal = 0;
4796   Register ExtReg;
4797   AArch64_AM::ShiftExtendType Ext;
4798   MachineInstr *RootDef = getDefIgnoringCopies(Root.getReg(), MRI);
4799   if (!RootDef)
4800     return None;
4801 
4802   if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI))
4803     return None;
4804 
4805   // Check if we can fold a shift and an extend.
4806   if (RootDef->getOpcode() == TargetOpcode::G_SHL) {
4807     // Look for a constant on the RHS of the shift.
4808     MachineOperand &RHS = RootDef->getOperand(2);
4809     Optional<uint64_t> MaybeShiftVal = getImmedFromMO(RHS);
4810     if (!MaybeShiftVal)
4811       return None;
4812     ShiftVal = *MaybeShiftVal;
4813     if (ShiftVal > 4)
4814       return None;
4815     // Look for a valid extend instruction on the LHS of the shift.
4816     MachineOperand &LHS = RootDef->getOperand(1);
4817     MachineInstr *ExtDef = getDefIgnoringCopies(LHS.getReg(), MRI);
4818     if (!ExtDef)
4819       return None;
4820     Ext = getExtendTypeForInst(*ExtDef, MRI);
4821     if (Ext == AArch64_AM::InvalidShiftExtend)
4822       return None;
4823     ExtReg = ExtDef->getOperand(1).getReg();
4824   } else {
4825     // Didn't get a shift. Try just folding an extend.
4826     Ext = getExtendTypeForInst(*RootDef, MRI);
4827     if (Ext == AArch64_AM::InvalidShiftExtend)
4828       return None;
4829     ExtReg = RootDef->getOperand(1).getReg();
4830 
4831     // If we have a 32 bit instruction which zeroes out the high half of a
4832     // register, we get an implicit zero extend for free. Check if we have one.
4833     // FIXME: We actually emit the extend right now even though we don't have
4834     // to.
4835     if (Ext == AArch64_AM::UXTW && MRI.getType(ExtReg).getSizeInBits() == 32) {
4836       MachineInstr *ExtInst = MRI.getVRegDef(ExtReg);
4837       if (ExtInst && isDef32(*ExtInst))
4838         return None;
4839     }
4840   }
4841 
4842   // We require a GPR32 here. Narrow the ExtReg if needed using a subregister
4843   // copy.
4844   MachineIRBuilder MIB(*RootDef);
4845   ExtReg = narrowExtendRegIfNeeded(ExtReg, MIB);
4846 
4847   return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
4848            [=](MachineInstrBuilder &MIB) {
4849              MIB.addImm(getArithExtendImm(Ext, ShiftVal));
4850            }}};
4851 }
4852 
4853 void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
4854                                                 const MachineInstr &MI,
4855                                                 int OpIdx) const {
4856   const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
4857   assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
4858          "Expected G_CONSTANT");
4859   Optional<int64_t> CstVal = getConstantVRegVal(MI.getOperand(0).getReg(), MRI);
4860   assert(CstVal && "Expected constant value");
4861   MIB.addImm(CstVal.getValue());
4862 }
4863 
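/// Render a G_CONSTANT as the bitmask-immediate encoding expected by the
/// 32-bit logical-immediate instruction forms rather than as the raw value;
/// renderLogicalImm64 below does the same for the 64-bit encoding.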
4864 void AArch64InstructionSelector::renderLogicalImm32(
4865   MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
4866   assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
4867          "Expected G_CONSTANT");
4868   uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
4869   uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 32);
4870   MIB.addImm(Enc);
4871 }
4872 
4873 void AArch64InstructionSelector::renderLogicalImm64(
4874   MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
4875   assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
4876          "Expected G_CONSTANT");
4877   uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
4878   uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 64);
4879   MIB.addImm(Enc);
4880 }
4881 
4882 bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
4883     const MachineInstr &MI, unsigned NumBytes) const {
4884   if (!MI.mayLoadOrStore())
4885     return false;
4886   assert(MI.hasOneMemOperand() &&
4887          "Expected load/store to have only one mem op!");
4888   return (*MI.memoperands_begin())->getSize() == NumBytes;
4889 }
4890 
4891 bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
4892   const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
4893   if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() != 32)
4894     return false;
4895 
4896   // Only return true if we know the operation will zero-out the high half of
4897   // the 64-bit register. Truncates can be subregister copies, which don't
4898   // zero out the high bits. Copies and other copy-like instructions can be
4899   // fed by truncates, or could be lowered as subregister copies.
4900   switch (MI.getOpcode()) {
4901   default:
4902     return true;
4903   case TargetOpcode::COPY:
4904   case TargetOpcode::G_BITCAST:
4905   case TargetOpcode::G_TRUNC:
4906   case TargetOpcode::G_PHI:
4907     return false;
4908   }
4909 }
4910 
4911 namespace llvm {
4912 InstructionSelector *
4913 createAArch64InstructionSelector(const AArch64TargetMachine &TM,
4914                                  AArch64Subtarget &Subtarget,
4915                                  AArch64RegisterBankInfo &RBI) {
4916   return new AArch64InstructionSelector(TM, Subtarget, RBI);
4917 }
4918 }
4919