1 //===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This file implements the targeting of the InstructionSelector class for
10 /// AArch64.
11 /// \todo This should be generated by TableGen.
12 //===----------------------------------------------------------------------===//
13
14 #include "AArch64InstrInfo.h"
15 #include "AArch64MachineFunctionInfo.h"
16 #include "AArch64RegisterBankInfo.h"
17 #include "AArch64RegisterInfo.h"
18 #include "AArch64Subtarget.h"
19 #include "AArch64TargetMachine.h"
20 #include "MCTargetDesc/AArch64AddressingModes.h"
21 #include "MCTargetDesc/AArch64MCTargetDesc.h"
22 #include "llvm/ADT/Optional.h"
23 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
24 #include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
25 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
26 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
27 #include "llvm/CodeGen/GlobalISel/Utils.h"
28 #include "llvm/CodeGen/MachineBasicBlock.h"
29 #include "llvm/CodeGen/MachineConstantPool.h"
30 #include "llvm/CodeGen/MachineFunction.h"
31 #include "llvm/CodeGen/MachineInstr.h"
32 #include "llvm/CodeGen/MachineInstrBuilder.h"
33 #include "llvm/CodeGen/MachineOperand.h"
34 #include "llvm/CodeGen/MachineRegisterInfo.h"
35 #include "llvm/CodeGen/TargetOpcodes.h"
36 #include "llvm/IR/Constants.h"
37 #include "llvm/IR/PatternMatch.h"
38 #include "llvm/IR/Type.h"
39 #include "llvm/IR/IntrinsicsAArch64.h"
40 #include "llvm/Pass.h"
41 #include "llvm/Support/Debug.h"
42 #include "llvm/Support/raw_ostream.h"
43
44 #define DEBUG_TYPE "aarch64-isel"
45
46 using namespace llvm;
47 using namespace MIPatternMatch;
48
49 namespace {
50
51 #define GET_GLOBALISEL_PREDICATE_BITSET
52 #include "AArch64GenGlobalISel.inc"
53 #undef GET_GLOBALISEL_PREDICATE_BITSET
54
55 class AArch64InstructionSelector : public InstructionSelector {
56 public:
57 AArch64InstructionSelector(const AArch64TargetMachine &TM,
58 const AArch64Subtarget &STI,
59 const AArch64RegisterBankInfo &RBI);
60
61 bool select(MachineInstr &I) override;
62 static const char *getName() { return DEBUG_TYPE; }
63
64 void setupMF(MachineFunction &MF, GISelKnownBits &KB,
65 CodeGenCoverage &CoverageInfo) override {
66 InstructionSelector::setupMF(MF, KB, CoverageInfo);
67
68 // hasFnAttribute() is expensive to call on every BRCOND selection, so
69 // cache it here for each run of the selector.
70 ProduceNonFlagSettingCondBr =
71 !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
72 MFReturnAddr = Register();
73
74 processPHIs(MF);
75 }
76
77 private:
78 /// tblgen-erated 'select' implementation, used as the initial selector for
79 /// the patterns that don't require complex C++.
80 bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
81
82 // A lowering phase that runs before any selection attempts.
83 // Returns true if the instruction was modified.
84 bool preISelLower(MachineInstr &I);
85
86 // An early selection function that runs before the selectImpl() call.
87 bool earlySelect(MachineInstr &I) const;
88
89 // Do some preprocessing of G_PHIs before we begin selection.
90 void processPHIs(MachineFunction &MF);
91
92 bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
93
94 /// Eliminate same-sized cross-bank copies into stores before selectImpl().
95 bool contractCrossBankCopyIntoStore(MachineInstr &I,
96 MachineRegisterInfo &MRI);
97
98 bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);
99
100 bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
101 MachineRegisterInfo &MRI) const;
102 bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
103 MachineRegisterInfo &MRI) const;
104
105 ///@{
106 /// Helper functions for selectCompareBranch.
107 bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp,
108 MachineIRBuilder &MIB) const;
109 bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
110 MachineIRBuilder &MIB) const;
111 bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
112 MachineIRBuilder &MIB) const;
113 bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
114 MachineBasicBlock *DstMBB,
115 MachineIRBuilder &MIB) const;
116 ///@}
117
118 bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
119 MachineRegisterInfo &MRI) const;
120
121 bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI) const;
122 bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
123
124 // Helper to generate an equivalent of scalar_to_vector into a new register,
125 // returned via 'Dst'.
126 MachineInstr *emitScalarToVector(unsigned EltSize,
127 const TargetRegisterClass *DstRC,
128 Register Scalar,
129 MachineIRBuilder &MIRBuilder) const;
130
131 /// Emit a lane insert into \p DstReg, or a new vector register if None is
132 /// provided.
133 ///
134 /// The lane inserted into is defined by \p LaneIdx. The vector source
135 /// register is given by \p SrcReg. The register containing the element is
136 /// given by \p EltReg.
137 MachineInstr *emitLaneInsert(Optional<Register> DstReg, Register SrcReg,
138 Register EltReg, unsigned LaneIdx,
139 const RegisterBank &RB,
140 MachineIRBuilder &MIRBuilder) const;
141 bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
142 bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
143 MachineRegisterInfo &MRI) const;
144 bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
145 bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
146 bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
147
148 bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
149 bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
150 bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI) const;
151 bool selectSplitVectorUnmerge(MachineInstr &I,
152 MachineRegisterInfo &MRI) const;
153 bool selectIntrinsicWithSideEffects(MachineInstr &I,
154 MachineRegisterInfo &MRI) const;
155 bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
156 bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI) const;
157 bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
158 bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
159 bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI) const;
160 bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI) const;
161 bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI) const;
162 bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI) const;
163
164 unsigned emitConstantPoolEntry(const Constant *CPVal,
165 MachineFunction &MF) const;
166 MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
167 MachineIRBuilder &MIRBuilder) const;
168
169 // Emit a vector concat operation.
170 MachineInstr *emitVectorConcat(Optional<Register> Dst, Register Op1,
171 Register Op2,
172 MachineIRBuilder &MIRBuilder) const;
173
174 // Emit an integer compare between LHS and RHS, which checks for Predicate.
175 MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
176 MachineOperand &Predicate,
177 MachineIRBuilder &MIRBuilder) const;
178
179 /// Emit a floating point comparison between \p LHS and \p RHS.
180 MachineInstr *emitFPCompare(Register LHS, Register RHS,
181 MachineIRBuilder &MIRBuilder) const;
182
183 MachineInstr *emitInstr(unsigned Opcode,
184 std::initializer_list<llvm::DstOp> DstOps,
185 std::initializer_list<llvm::SrcOp> SrcOps,
186 MachineIRBuilder &MIRBuilder,
187 const ComplexRendererFns &RenderFns = None) const;
188 /// Helper function to emit an add or sub instruction.
189 ///
190 /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above
191 /// in a specific order.
192 ///
193 /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
194 ///
195 /// \code
196 /// const std::array<std::array<unsigned, 2>, 5> Table {
197 /// {{AArch64::ADDXri, AArch64::ADDWri},
198 /// {AArch64::ADDXrs, AArch64::ADDWrs},
199 /// {AArch64::ADDXrr, AArch64::ADDWrr},
200 /// {AArch64::SUBXri, AArch64::SUBWri},
201 /// {AArch64::ADDXrx, AArch64::ADDWrx}}};
202 /// \endcode
203 ///
204 /// Each row in the table corresponds to a different addressing mode. Each
205 /// column corresponds to a different register size.
206 ///
207 /// \attention Rows must be structured as follows:
208 /// - Row 0: The ri opcode variants
209 /// - Row 1: The rs opcode variants
210 /// - Row 2: The rr opcode variants
211 /// - Row 3: The ri opcode variants for negative immediates
212 /// - Row 4: The rx opcode variants
213 ///
214 /// \attention Columns must be structured as follows:
215 /// - Column 0: The 64-bit opcode variants
216 /// - Column 1: The 32-bit opcode variants
217 ///
218 /// \p Dst is the destination register of the binop to emit.
219 /// \p LHS is the left-hand operand of the binop to emit.
220 /// \p RHS is the right-hand operand of the binop to emit.
221 MachineInstr *emitAddSub(
222 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
223 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
224 MachineIRBuilder &MIRBuilder) const;
225 MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
226 MachineOperand &RHS,
227 MachineIRBuilder &MIRBuilder) const;
228 MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
229 MachineIRBuilder &MIRBuilder) const;
230 MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
231 MachineIRBuilder &MIRBuilder) const;
232 MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
233 MachineIRBuilder &MIRBuilder) const;
234 MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS,
235 MachineIRBuilder &MIRBuilder) const;
236 MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS,
237 AArch64CC::CondCode CC,
238 MachineIRBuilder &MIRBuilder) const;
239 MachineInstr *emitExtractVectorElt(Optional<Register> DstReg,
240 const RegisterBank &DstRB, LLT ScalarTy,
241 Register VecReg, unsigned LaneIdx,
242 MachineIRBuilder &MIRBuilder) const;
243
244 /// Helper function for selecting G_FCONSTANT. If the G_FCONSTANT can be
245 /// materialized using a FMOV instruction, then update MI and return it.
246 /// Otherwise, do nothing and return a nullptr.
247 MachineInstr *emitFMovForFConstant(MachineInstr &MI,
248 MachineRegisterInfo &MRI) const;
249
250 /// Emit a CSet for an integer compare.
251 ///
252 /// \p DefReg is expected to be a 32-bit scalar register.
253 MachineInstr *emitCSetForICMP(Register DefReg, unsigned Pred,
254 MachineIRBuilder &MIRBuilder) const;
255 /// Emit a CSet for a FP compare.
256 ///
257 /// \p Dst is expected to be a 32-bit scalar register.
258 MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
259 MachineIRBuilder &MIRBuilder) const;
260
261 /// Emit the overflow op for \p Opcode.
262 ///
263 /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
264 /// G_USUBO, etc.
265 std::pair<MachineInstr *, AArch64CC::CondCode>
266 emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
267 MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
268
269 /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
270 /// \p IsNegative is true if the test should be "not zero".
271 /// This will also optimize the test bit instruction when possible.
272 MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
273 MachineBasicBlock *DstMBB,
274 MachineIRBuilder &MIB) const;
275
276 /// Emit a CB(N)Z instruction which branches to \p DestMBB.
277 MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
278 MachineBasicBlock *DestMBB,
279 MachineIRBuilder &MIB) const;
280
281 // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
282 // We use these manually instead of using the importer since it doesn't
283 // support SDNodeXForm.
284 ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
285 ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
286 ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
287 ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
288
289 ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
290 ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
291 ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
292
293 ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
294 unsigned Size) const;
295
296 ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
297 return selectAddrModeUnscaled(Root, 1);
298 }
299 ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
300 return selectAddrModeUnscaled(Root, 2);
301 }
302 ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
303 return selectAddrModeUnscaled(Root, 4);
304 }
305 ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
306 return selectAddrModeUnscaled(Root, 8);
307 }
308 ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
309 return selectAddrModeUnscaled(Root, 16);
310 }
311
312 /// Helper to try to fold a GISEL_ADD_LOW into an immediate, to be used
313 /// from complex pattern matchers like selectAddrModeIndexed().
314 ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
315 MachineRegisterInfo &MRI) const;
316
317 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
318 unsigned Size) const;
319 template <int Width>
320 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
321 return selectAddrModeIndexed(Root, Width / 8);
322 }
323
324 bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
325 const MachineRegisterInfo &MRI) const;
326 ComplexRendererFns
327 selectAddrModeShiftedExtendXReg(MachineOperand &Root,
328 unsigned SizeInBytes) const;
329
330 /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
331 /// or not a shift + extend should be folded into an addressing mode. Returns
332 /// None when this is not profitable or possible.
333 ComplexRendererFns
334 selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
335 MachineOperand &Offset, unsigned SizeInBytes,
336 bool WantsExt) const;
337 ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
338 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
339 unsigned SizeInBytes) const;
340 template <int Width>
341 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
342 return selectAddrModeXRO(Root, Width / 8);
343 }
344
345 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
346 unsigned SizeInBytes) const;
347 template <int Width>
348 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
349 return selectAddrModeWRO(Root, Width / 8);
350 }
351
352 ComplexRendererFns selectShiftedRegister(MachineOperand &Root) const;
353
354 ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
355 return selectShiftedRegister(Root);
356 }
357
358 ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
359 // TODO: selectShiftedRegister should allow for rotates on logical shifts.
360 // For now, make them the same. The only difference between the two is that
361 // logical shifts are allowed to fold in rotates. Otherwise, these are
362 // functionally the same.
363 return selectShiftedRegister(Root);
364 }
365
366 /// Given an extend instruction, determine the correct shift-extend type for
367 /// that instruction.
368 ///
369 /// If the instruction is going to be used in a load or store, pass
370 /// \p IsLoadStore = true.
371 AArch64_AM::ShiftExtendType
372 getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
373 bool IsLoadStore = false) const;
374
375 /// Move \p Reg to \p RC if \p Reg is not already on \p RC.
376 ///
377 /// \returns Either \p Reg if no change was necessary, or the new register
378 /// created by moving \p Reg.
379 ///
380 /// Note: This uses emitCopy right now.
381 Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC,
382 MachineIRBuilder &MIB) const;
383
384 ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
385
386 void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
387 int OpIdx = -1) const;
388 void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
389 int OpIdx = -1) const;
390 void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
391 int OpIdx = -1) const;
392
393 // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
394 void materializeLargeCMVal(MachineInstr &I, const Value *V,
395 unsigned OpFlags) const;
396
397 // Optimization methods.
398 bool tryOptSelect(MachineInstr &MI) const;
399 MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
400 MachineOperand &Predicate,
401 MachineIRBuilder &MIRBuilder) const;
402
403 /// Return true if \p MI is a load or store of \p NumBytes bytes.
404 bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
405
406 /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
407 /// register zeroed out. In other words, the result of MI has been explicitly
408 /// zero extended.
409 bool isDef32(const MachineInstr &MI) const;
410
411 const AArch64TargetMachine &TM;
412 const AArch64Subtarget &STI;
413 const AArch64InstrInfo &TII;
414 const AArch64RegisterInfo &TRI;
415 const AArch64RegisterBankInfo &RBI;
416
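// Whether we may emit non-flag-setting conditional branches (CB(N)Z/TB(N)Z).
// Cleared in setupMF() when the function uses speculative load hardening.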
417 bool ProduceNonFlagSettingCondBr = false;
418
419 // Some cached values used during selection.
420 // We use LR as a live-in register, and we keep track of it here as it can be
421 // clobbered by calls.
422 Register MFReturnAddr;
423
424 #define GET_GLOBALISEL_PREDICATES_DECL
425 #include "AArch64GenGlobalISel.inc"
426 #undef GET_GLOBALISEL_PREDICATES_DECL
427
428 // We declare the temporaries used by selectImpl() in the class to minimize the
429 // cost of constructing placeholder values.
430 #define GET_GLOBALISEL_TEMPORARIES_DECL
431 #include "AArch64GenGlobalISel.inc"
432 #undef GET_GLOBALISEL_TEMPORARIES_DECL
433 };
434
435 } // end anonymous namespace
436
437 #define GET_GLOBALISEL_IMPL
438 #include "AArch64GenGlobalISel.inc"
439 #undef GET_GLOBALISEL_IMPL
440
441 AArch64InstructionSelector::AArch64InstructionSelector(
442 const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
443 const AArch64RegisterBankInfo &RBI)
444 : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()),
445 TRI(*STI.getRegisterInfo()), RBI(RBI),
446 #define GET_GLOBALISEL_PREDICATES_INIT
447 #include "AArch64GenGlobalISel.inc"
448 #undef GET_GLOBALISEL_PREDICATES_INIT
449 #define GET_GLOBALISEL_TEMPORARIES_INIT
450 #include "AArch64GenGlobalISel.inc"
451 #undef GET_GLOBALISEL_TEMPORARIES_INIT
452 {
453 }
454
455 // FIXME: This should be target-independent, inferred from the types declared
456 // for each class in the bank.
457 static const TargetRegisterClass *
458 getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
459 const RegisterBankInfo &RBI,
460 bool GetAllRegSet = false) {
461 if (RB.getID() == AArch64::GPRRegBankID) {
462 if (Ty.getSizeInBits() <= 32)
463 return GetAllRegSet ? &AArch64::GPR32allRegClass
464 : &AArch64::GPR32RegClass;
465 if (Ty.getSizeInBits() == 64)
466 return GetAllRegSet ? &AArch64::GPR64allRegClass
467 : &AArch64::GPR64RegClass;
468 return nullptr;
469 }
470
471 if (RB.getID() == AArch64::FPRRegBankID) {
472 if (Ty.getSizeInBits() <= 16)
473 return &AArch64::FPR16RegClass;
474 if (Ty.getSizeInBits() == 32)
475 return &AArch64::FPR32RegClass;
476 if (Ty.getSizeInBits() == 64)
477 return &AArch64::FPR64RegClass;
478 if (Ty.getSizeInBits() == 128)
479 return &AArch64::FPR128RegClass;
480 return nullptr;
481 }
482
483 return nullptr;
484 }
485
486 /// Given a register bank, and size in bits, return the smallest register class
487 /// that can represent that combination.
488 static const TargetRegisterClass *
489 getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
490 bool GetAllRegSet = false) {
491 unsigned RegBankID = RB.getID();
492
493 if (RegBankID == AArch64::GPRRegBankID) {
494 if (SizeInBits <= 32)
495 return GetAllRegSet ? &AArch64::GPR32allRegClass
496 : &AArch64::GPR32RegClass;
497 if (SizeInBits == 64)
498 return GetAllRegSet ? &AArch64::GPR64allRegClass
499 : &AArch64::GPR64RegClass;
500 }
501
502 if (RegBankID == AArch64::FPRRegBankID) {
503 switch (SizeInBits) {
504 default:
505 return nullptr;
506 case 8:
507 return &AArch64::FPR8RegClass;
508 case 16:
509 return &AArch64::FPR16RegClass;
510 case 32:
511 return &AArch64::FPR32RegClass;
512 case 64:
513 return &AArch64::FPR64RegClass;
514 case 128:
515 return &AArch64::FPR128RegClass;
516 }
517 }
518
519 return nullptr;
520 }
521
522 /// Returns the correct subregister to use for a given register class.
523 static bool getSubRegForClass(const TargetRegisterClass *RC,
524 const TargetRegisterInfo &TRI, unsigned &SubReg) {
525 switch (TRI.getRegSizeInBits(*RC)) {
526 case 8:
527 SubReg = AArch64::bsub;
528 break;
529 case 16:
530 SubReg = AArch64::hsub;
531 break;
532 case 32:
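// Only FPR32 uses ssub; any other 32-bit class (i.e. GPR32) uses sub_32.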
533 if (RC != &AArch64::FPR32RegClass)
534 SubReg = AArch64::sub_32;
535 else
536 SubReg = AArch64::ssub;
537 break;
538 case 64:
539 SubReg = AArch64::dsub;
540 break;
541 default:
542 LLVM_DEBUG(
543 dbgs() << "Couldn't find appropriate subregister for register class.");
544 return false;
545 }
546
547 return true;
548 }
549
550 /// Returns the minimum size the given register bank can hold.
551 static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
552 switch (RB.getID()) {
553 case AArch64::GPRRegBankID:
554 return 32;
555 case AArch64::FPRRegBankID:
556 return 8;
557 default:
558 llvm_unreachable("Tried to get minimum size for unknown register bank.");
559 }
560 }
561
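/// Extract an immediate from \p Root, looking through a G_CONSTANT vreg
/// definition when \p Root is a register. Returns None if no constant value
/// can be found.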
562 static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
563 auto &MI = *Root.getParent();
564 auto &MBB = *MI.getParent();
565 auto &MF = *MBB.getParent();
566 auto &MRI = MF.getRegInfo();
567 uint64_t Immed;
568 if (Root.isImm())
569 Immed = Root.getImm();
570 else if (Root.isCImm())
571 Immed = Root.getCImm()->getZExtValue();
572 else if (Root.isReg()) {
573 auto ValAndVReg =
574 getConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
575 if (!ValAndVReg)
576 return None;
577 Immed = ValAndVReg->Value;
578 } else
579 return None;
580 return Immed;
581 }
582
583 /// Check whether \p I is a currently unsupported binary operation:
584 /// - it has an unsized type
585 /// - an operand is not a vreg
586 /// - all operands are not in the same bank
587 /// These are checks that should someday live in the verifier, but right now,
588 /// these are mostly limitations of the aarch64 selector.
589 static bool unsupportedBinOp(const MachineInstr &I,
590 const AArch64RegisterBankInfo &RBI,
591 const MachineRegisterInfo &MRI,
592 const AArch64RegisterInfo &TRI) {
593 LLT Ty = MRI.getType(I.getOperand(0).getReg());
594 if (!Ty.isValid()) {
595 LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
596 return true;
597 }
598
599 const RegisterBank *PrevOpBank = nullptr;
600 for (auto &MO : I.operands()) {
601 // FIXME: Support non-register operands.
602 if (!MO.isReg()) {
603 LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
604 return true;
605 }
606
607 // FIXME: Can generic operations have physical registers operands? If
608 // so, this will need to be taught about that, and we'll need to get the
609 // bank out of the minimal class for the register.
610 // Either way, this needs to be documented (and possibly verified).
611 if (!Register::isVirtualRegister(MO.getReg())) {
612 LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
613 return true;
614 }
615
616 const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
617 if (!OpBank) {
618 LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
619 return true;
620 }
621
622 if (PrevOpBank && OpBank != PrevOpBank) {
623 LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
624 return true;
625 }
626 PrevOpBank = OpBank;
627 }
628 return false;
629 }
630
631 /// Select the AArch64 opcode for the basic binary operation \p GenericOpc
632 /// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
633 /// and of size \p OpSize.
634 /// \returns \p GenericOpc if the combination is unsupported.
635 static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
636 unsigned OpSize) {
637 switch (RegBankID) {
638 case AArch64::GPRRegBankID:
639 if (OpSize == 32) {
640 switch (GenericOpc) {
641 case TargetOpcode::G_SHL:
642 return AArch64::LSLVWr;
643 case TargetOpcode::G_LSHR:
644 return AArch64::LSRVWr;
645 case TargetOpcode::G_ASHR:
646 return AArch64::ASRVWr;
647 default:
648 return GenericOpc;
649 }
650 } else if (OpSize == 64) {
651 switch (GenericOpc) {
652 case TargetOpcode::G_PTR_ADD:
653 return AArch64::ADDXrr;
654 case TargetOpcode::G_SHL:
655 return AArch64::LSLVXr;
656 case TargetOpcode::G_LSHR:
657 return AArch64::LSRVXr;
658 case TargetOpcode::G_ASHR:
659 return AArch64::ASRVXr;
660 default:
661 return GenericOpc;
662 }
663 }
664 break;
665 case AArch64::FPRRegBankID:
666 switch (OpSize) {
667 case 32:
668 switch (GenericOpc) {
669 case TargetOpcode::G_FADD:
670 return AArch64::FADDSrr;
671 case TargetOpcode::G_FSUB:
672 return AArch64::FSUBSrr;
673 case TargetOpcode::G_FMUL:
674 return AArch64::FMULSrr;
675 case TargetOpcode::G_FDIV:
676 return AArch64::FDIVSrr;
677 default:
678 return GenericOpc;
679 }
680 case 64:
681 switch (GenericOpc) {
682 case TargetOpcode::G_FADD:
683 return AArch64::FADDDrr;
684 case TargetOpcode::G_FSUB:
685 return AArch64::FSUBDrr;
686 case TargetOpcode::G_FMUL:
687 return AArch64::FMULDrr;
688 case TargetOpcode::G_FDIV:
689 return AArch64::FDIVDrr;
690 case TargetOpcode::G_OR:
691 return AArch64::ORRv8i8;
692 default:
693 return GenericOpc;
694 }
695 }
696 break;
697 }
698 return GenericOpc;
699 }
700
701 /// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
702 /// appropriate for the (value) register bank \p RegBankID and of memory access
703 /// size \p OpSize. This returns the variant with the base+unsigned-immediate
704 /// addressing mode (e.g., LDRXui).
705 /// \returns \p GenericOpc if the combination is unsupported.
706 static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
707 unsigned OpSize) {
708 const bool isStore = GenericOpc == TargetOpcode::G_STORE;
709 switch (RegBankID) {
710 case AArch64::GPRRegBankID:
711 switch (OpSize) {
712 case 8:
713 return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
714 case 16:
715 return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
716 case 32:
717 return isStore ? AArch64::STRWui : AArch64::LDRWui;
718 case 64:
719 return isStore ? AArch64::STRXui : AArch64::LDRXui;
720 }
721 break;
722 case AArch64::FPRRegBankID:
723 switch (OpSize) {
724 case 8:
725 return isStore ? AArch64::STRBui : AArch64::LDRBui;
726 case 16:
727 return isStore ? AArch64::STRHui : AArch64::LDRHui;
728 case 32:
729 return isStore ? AArch64::STRSui : AArch64::LDRSui;
730 case 64:
731 return isStore ? AArch64::STRDui : AArch64::LDRDui;
732 }
733 break;
734 }
735 return GenericOpc;
736 }
737
738 #ifndef NDEBUG
739 /// Helper function that verifies that we have a valid copy at the end of
740 /// selectCopy. Verifies that the source and dest have the expected sizes and
741 /// then returns true.
742 static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
743 const MachineRegisterInfo &MRI,
744 const TargetRegisterInfo &TRI,
745 const RegisterBankInfo &RBI) {
746 const Register DstReg = I.getOperand(0).getReg();
747 const Register SrcReg = I.getOperand(1).getReg();
748 const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
749 const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
750
751 // Make sure the size of the source and dest line up.
752 assert(
753 (DstSize == SrcSize ||
754 // Copies are a means to set up initial types; the number of
755 // bits may not exactly match.
756 (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
757 // Copies are a means to copy bits around; as long as we are
758 // on the same register class, that's fine. Otherwise, that
759 // means we need some SUBREG_TO_REG or AND & co.
760 (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) &&
761 "Copy with different width?!");
762
763 // Check the size of the destination.
764 assert((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) &&
765 "GPRs cannot get more than 64-bit width values");
766
767 return true;
768 }
769 #endif
770
771 /// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
772 /// to \p *To.
773 ///
774 /// E.g "To = COPY SrcReg:SubReg"
775 static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
776 const RegisterBankInfo &RBI, Register SrcReg,
777 const TargetRegisterClass *To, unsigned SubReg) {
778 assert(SrcReg.isValid() && "Expected a valid source register?");
779 assert(To && "Destination register class cannot be null");
780 assert(SubReg && "Expected a valid subregister");
781
782 MachineIRBuilder MIB(I);
783 auto SubRegCopy =
784 MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg);
785 MachineOperand &RegOp = I.getOperand(1);
786 RegOp.setReg(SubRegCopy.getReg(0));
787
788 // It's possible that the destination register won't be constrained. Make
789 // sure that happens.
790 if (!Register::isPhysicalRegister(I.getOperand(0).getReg()))
791 RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
792
793 return true;
794 }
795
796 /// Helper function to get the source and destination register classes for a
797 /// copy. Returns a std::pair containing the source register class for the
798 /// copy, and the destination register class for the copy. If a register class
799 /// cannot be determined, then it will be nullptr.
800 static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
801 getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
802 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
803 const RegisterBankInfo &RBI) {
804 Register DstReg = I.getOperand(0).getReg();
805 Register SrcReg = I.getOperand(1).getReg();
806 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
807 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
808 unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
809 unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
810
811 // Special casing for cross-bank copies of s1s. We can technically represent
812 // a 1-bit value with any size of register. The minimum size for a GPR is 32
813 // bits. So, we need to put the FPR on 32 bits as well.
814 //
815 // FIXME: I'm not sure if this case holds true outside of copies. If it does,
816 // then we can pull it into the helpers that get the appropriate class for a
817 // register bank. Or make a new helper that carries along some constraint
818 // information.
819 if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
820 SrcSize = DstSize = 32;
821
822 return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
823 getMinClassForRegBank(DstRegBank, DstSize, true)};
824 }
825
826 static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
827 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
828 const RegisterBankInfo &RBI) {
829 Register DstReg = I.getOperand(0).getReg();
830 Register SrcReg = I.getOperand(1).getReg();
831 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
832 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
833
834 // Find the correct register classes for the source and destination registers.
835 const TargetRegisterClass *SrcRC;
836 const TargetRegisterClass *DstRC;
837 std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
838
839 if (!DstRC) {
840 LLVM_DEBUG(dbgs() << "Unexpected dest size "
841 << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
842 return false;
843 }
844
845 // A couple helpers below, for making sure that the copy we produce is valid.
846
847 // Set to true if we insert a SUBREG_TO_REG. If we do this, then we don't want
848 // to verify that the src and dst are the same size, since that's handled by
849 // the SUBREG_TO_REG.
850 bool KnownValid = false;
851
852 // Returns true, or asserts if something we don't expect happens. Instead of
853 // returning true, we return isValidCopy() to ensure that we verify the
854 // result.
855 auto CheckCopy = [&]() {
856 // If we have a bitcast or something, we can't have physical registers.
857 assert((I.isCopy() ||
858 (!Register::isPhysicalRegister(I.getOperand(0).getReg()) &&
859 !Register::isPhysicalRegister(I.getOperand(1).getReg()))) &&
860 "No phys reg on generic operator!");
861 bool ValidCopy = true;
862 #ifndef NDEBUG
863 ValidCopy = KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI);
864 assert(ValidCopy && "Invalid copy.");
865 #endif
866 return ValidCopy;
867 };
868
869 // Is this a copy? If so, then we may need to insert a subregister copy.
870 if (I.isCopy()) {
871 // Yes. Check if there's anything to fix up.
872 if (!SrcRC) {
873 LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
874 return false;
875 }
876
877 unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
878 unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
879 unsigned SubReg;
880
881 // If the source bank doesn't support a subregister copy small enough,
882 // then we first need to copy to the destination bank.
883 if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
884 const TargetRegisterClass *DstTempRC =
885 getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true);
886 getSubRegForClass(DstRC, TRI, SubReg);
887
888 MachineIRBuilder MIB(I);
889 auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
890 copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
891 } else if (SrcSize > DstSize) {
892 // If the source register is bigger than the destination we need to
893 // perform a subregister copy.
894 const TargetRegisterClass *SubRegRC =
895 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
896 getSubRegForClass(SubRegRC, TRI, SubReg);
897 copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
898 } else if (DstSize > SrcSize) {
899 // If the destination register is bigger than the source we need to do
900 // a promotion using SUBREG_TO_REG.
901 const TargetRegisterClass *PromotionRC =
902 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
903 getSubRegForClass(SrcRC, TRI, SubReg);
904
905 Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
906 BuildMI(*I.getParent(), I, I.getDebugLoc(),
907 TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
908 .addImm(0)
909 .addUse(SrcReg)
910 .addImm(SubReg);
911 MachineOperand &RegOp = I.getOperand(1);
912 RegOp.setReg(PromoteReg);
913
914 // Promise that the copy is implicitly validated by the SUBREG_TO_REG.
915 KnownValid = true;
916 }
917
918 // If the destination is a physical register, then there's nothing to
919 // change, so we're done.
920 if (Register::isPhysicalRegister(DstReg))
921 return CheckCopy();
922 }
923
924 // No need to constrain SrcReg. It will get constrained when we hit another
925 // of its uses or defs. Copies do not have constraints.
926 if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
927 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
928 << " operand\n");
929 return false;
930 }
931 I.setDesc(TII.get(AArch64::COPY));
932 return CheckCopy();
933 }
934
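/// Select the AArch64 opcode for a scalar FP/integer conversion (G_SITOFP,
/// G_UITOFP, G_FPTOSI, G_FPTOUI) between \p SrcTy and \p DstTy.
/// \returns \p GenericOpc if the combination is unsupported.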
935 static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
936 if (!DstTy.isScalar() || !SrcTy.isScalar())
937 return GenericOpc;
938
939 const unsigned DstSize = DstTy.getSizeInBits();
940 const unsigned SrcSize = SrcTy.getSizeInBits();
941
942 switch (DstSize) {
943 case 32:
944 switch (SrcSize) {
945 case 32:
946 switch (GenericOpc) {
947 case TargetOpcode::G_SITOFP:
948 return AArch64::SCVTFUWSri;
949 case TargetOpcode::G_UITOFP:
950 return AArch64::UCVTFUWSri;
951 case TargetOpcode::G_FPTOSI:
952 return AArch64::FCVTZSUWSr;
953 case TargetOpcode::G_FPTOUI:
954 return AArch64::FCVTZUUWSr;
955 default:
956 return GenericOpc;
957 }
958 case 64:
959 switch (GenericOpc) {
960 case TargetOpcode::G_SITOFP:
961 return AArch64::SCVTFUXSri;
962 case TargetOpcode::G_UITOFP:
963 return AArch64::UCVTFUXSri;
964 case TargetOpcode::G_FPTOSI:
965 return AArch64::FCVTZSUWDr;
966 case TargetOpcode::G_FPTOUI:
967 return AArch64::FCVTZUUWDr;
968 default:
969 return GenericOpc;
970 }
971 default:
972 return GenericOpc;
973 }
974 case 64:
975 switch (SrcSize) {
976 case 32:
977 switch (GenericOpc) {
978 case TargetOpcode::G_SITOFP:
979 return AArch64::SCVTFUWDri;
980 case TargetOpcode::G_UITOFP:
981 return AArch64::UCVTFUWDri;
982 case TargetOpcode::G_FPTOSI:
983 return AArch64::FCVTZSUXSr;
984 case TargetOpcode::G_FPTOUI:
985 return AArch64::FCVTZUUXSr;
986 default:
987 return GenericOpc;
988 }
989 case 64:
990 switch (GenericOpc) {
991 case TargetOpcode::G_SITOFP:
992 return AArch64::SCVTFUXDri;
993 case TargetOpcode::G_UITOFP:
994 return AArch64::UCVTFUXDri;
995 case TargetOpcode::G_FPTOSI:
996 return AArch64::FCVTZSUXDr;
997 case TargetOpcode::G_FPTOUI:
998 return AArch64::FCVTZUUXDr;
999 default:
1000 return GenericOpc;
1001 }
1002 default:
1003 return GenericOpc;
1004 }
1005 default:
1006 return GenericOpc;
1007 };
1008 return GenericOpc;
1009 }
1010
1011 MachineInstr *
1012 AArch64InstructionSelector::emitSelect(Register Dst, Register True,
1013 Register False, AArch64CC::CondCode CC,
1014 MachineIRBuilder &MIB) const {
1015 MachineRegisterInfo &MRI = *MIB.getMRI();
1016 assert(RBI.getRegBank(False, MRI, TRI)->getID() ==
1017 RBI.getRegBank(True, MRI, TRI)->getID() &&
1018 "Expected both select operands to have the same regbank?");
1019 LLT Ty = MRI.getType(True);
1020 if (Ty.isVector())
1021 return nullptr;
1022 const unsigned Size = Ty.getSizeInBits();
1023 assert((Size == 32 || Size == 64) &&
1024 "Expected 32 bit or 64 bit select only?");
1025 const bool Is32Bit = Size == 32;
1026 if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
1027 unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
1028 auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1029 constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI);
1030 return &*FCSel;
1031 }
1032
1033 // By default, we'll try and emit a CSEL.
1034 unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
1035 bool Optimized = false;
1036 auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
1037 &Optimized](Register &Reg, Register &OtherReg,
1038 bool Invert) {
1039 if (Optimized)
1040 return false;
1041
1042 // Attempt to fold:
1043 //
1044 // %sub = G_SUB 0, %x
1045 // %select = G_SELECT cc, %reg, %sub
1046 //
1047 // Into:
1048 // %select = CSNEG %reg, %x, cc
1049 Register MatchReg;
1050 if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) {
1051 Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
1052 Reg = MatchReg;
1053 if (Invert) {
1054 CC = AArch64CC::getInvertedCondCode(CC);
1055 std::swap(Reg, OtherReg);
1056 }
1057 return true;
1058 }
1059
1060 // Attempt to fold:
1061 //
1062 // %xor = G_XOR %x, -1
1063 // %select = G_SELECT cc, %reg, %xor
1064 //
1065 // Into:
1066 // %select = CSINV %reg, %x, cc
1067 if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) {
1068 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1069 Reg = MatchReg;
1070 if (Invert) {
1071 CC = AArch64CC::getInvertedCondCode(CC);
1072 std::swap(Reg, OtherReg);
1073 }
1074 return true;
1075 }
1076
1077 // Attempt to fold:
1078 //
1079 // %add = G_ADD %x, 1
1080 // %select = G_SELECT cc, %reg, %add
1081 //
1082 // Into:
1083 // %select = CSINC %reg, %x, cc
1084 if (mi_match(Reg, MRI, m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)))) {
1085 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1086 Reg = MatchReg;
1087 if (Invert) {
1088 CC = AArch64CC::getInvertedCondCode(CC);
1089 std::swap(Reg, OtherReg);
1090 }
1091 return true;
1092 }
1093
1094 return false;
1095 };
1096
1097 // Helper lambda which tries to use CSINC/CSINV for the instruction when its
1098 // true/false values are constants.
1099 // FIXME: All of these patterns already exist in tablegen. We should be
1100 // able to import these.
1101 auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
1102 &Optimized]() {
1103 if (Optimized)
1104 return false;
1105 auto TrueCst = getConstantVRegValWithLookThrough(True, MRI);
1106 auto FalseCst = getConstantVRegValWithLookThrough(False, MRI);
1107 if (!TrueCst && !FalseCst)
1108 return false;
1109
1110 Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
1111 if (TrueCst && FalseCst) {
1112 auto T = TrueCst->Value;
1113 auto F = FalseCst->Value;
1114
1115 if (T == 0 && F == 1) {
1116 // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
1117 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1118 True = ZReg;
1119 False = ZReg;
1120 return true;
1121 }
1122
1123 if (T == 0 && F == -1) {
1124 // G_SELECT cc, 0, -1 -> CSINV zreg, zreg, cc
1125 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1126 True = ZReg;
1127 False = ZReg;
1128 return true;
1129 }
1130 }
1131
1132 if (TrueCst) {
1133 auto T = TrueCst->Value;
1134 if (T == 1) {
1135 // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
1136 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1137 True = False;
1138 False = ZReg;
1139 CC = AArch64CC::getInvertedCondCode(CC);
1140 return true;
1141 }
1142
1143 if (T == -1) {
1144 // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
1145 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1146 True = False;
1147 False = ZReg;
1148 CC = AArch64CC::getInvertedCondCode(CC);
1149 return true;
1150 }
1151 }
1152
1153 if (FalseCst) {
1154 auto F = FalseCst->Value;
1155 if (F == 1) {
1156 // G_SELECT cc, t, 1 -> CSINC t, zreg, cc
1157 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1158 False = ZReg;
1159 return true;
1160 }
1161
1162 if (F == -1) {
1163 // G_SELECT cc, t, -1 -> CSINV t, zreg, cc
1164 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1165 False = ZReg;
1166 return true;
1167 }
1168 }
1169 return false;
1170 };
1171
1172 Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);
1173 Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);
1174 Optimized |= TryOptSelectCst();
1175 auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1176 constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
1177 return &*SelectInst;
1178 }
1179
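/// Convert an integer comparison predicate from CmpInst into the equivalent
/// AArch64 condition code.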
1180 static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
1181 switch (P) {
1182 default:
1183 llvm_unreachable("Unknown condition code!");
1184 case CmpInst::ICMP_NE:
1185 return AArch64CC::NE;
1186 case CmpInst::ICMP_EQ:
1187 return AArch64CC::EQ;
1188 case CmpInst::ICMP_SGT:
1189 return AArch64CC::GT;
1190 case CmpInst::ICMP_SGE:
1191 return AArch64CC::GE;
1192 case CmpInst::ICMP_SLT:
1193 return AArch64CC::LT;
1194 case CmpInst::ICMP_SLE:
1195 return AArch64CC::LE;
1196 case CmpInst::ICMP_UGT:
1197 return AArch64CC::HI;
1198 case CmpInst::ICMP_UGE:
1199 return AArch64CC::HS;
1200 case CmpInst::ICMP_ULT:
1201 return AArch64CC::LO;
1202 case CmpInst::ICMP_ULE:
1203 return AArch64CC::LS;
1204 }
1205 }
1206
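/// Convert a floating-point comparison predicate into (up to) two AArch64
/// condition codes. \p CondCode2 is left as AL unless the predicate needs a
/// second check (e.g. FCMP_ONE, FCMP_UEQ), in which case callers must emit
/// both.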
1207 static void changeFCMPPredToAArch64CC(CmpInst::Predicate P,
1208 AArch64CC::CondCode &CondCode,
1209 AArch64CC::CondCode &CondCode2) {
1210 CondCode2 = AArch64CC::AL;
1211 switch (P) {
1212 default:
1213 llvm_unreachable("Unknown FP condition!");
1214 case CmpInst::FCMP_OEQ:
1215 CondCode = AArch64CC::EQ;
1216 break;
1217 case CmpInst::FCMP_OGT:
1218 CondCode = AArch64CC::GT;
1219 break;
1220 case CmpInst::FCMP_OGE:
1221 CondCode = AArch64CC::GE;
1222 break;
1223 case CmpInst::FCMP_OLT:
1224 CondCode = AArch64CC::MI;
1225 break;
1226 case CmpInst::FCMP_OLE:
1227 CondCode = AArch64CC::LS;
1228 break;
1229 case CmpInst::FCMP_ONE:
1230 CondCode = AArch64CC::MI;
1231 CondCode2 = AArch64CC::GT;
1232 break;
1233 case CmpInst::FCMP_ORD:
1234 CondCode = AArch64CC::VC;
1235 break;
1236 case CmpInst::FCMP_UNO:
1237 CondCode = AArch64CC::VS;
1238 break;
1239 case CmpInst::FCMP_UEQ:
1240 CondCode = AArch64CC::EQ;
1241 CondCode2 = AArch64CC::VS;
1242 break;
1243 case CmpInst::FCMP_UGT:
1244 CondCode = AArch64CC::HI;
1245 break;
1246 case CmpInst::FCMP_UGE:
1247 CondCode = AArch64CC::PL;
1248 break;
1249 case CmpInst::FCMP_ULT:
1250 CondCode = AArch64CC::LT;
1251 break;
1252 case CmpInst::FCMP_ULE:
1253 CondCode = AArch64CC::LE;
1254 break;
1255 case CmpInst::FCMP_UNE:
1256 CondCode = AArch64CC::NE;
1257 break;
1258 }
1259 }
1260
1261 /// Return a register which can be used as a bit to test in a TB(N)Z.
1262 static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
1263 MachineRegisterInfo &MRI) {
1264 assert(Reg.isValid() && "Expected valid register!");
1265 while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
1266 unsigned Opc = MI->getOpcode();
1267
1268 if (!MI->getOperand(0).isReg() ||
1269 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1270 break;
1271
1272 // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
1273 //
1274 // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
1275 // on the truncated x is the same as the bit number on x.
1276 if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
1277 Opc == TargetOpcode::G_TRUNC) {
1278 Register NextReg = MI->getOperand(1).getReg();
1279 // Did we find something worth folding?
1280 if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
1281 break;
1282
1283 // NextReg is worth folding. Keep looking.
1284 Reg = NextReg;
1285 continue;
1286 }
1287
1288 // Attempt to find a suitable operation with a constant on one side.
1289 Optional<uint64_t> C;
1290 Register TestReg;
1291 switch (Opc) {
1292 default:
1293 break;
1294 case TargetOpcode::G_AND:
1295 case TargetOpcode::G_XOR: {
1296 TestReg = MI->getOperand(1).getReg();
1297 Register ConstantReg = MI->getOperand(2).getReg();
1298 auto VRegAndVal = getConstantVRegValWithLookThrough(ConstantReg, MRI);
1299 if (!VRegAndVal) {
1300 // AND commutes, check the other side for a constant.
1301 // FIXME: Can we canonicalize the constant so that it's always on the
1302 // same side at some point earlier?
1303 std::swap(ConstantReg, TestReg);
1304 VRegAndVal = getConstantVRegValWithLookThrough(ConstantReg, MRI);
1305 }
1306 if (VRegAndVal)
1307 C = VRegAndVal->Value;
1308 break;
1309 }
1310 case TargetOpcode::G_ASHR:
1311 case TargetOpcode::G_LSHR:
1312 case TargetOpcode::G_SHL: {
1313 TestReg = MI->getOperand(1).getReg();
1314 auto VRegAndVal =
1315 getConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1316 if (VRegAndVal)
1317 C = VRegAndVal->Value;
1318 break;
1319 }
1320 }
1321
1322 // Didn't find a constant or viable register. Bail out of the loop.
1323 if (!C || !TestReg.isValid())
1324 break;
1325
1326 // We found a suitable instruction with a constant. Check to see if we can
1327 // walk through the instruction.
1328 Register NextReg;
1329 unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
1330 switch (Opc) {
1331 default:
1332 break;
1333 case TargetOpcode::G_AND:
1334 // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
1335 if ((*C >> Bit) & 1)
1336 NextReg = TestReg;
1337 break;
1338 case TargetOpcode::G_SHL:
1339 // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in
1340 // the type of the register.
1341 if (*C <= Bit && (Bit - *C) < TestRegSize) {
1342 NextReg = TestReg;
1343 Bit = Bit - *C;
1344 }
1345 break;
1346 case TargetOpcode::G_ASHR:
1347 // (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits
1348 // in x
1349 NextReg = TestReg;
1350 Bit = Bit + *C;
1351 if (Bit >= TestRegSize)
1352 Bit = TestRegSize - 1;
1353 break;
1354 case TargetOpcode::G_LSHR:
1355 // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
1356 if ((Bit + *C) < TestRegSize) {
1357 NextReg = TestReg;
1358 Bit = Bit + *C;
1359 }
1360 break;
1361 case TargetOpcode::G_XOR:
1362 // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
1363 // appropriate.
1364 //
1365 // e.g. If x' = xor x, c, and the b-th bit is set in c then
1366 //
1367 // tbz x', b -> tbnz x, b
1368 //
1369 // Because x' only has the b-th bit set if x does not.
1370 if ((*C >> Bit) & 1)
1371 Invert = !Invert;
1372 NextReg = TestReg;
1373 break;
1374 }
1375
1376 // Check if we found anything worth folding.
1377 if (!NextReg.isValid())
1378 return Reg;
1379 Reg = NextReg;
1380 }
1381
1382 return Reg;
1383 }
1384
1385 MachineInstr *AArch64InstructionSelector::emitTestBit(
1386 Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
1387 MachineIRBuilder &MIB) const {
1388 assert(TestReg.isValid());
1389 assert(ProduceNonFlagSettingCondBr &&
1390 "Cannot emit TB(N)Z with speculation tracking!");
1391 MachineRegisterInfo &MRI = *MIB.getMRI();
1392
1393 // Attempt to optimize the test bit by walking over instructions.
1394 TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI);
1395 LLT Ty = MRI.getType(TestReg);
1396 unsigned Size = Ty.getSizeInBits();
1397 assert(!Ty.isVector() && "Expected a scalar!");
1398 assert(Bit < 64 && "Bit is too large!");
1399
1400 // When the test register is a 64-bit register, we have to narrow to make
1401 // TBNZW work.
1402 bool UseWReg = Bit < 32;
1403 unsigned NecessarySize = UseWReg ? 32 : 64;
1404 if (Size != NecessarySize)
1405 TestReg = moveScalarRegClass(
1406 TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
1407 MIB);
1408
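// The opcode is chosen as OpcTable[UseWReg][IsNegative]: row 0 holds the
// X-form TBZ/TBNZ, row 1 the W-form.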
1409 static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
1410 {AArch64::TBZW, AArch64::TBNZW}};
1411 unsigned Opc = OpcTable[UseWReg][IsNegative];
1412 auto TestBitMI =
1413 MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB);
1414 constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI);
1415 return &*TestBitMI;
1416 }
1417
1418 bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
1419 MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB,
1420 MachineIRBuilder &MIB) const {
1421 assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");
1422 // Given something like this:
1423 //
1424 // %x = ...Something...
1425 // %one = G_CONSTANT i64 1
1426 // %zero = G_CONSTANT i64 0
1427 // %and = G_AND %x, %one
1428 // %cmp = G_ICMP intpred(ne), %and, %zero
1429 // %cmp_trunc = G_TRUNC %cmp
1430 // G_BRCOND %cmp_trunc, %bb.3
1431 //
1432 // We want to try and fold the AND into the G_BRCOND and produce either a
1433 // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
1434 //
1435 // In this case, we'd get
1436 //
1437 // TBNZ %x %bb.3
1438 //
1439
1440 // Check if the AND has a constant on its RHS which we can use as a mask.
1441 // If it's a power of 2, then it's the same as checking a specific bit.
1442 // (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
1443 auto MaybeBit = getConstantVRegValWithLookThrough(
1444 AndInst.getOperand(2).getReg(), *MIB.getMRI());
1445 if (!MaybeBit || !isPowerOf2_64(MaybeBit->Value))
1446 return false;
1447
1448 uint64_t Bit = Log2_64(static_cast<uint64_t>(MaybeBit->Value));
1449 Register TestReg = AndInst.getOperand(1).getReg();
1450
1451 // Emit a TB(N)Z.
1452 emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
1453 return true;
1454 }
1455
1456 MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
1457 bool IsNegative,
1458 MachineBasicBlock *DestMBB,
1459 MachineIRBuilder &MIB) const {
1460 assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
1461 MachineRegisterInfo &MRI = *MIB.getMRI();
1462 assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
1463 AArch64::GPRRegBankID &&
1464 "Expected GPRs only?");
1465 auto Ty = MRI.getType(CompareReg);
1466 unsigned Width = Ty.getSizeInBits();
1467 assert(!Ty.isVector() && "Expected scalar only?");
1468 assert(Width <= 64 && "Expected width to be at most 64?");
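// The opcode is chosen as OpcTable[IsNegative][Width == 64]: CBZ vs. CBNZ
// rows, W-form vs. X-form columns.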
1469 static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
1470 {AArch64::CBNZW, AArch64::CBNZX}};
1471 unsigned Opc = OpcTable[IsNegative][Width == 64];
1472 auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
1473 constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
1474 return &*BranchMI;
1475 }
1476
1477 bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
1478 MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const {
1479 assert(FCmp.getOpcode() == TargetOpcode::G_FCMP);
1480 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1481 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
1482 // totally clean. Some of them require two branches to implement.
1483 emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB);
1484 AArch64CC::CondCode CC1, CC2;
1485 changeFCMPPredToAArch64CC(
1486 static_cast<CmpInst::Predicate>(FCmp.getOperand(1).getPredicate()), CC1,
1487 CC2);
1488 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1489 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB);
1490 if (CC2 != AArch64CC::AL)
1491 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB);
1492 I.eraseFromParent();
1493 return true;
1494 }
1495
1496 bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
1497 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1498 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1499 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1500 // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.
1501 //
1502 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1503 // instructions will not be produced, as they are conditional branch
1504 // instructions that do not set flags.
1505 if (!ProduceNonFlagSettingCondBr)
1506 return false;
1507
1508 MachineRegisterInfo &MRI = *MIB.getMRI();
1509 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1510 auto Pred =
1511 static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate());
1512 Register LHS = ICmp.getOperand(2).getReg();
1513 Register RHS = ICmp.getOperand(3).getReg();
1514
1515 // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
1516 auto VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
1517 MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1518
1519 // When we can emit a TB(N)Z, prefer that.
1520 //
1521 // Handle non-commutative condition codes first.
1522 // Note that we don't want to do this when we have a G_AND because it can
1523 // become a tst. The tst will make the test bit in the TB(N)Z redundant.
1524 if (VRegAndVal && !AndInst) {
1525 int64_t C = VRegAndVal->Value;
1526
1527 // When we have a greater-than comparison, we can just test if the msb is
1528 // zero.
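// E.g. (illustrative) for an s64 %x:
//   %c = G_ICMP intpred(sgt), %x, -1; G_BRCOND %c, %bb
// becomes roughly: TBZ %x, #63, %bb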
1529 if (C == -1 && Pred == CmpInst::ICMP_SGT) {
1530 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1531 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1532 I.eraseFromParent();
1533 return true;
1534 }
1535
1536 // When we have a less than comparison, we can just test if the msb is not
1537 // zero.
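// E.g. (illustrative) for an s64 %x:
//   %c = G_ICMP intpred(slt), %x, 0; G_BRCOND %c, %bb
// becomes roughly: TBNZ %x, #63, %bb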
1538 if (C == 0 && Pred == CmpInst::ICMP_SLT) {
1539 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1540 emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB);
1541 I.eraseFromParent();
1542 return true;
1543 }
1544 }
1545
1546 // Attempt to handle commutative condition codes. Right now, that's only
1547 // eq/ne.
1548 if (ICmpInst::isEquality(Pred)) {
1549 if (!VRegAndVal) {
1550 std::swap(RHS, LHS);
1551 VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
1552 AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1553 }
1554
1555 if (VRegAndVal && VRegAndVal->Value == 0) {
1556 // If there's a G_AND feeding into this branch, try to fold it away by
1557 // emitting a TB(N)Z instead.
1558 //
1559 // Note: If we have LT, then it *is* possible to fold, but it wouldn't be
1560 // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding
1561 // would be redundant.
1562 if (AndInst &&
1563 tryOptAndIntoCompareBranch(
1564 *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) {
1565 I.eraseFromParent();
1566 return true;
1567 }
1568
1569 // Otherwise, try to emit a CB(N)Z instead.
1570 auto LHSTy = MRI.getType(LHS);
1571 if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
1572 emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
1573 I.eraseFromParent();
1574 return true;
1575 }
1576 }
1577 }
1578
1579 return false;
1580 }
1581
1582 bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
1583 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1584 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1585 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1586 if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
1587 return true;
1588
1589 // Couldn't optimize. Emit a compare + a Bcc.
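// E.g. (roughly) for an eq compare this emits a SUBS-based compare (a CMP)
// followed by a B.eq to the destination block.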
1590 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1591 auto PredOp = ICmp.getOperand(1);
1592 emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB);
1593 const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
1594 static_cast<CmpInst::Predicate>(PredOp.getPredicate()));
1595 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
1596 I.eraseFromParent();
1597 return true;
1598 }
1599
1600 bool AArch64InstructionSelector::selectCompareBranch(
1601 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1602 Register CondReg = I.getOperand(0).getReg();
1603 MachineInstr *CCMI = MRI.getVRegDef(CondReg);
1604 if (CCMI->getOpcode() == TargetOpcode::G_TRUNC) {
1605 CondReg = CCMI->getOperand(1).getReg();
1606 CCMI = MRI.getVRegDef(CondReg);
1607 }
1608
1609 // Try to select the G_BRCOND using whatever is feeding the condition if
1610 // possible.
1611 MachineIRBuilder MIB(I);
1612 unsigned CCMIOpc = CCMI->getOpcode();
1613 if (CCMIOpc == TargetOpcode::G_FCMP)
1614 return selectCompareBranchFedByFCmp(I, *CCMI, MIB);
1615 if (CCMIOpc == TargetOpcode::G_ICMP)
1616 return selectCompareBranchFedByICmp(I, *CCMI, MIB);
1617
1618 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1619 // instructions will not be produced, as they are conditional branch
1620 // instructions that do not set flags.
1621 if (ProduceNonFlagSettingCondBr) {
1622 emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true,
1623 I.getOperand(1).getMBB(), MIB);
1624 I.eraseFromParent();
1625 return true;
1626 }
1627
1628 // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead.
1629 auto TstMI =
1630 MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1);
1631 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
1632 auto Bcc = MIB.buildInstr(AArch64::Bcc)
1633 .addImm(AArch64CC::EQ)
1634 .addMBB(I.getOperand(1).getMBB());
1635 I.eraseFromParent();
1636 return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI);
1637 }
1638
1639 /// Returns the element immediate value of a vector shift operand if found.
1640 /// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
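/// For example (illustrative), a splat such as
///   %c:_(s32) = G_CONSTANT i32 3
///   %v:_(<4 x s32>) = G_BUILD_VECTOR %c, %c, %c, %c
/// yields an immediate of 3.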
1641 static Optional<int64_t> getVectorShiftImm(Register Reg,
1642 MachineRegisterInfo &MRI) {
1643 assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
1644 MachineInstr *OpMI = MRI.getVRegDef(Reg);
1645 assert(OpMI && "Expected to find a vreg def for vector shift operand");
1646 if (OpMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR)
1647 return None;
1648
1649 // Check all operands are identical immediates.
1650 int64_t ImmVal = 0;
1651 for (unsigned Idx = 1; Idx < OpMI->getNumOperands(); ++Idx) {
1652 auto VRegAndVal = getConstantVRegValWithLookThrough(OpMI->getOperand(Idx).getReg(), MRI);
1653 if (!VRegAndVal)
1654 return None;
1655
1656 if (Idx == 1)
1657 ImmVal = VRegAndVal->Value;
1658 if (ImmVal != VRegAndVal->Value)
1659 return None;
1660 }
1661
1662 return ImmVal;
1663 }
1664
1665 /// Matches and returns the shift immediate value for a SHL instruction given
1666 /// a shift operand.
1667 static Optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg, MachineRegisterInfo &MRI) {
1668 Optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
1669 if (!ShiftImm)
1670 return None;
1671 // Check the immediate is in range for a SHL.
1672 int64_t Imm = *ShiftImm;
1673 if (Imm < 0)
1674 return None;
1675 switch (SrcTy.getElementType().getSizeInBits()) {
1676 default:
1677 LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift");
1678 return None;
1679 case 8:
1680 if (Imm > 7)
1681 return None;
1682 break;
1683 case 16:
1684 if (Imm > 15)
1685 return None;
1686 break;
1687 case 32:
1688 if (Imm > 31)
1689 return None;
1690 break;
1691 case 64:
1692 if (Imm > 63)
1693 return None;
1694 break;
1695 }
1696 return Imm;
1697 }
1698
1699 bool AArch64InstructionSelector::selectVectorSHL(
1700 MachineInstr &I, MachineRegisterInfo &MRI) const {
1701 assert(I.getOpcode() == TargetOpcode::G_SHL);
1702 Register DstReg = I.getOperand(0).getReg();
1703 const LLT Ty = MRI.getType(DstReg);
1704 Register Src1Reg = I.getOperand(1).getReg();
1705 Register Src2Reg = I.getOperand(2).getReg();
1706
1707 if (!Ty.isVector())
1708 return false;
1709
1710 // Check if we have a vector of constants on RHS that we can select as the
1711 // immediate form.
1712 Optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI);
1713
1714 unsigned Opc = 0;
1715 if (Ty == LLT::vector(2, 64)) {
1716 Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
1717 } else if (Ty == LLT::vector(4, 32)) {
1718 Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
1719 } else if (Ty == LLT::vector(2, 32)) {
1720 Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
1721 } else if (Ty == LLT::vector(4, 16)) {
1722 Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
1723 } else if (Ty == LLT::vector(8, 16)) {
1724 Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
1725 } else if (Ty == LLT::vector(16, 8)) {
1726 Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
1727 } else if (Ty == LLT::vector(8, 8)) {
1728 Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
1729 } else {
1730 LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
1731 return false;
1732 }
1733
1734 MachineIRBuilder MIB(I);
1735 auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
1736 if (ImmVal)
1737 Shl.addImm(*ImmVal);
1738 else
1739 Shl.addUse(Src2Reg);
1740 constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI);
1741 I.eraseFromParent();
1742 return true;
1743 }
1744
1745 bool AArch64InstructionSelector::selectVectorAshrLshr(
1746 MachineInstr &I, MachineRegisterInfo &MRI) const {
1747 assert(I.getOpcode() == TargetOpcode::G_ASHR ||
1748 I.getOpcode() == TargetOpcode::G_LSHR);
1749 Register DstReg = I.getOperand(0).getReg();
1750 const LLT Ty = MRI.getType(DstReg);
1751 Register Src1Reg = I.getOperand(1).getReg();
1752 Register Src2Reg = I.getOperand(2).getReg();
1753
1754 if (!Ty.isVector())
1755 return false;
1756
1757 bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;
1758
1759 // We expect the immediate case to be lowered in the PostLegalCombiner to
1760 // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents.
1761
1762 // There is no shift-right-by-register instruction, but the shift-left
1763 // instruction takes a signed shift amount, where negative amounts specify
1764 // a right shift.
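// E.g. (illustrative) a v4s32 G_ASHR %src, %amt is emitted roughly as:
//   %neg = NEGv4i32 %amt
//   %dst = SSHLv4i32 %src, %neg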
1765
1766 unsigned Opc = 0;
1767 unsigned NegOpc = 0;
1768 const TargetRegisterClass *RC =
1769 getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID), RBI);
1770 if (Ty == LLT::vector(2, 64)) {
1771 Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
1772 NegOpc = AArch64::NEGv2i64;
1773 } else if (Ty == LLT::vector(4, 32)) {
1774 Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
1775 NegOpc = AArch64::NEGv4i32;
1776 } else if (Ty == LLT::vector(2, 32)) {
1777 Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
1778 NegOpc = AArch64::NEGv2i32;
1779 } else if (Ty == LLT::vector(4, 16)) {
1780 Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
1781 NegOpc = AArch64::NEGv4i16;
1782 } else if (Ty == LLT::vector(8, 16)) {
1783 Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
1784 NegOpc = AArch64::NEGv8i16;
1785 } else if (Ty == LLT::vector(16, 8)) {
1786 Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
1787 NegOpc = AArch64::NEGv16i8;
1788 } else if (Ty == LLT::vector(8, 8)) {
1789 Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
1790 NegOpc = AArch64::NEGv8i8;
1791 } else {
1792 LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
1793 return false;
1794 }
1795
1796 MachineIRBuilder MIB(I);
1797 auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
1798 constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
1799 auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
1800 constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
1801 I.eraseFromParent();
1802 return true;
1803 }
1804
1805 bool AArch64InstructionSelector::selectVaStartAAPCS(
1806 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1807 return false;
1808 }
1809
1810 bool AArch64InstructionSelector::selectVaStartDarwin(
1811 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1812 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
1813 Register ListReg = I.getOperand(0).getReg();
1814
1815 Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1816
1817 auto MIB =
1818 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
1819 .addDef(ArgsAddrReg)
1820 .addFrameIndex(FuncInfo->getVarArgsStackIndex())
1821 .addImm(0)
1822 .addImm(0);
1823
1824 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1825
1826 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
1827 .addUse(ArgsAddrReg)
1828 .addUse(ListReg)
1829 .addImm(0)
1830 .addMemOperand(*I.memoperands_begin());
1831
1832 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1833 I.eraseFromParent();
1834 return true;
1835 }
1836
1837 void AArch64InstructionSelector::materializeLargeCMVal(
1838 MachineInstr &I, const Value *V, unsigned OpFlags) const {
1839 MachineBasicBlock &MBB = *I.getParent();
1840 MachineFunction &MF = *MBB.getParent();
1841 MachineRegisterInfo &MRI = MF.getRegInfo();
1842 MachineIRBuilder MIB(I);
1843
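// The emitted sequence is roughly (illustrative; register and symbol names
// are placeholders):
//   movz x0, #:abs_g0_nc:sym
//   movk x0, #:abs_g1_nc:sym, lsl #16
//   movk x0, #:abs_g2_nc:sym, lsl #32
//   movk x0, #:abs_g3:sym, lsl #48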
1844 auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
1845 MovZ->addOperand(MF, I.getOperand(1));
1846 MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
1847 AArch64II::MO_NC);
1848 MovZ->addOperand(MF, MachineOperand::CreateImm(0));
1849 constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
1850
1851 auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
1852 Register ForceDstReg) {
1853 Register DstReg = ForceDstReg
1854 ? ForceDstReg
1855 : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1856 auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
1857 if (auto *GV = dyn_cast<GlobalValue>(V)) {
1858 MovI->addOperand(MF, MachineOperand::CreateGA(
1859 GV, MovZ->getOperand(1).getOffset(), Flags));
1860 } else {
1861 MovI->addOperand(
1862 MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
1863 MovZ->getOperand(1).getOffset(), Flags));
1864 }
1865 MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
1866 constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
1867 return DstReg;
1868 };
1869 Register DstReg = BuildMovK(MovZ.getReg(0),
1870 AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
1871 DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
1872 BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
1873 return;
1874 }
1875
1876 bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
1877 MachineBasicBlock &MBB = *I.getParent();
1878 MachineFunction &MF = *MBB.getParent();
1879 MachineRegisterInfo &MRI = MF.getRegInfo();
1880
1881 switch (I.getOpcode()) {
1882 case TargetOpcode::G_SHL:
1883 case TargetOpcode::G_ASHR:
1884 case TargetOpcode::G_LSHR: {
1885 // These shifts are legalized to have 64 bit shift amounts because we want
1886 // to take advantage of the existing imported selection patterns that assume
1887 // the immediates are s64s. However, if the shifted type is 32 bits and for
1888 // some reason we receive input GMIR that has an s64 shift amount that's not
1889 // a G_CONSTANT, insert a truncate so that we can still select the s32
1890 // register-register variant.
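// E.g. (illustrative):
//   %amt:gpr(s64) = ...               ; not a G_CONSTANT
//   %amt32:gpr(s32) = COPY %amt.sub_32
//   %res:gpr(s32) = G_SHL %x, %amt32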
1891 Register SrcReg = I.getOperand(1).getReg();
1892 Register ShiftReg = I.getOperand(2).getReg();
1893 const LLT ShiftTy = MRI.getType(ShiftReg);
1894 const LLT SrcTy = MRI.getType(SrcReg);
1895 if (SrcTy.isVector())
1896 return false;
1897 assert(!ShiftTy.isVector() && "unexpected vector shift ty");
1898 if (SrcTy.getSizeInBits() != 32 || ShiftTy.getSizeInBits() != 64)
1899 return false;
1900 auto *AmtMI = MRI.getVRegDef(ShiftReg);
1901 assert(AmtMI && "could not find a vreg definition for shift amount");
1902 if (AmtMI->getOpcode() != TargetOpcode::G_CONSTANT) {
1903 // Insert a subregister copy to implement a 64->32 trunc
1904 MachineIRBuilder MIB(I);
1905 auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
1906 .addReg(ShiftReg, 0, AArch64::sub_32);
1907 MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
1908 I.getOperand(2).setReg(Trunc.getReg(0));
1909 }
1910 return true;
1911 }
1912 case TargetOpcode::G_STORE:
1913 return contractCrossBankCopyIntoStore(I, MRI);
1914 case TargetOpcode::G_PTR_ADD:
1915 return convertPtrAddToAdd(I, MRI);
1916 case TargetOpcode::G_LOAD: {
1917 // For scalar loads of pointers, we try to convert the dest type from p0
1918 // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
1919 // conversion, this should be ok because all users should have been
1920 // selected already, so the type doesn't matter for them.
1921 Register DstReg = I.getOperand(0).getReg();
1922 const LLT DstTy = MRI.getType(DstReg);
1923 if (!DstTy.isPointer())
1924 return false;
1925 MRI.setType(DstReg, LLT::scalar(64));
1926 return true;
1927 }
1928 case AArch64::G_DUP: {
1929 // Convert the type from p0 to s64 to help selection.
1930 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1931 if (!DstTy.getElementType().isPointer())
1932 return false;
1933 MachineIRBuilder MIB(I);
1934 auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg());
1935 MRI.setType(I.getOperand(0).getReg(),
1936 DstTy.changeElementType(LLT::scalar(64)));
1937 MRI.setRegBank(NewSrc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
1938 I.getOperand(1).setReg(NewSrc.getReg(0));
1939 return true;
1940 }
1941 default:
1942 return false;
1943 }
1944 }
1945
1946 /// This lowering tries to look for G_PTR_ADD instructions and then converts
1947 /// them to a standard G_ADD with a COPY on the source.
1948 ///
1949 /// The motivation behind this is to expose the add semantics to the imported
1950 /// tablegen patterns. We shouldn't need to check for uses being loads/stores,
1951 /// because the selector works bottom up, uses before defs. By the time we
1952 /// end up trying to select a G_PTR_ADD, we should have already attempted to
1953 /// fold this into addressing modes and were therefore unsuccessful.
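/// For example (illustrative):
///   %dst:gpr(p0) = G_PTR_ADD %base(p0), %off(s64)
/// becomes:
///   %intbase:gpr(s64) = G_PTRTOINT %base(p0)
///   %dst:gpr(s64) = G_ADD %intbase, %off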
1954 bool AArch64InstructionSelector::convertPtrAddToAdd(
1955 MachineInstr &I, MachineRegisterInfo &MRI) {
1956 assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
1957 Register DstReg = I.getOperand(0).getReg();
1958 Register AddOp1Reg = I.getOperand(1).getReg();
1959 const LLT PtrTy = MRI.getType(DstReg);
1960 if (PtrTy.getAddressSpace() != 0)
1961 return false;
1962
1963 MachineIRBuilder MIB(I);
1964 const LLT CastPtrTy = PtrTy.isVector() ? LLT::vector(2, 64) : LLT::scalar(64);
1965 auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg);
1966 // Set regbanks on the registers.
1967 if (PtrTy.isVector())
1968 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
1969 else
1970 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
1971
1972 // Now turn the %dst(p0) = G_PTR_ADD %base, off into:
1973 // %dst(intty) = G_ADD %intbase, off
1974 I.setDesc(TII.get(TargetOpcode::G_ADD));
1975 MRI.setType(DstReg, CastPtrTy);
1976 I.getOperand(1).setReg(PtrToInt.getReg(0));
1977 if (!select(*PtrToInt)) {
1978 LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
1979 return false;
1980 }
1981
1982 // Also take the opportunity here to try to do some optimization.
1983 // Try to convert this into a G_SUB if the offset is a 0-x negate idiom.
1984 Register NegatedReg;
1985 if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg))))
1986 return true;
1987 I.getOperand(2).setReg(NegatedReg);
1988 I.setDesc(TII.get(TargetOpcode::G_SUB));
1989 return true;
1990 }
1991
1992 bool AArch64InstructionSelector::earlySelectSHL(
1993 MachineInstr &I, MachineRegisterInfo &MRI) const {
1994 // We try to match the immediate variant of LSL, which is actually an alias
1995 // for a special case of UBFM. Otherwise, we fall back to the imported
1996 // selector which will match the register variant.
1997 assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
1998 const auto &MO = I.getOperand(2);
1999 auto VRegAndVal = getConstantVRegVal(MO.getReg(), MRI);
2000 if (!VRegAndVal)
2001 return false;
2002
2003 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2004 if (DstTy.isVector())
2005 return false;
2006 bool Is64Bit = DstTy.getSizeInBits() == 64;
2007 auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
2008 auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
2009 MachineIRBuilder MIB(I);
2010
2011 if (!Imm1Fn || !Imm2Fn)
2012 return false;
2013
2014 auto NewI =
2015 MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
2016 {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
2017
2018 for (auto &RenderFn : *Imm1Fn)
2019 RenderFn(NewI);
2020 for (auto &RenderFn : *Imm2Fn)
2021 RenderFn(NewI);
2022
2023 I.eraseFromParent();
2024 return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
2025 }
2026
2027 bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
2028 MachineInstr &I, MachineRegisterInfo &MRI) {
2029 assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
2030 // If we're storing a scalar, it doesn't matter what register bank that
2031 // scalar is on. All that matters is the size.
2032 //
2033 // So, if we see something like this (with a 32-bit scalar as an example):
2034 //
2035 // %x:gpr(s32) = ... something ...
2036 // %y:fpr(s32) = COPY %x:gpr(s32)
2037 // G_STORE %y:fpr(s32)
2038 //
2039 // We can fix this up into something like this:
2040 //
2041 // G_STORE %x:gpr(s32)
2042 //
2043 // And then continue the selection process normally.
2044 Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI);
2045 if (!DefDstReg.isValid())
2046 return false;
2047 LLT DefDstTy = MRI.getType(DefDstReg);
2048 Register StoreSrcReg = I.getOperand(0).getReg();
2049 LLT StoreSrcTy = MRI.getType(StoreSrcReg);
2050
2051 // If we get something strange like a physical register, then we shouldn't
2052 // go any further.
2053 if (!DefDstTy.isValid())
2054 return false;
2055
2056 // Are the source and dst types the same size?
2057 if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
2058 return false;
2059
2060 if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
2061 RBI.getRegBank(DefDstReg, MRI, TRI))
2062 return false;
2063
2064 // We have a cross-bank copy, which is entering a store. Let's fold it.
2065 I.getOperand(0).setReg(DefDstReg);
2066 return true;
2067 }
2068
2069 bool AArch64InstructionSelector::earlySelect(MachineInstr &I) const {
2070 assert(I.getParent() && "Instruction should be in a basic block!");
2071 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2072
2073 MachineBasicBlock &MBB = *I.getParent();
2074 MachineFunction &MF = *MBB.getParent();
2075 MachineRegisterInfo &MRI = MF.getRegInfo();
2076
2077 switch (I.getOpcode()) {
2078 case TargetOpcode::G_BR: {
2079 // If the branch jumps to the fallthrough block, don't bother emitting it.
2080 // Only do this for -O0 for a good code size improvement, because when
2081 // optimizations are enabled we want to leave this choice to
2082 // MachineBlockPlacement.
2083 bool EnableOpt = MF.getTarget().getOptLevel() != CodeGenOpt::None;
2084 if (EnableOpt || !MBB.isLayoutSuccessor(I.getOperand(0).getMBB()))
2085 return false;
2086 I.eraseFromParent();
2087 return true;
2088 }
2089 case TargetOpcode::G_SHL:
2090 return earlySelectSHL(I, MRI);
2091 case TargetOpcode::G_CONSTANT: {
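// An integer zero can simply be a copy of the zero register, e.g.
// %x:gpr(s64) = G_CONSTANT i64 0 becomes (roughly) %x = COPY $xzr.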
2092 bool IsZero = false;
2093 if (I.getOperand(1).isCImm())
2094 IsZero = I.getOperand(1).getCImm()->getZExtValue() == 0;
2095 else if (I.getOperand(1).isImm())
2096 IsZero = I.getOperand(1).getImm() == 0;
2097
2098 if (!IsZero)
2099 return false;
2100
2101 Register DefReg = I.getOperand(0).getReg();
2102 LLT Ty = MRI.getType(DefReg);
2103 if (Ty.getSizeInBits() == 64) {
2104 I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
2105 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
2106 } else if (Ty.getSizeInBits() == 32) {
2107 I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
2108 RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
2109 } else
2110 return false;
2111
2112 I.setDesc(TII.get(TargetOpcode::COPY));
2113 return true;
2114 }
2115 default:
2116 return false;
2117 }
2118 }
2119
2120 bool AArch64InstructionSelector::select(MachineInstr &I) {
2121 assert(I.getParent() && "Instruction should be in a basic block!");
2122 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2123
2124 MachineBasicBlock &MBB = *I.getParent();
2125 MachineFunction &MF = *MBB.getParent();
2126 MachineRegisterInfo &MRI = MF.getRegInfo();
2127
2128 const AArch64Subtarget *Subtarget =
2129 &static_cast<const AArch64Subtarget &>(MF.getSubtarget());
2130 if (Subtarget->requiresStrictAlign()) {
2131 // We don't support this feature yet.
2132 LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");
2133 return false;
2134 }
2135
2136 unsigned Opcode = I.getOpcode();
2137 // G_PHI requires same handling as PHI
2138 if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
2139 // Certain non-generic instructions also need some special handling.
2140
2141 if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
2142 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2143
2144 if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
2145 const Register DefReg = I.getOperand(0).getReg();
2146 const LLT DefTy = MRI.getType(DefReg);
2147
2148 const RegClassOrRegBank &RegClassOrBank =
2149 MRI.getRegClassOrRegBank(DefReg);
2150
2151 const TargetRegisterClass *DefRC
2152 = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
2153 if (!DefRC) {
2154 if (!DefTy.isValid()) {
2155 LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
2156 return false;
2157 }
2158 const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
2159 DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI);
2160 if (!DefRC) {
2161 LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
2162 return false;
2163 }
2164 }
2165
2166 I.setDesc(TII.get(TargetOpcode::PHI));
2167
2168 return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
2169 }
2170
2171 if (I.isCopy())
2172 return selectCopy(I, TII, MRI, TRI, RBI);
2173
2174 return true;
2175 }
2176
2177
2178 if (I.getNumOperands() != I.getNumExplicitOperands()) {
2179 LLVM_DEBUG(
2180 dbgs() << "Generic instruction has unexpected implicit operands\n");
2181 return false;
2182 }
2183
2184 // Try to do some lowering before we start instruction selecting. These
2185 // lowerings are purely transformations on the input G_MIR and so selection
2186 // must continue after any modification of the instruction.
2187 if (preISelLower(I)) {
2188 Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.
2189 }
2190
2191 // There may be patterns that the importer can't handle optimally but still
2192 // selects to a suboptimal sequence, so our custom C++ selection code never
2193 // gets a chance to work on them. Therefore, we have an early selection
2194 // attempt here to give priority to certain selection routines over the
2195 // imported ones.
2196 if (earlySelect(I))
2197 return true;
2198
2199 if (selectImpl(I, *CoverageInfo))
2200 return true;
2201
2202 LLT Ty =
2203 I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
2204
2205 MachineIRBuilder MIB(I);
2206
2207 switch (Opcode) {
2208 case TargetOpcode::G_BRCOND:
2209 return selectCompareBranch(I, MF, MRI);
2210
2211 case TargetOpcode::G_BRINDIRECT: {
2212 I.setDesc(TII.get(AArch64::BR));
2213 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2214 }
2215
2216 case TargetOpcode::G_BRJT:
2217 return selectBrJT(I, MRI);
2218
2219 case AArch64::G_ADD_LOW: {
2220 // This op may have been separated from its ADRP companion by the localizer
2221 // or some other code motion pass. Given that many CPUs will try to
2222 // macro fuse these operations anyway, select this into a MOVaddr pseudo
2223 // which will later be expanded into an ADRP+ADD pair after scheduling.
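// The MOVaddr pseudo is later expanded to roughly (illustrative; register
// and symbol names are placeholders):
//   adrp x0, sym
//   add  x0, x0, :lo12:sym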
2224 MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg());
2225 if (BaseMI->getOpcode() != AArch64::ADRP) {
2226 I.setDesc(TII.get(AArch64::ADDXri));
2227 I.addOperand(MachineOperand::CreateImm(0));
2228 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2229 }
2230 assert(TM.getCodeModel() == CodeModel::Small &&
2231 "Expected small code model");
2232 MachineIRBuilder MIB(I);
2233 auto Op1 = BaseMI->getOperand(1);
2234 auto Op2 = I.getOperand(2);
2235 auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
2236 .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
2237 Op1.getTargetFlags())
2238 .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(),
2239 Op2.getTargetFlags());
2240 I.eraseFromParent();
2241 return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI);
2242 }
2243
2244 case TargetOpcode::G_BSWAP: {
2245 // Handle vector types for G_BSWAP directly.
2246 Register DstReg = I.getOperand(0).getReg();
2247 LLT DstTy = MRI.getType(DstReg);
2248
2249 // We should only get vector types here; everything else is handled by the
2250 // importer right now.
2251 if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) {
2252 LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n");
2253 return false;
2254 }
2255
2256 // Only handle 4 and 2 element vectors for now.
2257 // TODO: 16-bit elements.
2258 unsigned NumElts = DstTy.getNumElements();
2259 if (NumElts != 4 && NumElts != 2) {
2260 LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n");
2261 return false;
2262 }
2263
2264 // Choose the correct opcode for the supported types. Right now, that's
2265 // v2s32, v4s32, and v2s64.
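// E.g. (illustrative) a v4s32 bswap is selected to REV32v16i8 operating on
// the full 128-bit vector.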
2266 unsigned Opc = 0;
2267 unsigned EltSize = DstTy.getElementType().getSizeInBits();
2268 if (EltSize == 32)
2269 Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8
2270 : AArch64::REV32v16i8;
2271 else if (EltSize == 64)
2272 Opc = AArch64::REV64v16i8;
2273
2274 // We should always get something by the time we get here...
2275 assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?");
2276
2277 I.setDesc(TII.get(Opc));
2278 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2279 }
2280
2281 case TargetOpcode::G_FCONSTANT:
2282 case TargetOpcode::G_CONSTANT: {
2283 const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
2284
2285 const LLT s8 = LLT::scalar(8);
2286 const LLT s16 = LLT::scalar(16);
2287 const LLT s32 = LLT::scalar(32);
2288 const LLT s64 = LLT::scalar(64);
2289 const LLT p0 = LLT::pointer(0, 64);
2290
2291 const Register DefReg = I.getOperand(0).getReg();
2292 const LLT DefTy = MRI.getType(DefReg);
2293 const unsigned DefSize = DefTy.getSizeInBits();
2294 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2295
2296 // FIXME: Redundant check, but even less readable when factored out.
2297 if (isFP) {
2298 if (Ty != s32 && Ty != s64) {
2299 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2300 << " constant, expected: " << s32 << " or " << s64
2301 << '\n');
2302 return false;
2303 }
2304
2305 if (RB.getID() != AArch64::FPRRegBankID) {
2306 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2307 << " constant on bank: " << RB
2308 << ", expected: FPR\n");
2309 return false;
2310 }
2311
2312 // The case when we have 0.0 is covered by tablegen. Reject it here so we
2313 // can be sure tablegen works correctly and isn't rescued by this code.
2314 if (I.getOperand(1).getFPImm()->getValueAPF().isExactlyValue(0.0))
2315 return false;
2316 } else {
2317 // s32 and s64 are covered by tablegen.
2318 if (Ty != p0 && Ty != s8 && Ty != s16) {
2319 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2320 << " constant, expected: " << s32 << ", " << s64
2321 << ", or " << p0 << '\n');
2322 return false;
2323 }
2324
2325 if (RB.getID() != AArch64::GPRRegBankID) {
2326 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2327 << " constant on bank: " << RB
2328 << ", expected: GPR\n");
2329 return false;
2330 }
2331 }
2332
2333 // We allow G_CONSTANT of types < 32b.
2334 const unsigned MovOpc =
2335 DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
2336
2337 if (isFP) {
2338 // Either emit a FMOV, or emit a copy to emit a normal mov.
2339 const TargetRegisterClass &GPRRC =
2340 DefSize == 32 ? AArch64::GPR32RegClass : AArch64::GPR64RegClass;
2341 const TargetRegisterClass &FPRRC =
2342 DefSize == 32 ? AArch64::FPR32RegClass : AArch64::FPR64RegClass;
2343
2344 // Can we use a FMOV instruction to represent the immediate?
2345 if (emitFMovForFConstant(I, MRI))
2346 return true;
2347
2348 // For 64b values, emit a constant pool load instead.
2349 if (DefSize == 64) {
2350 auto *FPImm = I.getOperand(1).getFPImm();
2351 MachineIRBuilder MIB(I);
2352 auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
2353 if (!LoadMI) {
2354 LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
2355 return false;
2356 }
2357 MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()});
2358 I.eraseFromParent();
2359 return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
2360 }
2361
2362 // Nope. Emit a copy and use a normal mov instead.
2363 const Register DefGPRReg = MRI.createVirtualRegister(&GPRRC);
2364 MachineOperand &RegOp = I.getOperand(0);
2365 RegOp.setReg(DefGPRReg);
2366 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2367 MIB.buildCopy({DefReg}, {DefGPRReg});
2368
2369 if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
2370 LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
2371 return false;
2372 }
2373
2374 MachineOperand &ImmOp = I.getOperand(1);
2375 // FIXME: Is going through int64_t always correct?
2376 ImmOp.ChangeToImmediate(
2377 ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
2378 } else if (I.getOperand(1).isCImm()) {
2379 uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
2380 I.getOperand(1).ChangeToImmediate(Val);
2381 } else if (I.getOperand(1).isImm()) {
2382 uint64_t Val = I.getOperand(1).getImm();
2383 I.getOperand(1).ChangeToImmediate(Val);
2384 }
2385
2386 I.setDesc(TII.get(MovOpc));
2387 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2388 return true;
2389 }
2390 case TargetOpcode::G_EXTRACT: {
2391 Register DstReg = I.getOperand(0).getReg();
2392 Register SrcReg = I.getOperand(1).getReg();
2393 LLT SrcTy = MRI.getType(SrcReg);
2394 LLT DstTy = MRI.getType(DstReg);
2395 (void)DstTy;
2396 unsigned SrcSize = SrcTy.getSizeInBits();
2397
2398 if (SrcTy.getSizeInBits() > 64) {
2399 // This should be an extract of an s128, which is like a vector extract.
2400 if (SrcTy.getSizeInBits() != 128)
2401 return false;
2402 // Only support extracting 64 bits from an s128 at the moment.
2403 if (DstTy.getSizeInBits() != 64)
2404 return false;
2405
2406 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2407 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2408 // Check we have the right regbank always.
2409 assert(SrcRB.getID() == AArch64::FPRRegBankID &&
2410 DstRB.getID() == AArch64::FPRRegBankID &&
2411 "Wrong extract regbank!");
2412 (void)SrcRB;
2413
2414 // Emit the same code as a vector extract.
2415 // Offset must be a multiple of 64.
2416 unsigned Offset = I.getOperand(2).getImm();
2417 if (Offset % 64 != 0)
2418 return false;
2419 unsigned LaneIdx = Offset / 64;
2420 MachineIRBuilder MIB(I);
2421 MachineInstr *Extract = emitExtractVectorElt(
2422 DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
2423 if (!Extract)
2424 return false;
2425 I.eraseFromParent();
2426 return true;
2427 }
2428
2429 I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
2430 MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
2431 Ty.getSizeInBits() - 1);
2432
2433 if (SrcSize < 64) {
2434 assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
2435 "unexpected G_EXTRACT types");
2436 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2437 }
2438
2439 DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2440 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2441 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
2442 .addReg(DstReg, 0, AArch64::sub_32);
2443 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
2444 AArch64::GPR32RegClass, MRI);
2445 I.getOperand(0).setReg(DstReg);
2446
2447 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2448 }
2449
2450 case TargetOpcode::G_INSERT: {
2451 LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
2452 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2453 unsigned DstSize = DstTy.getSizeInBits();
2454 // Larger inserts are vectors, same-size ones should be something else by
2455 // now (split up or turned into COPYs).
2456 if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
2457 return false;
2458
2459 I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
2460 unsigned LSB = I.getOperand(3).getImm();
2461 unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
2462 I.getOperand(3).setImm((DstSize - LSB) % DstSize);
2463 MachineInstrBuilder(MF, I).addImm(Width - 1);
2464
2465 if (DstSize < 64) {
2466 assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
2467 "unexpected G_INSERT types");
2468 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2469 }
2470
2471 Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2472 BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
2473 TII.get(AArch64::SUBREG_TO_REG))
2474 .addDef(SrcReg)
2475 .addImm(0)
2476 .addUse(I.getOperand(2).getReg())
2477 .addImm(AArch64::sub_32);
2478 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
2479 AArch64::GPR32RegClass, MRI);
2480 I.getOperand(2).setReg(SrcReg);
2481
2482 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2483 }
2484 case TargetOpcode::G_FRAME_INDEX: {
2485 // allocas and G_FRAME_INDEX are only supported in addrspace(0).
2486 if (Ty != LLT::pointer(0, 64)) {
2487 LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
2488 << ", expected: " << LLT::pointer(0, 64) << '\n');
2489 return false;
2490 }
2491 I.setDesc(TII.get(AArch64::ADDXri));
2492
2493 // MOs for a #0 shifted immediate.
2494 I.addOperand(MachineOperand::CreateImm(0));
2495 I.addOperand(MachineOperand::CreateImm(0));
2496
2497 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2498 }
2499
2500 case TargetOpcode::G_GLOBAL_VALUE: {
2501 auto GV = I.getOperand(1).getGlobal();
2502 if (GV->isThreadLocal())
2503 return selectTLSGlobalValue(I, MRI);
2504
2505 unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
2506 if (OpFlags & AArch64II::MO_GOT) {
2507 I.setDesc(TII.get(AArch64::LOADgot));
2508 I.getOperand(1).setTargetFlags(OpFlags);
2509 } else if (TM.getCodeModel() == CodeModel::Large) {
2510 // Materialize the global using movz/movk instructions.
2511 materializeLargeCMVal(I, GV, OpFlags);
2512 I.eraseFromParent();
2513 return true;
2514 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2515 I.setDesc(TII.get(AArch64::ADR));
2516 I.getOperand(1).setTargetFlags(OpFlags);
2517 } else {
2518 I.setDesc(TII.get(AArch64::MOVaddr));
2519 I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
2520 MachineInstrBuilder MIB(MF, I);
2521 MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
2522 OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2523 }
2524 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2525 }
2526
2527 case TargetOpcode::G_ZEXTLOAD:
2528 case TargetOpcode::G_LOAD:
2529 case TargetOpcode::G_STORE: {
2530 bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
2531 MachineIRBuilder MIB(I);
2532
2533 LLT PtrTy = MRI.getType(I.getOperand(1).getReg());
2534
2535 if (PtrTy != LLT::pointer(0, 64)) {
2536 LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
2537 << ", expected: " << LLT::pointer(0, 64) << '\n');
2538 return false;
2539 }
2540
2541 auto &MemOp = **I.memoperands_begin();
2542 uint64_t MemSizeInBytes = MemOp.getSize();
2543 if (MemOp.isAtomic()) {
2544 // For now we just support s8 acquire loads to be able to compile stack
2545 // protector code.
2546 if (MemOp.getOrdering() == AtomicOrdering::Acquire &&
2547 MemSizeInBytes == 1) {
2548 I.setDesc(TII.get(AArch64::LDARB));
2549 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2550 }
2551 LLVM_DEBUG(dbgs() << "Atomic load/store not fully supported yet\n");
2552 return false;
2553 }
2554 unsigned MemSizeInBits = MemSizeInBytes * 8;
2555
2556 #ifndef NDEBUG
2557 const Register PtrReg = I.getOperand(1).getReg();
2558 const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
2559 // Sanity-check the pointer register.
2560 assert(PtrRB.getID() == AArch64::GPRRegBankID &&
2561 "Load/Store pointer operand isn't a GPR");
2562 assert(MRI.getType(PtrReg).isPointer() &&
2563 "Load/Store pointer operand isn't a pointer");
2564 #endif
2565
2566 const Register ValReg = I.getOperand(0).getReg();
2567 const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
2568
2569 // Helper lambda for partially selecting I. Either returns the original
2570 // instruction with an updated opcode, or a new instruction.
2571 auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
2572 bool IsStore = I.getOpcode() == TargetOpcode::G_STORE;
2573 const unsigned NewOpc =
2574 selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
2575 if (NewOpc == I.getOpcode())
2576 return nullptr;
2577 // Check if we can fold anything into the addressing mode.
2578 auto AddrModeFns =
2579 selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
2580 if (!AddrModeFns) {
2581 // Can't fold anything. Use the original instruction.
2582 I.setDesc(TII.get(NewOpc));
2583 I.addOperand(MachineOperand::CreateImm(0));
2584 return &I;
2585 }
2586
2587 // Folded something. Create a new instruction and return it.
2588 auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
2589 IsStore ? NewInst.addUse(ValReg) : NewInst.addDef(ValReg);
2590 NewInst.cloneMemRefs(I);
2591 for (auto &Fn : *AddrModeFns)
2592 Fn(NewInst);
2593 I.eraseFromParent();
2594 return &*NewInst;
2595 };
2596
2597 MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
2598 if (!LoadStore)
2599 return false;
2600
2601 // If we're storing a 0, use WZR/XZR.
2602 if (Opcode == TargetOpcode::G_STORE) {
2603 auto CVal = getConstantVRegValWithLookThrough(
2604 LoadStore->getOperand(0).getReg(), MRI, /*LookThroughInstrs = */ true,
2605 /*HandleFConstants = */ false);
2606 if (CVal && CVal->Value == 0) {
2607 switch (LoadStore->getOpcode()) {
2608 case AArch64::STRWui:
2609 case AArch64::STRHHui:
2610 case AArch64::STRBBui:
2611 LoadStore->getOperand(0).setReg(AArch64::WZR);
2612 break;
2613 case AArch64::STRXui:
2614 LoadStore->getOperand(0).setReg(AArch64::XZR);
2615 break;
2616 }
2617 }
2618 }
2619
2620 if (IsZExtLoad) {
2621 // The zextload from a smaller type to i32 should be handled by the
2622 // importer.
2623 if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
2624 return false;
2625 // If we have a ZEXTLOAD then change the load's type to be a narrower reg
2626 // and zero_extend with SUBREG_TO_REG.
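// E.g. (roughly) for a 16-bit zextload to s64:
//   %ld:gpr32 = LDRHHui %ptr, 0
//   %dst:gpr64 = SUBREG_TO_REG 0, %ld, %subreg.sub_32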
2627 Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2628 Register DstReg = LoadStore->getOperand(0).getReg();
2629 LoadStore->getOperand(0).setReg(LdReg);
2630
2631 MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator()));
2632 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
2633 .addImm(0)
2634 .addUse(LdReg)
2635 .addImm(AArch64::sub_32);
2636 constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
2637 return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
2638 MRI);
2639 }
2640 return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
2641 }
2642
2643 case TargetOpcode::G_SMULH:
2644 case TargetOpcode::G_UMULH: {
2645 // Reject the various things we don't support yet.
2646 if (unsupportedBinOp(I, RBI, MRI, TRI))
2647 return false;
2648
2649 const Register DefReg = I.getOperand(0).getReg();
2650 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2651
2652 if (RB.getID() != AArch64::GPRRegBankID) {
2653 LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n");
2654 return false;
2655 }
2656
2657 if (Ty != LLT::scalar(64)) {
2658 LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty
2659 << ", expected: " << LLT::scalar(64) << '\n');
2660 return false;
2661 }
2662
2663 unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
2664 : AArch64::UMULHrr;
2665 I.setDesc(TII.get(NewOpc));
2666
2667 // Now that we selected an opcode, we need to constrain the register
2668 // operands to use appropriate classes.
2669 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2670 }
2671 case TargetOpcode::G_LSHR:
2672 case TargetOpcode::G_ASHR:
2673 if (MRI.getType(I.getOperand(0).getReg()).isVector())
2674 return selectVectorAshrLshr(I, MRI);
2675 LLVM_FALLTHROUGH;
2676 case TargetOpcode::G_SHL:
2677 if (Opcode == TargetOpcode::G_SHL &&
2678 MRI.getType(I.getOperand(0).getReg()).isVector())
2679 return selectVectorSHL(I, MRI);
2680 LLVM_FALLTHROUGH;
2681 case TargetOpcode::G_FADD:
2682 case TargetOpcode::G_FSUB:
2683 case TargetOpcode::G_FMUL:
2684 case TargetOpcode::G_FDIV:
2685 case TargetOpcode::G_OR: {
2686 // Reject the various things we don't support yet.
2687 if (unsupportedBinOp(I, RBI, MRI, TRI))
2688 return false;
2689
2690 const unsigned OpSize = Ty.getSizeInBits();
2691
2692 const Register DefReg = I.getOperand(0).getReg();
2693 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2694
2695 const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
2696 if (NewOpc == I.getOpcode())
2697 return false;
2698
2699 I.setDesc(TII.get(NewOpc));
2700 // FIXME: Should the type be always reset in setDesc?
2701
2702 // Now that we selected an opcode, we need to constrain the register
2703 // operands to use appropriate classes.
2704 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2705 }
2706
2707 case TargetOpcode::G_PTR_ADD: {
2708 MachineIRBuilder MIRBuilder(I);
2709 emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2),
2710 MIRBuilder);
2711 I.eraseFromParent();
2712 return true;
2713 }
2714 case TargetOpcode::G_SADDO:
2715 case TargetOpcode::G_UADDO:
2716 case TargetOpcode::G_SSUBO: {
2717 // Emit the operation and get the correct condition code.
2718 MachineIRBuilder MIRBuilder(I);
2719 auto OpAndCC = emitOverflowOp(Opcode, I.getOperand(0).getReg(),
2720 I.getOperand(2), I.getOperand(3), MIRBuilder);
2721
2722 // Now, put the overflow result in the register given by the first operand
2723 // to the overflow op. CSINC increments the result when the predicate is
2724 // false, so to get the increment when it's true, we need to use the
2725 // inverse. In this case, we want to increment when carry is set.
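// E.g. (roughly) for a G_UADDO:
//   %res:gpr32 = ADDSWrr %lhs, %rhs        ; sets the carry flag
//   %carry:gpr32 = CSINCWr $wzr, $wzr, lo  ; 1 when carry (hs) was set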
2726 Register ZReg = AArch64::WZR;
2727 auto CsetMI = MIRBuilder
2728 .buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()},
2729 {ZReg, ZReg})
2730 .addImm(getInvertedCondCode(OpAndCC.second));
2731 constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI);
2732 I.eraseFromParent();
2733 return true;
2734 }
2735
2736 case TargetOpcode::G_PTRMASK: {
2737 Register MaskReg = I.getOperand(2).getReg();
2738 Optional<int64_t> MaskVal = getConstantVRegVal(MaskReg, MRI);
2739 // TODO: Implement arbitrary cases
2740 if (!MaskVal || !isShiftedMask_64(*MaskVal))
2741 return false;
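// E.g. (illustrative) G_PTRMASK %ptr, -16 (align down to 16 bytes) becomes
// ANDXri %ptr, <logical-immediate encoding of 0xFFFFFFFFFFFFFFF0>.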
2742
2743 uint64_t Mask = *MaskVal;
2744 I.setDesc(TII.get(AArch64::ANDXri));
2745 I.getOperand(2).ChangeToImmediate(
2746 AArch64_AM::encodeLogicalImmediate(Mask, 64));
2747
2748 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2749 }
2750 case TargetOpcode::G_PTRTOINT:
2751 case TargetOpcode::G_TRUNC: {
2752 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2753 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
2754
2755 const Register DstReg = I.getOperand(0).getReg();
2756 const Register SrcReg = I.getOperand(1).getReg();
2757
2758 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2759 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2760
2761 if (DstRB.getID() != SrcRB.getID()) {
2762 LLVM_DEBUG(
2763 dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
2764 return false;
2765 }
2766
2767 if (DstRB.getID() == AArch64::GPRRegBankID) {
2768 const TargetRegisterClass *DstRC =
2769 getRegClassForTypeOnBank(DstTy, DstRB, RBI);
2770 if (!DstRC)
2771 return false;
2772
2773 const TargetRegisterClass *SrcRC =
2774 getRegClassForTypeOnBank(SrcTy, SrcRB, RBI);
2775 if (!SrcRC)
2776 return false;
2777
2778 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
2779 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
2780 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
2781 return false;
2782 }
2783
2784 if (DstRC == SrcRC) {
2785 // Nothing to be done
2786 } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
2787 SrcTy == LLT::scalar(64)) {
2788 llvm_unreachable("TableGen can import this case");
2789 return false;
2790 } else if (DstRC == &AArch64::GPR32RegClass &&
2791 SrcRC == &AArch64::GPR64RegClass) {
2792 I.getOperand(1).setSubReg(AArch64::sub_32);
2793 } else {
2794 LLVM_DEBUG(
2795 dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
2796 return false;
2797 }
2798
2799 I.setDesc(TII.get(TargetOpcode::COPY));
2800 return true;
2801 } else if (DstRB.getID() == AArch64::FPRRegBankID) {
2802 if (DstTy == LLT::vector(4, 16) && SrcTy == LLT::vector(4, 32)) {
2803 I.setDesc(TII.get(AArch64::XTNv4i16));
2804 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2805 return true;
2806 }
2807
2808 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
2809 MachineIRBuilder MIB(I);
2810 MachineInstr *Extract = emitExtractVectorElt(
2811 DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
2812 if (!Extract)
2813 return false;
2814 I.eraseFromParent();
2815 return true;
2816 }
2817
2818 // We might have a vector G_PTRTOINT, in which case just emit a COPY.
2819 if (Opcode == TargetOpcode::G_PTRTOINT) {
2820 assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
2821 I.setDesc(TII.get(TargetOpcode::COPY));
2822 return true;
2823 }
2824 }
2825
2826 return false;
2827 }
2828
2829 case TargetOpcode::G_ANYEXT: {
2830 const Register DstReg = I.getOperand(0).getReg();
2831 const Register SrcReg = I.getOperand(1).getReg();
2832
2833 const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
2834 if (RBDst.getID() != AArch64::GPRRegBankID) {
2835 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
2836 << ", expected: GPR\n");
2837 return false;
2838 }
2839
2840 const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
2841 if (RBSrc.getID() != AArch64::GPRRegBankID) {
2842 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
2843 << ", expected: GPR\n");
2844 return false;
2845 }
2846
2847 const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
2848
2849 if (DstSize == 0) {
2850 LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
2851 return false;
2852 }
2853
2854 if (DstSize != 64 && DstSize > 32) {
2855 LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
2856 << ", expected: 32 or 64\n");
2857 return false;
2858 }
2859 // At this point G_ANYEXT is just like a plain COPY, but we need
2860 // to explicitly form the 64-bit value if any.
2861 if (DstSize > 32) {
2862 Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
2863 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
2864 .addDef(ExtSrc)
2865 .addImm(0)
2866 .addUse(SrcReg)
2867 .addImm(AArch64::sub_32);
2868 I.getOperand(1).setReg(ExtSrc);
2869 }
2870 return selectCopy(I, TII, MRI, TRI, RBI);
2871 }
2872
2873 case TargetOpcode::G_ZEXT:
2874 case TargetOpcode::G_SEXT_INREG:
2875 case TargetOpcode::G_SEXT: {
2876 unsigned Opcode = I.getOpcode();
2877 const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
2878 const Register DefReg = I.getOperand(0).getReg();
2879 Register SrcReg = I.getOperand(1).getReg();
2880 const LLT DstTy = MRI.getType(DefReg);
2881 const LLT SrcTy = MRI.getType(SrcReg);
2882 unsigned DstSize = DstTy.getSizeInBits();
2883 unsigned SrcSize = SrcTy.getSizeInBits();
2884
2885 // SEXT_INREG has the same src reg size as dst, the size of the value to be
2886 // extended is encoded in the imm.
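// E.g. (roughly) %d:gpr(s32) = G_SEXT_INREG %s, 8 is selected below as
// SBFMWri %s, 0, 7, i.e. a sign extension of the low 8 bits.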
2887 if (Opcode == TargetOpcode::G_SEXT_INREG)
2888 SrcSize = I.getOperand(2).getImm();
2889
2890 if (DstTy.isVector())
2891 return false; // Should be handled by imported patterns.
2892
2893 assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
2894 AArch64::GPRRegBankID &&
2895 "Unexpected ext regbank");
2896
2897 MachineIRBuilder MIB(I);
2898 MachineInstr *ExtI;
2899
2900 // First check if we're extending the result of a load whose dest type is
2901 // smaller than 32 bits; in that case this zext is redundant. GPR32 is the
2902 // smallest GPR register on AArch64, and all smaller loads automatically
2903 // zero-extend the upper bits. E.g.
2904 // %v(s8) = G_LOAD %p, :: (load 1)
2905 // %v2(s32) = G_ZEXT %v(s8)
2906 if (!IsSigned) {
2907 auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
2908 bool IsGPR =
2909 RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
2910 if (LoadMI && IsGPR) {
2911 const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
2912 unsigned BytesLoaded = MemOp->getSize();
2913 if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
2914 return selectCopy(I, TII, MRI, TRI, RBI);
2915 }
2916
2917 // If we are zero extending from 32 bits to 64 bits, it's possible that
2918 // the instruction implicitly does the zero extend for us. In that case,
2919 // we can just emit a SUBREG_TO_REG.
2920 if (IsGPR && SrcSize == 32 && DstSize == 64) {
2921 // Unlike with the G_LOAD case, we don't want to look through copies
2922 // here.
2923 MachineInstr *Def = MRI.getVRegDef(SrcReg);
2924 if (Def && isDef32(*Def)) {
2925 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
2926 .addImm(0)
2927 .addUse(SrcReg)
2928 .addImm(AArch64::sub_32);
2929
2930 if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
2931 MRI)) {
2932 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
2933 return false;
2934 }
2935
2936 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
2937 MRI)) {
2938 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");
2939 return false;
2940 }
2941
2942 I.eraseFromParent();
2943 return true;
2944 }
2945 }
2946 }
2947
2948 if (DstSize == 64) {
2949 if (Opcode != TargetOpcode::G_SEXT_INREG) {
2950 // FIXME: Can we avoid manually doing this?
2951 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
2952 MRI)) {
2953 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
2954 << " operand\n");
2955 return false;
2956 }
2957 SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
2958 {&AArch64::GPR64RegClass}, {})
2959 .addImm(0)
2960 .addUse(SrcReg)
2961 .addImm(AArch64::sub_32)
2962 .getReg(0);
2963 }
2964
2965 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
2966 {DefReg}, {SrcReg})
2967 .addImm(0)
2968 .addImm(SrcSize - 1);
2969 } else if (DstSize <= 32) {
2970 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
2971 {DefReg}, {SrcReg})
2972 .addImm(0)
2973 .addImm(SrcSize - 1);
2974 } else {
2975 return false;
2976 }
2977
2978 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
2979 I.eraseFromParent();
2980 return true;
2981 }
2982
2983 case TargetOpcode::G_SITOFP:
2984 case TargetOpcode::G_UITOFP:
2985 case TargetOpcode::G_FPTOSI:
2986 case TargetOpcode::G_FPTOUI: {
2987 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
2988 SrcTy = MRI.getType(I.getOperand(1).getReg());
2989 const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
2990 if (NewOpc == Opcode)
2991 return false;
2992
2993 I.setDesc(TII.get(NewOpc));
2994 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2995
2996 return true;
2997 }
2998
2999 case TargetOpcode::G_FREEZE:
3000 return selectCopy(I, TII, MRI, TRI, RBI);
3001
3002 case TargetOpcode::G_INTTOPTR:
3003 // The importer is currently unable to import pointer types since they
3004 // didn't exist in SelectionDAG.
3005 return selectCopy(I, TII, MRI, TRI, RBI);
3006
3007 case TargetOpcode::G_BITCAST:
3008 // Imported SelectionDAG rules can handle every bitcast except those that
3009 // bitcast from a type to the same type. Ideally, these shouldn't occur
3010 // but we might not run an optimizer that deletes them. The other exception
3011 // is bitcasts involving pointer types, as SelectionDAG has no knowledge
3012 // of them.
3013 return selectCopy(I, TII, MRI, TRI, RBI);
3014
3015 case TargetOpcode::G_SELECT: {
3016 if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) {
3017 LLVM_DEBUG(dbgs() << "G_SELECT cond has type: "
3018 << MRI.getType(I.getOperand(1).getReg()) << ", expected: " << LLT::scalar(1) << '\n');
3019 return false;
3020 }
3021
3022 const Register CondReg = I.getOperand(1).getReg();
3023 const Register TReg = I.getOperand(2).getReg();
3024 const Register FReg = I.getOperand(3).getReg();
3025
3026 if (tryOptSelect(I))
3027 return true;
3028
3029 // Make sure to use an unused vreg instead of wzr, so that the peephole
3030 // optimizations will be able to optimize these.
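// Illustratively, for a GPR select this path emits roughly:
//   %dead:gpr32 = ANDSWri %cond, <logical imm #0x1>  ; sets NZCV from bit 0
//   %dst = CSELWr/CSELXr %t, %f, NE                   ; via emitSelect below
// (a sketch of the expected output, not a literal dump).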
3031 MachineIRBuilder MIB(I);
3032 Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3033 auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
3034 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
3035 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
3036 if (!emitSelect(I.getOperand(0).getReg(), TReg, FReg, AArch64CC::NE, MIB))
3037 return false;
3038 I.eraseFromParent();
3039 return true;
3040 }
3041 case TargetOpcode::G_ICMP: {
3042 if (Ty.isVector())
3043 return selectVectorICmp(I, MRI);
3044
3045 if (Ty != LLT::scalar(32)) {
3046 LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
3047 << ", expected: " << LLT::scalar(32) << '\n');
3048 return false;
3049 }
3050
3051 MachineIRBuilder MIRBuilder(I);
3052 auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3053 emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1),
3054 MIRBuilder);
3055 emitCSetForICMP(I.getOperand(0).getReg(), Pred, MIRBuilder);
3056 I.eraseFromParent();
3057 return true;
3058 }
3059
3060 case TargetOpcode::G_FCMP: {
3061 MachineIRBuilder MIRBuilder(I);
3062 CmpInst::Predicate Pred =
3063 static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3064 if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(),
3065 MIRBuilder) ||
3066 !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIRBuilder))
3067 return false;
3068 I.eraseFromParent();
3069 return true;
3070 }
3071 case TargetOpcode::G_VASTART:
3072 return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
3073 : selectVaStartAAPCS(I, MF, MRI);
3074 case TargetOpcode::G_INTRINSIC:
3075 return selectIntrinsic(I, MRI);
3076 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
3077 return selectIntrinsicWithSideEffects(I, MRI);
3078 case TargetOpcode::G_IMPLICIT_DEF: {
3079 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
3080 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3081 const Register DstReg = I.getOperand(0).getReg();
3082 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3083 const TargetRegisterClass *DstRC =
3084 getRegClassForTypeOnBank(DstTy, DstRB, RBI);
3085 RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
3086 return true;
3087 }
3088 case TargetOpcode::G_BLOCK_ADDR: {
3089 if (TM.getCodeModel() == CodeModel::Large) {
3090 materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
3091 I.eraseFromParent();
3092 return true;
3093 } else {
3094 I.setDesc(TII.get(AArch64::MOVaddrBA));
3095 auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
3096 I.getOperand(0).getReg())
3097 .addBlockAddress(I.getOperand(1).getBlockAddress(),
3098 /* Offset */ 0, AArch64II::MO_PAGE)
3099 .addBlockAddress(
3100 I.getOperand(1).getBlockAddress(), /* Offset */ 0,
3101 AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3102 I.eraseFromParent();
3103 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3104 }
3105 }
3106 case AArch64::G_DUP: {
3107 // When the scalar of G_DUP is an s8/s16 gpr, it can't be selected by the
3108 // imported patterns, so do it manually here. Avoiding the generation of s16
3109 // gpr is difficult, because at RBS we may end up pessimizing the fpr case if
3110 // we were to add an anyextend to fix this. Manual selection is the most
3111 // robust solution for now.
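// For example (illustrative): %d:fpr(<8 x s16>) = G_DUP %s:gpr(s16) is
// selected to DUPv8i16gpr below, and the s8 case to DUPv16i8gpr.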
3112 Register SrcReg = I.getOperand(1).getReg();
3113 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::GPRRegBankID)
3114 return false; // We expect the fpr regbank case to be imported.
3115 LLT SrcTy = MRI.getType(SrcReg);
3116 if (SrcTy.getSizeInBits() == 16)
3117 I.setDesc(TII.get(AArch64::DUPv8i16gpr));
3118 else if (SrcTy.getSizeInBits() == 8)
3119 I.setDesc(TII.get(AArch64::DUPv16i8gpr));
3120 else
3121 return false;
3122 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3123 }
3124 case TargetOpcode::G_INTRINSIC_TRUNC:
3125 return selectIntrinsicTrunc(I, MRI);
3126 case TargetOpcode::G_INTRINSIC_ROUND:
3127 return selectIntrinsicRound(I, MRI);
3128 case TargetOpcode::G_BUILD_VECTOR:
3129 return selectBuildVector(I, MRI);
3130 case TargetOpcode::G_MERGE_VALUES:
3131 return selectMergeValues(I, MRI);
3132 case TargetOpcode::G_UNMERGE_VALUES:
3133 return selectUnmergeValues(I, MRI);
3134 case TargetOpcode::G_SHUFFLE_VECTOR:
3135 return selectShuffleVector(I, MRI);
3136 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3137 return selectExtractElt(I, MRI);
3138 case TargetOpcode::G_INSERT_VECTOR_ELT:
3139 return selectInsertElt(I, MRI);
3140 case TargetOpcode::G_CONCAT_VECTORS:
3141 return selectConcatVectors(I, MRI);
3142 case TargetOpcode::G_JUMP_TABLE:
3143 return selectJumpTable(I, MRI);
3144 case TargetOpcode::G_VECREDUCE_FADD:
3145 case TargetOpcode::G_VECREDUCE_ADD:
3146 return selectReduction(I, MRI);
3147 }
3148
3149 return false;
3150 }
3151
3152 bool AArch64InstructionSelector::selectReduction(
3153 MachineInstr &I, MachineRegisterInfo &MRI) const {
3154 Register VecReg = I.getOperand(1).getReg();
3155 LLT VecTy = MRI.getType(VecReg);
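// G_VECREDUCE_ADD maps onto the across-lanes ADDV instructions; there is no
// 64-bit-element ADDV, so <2 x s64> uses the pairwise ADDP instead (see the
// opcode choices below).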
3156 if (I.getOpcode() == TargetOpcode::G_VECREDUCE_ADD) {
3157 unsigned Opc = 0;
3158 if (VecTy == LLT::vector(16, 8))
3159 Opc = AArch64::ADDVv16i8v;
3160 else if (VecTy == LLT::vector(8, 16))
3161 Opc = AArch64::ADDVv8i16v;
3162 else if (VecTy == LLT::vector(4, 32))
3163 Opc = AArch64::ADDVv4i32v;
3164 else if (VecTy == LLT::vector(2, 64))
3165 Opc = AArch64::ADDPv2i64p;
3166 else {
3167 LLVM_DEBUG(dbgs() << "Unhandled type for add reduction");
3168 return false;
3169 }
3170 I.setDesc(TII.get(Opc));
3171 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3172 }
3173
3174 if (I.getOpcode() == TargetOpcode::G_VECREDUCE_FADD) {
3175 unsigned Opc = 0;
3176 if (VecTy == LLT::vector(2, 32))
3177 Opc = AArch64::FADDPv2i32p;
3178 else if (VecTy == LLT::vector(2, 64))
3179 Opc = AArch64::FADDPv2i64p;
3180 else {
3181 LLVM_DEBUG(dbgs() << "Unhandled type for fadd reduction");
3182 return false;
3183 }
3184 I.setDesc(TII.get(Opc));
3185 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3186 }
3187 return false;
3188 }
3189
3190 bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
3191 MachineRegisterInfo &MRI) const {
3192 assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
3193 Register JTAddr = I.getOperand(0).getReg();
3194 unsigned JTI = I.getOperand(1).getIndex();
3195 Register Index = I.getOperand(2).getReg();
3196 MachineIRBuilder MIB(I);
3197
3198 Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3199 Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
3200
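// JumpTableDest32 is a pseudo that is later expanded to roughly (sketch):
//   ldrsw xScratch, [xJTAddr, xIndex, lsl #2]
//   add   xTarget,  xJTAddr, xScratch
// i.e. it loads the signed 4-byte table entry and adds it to the table base;
// the BR built afterwards then branches to xTarget.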
3201 MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr);
3202 auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
3203 {TargetReg, ScratchReg}, {JTAddr, Index})
3204 .addJumpTableIndex(JTI);
3205 // Build the indirect branch.
3206 MIB.buildInstr(AArch64::BR, {}, {TargetReg});
3207 I.eraseFromParent();
3208 return constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI);
3209 }
3210
3211 bool AArch64InstructionSelector::selectJumpTable(
3212 MachineInstr &I, MachineRegisterInfo &MRI) const {
3213 assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
3214 assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
3215
3216 Register DstReg = I.getOperand(0).getReg();
3217 unsigned JTI = I.getOperand(1).getIndex();
3218 // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
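// e.g. (illustrative, label name made up):
//   adrp xN, .LJTI0_0
//   add  xN, xN, :lo12:.LJTI0_0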
3219 MachineIRBuilder MIB(I);
3220 auto MovMI =
3221 MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
3222 .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
3223 .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3224 I.eraseFromParent();
3225 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3226 }
3227
3228 bool AArch64InstructionSelector::selectTLSGlobalValue(
3229 MachineInstr &I, MachineRegisterInfo &MRI) const {
3230 if (!STI.isTargetMachO())
3231 return false;
3232 MachineFunction &MF = *I.getParent()->getParent();
3233 MF.getFrameInfo().setAdjustsStack(true);
3234
3235 const GlobalValue &GV = *I.getOperand(1).getGlobal();
3236 MachineIRBuilder MIB(I);
3237
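// For MachO TLS this emits roughly the following sequence (an illustrative
// sketch of the instructions built below):
//   <LOADgot>  x0, _var@TLVP...   ; address of the TLV descriptor
//   ldr        xFn, [x0]          ; the descriptor's thunk pointer
//   blr        xFn                ; returns the variable's address in x0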
3238 auto LoadGOT =
3239 MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
3240 .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
3241
3242 auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
3243 {LoadGOT.getReg(0)})
3244 .addImm(0);
3245
3246 MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0));
3247 // TLS calls preserve all registers except those that absolutely must be
3248 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
3249 // silly).
3250 MIB.buildInstr(getBLRCallOpcode(MF), {}, {Load})
3251 .addUse(AArch64::X0, RegState::Implicit)
3252 .addDef(AArch64::X0, RegState::Implicit)
3253 .addRegMask(TRI.getTLSCallPreservedMask());
3254
3255 MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
3256 RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
3257 MRI);
3258 I.eraseFromParent();
3259 return true;
3260 }
3261
3262 bool AArch64InstructionSelector::selectIntrinsicTrunc(
3263 MachineInstr &I, MachineRegisterInfo &MRI) const {
3264 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
3265
3266 // Select the correct opcode.
3267 unsigned Opc = 0;
3268 if (!SrcTy.isVector()) {
3269 switch (SrcTy.getSizeInBits()) {
3270 default:
3271 case 16:
3272 Opc = AArch64::FRINTZHr;
3273 break;
3274 case 32:
3275 Opc = AArch64::FRINTZSr;
3276 break;
3277 case 64:
3278 Opc = AArch64::FRINTZDr;
3279 break;
3280 }
3281 } else {
3282 unsigned NumElts = SrcTy.getNumElements();
3283 switch (SrcTy.getElementType().getSizeInBits()) {
3284 default:
3285 break;
3286 case 16:
3287 if (NumElts == 4)
3288 Opc = AArch64::FRINTZv4f16;
3289 else if (NumElts == 8)
3290 Opc = AArch64::FRINTZv8f16;
3291 break;
3292 case 32:
3293 if (NumElts == 2)
3294 Opc = AArch64::FRINTZv2f32;
3295 else if (NumElts == 4)
3296 Opc = AArch64::FRINTZv4f32;
3297 break;
3298 case 64:
3299 if (NumElts == 2)
3300 Opc = AArch64::FRINTZv2f64;
3301 break;
3302 }
3303 }
3304
3305 if (!Opc) {
3306 // Didn't get an opcode above, bail.
3307 LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n");
3308 return false;
3309 }
3310
3311 // Legalization would have set us up perfectly for this; we just need to
3312 // set the opcode and move on.
3313 I.setDesc(TII.get(Opc));
3314 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3315 }
3316
3317 bool AArch64InstructionSelector::selectIntrinsicRound(
3318 MachineInstr &I, MachineRegisterInfo &MRI) const {
3319 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
3320
3321 // Select the correct opcode.
3322 unsigned Opc = 0;
3323 if (!SrcTy.isVector()) {
3324 switch (SrcTy.getSizeInBits()) {
3325 default:
3326 case 16:
3327 Opc = AArch64::FRINTAHr;
3328 break;
3329 case 32:
3330 Opc = AArch64::FRINTASr;
3331 break;
3332 case 64:
3333 Opc = AArch64::FRINTADr;
3334 break;
3335 }
3336 } else {
3337 unsigned NumElts = SrcTy.getNumElements();
3338 switch (SrcTy.getElementType().getSizeInBits()) {
3339 default:
3340 break;
3341 case 16:
3342 if (NumElts == 4)
3343 Opc = AArch64::FRINTAv4f16;
3344 else if (NumElts == 8)
3345 Opc = AArch64::FRINTAv8f16;
3346 break;
3347 case 32:
3348 if (NumElts == 2)
3349 Opc = AArch64::FRINTAv2f32;
3350 else if (NumElts == 4)
3351 Opc = AArch64::FRINTAv4f32;
3352 break;
3353 case 64:
3354 if (NumElts == 2)
3355 Opc = AArch64::FRINTAv2f64;
3356 break;
3357 }
3358 }
3359
3360 if (!Opc) {
3361 // Didn't get an opcode above, bail.
3362 LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n");
3363 return false;
3364 }
3365
3366 // Legalization would have set us up perfectly for this; we just need to
3367 // set the opcode and move on.
3368 I.setDesc(TII.get(Opc));
3369 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3370 }
3371
3372 bool AArch64InstructionSelector::selectVectorICmp(
3373 MachineInstr &I, MachineRegisterInfo &MRI) const {
3374 Register DstReg = I.getOperand(0).getReg();
3375 LLT DstTy = MRI.getType(DstReg);
3376 Register SrcReg = I.getOperand(2).getReg();
3377 Register Src2Reg = I.getOperand(3).getReg();
3378 LLT SrcTy = MRI.getType(SrcReg);
3379
3380 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
3381 unsigned NumElts = DstTy.getNumElements();
3382
3383 // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b
3384 // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16
3385 // Third index is cc opcode:
3386 // 0 == eq
3387 // 1 == ugt
3388 // 2 == uge
3389 // 3 == ult
3390 // 4 == ule
3391 // 5 == sgt
3392 // 6 == sge
3393 // 7 == slt
3394 // 8 == sle
3395 // ne is done by negating 'eq' result.
3396
3397 // The table below assumes that for some comparisons the operands will be
3398 // commuted.
3399 // ult op == commute + ugt op
3400 // ule op == commute + uge op
3401 // slt op == commute + sgt op
3402 // sle op == commute + sge op
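// For example (illustrative): an slt compare of two <4 x s32> operands is
// selected as CMGTv4i32 with the operands swapped, since slt(a, b) is
// equivalent to sgt(b, a).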
3403 unsigned PredIdx = 0;
3404 bool SwapOperands = false;
3405 CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
3406 switch (Pred) {
3407 case CmpInst::ICMP_NE:
3408 case CmpInst::ICMP_EQ:
3409 PredIdx = 0;
3410 break;
3411 case CmpInst::ICMP_UGT:
3412 PredIdx = 1;
3413 break;
3414 case CmpInst::ICMP_UGE:
3415 PredIdx = 2;
3416 break;
3417 case CmpInst::ICMP_ULT:
3418 PredIdx = 3;
3419 SwapOperands = true;
3420 break;
3421 case CmpInst::ICMP_ULE:
3422 PredIdx = 4;
3423 SwapOperands = true;
3424 break;
3425 case CmpInst::ICMP_SGT:
3426 PredIdx = 5;
3427 break;
3428 case CmpInst::ICMP_SGE:
3429 PredIdx = 6;
3430 break;
3431 case CmpInst::ICMP_SLT:
3432 PredIdx = 7;
3433 SwapOperands = true;
3434 break;
3435 case CmpInst::ICMP_SLE:
3436 PredIdx = 8;
3437 SwapOperands = true;
3438 break;
3439 default:
3440 llvm_unreachable("Unhandled icmp predicate");
3441 return false;
3442 }
3443
3444 // This table obviously should be tablegen'd when we have our GISel native
3445 // tablegen selector.
3446
3447 static const unsigned OpcTable[4][4][9] = {
3448 {
3449 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3450 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3451 0 /* invalid */},
3452 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3453 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3454 0 /* invalid */},
3455 {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
3456 AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
3457 AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
3458 {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
3459 AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
3460 AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
3461 },
3462 {
3463 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3464 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3465 0 /* invalid */},
3466 {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
3467 AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
3468 AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
3469 {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
3470 AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
3471 AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
3472 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3473 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3474 0 /* invalid */}
3475 },
3476 {
3477 {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
3478 AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
3479 AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
3480 {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
3481 AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
3482 AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
3483 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3484 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3485 0 /* invalid */},
3486 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3487 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3488 0 /* invalid */}
3489 },
3490 {
3491 {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
3492 AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
3493 AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
3494 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3495 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3496 0 /* invalid */},
3497 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3498 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3499 0 /* invalid */},
3500 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3501 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3502 0 /* invalid */}
3503 },
3504 };
3505 unsigned EltIdx = Log2_32(SrcEltSize / 8);
3506 unsigned NumEltsIdx = Log2_32(NumElts / 2);
3507 unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
3508 if (!Opc) {
3509 LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode");
3510 return false;
3511 }
3512
3513 const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3514 const TargetRegisterClass *SrcRC =
3515 getRegClassForTypeOnBank(SrcTy, VecRB, RBI, true);
3516 if (!SrcRC) {
3517 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
3518 return false;
3519 }
3520
3521 unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
3522 if (SrcTy.getSizeInBits() == 128)
3523 NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
3524
3525 if (SwapOperands)
3526 std::swap(SrcReg, Src2Reg);
3527
3528 MachineIRBuilder MIB(I);
3529 auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg});
3530 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3531
3532 // Invert if we had a 'ne' cc.
3533 if (NotOpc) {
3534 Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp});
3535 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3536 } else {
3537 MIB.buildCopy(DstReg, Cmp.getReg(0));
3538 }
3539 RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
3540 I.eraseFromParent();
3541 return true;
3542 }
3543
3544 MachineInstr *AArch64InstructionSelector::emitScalarToVector(
3545 unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
3546 MachineIRBuilder &MIRBuilder) const {
3547 auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
3548
3549 auto BuildFn = [&](unsigned SubregIndex) {
3550 auto Ins =
3551 MIRBuilder
3552 .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
3553 .addImm(SubregIndex);
3554 constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
3555 constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
3556 return &*Ins;
3557 };
3558
3559 switch (EltSize) {
3560 case 16:
3561 return BuildFn(AArch64::hsub);
3562 case 32:
3563 return BuildFn(AArch64::ssub);
3564 case 64:
3565 return BuildFn(AArch64::dsub);
3566 default:
3567 return nullptr;
3568 }
3569 }
3570
3571 bool AArch64InstructionSelector::selectMergeValues(
3572 MachineInstr &I, MachineRegisterInfo &MRI) const {
3573 assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
3574 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3575 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3576 assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
3577 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
3578
3579 if (I.getNumOperands() != 3)
3580 return false;
3581
3582 // Merging 2 s64s into an s128.
3583 if (DstTy == LLT::scalar(128)) {
3584 if (SrcTy.getSizeInBits() != 64)
3585 return false;
3586 MachineIRBuilder MIB(I);
3587 Register DstReg = I.getOperand(0).getReg();
3588 Register Src1Reg = I.getOperand(1).getReg();
3589 Register Src2Reg = I.getOperand(2).getReg();
3590 auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
3591 MachineInstr *InsMI =
3592 emitLaneInsert(None, Tmp.getReg(0), Src1Reg, /* LaneIdx */ 0, RB, MIB);
3593 if (!InsMI)
3594 return false;
3595 MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
3596 Src2Reg, /* LaneIdx */ 1, RB, MIB);
3597 if (!Ins2MI)
3598 return false;
3599 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
3600 constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
3601 I.eraseFromParent();
3602 return true;
3603 }
3604
3605 if (RB.getID() != AArch64::GPRRegBankID)
3606 return false;
3607
3608 if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
3609 return false;
3610
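// Merge two s32s into an s64: widen both with SUBREG_TO_REG, then insert the
// second value into the top half with BFM. The BFMXri below (immr = 32,
// imms = 31) behaves like (illustrative):
//   bfi xDst, xHi, #32, #32   ; dst[63:32] = hi[31:0], dst[31:0] = lo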
3611 auto *DstRC = &AArch64::GPR64RegClass;
3612 Register SubToRegDef = MRI.createVirtualRegister(DstRC);
3613 MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3614 TII.get(TargetOpcode::SUBREG_TO_REG))
3615 .addDef(SubToRegDef)
3616 .addImm(0)
3617 .addUse(I.getOperand(1).getReg())
3618 .addImm(AArch64::sub_32);
3619 Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
3620 // Need to anyext the second scalar before we can use bfm
3621 MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3622 TII.get(TargetOpcode::SUBREG_TO_REG))
3623 .addDef(SubToRegDef2)
3624 .addImm(0)
3625 .addUse(I.getOperand(2).getReg())
3626 .addImm(AArch64::sub_32);
3627 MachineInstr &BFM =
3628 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
3629 .addDef(I.getOperand(0).getReg())
3630 .addUse(SubToRegDef)
3631 .addUse(SubToRegDef2)
3632 .addImm(32)
3633 .addImm(31);
3634 constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
3635 constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
3636 constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
3637 I.eraseFromParent();
3638 return true;
3639 }
3640
3641 static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
3642 const unsigned EltSize) {
3643 // Choose a lane copy opcode and subregister based off of the size of the
3644 // vector's elements.
3645 switch (EltSize) {
3646 case 16:
3647 CopyOpc = AArch64::CPYi16;
3648 ExtractSubReg = AArch64::hsub;
3649 break;
3650 case 32:
3651 CopyOpc = AArch64::CPYi32;
3652 ExtractSubReg = AArch64::ssub;
3653 break;
3654 case 64:
3655 CopyOpc = AArch64::CPYi64;
3656 ExtractSubReg = AArch64::dsub;
3657 break;
3658 default:
3659 // Unknown size, bail out.
3660 LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
3661 return false;
3662 }
3663 return true;
3664 }
3665
3666 MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
3667 Optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
3668 Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
3669 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3670 unsigned CopyOpc = 0;
3671 unsigned ExtractSubReg = 0;
3672 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
3673 LLVM_DEBUG(
3674 dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
3675 return nullptr;
3676 }
3677
3678 const TargetRegisterClass *DstRC =
3679 getRegClassForTypeOnBank(ScalarTy, DstRB, RBI, true);
3680 if (!DstRC) {
3681 LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
3682 return nullptr;
3683 }
3684
3685 const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
3686 const LLT &VecTy = MRI.getType(VecReg);
3687 const TargetRegisterClass *VecRC =
3688 getRegClassForTypeOnBank(VecTy, VecRB, RBI, true);
3689 if (!VecRC) {
3690 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
3691 return nullptr;
3692 }
3693
3694 // The register that we're going to copy into.
3695 Register InsertReg = VecReg;
3696 if (!DstReg)
3697 DstReg = MRI.createVirtualRegister(DstRC);
3698 // If the lane index is 0, we just use a subregister COPY.
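// e.g. extracting lane 0 of a <2 x s64> is just (illustrative):
//   %dst:fpr64 = COPY %vec.dsub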
3699 if (LaneIdx == 0) {
3700 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
3701 .addReg(VecReg, 0, ExtractSubReg);
3702 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
3703 return &*Copy;
3704 }
3705
3706 // Lane copies require 128-bit wide registers. If we're dealing with an
3707 // unpacked vector, then we need to move up to that width. Insert an implicit
3708 // def and a subregister insert to get us there.
3709 if (VecTy.getSizeInBits() != 128) {
3710 MachineInstr *ScalarToVector = emitScalarToVector(
3711 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
3712 if (!ScalarToVector)
3713 return nullptr;
3714 InsertReg = ScalarToVector->getOperand(0).getReg();
3715 }
3716
3717 MachineInstr *LaneCopyMI =
3718 MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
3719 constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
3720
3721 // Make sure that we actually constrain the initial copy.
3722 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
3723 return LaneCopyMI;
3724 }
3725
3726 bool AArch64InstructionSelector::selectExtractElt(
3727 MachineInstr &I, MachineRegisterInfo &MRI) const {
3728 assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
3729 "unexpected opcode!");
3730 Register DstReg = I.getOperand(0).getReg();
3731 const LLT NarrowTy = MRI.getType(DstReg);
3732 const Register SrcReg = I.getOperand(1).getReg();
3733 const LLT WideTy = MRI.getType(SrcReg);
3734 (void)WideTy;
3735 assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
3736 "source register size too small!");
3737 assert(!NarrowTy.isVector() && "cannot extract vector into vector!");
3738
3739 // Need the lane index to determine the correct copy opcode.
3740 MachineOperand &LaneIdxOp = I.getOperand(2);
3741 assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
3742
3743 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
3744 LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
3745 return false;
3746 }
3747
3748 // Find the index to extract from.
3749 auto VRegAndVal = getConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
3750 if (!VRegAndVal)
3751 return false;
3752 unsigned LaneIdx = VRegAndVal->Value;
3753
3754 MachineIRBuilder MIRBuilder(I);
3755
3756 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3757 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
3758 LaneIdx, MIRBuilder);
3759 if (!Extract)
3760 return false;
3761
3762 I.eraseFromParent();
3763 return true;
3764 }
3765
3766 bool AArch64InstructionSelector::selectSplitVectorUnmerge(
3767 MachineInstr &I, MachineRegisterInfo &MRI) const {
3768 unsigned NumElts = I.getNumOperands() - 1;
3769 Register SrcReg = I.getOperand(NumElts).getReg();
3770 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
3771 const LLT SrcTy = MRI.getType(SrcReg);
3772
3773 assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
3774 if (SrcTy.getSizeInBits() > 128) {
3775 LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
3776 return false;
3777 }
3778
3779 MachineIRBuilder MIB(I);
3780
3781 // We implement a split vector operation by treating the sub-vectors as
3782 // scalars and extracting them.
3783 const RegisterBank &DstRB =
3784 *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
3785 for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
3786 Register Dst = I.getOperand(OpIdx).getReg();
3787 MachineInstr *Extract =
3788 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
3789 if (!Extract)
3790 return false;
3791 }
3792 I.eraseFromParent();
3793 return true;
3794 }
3795
3796 bool AArch64InstructionSelector::selectUnmergeValues(
3797 MachineInstr &I, MachineRegisterInfo &MRI) const {
3798 assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
3799 "unexpected opcode");
3800
3801 // TODO: Handle unmerging into GPRs and from scalars to scalars.
3802 if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
3803 AArch64::FPRRegBankID ||
3804 RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
3805 AArch64::FPRRegBankID) {
3806 LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
3807 "currently unsupported.\n");
3808 return false;
3809 }
3810
3811 // The last operand is the vector source register, and every other operand is
3812 // a register to unpack into.
3813 unsigned NumElts = I.getNumOperands() - 1;
3814 Register SrcReg = I.getOperand(NumElts).getReg();
3815 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
3816 const LLT WideTy = MRI.getType(SrcReg);
3817 (void)WideTy;
3818 assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
3819 "can only unmerge from vector or s128 types!");
3820 assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
3821 "source register size too small!");
3822
3823 if (!NarrowTy.isScalar())
3824 return selectSplitVectorUnmerge(I, MRI);
3825
3826 MachineIRBuilder MIB(I);
3827
3828 // Choose a lane copy opcode and subregister based off of the size of the
3829 // vector's elements.
3830 unsigned CopyOpc = 0;
3831 unsigned ExtractSubReg = 0;
3832 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
3833 return false;
3834
3835 // Set up for the lane copies.
3836 MachineBasicBlock &MBB = *I.getParent();
3837
3838 // Stores the registers we'll be copying from.
3839 SmallVector<Register, 4> InsertRegs;
3840
3841 // We'll use the first register twice, so we only need NumElts-1 registers.
3842 unsigned NumInsertRegs = NumElts - 1;
3843
3844 // If our elements fit into exactly 128 bits, then we can copy from the source
3845 // directly. Otherwise, we need to do a bit of setup with some subregister
3846 // inserts.
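// e.g. a 64-bit source such as <2 x s32> must first be widened to an FPR128
// with IMPLICIT_DEF + INSERT_SUBREG (dsub) before CPYi32 lane copies can read
// its lanes (a sketch of the else-branch below).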
3847 if (NarrowTy.getSizeInBits() * NumElts == 128) {
3848 InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
3849 } else {
3850 // No. We have to perform subregister inserts. For each insert, create an
3851 // implicit def and a subregister insert, and save the register we create.
3852 for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
3853 Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
3854 MachineInstr &ImpDefMI =
3855 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
3856 ImpDefReg);
3857
3858 // Now, create the subregister insert from SrcReg.
3859 Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
3860 MachineInstr &InsMI =
3861 *BuildMI(MBB, I, I.getDebugLoc(),
3862 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
3863 .addUse(ImpDefReg)
3864 .addUse(SrcReg)
3865 .addImm(AArch64::dsub);
3866
3867 constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
3868 constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
3869
3870 // Save the register so that we can copy from it after.
3871 InsertRegs.push_back(InsertReg);
3872 }
3873 }
3874
3875 // Now that we've created any necessary subregister inserts, we can
3876 // create the copies.
3877 //
3878 // Perform the first copy separately as a subregister copy.
3879 Register CopyTo = I.getOperand(0).getReg();
3880 auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
3881 .addReg(InsertRegs[0], 0, ExtractSubReg);
3882 constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
3883
3884 // Now, perform the remaining copies as vector lane copies.
3885 unsigned LaneIdx = 1;
3886 for (Register InsReg : InsertRegs) {
3887 Register CopyTo = I.getOperand(LaneIdx).getReg();
3888 MachineInstr &CopyInst =
3889 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
3890 .addUse(InsReg)
3891 .addImm(LaneIdx);
3892 constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
3893 ++LaneIdx;
3894 }
3895
3896 // Separately constrain the first copy's destination. Because of the
3897 // limitation in constrainOperandRegClass, we can't guarantee that this will
3898 // actually be constrained. So, do it ourselves using the second operand.
3899 const TargetRegisterClass *RC =
3900 MRI.getRegClassOrNull(I.getOperand(1).getReg());
3901 if (!RC) {
3902 LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
3903 return false;
3904 }
3905
3906 RBI.constrainGenericRegister(CopyTo, *RC, MRI);
3907 I.eraseFromParent();
3908 return true;
3909 }
3910
3911 bool AArch64InstructionSelector::selectConcatVectors(
3912 MachineInstr &I, MachineRegisterInfo &MRI) const {
3913 assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
3914 "Unexpected opcode");
3915 Register Dst = I.getOperand(0).getReg();
3916 Register Op1 = I.getOperand(1).getReg();
3917 Register Op2 = I.getOperand(2).getReg();
3918 MachineIRBuilder MIRBuilder(I);
3919 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIRBuilder);
3920 if (!ConcatMI)
3921 return false;
3922 I.eraseFromParent();
3923 return true;
3924 }
3925
3926 unsigned
3927 AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
3928 MachineFunction &MF) const {
3929 Type *CPTy = CPVal->getType();
3930 Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPTy);
3931
3932 MachineConstantPool *MCP = MF.getConstantPool();
3933 return MCP->getConstantPoolIndex(CPVal, Alignment);
3934 }
3935
3936 MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
3937 const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
3938 unsigned CPIdx = emitConstantPoolEntry(CPVal, MIRBuilder.getMF());
3939
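// The constant is materialized with an ADRP + load from the constant pool,
// roughly (illustrative, label name made up):
//   adrp xN, .LCPI0_0
//   ldr  qD, [xN, :lo12:.LCPI0_0]   ; or ldr dD for 8-byte constants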
3940 auto Adrp =
3941 MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
3942 .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
3943
3944 MachineInstr *LoadMI = nullptr;
3945 switch (MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType())) {
3946 case 16:
3947 LoadMI =
3948 &*MIRBuilder
3949 .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp})
3950 .addConstantPoolIndex(CPIdx, 0,
3951 AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3952 break;
3953 case 8:
3954 LoadMI = &*MIRBuilder
3955 .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
3956 .addConstantPoolIndex(
3957 CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3958 break;
3959 default:
3960 LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
3961 << *CPVal->getType());
3962 return nullptr;
3963 }
3964 constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
3965 constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
3966 return LoadMI;
3967 }
3968
3969 /// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
3970 /// size and RB.
3971 static std::pair<unsigned, unsigned>
3972 getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
3973 unsigned Opc, SubregIdx;
3974 if (RB.getID() == AArch64::GPRRegBankID) {
3975 if (EltSize == 16) {
3976 Opc = AArch64::INSvi16gpr;
3977 SubregIdx = AArch64::ssub;
3978 } else if (EltSize == 32) {
3979 Opc = AArch64::INSvi32gpr;
3980 SubregIdx = AArch64::ssub;
3981 } else if (EltSize == 64) {
3982 Opc = AArch64::INSvi64gpr;
3983 SubregIdx = AArch64::dsub;
3984 } else {
3985 llvm_unreachable("invalid elt size!");
3986 }
3987 } else {
3988 if (EltSize == 8) {
3989 Opc = AArch64::INSvi8lane;
3990 SubregIdx = AArch64::bsub;
3991 } else if (EltSize == 16) {
3992 Opc = AArch64::INSvi16lane;
3993 SubregIdx = AArch64::hsub;
3994 } else if (EltSize == 32) {
3995 Opc = AArch64::INSvi32lane;
3996 SubregIdx = AArch64::ssub;
3997 } else if (EltSize == 64) {
3998 Opc = AArch64::INSvi64lane;
3999 SubregIdx = AArch64::dsub;
4000 } else {
4001 llvm_unreachable("invalid elt size!");
4002 }
4003 }
4004 return std::make_pair(Opc, SubregIdx);
4005 }
4006
4007 MachineInstr *AArch64InstructionSelector::emitInstr(
4008 unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
4009 std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
4010 const ComplexRendererFns &RenderFns) const {
4011 assert(Opcode && "Expected an opcode?");
4012 assert(!isPreISelGenericOpcode(Opcode) &&
4013 "Function should only be used to produce selected instructions!");
4014 auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
4015 if (RenderFns)
4016 for (auto &Fn : *RenderFns)
4017 Fn(MI);
4018 constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
4019 return &*MI;
4020 }
4021
4022 MachineInstr *AArch64InstructionSelector::emitAddSub(
4023 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
4024 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
4025 MachineIRBuilder &MIRBuilder) const {
4026 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4027 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4028 auto Ty = MRI.getType(LHS.getReg());
4029 assert(!Ty.isVector() && "Expected a scalar or pointer?");
4030 unsigned Size = Ty.getSizeInBits();
4031 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
4032 bool Is32Bit = Size == 32;
4033
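// For instance (illustrative, using emitADD's opcode table): an RHS of
// G_CONSTANT 4 is matched by selectArithImmed and selects ADDWri/ADDXri,
// while an RHS defined by G_SHL %x, 2 selects the shifted-register form
// ADDWrs/ADDXrs.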
4034 // INSTRri form with positive arithmetic immediate.
4035 if (auto Fns = selectArithImmed(RHS))
4036 return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
4037 MIRBuilder, Fns);
4038
4039 // INSTRri form with negative arithmetic immediate.
4040 if (auto Fns = selectNegArithImmed(RHS))
4041 return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
4042 MIRBuilder, Fns);
4043
4044 // INSTRrx form.
4045 if (auto Fns = selectArithExtendedRegister(RHS))
4046 return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
4047 MIRBuilder, Fns);
4048
4049 // INSTRrs form.
4050 if (auto Fns = selectShiftedRegister(RHS))
4051 return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
4052 MIRBuilder, Fns);
4053 return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
4054 MIRBuilder);
4055 }
4056
4057 MachineInstr *
4058 AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
4059 MachineOperand &RHS,
4060 MachineIRBuilder &MIRBuilder) const {
4061 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4062 {{AArch64::ADDXri, AArch64::ADDWri},
4063 {AArch64::ADDXrs, AArch64::ADDWrs},
4064 {AArch64::ADDXrr, AArch64::ADDWrr},
4065 {AArch64::SUBXri, AArch64::SUBWri},
4066 {AArch64::ADDXrx, AArch64::ADDWrx}}};
4067 return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
4068 }
4069
4070 MachineInstr *
4071 AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
4072 MachineOperand &RHS,
4073 MachineIRBuilder &MIRBuilder) const {
4074 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4075 {{AArch64::ADDSXri, AArch64::ADDSWri},
4076 {AArch64::ADDSXrs, AArch64::ADDSWrs},
4077 {AArch64::ADDSXrr, AArch64::ADDSWrr},
4078 {AArch64::SUBSXri, AArch64::SUBSWri},
4079 {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
4080 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4081 }
4082
4083 MachineInstr *
4084 AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
4085 MachineOperand &RHS,
4086 MachineIRBuilder &MIRBuilder) const {
4087 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4088 {{AArch64::SUBSXri, AArch64::SUBSWri},
4089 {AArch64::SUBSXrs, AArch64::SUBSWrs},
4090 {AArch64::SUBSXrr, AArch64::SUBSWrr},
4091 {AArch64::ADDSXri, AArch64::ADDSWri},
4092 {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
4093 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4094 }
4095
4096 MachineInstr *
4097 AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
4098 MachineIRBuilder &MIRBuilder) const {
4099 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4100 bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
4101 auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
4102 return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
4103 }
4104
4105 MachineInstr *
4106 AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
4107 MachineIRBuilder &MIRBuilder) const {
4108 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4109 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4110 LLT Ty = MRI.getType(LHS.getReg());
4111 unsigned RegSize = Ty.getSizeInBits();
4112 bool Is32Bit = (RegSize == 32);
4113 const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
4114 {AArch64::ANDSXrs, AArch64::ANDSWrs},
4115 {AArch64::ANDSXrr, AArch64::ANDSWrr}};
4116 // ANDS needs a logical immediate for its immediate form. Check if we can
4117 // fold one in.
4118 if (auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
4119 if (AArch64_AM::isLogicalImmediate(ValAndVReg->Value, RegSize)) {
4120 auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
4121 TstMI.addImm(
4122 AArch64_AM::encodeLogicalImmediate(ValAndVReg->Value, RegSize));
4123 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
4124 return &*TstMI;
4125 }
4126 }
4127
4128 if (auto Fns = selectLogicalShiftedRegister(RHS))
4129 return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
4130 return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
4131 }
4132
4133 MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
4134 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4135 MachineIRBuilder &MIRBuilder) const {
4136 assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
4137 assert(Predicate.isPredicate() && "Expected predicate?");
4138 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4139 LLT CmpTy = MRI.getType(LHS.getReg());
4140 assert(!CmpTy.isVector() && "Expected scalar or pointer");
4141 unsigned Size = CmpTy.getSizeInBits();
4142 (void)Size;
4143 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
4144 // Fold the compare into a cmn or tst if possible.
4145 if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
4146 return FoldCmp;
4147 auto Dst = MRI.cloneVirtualRegister(LHS.getReg());
4148 return emitSUBS(Dst, LHS, RHS, MIRBuilder);
4149 }
4150
4151 MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
4152 Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
4153 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4154 #ifndef NDEBUG
4155 LLT Ty = MRI.getType(Dst);
4156 assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
4157 "Expected a 32-bit scalar register?");
4158 #endif
4159 const Register ZeroReg = AArch64::WZR;
4160 auto EmitCSet = [&](Register CsetDst, AArch64CC::CondCode CC) {
4161 auto CSet =
4162 MIRBuilder.buildInstr(AArch64::CSINCWr, {CsetDst}, {ZeroReg, ZeroReg})
4163 .addImm(getInvertedCondCode(CC));
4164 constrainSelectedInstRegOperands(*CSet, TII, TRI, RBI);
4165 return &*CSet;
4166 };
4167
4168 AArch64CC::CondCode CC1, CC2;
4169 changeFCMPPredToAArch64CC(Pred, CC1, CC2);
4170 if (CC2 == AArch64CC::AL)
4171 return EmitCSet(Dst, CC1);
4172
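// Some predicates need two condition codes, e.g. FCMP_ONE becomes MI and GT
// (illustrative); emit a cset for each and OR the two results together.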
4173 const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
4174 Register Def1Reg = MRI.createVirtualRegister(RC);
4175 Register Def2Reg = MRI.createVirtualRegister(RC);
4176 EmitCSet(Def1Reg, CC1);
4177 EmitCSet(Def2Reg, CC2);
4178 auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
4179 constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
4180 return &*OrMI;
4181 }
4182
4183 MachineInstr *
4184 AArch64InstructionSelector::emitFPCompare(Register LHS, Register RHS,
4185 MachineIRBuilder &MIRBuilder) const {
4186 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4187 LLT Ty = MRI.getType(LHS);
4188 if (Ty.isVector())
4189 return nullptr;
4190 unsigned OpSize = Ty.getSizeInBits();
4191 if (OpSize != 32 && OpSize != 64)
4192 return nullptr;
4193
4194 // If this is a compare against +0.0, then we don't have
4195 // to explicitly materialize a constant.
4196 const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI);
4197 bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
4198 unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
4199 {AArch64::FCMPSri, AArch64::FCMPDri}};
4200 unsigned CmpOpc = CmpOpcTbl[ShouldUseImm][OpSize == 64];
4201
4202 // Partially build the compare. Decide if we need to add a use for the
4203 // third operand based off whether or not we're comparing against 0.0.
4204 auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS);
4205 if (!ShouldUseImm)
4206 CmpMI.addUse(RHS);
4207 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
4208 return &*CmpMI;
4209 }
4210
4211 MachineInstr *AArch64InstructionSelector::emitVectorConcat(
4212 Optional<Register> Dst, Register Op1, Register Op2,
4213 MachineIRBuilder &MIRBuilder) const {
4214 // We implement a vector concat by:
4215 // 1. Using scalar_to_vector to insert the lower vector into the larger dest
4216 // 2. Inserting the upper vector into the destination's upper element
4217 // TODO: some of this code is common with G_BUILD_VECTOR handling.
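// Illustratively, concatenating two 64-bit vectors (e.g. <2 x s32>) produces:
//   %w1:fpr128 = INSERT_SUBREG (IMPLICIT_DEF), %op1, dsub
//   %w2:fpr128 = INSERT_SUBREG (IMPLICIT_DEF), %op2, dsub
//   %dst       = INSvi64lane %w1, 1, %w2, 0
// (a sketch of the emitScalarToVector + lane-insert steps below).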
4218 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4219
4220 const LLT Op1Ty = MRI.getType(Op1);
4221 const LLT Op2Ty = MRI.getType(Op2);
4222
4223 if (Op1Ty != Op2Ty) {
4224 LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
4225 return nullptr;
4226 }
4227 assert(Op1Ty.isVector() && "Expected a vector for vector concat");
4228
4229 if (Op1Ty.getSizeInBits() >= 128) {
4230 LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
4231 return nullptr;
4232 }
4233
4234 // At the moment we just support 64 bit vector concats.
4235 if (Op1Ty.getSizeInBits() != 64) {
4236 LLVM_DEBUG(dbgs() << "Vector concat only supported for 64b vectors");
4237 return nullptr;
4238 }
4239
4240 const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
4241 const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
4242 const TargetRegisterClass *DstRC =
4243 getMinClassForRegBank(FPRBank, Op1Ty.getSizeInBits() * 2);
4244
4245 MachineInstr *WidenedOp1 =
4246 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
4247 MachineInstr *WidenedOp2 =
4248 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
4249 if (!WidenedOp1 || !WidenedOp2) {
4250 LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
4251 return nullptr;
4252 }
4253
4254 // Now do the insert of the upper element.
4255 unsigned InsertOpc, InsSubRegIdx;
4256 std::tie(InsertOpc, InsSubRegIdx) =
4257 getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
4258
4259 if (!Dst)
4260 Dst = MRI.createVirtualRegister(DstRC);
4261 auto InsElt =
4262 MIRBuilder
4263 .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
4264 .addImm(1) /* Lane index */
4265 .addUse(WidenedOp2->getOperand(0).getReg())
4266 .addImm(0);
4267 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
4268 return &*InsElt;
4269 }
4270
4271 MachineInstr *AArch64InstructionSelector::emitFMovForFConstant(
4272 MachineInstr &I, MachineRegisterInfo &MRI) const {
4273 assert(I.getOpcode() == TargetOpcode::G_FCONSTANT &&
4274 "Expected a G_FCONSTANT!");
4275 MachineOperand &ImmOp = I.getOperand(1);
4276 unsigned DefSize = MRI.getType(I.getOperand(0).getReg()).getSizeInBits();
4277
4278 // Only handle 32 and 64 bit defs for now.
4279 if (DefSize != 32 && DefSize != 64)
4280 return nullptr;
4281
4282 // Don't handle null values using FMOV.
4283 if (ImmOp.getFPImm()->isNullValue())
4284 return nullptr;
4285
4286 // Get the immediate representation for the FMOV.
4287 const APFloat &ImmValAPF = ImmOp.getFPImm()->getValueAPF();
4288 int Imm = DefSize == 32 ? AArch64_AM::getFP32Imm(ImmValAPF)
4289 : AArch64_AM::getFP64Imm(ImmValAPF);
4290
4291 // If this is -1, it means the requested floating point value can't be
4292 // encoded as an FMOV immediate. Bail.
4293 if (Imm == -1)
4294 return nullptr;
4295
4296 // Update MI to represent the new FMOV instruction, constrain it, and return.
4297 ImmOp.ChangeToImmediate(Imm);
4298 unsigned MovOpc = DefSize == 32 ? AArch64::FMOVSi : AArch64::FMOVDi;
4299 I.setDesc(TII.get(MovOpc));
4300 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
4301 return &I;
4302 }
4303
4304 MachineInstr *
4305 AArch64InstructionSelector::emitCSetForICMP(Register DefReg, unsigned Pred,
4306 MachineIRBuilder &MIRBuilder) const {
4307 // CSINC increments the result when the predicate is false. Invert it.
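// e.g. for an eq compare this emits (illustrative):
//   csinc wDef, wzr, wzr, ne     ; equivalent to cset wDef, eq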
4308 const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(
4309 CmpInst::getInversePredicate((CmpInst::Predicate)Pred));
4310 auto I =
4311 MIRBuilder
4312 .buildInstr(AArch64::CSINCWr, {DefReg}, {Register(AArch64::WZR), Register(AArch64::WZR)})
4313 .addImm(InvCC);
4314 constrainSelectedInstRegOperands(*I, TII, TRI, RBI);
4315 return &*I;
4316 }
4317
4318 std::pair<MachineInstr *, AArch64CC::CondCode>
4319 AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
4320 MachineOperand &LHS,
4321 MachineOperand &RHS,
4322 MachineIRBuilder &MIRBuilder) const {
4323 switch (Opcode) {
4324 default:
4325 llvm_unreachable("Unexpected opcode!");
4326 case TargetOpcode::G_SADDO:
4327 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4328 case TargetOpcode::G_UADDO:
4329 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4330 case TargetOpcode::G_SSUBO:
4331 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4332 }
4333 }
4334
4335 bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
4336 MachineIRBuilder MIB(I);
4337 MachineRegisterInfo &MRI = *MIB.getMRI();
4338 // We want to recognize this pattern:
4339 //
4340 // $z = G_FCMP pred, $x, $y
4341 // ...
4342 // $w = G_SELECT $z, $a, $b
4343 //
4344 // Where the value of $z is *only* ever used by the G_SELECT (possibly with
4345 // some copies/truncs in between.)
4346 //
4347 // If we see this, then we can emit something like this:
4348 //
4349 // fcmp $x, $y
4350 // fcsel $w, $a, $b, pred
4351 //
4352 // Rather than emitting both of the rather long sequences in the standard
4353 // G_FCMP/G_SELECT select methods.
4354
4355 // First, check if the condition is defined by a compare.
4356 MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
4357 while (CondDef) {
4358 // We can only fold if all of the defs have one use.
4359 Register CondDefReg = CondDef->getOperand(0).getReg();
4360 if (!MRI.hasOneNonDBGUse(CondDefReg)) {
4361 // Unless it's another select.
4362 for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) {
4363 if (CondDef == &UI)
4364 continue;
4365 if (UI.getOpcode() != TargetOpcode::G_SELECT)
4366 return false;
4367 }
4368 }
4369
4370 // We can skip over G_TRUNC since the condition is 1-bit.
4371 // Truncating/extending can have no impact on the value.
4372 unsigned Opc = CondDef->getOpcode();
4373 if (Opc != TargetOpcode::COPY && Opc != TargetOpcode::G_TRUNC)
4374 break;
4375
4376 // Can't see past copies from physregs.
4377 if (Opc == TargetOpcode::COPY &&
4378 Register::isPhysicalRegister(CondDef->getOperand(1).getReg()))
4379 return false;
4380
4381 CondDef = MRI.getVRegDef(CondDef->getOperand(1).getReg());
4382 }
4383
4384 // Is the condition defined by a compare?
4385 if (!CondDef)
4386 return false;
4387
4388 unsigned CondOpc = CondDef->getOpcode();
4389 if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP)
4390 return false;
4391
4392 AArch64CC::CondCode CondCode;
4393 if (CondOpc == TargetOpcode::G_ICMP) {
4394 auto Pred =
4395 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
4396 CondCode = changeICMPPredToAArch64CC(Pred);
4397 emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
4398 CondDef->getOperand(1), MIB);
4399 } else {
4400 // Get the condition code for the select.
4401 auto Pred =
4402 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
4403 AArch64CC::CondCode CondCode2;
4404 changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2);
4405
4406 // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
4407 // instructions to emit the comparison.
4408 // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
4409 // unnecessary.
4410 if (CondCode2 != AArch64CC::AL)
4411 return false;
4412
4413 if (!emitFPCompare(CondDef->getOperand(2).getReg(),
4414 CondDef->getOperand(3).getReg(), MIB)) {
4415 LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
4416 return false;
4417 }
4418 }
4419
4420 // Emit the select.
4421 emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(),
4422 I.getOperand(3).getReg(), CondCode, MIB);
4423 I.eraseFromParent();
4424 return true;
4425 }
4426
4427 MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
4428 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4429 MachineIRBuilder &MIRBuilder) const {
4430 assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
4431 "Unexpected MachineOperand");
4432 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4433 // We want to find this sort of thing:
4434 // x = G_SUB 0, y
4435 // G_ICMP z, x
4436 //
4437 // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
4438 // e.g:
4439 //
4440 // cmn z, y
4441
4442 // Helper lambda to detect the subtract followed by the compare.
4443 // Takes in the def of the LHS or RHS, and checks if it's a subtract from 0.
4444 auto IsCMN = [&](MachineInstr *DefMI, const AArch64CC::CondCode &CC) {
4445 if (!DefMI || DefMI->getOpcode() != TargetOpcode::G_SUB)
4446 return false;
4447
4448 // Need to make sure NZCV is the same at the end of the transformation.
4449 if (CC != AArch64CC::EQ && CC != AArch64CC::NE)
4450 return false;
4451
4456 // Make sure that we're getting
4457 // x = G_SUB 0, y
4458 auto ValAndVReg =
4459 getConstantVRegValWithLookThrough(DefMI->getOperand(1).getReg(), MRI);
4460 if (!ValAndVReg || ValAndVReg->Value != 0)
4461 return false;
4462
4463 // This can safely be represented as a CMN.
4464 return true;
4465 };
4466
4467 // Check if the RHS or LHS of the G_ICMP is defined by a SUB
4468 MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
4469 MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
4470 CmpInst::Predicate P = (CmpInst::Predicate)Predicate.getPredicate();
4471 const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(P);
4472
4473 // Given this:
4474 //
4475 // x = G_SUB 0, y
4476 // G_ICMP x, z
4477 //
4478 // Produce this:
4479 //
4480 // cmn y, z
4481 if (IsCMN(LHSDef, CC))
4482 return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
4483
4484 // Same idea here, but with the RHS of the compare instead:
4485 //
4486 // Given this:
4487 //
4488 // x = G_SUB 0, y
4489 // G_ICMP z, x
4490 //
4491 // Produce this:
4492 //
4493 // cmn z, y
4494 if (IsCMN(RHSDef, CC))
4495 return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
4496
4497 // Given this:
4498 //
4499 // z = G_AND x, y
4500 // G_ICMP z, 0
4501 //
4502 // Produce this if the compare is signed:
4503 //
4504 // tst x, y
4505 if (!CmpInst::isUnsigned(P) && LHSDef &&
4506 LHSDef->getOpcode() == TargetOpcode::G_AND) {
4507 // Make sure that the RHS is 0.
4508 auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI);
4509 if (!ValAndVReg || ValAndVReg->Value != 0)
4510 return nullptr;
4511
4512 return emitTST(LHSDef->getOperand(1),
4513 LHSDef->getOperand(2), MIRBuilder);
4514 }
4515
4516 return nullptr;
4517 }
4518
4519 bool AArch64InstructionSelector::selectShuffleVector(
4520 MachineInstr &I, MachineRegisterInfo &MRI) const {
4521 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
4522 Register Src1Reg = I.getOperand(1).getReg();
4523 const LLT Src1Ty = MRI.getType(Src1Reg);
4524 Register Src2Reg = I.getOperand(2).getReg();
4525 const LLT Src2Ty = MRI.getType(Src2Reg);
4526 ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();
4527
4528 MachineBasicBlock &MBB = *I.getParent();
4529 MachineFunction &MF = *MBB.getParent();
4530 LLVMContext &Ctx = MF.getFunction().getContext();
4531
4532 // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
4533 // it originated from a <1 x T> type. Those should have been lowered into
4534 // G_BUILD_VECTOR earlier.
4535 if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
4536 LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
4537 return false;
4538 }
4539
4540 unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
4541
4542 SmallVector<Constant *, 64> CstIdxs;
4543 for (int Val : Mask) {
4544 // For now, any undef indexes we'll just assume to be 0. This should be
4545 // optimized in future, e.g. to select DUP etc.
4546 Val = Val < 0 ? 0 : Val;
4547 for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
4548 unsigned Offset = Byte + Val * BytesPerElt;
4549 CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
4550 }
4551 }
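  // Illustrative example: for a full 128-bit <4 x s32> shuffle with mask
  // <0, 4, 1, 5>, the two sources are laid out back to back for the table
  // lookup (Src1 in bytes 0-15, Src2 in bytes 16-31), so the byte indexes
  // built above are {0,1,2,3, 16,17,18,19, 4,5,6,7, 20,21,22,23}.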
4552
4553 MachineIRBuilder MIRBuilder(I);
4554
4555 // Use a constant pool to load the index vector for TBL.
4556 Constant *CPVal = ConstantVector::get(CstIdxs);
4557 MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIRBuilder);
4558 if (!IndexLoad) {
4559 LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
4560 return false;
4561 }
4562
4563 if (DstTy.getSizeInBits() != 128) {
4564 assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
4565 // This case can be done with TBL1.
4566 MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIRBuilder);
4567 if (!Concat) {
4568 LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
4569 return false;
4570 }
4571
4572 // The constant pool load will be 64 bits, so need to convert to FPR128 reg.
4573 IndexLoad =
4574 emitScalarToVector(64, &AArch64::FPR128RegClass,
4575 IndexLoad->getOperand(0).getReg(), MIRBuilder);
4576
4577 auto TBL1 = MIRBuilder.buildInstr(
4578 AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
4579 {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
4580 constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);
4581
4582 auto Copy =
4583 MIRBuilder
4584 .buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
4585 .addReg(TBL1.getReg(0), 0, AArch64::dsub);
4586 RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
4587 I.eraseFromParent();
4588 return true;
4589 }
4590
4591 // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
4592 // Q registers for regalloc.
4593 auto RegSeq = MIRBuilder
4594 .buildInstr(TargetOpcode::REG_SEQUENCE,
4595 {&AArch64::QQRegClass}, {Src1Reg})
4596 .addImm(AArch64::qsub0)
4597 .addUse(Src2Reg)
4598 .addImm(AArch64::qsub1);
4599
4600 auto TBL2 = MIRBuilder.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},
4601 {RegSeq, IndexLoad->getOperand(0)});
4602 constrainSelectedInstRegOperands(*RegSeq, TII, TRI, RBI);
4603 constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
4604 I.eraseFromParent();
4605 return true;
4606 }
4607
4608 MachineInstr *AArch64InstructionSelector::emitLaneInsert(
4609 Optional<Register> DstReg, Register SrcReg, Register EltReg,
4610 unsigned LaneIdx, const RegisterBank &RB,
4611 MachineIRBuilder &MIRBuilder) const {
4612 MachineInstr *InsElt = nullptr;
4613 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
4614 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4615
4616 // Create a register to define with the insert if one wasn't passed in.
4617 if (!DstReg)
4618 DstReg = MRI.createVirtualRegister(DstRC);
4619
4620 unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
4621 unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
4622
4623 if (RB.getID() == AArch64::FPRRegBankID) {
4624 auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
4625 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
4626 .addImm(LaneIdx)
4627 .addUse(InsSub->getOperand(0).getReg())
4628 .addImm(0);
4629 } else {
4630 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
4631 .addImm(LaneIdx)
4632 .addUse(EltReg);
4633 }
4634
4635 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
4636 return InsElt;
4637 }
4638
4639 bool AArch64InstructionSelector::selectInsertElt(
4640 MachineInstr &I, MachineRegisterInfo &MRI) const {
4641 assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
4642
4643 // Get information on the destination.
4644 Register DstReg = I.getOperand(0).getReg();
4645 const LLT DstTy = MRI.getType(DstReg);
4646 unsigned VecSize = DstTy.getSizeInBits();
4647
4648 // Get information on the element we want to insert into the destination.
4649 Register EltReg = I.getOperand(2).getReg();
4650 const LLT EltTy = MRI.getType(EltReg);
4651 unsigned EltSize = EltTy.getSizeInBits();
4652 if (EltSize < 16 || EltSize > 64)
4653 return false; // Don't support all element types yet.
4654
4655 // Find the definition of the index. Bail out if it's not defined by a
4656 // G_CONSTANT.
4657 Register IdxReg = I.getOperand(3).getReg();
4658 auto VRegAndVal = getConstantVRegValWithLookThrough(IdxReg, MRI);
4659 if (!VRegAndVal)
4660 return false;
4661 unsigned LaneIdx = VRegAndVal->Value;
4662
4663 // Perform the lane insert.
4664 Register SrcReg = I.getOperand(1).getReg();
4665 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
4666 MachineIRBuilder MIRBuilder(I);
4667
4668 if (VecSize < 128) {
4669 // If the vector we're inserting into is smaller than 128 bits, widen it
4670 // to 128 to do the insert.
4671 MachineInstr *ScalarToVec = emitScalarToVector(
4672 VecSize, &AArch64::FPR128RegClass, SrcReg, MIRBuilder);
4673 if (!ScalarToVec)
4674 return false;
4675 SrcReg = ScalarToVec->getOperand(0).getReg();
4676 }
4677
4678 // Create an insert into a new FPR128 register.
4679 // Note that if our vector is already 128 bits, we end up emitting an extra
4680 // register.
4681 MachineInstr *InsMI =
4682 emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIRBuilder);
4683
4684 if (VecSize < 128) {
4685 // If we had to widen to perform the insert, then we have to demote back to
4686 // the original size to get the result we want.
4687 Register DemoteVec = InsMI->getOperand(0).getReg();
4688 const TargetRegisterClass *RC =
4689 getMinClassForRegBank(*RBI.getRegBank(DemoteVec, MRI, TRI), VecSize);
4690 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
4691 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
4692 return false;
4693 }
4694 unsigned SubReg = 0;
4695 if (!getSubRegForClass(RC, TRI, SubReg))
4696 return false;
4697 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
4698 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSize
4699 << "\n");
4700 return false;
4701 }
4702 MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
4703 .addReg(DemoteVec, 0, SubReg);
4704 RBI.constrainGenericRegister(DstReg, *RC, MRI);
4705 } else {
4706 // No widening needed.
4707 InsMI->getOperand(0).setReg(DstReg);
4708 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
4709 }
4710
4711 I.eraseFromParent();
4712 return true;
4713 }
4714
4715 bool AArch64InstructionSelector::tryOptConstantBuildVec(
4716 MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) const {
4717 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4718 unsigned DstSize = DstTy.getSizeInBits();
4719 assert(DstSize <= 128 && "Unexpected build_vec type!");
4720 if (DstSize < 32)
4721 return false;
4722 // Check if we're building a constant vector, in which case we want to
4723 // generate a constant pool load instead of a vector insert sequence.
4724 SmallVector<Constant *, 16> Csts;
4725 for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) {
4726 // Try to find G_CONSTANT or G_FCONSTANT
4727 auto *OpMI =
4728 getOpcodeDef(TargetOpcode::G_CONSTANT, I.getOperand(Idx).getReg(), MRI);
4729 if (OpMI)
4730 Csts.emplace_back(
4731 const_cast<ConstantInt *>(OpMI->getOperand(1).getCImm()));
4732 else if ((OpMI = getOpcodeDef(TargetOpcode::G_FCONSTANT,
4733 I.getOperand(Idx).getReg(), MRI)))
4734 Csts.emplace_back(
4735 const_cast<ConstantFP *>(OpMI->getOperand(1).getFPImm()));
4736 else
4737 return false;
4738 }
4739 Constant *CV = ConstantVector::get(Csts);
4740 MachineIRBuilder MIB(I);
4741 if (CV->isNullValue()) {
4742 // Until the importer can support immAllZerosV in pattern leaf nodes,
4743 // select a zero move manually here.
4744 Register DstReg = I.getOperand(0).getReg();
4745 if (DstSize == 128) {
4746 auto Mov = MIB.buildInstr(AArch64::MOVIv2d_ns, {DstReg}, {}).addImm(0);
4747 I.eraseFromParent();
4748 return constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
4749 } else if (DstSize == 64) {
4750 auto Mov =
4751 MIB.buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
4752 .addImm(0);
4753 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
4754 .addReg(Mov.getReg(0), 0, AArch64::dsub);
4755 I.eraseFromParent();
4756 return RBI.constrainGenericRegister(DstReg, AArch64::FPR64RegClass, MRI);
4757 }
4758 }
4759 auto *CPLoad = emitLoadFromConstantPool(CV, MIB);
4760 if (!CPLoad) {
4761 LLVM_DEBUG(dbgs() << "Could not generate cp load for build_vector");
4762 return false;
4763 }
4764 MIB.buildCopy(I.getOperand(0), CPLoad->getOperand(0));
4765 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
4766 *MRI.getRegClass(CPLoad->getOperand(0).getReg()),
4767 MRI);
4768 I.eraseFromParent();
4769 return true;
4770 }
4771
4772 bool AArch64InstructionSelector::selectBuildVector(
4773 MachineInstr &I, MachineRegisterInfo &MRI) const {
4774 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4775 // Until we port more of the optimized selections, for now just use a vector
4776 // insert sequence.
4777 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
4778 const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
4779 unsigned EltSize = EltTy.getSizeInBits();
4780
4781 if (tryOptConstantBuildVec(I, DstTy, MRI))
4782 return true;
4783 if (EltSize < 16 || EltSize > 64)
4784 return false; // Don't support all element types yet.
4785 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
4786 MachineIRBuilder MIRBuilder(I);
4787
4788 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
4789 MachineInstr *ScalarToVec =
4790 emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
4791 I.getOperand(1).getReg(), MIRBuilder);
4792 if (!ScalarToVec)
4793 return false;
4794
4795 Register DstVec = ScalarToVec->getOperand(0).getReg();
4796 unsigned DstSize = DstTy.getSizeInBits();
4797
4798 // Keep track of the last MI we inserted. Later on, we might be able to save
4799 // a copy using it.
4800 MachineInstr *PrevMI = nullptr;
4801 for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
4802 // Note that if we don't do a subregister copy, we can end up making an
4803 // extra register.
4804 PrevMI = &*emitLaneInsert(None, DstVec, I.getOperand(i).getReg(), i - 1, RB,
4805 MIRBuilder);
4806 DstVec = PrevMI->getOperand(0).getReg();
4807 }
4808
4809 // If DstTy's size in bits is less than 128, then emit a subregister copy
4810 // from DstVec to the last register we've defined.
4811 if (DstSize < 128) {
4812 // Force this to be FPR using the destination vector.
4813 const TargetRegisterClass *RC =
4814 getMinClassForRegBank(*RBI.getRegBank(DstVec, MRI, TRI), DstSize);
4815 if (!RC)
4816 return false;
4817 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
4818 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
4819 return false;
4820 }
4821
4822 unsigned SubReg = 0;
4823 if (!getSubRegForClass(RC, TRI, SubReg))
4824 return false;
4825 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
4826 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
4827 << "\n");
4828 return false;
4829 }
4830
4831 Register Reg = MRI.createVirtualRegister(RC);
4832 Register DstReg = I.getOperand(0).getReg();
4833
4834 MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
4835 .addReg(DstVec, 0, SubReg);
4836 MachineOperand &RegOp = I.getOperand(1);
4837 RegOp.setReg(Reg);
4838 RBI.constrainGenericRegister(DstReg, *RC, MRI);
4839 } else {
4840 // We don't need a subregister copy. Save a copy by re-using the
4841 // destination register on the final insert.
4842 assert(PrevMI && "PrevMI was null?");
4843 PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
4844 constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
4845 }
4846
4847 I.eraseFromParent();
4848 return true;
4849 }
4850
4851 /// Helper function to find an intrinsic ID on a MachineInstr. Returns the
4852 /// ID if it exists, and 0 otherwise.
4853 static unsigned findIntrinsicID(MachineInstr &I) {
4854 auto IntrinOp = find_if(I.operands(), [&](const MachineOperand &Op) {
4855 return Op.isIntrinsicID();
4856 });
4857 if (IntrinOp == I.operands_end())
4858 return 0;
4859 return IntrinOp->getIntrinsicID();
4860 }
4861
4862 bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
4863 MachineInstr &I, MachineRegisterInfo &MRI) const {
4864 // Find the intrinsic ID.
4865 unsigned IntrinID = findIntrinsicID(I);
4866 if (!IntrinID)
4867 return false;
4868 MachineIRBuilder MIRBuilder(I);
4869
4870 // Select the instruction.
4871 switch (IntrinID) {
4872 default:
4873 return false;
4874 case Intrinsic::trap:
4875 MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(1);
4876 break;
4877 case Intrinsic::debugtrap:
4878 MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000);
4879 break;
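  // For ubsantrap, the check kind from the intrinsic is kept in the low byte
  // of the BRK immediate, with 'U' (0x55) in the byte above it.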
4880 case Intrinsic::ubsantrap:
4881 MIRBuilder.buildInstr(AArch64::BRK, {}, {})
4882 .addImm(I.getOperand(1).getImm() | ('U' << 8));
4883 break;
4884 }
4885
4886 I.eraseFromParent();
4887 return true;
4888 }
4889
4890 bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
4891 MachineRegisterInfo &MRI) {
4892 unsigned IntrinID = findIntrinsicID(I);
4893 if (!IntrinID)
4894 return false;
4895 MachineIRBuilder MIRBuilder(I);
4896
4897 switch (IntrinID) {
4898 default:
4899 break;
4900 case Intrinsic::aarch64_crypto_sha1h: {
4901 Register DstReg = I.getOperand(0).getReg();
4902 Register SrcReg = I.getOperand(2).getReg();
4903
4904 // FIXME: Should this be an assert?
4905 if (MRI.getType(DstReg).getSizeInBits() != 32 ||
4906 MRI.getType(SrcReg).getSizeInBits() != 32)
4907 return false;
4908
4909 // The operation has to happen on FPRs. Set up some new FPR registers for
4910 // the source and destination if they are on GPRs.
4911 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
4912 SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
4913 MIRBuilder.buildCopy({SrcReg}, {I.getOperand(2)});
4914
4915 // Make sure the copy ends up getting constrained properly.
4916 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
4917 AArch64::GPR32RegClass, MRI);
4918 }
4919
4920 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
4921 DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
4922
4923 // Actually insert the instruction.
4924 auto SHA1Inst = MIRBuilder.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
4925 constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI);
4926
4927 // Did we create a new register for the destination?
4928 if (DstReg != I.getOperand(0).getReg()) {
4929 // Yep. Copy the result of the instruction back into the original
4930 // destination.
4931 MIRBuilder.buildCopy({I.getOperand(0)}, {DstReg});
4932 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
4933 AArch64::GPR32RegClass, MRI);
4934 }
4935
4936 I.eraseFromParent();
4937 return true;
4938 }
4939 case Intrinsic::frameaddress:
4940 case Intrinsic::returnaddress: {
4941 MachineFunction &MF = *I.getParent()->getParent();
4942 MachineFrameInfo &MFI = MF.getFrameInfo();
4943
4944 unsigned Depth = I.getOperand(2).getImm();
4945 Register DstReg = I.getOperand(0).getReg();
4946 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
4947
4948 if (Depth == 0 && IntrinID == Intrinsic::returnaddress) {
4949 if (!MFReturnAddr) {
4950 // Insert the copy from LR/X30 into the entry block, before it can be
4951 // clobbered by anything.
4952 MFI.setReturnAddressIsTaken(true);
4953 MFReturnAddr = getFunctionLiveInPhysReg(MF, TII, AArch64::LR,
4954 AArch64::GPR64RegClass);
4955 }
4956
4957 if (STI.hasV8_3aOps()) {
4958 MIRBuilder.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr});
4959 } else {
4960 MIRBuilder.buildCopy({Register(AArch64::LR)}, {MFReturnAddr});
4961 MIRBuilder.buildInstr(AArch64::XPACLRI);
4962 MIRBuilder.buildCopy({DstReg}, {Register(AArch64::LR)});
4963 }
4964
4965 I.eraseFromParent();
4966 return true;
4967 }
4968
4969 MFI.setFrameAddressIsTaken(true);
4970 Register FrameAddr(AArch64::FP);
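    // Each AArch64 frame record starts with the caller's frame pointer, so
    // walking up Depth frames is just Depth chained loads of [fp, #0]; the
    // return address, when needed, sits one slot above it (the LDRXui with
    // immediate 1 below).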
4971 while (Depth--) {
4972 Register NextFrame = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
4973 auto Ldr =
4974 MIRBuilder.buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr})
4975 .addImm(0);
4976 constrainSelectedInstRegOperands(*Ldr, TII, TRI, RBI);
4977 FrameAddr = NextFrame;
4978 }
4979
4980 if (IntrinID == Intrinsic::frameaddress)
4981 MIRBuilder.buildCopy({DstReg}, {FrameAddr});
4982 else {
4983 MFI.setReturnAddressIsTaken(true);
4984
4985 if (STI.hasV8_3aOps()) {
4986 Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4987 MIRBuilder.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1);
4988 MIRBuilder.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg});
4989 } else {
4990 MIRBuilder.buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr}).addImm(1);
4991 MIRBuilder.buildInstr(AArch64::XPACLRI);
4992 MIRBuilder.buildCopy({DstReg}, {Register(AArch64::LR)});
4993 }
4994 }
4995
4996 I.eraseFromParent();
4997 return true;
4998 }
4999 }
5000 return false;
5001 }
5002
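// Worked example for the four shift renderers below: for a 32-bit left shift
// by 3, selectShiftA_32 renders 29 and selectShiftB_32 renders 28, the operand
// pair of "ubfm w0, w1, #29, #28", which is the encoding of "lsl w0, w1, #3".
// The 64-bit variants behave the same way modulo 64.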
5003 InstructionSelector::ComplexRendererFns
5004 AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
5005 auto MaybeImmed = getImmedFromMO(Root);
5006 if (MaybeImmed == None || *MaybeImmed > 31)
5007 return None;
5008 uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
5009 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
5010 }
5011
5012 InstructionSelector::ComplexRendererFns
5013 AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
5014 auto MaybeImmed = getImmedFromMO(Root);
5015 if (MaybeImmed == None || *MaybeImmed > 31)
5016 return None;
5017 uint64_t Enc = 31 - *MaybeImmed;
5018 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
5019 }
5020
5021 InstructionSelector::ComplexRendererFns
5022 AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
5023 auto MaybeImmed = getImmedFromMO(Root);
5024 if (MaybeImmed == None || *MaybeImmed > 63)
5025 return None;
5026 uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
5027 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
5028 }
5029
5030 InstructionSelector::ComplexRendererFns
5031 AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
5032 auto MaybeImmed = getImmedFromMO(Root);
5033 if (MaybeImmed == None || *MaybeImmed > 63)
5034 return None;
5035 uint64_t Enc = 63 - *MaybeImmed;
5036 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
5037 }
5038
5039 /// Helper to select an immediate value that can be represented as a 12-bit
5040 /// value shifted left by either 0 or 12. If it is possible to do so, return
5041 /// the immediate and shift value. If not, return None.
5042 ///
5043 /// Used by selectArithImmed and selectNegArithImmed.
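/// For example, 0x123 is returned as (0x123, LSL #0) and 0x123000 as
/// (0x123, LSL #12), while 0x123456 has bits set in both halves and so
/// returns None.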
5044 InstructionSelector::ComplexRendererFns
5045 AArch64InstructionSelector::select12BitValueWithLeftShift(
5046 uint64_t Immed) const {
5047 unsigned ShiftAmt;
5048 if (Immed >> 12 == 0) {
5049 ShiftAmt = 0;
5050 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
5051 ShiftAmt = 12;
5052 Immed = Immed >> 12;
5053 } else
5054 return None;
5055
5056 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
5057 return {{
5058 [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
5059 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
5060 }};
5061 }
5062
5063 /// SelectArithImmed - Select an immediate value that can be represented as
5064 /// a 12-bit value shifted left by either 0 or 12. If so, return true with
5065 /// Val set to the 12-bit value and Shift set to the shifter operand.
5066 InstructionSelector::ComplexRendererFns
5067 AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
5068 // This function is called from the addsub_shifted_imm ComplexPattern,
5069 // which lists [imm] as the list of opcodes it's interested in; however,
5070 // we still need to check whether the operand is actually an immediate
5071 // here because the ComplexPattern opcode list is only used in
5072 // root-level opcode matching.
5073 auto MaybeImmed = getImmedFromMO(Root);
5074 if (MaybeImmed == None)
5075 return None;
5076 return select12BitValueWithLeftShift(*MaybeImmed);
5077 }
5078
5079 /// SelectNegArithImmed - As above, but negates the value before trying to
5080 /// select it.
5081 InstructionSelector::ComplexRendererFns
5082 AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
5083 // We need a register here, because we need to know if we have a 64 or 32
5084 // bit immediate.
5085 if (!Root.isReg())
5086 return None;
5087 auto MaybeImmed = getImmedFromMO(Root);
5088 if (MaybeImmed == None)
5089 return None;
5090 uint64_t Immed = *MaybeImmed;
5091
5092 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
5093 // have the opposite effect on the C flag, so this pattern mustn't match under
5094 // those circumstances.
5095 if (Immed == 0)
5096 return None;
5097
5098 // Check whether we're dealing with a 32-bit or a 64-bit type on the root.
5100 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5101 if (MRI.getType(Root.getReg()).getSizeInBits() == 32)
5102 Immed = ~((uint32_t)Immed) + 1;
5103 else
5104 Immed = ~Immed + 1ULL;
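  // E.g. an immediate of -16 on a 32-bit operation becomes 16 here, which the
  // call below then encodes as (#16, LSL #0).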
5105
5106 if (Immed & 0xFFFFFFFFFF000000ULL)
5107 return None;
5108
5109 Immed &= 0xFFFFFFULL;
5110 return select12BitValueWithLeftShift(Immed);
5111 }
5112
5113 /// Return true if it is worth folding MI into an extended register. That is,
5114 /// if it's safe to pull it into the addressing mode of a load or store as a
5115 /// shift.
5116 bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
5117 MachineInstr &MI, const MachineRegisterInfo &MRI) const {
5118 // Always fold if there is one use, or if we're optimizing for size.
5119 Register DefReg = MI.getOperand(0).getReg();
5120 if (MRI.hasOneNonDBGUse(DefReg) ||
5121 MI.getParent()->getParent()->getFunction().hasMinSize())
5122 return true;
5123
5124 // It's better to avoid folding and recomputing shifts when we don't have a
5125 // fastpath.
5126 if (!STI.hasLSLFast())
5127 return false;
5128
5129 // We have a fastpath, so folding a shift in and potentially computing it
5130 // many times may be beneficial. Check if this is only used in memory ops.
5131 // If it is, then we should fold.
5132 return all_of(MRI.use_nodbg_instructions(DefReg),
5133 [](MachineInstr &Use) { return Use.mayLoadOrStore(); });
5134 }
5135
5136 static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type) {
5137 switch (Type) {
5138 case AArch64_AM::SXTB:
5139 case AArch64_AM::SXTH:
5140 case AArch64_AM::SXTW:
5141 return true;
5142 default:
5143 return false;
5144 }
5145 }
5146
5147 InstructionSelector::ComplexRendererFns
5148 AArch64InstructionSelector::selectExtendedSHL(
5149 MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset,
5150 unsigned SizeInBytes, bool WantsExt) const {
5151 assert(Base.isReg() && "Expected base to be a register operand");
5152 assert(Offset.isReg() && "Expected offset to be a register operand");
5153
5154 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5155 MachineInstr *OffsetInst = MRI.getVRegDef(Offset.getReg());
5156 if (!OffsetInst)
5157 return None;
5158
5159 unsigned OffsetOpc = OffsetInst->getOpcode();
5160 bool LookedThroughZExt = false;
5161 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) {
5162 // Try to look through a ZEXT.
5163 if (OffsetOpc != TargetOpcode::G_ZEXT || !WantsExt)
5164 return None;
5165
5166 OffsetInst = MRI.getVRegDef(OffsetInst->getOperand(1).getReg());
5167 OffsetOpc = OffsetInst->getOpcode();
5168 LookedThroughZExt = true;
5169
5170 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
5171 return None;
5172 }
5173 // Make sure that the memory op is a valid size.
5174 int64_t LegalShiftVal = Log2_32(SizeInBytes);
5175 if (LegalShiftVal == 0)
5176 return None;
5177 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
5178 return None;
5179
5180 // Now, try to find the specific G_CONSTANT. Start by assuming that the
5181 // register we will offset is the LHS, and the register containing the
5182 // constant is the RHS.
5183 Register OffsetReg = OffsetInst->getOperand(1).getReg();
5184 Register ConstantReg = OffsetInst->getOperand(2).getReg();
5185 auto ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
5186 if (!ValAndVReg) {
5187 // We didn't get a constant on the RHS. If the opcode is a shift, then
5188 // we're done.
5189 if (OffsetOpc == TargetOpcode::G_SHL)
5190 return None;
5191
5192 // If we have a G_MUL, we can use either register. Try looking at the RHS.
5193 std::swap(OffsetReg, ConstantReg);
5194 ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
5195 if (!ValAndVReg)
5196 return None;
5197 }
5198
5199 // The value must fit into 3 bits, and must be positive. Make sure that is
5200 // true.
5201 int64_t ImmVal = ValAndVReg->Value;
5202
5203 // Since we're going to pull this into a shift, the constant value must be
5204 // a power of 2. If we got a multiply, then we need to check this.
5205 if (OffsetOpc == TargetOpcode::G_MUL) {
5206 if (!isPowerOf2_32(ImmVal))
5207 return None;
5208
5209 // Got a power of 2. So, the amount we'll shift is the log base-2 of that.
5210 ImmVal = Log2_32(ImmVal);
5211 }
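  // E.g. a G_MUL by 8 has just been converted into a shift amount of 3, which
  // is only folded if it equals LegalShiftVal (3 for an 8-byte access).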
5212
5213 if ((ImmVal & 0x7) != ImmVal)
5214 return None;
5215
5216 // We are only allowed to shift by LegalShiftVal. This shift value is built
5217 // into the instruction, so we can't just use whatever we want.
5218 if (ImmVal != LegalShiftVal)
5219 return None;
5220
5221 unsigned SignExtend = 0;
5222 if (WantsExt) {
5223 // Check if the offset is defined by an extend, unless we looked through a
5224 // G_ZEXT earlier.
5225 if (!LookedThroughZExt) {
5226 MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI);
5227 auto Ext = getExtendTypeForInst(*ExtInst, MRI, true);
5228 if (Ext == AArch64_AM::InvalidShiftExtend)
5229 return None;
5230
5231 SignExtend = isSignExtendShiftType(Ext) ? 1 : 0;
5232 // We only support SXTW for signed extension here.
5233 if (SignExtend && Ext != AArch64_AM::SXTW)
5234 return None;
5235 OffsetReg = ExtInst->getOperand(1).getReg();
5236 }
5237
5238 // Need a 32-bit wide register here.
5239 MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg()));
5240 OffsetReg = moveScalarRegClass(OffsetReg, AArch64::GPR32RegClass, MIB);
5241 }
5242
5243 // We can use the LHS of the GEP as the base, and the LHS of the shift as an
5244 // offset. Signify that we are shifting by setting the shift flag to 1.
5245 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(Base.getReg()); },
5246 [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
5247 [=](MachineInstrBuilder &MIB) {
5248 // Need to add both immediates here to make sure that they are both
5249 // added to the instruction.
5250 MIB.addImm(SignExtend);
5251 MIB.addImm(1);
5252 }}};
5253 }
5254
5255 /// This is used for computing addresses like this:
5256 ///
5257 /// ldr x1, [x2, x3, lsl #3]
5258 ///
5259 /// Where x2 is the base register, and x3 is an offset register. The shift-left
5260 /// is a constant value specific to this load instruction. That is, we'll never
5261 /// see anything other than a 3 here (which corresponds to the size of the
5262 /// element being loaded).
5263 InstructionSelector::ComplexRendererFns
5264 AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
5265 MachineOperand &Root, unsigned SizeInBytes) const {
5266 if (!Root.isReg())
5267 return None;
5268 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5269
5270 // We want to find something like this:
5271 //
5272 // val = G_CONSTANT LegalShiftVal
5273 // shift = G_SHL off_reg val
5274 // ptr = G_PTR_ADD base_reg shift
5275 // x = G_LOAD ptr
5276 //
5277 // And fold it into this addressing mode:
5278 //
5279 // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
5280
5281 // Check if we can find the G_PTR_ADD.
5282 MachineInstr *PtrAdd =
5283 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
5284 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
5285 return None;
5286
5287 // Now, try to match an opcode which will match our specific offset.
5288 // We want a G_SHL or a G_MUL.
5289 MachineInstr *OffsetInst =
5290 getDefIgnoringCopies(PtrAdd->getOperand(2).getReg(), MRI);
5291 return selectExtendedSHL(Root, PtrAdd->getOperand(1),
5292 OffsetInst->getOperand(0), SizeInBytes,
5293 /*WantsExt=*/false);
5294 }
5295
5296 /// This is used for computing addresses like this:
5297 ///
5298 /// ldr x1, [x2, x3]
5299 ///
5300 /// Where x2 is the base register, and x3 is an offset register.
5301 ///
5302 /// If it is possible (or profitable) to fold a G_PTR_ADD into the address
5303 /// calculation, this will do so. Otherwise, it will return None.
5304 InstructionSelector::ComplexRendererFns
5305 AArch64InstructionSelector::selectAddrModeRegisterOffset(
5306 MachineOperand &Root) const {
5307 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5308
5309 // We need a GEP.
5310 MachineInstr *Gep = MRI.getVRegDef(Root.getReg());
5311 if (!Gep || Gep->getOpcode() != TargetOpcode::G_PTR_ADD)
5312 return None;
5313
5314 // If this is used more than once, let's not bother folding.
5315 // TODO: Check if they are memory ops. If they are, then we can still fold
5316 // without having to recompute anything.
5317 if (!MRI.hasOneNonDBGUse(Gep->getOperand(0).getReg()))
5318 return None;
5319
5320 // Base is the GEP's LHS, offset is its RHS.
5321 return {{[=](MachineInstrBuilder &MIB) {
5322 MIB.addUse(Gep->getOperand(1).getReg());
5323 },
5324 [=](MachineInstrBuilder &MIB) {
5325 MIB.addUse(Gep->getOperand(2).getReg());
5326 },
5327 [=](MachineInstrBuilder &MIB) {
5328 // Need to add both immediates here to make sure that they are both
5329 // added to the instruction.
5330 MIB.addImm(0);
5331 MIB.addImm(0);
5332 }}};
5333 }
5334
5335 /// This is intended to be equivalent to selectAddrModeXRO in
5336 /// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads.
5337 InstructionSelector::ComplexRendererFns
5338 AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
5339 unsigned SizeInBytes) const {
5340 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5341 if (!Root.isReg())
5342 return None;
5343 MachineInstr *PtrAdd =
5344 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
5345 if (!PtrAdd)
5346 return None;
5347
5348 // Check for an immediate which cannot be encoded in the [base + imm]
5349 // addressing mode, and can't be encoded in an add/sub. If this happens, we'll
5350 // end up with code like:
5351 //
5352 // mov x0, wide
5353 // add x1 base, x0
5354 // ldr x2, [x1, x0]
5355 //
5356 // In this situation, we can use the [base, xreg] addressing mode to save an
5357 // add/sub:
5358 //
5359 // mov x0, wide
5360 // ldr x2, [base, x0]
5361 auto ValAndVReg =
5362 getConstantVRegValWithLookThrough(PtrAdd->getOperand(2).getReg(), MRI);
5363 if (ValAndVReg) {
5364 unsigned Scale = Log2_32(SizeInBytes);
5365 int64_t ImmOff = ValAndVReg->Value;
5366
5367 // Skip immediates that can be selected in the load/store addressing
5368 // mode.
5369 if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
5370 ImmOff < (0x1000 << Scale))
5371 return None;
5372
5373 // Helper lambda to decide whether or not it is preferable to emit an add.
5374 auto isPreferredADD = [](int64_t ImmOff) {
5375 // Constants in [0x0, 0xfff] can be encoded in an add.
5376 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
5377 return true;
5378
5379 // Can it be encoded in an add lsl #12?
5380 if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
5381 return false;
5382
5383 // It can be encoded in an add lsl #12, but we may not want to. If it is
5384 // possible to select this as a single movz, then prefer that. A single
5385 // movz is faster than an add with a shift.
5386 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
5387 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
5388 };
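    // For example, isPreferredADD(0x222000) is true ("add ..., #0x222, lsl
    // #12" is cheap), so we bail out below, while isPreferredADD(0x210000) is
    // false because a single "movz ..., #0x21, lsl #16" covers it, so that
    // offset can stay in the [base, xN] form.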
5389
5390 // If the immediate can be encoded in a single add/sub, then bail out.
5391 if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
5392 return None;
5393 }
5394
5395 // Try to fold shifts into the addressing mode.
5396 auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
5397 if (AddrModeFns)
5398 return AddrModeFns;
5399
5400 // If that doesn't work, see if it's possible to fold in registers from
5401 // a GEP.
5402 return selectAddrModeRegisterOffset(Root);
5403 }
5404
5405 /// This is used for computing addresses like this:
5406 ///
5407 /// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal]
5408 ///
5409 /// Where we have a 64-bit base register, a 32-bit offset register, and an
5410 /// extend (which may or may not be signed).
5411 InstructionSelector::ComplexRendererFns
5412 AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
5413 unsigned SizeInBytes) const {
5414 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5415
5416 MachineInstr *PtrAdd =
5417 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
5418 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
5419 return None;
5420
5421 MachineOperand &LHS = PtrAdd->getOperand(1);
5422 MachineOperand &RHS = PtrAdd->getOperand(2);
5423 MachineInstr *OffsetInst = getDefIgnoringCopies(RHS.getReg(), MRI);
5424
5425 // The first case is the same as selectAddrModeXRO, except we need an extend.
5426 // In this case, we try to find a shift and extend, and fold them into the
5427 // addressing mode.
5428 //
5429 // E.g.
5430 //
5431 // off_reg = G_Z/S/ANYEXT ext_reg
5432 // val = G_CONSTANT LegalShiftVal
5433 // shift = G_SHL off_reg val
5434 // ptr = G_PTR_ADD base_reg shift
5435 // x = G_LOAD ptr
5436 //
5437 // In this case we can get a load like this:
5438 //
5439 // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal]
5440 auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->getOperand(0),
5441 SizeInBytes, /*WantsExt=*/true);
5442 if (ExtendedShl)
5443 return ExtendedShl;
5444
5445 // There was no shift. We can try and fold a G_Z/S/ANYEXT in alone though.
5446 //
5447 // e.g.
5448 // ldr something, [base_reg, ext_reg, sxtw]
5449 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
5450 return None;
5451
5452 // Check if this is an extend. We'll get an extend type if it is.
5453 AArch64_AM::ShiftExtendType Ext =
5454 getExtendTypeForInst(*OffsetInst, MRI, /*IsLoadStore=*/true);
5455 if (Ext == AArch64_AM::InvalidShiftExtend)
5456 return None;
5457
5458 // Need a 32-bit wide register.
5459 MachineIRBuilder MIB(*PtrAdd);
5460 Register ExtReg = moveScalarRegClass(OffsetInst->getOperand(1).getReg(),
5461 AArch64::GPR32RegClass, MIB);
5462 unsigned SignExtend = Ext == AArch64_AM::SXTW;
5463
5464 // Base is LHS, offset is ExtReg.
5465 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(LHS.getReg()); },
5466 [=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
5467 [=](MachineInstrBuilder &MIB) {
5468 MIB.addImm(SignExtend);
5469 MIB.addImm(0);
5470 }}};
5471 }
5472
5473 /// Select a "register plus unscaled signed 9-bit immediate" address. This
5474 /// should only match when there is an offset that is not valid for a scaled
5475 /// immediate addressing mode. The "Size" argument is the size in bytes of the
5476 /// memory reference, which is needed here to know what is valid for a scaled
5477 /// immediate.
5478 InstructionSelector::ComplexRendererFns
5479 AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
5480 unsigned Size) const {
5481 MachineRegisterInfo &MRI =
5482 Root.getParent()->getParent()->getParent()->getRegInfo();
5483
5484 if (!Root.isReg())
5485 return None;
5486
5487 if (!isBaseWithConstantOffset(Root, MRI))
5488 return None;
5489
5490 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
5491 if (!RootDef)
5492 return None;
5493
5494 MachineOperand &OffImm = RootDef->getOperand(2);
5495 if (!OffImm.isReg())
5496 return None;
5497 MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
5498 if (!RHS || RHS->getOpcode() != TargetOpcode::G_CONSTANT)
5499 return None;
5500 int64_t RHSC;
5501 MachineOperand &RHSOp1 = RHS->getOperand(1);
5502 if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
5503 return None;
5504 RHSC = RHSOp1.getCImm()->getSExtValue();
5505
5506 // If the offset is valid as a scaled immediate, don't match here.
5507 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Log2_32(Size)))
5508 return None;
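  // E.g. for a 4-byte access, an offset of 8 bails out above (the scaled form
  // handles it), while offsets like 3 or -4 fall through and match the
  // unscaled form below.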
5509 if (RHSC >= -256 && RHSC < 256) {
5510 MachineOperand &Base = RootDef->getOperand(1);
5511 return {{
5512 [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
5513 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
5514 }};
5515 }
5516 return None;
5517 }
5518
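/// Given a G_ADD_LOW fed by an ADRP, try to fold the low-page part of the
/// global directly into the addressing mode, producing (roughly)
///
///   adrp x8, symbol
///   ldr  w0, [x8, :lo12:symbol]
///
/// instead of materializing the full address with a separate add first.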
5519 InstructionSelector::ComplexRendererFns
5520 AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef,
5521 unsigned Size,
5522 MachineRegisterInfo &MRI) const {
5523 if (RootDef.getOpcode() != AArch64::G_ADD_LOW)
5524 return None;
5525 MachineInstr &Adrp = *MRI.getVRegDef(RootDef.getOperand(1).getReg());
5526 if (Adrp.getOpcode() != AArch64::ADRP)
5527 return None;
5528
5529 // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG.
5530 // TODO: Need to check GV's offset % size if doing offset folding into globals.
5531 assert(Adrp.getOperand(1).getOffset() == 0 && "Unexpected offset in global");
5532 auto GV = Adrp.getOperand(1).getGlobal();
5533 if (GV->isThreadLocal())
5534 return None;
5535
5536 auto &MF = *RootDef.getParent()->getParent();
5537 if (GV->getPointerAlignment(MF.getDataLayout()) < Size)
5538 return None;
5539
5540 unsigned OpFlags = STI.ClassifyGlobalReference(GV, MF.getTarget());
5541 MachineIRBuilder MIRBuilder(RootDef);
5542 Register AdrpReg = Adrp.getOperand(0).getReg();
5543 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(AdrpReg); },
5544 [=](MachineInstrBuilder &MIB) {
5545 MIB.addGlobalAddress(GV, /* Offset */ 0,
5546 OpFlags | AArch64II::MO_PAGEOFF |
5547 AArch64II::MO_NC);
5548 }}};
5549 }
5550
5551 /// Select a "register plus scaled unsigned 12-bit immediate" address. The
5552 /// "Size" argument is the size in bytes of the memory reference, which
5553 /// determines the scale.
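/// For example, with a 4-byte access a constant offset of 8 is rendered as the
/// scaled immediate 2 (i.e. "ldr w0, [xN, #8]").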
5554 InstructionSelector::ComplexRendererFns
5555 AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
5556 unsigned Size) const {
5557 MachineFunction &MF = *Root.getParent()->getParent()->getParent();
5558 MachineRegisterInfo &MRI = MF.getRegInfo();
5559
5560 if (!Root.isReg())
5561 return None;
5562
5563 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
5564 if (!RootDef)
5565 return None;
5566
5567 if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
5568 return {{
5569 [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
5570 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
5571 }};
5572 }
5573
5574 CodeModel::Model CM = MF.getTarget().getCodeModel();
5575 // Check if we can fold in the ADD of small code model ADRP + ADD address.
5576 if (CM == CodeModel::Small) {
5577 auto OpFns = tryFoldAddLowIntoImm(*RootDef, Size, MRI);
5578 if (OpFns)
5579 return OpFns;
5580 }
5581
5582 if (isBaseWithConstantOffset(Root, MRI)) {
5583 MachineOperand &LHS = RootDef->getOperand(1);
5584 MachineOperand &RHS = RootDef->getOperand(2);
5585 MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
5586 MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
5587 if (LHSDef && RHSDef) {
5588 int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
5589 unsigned Scale = Log2_32(Size);
5590 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
5591 if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
5592 return {{
5593 [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
5594 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
5595 }};
5596
5597 return {{
5598 [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
5599 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
5600 }};
5601 }
5602 }
5603 }
5604
5605 // Before falling back to our general case, check if the unscaled
5606 // instructions can handle this. If so, that's preferable.
5607 if (selectAddrModeUnscaled(Root, Size).hasValue())
5608 return None;
5609
5610 return {{
5611 [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
5612 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
5613 }};
5614 }
5615
5616 /// Given a shift instruction, return the correct shift type for that
5617 /// instruction.
5618 static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) {
5619 // TODO: Handle AArch64_AM::ROR
5620 switch (MI.getOpcode()) {
5621 default:
5622 return AArch64_AM::InvalidShiftExtend;
5623 case TargetOpcode::G_SHL:
5624 return AArch64_AM::LSL;
5625 case TargetOpcode::G_LSHR:
5626 return AArch64_AM::LSR;
5627 case TargetOpcode::G_ASHR:
5628 return AArch64_AM::ASR;
5629 }
5630 }
5631
5632 /// Select a "shifted register" operand. If the value is not shifted, set the
5633 /// shift operand to a default value of "lsl 0".
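/// For example, a G_SHL of the source by a constant 3 is folded into the
/// operand as "reg, lsl #3".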
5634 ///
5635 /// TODO: Allow shifted register to be rotated in logical instructions.
5636 InstructionSelector::ComplexRendererFns
5637 AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root) const {
5638 if (!Root.isReg())
5639 return None;
5640 MachineRegisterInfo &MRI =
5641 Root.getParent()->getParent()->getParent()->getRegInfo();
5642
5643 // Check if the operand is defined by an instruction which corresponds to
5644 // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc.
5645 //
5646 // TODO: Handle AArch64_AM::ROR for logical instructions.
5647 MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg());
5648 if (!ShiftInst)
5649 return None;
5650 AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(*ShiftInst);
5651 if (ShType == AArch64_AM::InvalidShiftExtend)
5652 return None;
5653 if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI))
5654 return None;
5655
5656 // Need an immediate on the RHS.
5657 MachineOperand &ShiftRHS = ShiftInst->getOperand(2);
5658 auto Immed = getImmedFromMO(ShiftRHS);
5659 if (!Immed)
5660 return None;
5661
5662 // We have something that we can fold. Fold in the shift's LHS and RHS into
5663 // the instruction.
5664 MachineOperand &ShiftLHS = ShiftInst->getOperand(1);
5665 Register ShiftReg = ShiftLHS.getReg();
5666
5667 unsigned NumBits = MRI.getType(ShiftReg).getSizeInBits();
5668 unsigned Val = *Immed & (NumBits - 1);
5669 unsigned ShiftVal = AArch64_AM::getShifterImm(ShType, Val);
5670
5671 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ShiftReg); },
5672 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}};
5673 }
5674
5675 AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(
5676 MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const {
5677 unsigned Opc = MI.getOpcode();
5678
5679 // Handle explicit extend instructions first.
5680 if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) {
5681 unsigned Size;
5682 if (Opc == TargetOpcode::G_SEXT)
5683 Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
5684 else
5685 Size = MI.getOperand(2).getImm();
5686 assert(Size != 64 && "Extend from 64 bits?");
5687 switch (Size) {
5688 case 8:
5689 return AArch64_AM::SXTB;
5690 case 16:
5691 return AArch64_AM::SXTH;
5692 case 32:
5693 return AArch64_AM::SXTW;
5694 default:
5695 return AArch64_AM::InvalidShiftExtend;
5696 }
5697 }
5698
5699 if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) {
5700 unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
5701 assert(Size != 64 && "Extend from 64 bits?");
5702 switch (Size) {
5703 case 8:
5704 return AArch64_AM::UXTB;
5705 case 16:
5706 return AArch64_AM::UXTH;
5707 case 32:
5708 return AArch64_AM::UXTW;
5709 default:
5710 return AArch64_AM::InvalidShiftExtend;
5711 }
5712 }
5713
5714 // Don't have an explicit extend. Try to handle a G_AND with a constant mask
5715 // on the RHS.
5716 if (Opc != TargetOpcode::G_AND)
5717 return AArch64_AM::InvalidShiftExtend;
5718
5719 Optional<uint64_t> MaybeAndMask = getImmedFromMO(MI.getOperand(2));
5720 if (!MaybeAndMask)
5721 return AArch64_AM::InvalidShiftExtend;
5722 uint64_t AndMask = *MaybeAndMask;
5723 switch (AndMask) {
5724 default:
5725 return AArch64_AM::InvalidShiftExtend;
5726 case 0xFF:
5727 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
5728 case 0xFFFF:
5729 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
5730 case 0xFFFFFFFF:
5731 return AArch64_AM::UXTW;
5732 }
5733 }
5734
5735 Register AArch64InstructionSelector::moveScalarRegClass(
5736 Register Reg, const TargetRegisterClass &RC, MachineIRBuilder &MIB) const {
5737 MachineRegisterInfo &MRI = *MIB.getMRI();
5738 auto Ty = MRI.getType(Reg);
5739 assert(!Ty.isVector() && "Expected scalars only!");
5740 if (Ty.getSizeInBits() == TRI.getRegSizeInBits(RC))
5741 return Reg;
5742
5743 // Create a copy and immediately select it.
5744 // FIXME: We should have an emitCopy function?
5745 auto Copy = MIB.buildCopy({&RC}, {Reg});
5746 selectCopy(*Copy, TII, MRI, TRI, RBI);
5747 return Copy.getReg(0);
5748 }
5749
5750 /// Select an "extended register" operand. This operand folds in an extend
5751 /// followed by an optional left shift.
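/// For example, (G_SHL (G_SEXT_INREG x, 8), 2) can be folded into an operand
/// of the form "wN, sxtb #2".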
5752 InstructionSelector::ComplexRendererFns
5753 AArch64InstructionSelector::selectArithExtendedRegister(
5754 MachineOperand &Root) const {
5755 if (!Root.isReg())
5756 return None;
5757 MachineRegisterInfo &MRI =
5758 Root.getParent()->getParent()->getParent()->getRegInfo();
5759
5760 uint64_t ShiftVal = 0;
5761 Register ExtReg;
5762 AArch64_AM::ShiftExtendType Ext;
5763 MachineInstr *RootDef = getDefIgnoringCopies(Root.getReg(), MRI);
5764 if (!RootDef)
5765 return None;
5766
5767 if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI))
5768 return None;
5769
5770 // Check if we can fold a shift and an extend.
5771 if (RootDef->getOpcode() == TargetOpcode::G_SHL) {
5772 // Look for a constant on the RHS of the shift.
5773 MachineOperand &RHS = RootDef->getOperand(2);
5774 Optional<uint64_t> MaybeShiftVal = getImmedFromMO(RHS);
5775 if (!MaybeShiftVal)
5776 return None;
5777 ShiftVal = *MaybeShiftVal;
5778 if (ShiftVal > 4)
5779 return None;
5780 // Look for a valid extend instruction on the LHS of the shift.
5781 MachineOperand &LHS = RootDef->getOperand(1);
5782 MachineInstr *ExtDef = getDefIgnoringCopies(LHS.getReg(), MRI);
5783 if (!ExtDef)
5784 return None;
5785 Ext = getExtendTypeForInst(*ExtDef, MRI);
5786 if (Ext == AArch64_AM::InvalidShiftExtend)
5787 return None;
5788 ExtReg = ExtDef->getOperand(1).getReg();
5789 } else {
5790 // Didn't get a shift. Try just folding an extend.
5791 Ext = getExtendTypeForInst(*RootDef, MRI);
5792 if (Ext == AArch64_AM::InvalidShiftExtend)
5793 return None;
5794 ExtReg = RootDef->getOperand(1).getReg();
5795
5796 // If we have a 32 bit instruction which zeroes out the high half of a
5797 // register, we get an implicit zero extend for free. Check if we have one.
5798 // FIXME: We actually emit the extend right now even though we don't have
5799 // to.
5800 if (Ext == AArch64_AM::UXTW && MRI.getType(ExtReg).getSizeInBits() == 32) {
5801 MachineInstr *ExtInst = MRI.getVRegDef(ExtReg);
5802 if (ExtInst && isDef32(*ExtInst))
5803 return None;
5804 }
5805 }
5806
5807 // We require a GPR32 here. Narrow the ExtReg if needed using a subregister
5808 // copy.
5809 MachineIRBuilder MIB(*RootDef);
5810 ExtReg = moveScalarRegClass(ExtReg, AArch64::GPR32RegClass, MIB);
5811
5812 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
5813 [=](MachineInstrBuilder &MIB) {
5814 MIB.addImm(getArithExtendImm(Ext, ShiftVal));
5815 }}};
5816 }
5817
5818 void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
5819 const MachineInstr &MI,
5820 int OpIdx) const {
5821 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
5822 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
5823 "Expected G_CONSTANT");
5824 Optional<int64_t> CstVal = getConstantVRegVal(MI.getOperand(0).getReg(), MRI);
5825 assert(CstVal && "Expected constant value");
5826 MIB.addImm(CstVal.getValue());
5827 }
5828
5829 void AArch64InstructionSelector::renderLogicalImm32(
5830 MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
5831 assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
5832 "Expected G_CONSTANT");
5833 uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
5834 uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 32);
5835 MIB.addImm(Enc);
5836 }
5837
5838 void AArch64InstructionSelector::renderLogicalImm64(
5839 MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
5840 assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
5841 "Expected G_CONSTANT");
5842 uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
5843 uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 64);
5844 MIB.addImm(Enc);
5845 }
5846
5847 bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
5848 const MachineInstr &MI, unsigned NumBytes) const {
5849 if (!MI.mayLoadOrStore())
5850 return false;
5851 assert(MI.hasOneMemOperand() &&
5852 "Expected load/store to have only one mem op!");
5853 return (*MI.memoperands_begin())->getSize() == NumBytes;
5854 }
5855
5856 bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
5857 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
5858 if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() != 32)
5859 return false;
5860
5861 // Only return true if we know the operation will zero-out the high half of
5862 // the 64-bit register. Truncates can be subregister copies, which don't
5863 // zero out the high bits. Copies and other copy-like instructions can be
5864 // fed by truncates, or could be lowered as subregister copies.
5865 switch (MI.getOpcode()) {
5866 default:
5867 return true;
5868 case TargetOpcode::COPY:
5869 case TargetOpcode::G_BITCAST:
5870 case TargetOpcode::G_TRUNC:
5871 case TargetOpcode::G_PHI:
5872 return false;
5873 }
5874 }
5875
5876
5877 // Perform fixups on the given PHI instruction's operands to force them all
5878 // to be the same as the destination regbank.
5879 static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI,
5880 const AArch64RegisterBankInfo &RBI) {
5881 assert(MI.getOpcode() == TargetOpcode::G_PHI && "Expected a G_PHI");
5882 Register DstReg = MI.getOperand(0).getReg();
5883 const RegisterBank *DstRB = MRI.getRegBankOrNull(DstReg);
5884 assert(DstRB && "Expected PHI dst to have regbank assigned");
5885 MachineIRBuilder MIB(MI);
5886
5887 // Go through each operand and ensure it has the same regbank.
5888 for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
5889 MachineOperand &MO = MI.getOperand(OpIdx);
5890 if (!MO.isReg())
5891 continue;
5892 Register OpReg = MO.getReg();
5893 const RegisterBank *RB = MRI.getRegBankOrNull(OpReg);
5894 if (RB != DstRB) {
5895 // Insert a cross-bank copy.
5896 auto *OpDef = MRI.getVRegDef(OpReg);
5897 const LLT &Ty = MRI.getType(OpReg);
5898 MIB.setInsertPt(*OpDef->getParent(), std::next(OpDef->getIterator()));
5899 auto Copy = MIB.buildCopy(Ty, OpReg);
5900 MRI.setRegBank(Copy.getReg(0), *DstRB);
5901 MO.setReg(Copy.getReg(0));
5902 }
5903 }
5904 }
5905
5906 void AArch64InstructionSelector::processPHIs(MachineFunction &MF) {
5907 // We're looking for PHIs; build a list so we don't invalidate iterators.
5908 MachineRegisterInfo &MRI = MF.getRegInfo();
5909 SmallVector<MachineInstr *, 32> Phis;
5910 for (auto &BB : MF) {
5911 for (auto &MI : BB) {
5912 if (MI.getOpcode() == TargetOpcode::G_PHI)
5913 Phis.emplace_back(&MI);
5914 }
5915 }
5916
5917 for (auto *MI : Phis) {
5918 // We need to do some work here if the operand types are < 16 bit and they
5919 // are split across fpr/gpr banks. Since all types <32b on gpr
5920 // end up being assigned gpr32 regclasses, we can end up with PHIs here
5921 // which try to select between a gpr32 and an fpr16. Ideally RBS shouldn't
5922 // be selecting heterogeneous regbanks for operands if possible, but we
5923 // still need to be able to deal with it here.
5924 //
5925 // To fix this, if we have a gpr-bank operand < 32b in size and at least
5926 // one other operand is on the fpr bank, then we add cross-bank copies
5927 // to homogenize the operand banks. For simplicity the bank that we choose
5928 // to settle on is whatever bank the def operand has. For example:
5929 //
5930 // %endbb:
5931 // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2:fpr(s16), %bb2
5932 // =>
5933 // %bb2:
5934 // ...
5935 // %in2_copy:gpr(s16) = COPY %in2:fpr(s16)
5936 // ...
5937 // %endbb:
5938 // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2
5939 bool HasGPROp = false, HasFPROp = false;
5940 for (unsigned OpIdx = 1; OpIdx < MI->getNumOperands(); ++OpIdx) {
5941 const auto &MO = MI->getOperand(OpIdx);
5942 if (!MO.isReg())
5943 continue;
5944 const LLT &Ty = MRI.getType(MO.getReg());
5945 if (!Ty.isValid() || !Ty.isScalar())
5946 break;
5947 if (Ty.getSizeInBits() >= 32)
5948 break;
5949 const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg());
5950 // If for some reason we don't have a regbank yet, don't try anything.
5951 if (!RB)
5952 break;
5953
5954 if (RB->getID() == AArch64::GPRRegBankID)
5955 HasGPROp = true;
5956 else
5957 HasFPROp = true;
5958 }
5959 // We have heterogeneous regbanks; fix them up.
5960 if (HasGPROp && HasFPROp)
5961 fixupPHIOpBanks(*MI, MRI, RBI);
5962 }
5963 }
5964
5965 namespace llvm {
5966 InstructionSelector *
5967 createAArch64InstructionSelector(const AArch64TargetMachine &TM,
5968 AArch64Subtarget &Subtarget,
5969 AArch64RegisterBankInfo &RBI) {
5970 return new AArch64InstructionSelector(TM, Subtarget, RBI);
5971 }
5972 }
5973