//=== AArch64PostLegalizerLowering.cpp --------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// Post-legalization lowering for instructions.
///
/// This is used to offload pattern matching from the selector.
///
/// For example, this combiner will notice that a G_SHUFFLE_VECTOR is actually
/// a G_ZIP, G_UZP, etc.
///
/// General optimization combines should be handled by either the
/// AArch64PostLegalizerCombiner or the AArch64PreLegalizerCombiner.
///
//===----------------------------------------------------------------------===//

#include "AArch64TargetMachine.h"
#include "AArch64GlobalISelUtils.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"

#define DEBUG_TYPE "aarch64-postlegalizer-lowering"

using namespace llvm;
using namespace MIPatternMatch;
using namespace AArch64GISelUtils;

/// Represents a pseudo instruction which replaces a G_SHUFFLE_VECTOR.
///
/// Used for matching target-supported shuffles before codegen.
struct ShuffleVectorPseudo {
  unsigned Opc;                 ///< Opcode for the instruction. (E.g. G_ZIP1)
  Register Dst;                 ///< Destination register.
  SmallVector<SrcOp, 2> SrcOps; ///< Source registers.
  ShuffleVectorPseudo(unsigned Opc, Register Dst,
                      std::initializer_list<SrcOp> SrcOps)
      : Opc(Opc), Dst(Dst), SrcOps(SrcOps) {}
  ShuffleVectorPseudo() {}
};

/// Check if a vector shuffle corresponds to a REV instruction with the
/// specified blocksize.
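/// E.g. a <4 x s32> shuffle with mask <1, 0, 3, 2> is a REV64 with 32-bit
/// elements: each pair of elements is reversed within its 64-bit block.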
static bool isREVMask(ArrayRef<int> M, unsigned EltSize, unsigned NumElts,
                      unsigned BlockSize) {
  assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
         "Only possible block sizes for REV are: 16, 32, 64");
  assert(EltSize != 64 && "EltSize cannot be 64 for REV mask.");

  unsigned BlockElts = M[0] + 1;

  // If the first shuffle index is UNDEF, be optimistic.
  if (M[0] < 0)
    BlockElts = BlockSize / EltSize;

  if (BlockSize <= EltSize || BlockSize != BlockElts * EltSize)
    return false;

  for (unsigned i = 0; i < NumElts; ++i) {
    // Ignore undef indices.
    if (M[i] < 0)
      continue;
    if (static_cast<unsigned>(M[i]) !=
        (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
      return false;
  }

  return true;
}

/// Determines if \p M is a shuffle vector mask for a TRN of \p NumElts.
/// Whether or not G_TRN1 or G_TRN2 should be used is stored in \p WhichResult.
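/// E.g. for 4 elements, a TRN1 mask is <0, 4, 2, 6> and a TRN2 mask is
/// <1, 5, 3, 7>.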
static bool isTRNMask(ArrayRef<int> M, unsigned NumElts,
                      unsigned &WhichResult) {
  if (NumElts % 2 != 0)
    return false;
  WhichResult = (M[0] == 0 ? 0 : 1);
  for (unsigned i = 0; i < NumElts; i += 2) {
    if ((M[i] >= 0 && static_cast<unsigned>(M[i]) != i + WhichResult) ||
        (M[i + 1] >= 0 &&
         static_cast<unsigned>(M[i + 1]) != i + NumElts + WhichResult))
      return false;
  }
  return true;
}

/// Check if a G_EXT instruction can handle a shuffle mask \p M when the vector
/// sources of the shuffle are different.
static Optional<std::pair<bool, uint64_t>> getExtMask(ArrayRef<int> M,
                                                      unsigned NumElts) {
  // Look for the first non-undef element.
  auto FirstRealElt = find_if(M, [](int Elt) { return Elt >= 0; });
  if (FirstRealElt == M.end())
    return None;

  // Use APInt to handle overflow when calculating expected element.
  unsigned MaskBits = APInt(32, NumElts * 2).logBase2();
  APInt ExpectedElt = APInt(MaskBits, *FirstRealElt + 1);

  // The following shuffle indices must be the successive elements after the
  // first real element.
  if (any_of(
          make_range(std::next(FirstRealElt), M.end()),
          [&ExpectedElt](int Elt) { return Elt != ExpectedElt++ && Elt >= 0; }))
    return None;

  // The index of an EXT is the first element if it is not UNDEF.
  // Watch out for the beginning UNDEFs. The EXT index should be the expected
  // value of the first element. E.g.
  // <-1, -1, 3, ...> is treated as <1, 2, 3, ...>.
  // <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>.
  // ExpectedElt is the last mask index plus 1.
  uint64_t Imm = ExpectedElt.getZExtValue();
  bool ReverseExt = false;

  // There are two different cases that require reversing the input vectors.
  // For example, for a vector <4 x i32> we have the following cases:
  // Case 1: shufflevector(<4 x i32>, <4 x i32>, <-1, -1, -1, 0>)
  // Case 2: shufflevector(<4 x i32>, <4 x i32>, <-1, -1, 7, 0>)
  // For both cases, we finally use mask <5, 6, 7, 0>, which requires the two
  // input vectors to be reversed.
  if (Imm < NumElts)
    ReverseExt = true;
  else
    Imm -= NumElts;
  return std::make_pair(ReverseExt, Imm);
}

/// Determines if \p M is a shuffle vector mask for a UZP of \p NumElts.
/// Whether or not G_UZP1 or G_UZP2 should be used is stored in \p WhichResult.
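/// E.g. for 4 elements, a UZP1 mask is <0, 2, 4, 6> and a UZP2 mask is
/// <1, 3, 5, 7>.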
static bool isUZPMask(ArrayRef<int> M, unsigned NumElts,
                      unsigned &WhichResult) {
  WhichResult = (M[0] == 0 ? 0 : 1);
  for (unsigned i = 0; i != NumElts; ++i) {
    // Skip undef indices.
    if (M[i] < 0)
      continue;
    if (static_cast<unsigned>(M[i]) != 2 * i + WhichResult)
      return false;
  }
  return true;
}

/// \return true if \p M is a zip mask for a shuffle vector of \p NumElts.
/// Whether or not G_ZIP1 or G_ZIP2 should be used is stored in \p WhichResult.
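/// E.g. for 4 elements, a ZIP1 mask is <0, 4, 1, 5> and a ZIP2 mask is
/// <2, 6, 3, 7>.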
static bool isZipMask(ArrayRef<int> M, unsigned NumElts,
                      unsigned &WhichResult) {
  if (NumElts % 2 != 0)
    return false;

  // 0 means use ZIP1, 1 means use ZIP2.
  WhichResult = (M[0] == 0 ? 0 : 1);
  unsigned Idx = WhichResult * NumElts / 2;
  for (unsigned i = 0; i != NumElts; i += 2) {
    if ((M[i] >= 0 && static_cast<unsigned>(M[i]) != Idx) ||
        (M[i + 1] >= 0 && static_cast<unsigned>(M[i + 1]) != Idx + NumElts))
      return false;
    Idx += 1;
  }
  return true;
}

/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with a
/// G_REV instruction. Returns the appropriate G_REV opcode in \p MatchInfo.
static bool matchREV(MachineInstr &MI, MachineRegisterInfo &MRI,
                     ShuffleVectorPseudo &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  LLT Ty = MRI.getType(Dst);
  unsigned EltSize = Ty.getScalarSizeInBits();

  // Element size for a rev cannot be 64.
  if (EltSize == 64)
    return false;

  unsigned NumElts = Ty.getNumElements();

  // Try to produce G_REV64.
  if (isREVMask(ShuffleMask, EltSize, NumElts, 64)) {
    MatchInfo = ShuffleVectorPseudo(AArch64::G_REV64, Dst, {Src});
    return true;
  }

  // TODO: Produce G_REV32 and G_REV16 once we have proper legalization
  // support. This should be identical to above, but with a constant 32 and
  // constant 16.
  return false;
}

/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
/// a G_TRN1 or G_TRN2 instruction.
static bool matchTRN(MachineInstr &MI, MachineRegisterInfo &MRI,
                     ShuffleVectorPseudo &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  unsigned WhichResult;
  ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
  Register Dst = MI.getOperand(0).getReg();
  unsigned NumElts = MRI.getType(Dst).getNumElements();
  if (!isTRNMask(ShuffleMask, NumElts, WhichResult))
    return false;
  unsigned Opc = (WhichResult == 0) ? AArch64::G_TRN1 : AArch64::G_TRN2;
  Register V1 = MI.getOperand(1).getReg();
  Register V2 = MI.getOperand(2).getReg();
  MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
  return true;
}

/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
/// a G_UZP1 or G_UZP2 instruction.
///
/// \param [in] MI - The shuffle vector instruction.
/// \param [out] MatchInfo - Either G_UZP1 or G_UZP2 on success.
static bool matchUZP(MachineInstr &MI, MachineRegisterInfo &MRI,
                     ShuffleVectorPseudo &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  unsigned WhichResult;
  ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
  Register Dst = MI.getOperand(0).getReg();
  unsigned NumElts = MRI.getType(Dst).getNumElements();
  if (!isUZPMask(ShuffleMask, NumElts, WhichResult))
    return false;
  unsigned Opc = (WhichResult == 0) ? AArch64::G_UZP1 : AArch64::G_UZP2;
  Register V1 = MI.getOperand(1).getReg();
  Register V2 = MI.getOperand(2).getReg();
  MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
  return true;
}

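/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
/// a G_ZIP1 or G_ZIP2 instruction.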
static bool matchZip(MachineInstr &MI, MachineRegisterInfo &MRI,
                     ShuffleVectorPseudo &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  unsigned WhichResult;
  ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
  Register Dst = MI.getOperand(0).getReg();
  unsigned NumElts = MRI.getType(Dst).getNumElements();
  if (!isZipMask(ShuffleMask, NumElts, WhichResult))
    return false;
  unsigned Opc = (WhichResult == 0) ? AArch64::G_ZIP1 : AArch64::G_ZIP2;
  Register V1 = MI.getOperand(1).getReg();
  Register V2 = MI.getOperand(2).getReg();
  MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
  return true;
}

/// Helper function for matchDup.
static bool matchDupFromInsertVectorElt(int Lane, MachineInstr &MI,
                                        MachineRegisterInfo &MRI,
                                        ShuffleVectorPseudo &MatchInfo) {
  if (Lane != 0)
    return false;

  // Try to match a vector splat operation into a dup instruction.
  // We're looking for this pattern:
  //
  // %scalar:gpr(s64) = COPY $x0
  // %undef:fpr(<2 x s64>) = G_IMPLICIT_DEF
  // %cst0:gpr(s32) = G_CONSTANT i32 0
  // %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32)
  // %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64), %cst0(s32)
  // %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef,
  //                                          %zerovec(<2 x s32>)
  //
  // ...into:
  // %splat = G_DUP %scalar

  // Begin matching the insert.
  auto *InsMI = getOpcodeDef(TargetOpcode::G_INSERT_VECTOR_ELT,
                             MI.getOperand(1).getReg(), MRI);
  if (!InsMI)
    return false;
  // Match the undef vector operand.
  if (!getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, InsMI->getOperand(1).getReg(),
                    MRI))
    return false;

  // Match the index constant 0.
  if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ZeroInt()))
    return false;

  MatchInfo = ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(),
                                  {InsMI->getOperand(2).getReg()});
  return true;
}

/// Helper function for matchDup.
static bool matchDupFromBuildVector(int Lane, MachineInstr &MI,
                                    MachineRegisterInfo &MRI,
                                    ShuffleVectorPseudo &MatchInfo) {
  assert(Lane >= 0 && "Expected positive lane?");
  // Test if the LHS is a BUILD_VECTOR. If it is, then we can just reference
  // the lane's definition directly.
  auto *BuildVecMI = getOpcodeDef(TargetOpcode::G_BUILD_VECTOR,
                                  MI.getOperand(1).getReg(), MRI);
  if (!BuildVecMI)
    return false;
  Register Reg = BuildVecMI->getOperand(Lane + 1).getReg();
  MatchInfo =
      ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(), {Reg});
  return true;
}

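/// \return true if a splat G_SHUFFLE_VECTOR \p MI can be replaced with a
/// G_DUP of the splatted scalar.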
static bool matchDup(MachineInstr &MI, MachineRegisterInfo &MRI,
                     ShuffleVectorPseudo &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  auto MaybeLane = getSplatIndex(MI);
  if (!MaybeLane)
    return false;
  int Lane = *MaybeLane;
  // If this is an undef splat, generate it via "just" vdup, if possible.
  if (Lane < 0)
    Lane = 0;
  if (matchDupFromInsertVectorElt(Lane, MI, MRI, MatchInfo))
    return true;
  if (matchDupFromBuildVector(Lane, MI, MRI, MatchInfo))
    return true;
  return false;
}

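/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
/// a G_EXT instruction. The byte offset of the extract is stored as an
/// immediate operand in \p MatchInfo.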
static bool matchEXT(MachineInstr &MI, MachineRegisterInfo &MRI,
                     ShuffleVectorPseudo &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  Register Dst = MI.getOperand(0).getReg();
  auto ExtInfo = getExtMask(MI.getOperand(3).getShuffleMask(),
                            MRI.getType(Dst).getNumElements());
  if (!ExtInfo)
    return false;
  bool ReverseExt;
  uint64_t Imm;
  std::tie(ReverseExt, Imm) = *ExtInfo;
  Register V1 = MI.getOperand(1).getReg();
  Register V2 = MI.getOperand(2).getReg();
  if (ReverseExt)
    std::swap(V1, V2);
  uint64_t ExtFactor = MRI.getType(V1).getScalarSizeInBits() / 8;
  Imm *= ExtFactor;
  MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V2, Imm});
  return true;
}

/// Replace a G_SHUFFLE_VECTOR instruction \p MI with the pseudo instruction
/// described by \p MatchInfo.
static bool applyShuffleVectorPseudo(MachineInstr &MI,
                                     ShuffleVectorPseudo &MatchInfo) {
  MachineIRBuilder MIRBuilder(MI);
  MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst}, MatchInfo.SrcOps);
  MI.eraseFromParent();
  return true;
}

/// Replace a G_SHUFFLE_VECTOR instruction with G_EXT.
/// Special-cased because the constant operand must be emitted as a G_CONSTANT
/// for the imported tablegen patterns to work.
static bool applyEXT(MachineInstr &MI, ShuffleVectorPseudo &MatchInfo) {
  MachineIRBuilder MIRBuilder(MI);
  // Tablegen patterns expect an i32 G_CONSTANT as the final op.
  auto Cst =
      MIRBuilder.buildConstant(LLT::scalar(32), MatchInfo.SrcOps[2].getImm());
  MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst},
                        {MatchInfo.SrcOps[0], MatchInfo.SrcOps[1], Cst});
  MI.eraseFromParent();
  return true;
}

/// isVShiftRImm - Check if this is a valid build_vector for the immediate
/// operand of a vector shift right operation. The value must be in the range:
/// 1 <= Value <= ElementBits for a right shift.
static bool isVShiftRImm(Register Reg, MachineRegisterInfo &MRI, LLT Ty,
                         int64_t &Cnt) {
  assert(Ty.isVector() && "vector shift count is not a vector type");
  MachineInstr *MI = MRI.getVRegDef(Reg);
  auto Cst = getBuildVectorConstantSplat(*MI, MRI);
  if (!Cst)
    return false;
  Cnt = *Cst;
  int64_t ElementBits = Ty.getScalarSizeInBits();
  return Cnt >= 1 && Cnt <= ElementBits;
}

/// Match a vector G_ASHR or G_LSHR with a valid immediate shift.
static bool matchVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
                              int64_t &Imm) {
  assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
         MI.getOpcode() == TargetOpcode::G_LSHR);
  LLT Ty = MRI.getType(MI.getOperand(1).getReg());
  if (!Ty.isVector())
    return false;
  return isVShiftRImm(MI.getOperand(2).getReg(), MRI, Ty, Imm);
}

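/// Replace a vector G_ASHR/G_LSHR whose shift amount is a constant splat with
/// the corresponding G_VASHR/G_VLSHR pseudo, passing the shift amount as a
/// scalar constant operand.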
static bool applyVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
                              int64_t &Imm) {
  unsigned Opc = MI.getOpcode();
  assert(Opc == TargetOpcode::G_ASHR || Opc == TargetOpcode::G_LSHR);
  unsigned NewOpc =
      Opc == TargetOpcode::G_ASHR ? AArch64::G_VASHR : AArch64::G_VLSHR;
  MachineIRBuilder MIB(MI);
  auto ImmDef = MIB.buildConstant(LLT::scalar(32), Imm);
  MIB.buildInstr(NewOpc, {MI.getOperand(0)}, {MI.getOperand(1), ImmDef});
  MI.eraseFromParent();
  return true;
}

/// Determine if it is possible to modify the \p RHS and predicate \p P of a
/// G_ICMP instruction such that the right-hand side is an arithmetic immediate.
///
/// \returns A pair containing the updated immediate and predicate which may
/// be used to optimize the instruction.
///
/// \note This assumes that the comparison has been legalized.
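///
/// E.g. "x slt 0x1001" cannot use an arithmetic immediate, but the equivalent
/// "x sle 0x1000" can, since 0x1000 fits in a shifted 12-bit immediate.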
Optional<std::pair<uint64_t, CmpInst::Predicate>>
tryAdjustICmpImmAndPred(Register RHS, CmpInst::Predicate P,
                        const MachineRegisterInfo &MRI) {
  const auto &Ty = MRI.getType(RHS);
  if (Ty.isVector())
    return None;
  unsigned Size = Ty.getSizeInBits();
  assert((Size == 32 || Size == 64) && "Expected 32 or 64 bit compare only?");

  // If the RHS is not a constant, or the RHS is already a valid arithmetic
  // immediate, then there is nothing to change.
  auto ValAndVReg = getConstantVRegValWithLookThrough(RHS, MRI);
  if (!ValAndVReg)
    return None;
  uint64_t C = ValAndVReg->Value;
  if (isLegalArithImmed(C))
    return None;

  // We have a non-arithmetic immediate. Check if adjusting the immediate and
  // adjusting the predicate will result in a legal arithmetic immediate.
  switch (P) {
  default:
    return None;
  case CmpInst::ICMP_SLT:
  case CmpInst::ICMP_SGE:
    // Check for
    //
    // x slt c => x sle c - 1
    // x sge c => x sgt c - 1
    //
    // When c is not the smallest possible negative number.
    if ((Size == 64 && static_cast<int64_t>(C) == INT64_MIN) ||
        (Size == 32 && static_cast<int32_t>(C) == INT32_MIN))
      return None;
    P = (P == CmpInst::ICMP_SLT) ? CmpInst::ICMP_SLE : CmpInst::ICMP_SGT;
    C -= 1;
    break;
  case CmpInst::ICMP_ULT:
  case CmpInst::ICMP_UGE:
    // Check for
    //
    // x ult c => x ule c - 1
    // x uge c => x ugt c - 1
    //
    // When c is not zero.
    if (C == 0)
      return None;
    P = (P == CmpInst::ICMP_ULT) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT;
    C -= 1;
    break;
  case CmpInst::ICMP_SLE:
  case CmpInst::ICMP_SGT:
    // Check for
    //
    // x sle c => x slt c + 1
    // x sgt c => x sge c + 1
    //
    // When c is not the largest possible signed integer.
    if ((Size == 32 && static_cast<int32_t>(C) == INT32_MAX) ||
        (Size == 64 && static_cast<int64_t>(C) == INT64_MAX))
      return None;
    P = (P == CmpInst::ICMP_SLE) ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGE;
    C += 1;
    break;
  case CmpInst::ICMP_ULE:
  case CmpInst::ICMP_UGT:
    // Check for
    //
    // x ule c => x ult c + 1
    // x ugt c => x uge c + 1
    //
    // When c is not the largest possible unsigned integer.
    if ((Size == 32 && static_cast<uint32_t>(C) == UINT32_MAX) ||
        (Size == 64 && C == UINT64_MAX))
      return None;
    P = (P == CmpInst::ICMP_ULE) ? CmpInst::ICMP_ULT : CmpInst::ICMP_UGE;
    C += 1;
    break;
  }

  // Check if the new constant is valid, and return the updated constant and
  // predicate if it is.
  if (Size == 32)
    C = static_cast<uint32_t>(C);
  if (!isLegalArithImmed(C))
    return None;
  return {{C, P}};
}

/// Determine whether or not it is possible to update the RHS and predicate of
/// a G_ICMP instruction such that the RHS will be selected as an arithmetic
/// immediate.
///
/// \p MI - The G_ICMP instruction
/// \p MatchInfo - The new RHS immediate and predicate on success
///
/// See tryAdjustICmpImmAndPred for valid transformations.
bool matchAdjustICmpImmAndPred(
    MachineInstr &MI, const MachineRegisterInfo &MRI,
    std::pair<uint64_t, CmpInst::Predicate> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_ICMP);
  Register RHS = MI.getOperand(3).getReg();
  auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
  if (auto MaybeNewImmAndPred = tryAdjustICmpImmAndPred(RHS, Pred, MRI)) {
    MatchInfo = *MaybeNewImmAndPred;
    return true;
  }
  return false;
}

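/// Rewrite the G_ICMP \p MI in place with the adjusted immediate and
/// predicate computed by matchAdjustICmpImmAndPred.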
bool applyAdjustICmpImmAndPred(
    MachineInstr &MI, std::pair<uint64_t, CmpInst::Predicate> &MatchInfo,
    MachineIRBuilder &MIB, GISelChangeObserver &Observer) {
  MIB.setInstrAndDebugLoc(MI);
  MachineOperand &RHS = MI.getOperand(3);
  MachineRegisterInfo &MRI = *MIB.getMRI();
  auto Cst = MIB.buildConstant(MRI.cloneVirtualRegister(RHS.getReg()),
                               MatchInfo.first);
  Observer.changingInstr(MI);
  RHS.setReg(Cst->getOperand(0).getReg());
  MI.getOperand(1).setPredicate(MatchInfo.second);
  Observer.changedInstr(MI);
  return true;
}

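/// Match a splat G_SHUFFLE_VECTOR whose source lane can be selected as a
/// G_DUPLANE* instruction. Stores the duplane opcode and lane index in
/// \p MatchInfo.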
bool matchDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
                  std::pair<unsigned, int> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  Register Src1Reg = MI.getOperand(1).getReg();
  const LLT SrcTy = MRI.getType(Src1Reg);
  const LLT DstTy = MRI.getType(MI.getOperand(0).getReg());

  auto LaneIdx = getSplatIndex(MI);
  if (!LaneIdx)
    return false;

  // The lane idx should be within the first source vector.
  if (*LaneIdx >= SrcTy.getNumElements())
    return false;

  if (DstTy != SrcTy)
    return false;

  LLT ScalarTy = SrcTy.getElementType();
  unsigned ScalarSize = ScalarTy.getSizeInBits();

  unsigned Opc = 0;
  switch (SrcTy.getNumElements()) {
  case 2:
    if (ScalarSize == 64)
      Opc = AArch64::G_DUPLANE64;
    break;
  case 4:
    if (ScalarSize == 32)
      Opc = AArch64::G_DUPLANE32;
    break;
  case 8:
    if (ScalarSize == 16)
      Opc = AArch64::G_DUPLANE16;
    break;
  case 16:
    if (ScalarSize == 8)
      Opc = AArch64::G_DUPLANE8;
    break;
  default:
    break;
  }
  if (!Opc)
    return false;

  MatchInfo.first = Opc;
  MatchInfo.second = *LaneIdx;
  return true;
}

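/// Replace the matched G_SHUFFLE_VECTOR with the G_DUPLANE* opcode and lane
/// index stored in \p MatchInfo.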
bool applyDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
                  MachineIRBuilder &B, std::pair<unsigned, int> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  B.setInstrAndDebugLoc(MI);
  auto Lane = B.buildConstant(LLT::scalar(64), MatchInfo.second);
  B.buildInstr(MatchInfo.first, {MI.getOperand(0).getReg()},
               {MI.getOperand(1).getReg(), Lane});
  MI.eraseFromParent();
  return true;
}

#define AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_DEPS
#include "AArch64GenPostLegalizeGILowering.inc"
#undef AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_DEPS

namespace {
#define AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_H
#include "AArch64GenPostLegalizeGILowering.inc"
#undef AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_H

class AArch64PostLegalizerLoweringInfo : public CombinerInfo {
public:
  AArch64GenPostLegalizerLoweringHelperRuleConfig GeneratedRuleCfg;

  AArch64PostLegalizerLoweringInfo(bool OptSize, bool MinSize)
      : CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
                     /*LegalizerInfo*/ nullptr, /*OptEnabled = */ true, OptSize,
                     MinSize) {
    if (!GeneratedRuleCfg.parseCommandLineOption())
      report_fatal_error("Invalid rule identifier");
  }

  virtual bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
                       MachineIRBuilder &B) const override;
};

bool AArch64PostLegalizerLoweringInfo::combine(GISelChangeObserver &Observer,
                                               MachineInstr &MI,
                                               MachineIRBuilder &B) const {
  CombinerHelper Helper(Observer, B);
  AArch64GenPostLegalizerLoweringHelper Generated(GeneratedRuleCfg);
  return Generated.tryCombineAll(Observer, MI, B, Helper);
}

#define AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_CPP
#include "AArch64GenPostLegalizeGILowering.inc"
#undef AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_CPP

class AArch64PostLegalizerLowering : public MachineFunctionPass {
public:
  static char ID;

  AArch64PostLegalizerLowering();

  StringRef getPassName() const override {
    return "AArch64PostLegalizerLowering";
  }

  bool runOnMachineFunction(MachineFunction &MF) override;
  void getAnalysisUsage(AnalysisUsage &AU) const override;
};
} // end anonymous namespace

void AArch64PostLegalizerLowering::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<TargetPassConfig>();
  AU.setPreservesCFG();
  getSelectionDAGFallbackAnalysisUsage(AU);
  MachineFunctionPass::getAnalysisUsage(AU);
}

AArch64PostLegalizerLowering::AArch64PostLegalizerLowering()
    : MachineFunctionPass(ID) {
  initializeAArch64PostLegalizerLoweringPass(*PassRegistry::getPassRegistry());
}

bool AArch64PostLegalizerLowering::runOnMachineFunction(MachineFunction &MF) {
  if (MF.getProperties().hasProperty(
          MachineFunctionProperties::Property::FailedISel))
    return false;
  assert(MF.getProperties().hasProperty(
             MachineFunctionProperties::Property::Legalized) &&
         "Expected a legalized function?");
  auto *TPC = &getAnalysis<TargetPassConfig>();
  const Function &F = MF.getFunction();
  AArch64PostLegalizerLoweringInfo PCInfo(F.hasOptSize(), F.hasMinSize());
  Combiner C(PCInfo, TPC);
  return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
}

char AArch64PostLegalizerLowering::ID = 0;
INITIALIZE_PASS_BEGIN(AArch64PostLegalizerLowering, DEBUG_TYPE,
                      "Lower AArch64 MachineInstrs after legalization", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_END(AArch64PostLegalizerLowering, DEBUG_TYPE,
                    "Lower AArch64 MachineInstrs after legalization", false,
                    false)

namespace llvm {
FunctionPass *createAArch64PostLegalizerLowering() {
  return new AArch64PostLegalizerLowering();
}
} // end namespace llvm