1 //===-- lib/CodeGen/GlobalISel/GICombinerHelper.cpp -----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
9 #include "llvm/CodeGen/GlobalISel/Combiner.h"
10 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
11 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
12 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
13 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
14 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
15 #include "llvm/CodeGen/GlobalISel/Utils.h"
16 #include "llvm/CodeGen/MachineDominators.h"
17 #include "llvm/CodeGen/MachineFrameInfo.h"
18 #include "llvm/CodeGen/MachineInstr.h"
19 #include "llvm/CodeGen/MachineMemOperand.h"
20 #include "llvm/CodeGen/MachineRegisterInfo.h"
21 #include "llvm/CodeGen/TargetInstrInfo.h"
22 #include "llvm/CodeGen/TargetLowering.h"
23 #include "llvm/Support/MathExtras.h"
24 #include "llvm/Target/TargetMachine.h"
25 
26 #define DEBUG_TYPE "gi-combiner"
27 
28 using namespace llvm;
29 using namespace MIPatternMatch;
30 
31 // Option to allow testing of the combiner while no targets know about indexed
32 // addressing.
33 static cl::opt<bool>
34     ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false),
35                        cl::desc("Force all indexed operations to be "
36                                 "legal for the GlobalISel combiner"));
37 
38 CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
39                                MachineIRBuilder &B, GISelKnownBits *KB,
40                                MachineDominatorTree *MDT,
41                                const LegalizerInfo *LI)
42     : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer),
43       KB(KB), MDT(MDT), LI(LI) {
44   (void)this->KB;
45 }
46 
47 const TargetLowering &CombinerHelper::getTargetLowering() const {
48   return *Builder.getMF().getSubtarget().getTargetLowering();
49 }
50 
51 bool CombinerHelper::isLegalOrBeforeLegalizer(
52     const LegalityQuery &Query) const {
53   return !LI || LI->getAction(Query).Action == LegalizeActions::Legal;
54 }
55 
56 void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg,
57                                     Register ToReg) const {
58   Observer.changingAllUsesOfReg(MRI, FromReg);
59 
60   if (MRI.constrainRegAttrs(ToReg, FromReg))
61     MRI.replaceRegWith(FromReg, ToReg);
62   else
63     Builder.buildCopy(ToReg, FromReg);
64 
65   Observer.finishedChangingAllUsesOfReg();
66 }
67 
68 void CombinerHelper::replaceRegOpWith(MachineRegisterInfo &MRI,
69                                       MachineOperand &FromRegOp,
70                                       Register ToReg) const {
71   assert(FromRegOp.getParent() && "Expected an operand in an MI");
72   Observer.changingInstr(*FromRegOp.getParent());
73 
74   FromRegOp.setReg(ToReg);
75 
76   Observer.changedInstr(*FromRegOp.getParent());
77 }
78 
79 bool CombinerHelper::tryCombineCopy(MachineInstr &MI) {
80   if (matchCombineCopy(MI)) {
81     applyCombineCopy(MI);
82     return true;
83   }
84   return false;
85 }
86 bool CombinerHelper::matchCombineCopy(MachineInstr &MI) {
87   if (MI.getOpcode() != TargetOpcode::COPY)
88     return false;
89   Register DstReg = MI.getOperand(0).getReg();
90   Register SrcReg = MI.getOperand(1).getReg();
91   return canReplaceReg(DstReg, SrcReg, MRI);
92 }
93 void CombinerHelper::applyCombineCopy(MachineInstr &MI) {
94   Register DstReg = MI.getOperand(0).getReg();
95   Register SrcReg = MI.getOperand(1).getReg();
96   MI.eraseFromParent();
97   replaceRegWith(MRI, DstReg, SrcReg);
98 }
99 
100 bool CombinerHelper::tryCombineConcatVectors(MachineInstr &MI) {
101   bool IsUndef = false;
102   SmallVector<Register, 4> Ops;
103   if (matchCombineConcatVectors(MI, IsUndef, Ops)) {
104     applyCombineConcatVectors(MI, IsUndef, Ops);
105     return true;
106   }
107   return false;
108 }
109 
110 bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI, bool &IsUndef,
111                                                SmallVectorImpl<Register> &Ops) {
112   assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
113          "Invalid instruction");
114   IsUndef = true;
115   MachineInstr *Undef = nullptr;
116 
117   // Walk over all the operands of concat vectors and check if they are
118   // build_vector themselves or undef.
119   // Then collect their operands in Ops.
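  // Illustrative example of the transform this enables (register names are
  // made up for the example):
  //   %2:_(<4 x s32>) = G_CONCAT_VECTORS %0:_(<2 x s32>), %1:_(<2 x s32>)
  // where %0 and %1 are G_BUILD_VECTORs of scalars can be rewritten as a
  // single G_BUILD_VECTOR of all four scalar operands.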
120   for (const MachineOperand &MO : MI.uses()) {
121     Register Reg = MO.getReg();
122     MachineInstr *Def = MRI.getVRegDef(Reg);
123     assert(Def && "Operand not defined");
124     switch (Def->getOpcode()) {
125     case TargetOpcode::G_BUILD_VECTOR:
126       IsUndef = false;
127       // Remember the operands of the build_vector to fold
128       // them into the yet-to-build flattened concat vectors.
129       for (const MachineOperand &BuildVecMO : Def->uses())
130         Ops.push_back(BuildVecMO.getReg());
131       break;
132     case TargetOpcode::G_IMPLICIT_DEF: {
133       LLT OpType = MRI.getType(Reg);
134       // Keep one undef value for all the undef operands.
135       if (!Undef) {
136         Builder.setInsertPt(*MI.getParent(), MI);
137         Undef = Builder.buildUndef(OpType.getScalarType());
138       }
139       assert(MRI.getType(Undef->getOperand(0).getReg()) ==
140                  OpType.getScalarType() &&
141              "All undefs should have the same type");
142       // Break the undef vector in as many scalar elements as needed
143       // for the flattening.
144       for (unsigned EltIdx = 0, EltEnd = OpType.getNumElements();
145            EltIdx != EltEnd; ++EltIdx)
146         Ops.push_back(Undef->getOperand(0).getReg());
147       break;
148     }
149     default:
150       return false;
151     }
152   }
153   return true;
154 }
155 void CombinerHelper::applyCombineConcatVectors(
156     MachineInstr &MI, bool IsUndef, const ArrayRef<Register> Ops) {
157   // We determined that the concat_vectors can be flattened.
158   // Generate the flattened build_vector.
159   Register DstReg = MI.getOperand(0).getReg();
160   Builder.setInsertPt(*MI.getParent(), MI);
161   Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
162 
163   // Note: IsUndef is sort of redundant. We could have determined it by
164   // checking that all Ops are undef.  Alternatively, we could have
165   // generated a build_vector of undefs and relied on another combine to
166   // clean that up.  For now, given we already gather this information
167   // in tryCombineConcatVectors, just save compile time and issue the
168   // right thing.
169   if (IsUndef)
170     Builder.buildUndef(NewDstReg);
171   else
172     Builder.buildBuildVector(NewDstReg, Ops);
173   MI.eraseFromParent();
174   replaceRegWith(MRI, DstReg, NewDstReg);
175 }
176 
177 bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) {
178   SmallVector<Register, 4> Ops;
179   if (matchCombineShuffleVector(MI, Ops)) {
180     applyCombineShuffleVector(MI, Ops);
181     return true;
182   }
183   return false;
184 }
185 
186 bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI,
187                                                SmallVectorImpl<Register> &Ops) {
188   assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
189          "Invalid instruction kind");
190   LLT DstType = MRI.getType(MI.getOperand(0).getReg());
191   Register Src1 = MI.getOperand(1).getReg();
192   LLT SrcType = MRI.getType(Src1);
193   // As bizarre as it may look, shuffle vector can actually produce
194   // scalar! This is because at the IR level a <1 x ty> shuffle
195   // vector is perfectly valid.
196   unsigned DstNumElts = DstType.isVector() ? DstType.getNumElements() : 1;
197   unsigned SrcNumElts = SrcType.isVector() ? SrcType.getNumElements() : 1;
198 
199   // If the resulting vector is smaller than the size of the source
200   // vectors being concatenated, we won't be able to replace the
201   // shuffle vector into a concat_vectors.
202   //
203   // Note: We may still be able to produce a concat_vectors fed by
204   //       extract_vector_elt and so on. It is less clear that would
205   //       be better though, so don't bother for now.
206   //
207   // If the destination is a scalar, the size of the sources doesn't
208   // matter; we will lower the shuffle to a plain copy. This will
209   // work only if the source and destination have the same size. But
210   // that's covered by the next condition.
211   //
212   // TODO: If the sizes of the source and destination don't match,
213   //       we could still emit an extract vector element in that case.
214   if (DstNumElts < 2 * SrcNumElts && DstNumElts != 1)
215     return false;
216 
217   // Check that the shuffle mask can be broken evenly between the
218   // different sources.
219   if (DstNumElts % SrcNumElts != 0)
220     return false;
221 
222   // Mask length is a multiple of the source vector length.
223   // Check if the shuffle is some kind of concatenation of the input
224   // vectors.
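  // Illustrative example (made-up registers): a G_SHUFFLE_VECTOR producing
  // <4 x s32> from two <2 x s32> sources with mask (0, 1, 2, 3) is simply the
  // concatenation of the two sources.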
225   unsigned NumConcat = DstNumElts / SrcNumElts;
226   SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
227   ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
228   for (unsigned i = 0; i != DstNumElts; ++i) {
229     int Idx = Mask[i];
230     // Undef value.
231     if (Idx < 0)
232       continue;
233     // Ensure the indices in each SrcType sized piece are sequential and that
234     // the same source is used for the whole piece.
235     if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
236         (ConcatSrcs[i / SrcNumElts] >= 0 &&
237          ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts)))
238       return false;
239     // Remember which source this index came from.
240     ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
241   }
242 
243   // The shuffle is concatenating multiple vectors together.
244   // Collect the different operands for that.
245   Register UndefReg;
246   Register Src2 = MI.getOperand(2).getReg();
247   for (auto Src : ConcatSrcs) {
248     if (Src < 0) {
249       if (!UndefReg) {
250         Builder.setInsertPt(*MI.getParent(), MI);
251         UndefReg = Builder.buildUndef(SrcType).getReg(0);
252       }
253       Ops.push_back(UndefReg);
254     } else if (Src == 0)
255       Ops.push_back(Src1);
256     else
257       Ops.push_back(Src2);
258   }
259   return true;
260 }
261 
262 void CombinerHelper::applyCombineShuffleVector(MachineInstr &MI,
263                                                const ArrayRef<Register> Ops) {
264   Register DstReg = MI.getOperand(0).getReg();
265   Builder.setInsertPt(*MI.getParent(), MI);
266   Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
267 
268   if (Ops.size() == 1)
269     Builder.buildCopy(NewDstReg, Ops[0]);
270   else
271     Builder.buildMerge(NewDstReg, Ops);
272 
273   MI.eraseFromParent();
274   replaceRegWith(MRI, DstReg, NewDstReg);
275 }
276 
277 namespace {
278 
279 /// Select a preference between two uses. CurrentUse is the current preference
280 /// while the *ForCandidate arguments describe the candidate under consideration.
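/// A sketch of the preference order implemented below: a defined extend
/// (G_SEXT/G_ZEXT) is preferred over G_ANYEXT, G_SEXT is preferred over G_ZEXT
/// when the types match, and otherwise the candidate with the larger type wins.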
281 PreferredTuple ChoosePreferredUse(PreferredTuple &CurrentUse,
282                                   const LLT TyForCandidate,
283                                   unsigned OpcodeForCandidate,
284                                   MachineInstr *MIForCandidate) {
285   if (!CurrentUse.Ty.isValid()) {
286     if (CurrentUse.ExtendOpcode == OpcodeForCandidate ||
287         CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT)
288       return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
289     return CurrentUse;
290   }
291 
292   // We permit the extend to hoist through basic blocks but this is only
293   // sensible if the target has extending loads. If you end up lowering back
294   // into a load and extend during the legalizer then the end result is
295   // hoisting the extend up to the load.
296 
297   // Prefer defined extensions to undefined extensions as these are more
298   // likely to reduce the number of instructions.
299   if (OpcodeForCandidate == TargetOpcode::G_ANYEXT &&
300       CurrentUse.ExtendOpcode != TargetOpcode::G_ANYEXT)
301     return CurrentUse;
302   else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT &&
303            OpcodeForCandidate != TargetOpcode::G_ANYEXT)
304     return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
305 
306   // Prefer sign extensions to zero extensions as sign-extensions tend to be
307   // more expensive.
308   if (CurrentUse.Ty == TyForCandidate) {
309     if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT &&
310         OpcodeForCandidate == TargetOpcode::G_ZEXT)
311       return CurrentUse;
312     else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ZEXT &&
313              OpcodeForCandidate == TargetOpcode::G_SEXT)
314       return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
315   }
316 
317   // This is potentially target specific. We've chosen the largest type
318   // because G_TRUNC is usually free. One potential catch with this is that
319   // some targets have a reduced number of larger registers than smaller
320   // registers and this choice potentially increases the live-range for the
321   // larger value.
322   if (TyForCandidate.getSizeInBits() > CurrentUse.Ty.getSizeInBits()) {
323     return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
324   }
325   return CurrentUse;
326 }
327 
328 /// Find a suitable place to insert some instructions and insert them. This
329 /// function accounts for special cases like inserting before a PHI node.
330 /// The current strategy for inserting before PHI's is to duplicate the
331 /// instructions for each predecessor. However, while that's ok for G_TRUNC
332 /// on most targets since it generally requires no code, other targets/cases may
333 /// want to try harder to find a dominating block.
334 static void InsertInsnsWithoutSideEffectsBeforeUse(
335     MachineIRBuilder &Builder, MachineInstr &DefMI, MachineOperand &UseMO,
336     std::function<void(MachineBasicBlock *, MachineBasicBlock::iterator,
337                        MachineOperand &UseMO)>
338         Inserter) {
339   MachineInstr &UseMI = *UseMO.getParent();
340 
341   MachineBasicBlock *InsertBB = UseMI.getParent();
342 
343   // If the use is a PHI then we want the predecessor block instead.
344   if (UseMI.isPHI()) {
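    // PHI operands come in (value, predecessor MBB) pairs, so the operand
    // immediately after the register use is the corresponding block.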
345     MachineOperand *PredBB = std::next(&UseMO);
346     InsertBB = PredBB->getMBB();
347   }
348 
349   // If the block is the same block as the def then we want to insert just after
350   // the def instead of at the start of the block.
351   if (InsertBB == DefMI.getParent()) {
352     MachineBasicBlock::iterator InsertPt = &DefMI;
353     Inserter(InsertBB, std::next(InsertPt), UseMO);
354     return;
355   }
356 
357   // Otherwise we want the start of the BB
358   Inserter(InsertBB, InsertBB->getFirstNonPHI(), UseMO);
359 }
360 } // end anonymous namespace
361 
362 bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
363   PreferredTuple Preferred;
364   if (matchCombineExtendingLoads(MI, Preferred)) {
365     applyCombineExtendingLoads(MI, Preferred);
366     return true;
367   }
368   return false;
369 }
370 
371 bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI,
372                                                 PreferredTuple &Preferred) {
373   // We match the loads and follow the uses to the extend instead of matching
374   // the extends and following the def to the load. This is because the load
375   // must remain in the same position for correctness (unless we also add code
376   // to find a safe place to sink it) whereas the extend is freely movable.
377   // It also prevents us from duplicating the load for the volatile case or just
378   // for performance.
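  // Illustrative example of the overall combine (made-up registers):
  //   %1:_(s8) = G_LOAD %ptr (load 1)
  //   %2:_(s32) = G_SEXT %1(s8)
  // rewrites to:
  //   %2:_(s32) = G_SEXTLOAD %ptr (load 1)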
379 
380   if (MI.getOpcode() != TargetOpcode::G_LOAD &&
381       MI.getOpcode() != TargetOpcode::G_SEXTLOAD &&
382       MI.getOpcode() != TargetOpcode::G_ZEXTLOAD)
383     return false;
384 
385   auto &LoadValue = MI.getOperand(0);
386   assert(LoadValue.isReg() && "Result wasn't a register?");
387 
388   LLT LoadValueTy = MRI.getType(LoadValue.getReg());
389   if (!LoadValueTy.isScalar())
390     return false;
391 
392   // Most architectures are going to legalize <s8 loads into at least a 1 byte
393   // load, and the MMOs can only describe memory accesses in multiples of bytes.
394   // If we try to perform extload combining on those, we can end up with
395   // %a(s8) = extload %ptr (load 1 byte from %ptr)
396   // ... which is an illegal extload instruction.
397   if (LoadValueTy.getSizeInBits() < 8)
398     return false;
399 
400   // For non power-of-2 types, they will very likely be legalized into multiple
401   // loads. Don't bother trying to match them into extending loads.
402   if (!isPowerOf2_32(LoadValueTy.getSizeInBits()))
403     return false;
404 
405   // Find the preferred type aside from the any-extends (unless it's the only
406   // one) and non-extending ops. We'll emit an extending load to that type
407   // and emit a variant of (extend (trunc X)) for the others according to the
408   // relative type sizes. At the same time, pick an extend to use based on the
409   // extend involved in the chosen type.
410   unsigned PreferredOpcode = MI.getOpcode() == TargetOpcode::G_LOAD
411                                  ? TargetOpcode::G_ANYEXT
412                                  : MI.getOpcode() == TargetOpcode::G_SEXTLOAD
413                                        ? TargetOpcode::G_SEXT
414                                        : TargetOpcode::G_ZEXT;
415   Preferred = {LLT(), PreferredOpcode, nullptr};
416   for (auto &UseMI : MRI.use_nodbg_instructions(LoadValue.getReg())) {
417     if (UseMI.getOpcode() == TargetOpcode::G_SEXT ||
418         UseMI.getOpcode() == TargetOpcode::G_ZEXT ||
419         (UseMI.getOpcode() == TargetOpcode::G_ANYEXT)) {
420       // Check for legality.
421       if (LI) {
422         LegalityQuery::MemDesc MMDesc;
423         const auto &MMO = **MI.memoperands_begin();
424         MMDesc.SizeInBits = MMO.getSizeInBits();
425         MMDesc.AlignInBits = MMO.getAlign().value() * 8;
426         MMDesc.Ordering = MMO.getOrdering();
427         LLT UseTy = MRI.getType(UseMI.getOperand(0).getReg());
428         LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
429         if (LI->getAction({MI.getOpcode(), {UseTy, SrcTy}, {MMDesc}}).Action !=
430             LegalizeActions::Legal)
431           continue;
432       }
433       Preferred = ChoosePreferredUse(Preferred,
434                                      MRI.getType(UseMI.getOperand(0).getReg()),
435                                      UseMI.getOpcode(), &UseMI);
436     }
437   }
438 
439   // There were no extends
440   if (!Preferred.MI)
441     return false;
442   // It should be impossible to choose an extend without selecting a different
443   // type since by definition the result of an extend is larger.
444   assert(Preferred.Ty != LoadValueTy && "Extending to same type?");
445 
446   LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI);
447   return true;
448 }
449 
450 void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI,
451                                                 PreferredTuple &Preferred) {
452   // Rewrite the load to the chosen extending load.
453   Register ChosenDstReg = Preferred.MI->getOperand(0).getReg();
454 
455   // Inserter to insert a truncate back to the original type at a given point
456   // with some basic CSE to limit truncate duplication to one per BB.
457   DenseMap<MachineBasicBlock *, MachineInstr *> EmittedInsns;
458   auto InsertTruncAt = [&](MachineBasicBlock *InsertIntoBB,
459                            MachineBasicBlock::iterator InsertBefore,
460                            MachineOperand &UseMO) {
461     MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB);
462     if (PreviouslyEmitted) {
463       Observer.changingInstr(*UseMO.getParent());
464       UseMO.setReg(PreviouslyEmitted->getOperand(0).getReg());
465       Observer.changedInstr(*UseMO.getParent());
466       return;
467     }
468 
469     Builder.setInsertPt(*InsertIntoBB, InsertBefore);
470     Register NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
471     MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
472     EmittedInsns[InsertIntoBB] = NewMI;
473     replaceRegOpWith(MRI, UseMO, NewDstReg);
474   };
475 
476   Observer.changingInstr(MI);
477   MI.setDesc(
478       Builder.getTII().get(Preferred.ExtendOpcode == TargetOpcode::G_SEXT
479                                ? TargetOpcode::G_SEXTLOAD
480                                : Preferred.ExtendOpcode == TargetOpcode::G_ZEXT
481                                      ? TargetOpcode::G_ZEXTLOAD
482                                      : TargetOpcode::G_LOAD));
483 
484   // Rewrite all the uses to fix up the types.
485   auto &LoadValue = MI.getOperand(0);
486   SmallVector<MachineOperand *, 4> Uses;
487   for (auto &UseMO : MRI.use_operands(LoadValue.getReg()))
488     Uses.push_back(&UseMO);
489 
490   for (auto *UseMO : Uses) {
491     MachineInstr *UseMI = UseMO->getParent();
492 
493     // If the extend is compatible with the preferred extend then we should fix
494     // up the type and extend so that it uses the preferred use.
495     if (UseMI->getOpcode() == Preferred.ExtendOpcode ||
496         UseMI->getOpcode() == TargetOpcode::G_ANYEXT) {
497       Register UseDstReg = UseMI->getOperand(0).getReg();
498       MachineOperand &UseSrcMO = UseMI->getOperand(1);
499       const LLT UseDstTy = MRI.getType(UseDstReg);
500       if (UseDstReg != ChosenDstReg) {
501         if (Preferred.Ty == UseDstTy) {
502           // If the use has the same type as the preferred use, then merge
503           // the vregs and erase the extend. For example:
504           //    %1:_(s8) = G_LOAD ...
505           //    %2:_(s32) = G_SEXT %1(s8)
506           //    %3:_(s32) = G_ANYEXT %1(s8)
507           //    ... = ... %3(s32)
508           // rewrites to:
509           //    %2:_(s32) = G_SEXTLOAD ...
510           //    ... = ... %2(s32)
511           replaceRegWith(MRI, UseDstReg, ChosenDstReg);
512           Observer.erasingInstr(*UseMO->getParent());
513           UseMO->getParent()->eraseFromParent();
514         } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
515           // If the preferred size is smaller, then keep the extend but extend
516           // from the result of the extending load. For example:
517           //    %1:_(s8) = G_LOAD ...
518           //    %2:_(s32) = G_SEXT %1(s8)
519           //    %3:_(s64) = G_ANYEXT %1(s8)
520           //    ... = ... %3(s64)
521           // rewrites to:
522           //    %2:_(s32) = G_SEXTLOAD ...
523           //    %3:_(s64) = G_ANYEXT %2:_(s32)
524           //    ... = ... %3(s64)
525           replaceRegOpWith(MRI, UseSrcMO, ChosenDstReg);
526         } else {
527           // If the preferred size is large, then insert a truncate. For
528           // example:
529           //    %1:_(s8) = G_LOAD ...
530           //    %2:_(s64) = G_SEXT %1(s8)
531           //    %3:_(s32) = G_ZEXT %1(s8)
532           //    ... = ... %3(s32)
533           // rewrites to:
534           //    %2:_(s64) = G_SEXTLOAD ...
535           //    %4:_(s8) = G_TRUNC %2:_(s64)
536           //    %3:_(s32) = G_ZEXT %4:_(s8)
537           //    ... = ... %3(s32)
538           InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO,
539                                                  InsertTruncAt);
540         }
541         continue;
542       }
543       // The use is (one of) the uses of the preferred use we chose earlier.
544       // We're going to update the load to def this value later so just erase
545       // the old extend.
546       Observer.erasingInstr(*UseMO->getParent());
547       UseMO->getParent()->eraseFromParent();
548       continue;
549     }
550 
551     // The use isn't an extend. Truncate back to the type we originally loaded.
552     // This is free on many targets.
553     InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, InsertTruncAt);
554   }
555 
556   MI.getOperand(0).setReg(ChosenDstReg);
557   Observer.changedInstr(MI);
558 }
559 
560 bool CombinerHelper::isPredecessor(const MachineInstr &DefMI,
561                                    const MachineInstr &UseMI) {
562   assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
563          "shouldn't consider debug uses");
564   assert(DefMI.getParent() == UseMI.getParent());
565   if (&DefMI == &UseMI)
566     return false;
567 
568   // Loop through the basic block until we find one of the instructions.
569   const MachineBasicBlock &MBB = *DefMI.getParent();
570   for (const MachineInstr &CurMI : MBB)
571     if (&CurMI == &DefMI || &CurMI == &UseMI)
572       return &CurMI == &DefMI;
573   llvm_unreachable("Block must contain instructions");
574 }
575 
576 bool CombinerHelper::dominates(const MachineInstr &DefMI,
577                                const MachineInstr &UseMI) {
578   assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
579          "shouldn't consider debug uses");
580   if (MDT)
581     return MDT->dominates(&DefMI, &UseMI);
582   else if (DefMI.getParent() != UseMI.getParent())
583     return false;
584 
585   return isPredecessor(DefMI, UseMI);
586 }
587 
588 bool CombinerHelper::matchSextTruncSextLoad(MachineInstr &MI) {
589   assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
590   Register SrcReg = MI.getOperand(1).getReg();
591   Register LoadUser = SrcReg;
592 
593   if (MRI.getType(SrcReg).isVector())
594     return false;
595 
596   Register TruncSrc;
597   if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc))))
598     LoadUser = TruncSrc;
599 
600   uint64_t SizeInBits = MI.getOperand(2).getImm();
601   // If the source is a G_SEXTLOAD from the same bit width, then we don't
602   // need any extend at all, just a truncate.
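  // Illustrative example (made-up registers):
  //   %1:_(s64) = G_SEXTLOAD %ptr (load 1)
  //   %2:_(s32) = G_TRUNC %1(s64)
  //   %3:_(s32) = G_SEXT_INREG %2, 8
  // Here %3 is already equal to %2, so the G_SEXT_INREG can become a copy.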
603   if (auto *LoadMI = getOpcodeDef(TargetOpcode::G_SEXTLOAD, LoadUser, MRI)) {
604     const auto &MMO = **LoadMI->memoperands_begin();
605     // If truncating more than the original extended value, abort.
606     if (TruncSrc && MRI.getType(TruncSrc).getSizeInBits() < MMO.getSizeInBits())
607       return false;
608     if (MMO.getSizeInBits() == SizeInBits)
609       return true;
610   }
611   return false;
612 }
613 
614 bool CombinerHelper::applySextTruncSextLoad(MachineInstr &MI) {
615   assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
616   Builder.setInstrAndDebugLoc(MI);
617   Builder.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
618   MI.eraseFromParent();
619   return true;
620 }
621 
622 bool CombinerHelper::matchSextInRegOfLoad(
623     MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
624   assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
625 
626   // Only supports scalars for now.
627   if (MRI.getType(MI.getOperand(0).getReg()).isVector())
628     return false;
629 
630   Register SrcReg = MI.getOperand(1).getReg();
631   MachineInstr *LoadDef = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
632   if (!LoadDef || !MRI.hasOneNonDBGUse(LoadDef->getOperand(0).getReg()))
633     return false;
634 
635   // If the sign extend extends from a narrower width than the load's width,
636   // then we can narrow the load width when we combine to a G_SEXTLOAD.
637   auto &MMO = **LoadDef->memoperands_begin();
638   // Don't do this for non-simple loads.
639   if (MMO.isAtomic() || MMO.isVolatile())
640     return false;
641 
642   // Avoid widening the load at all.
643   unsigned NewSizeBits =
644       std::min((uint64_t)MI.getOperand(2).getImm(), MMO.getSizeInBits());
645 
646   // Don't generate G_SEXTLOADs with a < 1 byte width.
647   if (NewSizeBits < 8)
648     return false;
649   // Don't bother creating a non-power-2 sextload, it will likely be broken up
650   // anyway for most targets.
651   if (!isPowerOf2_32(NewSizeBits))
652     return false;
653   MatchInfo = std::make_tuple(LoadDef->getOperand(0).getReg(), NewSizeBits);
654   return true;
655 }
656 
657 bool CombinerHelper::applySextInRegOfLoad(
658     MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
659   assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
660   Register LoadReg;
661   unsigned ScalarSizeBits;
662   std::tie(LoadReg, ScalarSizeBits) = MatchInfo;
663   auto *LoadDef = MRI.getVRegDef(LoadReg);
664   assert(LoadDef && "Expected a load reg");
665 
666   // If we have the following:
667   // %ld = G_LOAD %ptr, (load 2)
668   // %ext = G_SEXT_INREG %ld, 8
669   //    ==>
670   // %ld = G_SEXTLOAD %ptr (load 1)
671 
672   auto &MMO = **LoadDef->memoperands_begin();
673   Builder.setInstrAndDebugLoc(MI);
674   auto &MF = Builder.getMF();
675   auto PtrInfo = MMO.getPointerInfo();
676   auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, ScalarSizeBits / 8);
677   Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(),
678                          LoadDef->getOperand(1).getReg(), *NewMMO);
679   MI.eraseFromParent();
680   return true;
681 }
682 
683 bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr,
684                                             Register &Base, Register &Offset) {
685   auto &MF = *MI.getParent()->getParent();
686   const auto &TLI = *MF.getSubtarget().getTargetLowering();
687 
688 #ifndef NDEBUG
689   unsigned Opcode = MI.getOpcode();
690   assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
691          Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);
692 #endif
693 
694   Base = MI.getOperand(1).getReg();
695   MachineInstr *BaseDef = MRI.getUniqueVRegDef(Base);
696   if (BaseDef && BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
697     return false;
698 
699   LLVM_DEBUG(dbgs() << "Searching for post-indexing opportunity for: " << MI);
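  // The pattern being searched for, roughly, is a G_PTR_ADD of this base that
  // could instead be produced as the write-back address of an indexed op
  // (illustrative, made-up registers):
  //   %val:_(s32) = G_LOAD %base(p0)
  //   %next:_(p0) = G_PTR_ADD %base, %offset
  // becomes
  //   %val:_(s32), %next:_(p0) = G_INDEXED_LOAD %base, %offset, 0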
700   // FIXME: The following use traversal needs a bail out for pathological cases.
701   for (auto &Use : MRI.use_nodbg_instructions(Base)) {
702     if (Use.getOpcode() != TargetOpcode::G_PTR_ADD)
703       continue;
704 
705     Offset = Use.getOperand(2).getReg();
706     if (!ForceLegalIndexing &&
707         !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ false, MRI)) {
708       LLVM_DEBUG(dbgs() << "    Ignoring candidate with illegal addrmode: "
709                         << Use);
710       continue;
711     }
712 
713     // Make sure the offset calculation is before the potentially indexed op.
714     // FIXME: we really care about dependency here. The offset calculation might
715     // be movable.
716     MachineInstr *OffsetDef = MRI.getUniqueVRegDef(Offset);
717     if (!OffsetDef || !dominates(*OffsetDef, MI)) {
718       LLVM_DEBUG(dbgs() << "    Ignoring candidate with offset after mem-op: "
719                         << Use);
720       continue;
721     }
722 
723     // FIXME: check whether all uses of Base are load/store with foldable
724     // addressing modes. If so, using the normal addr-modes is better than
725     // forming an indexed one.
726 
727     bool MemOpDominatesAddrUses = true;
728     for (auto &PtrAddUse :
729          MRI.use_nodbg_instructions(Use.getOperand(0).getReg())) {
730       if (!dominates(MI, PtrAddUse)) {
731         MemOpDominatesAddrUses = false;
732         break;
733       }
734     }
735 
736     if (!MemOpDominatesAddrUses) {
737       LLVM_DEBUG(
738           dbgs() << "    Ignoring candidate as memop does not dominate uses: "
739                  << Use);
740       continue;
741     }
742 
743     LLVM_DEBUG(dbgs() << "    Found match: " << Use);
744     Addr = Use.getOperand(0).getReg();
745     return true;
746   }
747 
748   return false;
749 }
750 
751 bool CombinerHelper::findPreIndexCandidate(MachineInstr &MI, Register &Addr,
752                                            Register &Base, Register &Offset) {
753   auto &MF = *MI.getParent()->getParent();
754   const auto &TLI = *MF.getSubtarget().getTargetLowering();
755 
756 #ifndef NDEBUG
757   unsigned Opcode = MI.getOpcode();
758   assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
759          Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);
760 #endif
761 
762   Addr = MI.getOperand(1).getReg();
763   MachineInstr *AddrDef = getOpcodeDef(TargetOpcode::G_PTR_ADD, Addr, MRI);
764   if (!AddrDef || MRI.hasOneNonDBGUse(Addr))
765     return false;
766 
767   Base = AddrDef->getOperand(1).getReg();
768   Offset = AddrDef->getOperand(2).getReg();
769 
770   LLVM_DEBUG(dbgs() << "Found potential pre-indexed load_store: " << MI);
771 
772   if (!ForceLegalIndexing &&
773       !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ true, MRI)) {
774     LLVM_DEBUG(dbgs() << "    Skipping, not legal for target");
775     return false;
776   }
777 
778   MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI);
779   if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
780     LLVM_DEBUG(dbgs() << "    Skipping, frame index would need copy anyway.");
781     return false;
782   }
783 
784   if (MI.getOpcode() == TargetOpcode::G_STORE) {
785     // Would require a copy.
786     if (Base == MI.getOperand(0).getReg()) {
787       LLVM_DEBUG(dbgs() << "    Skipping, storing base so need copy anyway.");
788       return false;
789     }
790 
791     // We're expecting one use of Addr in MI, but it could also be the
792     // value stored, which isn't actually dominated by the instruction.
793     if (MI.getOperand(0).getReg() == Addr) {
794       LLVM_DEBUG(dbgs() << "    Skipping, does not dominate all addr uses");
795       return false;
796     }
797   }
798 
799   // FIXME: check whether all uses of the base pointer are constant PtrAdds.
800   // That might allow us to end base's liveness here by adjusting the constant.
801 
802   for (auto &UseMI : MRI.use_nodbg_instructions(Addr)) {
803     if (!dominates(MI, UseMI)) {
804       LLVM_DEBUG(dbgs() << "    Skipping, does not dominate all addr uses.");
805       return false;
806     }
807   }
808 
809   return true;
810 }
811 
812 bool CombinerHelper::tryCombineIndexedLoadStore(MachineInstr &MI) {
813   IndexedLoadStoreMatchInfo MatchInfo;
814   if (matchCombineIndexedLoadStore(MI, MatchInfo)) {
815     applyCombineIndexedLoadStore(MI, MatchInfo);
816     return true;
817   }
818   return false;
819 }
820 
821 bool CombinerHelper::matchCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
822   unsigned Opcode = MI.getOpcode();
823   if (Opcode != TargetOpcode::G_LOAD && Opcode != TargetOpcode::G_SEXTLOAD &&
824       Opcode != TargetOpcode::G_ZEXTLOAD && Opcode != TargetOpcode::G_STORE)
825     return false;
826 
827   // For now, no targets actually support these opcodes so don't waste time
828   // running these unless we're forced to for testing.
829   if (!ForceLegalIndexing)
830     return false;
831 
832   MatchInfo.IsPre = findPreIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base,
833                                           MatchInfo.Offset);
834   if (!MatchInfo.IsPre &&
835       !findPostIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base,
836                               MatchInfo.Offset))
837     return false;
838 
839   return true;
840 }
841 
842 void CombinerHelper::applyCombineIndexedLoadStore(
843     MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
844   MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr);
845   MachineIRBuilder MIRBuilder(MI);
846   unsigned Opcode = MI.getOpcode();
847   bool IsStore = Opcode == TargetOpcode::G_STORE;
848   unsigned NewOpcode;
849   switch (Opcode) {
850   case TargetOpcode::G_LOAD:
851     NewOpcode = TargetOpcode::G_INDEXED_LOAD;
852     break;
853   case TargetOpcode::G_SEXTLOAD:
854     NewOpcode = TargetOpcode::G_INDEXED_SEXTLOAD;
855     break;
856   case TargetOpcode::G_ZEXTLOAD:
857     NewOpcode = TargetOpcode::G_INDEXED_ZEXTLOAD;
858     break;
859   case TargetOpcode::G_STORE:
860     NewOpcode = TargetOpcode::G_INDEXED_STORE;
861     break;
862   default:
863     llvm_unreachable("Unknown load/store opcode");
864   }
865 
866   auto MIB = MIRBuilder.buildInstr(NewOpcode);
867   if (IsStore) {
868     MIB.addDef(MatchInfo.Addr);
869     MIB.addUse(MI.getOperand(0).getReg());
870   } else {
871     MIB.addDef(MI.getOperand(0).getReg());
872     MIB.addDef(MatchInfo.Addr);
873   }
874 
875   MIB.addUse(MatchInfo.Base);
876   MIB.addUse(MatchInfo.Offset);
877   MIB.addImm(MatchInfo.IsPre);
878   MI.eraseFromParent();
879   AddrDef.eraseFromParent();
880 
881   LLVM_DEBUG(dbgs() << "    Combinined to indexed operation");
882 }
883 
884 bool CombinerHelper::matchOptBrCondByInvertingCond(MachineInstr &MI) {
885   if (MI.getOpcode() != TargetOpcode::G_BR)
886     return false;
887 
888   // Try to match the following:
889   // bb1:
890   //   G_BRCOND %c1, %bb2
891   //   G_BR %bb3
892   // bb2:
893   // ...
894   // bb3:
895 
896   // The above pattern does not have a fall through to the successor bb2, always
897   // resulting in a branch no matter which path is taken. Here we try to find
898   // and replace that pattern with conditional branch to bb3 and otherwise
899   // fallthrough to bb2. This is generally better for branch predictors.
900 
901   MachineBasicBlock *MBB = MI.getParent();
902   MachineBasicBlock::iterator BrIt(MI);
903   if (BrIt == MBB->begin())
904     return false;
905   assert(std::next(BrIt) == MBB->end() && "expected G_BR to be a terminator");
906 
907   MachineInstr *BrCond = &*std::prev(BrIt);
908   if (BrCond->getOpcode() != TargetOpcode::G_BRCOND)
909     return false;
910 
911   // Check that the next block is the conditional branch target.
912   if (!MBB->isLayoutSuccessor(BrCond->getOperand(1).getMBB()))
913     return false;
914   return true;
915 }
916 
917 void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI) {
918   MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
919   MachineBasicBlock::iterator BrIt(MI);
920   MachineInstr *BrCond = &*std::prev(BrIt);
921 
922   Builder.setInstrAndDebugLoc(*BrCond);
923   LLT Ty = MRI.getType(BrCond->getOperand(0).getReg());
924   // FIXME: Does int/fp matter for this? If so, we might need to restrict
925   // this to i1 only since we might not know for sure what kind of
926   // compare generated the condition value.
927   auto True = Builder.buildConstant(
928       Ty, getICmpTrueVal(getTargetLowering(), false, false));
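  // XOR-ing the condition with the target's "true" value (typically 1, or
  // all-ones, depending on the target's boolean contents) inverts the condition.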
929   auto Xor = Builder.buildXor(Ty, BrCond->getOperand(0), True);
930 
931   auto *FallthroughBB = BrCond->getOperand(1).getMBB();
932   Observer.changingInstr(MI);
933   MI.getOperand(0).setMBB(FallthroughBB);
934   Observer.changedInstr(MI);
935 
936   // Change the conditional branch to use the inverted condition and
937   // new target block.
938   Observer.changingInstr(*BrCond);
939   BrCond->getOperand(0).setReg(Xor.getReg(0));
940   BrCond->getOperand(1).setMBB(BrTarget);
941   Observer.changedInstr(*BrCond);
942 }
943 
944 static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
945   // On Darwin, -Os means optimize for size without hurting performance, so
946   // only really optimize for size when -Oz (MinSize) is used.
947   if (MF.getTarget().getTargetTriple().isOSDarwin())
948     return MF.getFunction().hasMinSize();
949   return MF.getFunction().hasOptSize();
950 }
951 
952 // Returns a list of types to use for memory op lowering in MemOps. A partial
953 // port of findOptimalMemOpLowering in TargetLowering.
954 static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
955                                           unsigned Limit, const MemOp &Op,
956                                           unsigned DstAS, unsigned SrcAS,
957                                           const AttributeList &FuncAttributes,
958                                           const TargetLowering &TLI) {
959   if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
960     return false;
961 
962   LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);
963 
964   if (Ty == LLT()) {
965     // Use the largest scalar type whose alignment constraints are satisfied.
966     // We only need to check DstAlign here as SrcAlign is always greater
967     // than or equal to DstAlign (or zero).
968     Ty = LLT::scalar(64);
969     if (Op.isFixedDstAlign())
970       while (Op.getDstAlign() < Ty.getSizeInBytes() &&
971              !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign()))
972         Ty = LLT::scalar(Ty.getSizeInBytes());
973     assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
974     // FIXME: check for the largest legal type we can load/store to.
975   }
976 
977   unsigned NumMemOps = 0;
978   uint64_t Size = Op.size();
979   while (Size) {
980     unsigned TySize = Ty.getSizeInBytes();
981     while (TySize > Size) {
982       // For now, only use non-vector loads / stores for the left-over pieces.
983       LLT NewTy = Ty;
984       // FIXME: check for mem op safety and legality of the types. Not all of
985       // SDAGisms map cleanly to GISel concepts.
986       if (NewTy.isVector())
987         NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
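      // Drop to the next smaller power-of-2 scalar width, e.g. s64 -> s32.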
988       NewTy = LLT::scalar(PowerOf2Floor(NewTy.getSizeInBits() - 1));
989       unsigned NewTySize = NewTy.getSizeInBytes();
990       assert(NewTySize > 0 && "Could not find appropriate type");
991 
992       // If the new LLT cannot cover all of the remaining bits, then consider
993       // issuing a (or a pair of) unaligned and overlapping load / store.
994       bool Fast;
995       // Need to get a VT equivalent for allowsMisalignedMemoryAccesses().
996       MVT VT = getMVTForLLT(Ty);
997       if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
998           TLI.allowsMisalignedMemoryAccesses(
999               VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign().value() : 0,
1000               MachineMemOperand::MONone, &Fast) &&
1001           Fast)
1002         TySize = Size;
1003       else {
1004         Ty = NewTy;
1005         TySize = NewTySize;
1006       }
1007     }
1008 
1009     if (++NumMemOps > Limit)
1010       return false;
1011 
1012     MemOps.push_back(Ty);
1013     Size -= TySize;
1014   }
1015 
1016   return true;
1017 }
1018 
1019 static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
1020   if (Ty.isVector())
1021     return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
1022                                 Ty.getNumElements());
1023   return IntegerType::get(C, Ty.getSizeInBits());
1024 }
1025 
1026 // Get a vectorized representation of the memset value operand, GISel edition.
1027 static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
1028   MachineRegisterInfo &MRI = *MIB.getMRI();
1029   unsigned NumBits = Ty.getScalarSizeInBits();
1030   auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI);
1031   if (!Ty.isVector() && ValVRegAndVal) {
1032     unsigned KnownVal = ValVRegAndVal->Value;
1033     APInt Scalar = APInt(8, KnownVal);
1034     APInt SplatVal = APInt::getSplat(NumBits, Scalar);
1035     return MIB.buildConstant(Ty, SplatVal).getReg(0);
1036   }
1037 
1038   // Extend the byte value to the larger type, and then multiply by a magic
1039   // value 0x010101... in order to replicate it across every byte.
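  // e.g. for a 32-bit type: 0xAB * 0x01010101 == 0xABABABAB.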
1040   // Unless it's zero, in which case just emit a larger G_CONSTANT 0.
1041   if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
1042     return MIB.buildConstant(Ty, 0).getReg(0);
1043   }
1044 
1045   LLT ExtType = Ty.getScalarType();
1046   auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
1047   if (NumBits > 8) {
1048     APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
1049     auto MagicMI = MIB.buildConstant(ExtType, Magic);
1050     Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
1051   }
1052 
1053   // For vector types create a G_BUILD_VECTOR.
1054   if (Ty.isVector())
1055     Val = MIB.buildSplatVector(Ty, Val).getReg(0);
1056 
1057   return Val;
1058 }
1059 
1060 bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst,
1061                                     Register Val, unsigned KnownLen,
1062                                     Align Alignment, bool IsVolatile) {
1063   auto &MF = *MI.getParent()->getParent();
1064   const auto &TLI = *MF.getSubtarget().getTargetLowering();
1065   auto &DL = MF.getDataLayout();
1066   LLVMContext &C = MF.getFunction().getContext();
1067 
1068   assert(KnownLen != 0 && "Have a zero length memset length!");
1069 
1070   bool DstAlignCanChange = false;
1071   MachineFrameInfo &MFI = MF.getFrameInfo();
1072   bool OptSize = shouldLowerMemFuncForSize(MF);
1073 
1074   MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
1075   if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
1076     DstAlignCanChange = true;
1077 
1078   unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
1079   std::vector<LLT> MemOps;
1080 
1081   const auto &DstMMO = **MI.memoperands_begin();
1082   MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
1083 
1084   auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI);
1085   bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
1086 
1087   if (!findGISelOptimalMemOpLowering(MemOps, Limit,
1088                                      MemOp::Set(KnownLen, DstAlignCanChange,
1089                                                 Alignment,
1090                                                 /*IsZeroMemset=*/IsZeroVal,
1091                                                 /*IsVolatile=*/IsVolatile),
1092                                      DstPtrInfo.getAddrSpace(), ~0u,
1093                                      MF.getFunction().getAttributes(), TLI))
1094     return false;
1095 
1096   if (DstAlignCanChange) {
1097     // Get an estimate of the type from the LLT.
1098     Type *IRTy = getTypeForLLT(MemOps[0], C);
1099     Align NewAlign = DL.getABITypeAlign(IRTy);
1100     if (NewAlign > Alignment) {
1101       Alignment = NewAlign;
1102       unsigned FI = FIDef->getOperand(1).getIndex();
1103       // Give the stack frame object a larger alignment if needed.
1104       if (MFI.getObjectAlign(FI) < Alignment)
1105         MFI.setObjectAlignment(FI, Alignment);
1106     }
1107   }
1108 
1109   MachineIRBuilder MIB(MI);
1110   // Find the largest store and generate the bit pattern for it.
1111   LLT LargestTy = MemOps[0];
1112   for (unsigned i = 1; i < MemOps.size(); i++)
1113     if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
1114       LargestTy = MemOps[i];
1115 
1116   // The memset stored value is always defined as an s8, so in order to make it
1117   // work with larger store types we need to repeat the bit pattern across the
1118   // wider type.
1119   Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);
1120 
1121   if (!MemSetValue)
1122     return false;
1123 
1124   // Generate the stores. For each store type in the list, we generate the
1125   // matching store of that type to the destination address.
1126   LLT PtrTy = MRI.getType(Dst);
1127   unsigned DstOff = 0;
1128   unsigned Size = KnownLen;
1129   for (unsigned I = 0; I < MemOps.size(); I++) {
1130     LLT Ty = MemOps[I];
1131     unsigned TySize = Ty.getSizeInBytes();
1132     if (TySize > Size) {
1133       // Issuing an unaligned load / store pair that overlaps with the previous
1134       // pair. Adjust the offset accordingly.
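      // Illustrative: a 7-byte memset lowered as two s32 stores (if the target
      // allows fast overlapping accesses) emits the second store at offset 3,
      // overlapping the first store by one byte.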
1135       assert(I == MemOps.size() - 1 && I != 0);
1136       DstOff -= TySize - Size;
1137     }
1138 
1139     // If this store is smaller than the largest store see whether we can get
1140     // the smaller value for free with a truncate.
1141     Register Value = MemSetValue;
1142     if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
1143       MVT VT = getMVTForLLT(Ty);
1144       MVT LargestVT = getMVTForLLT(LargestTy);
1145       if (!LargestTy.isVector() && !Ty.isVector() &&
1146           TLI.isTruncateFree(LargestVT, VT))
1147         Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
1148       else
1149         Value = getMemsetValue(Val, Ty, MIB);
1150       if (!Value)
1151         return false;
1152     }
1153 
1154     auto *StoreMMO =
1155         MF.getMachineMemOperand(&DstMMO, DstOff, Ty.getSizeInBytes());
1156 
1157     Register Ptr = Dst;
1158     if (DstOff != 0) {
1159       auto Offset =
1160           MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
1161       Ptr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
1162     }
1163 
1164     MIB.buildStore(Value, Ptr, *StoreMMO);
1165     DstOff += Ty.getSizeInBytes();
1166     Size -= TySize;
1167   }
1168 
1169   MI.eraseFromParent();
1170   return true;
1171 }
1172 
1173 bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst,
1174                                     Register Src, unsigned KnownLen,
1175                                     Align DstAlign, Align SrcAlign,
1176                                     bool IsVolatile) {
1177   auto &MF = *MI.getParent()->getParent();
1178   const auto &TLI = *MF.getSubtarget().getTargetLowering();
1179   auto &DL = MF.getDataLayout();
1180   LLVMContext &C = MF.getFunction().getContext();
1181 
1182   assert(KnownLen != 0 && "Have a zero length memcpy length!");
1183 
1184   bool DstAlignCanChange = false;
1185   MachineFrameInfo &MFI = MF.getFrameInfo();
1186   bool OptSize = shouldLowerMemFuncForSize(MF);
1187   Align Alignment = commonAlignment(DstAlign, SrcAlign);
1188 
1189   MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
1190   if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
1191     DstAlignCanChange = true;
1192 
1193   // FIXME: infer better src pointer alignment like SelectionDAG does here.
1194   // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
1195   // if the memcpy is in a tail call position.
1196 
1197   unsigned Limit = TLI.getMaxStoresPerMemcpy(OptSize);
1198   std::vector<LLT> MemOps;
1199 
1200   const auto &DstMMO = **MI.memoperands_begin();
1201   const auto &SrcMMO = **std::next(MI.memoperands_begin());
1202   MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
1203   MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
1204 
1205   if (!findGISelOptimalMemOpLowering(
1206           MemOps, Limit,
1207           MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
1208                       IsVolatile),
1209           DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
1210           MF.getFunction().getAttributes(), TLI))
1211     return false;
1212 
1213   if (DstAlignCanChange) {
1214     // Get an estimate of the type from the LLT.
1215     Type *IRTy = getTypeForLLT(MemOps[0], C);
1216     Align NewAlign = DL.getABITypeAlign(IRTy);
1217 
1218     // Don't promote to an alignment that would require dynamic stack
1219     // realignment.
1220     const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1221     if (!TRI->needsStackRealignment(MF))
1222       while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
1223         NewAlign = NewAlign / 2;
1224 
1225     if (NewAlign > Alignment) {
1226       Alignment = NewAlign;
1227       unsigned FI = FIDef->getOperand(1).getIndex();
1228       // Give the stack frame object a larger alignment if needed.
1229       if (MFI.getObjectAlign(FI) < Alignment)
1230         MFI.setObjectAlignment(FI, Alignment);
1231     }
1232   }
1233 
1234   LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");
1235 
1236   MachineIRBuilder MIB(MI);
1237   // Now we need to emit a pair of load and stores for each of the types we've
1238   // collected. I.e. for each type, generate a load from the source pointer of
1239   // that type width, and then generate a corresponding store to the dest buffer
1240   // of that value loaded. This can result in a sequence of loads and stores
1241   // of mixed types, depending on what the target specifies as good types to use.
1242   unsigned CurrOffset = 0;
1243   LLT PtrTy = MRI.getType(Src);
1244   unsigned Size = KnownLen;
1245   for (auto CopyTy : MemOps) {
1246     // Issuing an unaligned load / store pair  that overlaps with the previous
1247     // pair. Adjust the offset accordingly.
1248     if (CopyTy.getSizeInBytes() > Size)
1249       CurrOffset -= CopyTy.getSizeInBytes() - Size;
1250 
1251     // Construct MMOs for the accesses.
1252     auto *LoadMMO =
1253         MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
1254     auto *StoreMMO =
1255         MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
1256 
1257     // Create the load.
1258     Register LoadPtr = Src;
1259     Register Offset;
1260     if (CurrOffset != 0) {
1261       Offset = MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset)
1262                    .getReg(0);
1263       LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0);
1264     }
1265     auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
1266 
1267     // Create the store.
1268     Register StorePtr =
1269         CurrOffset == 0 ? Dst : MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
1270     MIB.buildStore(LdVal, StorePtr, *StoreMMO);
1271     CurrOffset += CopyTy.getSizeInBytes();
1272     Size -= CopyTy.getSizeInBytes();
1273   }
1274 
1275   MI.eraseFromParent();
1276   return true;
1277 }
1278 
1279 bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst,
1280                                      Register Src, unsigned KnownLen,
1281                                      Align DstAlign, Align SrcAlign,
1282                                      bool IsVolatile) {
1283   auto &MF = *MI.getParent()->getParent();
1284   const auto &TLI = *MF.getSubtarget().getTargetLowering();
1285   auto &DL = MF.getDataLayout();
1286   LLVMContext &C = MF.getFunction().getContext();
1287 
1288   assert(KnownLen != 0 && "Have a zero length memmove!");
1289 
1290   bool DstAlignCanChange = false;
1291   MachineFrameInfo &MFI = MF.getFrameInfo();
1292   bool OptSize = shouldLowerMemFuncForSize(MF);
1293   Align Alignment = commonAlignment(DstAlign, SrcAlign);
1294 
1295   MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
1296   if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
1297     DstAlignCanChange = true;
1298 
1299   unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
1300   std::vector<LLT> MemOps;
1301 
1302   const auto &DstMMO = **MI.memoperands_begin();
1303   const auto &SrcMMO = **std::next(MI.memoperands_begin());
1304   MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
1305   MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
1306 
1307   // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
1308   // to a bug in its findOptimalMemOpLowering implementation. For now do the
1309   // same thing here.
1310   if (!findGISelOptimalMemOpLowering(
1311           MemOps, Limit,
1312           MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
1313                       /*IsVolatile*/ true),
1314           DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
1315           MF.getFunction().getAttributes(), TLI))
1316     return false;
1317 
1318   if (DstAlignCanChange) {
1319     // Get an estimate of the type from the LLT.
1320     Type *IRTy = getTypeForLLT(MemOps[0], C);
1321     Align NewAlign = DL.getABITypeAlign(IRTy);
1322 
1323     // Don't promote to an alignment that would require dynamic stack
1324     // realignment.
1325     const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1326     if (!TRI->needsStackRealignment(MF))
1327       while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
1328         NewAlign = NewAlign / 2;
1329 
1330     if (NewAlign > Alignment) {
1331       Alignment = NewAlign;
1332       unsigned FI = FIDef->getOperand(1).getIndex();
1333       // Give the stack frame object a larger alignment if needed.
1334       if (MFI.getObjectAlign(FI) < Alignment)
1335         MFI.setObjectAlignment(FI, Alignment);
1336     }
1337   }
1338 
1339   LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");
1340 
1341   MachineIRBuilder MIB(MI);
1342   // Memmove requires that we perform all of the loads before issuing the
1343   // stores, since the source and destination may overlap. Apart from that,
1344   // this loop does much the same thing as the memcpy codegen function.
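  // Illustrative sketch: if %dst happens to equal %src + 4 for an 8-byte
  // memmove, storing the first chunk before loading the second would clobber
  // source bytes that still need to be read. Emitting every load up front and
  // all of the stores in a second loop below avoids that.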
1345   unsigned CurrOffset = 0;
1346   LLT PtrTy = MRI.getType(Src);
1347   SmallVector<Register, 16> LoadVals;
1348   for (auto CopyTy : MemOps) {
1349     // Construct MMO for the load.
1350     auto *LoadMMO =
1351         MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
1352 
1353     // Create the load.
1354     Register LoadPtr = Src;
1355     if (CurrOffset != 0) {
1356       auto Offset =
1357           MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
1358       LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0);
1359     }
1360     LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
1361     CurrOffset += CopyTy.getSizeInBytes();
1362   }
1363 
1364   CurrOffset = 0;
1365   for (unsigned I = 0; I < MemOps.size(); ++I) {
1366     LLT CopyTy = MemOps[I];
1367     // Now store the values loaded.
1368     auto *StoreMMO =
1369         MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
1370 
1371     Register StorePtr = Dst;
1372     if (CurrOffset != 0) {
1373       auto Offset =
1374           MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
1375       StorePtr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
1376     }
1377     MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
1378     CurrOffset += CopyTy.getSizeInBytes();
1379   }
1380   MI.eraseFromParent();
1381   return true;
1382 }
1383 
1384 bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
1385   const unsigned Opc = MI.getOpcode();
1386   // This combine is fairly complex so it's not written with a separate
1387   // matcher function.
1388   assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE ||
1389           Opc == TargetOpcode::G_MEMSET) && "Expected memcpy like instruction");
1390 
1391   auto MMOIt = MI.memoperands_begin();
1392   const MachineMemOperand *MemOp = *MMOIt;
1393   bool IsVolatile = MemOp->isVolatile();
1394   // Don't try to optimize volatile memory operations.
1395   if (IsVolatile)
1396     return false;
1397 
1398   Align DstAlign = MemOp->getBaseAlign();
1399   Align SrcAlign;
1400   Register Dst = MI.getOperand(0).getReg();
1401   Register Src = MI.getOperand(1).getReg();
1402   Register Len = MI.getOperand(2).getReg();
1403 
1404   if (Opc != TargetOpcode::G_MEMSET) {
1405     assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
1406     MemOp = *(++MMOIt);
1407     SrcAlign = MemOp->getBaseAlign();
1408   }
1409 
1410   // See if this is a constant-length copy.
1411   auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI);
1412   if (!LenVRegAndVal)
1413     return false; // Leave it to the legalizer to lower it to a libcall.
1414   unsigned KnownLen = LenVRegAndVal->Value;
1415 
1416   if (KnownLen == 0) {
1417     MI.eraseFromParent();
1418     return true;
1419   }
1420 
1421   if (MaxLen && KnownLen > MaxLen)
1422     return false;
1423 
1424   if (Opc == TargetOpcode::G_MEMCPY)
1425     return optimizeMemcpy(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
1426   if (Opc == TargetOpcode::G_MEMMOVE)
1427     return optimizeMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
1428   if (Opc == TargetOpcode::G_MEMSET)
1429     return optimizeMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
1430   return false;
1431 }
1432 
1433 static Optional<APFloat> constantFoldFpUnary(unsigned Opcode, LLT DstTy,
1434                                              const Register Op,
1435                                              const MachineRegisterInfo &MRI) {
1436   const ConstantFP *MaybeCst = getConstantFPVRegVal(Op, MRI);
1437   if (!MaybeCst)
1438     return None;
1439 
1440   APFloat V = MaybeCst->getValueAPF();
1441   switch (Opcode) {
1442   default:
1443     llvm_unreachable("Unexpected opcode!");
1444   case TargetOpcode::G_FNEG: {
1445     V.changeSign();
1446     return V;
1447   }
1448   case TargetOpcode::G_FABS: {
1449     V.clearSign();
1450     return V;
1451   }
1452   case TargetOpcode::G_FPTRUNC:
1453     break;
1454   case TargetOpcode::G_FSQRT: {
1455     bool Unused;
1456     V.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &Unused);
1457     V = APFloat(sqrt(V.convertToDouble()));
1458     break;
1459   }
1460   case TargetOpcode::G_FLOG2: {
1461     bool Unused;
1462     V.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &Unused);
1463     V = APFloat(log2(V.convertToDouble()));
1464     break;
1465   }
1466   }
1467   // Convert `APFloat` to the IEEE type appropriate for `DstTy`; otherwise
1468   // `buildFConstant` will assert on a size mismatch. Only `G_FPTRUNC`,
1469   // `G_FSQRT`, and `G_FLOG2` reach here.
1470   bool Unused;
1471   V.convert(getFltSemanticForLLT(DstTy), APFloat::rmNearestTiesToEven, &Unused);
1472   return V;
1473 }
1474 
1475 bool CombinerHelper::matchCombineConstantFoldFpUnary(MachineInstr &MI,
1476                                                      Optional<APFloat> &Cst) {
1477   Register DstReg = MI.getOperand(0).getReg();
1478   Register SrcReg = MI.getOperand(1).getReg();
1479   LLT DstTy = MRI.getType(DstReg);
1480   Cst = constantFoldFpUnary(MI.getOpcode(), DstTy, SrcReg, MRI);
1481   return Cst.hasValue();
1482 }
1483 
1484 bool CombinerHelper::applyCombineConstantFoldFpUnary(MachineInstr &MI,
1485                                                      Optional<APFloat> &Cst) {
1486   assert(Cst.hasValue() && "Optional is unexpectedly empty!");
1487   Builder.setInstrAndDebugLoc(MI);
1488   MachineFunction &MF = Builder.getMF();
1489   auto *FPVal = ConstantFP::get(MF.getFunction().getContext(), *Cst);
1490   Register DstReg = MI.getOperand(0).getReg();
1491   Builder.buildFConstant(DstReg, *FPVal);
1492   MI.eraseFromParent();
1493   return true;
1494 }
1495 
1496 bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI,
1497                                            PtrAddChain &MatchInfo) {
1498   // We're trying to match the following pattern:
1499   //   %t1 = G_PTR_ADD %base, G_CONSTANT imm1
1500   //   %root = G_PTR_ADD %t1, G_CONSTANT imm2
1501   // -->
1502   //   %root = G_PTR_ADD %base, G_CONSTANT (imm1 + imm2)
1503 
1504   if (MI.getOpcode() != TargetOpcode::G_PTR_ADD)
1505     return false;
1506 
1507   Register Add2 = MI.getOperand(1).getReg();
1508   Register Imm1 = MI.getOperand(2).getReg();
1509   auto MaybeImmVal = getConstantVRegValWithLookThrough(Imm1, MRI);
1510   if (!MaybeImmVal)
1511     return false;
1512 
1513   MachineInstr *Add2Def = MRI.getUniqueVRegDef(Add2);
1514   if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD)
1515     return false;
1516 
1517   Register Base = Add2Def->getOperand(1).getReg();
1518   Register Imm2 = Add2Def->getOperand(2).getReg();
1519   auto MaybeImm2Val = getConstantVRegValWithLookThrough(Imm2, MRI);
1520   if (!MaybeImm2Val)
1521     return false;
1522 
1523   // Pass the combined immediate to the apply function.
1524   MatchInfo.Imm = MaybeImmVal->Value + MaybeImm2Val->Value;
1525   MatchInfo.Base = Base;
1526   return true;
1527 }
1528 
1529 bool CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI,
1530                                            PtrAddChain &MatchInfo) {
1531   assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
1532   MachineIRBuilder MIB(MI);
1533   LLT OffsetTy = MRI.getType(MI.getOperand(2).getReg());
1534   auto NewOffset = MIB.buildConstant(OffsetTy, MatchInfo.Imm);
1535   Observer.changingInstr(MI);
1536   MI.getOperand(1).setReg(MatchInfo.Base);
1537   MI.getOperand(2).setReg(NewOffset.getReg(0));
1538   Observer.changedInstr(MI);
1539   return true;
1540 }
1541 
1542 bool CombinerHelper::matchShiftImmedChain(MachineInstr &MI,
1543                                           RegisterImmPair &MatchInfo) {
1544   // We're trying to match the following pattern with any of
1545   // G_SHL/G_ASHR/G_LSHR/G_SSHLSAT/G_USHLSAT shift instructions:
1546   //   %t1 = SHIFT %base, G_CONSTANT imm1
1547   //   %root = SHIFT %t1, G_CONSTANT imm2
1548   // -->
1549   //   %root = SHIFT %base, G_CONSTANT (imm1 + imm2)
1550 
1551   unsigned Opcode = MI.getOpcode();
1552   assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1553           Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
1554           Opcode == TargetOpcode::G_USHLSAT) &&
1555          "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
1556 
1557   Register Shl2 = MI.getOperand(1).getReg();
1558   Register Imm1 = MI.getOperand(2).getReg();
1559   auto MaybeImmVal = getConstantVRegValWithLookThrough(Imm1, MRI);
1560   if (!MaybeImmVal)
1561     return false;
1562 
1563   MachineInstr *Shl2Def = MRI.getUniqueVRegDef(Shl2);
1564   if (Shl2Def->getOpcode() != Opcode)
1565     return false;
1566 
1567   Register Base = Shl2Def->getOperand(1).getReg();
1568   Register Imm2 = Shl2Def->getOperand(2).getReg();
1569   auto MaybeImm2Val = getConstantVRegValWithLookThrough(Imm2, MRI);
1570   if (!MaybeImm2Val)
1571     return false;
1572 
1573   // Pass the combined immediate to the apply function.
1574   MatchInfo.Imm = MaybeImmVal->Value + MaybeImm2Val->Value;
1575   MatchInfo.Reg = Base;
1576 
1577   // There is no simple replacement for a saturating unsigned left shift that
1578   // exceeds the scalar size.
1579   if (Opcode == TargetOpcode::G_USHLSAT &&
1580       MatchInfo.Imm >= MRI.getType(Shl2).getScalarSizeInBits())
1581     return false;
1582 
1583   return true;
1584 }
1585 
1586 bool CombinerHelper::applyShiftImmedChain(MachineInstr &MI,
1587                                           RegisterImmPair &MatchInfo) {
1588   unsigned Opcode = MI.getOpcode();
1589   assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1590           Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
1591           Opcode == TargetOpcode::G_USHLSAT) &&
1592          "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
1593 
1594   Builder.setInstrAndDebugLoc(MI);
1595   LLT Ty = MRI.getType(MI.getOperand(1).getReg());
1596   unsigned const ScalarSizeInBits = Ty.getScalarSizeInBits();
1597   auto Imm = MatchInfo.Imm;
1598 
1599   if (Imm >= ScalarSizeInBits) {
1600     // Any logical shift that exceeds scalar size will produce zero.
1601     if (Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR) {
1602       Builder.buildConstant(MI.getOperand(0), 0);
1603       MI.eraseFromParent();
1604       return true;
1605     }
1606     // Arithmetic shift and saturating signed left shift have no effect beyond
1607     // scalar size.
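    // E.g. (illustrative) on s32, folding (ashr (ashr %x, 20), 20) yields a
    // combined amount of 40, which is clamped to 31; the equivalent logical
    // shift chain is known to produce zero and is handled above instead.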
1608     Imm = ScalarSizeInBits - 1;
1609   }
1610 
1611   LLT ImmTy = MRI.getType(MI.getOperand(2).getReg());
1612   Register NewImm = Builder.buildConstant(ImmTy, Imm).getReg(0);
1613   Observer.changingInstr(MI);
1614   MI.getOperand(1).setReg(MatchInfo.Reg);
1615   MI.getOperand(2).setReg(NewImm);
1616   Observer.changedInstr(MI);
1617   return true;
1618 }
1619 
1620 bool CombinerHelper::matchShiftOfShiftedLogic(MachineInstr &MI,
1621                                               ShiftOfShiftedLogic &MatchInfo) {
1622   // We're trying to match the following pattern with any of
1623   // G_SHL/G_ASHR/G_LSHR/G_USHLSAT/G_SSHLSAT shift instructions in combination
1624   // with any of G_AND/G_OR/G_XOR logic instructions.
1625   //   %t1 = SHIFT %X, G_CONSTANT C0
1626   //   %t2 = LOGIC %t1, %Y
1627   //   %root = SHIFT %t2, G_CONSTANT C1
1628   // -->
1629   //   %t3 = SHIFT %X, G_CONSTANT (C0+C1)
1630   //   %t4 = SHIFT %Y, G_CONSTANT C1
1631   //   %root = LOGIC %t3, %t4
1632   unsigned ShiftOpcode = MI.getOpcode();
1633   assert((ShiftOpcode == TargetOpcode::G_SHL ||
1634           ShiftOpcode == TargetOpcode::G_ASHR ||
1635           ShiftOpcode == TargetOpcode::G_LSHR ||
1636           ShiftOpcode == TargetOpcode::G_USHLSAT ||
1637           ShiftOpcode == TargetOpcode::G_SSHLSAT) &&
1638          "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
1639 
1640   // Match a one-use bitwise logic op.
1641   Register LogicDest = MI.getOperand(1).getReg();
1642   if (!MRI.hasOneNonDBGUse(LogicDest))
1643     return false;
1644 
1645   MachineInstr *LogicMI = MRI.getUniqueVRegDef(LogicDest);
1646   unsigned LogicOpcode = LogicMI->getOpcode();
1647   if (LogicOpcode != TargetOpcode::G_AND && LogicOpcode != TargetOpcode::G_OR &&
1648       LogicOpcode != TargetOpcode::G_XOR)
1649     return false;
1650 
1651   // Find a matching one-use shift by constant.
1652   const Register C1 = MI.getOperand(2).getReg();
1653   auto MaybeImmVal = getConstantVRegValWithLookThrough(C1, MRI);
1654   if (!MaybeImmVal)
1655     return false;
1656 
1657   const uint64_t C1Val = MaybeImmVal->Value;
1658 
1659   auto matchFirstShift = [&](const MachineInstr *MI, uint64_t &ShiftVal) {
1660     // The shift must have the same opcode as the outer shift and be single-use.
1661     if (MI->getOpcode() != ShiftOpcode ||
1662         !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1663       return false;
1664 
1665     // Must be a constant.
1666     auto MaybeImmVal =
1667         getConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1668     if (!MaybeImmVal)
1669       return false;
1670 
1671     ShiftVal = MaybeImmVal->Value;
1672     return true;
1673   };
1674 
1675   // Logic ops are commutative, so check each operand for a match.
1676   Register LogicMIReg1 = LogicMI->getOperand(1).getReg();
1677   MachineInstr *LogicMIOp1 = MRI.getUniqueVRegDef(LogicMIReg1);
1678   Register LogicMIReg2 = LogicMI->getOperand(2).getReg();
1679   MachineInstr *LogicMIOp2 = MRI.getUniqueVRegDef(LogicMIReg2);
1680   uint64_t C0Val;
1681 
1682   if (matchFirstShift(LogicMIOp1, C0Val)) {
1683     MatchInfo.LogicNonShiftReg = LogicMIReg2;
1684     MatchInfo.Shift2 = LogicMIOp1;
1685   } else if (matchFirstShift(LogicMIOp2, C0Val)) {
1686     MatchInfo.LogicNonShiftReg = LogicMIReg1;
1687     MatchInfo.Shift2 = LogicMIOp2;
1688   } else
1689     return false;
1690 
1691   MatchInfo.ValSum = C0Val + C1Val;
1692 
1693   // The fold is not valid if the sum of the shift values exceeds bitwidth.
1694   if (MatchInfo.ValSum >= MRI.getType(LogicDest).getScalarSizeInBits())
1695     return false;
1696 
1697   MatchInfo.Logic = LogicMI;
1698   return true;
1699 }
1700 
1701 bool CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI,
1702                                               ShiftOfShiftedLogic &MatchInfo) {
1703   unsigned Opcode = MI.getOpcode();
1704   assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1705           Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_USHLSAT ||
1706           Opcode == TargetOpcode::G_SSHLSAT) &&
1707          "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
1708 
1709   LLT ShlType = MRI.getType(MI.getOperand(2).getReg());
1710   LLT DestType = MRI.getType(MI.getOperand(0).getReg());
1711   Builder.setInstrAndDebugLoc(MI);
1712 
1713   Register Const = Builder.buildConstant(ShlType, MatchInfo.ValSum).getReg(0);
1714 
1715   Register Shift1Base = MatchInfo.Shift2->getOperand(1).getReg();
1716   Register Shift1 =
1717       Builder.buildInstr(Opcode, {DestType}, {Shift1Base, Const}).getReg(0);
1718 
1719   Register Shift2Const = MI.getOperand(2).getReg();
1720   Register Shift2 = Builder
1721                         .buildInstr(Opcode, {DestType},
1722                                     {MatchInfo.LogicNonShiftReg, Shift2Const})
1723                         .getReg(0);
1724 
1725   Register Dest = MI.getOperand(0).getReg();
1726   Builder.buildInstr(MatchInfo.Logic->getOpcode(), {Dest}, {Shift1, Shift2});
1727 
1728   // These were single-use, so it's safe to remove them.
1729   MatchInfo.Shift2->eraseFromParent();
1730   MatchInfo.Logic->eraseFromParent();
1731 
1732   MI.eraseFromParent();
1733   return true;
1734 }
1735 
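// Fold a multiplication by a power of two into a left shift, e.g.
// (illustrative): %r:_(s32) = G_MUL %x, 8  -->  %r:_(s32) = G_SHL %x, 3.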
1736 bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI,
1737                                           unsigned &ShiftVal) {
1738   assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
1739   auto MaybeImmVal =
1740       getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
1741   if (!MaybeImmVal || !isPowerOf2_64(MaybeImmVal->Value))
1742     return false;
1743   ShiftVal = Log2_64(MaybeImmVal->Value);
1744   return true;
1745 }
1746 
1747 bool CombinerHelper::applyCombineMulToShl(MachineInstr &MI,
1748                                           unsigned &ShiftVal) {
1749   assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
1750   MachineIRBuilder MIB(MI);
1751   LLT ShiftTy = MRI.getType(MI.getOperand(0).getReg());
1752   auto ShiftCst = MIB.buildConstant(ShiftTy, ShiftVal);
1753   Observer.changingInstr(MI);
1754   MI.setDesc(MIB.getTII().get(TargetOpcode::G_SHL));
1755   MI.getOperand(2).setReg(ShiftCst.getReg(0));
1756   Observer.changedInstr(MI);
1757   return true;
1758 }
1759 
1760 // shl ([sza]ext x), y => zext (shl x, y), if shift does not overflow source
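// E.g. (illustrative), with %x:_(s8) known to have its top two bits clear:
//   %e:_(s32) = G_ZEXT %x
//   %r:_(s32) = G_SHL %e, 2
// can become a narrow shift followed by a zero-extension:
//   %n:_(s8)  = G_SHL %x, 2
//   %r:_(s32) = G_ZEXT %n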
1761 bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI,
1762                                              RegisterImmPair &MatchData) {
1763   assert(MI.getOpcode() == TargetOpcode::G_SHL && KB);
1764 
1765   Register LHS = MI.getOperand(1).getReg();
1766 
1767   Register ExtSrc;
1768   if (!mi_match(LHS, MRI, m_GAnyExt(m_Reg(ExtSrc))) &&
1769       !mi_match(LHS, MRI, m_GZExt(m_Reg(ExtSrc))) &&
1770       !mi_match(LHS, MRI, m_GSExt(m_Reg(ExtSrc))))
1771     return false;
1772 
1773   // TODO: Should handle vector splat.
1774   Register RHS = MI.getOperand(2).getReg();
1775   auto MaybeShiftAmtVal = getConstantVRegValWithLookThrough(RHS, MRI);
1776   if (!MaybeShiftAmtVal)
1777     return false;
1778 
1779   if (LI) {
1780     LLT SrcTy = MRI.getType(ExtSrc);
1781 
1782     // We only really care about the legality with the shifted value. We can
1783     // pick any type for the constant shift amount, so ask the target what to
1784     // use. Otherwise we would have to guess and hope it is reported as legal.
1785     LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(SrcTy);
1786     if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SHL, {SrcTy, ShiftAmtTy}}))
1787       return false;
1788   }
1789 
1790   int64_t ShiftAmt = MaybeShiftAmtVal->Value;
1791   MatchData.Reg = ExtSrc;
1792   MatchData.Imm = ShiftAmt;
1793 
1794   unsigned MinLeadingZeros = KB->getKnownZeroes(ExtSrc).countLeadingOnes();
1795   return MinLeadingZeros >= ShiftAmt;
1796 }
1797 
1798 bool CombinerHelper::applyCombineShlOfExtend(MachineInstr &MI,
1799                                              const RegisterImmPair &MatchData) {
1800   Register ExtSrcReg = MatchData.Reg;
1801   int64_t ShiftAmtVal = MatchData.Imm;
1802 
1803   LLT ExtSrcTy = MRI.getType(ExtSrcReg);
1804   Builder.setInstrAndDebugLoc(MI);
1805   auto ShiftAmt = Builder.buildConstant(ExtSrcTy, ShiftAmtVal);
1806   auto NarrowShift =
1807       Builder.buildShl(ExtSrcTy, ExtSrcReg, ShiftAmt, MI.getFlags());
1808   Builder.buildZExt(MI.getOperand(0), NarrowShift);
1809   MI.eraseFromParent();
1810   return true;
1811 }
1812 
1813 static Register peekThroughBitcast(Register Reg,
1814                                    const MachineRegisterInfo &MRI) {
1815   while (mi_match(Reg, MRI, m_GBitcast(m_Reg(Reg))))
1816     ;
1817 
1818   return Reg;
1819 }
1820 
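// Fold an unmerge of a merge back to the original registers, e.g.
// (illustrative):
//   %m:_(s64) = G_MERGE_VALUES %a:_(s32), %b:_(s32)
//   %x:_(s32), %y:_(s32) = G_UNMERGE_VALUES %m
// --> uses of %x and %y become uses of %a and %b.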
1821 bool CombinerHelper::matchCombineUnmergeMergeToPlainValues(
1822     MachineInstr &MI, SmallVectorImpl<Register> &Operands) {
1823   assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
1824          "Expected an unmerge");
1825   Register SrcReg =
1826       peekThroughBitcast(MI.getOperand(MI.getNumOperands() - 1).getReg(), MRI);
1827 
1828   MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg);
1829   if (SrcInstr->getOpcode() != TargetOpcode::G_MERGE_VALUES &&
1830       SrcInstr->getOpcode() != TargetOpcode::G_BUILD_VECTOR &&
1831       SrcInstr->getOpcode() != TargetOpcode::G_CONCAT_VECTORS)
1832     return false;
1833 
1834   // Check the source type of the merge.
1835   LLT SrcMergeTy = MRI.getType(SrcInstr->getOperand(1).getReg());
1836   LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg());
1837   bool SameSize = Dst0Ty.getSizeInBits() == SrcMergeTy.getSizeInBits();
1838   if (SrcMergeTy != Dst0Ty && !SameSize)
1839     return false;
1840   // They are the same now (modulo a bitcast).
1841   // We can collect all the src registers.
1842   for (unsigned Idx = 1, EndIdx = SrcInstr->getNumOperands(); Idx != EndIdx;
1843        ++Idx)
1844     Operands.push_back(SrcInstr->getOperand(Idx).getReg());
1845   return true;
1846 }
1847 
1848 bool CombinerHelper::applyCombineUnmergeMergeToPlainValues(
1849     MachineInstr &MI, SmallVectorImpl<Register> &Operands) {
1850   assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
1851          "Expected an unmerge");
1852   assert((MI.getNumOperands() - 1 == Operands.size()) &&
1853          "Not enough operands to replace all defs");
1854   unsigned NumElems = MI.getNumOperands() - 1;
1855 
1856   LLT SrcTy = MRI.getType(Operands[0]);
1857   LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
1858   bool CanReuseInputDirectly = DstTy == SrcTy;
1859   Builder.setInstrAndDebugLoc(MI);
1860   for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
1861     Register DstReg = MI.getOperand(Idx).getReg();
1862     Register SrcReg = Operands[Idx];
1863     if (CanReuseInputDirectly)
1864       replaceRegWith(MRI, DstReg, SrcReg);
1865     else
1866       Builder.buildCast(DstReg, SrcReg);
1867   }
1868   MI.eraseFromParent();
1869   return true;
1870 }
1871 
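// An unmerge of a constant becomes one constant per lane, e.g. (illustrative):
//   %c:_(s64) = G_CONSTANT i64 0x1111222233334444
//   %lo:_(s32), %hi:_(s32) = G_UNMERGE_VALUES %c
// --> %lo = G_CONSTANT i32 0x33334444, %hi = G_CONSTANT i32 0x11112222.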
1872 bool CombinerHelper::matchCombineUnmergeConstant(MachineInstr &MI,
1873                                                  SmallVectorImpl<APInt> &Csts) {
1874   unsigned SrcIdx = MI.getNumOperands() - 1;
1875   Register SrcReg = MI.getOperand(SrcIdx).getReg();
1876   MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg);
1877   if (SrcInstr->getOpcode() != TargetOpcode::G_CONSTANT &&
1878       SrcInstr->getOpcode() != TargetOpcode::G_FCONSTANT)
1879     return false;
1880   // Break down the big constant into smaller ones.
1881   const MachineOperand &CstVal = SrcInstr->getOperand(1);
1882   APInt Val = SrcInstr->getOpcode() == TargetOpcode::G_CONSTANT
1883                   ? CstVal.getCImm()->getValue()
1884                   : CstVal.getFPImm()->getValueAPF().bitcastToAPInt();
1885 
1886   LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg());
1887   unsigned ShiftAmt = Dst0Ty.getSizeInBits();
1888   // Unmerge a constant.
1889   for (unsigned Idx = 0; Idx != SrcIdx; ++Idx) {
1890     Csts.emplace_back(Val.trunc(ShiftAmt));
1891     Val = Val.lshr(ShiftAmt);
1892   }
1893 
1894   return true;
1895 }
1896 
1897 bool CombinerHelper::applyCombineUnmergeConstant(MachineInstr &MI,
1898                                                  SmallVectorImpl<APInt> &Csts) {
1899   assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
1900          "Expected an unmerge");
1901   assert((MI.getNumOperands() - 1 == Csts.size()) &&
1902          "Not enough operands to replace all defs");
1903   unsigned NumElems = MI.getNumOperands() - 1;
1904   Builder.setInstrAndDebugLoc(MI);
1905   for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
1906     Register DstReg = MI.getOperand(Idx).getReg();
1907     Builder.buildConstant(DstReg, Csts[Idx]);
1908   }
1909 
1910   MI.eraseFromParent();
1911   return true;
1912 }
1913 
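// If only the first result of an unmerge is used, the unmerge can be replaced
// by a truncate of the source, e.g. (illustrative):
//   %lo:_(s32), %dead:_(s32) = G_UNMERGE_VALUES %x:_(s64)
// --> %lo:_(s32) = G_TRUNC %x, provided %dead has no uses.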
1914 bool CombinerHelper::matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
1915   assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
1916          "Expected an unmerge");
1917   // Check that all the lanes are dead except the first one.
1918   for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
1919     if (!MRI.use_nodbg_empty(MI.getOperand(Idx).getReg()))
1920       return false;
1921   }
1922   return true;
1923 }
1924 
1925 bool CombinerHelper::applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
1926   Builder.setInstrAndDebugLoc(MI);
1927   Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
1928   // Truncating a vector is going to truncate every single lane,
1929   // whereas we want the full low bits.
1930   // Do the operation on a scalar instead.
1931   LLT SrcTy = MRI.getType(SrcReg);
1932   if (SrcTy.isVector())
1933     SrcReg =
1934         Builder.buildCast(LLT::scalar(SrcTy.getSizeInBits()), SrcReg).getReg(0);
1935 
1936   Register Dst0Reg = MI.getOperand(0).getReg();
1937   LLT Dst0Ty = MRI.getType(Dst0Reg);
1938   if (Dst0Ty.isVector()) {
1939     auto MIB = Builder.buildTrunc(LLT::scalar(Dst0Ty.getSizeInBits()), SrcReg);
1940     Builder.buildCast(Dst0Reg, MIB);
1941   } else
1942     Builder.buildTrunc(Dst0Reg, SrcReg);
1943   MI.eraseFromParent();
1944   return true;
1945 }
1946 
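// An unmerge fed by a zero-extension can forward the zero-extended value into
// the first result and fill the remaining results with zero, e.g.
// (illustrative):
//   %z:_(s64) = G_ZEXT %x:_(s32)
//   %lo:_(s32), %hi:_(s32) = G_UNMERGE_VALUES %z
// --> %lo reuses %x and %hi becomes G_CONSTANT i32 0.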
1947 bool CombinerHelper::matchCombineUnmergeZExtToZExt(MachineInstr &MI) {
1948   assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
1949          "Expected an unmerge");
1950   Register Dst0Reg = MI.getOperand(0).getReg();
1951   LLT Dst0Ty = MRI.getType(Dst0Reg);
1952   // G_ZEXT on a vector applies to each lane, so it will
1953   // affect all destinations. Therefore we won't be able
1954   // to simplify the unmerge to just the first definition.
1955   if (Dst0Ty.isVector())
1956     return false;
1957   Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
1958   LLT SrcTy = MRI.getType(SrcReg);
1959   if (SrcTy.isVector())
1960     return false;
1961 
1962   Register ZExtSrcReg;
1963   if (!mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZExtSrcReg))))
1964     return false;
1965 
1966   // Finally we can replace the first definition with
1967   // a zext of the source if the definition is big enough to hold
1968   // all of ZExtSrc's bits.
1969   LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
1970   return ZExtSrcTy.getSizeInBits() <= Dst0Ty.getSizeInBits();
1971 }
1972 
1973 bool CombinerHelper::applyCombineUnmergeZExtToZExt(MachineInstr &MI) {
1974   assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
1975          "Expected an unmerge");
1976 
1977   Register Dst0Reg = MI.getOperand(0).getReg();
1978 
1979   MachineInstr *ZExtInstr =
1980       MRI.getVRegDef(MI.getOperand(MI.getNumDefs()).getReg());
1981   assert(ZExtInstr && ZExtInstr->getOpcode() == TargetOpcode::G_ZEXT &&
1982          "Expecting a G_ZEXT");
1983 
1984   Register ZExtSrcReg = ZExtInstr->getOperand(1).getReg();
1985   LLT Dst0Ty = MRI.getType(Dst0Reg);
1986   LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
1987 
1988   Builder.setInstrAndDebugLoc(MI);
1989 
1990   if (Dst0Ty.getSizeInBits() > ZExtSrcTy.getSizeInBits()) {
1991     Builder.buildZExt(Dst0Reg, ZExtSrcReg);
1992   } else {
1993     assert(Dst0Ty.getSizeInBits() == ZExtSrcTy.getSizeInBits() &&
1994            "ZExt src doesn't fit in destination");
1995     replaceRegWith(MRI, Dst0Reg, ZExtSrcReg);
1996   }
1997 
1998   Register ZeroReg;
1999   for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
2000     if (!ZeroReg)
2001       ZeroReg = Builder.buildConstant(Dst0Ty, 0).getReg(0);
2002     replaceRegWith(MRI, MI.getOperand(Idx).getReg(), ZeroReg);
2003   }
2004   MI.eraseFromParent();
2005   return true;
2006 }
2007 
2008 bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI,
2009                                                 unsigned TargetShiftSize,
2010                                                 unsigned &ShiftVal) {
2011   assert((MI.getOpcode() == TargetOpcode::G_SHL ||
2012           MI.getOpcode() == TargetOpcode::G_LSHR ||
2013           MI.getOpcode() == TargetOpcode::G_ASHR) && "Expected a shift");
2014 
2015   LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2016   if (Ty.isVector()) // TODO:
2017     return false;
2018 
2019   // Don't narrow further than the requested size.
2020   unsigned Size = Ty.getSizeInBits();
2021   if (Size <= TargetShiftSize)
2022     return false;
2023 
2024   auto MaybeImmVal =
2025     getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
2026   if (!MaybeImmVal)
2027     return false;
2028 
2029   ShiftVal = MaybeImmVal->Value;
2030   return ShiftVal >= Size / 2 && ShiftVal < Size;
2031 }
2032 
2033 bool CombinerHelper::applyCombineShiftToUnmerge(MachineInstr &MI,
2034                                                 const unsigned &ShiftVal) {
2035   Register DstReg = MI.getOperand(0).getReg();
2036   Register SrcReg = MI.getOperand(1).getReg();
2037   LLT Ty = MRI.getType(SrcReg);
2038   unsigned Size = Ty.getSizeInBits();
2039   unsigned HalfSize = Size / 2;
2040   assert(ShiftVal >= HalfSize);
2041 
2042   LLT HalfTy = LLT::scalar(HalfSize);
2043 
2044   Builder.setInstr(MI);
2045   auto Unmerge = Builder.buildUnmerge(HalfTy, SrcReg);
2046   unsigned NarrowShiftAmt = ShiftVal - HalfSize;
2047 
2048   if (MI.getOpcode() == TargetOpcode::G_LSHR) {
2049     Register Narrowed = Unmerge.getReg(1);
2050 
2051     //  dst = G_LSHR s64:x, C for C >= 32
2052     // =>
2053     //   lo, hi = G_UNMERGE_VALUES x
2054     //   dst = G_MERGE_VALUES (G_LSHR hi, C - 32), 0
2055 
2056     if (NarrowShiftAmt != 0) {
2057       Narrowed = Builder.buildLShr(HalfTy, Narrowed,
2058         Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
2059     }
2060 
2061     auto Zero = Builder.buildConstant(HalfTy, 0);
2062     Builder.buildMerge(DstReg, { Narrowed, Zero });
2063   } else if (MI.getOpcode() == TargetOpcode::G_SHL) {
2064     Register Narrowed = Unmerge.getReg(0);
2065     //  dst = G_SHL s64:x, C for C >= 32
2066     // =>
2067     //   lo, hi = G_UNMERGE_VALUES x
2068     //   dst = G_MERGE_VALUES 0, (G_SHL hi, C - 32)
2069     if (NarrowShiftAmt != 0) {
2070       Narrowed = Builder.buildShl(HalfTy, Narrowed,
2071         Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
2072     }
2073 
2074     auto Zero = Builder.buildConstant(HalfTy, 0);
2075     Builder.buildMerge(DstReg, { Zero, Narrowed });
2076   } else {
2077     assert(MI.getOpcode() == TargetOpcode::G_ASHR);
2078     auto Hi = Builder.buildAShr(
2079       HalfTy, Unmerge.getReg(1),
2080       Builder.buildConstant(HalfTy, HalfSize - 1));
2081 
2082     if (ShiftVal == HalfSize) {
2083       // (G_ASHR i64:x, 32) ->
2084       //   G_MERGE_VALUES hi_32(x), (G_ASHR hi_32(x), 31)
2085       Builder.buildMerge(DstReg, { Unmerge.getReg(1), Hi });
2086     } else if (ShiftVal == Size - 1) {
2087       // Don't need a second shift.
2088       // (G_ASHR i64:x, 63) ->
2089       //   %narrowed = (G_ASHR hi_32(x), 31)
2090       //   G_MERGE_VALUES %narrowed, %narrowed
2091       Builder.buildMerge(DstReg, { Hi, Hi });
2092     } else {
2093       auto Lo = Builder.buildAShr(
2094         HalfTy, Unmerge.getReg(1),
2095         Builder.buildConstant(HalfTy, ShiftVal - HalfSize));
2096 
2097       // (G_ASHR i64:x, C) -> (for C >= 32)
2098       //   G_MERGE_VALUES (G_ASHR hi_32(x), C - 32), (G_ASHR hi_32(x), 31)
2099       Builder.buildMerge(DstReg, { Lo, Hi });
2100     }
2101   }
2102 
2103   MI.eraseFromParent();
2104   return true;
2105 }
2106 
2107 bool CombinerHelper::tryCombineShiftToUnmerge(MachineInstr &MI,
2108                                               unsigned TargetShiftAmount) {
2109   unsigned ShiftAmt;
2110   if (matchCombineShiftToUnmerge(MI, TargetShiftAmount, ShiftAmt)) {
2111     applyCombineShiftToUnmerge(MI, ShiftAmt);
2112     return true;
2113   }
2114 
2115   return false;
2116 }
2117 
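// Fold inttoptr(ptrtoint x) back to x when the pointer types match, e.g.
// (illustrative):
//   %i:_(s64) = G_PTRTOINT %p:_(p0)
//   %q:_(p0) = G_INTTOPTR %i
// --> %q is just a copy of %p.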
2118 bool CombinerHelper::matchCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
2119   assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
2120   Register DstReg = MI.getOperand(0).getReg();
2121   LLT DstTy = MRI.getType(DstReg);
2122   Register SrcReg = MI.getOperand(1).getReg();
2123   return mi_match(SrcReg, MRI,
2124                   m_GPtrToInt(m_all_of(m_SpecificType(DstTy), m_Reg(Reg))));
2125 }
2126 
2127 bool CombinerHelper::applyCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
2128   assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
2129   Register DstReg = MI.getOperand(0).getReg();
2130   Builder.setInstr(MI);
2131   Builder.buildCopy(DstReg, Reg);
2132   MI.eraseFromParent();
2133   return true;
2134 }
2135 
2136 bool CombinerHelper::matchCombineP2IToI2P(MachineInstr &MI, Register &Reg) {
2137   assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
2138   Register SrcReg = MI.getOperand(1).getReg();
2139   return mi_match(SrcReg, MRI, m_GIntToPtr(m_Reg(Reg)));
2140 }
2141 
2142 bool CombinerHelper::applyCombineP2IToI2P(MachineInstr &MI, Register &Reg) {
2143   assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
2144   Register DstReg = MI.getOperand(0).getReg();
2145   Builder.setInstr(MI);
2146   Builder.buildZExtOrTrunc(DstReg, Reg);
2147   MI.eraseFromParent();
2148   return true;
2149 }
2150 
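// Turn an integer add of a ptrtoint into a pointer add, e.g. (illustrative):
//   %i:_(s64) = G_PTRTOINT %p:_(p0)
//   %s:_(s64) = G_ADD %i, %off
// --> %a:_(p0) = G_PTR_ADD %p, %off; %s:_(s64) = G_PTRTOINT %a
// The bool in PtrReg records whether the operands had to be commuted.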
2151 bool CombinerHelper::matchCombineAddP2IToPtrAdd(
2152     MachineInstr &MI, std::pair<Register, bool> &PtrReg) {
2153   assert(MI.getOpcode() == TargetOpcode::G_ADD);
2154   Register LHS = MI.getOperand(1).getReg();
2155   Register RHS = MI.getOperand(2).getReg();
2156   LLT IntTy = MRI.getType(LHS);
2157 
2158   // G_PTR_ADD always has the pointer in the LHS, so we may need to commute the
2159   // instruction.
2160   PtrReg.second = false;
2161   for (Register SrcReg : {LHS, RHS}) {
2162     if (mi_match(SrcReg, MRI, m_GPtrToInt(m_Reg(PtrReg.first)))) {
2163       // Don't handle cases where the integer is implicitly converted to the
2164       // pointer width.
2165       LLT PtrTy = MRI.getType(PtrReg.first);
2166       if (PtrTy.getScalarSizeInBits() == IntTy.getScalarSizeInBits())
2167         return true;
2168     }
2169 
2170     PtrReg.second = true;
2171   }
2172 
2173   return false;
2174 }
2175 
2176 bool CombinerHelper::applyCombineAddP2IToPtrAdd(
2177     MachineInstr &MI, std::pair<Register, bool> &PtrReg) {
2178   Register Dst = MI.getOperand(0).getReg();
2179   Register LHS = MI.getOperand(1).getReg();
2180   Register RHS = MI.getOperand(2).getReg();
2181 
2182   const bool DoCommute = PtrReg.second;
2183   if (DoCommute)
2184     std::swap(LHS, RHS);
2185   LHS = PtrReg.first;
2186 
2187   LLT PtrTy = MRI.getType(LHS);
2188 
2189   Builder.setInstrAndDebugLoc(MI);
2190   auto PtrAdd = Builder.buildPtrAdd(PtrTy, LHS, RHS);
2191   Builder.buildPtrToInt(Dst, PtrAdd);
2192   MI.eraseFromParent();
2193   return true;
2194 }
2195 
2196 bool CombinerHelper::matchCombineConstPtrAddToI2P(MachineInstr &MI,
2197                                                   int64_t &NewCst) {
2198   assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected a G_PTR_ADD");
2199   Register LHS = MI.getOperand(1).getReg();
2200   Register RHS = MI.getOperand(2).getReg();
2201   MachineRegisterInfo &MRI = Builder.getMF().getRegInfo();
2202 
2203   if (auto RHSCst = getConstantVRegVal(RHS, MRI)) {
2204     int64_t Cst;
2205     if (mi_match(LHS, MRI, m_GIntToPtr(m_ICst(Cst)))) {
2206       NewCst = Cst + *RHSCst;
2207       return true;
2208     }
2209   }
2210 
2211   return false;
2212 }
2213 
2214 bool CombinerHelper::applyCombineConstPtrAddToI2P(MachineInstr &MI,
2215                                                   int64_t &NewCst) {
2216   assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected a G_PTR_ADD");
2217   Register Dst = MI.getOperand(0).getReg();
2218 
2219   Builder.setInstrAndDebugLoc(MI);
2220   Builder.buildConstant(Dst, NewCst);
2221   MI.eraseFromParent();
2222   return true;
2223 }
2224 
2225 bool CombinerHelper::matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) {
2226   assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT");
2227   Register DstReg = MI.getOperand(0).getReg();
2228   Register SrcReg = MI.getOperand(1).getReg();
2229   LLT DstTy = MRI.getType(DstReg);
2230   return mi_match(SrcReg, MRI,
2231                   m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))));
2232 }
2233 
2234 bool CombinerHelper::applyCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) {
2235   assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT");
2236   Register DstReg = MI.getOperand(0).getReg();
2237   MI.eraseFromParent();
2238   replaceRegWith(MRI, DstReg, Reg);
2239   return true;
2240 }
2241 
2242 bool CombinerHelper::matchCombineExtOfExt(
2243     MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
2244   assert((MI.getOpcode() == TargetOpcode::G_ANYEXT ||
2245           MI.getOpcode() == TargetOpcode::G_SEXT ||
2246           MI.getOpcode() == TargetOpcode::G_ZEXT) &&
2247          "Expected a G_[ASZ]EXT");
2248   Register SrcReg = MI.getOperand(1).getReg();
2249   MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
2250   // Match exts with the same opcode, anyext([sz]ext) and sext(zext).
2251   unsigned Opc = MI.getOpcode();
2252   unsigned SrcOpc = SrcMI->getOpcode();
2253   if (Opc == SrcOpc ||
2254       (Opc == TargetOpcode::G_ANYEXT &&
2255        (SrcOpc == TargetOpcode::G_SEXT || SrcOpc == TargetOpcode::G_ZEXT)) ||
2256       (Opc == TargetOpcode::G_SEXT && SrcOpc == TargetOpcode::G_ZEXT)) {
2257     MatchInfo = std::make_tuple(SrcMI->getOperand(1).getReg(), SrcOpc);
2258     return true;
2259   }
2260   return false;
2261 }
2262 
2263 bool CombinerHelper::applyCombineExtOfExt(
2264     MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
2265   assert((MI.getOpcode() == TargetOpcode::G_ANYEXT ||
2266           MI.getOpcode() == TargetOpcode::G_SEXT ||
2267           MI.getOpcode() == TargetOpcode::G_ZEXT) &&
2268          "Expected a G_[ASZ]EXT");
2269 
2270   Register Reg = std::get<0>(MatchInfo);
2271   unsigned SrcExtOp = std::get<1>(MatchInfo);
2272 
2273   // Combine exts with the same opcode.
2274   if (MI.getOpcode() == SrcExtOp) {
2275     Observer.changingInstr(MI);
2276     MI.getOperand(1).setReg(Reg);
2277     Observer.changedInstr(MI);
2278     return true;
2279   }
2280 
2281   // Combine:
2282   // - anyext([sz]ext x) to [sz]ext x
2283   // - sext(zext x) to zext x
2284   if (MI.getOpcode() == TargetOpcode::G_ANYEXT ||
2285       (MI.getOpcode() == TargetOpcode::G_SEXT &&
2286        SrcExtOp == TargetOpcode::G_ZEXT)) {
2287     Register DstReg = MI.getOperand(0).getReg();
2288     Builder.setInstrAndDebugLoc(MI);
2289     Builder.buildInstr(SrcExtOp, {DstReg}, {Reg});
2290     MI.eraseFromParent();
2291     return true;
2292   }
2293 
2294   return false;
2295 }
2296 
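// Replace a multiplication by -1 with a subtraction from zero, e.g.
// (illustrative): %r:_(s32) = G_MUL %x, -1  -->  %r:_(s32) = G_SUB 0, %x.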
2297 bool CombinerHelper::applyCombineMulByNegativeOne(MachineInstr &MI) {
2298   assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
2299   Register DstReg = MI.getOperand(0).getReg();
2300   Register SrcReg = MI.getOperand(1).getReg();
2301   LLT DstTy = MRI.getType(DstReg);
2302 
2303   Builder.setInstrAndDebugLoc(MI);
2304   Builder.buildSub(DstReg, Builder.buildConstant(DstTy, 0), SrcReg,
2305                    MI.getFlags());
2306   MI.eraseFromParent();
2307   return true;
2308 }
2309 
2310 bool CombinerHelper::matchCombineFNegOfFNeg(MachineInstr &MI, Register &Reg) {
2311   assert(MI.getOpcode() == TargetOpcode::G_FNEG && "Expected a G_FNEG");
2312   Register SrcReg = MI.getOperand(1).getReg();
2313   return mi_match(SrcReg, MRI, m_GFNeg(m_Reg(Reg)));
2314 }
2315 
2316 bool CombinerHelper::matchCombineFAbsOfFAbs(MachineInstr &MI, Register &Src) {
2317   assert(MI.getOpcode() == TargetOpcode::G_FABS && "Expected a G_FABS");
2318   Src = MI.getOperand(1).getReg();
2319   Register AbsSrc;
2320   return mi_match(Src, MRI, m_GFabs(m_Reg(AbsSrc)));
2321 }
2322 
2323 bool CombinerHelper::applyCombineFAbsOfFAbs(MachineInstr &MI, Register &Src) {
2324   assert(MI.getOpcode() == TargetOpcode::G_FABS && "Expected a G_FABS");
2325   Register Dst = MI.getOperand(0).getReg();
2326   MI.eraseFromParent();
2327   replaceRegWith(MRI, Dst, Src);
2328   return true;
2329 }
2330 
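// Fold trunc(ext x): when the truncated type matches the type of x, reuse x
// directly, e.g. (illustrative):
//   %e:_(s64) = G_SEXT %x:_(s32)
//   %t:_(s32) = G_TRUNC %e
// --> %t reuses %x. Otherwise a narrower ext or trunc is emitted instead.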
2331 bool CombinerHelper::matchCombineTruncOfExt(
2332     MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
2333   assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
2334   Register SrcReg = MI.getOperand(1).getReg();
2335   MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
2336   unsigned SrcOpc = SrcMI->getOpcode();
2337   if (SrcOpc == TargetOpcode::G_ANYEXT || SrcOpc == TargetOpcode::G_SEXT ||
2338       SrcOpc == TargetOpcode::G_ZEXT) {
2339     MatchInfo = std::make_pair(SrcMI->getOperand(1).getReg(), SrcOpc);
2340     return true;
2341   }
2342   return false;
2343 }
2344 
2345 bool CombinerHelper::applyCombineTruncOfExt(
2346     MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
2347   assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
2348   Register SrcReg = MatchInfo.first;
2349   unsigned SrcExtOp = MatchInfo.second;
2350   Register DstReg = MI.getOperand(0).getReg();
2351   LLT SrcTy = MRI.getType(SrcReg);
2352   LLT DstTy = MRI.getType(DstReg);
2353   if (SrcTy == DstTy) {
2354     MI.eraseFromParent();
2355     replaceRegWith(MRI, DstReg, SrcReg);
2356     return true;
2357   }
2358   Builder.setInstrAndDebugLoc(MI);
2359   if (SrcTy.getSizeInBits() < DstTy.getSizeInBits())
2360     Builder.buildInstr(SrcExtOp, {DstReg}, {SrcReg});
2361   else
2362     Builder.buildTrunc(DstReg, SrcReg);
2363   MI.eraseFromParent();
2364   return true;
2365 }
2366 
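// Sink a truncate across a left shift when the shift amount is known to fit
// in the narrow type, e.g. (illustrative):
//   %s:_(s64) = G_SHL %x, %amt
//   %t:_(s32) = G_TRUNC %s
// --> %t:_(s32) = G_SHL (G_TRUNC %x), %amt, provided %amt is known to be < 32.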
2367 bool CombinerHelper::matchCombineTruncOfShl(
2368     MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
2369   assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
2370   Register DstReg = MI.getOperand(0).getReg();
2371   Register SrcReg = MI.getOperand(1).getReg();
2372   LLT DstTy = MRI.getType(DstReg);
2373   Register ShiftSrc;
2374   Register ShiftAmt;
2375 
2376   if (MRI.hasOneNonDBGUse(SrcReg) &&
2377       mi_match(SrcReg, MRI, m_GShl(m_Reg(ShiftSrc), m_Reg(ShiftAmt))) &&
2378       isLegalOrBeforeLegalizer(
2379           {TargetOpcode::G_SHL,
2380            {DstTy, getTargetLowering().getPreferredShiftAmountTy(DstTy)}})) {
2381     KnownBits Known = KB->getKnownBits(ShiftAmt);
2382     unsigned Size = DstTy.getSizeInBits();
2383     if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
2384       MatchInfo = std::make_pair(ShiftSrc, ShiftAmt);
2385       return true;
2386     }
2387   }
2388   return false;
2389 }
2390 
2391 bool CombinerHelper::applyCombineTruncOfShl(
2392     MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
2393   assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
2394   Register DstReg = MI.getOperand(0).getReg();
2395   Register SrcReg = MI.getOperand(1).getReg();
2396   LLT DstTy = MRI.getType(DstReg);
2397   MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
2398 
2399   Register ShiftSrc = MatchInfo.first;
2400   Register ShiftAmt = MatchInfo.second;
2401   Builder.setInstrAndDebugLoc(MI);
2402   auto TruncShiftSrc = Builder.buildTrunc(DstTy, ShiftSrc);
2403   Builder.buildShl(DstReg, TruncShiftSrc, ShiftAmt, SrcMI->getFlags());
2404   MI.eraseFromParent();
2405   return true;
2406 }
2407 
2408 bool CombinerHelper::matchAnyExplicitUseIsUndef(MachineInstr &MI) {
2409   return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
2410     return MO.isReg() &&
2411            getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2412   });
2413 }
2414 
2415 bool CombinerHelper::matchAllExplicitUsesAreUndef(MachineInstr &MI) {
2416   return all_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
2417     return !MO.isReg() ||
2418            getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2419   });
2420 }
2421 
2422 bool CombinerHelper::matchUndefShuffleVectorMask(MachineInstr &MI) {
2423   assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
2424   ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
2425   return all_of(Mask, [](int Elt) { return Elt < 0; });
2426 }
2427 
2428 bool CombinerHelper::matchUndefStore(MachineInstr &MI) {
2429   assert(MI.getOpcode() == TargetOpcode::G_STORE);
2430   return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(0).getReg(),
2431                       MRI);
2432 }
2433 
2434 bool CombinerHelper::matchUndefSelectCmp(MachineInstr &MI) {
2435   assert(MI.getOpcode() == TargetOpcode::G_SELECT);
2436   return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(1).getReg(),
2437                       MRI);
2438 }
2439 
2440 bool CombinerHelper::matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx) {
2441   assert(MI.getOpcode() == TargetOpcode::G_SELECT);
2442   if (auto MaybeCstCmp =
2443           getConstantVRegValWithLookThrough(MI.getOperand(1).getReg(), MRI)) {
2444     OpIdx = MaybeCstCmp->Value ? 2 : 3;
2445     return true;
2446   }
2447   return false;
2448 }
2449 
2450 bool CombinerHelper::eraseInst(MachineInstr &MI) {
2451   MI.eraseFromParent();
2452   return true;
2453 }
2454 
2455 bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1,
2456                                     const MachineOperand &MOP2) {
2457   if (!MOP1.isReg() || !MOP2.isReg())
2458     return false;
2459   MachineInstr *I1 = getDefIgnoringCopies(MOP1.getReg(), MRI);
2460   if (!I1)
2461     return false;
2462   MachineInstr *I2 = getDefIgnoringCopies(MOP2.getReg(), MRI);
2463   if (!I2)
2464     return false;
2465 
2466   // Handle a case like this:
2467   //
2468   // %0:_(s64), %1:_(s64) = G_UNMERGE_VALUES %2:_(<2 x s64>)
2469   //
2470   // Even though %0 and %1 are produced by the same instruction they are not
2471   // the same values.
2472   if (I1 == I2)
2473     return MOP1.getReg() == MOP2.getReg();
2474 
2475   // If we have an instruction which loads or stores, we can't guarantee that
2476   // it is identical.
2477   //
2478   // For example, we may have
2479   //
2480   // %x1 = G_LOAD %addr (load N from @somewhere)
2481   // ...
2482   // call @foo
2483   // ...
2484   // %x2 = G_LOAD %addr (load N from @somewhere)
2485   // ...
2486   // %or = G_OR %x1, %x2
2487   //
2488   // It's possible that @foo will modify whatever lives at the address we're
2489   // loading from. To be safe, let's just assume that all loads and stores
2491   // are different (unless we have something which is guaranteed not to
2492   // change).
2492   if (I1->mayLoadOrStore() && !I1->isDereferenceableInvariantLoad(nullptr))
2493     return false;
2494 
2495   // Check for physical registers on the instructions first to avoid cases
2496   // like this:
2497   //
2498   // %a = COPY $physreg
2499   // ...
2500   // SOMETHING implicit-def $physreg
2501   // ...
2502   // %b = COPY $physreg
2503   //
2504   // These copies are not equivalent.
2505   if (any_of(I1->uses(), [](const MachineOperand &MO) {
2506         return MO.isReg() && MO.getReg().isPhysical();
2507       })) {
2508     // Check if we have a case like this:
2509     //
2510     // %a = COPY $physreg
2511     // %b = COPY %a
2512     //
2513     // In this case, I1 and I2 will both be equal to %a = COPY $physreg.
2514     // From that, we know that they must have the same value, since they must
2515     // have come from the same COPY.
2516     return I1->isIdenticalTo(*I2);
2517   }
2518 
2519   // We don't have any physical registers, so we don't necessarily need the
2520   // same vreg defs.
2521   //
2522   // On the off-chance that there's some target instruction feeding into the
2523   // instruction, let's use produceSameValue instead of isIdenticalTo.
2524   return Builder.getTII().produceSameValue(*I1, *I2, &MRI);
2525 }
2526 
2527 bool CombinerHelper::matchConstantOp(const MachineOperand &MOP, int64_t C) {
2528   if (!MOP.isReg())
2529     return false;
2530   // MIPatternMatch doesn't let us look through G_ZEXT etc.
2531   auto ValAndVReg = getConstantVRegValWithLookThrough(MOP.getReg(), MRI);
2532   return ValAndVReg && ValAndVReg->Value == C;
2533 }
2534 
2535 bool CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
2536                                                      unsigned OpIdx) {
2537   assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
2538   Register OldReg = MI.getOperand(0).getReg();
2539   Register Replacement = MI.getOperand(OpIdx).getReg();
2540   assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
2541   MI.eraseFromParent();
2542   replaceRegWith(MRI, OldReg, Replacement);
2543   return true;
2544 }
2545 
2546 bool CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI,
2547                                                  Register Replacement) {
2548   assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
2549   Register OldReg = MI.getOperand(0).getReg();
2550   assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
2551   MI.eraseFromParent();
2552   replaceRegWith(MRI, OldReg, Replacement);
2553   return true;
2554 }
2555 
2556 bool CombinerHelper::matchSelectSameVal(MachineInstr &MI) {
2557   assert(MI.getOpcode() == TargetOpcode::G_SELECT);
2558   // Match (cond ? x : x)
2559   return matchEqualDefs(MI.getOperand(2), MI.getOperand(3)) &&
2560          canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(2).getReg(),
2561                        MRI);
2562 }
2563 
2564 bool CombinerHelper::matchBinOpSameVal(MachineInstr &MI) {
2565   return matchEqualDefs(MI.getOperand(1), MI.getOperand(2)) &&
2566          canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
2567                        MRI);
2568 }
2569 
2570 bool CombinerHelper::matchOperandIsZero(MachineInstr &MI, unsigned OpIdx) {
2571   return matchConstantOp(MI.getOperand(OpIdx), 0) &&
2572          canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(OpIdx).getReg(),
2573                        MRI);
2574 }
2575 
2576 bool CombinerHelper::matchOperandIsUndef(MachineInstr &MI, unsigned OpIdx) {
2577   MachineOperand &MO = MI.getOperand(OpIdx);
2578   return MO.isReg() &&
2579          getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2580 }
2581 
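// Replace MI's single definition with a G_FCONSTANT of value C and erase MI.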
bool CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, double C) {
  assert(MI.getNumDefs() == 1 && "Expected only one def?");
  Builder.setInstr(MI);
  Builder.buildFConstant(MI.getOperand(0), C);
  MI.eraseFromParent();
  return true;
}

bool CombinerHelper::replaceInstWithConstant(MachineInstr &MI, int64_t C) {
  assert(MI.getNumDefs() == 1 && "Expected only one def?");
  Builder.setInstr(MI);
  Builder.buildConstant(MI.getOperand(0), C);
  MI.eraseFromParent();
  return true;
}

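// Replace MI's single definition with a G_IMPLICIT_DEF and erase MI.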
bool CombinerHelper::replaceInstWithUndef(MachineInstr &MI) {
  assert(MI.getNumDefs() == 1 && "Expected only one def?");
  Builder.setInstr(MI);
  Builder.buildUndef(MI.getOperand(0));
  MI.eraseFromParent();
  return true;
}

bool CombinerHelper::matchSimplifyAddToSub(
    MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  Register &NewLHS = std::get<0>(MatchInfo);
  Register &NewRHS = std::get<1>(MatchInfo);

  // Helper lambda to check for opportunities for
  // ((0-A) + B) -> B - A
  // (A + (0-B)) -> A - B
  auto CheckFold = [&](Register &MaybeSub, Register &MaybeNewLHS) {
    if (!mi_match(MaybeSub, MRI, m_Neg(m_Reg(NewRHS))))
      return false;
    NewLHS = MaybeNewLHS;
    return true;
  };

  return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
}

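// Walk a chain of G_INSERT_VECTOR_ELTs with constant indices that ends in a
// G_BUILD_VECTOR or G_IMPLICIT_DEF, recording in MatchInfo the register that
// ends up in each lane.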
bool CombinerHelper::matchCombineInsertVecElts(
    MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT &&
         "Invalid opcode");
  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  assert(DstTy.isVector() && "Invalid G_INSERT_VECTOR_ELT?");
  unsigned NumElts = DstTy.getNumElements();
  // If this MI is part of a sequence of insert_vec_elts, then
  // don't do the combine in the middle of the sequence.
  if (MRI.hasOneUse(DstReg) && MRI.use_instr_begin(DstReg)->getOpcode() ==
                                   TargetOpcode::G_INSERT_VECTOR_ELT)
    return false;
  MachineInstr *CurrInst = &MI;
  MachineInstr *TmpInst;
  int64_t IntImm;
  Register TmpReg;
  MatchInfo.resize(NumElts);
  while (mi_match(
      CurrInst->getOperand(0).getReg(), MRI,
      m_GInsertVecElt(m_MInstr(TmpInst), m_Reg(TmpReg), m_ICst(IntImm)))) {
    if (IntImm >= NumElts)
      return false;
    if (!MatchInfo[IntImm])
      MatchInfo[IntImm] = TmpReg;
    CurrInst = TmpInst;
  }
  // Variable index.
  if (CurrInst->getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
    return false;
  if (TmpInst->getOpcode() == TargetOpcode::G_BUILD_VECTOR) {
    for (unsigned I = 1; I < TmpInst->getNumOperands(); ++I) {
      if (!MatchInfo[I - 1].isValid())
        MatchInfo[I - 1] = TmpInst->getOperand(I).getReg();
    }
    return true;
  }
  // If we didn't end in a G_IMPLICIT_DEF, bail out.
  return TmpInst->getOpcode() == TargetOpcode::G_IMPLICIT_DEF;
}

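// Emit a single G_BUILD_VECTOR from the lanes collected above, using an
// implicit def for any lane that was never written.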
bool CombinerHelper::applyCombineInsertVecElts(
    MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) {
  Builder.setInstr(MI);
  Register UndefReg;
  auto GetUndef = [&]() {
    if (UndefReg)
      return UndefReg;
    LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
    UndefReg = Builder.buildUndef(DstTy.getScalarType()).getReg(0);
    return UndefReg;
  };
  for (unsigned I = 0; I < MatchInfo.size(); ++I) {
    if (!MatchInfo[I])
      MatchInfo[I] = GetUndef();
  }
  Builder.buildBuildVector(MI.getOperand(0).getReg(), MatchInfo);
  MI.eraseFromParent();
  return true;
}

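// Rewrite the matched G_ADD as a G_SUB using the registers recorded in
// MatchInfo.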
bool CombinerHelper::applySimplifyAddToSub(
    MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
  Builder.setInstr(MI);
  Register SubLHS, SubRHS;
  std::tie(SubLHS, SubRHS) = MatchInfo;
  Builder.buildSub(MI.getOperand(0).getReg(), SubLHS, SubRHS);
  MI.eraseFromParent();
  return true;
}

bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands(
    MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) {
  // Matches: logic (hand x, ...), (hand y, ...) -> hand (logic x, y), ...
  //
  // Creates the new hand + logic instructions (but does not insert them).
  //
  // On success, MatchInfo is populated with the new instructions. These are
  // inserted in applyHoistLogicOpWithSameOpcodeHands.
  unsigned LogicOpcode = MI.getOpcode();
  assert(LogicOpcode == TargetOpcode::G_AND ||
         LogicOpcode == TargetOpcode::G_OR ||
         LogicOpcode == TargetOpcode::G_XOR);
  MachineIRBuilder MIB(MI);
  Register Dst = MI.getOperand(0).getReg();
  Register LHSReg = MI.getOperand(1).getReg();
  Register RHSReg = MI.getOperand(2).getReg();

  // Don't recompute anything.
  if (!MRI.hasOneNonDBGUse(LHSReg) || !MRI.hasOneNonDBGUse(RHSReg))
    return false;

  // Make sure we have (hand x, ...), (hand y, ...)
  MachineInstr *LeftHandInst = getDefIgnoringCopies(LHSReg, MRI);
  MachineInstr *RightHandInst = getDefIgnoringCopies(RHSReg, MRI);
  if (!LeftHandInst || !RightHandInst)
    return false;
  unsigned HandOpcode = LeftHandInst->getOpcode();
  if (HandOpcode != RightHandInst->getOpcode())
    return false;
  if (!LeftHandInst->getOperand(1).isReg() ||
      !RightHandInst->getOperand(1).isReg())
    return false;

  // Make sure the types match up, and if we're doing this post-legalization,
  // we end up with legal types.
  Register X = LeftHandInst->getOperand(1).getReg();
  Register Y = RightHandInst->getOperand(1).getReg();
  LLT XTy = MRI.getType(X);
  LLT YTy = MRI.getType(Y);
  if (XTy != YTy)
    return false;
  if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}}))
    return false;

  // Optional extra source register.
  Register ExtraHandOpSrcReg;
  switch (HandOpcode) {
  default:
    return false;
  case TargetOpcode::G_ANYEXT:
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ZEXT: {
    // Match: logic (ext X), (ext Y) --> ext (logic X, Y)
    break;
  }
  case TargetOpcode::G_AND:
  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_SHL: {
    // Match: logic (binop x, z), (binop y, z) -> binop (logic x, y), z
    MachineOperand &ZOp = LeftHandInst->getOperand(2);
    if (!matchEqualDefs(ZOp, RightHandInst->getOperand(2)))
      return false;
    ExtraHandOpSrcReg = ZOp.getReg();
    break;
  }
  }

  // Record the steps to build the new instructions.
  //
  // Steps to build (logic x, y)
  auto NewLogicDst = MRI.createGenericVirtualRegister(XTy);
  OperandBuildSteps LogicBuildSteps = {
      [=](MachineInstrBuilder &MIB) { MIB.addDef(NewLogicDst); },
      [=](MachineInstrBuilder &MIB) { MIB.addReg(X); },
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Y); }};
  InstructionBuildSteps LogicSteps(LogicOpcode, LogicBuildSteps);

  // Steps to build hand (logic x, y), ...z
  OperandBuildSteps HandBuildSteps = {
      [=](MachineInstrBuilder &MIB) { MIB.addDef(Dst); },
      [=](MachineInstrBuilder &MIB) { MIB.addReg(NewLogicDst); }};
  if (ExtraHandOpSrcReg.isValid())
    HandBuildSteps.push_back(
        [=](MachineInstrBuilder &MIB) { MIB.addReg(ExtraHandOpSrcReg); });
  InstructionBuildSteps HandSteps(HandOpcode, HandBuildSteps);

  MatchInfo = InstructionStepsMatchInfo({LogicSteps, HandSteps});
  return true;
}

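// Build each instruction described by MatchInfo at MI's insertion point, then
// erase MI.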
bool CombinerHelper::applyBuildInstructionSteps(
    MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) {
  assert(MatchInfo.InstrsToBuild.size() &&
         "Expected at least one instr to build?");
  Builder.setInstr(MI);
  for (auto &InstrToBuild : MatchInfo.InstrsToBuild) {
    assert(InstrToBuild.Opcode && "Expected a valid opcode?");
    assert(InstrToBuild.OperandFns.size() && "Expected at least one operand?");
    MachineInstrBuilder Instr = Builder.buildInstr(InstrToBuild.Opcode);
    for (auto &OperandFn : InstrToBuild.OperandFns)
      OperandFn(Instr);
  }
  MI.eraseFromParent();
  return true;
}

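// Match (ashr (shl x, C), C), which sign-extends the low (Width - C) bits of
// x and is therefore equivalent to a G_SEXT_INREG when that operation is
// legal (or we are before the legalizer).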
bool CombinerHelper::matchAshrShlToSextInreg(
    MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_ASHR);
  int64_t ShlCst, AshrCst;
  Register Src;
  // FIXME: detect splat constant vectors.
  if (!mi_match(MI.getOperand(0).getReg(), MRI,
                m_GAShr(m_GShl(m_Reg(Src), m_ICst(ShlCst)), m_ICst(AshrCst))))
    return false;
  if (ShlCst != AshrCst)
    return false;
  if (!isLegalOrBeforeLegalizer(
          {TargetOpcode::G_SEXT_INREG, {MRI.getType(Src)}}))
    return false;
  MatchInfo = std::make_tuple(Src, ShlCst);
  return true;
}
bool CombinerHelper::applyAshShlToSextInreg(
    MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_ASHR);
  Register Src;
  int64_t ShiftAmt;
  std::tie(Src, ShiftAmt) = MatchInfo;
  unsigned Size = MRI.getType(Src).getScalarSizeInBits();
  Builder.setInstrAndDebugLoc(MI);
  Builder.buildSExtInReg(MI.getOperand(0).getReg(), Src, Size - ShiftAmt);
  MI.eraseFromParent();
  return true;
}

bool CombinerHelper::matchRedundantAnd(MachineInstr &MI,
                                       Register &Replacement) {
  // Given
  //
  // %y:_(sN) = G_SOMETHING
  // %x:_(sN) = G_SOMETHING
  // %res:_(sN) = G_AND %x, %y
  //
  // Eliminate the G_AND when it is known that x & y == x or x & y == y.
  //
  // Patterns like this can appear as a result of legalization. E.g.
  //
  // %cmp:_(s32) = G_ICMP intpred(pred), %x(s32), %y
  // %one:_(s32) = G_CONSTANT i32 1
  // %and:_(s32) = G_AND %cmp, %one
  //
  // In this case, G_ICMP only produces a single bit, so x & 1 == x.
  assert(MI.getOpcode() == TargetOpcode::G_AND);
  if (!KB)
    return false;

  Register AndDst = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(AndDst);

  // FIXME: This should be removed once GISelKnownBits supports vectors.
  if (DstTy.isVector())
    return false;

  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  KnownBits LHSBits = KB->getKnownBits(LHS);
  KnownBits RHSBits = KB->getKnownBits(RHS);

  // Check that x & Mask == x.
  // x & 1 == x, always
  // x & 0 == x, only if x is also 0
  // Meaning Mask has no effect if every bit is either one in Mask or zero in x.
  //
  // Check if we can replace AndDst with the LHS of the G_AND
  if (canReplaceReg(AndDst, LHS, MRI) &&
      (LHSBits.Zero | RHSBits.One).isAllOnesValue()) {
    Replacement = LHS;
    return true;
  }

  // Check if we can replace AndDst with the RHS of the G_AND
  if (canReplaceReg(AndDst, RHS, MRI) &&
      (LHSBits.One | RHSBits.Zero).isAllOnesValue()) {
    Replacement = RHS;
    return true;
  }

  return false;
}

bool CombinerHelper::matchRedundantOr(MachineInstr &MI, Register &Replacement) {
  // Given
  //
  // %y:_(sN) = G_SOMETHING
  // %x:_(sN) = G_SOMETHING
  // %res:_(sN) = G_OR %x, %y
  //
  // Eliminate the G_OR when it is known that x | y == x or x | y == y.
  assert(MI.getOpcode() == TargetOpcode::G_OR);
  if (!KB)
    return false;

  Register OrDst = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(OrDst);

  // FIXME: This should be removed once GISelKnownBits supports vectors.
  if (DstTy.isVector())
    return false;

  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  KnownBits LHSBits = KB->getKnownBits(LHS);
  KnownBits RHSBits = KB->getKnownBits(RHS);

  // Check that x | Mask == x.
  // x | 0 == x, always
  // x | 1 == x, only if x is also 1
  // Meaning Mask has no effect if every bit is either zero in Mask or one in x.
  //
  // Check if we can replace OrDst with the LHS of the G_OR
  if (canReplaceReg(OrDst, LHS, MRI) &&
      (LHSBits.One | RHSBits.Zero).isAllOnesValue()) {
    Replacement = LHS;
    return true;
  }

  // Check if we can replace OrDst with the RHS of the G_OR
  if (canReplaceReg(OrDst, RHS, MRI) &&
      (LHSBits.Zero | RHSBits.One).isAllOnesValue()) {
    Replacement = RHS;
    return true;
  }

  return false;
}

bool CombinerHelper::matchRedundantSExtInReg(MachineInstr &MI) {
  // If the input is already sign extended, just drop the extension.
  Register Src = MI.getOperand(1).getReg();
  unsigned ExtBits = MI.getOperand(2).getImm();
  unsigned TypeSize = MRI.getType(Src).getScalarSizeInBits();
  return KB->computeNumSignBits(Src) >= (TypeSize - ExtBits + 1);
}

static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits,
                             int64_t Cst, bool IsVector, bool IsFP) {
  // For i1, Cst will always be -1 regardless of boolean contents.
  return (ScalarSizeBits == 1 && Cst == -1) ||
         isConstTrueVal(TLI, Cst, IsVector, IsFP);
}

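// Match a G_XOR of a single-use tree of comparisons (combined with G_AND and
// G_OR) with a 'true' constant, i.e. a logical negation that can be folded
// into the comparisons themselves.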
bool CombinerHelper::matchNotCmp(MachineInstr &MI,
                                 SmallVectorImpl<Register> &RegsToNegate) {
  assert(MI.getOpcode() == TargetOpcode::G_XOR);
  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
  const auto &TLI = *Builder.getMF().getSubtarget().getTargetLowering();
  Register XorSrc;
  Register CstReg;
  // We match xor(src, true) here.
  if (!mi_match(MI.getOperand(0).getReg(), MRI,
                m_GXor(m_Reg(XorSrc), m_Reg(CstReg))))
    return false;

  if (!MRI.hasOneNonDBGUse(XorSrc))
    return false;

  // Check that XorSrc is the root of a tree of comparisons combined with ANDs
  // and ORs. The suffix of RegsToNegate starting from index I is used as a
  // work list of tree nodes to visit.
  RegsToNegate.push_back(XorSrc);
  // Remember whether the comparisons are all integer or all floating point.
  bool IsInt = false;
  bool IsFP = false;
  for (unsigned I = 0; I < RegsToNegate.size(); ++I) {
    Register Reg = RegsToNegate[I];
    if (!MRI.hasOneNonDBGUse(Reg))
      return false;
    MachineInstr *Def = MRI.getVRegDef(Reg);
    switch (Def->getOpcode()) {
    default:
      // Don't match if the tree contains anything other than ANDs, ORs and
      // comparisons.
      return false;
    case TargetOpcode::G_ICMP:
      if (IsFP)
        return false;
      IsInt = true;
      // When we apply the combine we will invert the predicate.
      break;
    case TargetOpcode::G_FCMP:
      if (IsInt)
        return false;
      IsFP = true;
      // When we apply the combine we will invert the predicate.
      break;
    case TargetOpcode::G_AND:
    case TargetOpcode::G_OR:
      // Implement De Morgan's laws:
      // ~(x & y) -> ~x | ~y
      // ~(x | y) -> ~x & ~y
      // When we apply the combine we will change the opcode and recursively
      // negate the operands.
      RegsToNegate.push_back(Def->getOperand(1).getReg());
      RegsToNegate.push_back(Def->getOperand(2).getReg());
      break;
    }
  }

  // Now we know whether the comparisons are integer or floating point, check
  // the constant in the xor.
  int64_t Cst;
  if (Ty.isVector()) {
    MachineInstr *CstDef = MRI.getVRegDef(CstReg);
    auto MaybeCst = getBuildVectorConstantSplat(*CstDef, MRI);
    if (!MaybeCst)
      return false;
    if (!isConstValidTrue(TLI, Ty.getScalarSizeInBits(), *MaybeCst, true, IsFP))
      return false;
  } else {
    if (!mi_match(CstReg, MRI, m_ICst(Cst)))
      return false;
    if (!isConstValidTrue(TLI, Ty.getSizeInBits(), Cst, false, IsFP))
      return false;
  }

  return true;
}

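// Apply the negation matched above: invert every comparison predicate, swap
// G_AND and G_OR per De Morgan's laws, and replace the G_XOR's result with
// its (now negated) source.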
bool CombinerHelper::applyNotCmp(MachineInstr &MI,
                                 SmallVectorImpl<Register> &RegsToNegate) {
  for (Register Reg : RegsToNegate) {
    MachineInstr *Def = MRI.getVRegDef(Reg);
    Observer.changingInstr(*Def);
    // For each comparison, invert the predicate. For each AND and OR, change
    // the opcode.
    switch (Def->getOpcode()) {
    default:
      llvm_unreachable("Unexpected opcode");
    case TargetOpcode::G_ICMP:
    case TargetOpcode::G_FCMP: {
      MachineOperand &PredOp = Def->getOperand(1);
      CmpInst::Predicate NewP = CmpInst::getInversePredicate(
          (CmpInst::Predicate)PredOp.getPredicate());
      PredOp.setPredicate(NewP);
      break;
    }
    case TargetOpcode::G_AND:
      Def->setDesc(Builder.getTII().get(TargetOpcode::G_OR));
      break;
    case TargetOpcode::G_OR:
      Def->setDesc(Builder.getTII().get(TargetOpcode::G_AND));
      break;
    }
    Observer.changedInstr(*Def);
  }

  replaceRegWith(MRI, MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
  MI.eraseFromParent();
  return true;
}

bool CombinerHelper::matchXorOfAndWithSameReg(
    MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
  // Match (xor (and x, y), y) (or any of its commuted cases)
  assert(MI.getOpcode() == TargetOpcode::G_XOR);
  Register &X = MatchInfo.first;
  Register &Y = MatchInfo.second;
  Register AndReg = MI.getOperand(1).getReg();
  Register SharedReg = MI.getOperand(2).getReg();

  // Find a G_AND on either side of the G_XOR.
  // Look for one of
  //
  // (xor (and x, y), SharedReg)
  // (xor SharedReg, (and x, y))
  if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y)))) {
    std::swap(AndReg, SharedReg);
    if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y))))
      return false;
  }

  // Only do this if we'll eliminate the G_AND.
  if (!MRI.hasOneNonDBGUse(AndReg))
    return false;

  // We can combine if SharedReg is the same as either the LHS or RHS of the
  // G_AND.
  if (Y != SharedReg)
    std::swap(X, Y);
  return Y == SharedReg;
}

bool CombinerHelper::applyXorOfAndWithSameReg(
    MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
  // Fold (xor (and x, y), y) -> (and (not x), y)
  Builder.setInstrAndDebugLoc(MI);
  Register X, Y;
  std::tie(X, Y) = MatchInfo;
  auto Not = Builder.buildNot(MRI.getType(X), X);
  Observer.changingInstr(MI);
  MI.setDesc(Builder.getTII().get(TargetOpcode::G_AND));
  MI.getOperand(1).setReg(Not->getOperand(0).getReg());
  MI.getOperand(2).setReg(Y);
  Observer.changedInstr(MI);
  return true;
}

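// Match a G_PTR_ADD whose pointer operand is a null pointer constant (or an
// all-zero vector of pointers) in an integral address space.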
bool CombinerHelper::matchPtrAddZero(MachineInstr &MI) {
  Register DstReg = MI.getOperand(0).getReg();
  LLT Ty = MRI.getType(DstReg);
  const DataLayout &DL = Builder.getMF().getDataLayout();

  if (DL.isNonIntegralAddressSpace(Ty.getScalarType().getAddressSpace()))
    return false;

  if (Ty.isPointer()) {
    auto ConstVal = getConstantVRegVal(MI.getOperand(1).getReg(), MRI);
    return ConstVal && *ConstVal == 0;
  }

  assert(Ty.isVector() && "Expecting a vector type");
  const MachineInstr *VecMI = MRI.getVRegDef(MI.getOperand(1).getReg());
  return isBuildVectorAllZeros(*VecMI, MRI);
}

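// With a null base pointer, the G_PTR_ADD reduces to a G_INTTOPTR of its
// offset operand.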
bool CombinerHelper::applyPtrAddZero(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD);
  Builder.setInstrAndDebugLoc(MI);
  Builder.buildIntToPtr(MI.getOperand(0), MI.getOperand(2));
  MI.eraseFromParent();
  return true;
}

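// Generic entry point: try a few generic combines in turn and report whether
// any of them changed the code.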
bool CombinerHelper::tryCombine(MachineInstr &MI) {
  if (tryCombineCopy(MI))
    return true;
  if (tryCombineExtendingLoads(MI))
    return true;
  if (tryCombineIndexedLoadStore(MI))
    return true;
  return false;
}