//===------ PPCLoopPreIncPrep.cpp - Loop Pre-Inc. AM Prep. Pass -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements a pass to prepare loops for pre-increment addressing
// modes. Additional PHIs are created for loop induction variables used by
// load/store instructions so that the pre-increment forms can be used.
// Generically, this means transforming loops like this:
//   for (int i = 0; i < n; ++i)
//     array[i] = c;
// to look like this:
//   T *p = &array[-1];
//   for (int i = 0; i < n; ++i)
//     *++p = c;
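//
// On PPC, this allows the loads and stores to be selected as update-form
// (pre-increment) instructions such as lwzu/stwu and ldu/stdu, which write
// the incremented address back into their base register.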
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "ppc-loop-preinc-prep"

#include "PPC.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include <cassert>
#include <iterator>
#include <utility>

using namespace llvm;

// By default, we limit this to creating 16 PHIs (which is a little over half
// of the allocatable register set).
static cl::opt<unsigned> MaxVars("ppc-preinc-prep-max-vars",
                                 cl::Hidden, cl::init(16),
  cl::desc("Potential PHI threshold for PPC preinc loop prep"));

STATISTIC(PHINodeAlreadyExists, "PHI node already in pre-increment form");

namespace llvm {

  void initializePPCLoopPreIncPrepPass(PassRegistry&);

} // end namespace llvm

namespace {

  class PPCLoopPreIncPrep : public FunctionPass {
  public:
    static char ID; // Pass ID, replacement for typeid

    PPCLoopPreIncPrep() : FunctionPass(ID) {
      initializePPCLoopPreIncPrepPass(*PassRegistry::getPassRegistry());
    }

    PPCLoopPreIncPrep(PPCTargetMachine &TM) : FunctionPass(ID), TM(&TM) {
      initializePPCLoopPreIncPrepPass(*PassRegistry::getPassRegistry());
    }

    void getAnalysisUsage(AnalysisUsage &AU) const override {
      AU.addPreserved<DominatorTreeWrapperPass>();
      AU.addRequired<LoopInfoWrapperPass>();
      AU.addPreserved<LoopInfoWrapperPass>();
      AU.addRequired<ScalarEvolutionWrapperPass>();
    }

    bool alreadyPrepared(Loop *L, Instruction *MemI,
                         const SCEV *BasePtrStartSCEV,
                         const SCEVConstant *BasePtrIncSCEV);
    bool runOnFunction(Function &F) override;

    bool runOnLoop(Loop *L);
    void simplifyLoopLatch(Loop *L);
    bool rotateLoop(Loop *L);

  private:
    PPCTargetMachine *TM = nullptr;
    DominatorTree *DT;
    LoopInfo *LI;
    ScalarEvolution *SE;
    bool PreserveLCSSA;
  };

} // end anonymous namespace

char PPCLoopPreIncPrep::ID = 0;
static const char *name = "Prepare loop for pre-inc. addressing modes";
INITIALIZE_PASS_BEGIN(PPCLoopPreIncPrep, DEBUG_TYPE, name, false, false)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_END(PPCLoopPreIncPrep, DEBUG_TYPE, name, false, false)

FunctionPass *llvm::createPPCLoopPreIncPrepPass(PPCTargetMachine &TM) {
  return new PPCLoopPreIncPrep(TM);
}

namespace {

  struct BucketElement {
    BucketElement(const SCEVConstant *O, Instruction *I) : Offset(O), Instr(I) {}
    BucketElement(Instruction *I) : Offset(nullptr), Instr(I) {}

    const SCEVConstant *Offset;
    Instruction *Instr;
  };

  struct Bucket {
    Bucket(const SCEV *B, Instruction *I) : BaseSCEV(B),
                                            Elements(1, BucketElement(I)) {}

    const SCEV *BaseSCEV;
    SmallVector<BucketElement, 16> Elements;
  };

} // end anonymous namespace

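// Return true if the (bitcast-stripped) base pointer is a GEP that is marked
// inbounds, so that the replacement GEPs we create can also carry the
// inbounds flag.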
static bool IsPtrInBounds(Value *BasePtr) {
  Value *StrippedBasePtr = BasePtr;
  while (BitCastInst *BC = dyn_cast<BitCastInst>(StrippedBasePtr))
    StrippedBasePtr = BC->getOperand(0);
  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(StrippedBasePtr))
    return GEP->isInBounds();

  return false;
}

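// Return the pointer operand of a load, a store, or a prefetch intrinsic;
// return nullptr for anything else.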
static Value *GetPointerOperand(Value *MemI) {
  if (LoadInst *LMemI = dyn_cast<LoadInst>(MemI)) {
    return LMemI->getPointerOperand();
  } else if (StoreInst *SMemI = dyn_cast<StoreInst>(MemI)) {
    return SMemI->getPointerOperand();
  } else if (IntrinsicInst *IMemI = dyn_cast<IntrinsicInst>(MemI)) {
    if (IMemI->getIntrinsicID() == Intrinsic::prefetch)
      return IMemI->getArgOperand(0);
  }

  return nullptr;
}

bool PPCLoopPreIncPrep::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
  DT = DTWP ? &DTWP->getDomTree() : nullptr;
  PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);

  bool MadeChange = false;

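  // Visit every loop in the function; runOnLoop only transforms the
  // innermost ones.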
  for (auto I = LI->begin(), IE = LI->end(); I != IE; ++I)
    for (auto L = df_begin(*I), LE = df_end(*I); L != LE; ++L)
      MadeChange |= runOnLoop(*L);

  return MadeChange;
}

// To prepare for pre-increment addressing, a PHI is added.
// This function checks whether such a PHI already exists and returns true if
// it finds one with the same start and increment as the one we would have
// created.
bool PPCLoopPreIncPrep::alreadyPrepared(Loop *L, Instruction *MemI,
                                        const SCEV *BasePtrStartSCEV,
                                        const SCEVConstant *BasePtrIncSCEV) {
  BasicBlock *BB = MemI->getParent();
  if (!BB)
    return false;

  BasicBlock *PredBB = L->getLoopPredecessor();
  BasicBlock *LatchBB = L->getLoopLatch();

  if (!PredBB || !LatchBB)
    return false;

  // Run through the PHIs and see if any of them looks like a preparation.
  iterator_range<BasicBlock::phi_iterator> PHIIter = BB->phis();
  for (auto &CurrentPHI : PHIIter) {
    PHINode *CurrentPHINode = dyn_cast<PHINode>(&CurrentPHI);
    if (!CurrentPHINode)
      continue;

    if (!SE->isSCEVable(CurrentPHINode->getType()))
      continue;

    const SCEV *PHISCEV = SE->getSCEVAtScope(CurrentPHINode, L);

    const SCEVAddRecExpr *PHIBasePtrSCEV = dyn_cast<SCEVAddRecExpr>(PHISCEV);
    if (!PHIBasePtrSCEV)
      continue;

    const SCEVConstant *PHIBasePtrIncSCEV =
      dyn_cast<SCEVConstant>(PHIBasePtrSCEV->getStepRecurrence(*SE));
    if (!PHIBasePtrIncSCEV)
      continue;

    if (CurrentPHINode->getNumIncomingValues() == 2) {
      if ((CurrentPHINode->getIncomingBlock(0) == LatchBB &&
           CurrentPHINode->getIncomingBlock(1) == PredBB) ||
          (CurrentPHINode->getIncomingBlock(1) == LatchBB &&
           CurrentPHINode->getIncomingBlock(0) == PredBB)) {
        if (PHIBasePtrSCEV->getStart() == BasePtrStartSCEV &&
            PHIBasePtrIncSCEV == BasePtrIncSCEV) {
          // The existing PHI (CurrentPHINode) has the same start and increment
          // as the PHI that we wanted to create.
          ++PHINodeAlreadyExists;
          return true;
        }
      }
    }
  }
  return false;
}

bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
  bool MadeChange = false;

  // Only prep the innermost loops.
  if (!L->empty())
    return MadeChange;

  LLVM_DEBUG(dbgs() << "PIP: Examining: " << *L << "\n");

  BasicBlock *Header = L->getHeader();

  const PPCSubtarget *ST =
    TM ? TM->getSubtargetImpl(*Header->getParent()) : nullptr;

  unsigned HeaderLoopPredCount = pred_size(Header);

  // Collect buckets of comparable addresses used by loads and stores.
  SmallVector<Bucket, 16> Buckets;
  for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
       I != IE; ++I) {
    for (BasicBlock::iterator J = (*I)->begin(), JE = (*I)->end();
        J != JE; ++J) {
      Value *PtrValue;
      Instruction *MemI;

      if (LoadInst *LMemI = dyn_cast<LoadInst>(J)) {
        MemI = LMemI;
        PtrValue = LMemI->getPointerOperand();
      } else if (StoreInst *SMemI = dyn_cast<StoreInst>(J)) {
        MemI = SMemI;
        PtrValue = SMemI->getPointerOperand();
      } else if (IntrinsicInst *IMemI = dyn_cast<IntrinsicInst>(J)) {
        if (IMemI->getIntrinsicID() == Intrinsic::prefetch) {
          MemI = IMemI;
          PtrValue = IMemI->getArgOperand(0);
        } else continue;
      } else continue;

      unsigned PtrAddrSpace = PtrValue->getType()->getPointerAddressSpace();
      if (PtrAddrSpace)
        continue;

      // There are no update forms for Altivec vector load/stores.
      if (ST && ST->hasAltivec() &&
          PtrValue->getType()->getPointerElementType()->isVectorTy())
        continue;

      if (L->isLoopInvariant(PtrValue))
        continue;

      const SCEV *LSCEV = SE->getSCEVAtScope(PtrValue, L);
      if (const SCEVAddRecExpr *LARSCEV = dyn_cast<SCEVAddRecExpr>(LSCEV)) {
        if (LARSCEV->getLoop() != L)
          continue;
        // See getPreIndexedAddressParts: the displacement for LDU/STDU must be
        // a multiple of 4 (DS-form). For i64 loads/stores, if the displacement
        // fits in a signed 16-bit field but is not a multiple of 4, this
        // pre-inc prep is useless and may even break an otherwise well-formed
        // addressing mode.
        if (PtrValue->getType()->getPointerElementType()->isIntegerTy(64)) {
          if (const SCEVConstant *StepConst =
                  dyn_cast<SCEVConstant>(LARSCEV->getStepRecurrence(*SE))) {
            const APInt &ConstInt = StepConst->getValue()->getValue();
            if (ConstInt.isSignedIntN(16) && ConstInt.srem(4) != 0)
              continue;
          }
        }
      } else {
        continue;
      }

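      // Try to fold this access into an existing bucket: it belongs there if
      // its address differs from that bucket's base by a compile-time
      // constant.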
      bool FoundBucket = false;
      for (auto &B : Buckets) {
        const SCEV *Diff = SE->getMinusSCEV(LSCEV, B.BaseSCEV);
        if (const auto *CDiff = dyn_cast<SCEVConstant>(Diff)) {
          B.Elements.push_back(BucketElement(CDiff, MemI));
          FoundBucket = true;
          break;
        }
      }

      if (!FoundBucket) {
        if (Buckets.size() == MaxVars)
          return MadeChange;
        Buckets.push_back(Bucket(LSCEV, MemI));
      }
    }
  }

  if (Buckets.empty())
    return MadeChange;

  BasicBlock *LoopPredecessor = L->getLoopPredecessor();
  // If there is no loop predecessor, or the loop predecessor's terminator
  // returns a value (which might contribute to determining the loop's
  // iteration space), insert a new preheader for the loop.
  if (!LoopPredecessor ||
      !LoopPredecessor->getTerminator()->getType()->isVoidTy()) {
    LoopPredecessor = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA);
    if (LoopPredecessor)
      MadeChange = true;
  }
  if (!LoopPredecessor)
    return MadeChange;

  LLVM_DEBUG(dbgs() << "PIP: Found " << Buckets.size() << " buckets\n");

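  // Remember the blocks in which we replace pointers so that any PHIs made
  // dead by the rewriting can be cleaned up afterwards.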
  SmallSet<BasicBlock *, 16> BBChanged;
  for (unsigned i = 0, e = Buckets.size(); i != e; ++i) {
    // The base address of each bucket is transformed into a phi and the others
    // are rewritten as offsets of that variable.

    // We have a choice now of which instruction's memory operand we use as the
    // base for the generated PHI. Always picking the first instruction in each
    // bucket does not work well, specifically because that instruction might
    // be a prefetch (and there are no pre-increment dcbt variants). Otherwise,
    // the choice is somewhat arbitrary, because the backend will happily
    // generate direct offsets from both the pre-incremented and
    // post-incremented pointer values. Thus, we'll pick the first non-prefetch
    // instruction in each bucket, and adjust the recurrence and other offsets
    // accordingly.
    for (int j = 0, je = Buckets[i].Elements.size(); j != je; ++j) {
      if (auto *II = dyn_cast<IntrinsicInst>(Buckets[i].Elements[j].Instr))
        if (II->getIntrinsicID() == Intrinsic::prefetch)
          continue;

      // If we'd otherwise pick the first element anyway, there's nothing to do.
      if (j == 0)
        break;

      // If our chosen element has no offset from the base pointer, there's
      // nothing to do.
      if (!Buckets[i].Elements[j].Offset ||
          Buckets[i].Elements[j].Offset->isZero())
        break;

      const SCEV *Offset = Buckets[i].Elements[j].Offset;
      Buckets[i].BaseSCEV = SE->getAddExpr(Buckets[i].BaseSCEV, Offset);
      for (auto &E : Buckets[i].Elements) {
        if (E.Offset)
          E.Offset = cast<SCEVConstant>(SE->getMinusSCEV(E.Offset, Offset));
        else
          E.Offset = cast<SCEVConstant>(SE->getNegativeSCEV(Offset));
      }

      std::swap(Buckets[i].Elements[j], Buckets[i].Elements[0]);
      break;
    }

    const SCEVAddRecExpr *BasePtrSCEV =
      cast<SCEVAddRecExpr>(Buckets[i].BaseSCEV);
    if (!BasePtrSCEV->isAffine())
      continue;

    LLVM_DEBUG(dbgs() << "PIP: Transforming: " << *BasePtrSCEV << "\n");
    assert(BasePtrSCEV->getLoop() == L &&
           "AddRec for the wrong loop?");

    // The instruction corresponding to the Bucket's BaseSCEV must be the first
    // in the vector of elements.
    Instruction *MemI = Buckets[i].Elements.begin()->Instr;
    Value *BasePtr = GetPointerOperand(MemI);
    assert(BasePtr && "No pointer operand");

    Type *I8Ty = Type::getInt8Ty(MemI->getParent()->getContext());
    Type *I8PtrTy = Type::getInt8PtrTy(MemI->getParent()->getContext(),
      BasePtr->getType()->getPointerAddressSpace());

    const SCEV *BasePtrStartSCEV = BasePtrSCEV->getStart();
    if (!SE->isLoopInvariant(BasePtrStartSCEV, L))
      continue;

    const SCEVConstant *BasePtrIncSCEV =
      dyn_cast<SCEVConstant>(BasePtrSCEV->getStepRecurrence(*SE));
    if (!BasePtrIncSCEV)
      continue;
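    // Back the start value up by one increment so that the increment emitted
    // inside the loop produces the original starting address on the first
    // iteration.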
    BasePtrStartSCEV = SE->getMinusSCEV(BasePtrStartSCEV, BasePtrIncSCEV);
    if (!isSafeToExpand(BasePtrStartSCEV, *SE))
      continue;

    LLVM_DEBUG(dbgs() << "PIP: New start is: " << *BasePtrStartSCEV << "\n");

    if (alreadyPrepared(L, MemI, BasePtrStartSCEV, BasePtrIncSCEV))
      continue;

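    // Create the PHI that carries the base pointer: its initial value comes
    // from the loop predecessor, and the incremented pointer (PtrInc, below)
    // feeds it along every back edge.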
    PHINode *NewPHI = PHINode::Create(I8PtrTy, HeaderLoopPredCount,
      MemI->hasName() ? MemI->getName() + ".phi" : "",
      Header->getFirstNonPHI());

    SCEVExpander SCEVE(*SE, Header->getModule()->getDataLayout(), "pistart");
    Value *BasePtrStart = SCEVE.expandCodeFor(BasePtrStartSCEV, I8PtrTy,
      LoopPredecessor->getTerminator());

    // Note that LoopPredecessor might occur in the predecessor list multiple
    // times, and we need to add it the right number of times.
    for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header);
         PI != PE; ++PI) {
      if (*PI != LoopPredecessor)
        continue;

      NewPHI->addIncoming(BasePtrStart, LoopPredecessor);
    }

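    // Emit the per-iteration increment at the top of the header; this GEP is
    // the value that flows into NewPHI along the back edges.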
    Instruction *InsPoint = &*Header->getFirstInsertionPt();
    GetElementPtrInst *PtrInc = GetElementPtrInst::Create(
        I8Ty, NewPHI, BasePtrIncSCEV->getValue(),
        MemI->hasName() ? MemI->getName() + ".inc" : "", InsPoint);
    PtrInc->setIsInBounds(IsPtrInBounds(BasePtr));
    for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header);
         PI != PE; ++PI) {
      if (*PI == LoopPredecessor)
        continue;

      NewPHI->addIncoming(PtrInc, *PI);
    }

    Instruction *NewBasePtr;
    if (PtrInc->getType() != BasePtr->getType())
      NewBasePtr = new BitCastInst(PtrInc, BasePtr->getType(),
        PtrInc->hasName() ? PtrInc->getName() + ".cast" : "", InsPoint);
    else
      NewBasePtr = PtrInc;

    if (Instruction *IDel = dyn_cast<Instruction>(BasePtr))
      BBChanged.insert(IDel->getParent());
    BasePtr->replaceAllUsesWith(NewBasePtr);
    RecursivelyDeleteTriviallyDeadInstructions(BasePtr);

    // Keep track of the replacement pointer values we've inserted so that we
    // don't generate more pointer values than necessary.
    SmallPtrSet<Value *, 16> NewPtrs;
    NewPtrs.insert(NewBasePtr);

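    // Rewrite the remaining accesses in this bucket as constant offsets from
    // the incremented pointer.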
    for (auto I = std::next(Buckets[i].Elements.begin()),
         IE = Buckets[i].Elements.end(); I != IE; ++I) {
      Value *Ptr = GetPointerOperand(I->Instr);
      assert(Ptr && "No pointer operand");
      if (NewPtrs.count(Ptr))
        continue;

      Instruction *RealNewPtr;
      if (!I->Offset || I->Offset->getValue()->isZero()) {
        RealNewPtr = NewBasePtr;
      } else {
        Instruction *PtrIP = dyn_cast<Instruction>(Ptr);
        if (PtrIP && isa<Instruction>(NewBasePtr) &&
            cast<Instruction>(NewBasePtr)->getParent() == PtrIP->getParent())
          PtrIP = nullptr;
        else if (PtrIP && isa<PHINode>(PtrIP))
          PtrIP = &*PtrIP->getParent()->getFirstInsertionPt();
        else if (!PtrIP)
          PtrIP = I->Instr;

        GetElementPtrInst *NewPtr = GetElementPtrInst::Create(
            I8Ty, PtrInc, I->Offset->getValue(),
            I->Instr->hasName() ? I->Instr->getName() + ".off" : "", PtrIP);
        if (!PtrIP)
          NewPtr->insertAfter(cast<Instruction>(PtrInc));
        NewPtr->setIsInBounds(IsPtrInBounds(Ptr));
        RealNewPtr = NewPtr;
      }

      if (Instruction *IDel = dyn_cast<Instruction>(Ptr))
        BBChanged.insert(IDel->getParent());

      Instruction *ReplNewPtr;
      if (Ptr->getType() != RealNewPtr->getType()) {
        ReplNewPtr = new BitCastInst(RealNewPtr, Ptr->getType(),
          Ptr->hasName() ? Ptr->getName() + ".cast" : "");
        ReplNewPtr->insertAfter(RealNewPtr);
      } else
        ReplNewPtr = RealNewPtr;

      Ptr->replaceAllUsesWith(ReplNewPtr);
      RecursivelyDeleteTriviallyDeadInstructions(Ptr);

      NewPtrs.insert(RealNewPtr);
    }

    MadeChange = true;
  }

  for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
       I != IE; ++I) {
    if (BBChanged.count(*I))
      DeleteDeadPHIs(*I);
  }

  return MadeChange;
}