//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into simpler forms suitable for subsequent
// analysis and transformation.
//
// If the trip count of a loop is computable, this pass also makes the following
// changes:
//   1. The exit condition for the loop is canonicalized to compare the
//      induction value against the exit value. This turns loops like:
//        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
//   2. Any use outside of the loop of an expression derived from the indvar
//      is changed to compute the derived value outside of the loop, eliminating
//      the dependence on the exit value of the induction variable. If the only
//      purpose of the loop is to compute the exit value of some derived
//      expression, this transformation will make the loop dead.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include <cassert>
#include <cstdint>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "indvars"

STATISTIC(NumWidened, "Number of indvars widened");
STATISTIC(NumReplaced, "Number of exit values replaced");
STATISTIC(NumLFTR, "Number of loop exit tests replaced");
STATISTIC(NumElimExt, "Number of IV sign/zero extends eliminated");
STATISTIC(NumElimIV, "Number of congruent IVs eliminated");

// Trip count verification can be enabled by default under NDEBUG if we
// implement a strong expression equivalence checker in SCEV. Until then, we
// use the verify-indvars flag, which may assert in some cases.
static cl::opt<bool> VerifyIndvars(
    "verify-indvars", cl::Hidden,
    cl::desc("Verify the ScalarEvolution result after running indvars"));

enum ReplaceExitVal { NeverRepl, OnlyCheapRepl, NoHardUse, AlwaysRepl };

static cl::opt<ReplaceExitVal> ReplaceExitValue(
    "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
    cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
    cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
               clEnumValN(OnlyCheapRepl, "cheap",
                          "only replace exit value when the cost is cheap"),
               clEnumValN(NoHardUse, "noharduse",
                          "only replace exit values when loop def likely dead"),
               clEnumValN(AlwaysRepl, "always",
                          "always replace exit value whenever possible")));

static cl::opt<bool> UsePostIncrementRanges(
    "indvars-post-increment-ranges", cl::Hidden,
    cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
    cl::init(true));

static cl::opt<bool>
    DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
                cl::desc("Disable Linear Function Test Replace optimization"));

static cl::opt<bool>
    LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
                    cl::desc("Predicate conditions in read only loops"));

namespace {

struct RewritePhi;

class IndVarSimplify {
  LoopInfo *LI;
  ScalarEvolution *SE;
  DominatorTree *DT;
  const DataLayout &DL;
  TargetLibraryInfo *TLI;
  const TargetTransformInfo *TTI;

  SmallVector<WeakTrackingVH, 16> DeadInsts;

  bool isValidRewrite(Value *FromVal, Value *ToVal);

  bool handleFloatingPointIV(Loop *L, PHINode *PH);
  bool rewriteNonIntegerIVs(Loop *L);

  bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
  /// Try to eliminate loop exits based on analyzeable exit counts
  bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
  /// Try to form loop invariant tests for loop exits by changing how many
  /// iterations of the loop run when that is unobservable.
  bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);

  bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet);
  bool rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
  bool rewriteFirstIterationLoopExitValues(Loop *L);
  bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) const;

  bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                                 const SCEV *ExitCount,
                                 PHINode *IndVar, SCEVExpander &Rewriter);

  bool sinkUnusedInvariants(Loop *L);

public:
  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                 const DataLayout &DL, TargetLibraryInfo *TLI,
                 TargetTransformInfo *TTI)
      : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) {}

  bool run(Loop *L);
};

} // end anonymous namespace

/// Return true if the SCEV expansion generated by the rewriter can replace the
/// original value. SCEV guarantees that it produces the same value, but the way
/// it is produced may be illegal IR. Ideally, this function will only be
/// called for verification.
bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
  // If an SCEV expression subsumed multiple pointers, its expansion could
  // reassociate the GEP changing the base pointer. This is illegal because the
  // final address produced by a GEP chain must be inbounds relative to its
  // underlying object. Otherwise basic alias analysis, among other things,
  // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid
  // producing an expression involving multiple pointers. Until then, we must
  // bail out here.
  //
  // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject
  // because it understands lcssa phis while SCEV does not.
  Value *FromPtr = FromVal;
  Value *ToPtr = ToVal;
  if (auto *GEP = dyn_cast<GEPOperator>(FromVal)) {
    FromPtr = GEP->getPointerOperand();
  }
  if (auto *GEP = dyn_cast<GEPOperator>(ToVal)) {
    ToPtr = GEP->getPointerOperand();
  }
  if (FromPtr != FromVal || ToPtr != ToVal) {
    // Quickly check the common case.
    if (FromPtr == ToPtr)
      return true;

    // SCEV may have rewritten an expression that produces the GEP's pointer
    // operand. That's ok as long as the pointer operand has the same base
    // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the
    // base of a recurrence. This handles the case in which SCEV expansion
    // converts a pointer type recurrence into a nonrecurrent pointer base
    // indexed by an integer recurrence.
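    //
    // For example (an illustrative IR sketch): expansion may rewrite
    //   %p = phi i8* [ %base, %preheader ], [ %p.next, %loop ]
    // as a GEP of %base indexed by an integer recurrence; both expressions
    // still share the base %base, so the rewrite is accepted.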

    // If the GEP base pointer is a vector of pointers, abort.
    if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy())
      return false;

    const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr));
    const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr));
    if (FromBase == ToBase)
      return true;

    LLVM_DEBUG(dbgs() << "INDVARS: GEP rewrite bail out " << *FromBase
                      << " != " << *ToBase << "\n");

    return false;
  }
  return true;
}

/// Determine the insertion point for this user. By default, insert immediately
/// before the user. SCEVExpander or LICM will hoist loop invariants out of the
/// loop. For PHI nodes, there may be multiple uses, so compute the nearest
/// common dominator for the incoming blocks. A nullptr can be returned if no
/// viable location is found: it may happen if User is a PHI and Def only comes
/// to this PHI from unreachable blocks.
static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
                                          DominatorTree *DT, LoopInfo *LI) {
  PHINode *PHI = dyn_cast<PHINode>(User);
  if (!PHI)
    return User;

  Instruction *InsertPt = nullptr;
  for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
    if (PHI->getIncomingValue(i) != Def)
      continue;

    BasicBlock *InsertBB = PHI->getIncomingBlock(i);

    if (!DT->isReachableFromEntry(InsertBB))
      continue;

    if (!InsertPt) {
      InsertPt = InsertBB->getTerminator();
      continue;
    }
    InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
    InsertPt = InsertBB->getTerminator();
  }

  // If we have skipped all inputs, it means that Def only comes to Phi from
  // unreachable blocks.
  if (!InsertPt)
    return nullptr;

  auto *DefI = dyn_cast<Instruction>(Def);
  if (!DefI)
    return InsertPt;

  assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");

  auto *L = LI->getLoopFor(DefI->getParent());
  assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));

  for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
    if (LI->getLoopFor(DTN->getBlock()) == L)
      return DTN->getBlock()->getTerminator();

  llvm_unreachable("DefI dominates InsertPt!");
}

//===----------------------------------------------------------------------===//
// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
//===----------------------------------------------------------------------===//

/// Convert APF to an integer, if possible.
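/// For example (illustrative): 42.0 converts exactly to 42, while 42.5 is
/// inexact and 1e100 does not fit in 64 bits, so both are rejected.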
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
  bool isExact = false;
  // See if we can convert this to an int64_t.
  uint64_t UIntVal;
  if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
                           APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
      !isExact)
    return false;
  IntVal = UIntVal;
  return true;
}

/// If the loop has a floating-point induction variable, insert a corresponding
/// integer induction variable if possible.
/// For example,
///   for (double i = 0; i < 10000; ++i)
///     bar(i)
/// is converted into
///   for (int i = 0; i < 10000; ++i)
///     bar((double)i);
bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge = IncomingEdge ^ 1;

  // Check incoming value.
  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return false;

  // Check the IV increment. Reject this PN if the increment operation is not
  // an add or if the increment value cannot be represented by an integer.
  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;

  // If this is not an add of the PHI with a constantfp, or if the constant fp
  // is not an integer, bail out.
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return false;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator.
  Value::user_iterator IncrUse = Incr->user_begin();
  Instruction *U1 = cast<Instruction>(*IncrUse++);
  if (IncrUse == Incr->user_end()) return false;
  Instruction *U2 = cast<Instruction>(*IncrUse++);
  if (IncrUse != Incr->user_end()) return false;

  // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
  // only used by a branch, we can't transform it.
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (!Compare || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->user_back()))
    return false;

  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop. If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be out
  // of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return false;

  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == nullptr ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return false;

  // Find new predicate for integer comparison.
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (Compare->getPredicate()) {
  default: return false;  // Unknown comparison.
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
  }

  // We convert the floating point induction variable to a signed i32 value if
  // we can. This is only safe if the comparison will not overflow in a way
  // that won't be trapped by the integer equivalent operations. Check for this
  // now.
  // TODO: We could use i64 if it is native and the range requires it.

  // The start/stride/exit values must all fit in signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    return false;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncValue == 0)
    return false;

  // Positive and negative strides have different safety conditions.
  if (IncValue > 0) {
    // If we have a positive stride, we require the init to be less than the
    // exit value.
    if (InitValue >= ExitValue)
      return false;

    uint32_t Range = uint32_t(ExitValue-InitValue);
    // Check for infinite loop, either:
    // while (i <= Exit) or until (i > Exit)
    if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
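    // E.g. (illustrative): with InitValue == 0, IncValue == 3 and
    // ExitValue == 10, the IV steps 0, 3, 6, 9, 12, ... and never equals 10,
    // so an EQ/NE exit test would only terminate after i32 wraparound.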
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
      return false;
  } else {
    // If we have a negative stride, we require the init to be greater than the
    // exit value.
    if (InitValue <= ExitValue)
      return false;

    uint32_t Range = uint32_t(InitValue-ExitValue);
    // Check for infinite loop, either:
    // while (i >= Exit) or until (i < Exit)
    if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(-IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
      return false;
  }

  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());

  // Insert new integer induction variable.
  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
                      PN->getIncomingBlock(IncomingEdge));

  Value *NewAdd =
      BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
                                Incr->getName()+".int", Incr);
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));

  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
                                      ConstantInt::get(Int32Ty, ExitValue),
                                      Compare->getName());

  // In the following deletions, PN may become dead and may be deleted.
  // Use a WeakTrackingVH to observe whether this happens.
  WeakTrackingVH WeakPH = PN;

  // Delete the old floating point exit comparison. The branch starts using the
  // new comparison.
  NewCompare->takeName(Compare);
  Compare->replaceAllUsesWith(NewCompare);
  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI);

  // Delete the old floating point increment.
  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI);

  // If the FP induction variable still has uses, this is because something else
  // in the loop uses its value. In order to canonicalize the induction
  // variable, we chose to eliminate the IV and rewrite it in terms of an
  // int->fp cast.
  //
  // We give preference to sitofp over uitofp because it is faster on most
  // platforms.
  if (WeakPH) {
    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                 &*PN->getParent()->getFirstInsertionPt());
    PN->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI);
  }
  return true;
}

bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
  // First step. Check to see if there are any floating-point recurrences.
  // If there are, change them into integer recurrences, permitting analysis by
  // the SCEV routines.
  BasicBlock *Header = L->getHeader();

  SmallVector<WeakTrackingVH, 8> PHIs;
  for (PHINode &PN : Header->phis())
    PHIs.push_back(&PN);

  bool Changed = false;
  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
    if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
      Changed |= handleFloatingPointIV(L, PN);

  // If the loop previously had a floating-point IV, ScalarEvolution
  // may not have been able to compute a trip count. Now that we've done some
  // re-writing, the trip count may be computable.
  if (Changed)
    SE->forgetLoop(L);
  return Changed;
}

namespace {

// Collect information about PHI nodes which can be transformed in
// rewriteLoopExitValues.
struct RewritePhi {
  PHINode *PN;

  // Ith incoming value.
  unsigned Ith;

  // Exit value after expansion.
  Value *Val;

  // True if the expansion is high cost.
  bool HighCost;

  RewritePhi(PHINode *P, unsigned I, Value *V, bool H)
      : PN(P), Ith(I), Val(V), HighCost(H) {}
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
// rewriteLoopExitValues - Optimize IV users outside the loop.
// As a side effect, reduces the amount of IV processing within the loop.
//===----------------------------------------------------------------------===//

bool IndVarSimplify::hasHardUserWithinLoop(const Loop *L,
                                           const Instruction *I) const {
  SmallPtrSet<const Instruction *, 8> Visited;
  SmallVector<const Instruction *, 8> WorkList;
  Visited.insert(I);
  WorkList.push_back(I);
  while (!WorkList.empty()) {
    const Instruction *Curr = WorkList.pop_back_val();
    // This use is outside the loop, nothing to do.
    if (!L->contains(Curr))
      continue;
    // Do we assume it is a "hard" use which will not be eliminated easily?
    if (Curr->mayHaveSideEffects())
      return true;
    // Otherwise, add all its users to worklist.
    for (auto U : Curr->users()) {
      auto *UI = cast<Instruction>(U);
      if (Visited.insert(UI).second)
        WorkList.push_back(UI);
    }
  }
  return false;
}

/// Check to see if this loop has a computable loop-invariant execution count.
/// If so, this means that we can compute the final value of any expressions
/// that are recurrent in the loop, and substitute the exit values from the loop
/// into any instructions outside of the loop that use the final values of the
/// current expressions.
///
/// This is mostly redundant with the regular IndVarSimplify activities that
/// happen later, except that it's more powerful in some cases, because it's
/// able to brute-force evaluate arbitrary instructions as long as they have
/// constant operands at the beginning of the loop.
bool IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
  // Check a pre-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");

  SmallVector<BasicBlock*, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  SmallVector<RewritePhi, 8> RewritePhiSet;
  // Find all values that are computed inside the loop, but used outside of it.
  // Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan
  // the exit blocks of the loop to find them.
  for (BasicBlock *ExitBB : ExitBlocks) {
    // If there are no PHI nodes in this exit block, then no values defined
    // inside the loop are used on this path, skip it.
    PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
    if (!PN) continue;

    unsigned NumPreds = PN->getNumIncomingValues();

    // Iterate over all of the PHI nodes.
    BasicBlock::iterator BBI = ExitBB->begin();
    while ((PN = dyn_cast<PHINode>(BBI++))) {
      if (PN->use_empty())
        continue; // dead use, don't replace it

      if (!SE->isSCEVable(PN->getType()))
        continue;

      // It's necessary to tell ScalarEvolution about this explicitly so that
      // it can walk the def-use list and forget all SCEVs, as it may not be
      // watching the PHI itself. Once the new exit value is in place, there
      // may not be a def-use connection between the loop and every instruction
      // which got a SCEVAddRecExpr for that loop.
      SE->forgetValue(PN);

      // Iterate over all of the values in all the PHI nodes.
      for (unsigned i = 0; i != NumPreds; ++i) {
        // If the value being merged in is not an instruction or is not defined
        // in the loop, skip it.
        Value *InVal = PN->getIncomingValue(i);
        if (!isa<Instruction>(InVal))
          continue;

        // If this pred is for a subloop, not L itself, skip it.
        if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
          continue; // The Block is in a subloop, skip it.

        // Check that InVal is defined in the loop.
        Instruction *Inst = cast<Instruction>(InVal);
        if (!L->contains(Inst))
          continue;

        // Okay, this instruction has a user outside of the current loop
        // and varies predictably *inside* the loop. Evaluate the value it
        // contains when the loop exits, if possible. We prefer to start with
        // expressions which are true for all exits (so as to maximize
        // expression reuse by the SCEVExpander), but resort to per-exit
        // evaluation if that fails.
        const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
        if (isa<SCEVCouldNotCompute>(ExitValue) ||
            !SE->isLoopInvariant(ExitValue, L) ||
            !isSafeToExpand(ExitValue, *SE)) {
          // TODO: This should probably be sunk into SCEV in some way; maybe a
          // getSCEVForExit(SCEV*, L, ExitingBB)? It can be generalized for
          // most SCEV expressions and other recurrence types (e.g. shift
          // recurrences). Is there existing code we can reuse?
          const SCEV *ExitCount = SE->getExitCount(L, PN->getIncomingBlock(i));
          if (isa<SCEVCouldNotCompute>(ExitCount))
            continue;
          if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Inst)))
            if (AddRec->getLoop() == L)
              ExitValue = AddRec->evaluateAtIteration(ExitCount, *SE);
          if (isa<SCEVCouldNotCompute>(ExitValue) ||
              !SE->isLoopInvariant(ExitValue, L) ||
              !isSafeToExpand(ExitValue, *SE))
            continue;
        }

        // Computing the value outside of the loop brings no benefit if it is
        // definitely used inside the loop in a way which can not be optimized
        // away. Avoid doing so unless we know we have a value which computes
        // the ExitValue already. TODO: This should be merged into SCEV
        // expander to leverage its knowledge of existing expressions.
        if (ReplaceExitValue != AlwaysRepl &&
            !isa<SCEVConstant>(ExitValue) && !isa<SCEVUnknown>(ExitValue) &&
            hasHardUserWithinLoop(L, Inst))
          continue;

        bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst);
        Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);

        LLVM_DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal
                          << '\n'
                          << "  LoopVal = " << *Inst << "\n");

        if (!isValidRewrite(Inst, ExitVal)) {
          DeadInsts.push_back(ExitVal);
          continue;
        }

#ifndef NDEBUG
        // If we reuse an instruction from a loop which is neither L nor one of
        // its containing loops, we end up breaking LCSSA form for this loop by
        // creating a new use of its instruction.
        if (auto *ExitInsn = dyn_cast<Instruction>(ExitVal))
          if (auto *EVL = LI->getLoopFor(ExitInsn->getParent()))
            if (EVL != L)
              assert(EVL->contains(L) && "LCSSA breach detected!");
#endif

        // Collect all the candidate PHINodes to be rewritten.
        RewritePhiSet.emplace_back(PN, i, ExitVal, HighCost);
      }
    }
  }

  bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);

  bool Changed = false;
  // Transformation.
  for (const RewritePhi &Phi : RewritePhiSet) {
    PHINode *PN = Phi.PN;
    Value *ExitVal = Phi.Val;

    // Only do the rewrite when the ExitValue can be expanded cheaply.
    // If LoopCanBeDel is true, rewrite exit value aggressively.
    if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost) {
      DeadInsts.push_back(ExitVal);
      continue;
    }

    Changed = true;
    ++NumReplaced;
    Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith));
    PN->setIncomingValue(Phi.Ith, ExitVal);

    // If this instruction is dead now, delete it. Don't do it now to avoid
    // invalidating iterators.
    if (isInstructionTriviallyDead(Inst, TLI))
      DeadInsts.push_back(Inst);

    // Replace PN with ExitVal if that is legal and does not break LCSSA.
    if (PN->getNumIncomingValues() == 1 &&
        LI->replacementPreservesLCSSAForm(PN, ExitVal)) {
      PN->replaceAllUsesWith(ExitVal);
      PN->eraseFromParent();
    }
  }

  // The insertion point instruction may have been deleted; clear it out
  // so that the rewriter doesn't trip over it later.
  Rewriter.clearInsertPoint();
  return Changed;
}

//===---------------------------------------------------------------------===//
// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
// they will exit at the first iteration.
//===---------------------------------------------------------------------===//

/// Check to see if this loop has loop invariant conditions which lead to loop
/// exits. If so, we know that if the exit path is taken, it is at the first
/// loop iteration. This lets us predict exit values of PHI nodes that live in
/// the loop header.
bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
  // Verify the input to the pass is already in LCSSA form.
  assert(L->isLCSSAForm(*DT));

  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  bool MadeAnyChanges = false;
  for (auto *ExitBB : ExitBlocks) {
    // If there are no more PHI nodes in this exit block, then no more
    // values defined inside the loop are used on this path.
    for (PHINode &PN : ExitBB->phis()) {
      for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
           IncomingValIdx != E; ++IncomingValIdx) {
        auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);

        // Can we prove that the exit must run on the first iteration if it
        // runs at all? (i.e. early exits are fine for our purposes, but
        // traces which lead to this exit being taken on the 2nd iteration
        // aren't.) Note that this is about whether the exit branch is
        // executed, not about whether it is taken.
        if (!L->getLoopLatch() ||
            !DT->dominates(IncomingBB, L->getLoopLatch()))
          continue;

        // Get condition that leads to the exit path.
        auto *TermInst = IncomingBB->getTerminator();

        Value *Cond = nullptr;
        if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
          // Must be a conditional branch, otherwise the block
          // should not be in the loop.
          Cond = BI->getCondition();
        } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
          Cond = SI->getCondition();
        else
          continue;

        if (!L->isLoopInvariant(Cond))
          continue;

        auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));

        // Only deal with PHIs in the loop header.
        if (!ExitVal || ExitVal->getParent() != L->getHeader())
          continue;

        // If ExitVal is a PHI on the loop header, then we know its
        // value along this exit because the exit can only be taken
        // on the first iteration.
        auto *LoopPreheader = L->getLoopPreheader();
        assert(LoopPreheader && "Invalid loop");
        int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
        if (PreheaderIdx != -1) {
          assert(ExitVal->getParent() == L->getHeader() &&
                 "ExitVal must be in loop header");
          MadeAnyChanges = true;
          PN.setIncomingValue(IncomingValIdx,
                              ExitVal->getIncomingValue(PreheaderIdx));
        }
      }
    }
  }
  return MadeAnyChanges;
}

/// Check whether it is possible to delete the loop after rewriting exit
/// value. If it is possible, ignore ReplaceExitValue and do rewriting
/// aggressively.
bool IndVarSimplify::canLoopBeDeleted(
    Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) {
  BasicBlock *Preheader = L->getLoopPreheader();
  // If there is no preheader, the loop will not be deleted.
  if (!Preheader)
    return false;

  // In the LoopDeletion pass, a loop can be deleted even when
  // ExitingBlocks.size() > 1; we forgo the multiple-ExitingBlocks case here
  // for simplicity.
  // TODO: If we see a testcase where multiple ExitingBlocks can be deleted
  // after exit value rewriting, we can enhance the logic here.
  SmallVector<BasicBlock *, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);
  if (ExitBlocks.size() != 1 || ExitingBlocks.size() != 1)
    return false;

  BasicBlock *ExitBlock = ExitBlocks[0];
  BasicBlock::iterator BI = ExitBlock->begin();
  while (PHINode *P = dyn_cast<PHINode>(BI)) {
    Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]);

    // If the Incoming value of P is found in RewritePhiSet, we know it
    // could be rewritten to use a loop invariant value in transformation
    // phase later. Skip it in the loop invariant check below.
    bool found = false;
    for (const RewritePhi &Phi : RewritePhiSet) {
      unsigned i = Phi.Ith;
      if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) {
        found = true;
        break;
      }
    }

    Instruction *I;
    if (!found && (I = dyn_cast<Instruction>(Incoming)))
      if (!L->hasLoopInvariantOperands(I))
        return false;

    ++BI;
  }

  for (auto *BB : L->blocks())
    if (llvm::any_of(*BB, [](Instruction &I) {
          return I.mayHaveSideEffects();
        }))
      return false;

  return true;
}

//===----------------------------------------------------------------------===//
// IV Widening - Extend the width of an IV to cover its widest uses.
//===----------------------------------------------------------------------===//

namespace {

// Collect information about induction variables that are used by sign/zero
// extend operations. This information is recorded by CollectExtend and provides
// the input to WidenIV.
struct WideIVInfo {
  PHINode *NarrowIV = nullptr;

  // Widest integer type created by a [sz]ext.
  Type *WidestNativeType = nullptr;

  // Was a sext user seen before a zext?
  bool IsSigned = false;
};

} // end anonymous namespace

/// Update information about the induction variable that is extended by this
/// sign or zero extend operation. This is used to determine the final width of
/// the IV before actually widening it.
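/// For example (illustrative): visiting 'sext i32 %iv to i64' on a target
/// where i64 is a legal integer type records WidestNativeType = i64 and
/// IsSigned = true.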
static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
                        const TargetTransformInfo *TTI) {
  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
    return;

  Type *Ty = Cast->getType();
  uint64_t Width = SE->getTypeSizeInBits(Ty);
  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
    return;

  // Check that `Cast` actually extends the induction variable (we rely on this
  // later). This takes care of cases where `Cast` is extending a truncation of
  // the narrow induction variable, and thus can end up being narrower than the
  // "narrow" induction variable.
  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
  if (NarrowIVWidth >= Width)
    return;

  // Cast is either an sext or zext up to this point.
  // We should not widen an indvar if arithmetic on the wider indvar is more
  // expensive than that on the narrower indvar. We check only the cost of ADD
  // because at least an ADD is required to increment the induction variable. We
  // could compute more comprehensively the cost of all instructions on the
  // induction variable when necessary.
  if (TTI &&
      TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
          TTI->getArithmeticInstrCost(Instruction::Add,
                                      Cast->getOperand(0)->getType())) {
    return;
  }

  if (!WI.WidestNativeType) {
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
    WI.IsSigned = IsSigned;
    return;
  }

  // We extend the IV to satisfy the sign of its first user, arbitrarily.
  if (WI.IsSigned != IsSigned)
    return;

  if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
}

namespace {

/// Record a link in the Narrow IV def-use chain along with the WideIV that
/// computes the same value as the Narrow IV def. This avoids caching Use*
/// pointers.
struct NarrowIVDefUse {
  Instruction *NarrowDef = nullptr;
  Instruction *NarrowUse = nullptr;
  Instruction *WideDef = nullptr;

  // True if the narrow def is never negative. Tracking this information lets
  // us use a sign extension instead of a zero extension or vice versa, when
  // profitable and legal.
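  // For example (illustrative): if the narrow def is known non-negative,
  // zext(narrow) and sext(narrow) produce the same wide value, so whichever
  // extension matches the existing wide IV can be chosen.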
  bool NeverNegative = false;

  NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
                 bool NeverNegative)
      : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
        NeverNegative(NeverNegative) {}
};

/// The goal of this transform is to remove sign and zero extends without
/// creating any new induction variables. To do this, it creates a new phi of
/// the wider type and redirects all users, either removing extends or inserting
/// truncs whenever we stop propagating the type.
class WidenIV {
  // Parameters
  PHINode *OrigPhi;
  Type *WideType;

  // Context
  LoopInfo *LI;
  Loop *L;
  ScalarEvolution *SE;
  DominatorTree *DT;

  // Does the module have any calls to the llvm.experimental.guard intrinsic
  // at all? If not we can avoid scanning instructions looking for guards.
  bool HasGuards;

  // Result
  PHINode *WidePhi = nullptr;
  Instruction *WideInc = nullptr;
  const SCEV *WideIncExpr = nullptr;
  SmallVectorImpl<WeakTrackingVH> &DeadInsts;

  SmallPtrSet<Instruction *, 16> Widened;
  SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;

  enum ExtendKind { ZeroExtended, SignExtended, Unknown };

  // A map tracking the kind of extension used to widen each narrow IV
  // and narrow IV user.
  // Key: pointer to a narrow IV or IV user.
  // Value: the kind of extension used to widen this Instruction.
  DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;

  using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>;

  // A map with control-dependent ranges for post increment IV uses. The key is
  // a pair of IV def and a use of this def denoting the context. The value is
  // a ConstantRange representing possible values of the def at the given
  // context.
  DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;

  Optional<ConstantRange> getPostIncRangeInfo(Value *Def,
                                              Instruction *UseI) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    return It == PostIncRangeInfos.end()
               ? Optional<ConstantRange>(None)
               : Optional<ConstantRange>(It->second);
  }

  void calculatePostIncRanges(PHINode *OrigPhi);
  void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);

  void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    if (It == PostIncRangeInfos.end())
      PostIncRangeInfos.insert({Key, R});
    else
      It->second = R.intersectWith(It->second);
  }

public:
  WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
          DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI,
          bool HasGuards)
      : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
        L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
        HasGuards(HasGuards), DeadInsts(DI) {
    assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
    ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended;
  }

  PHINode *createWideIV(SCEVExpander &Rewriter);

protected:
  Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
                          Instruction *Use);

  Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
  Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
                                     const SCEVAddRecExpr *WideAR);
  Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);

  ExtendKind getExtendKind(Instruction *I);

  using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>;

  WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);

  WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);

  const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                              unsigned OpCode) const;

  Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);

  bool widenLoopCompare(NarrowIVDefUse DU);
  bool widenWithVariantLoadUse(NarrowIVDefUse DU);
  void widenWithVariantLoadUseCodegen(NarrowIVDefUse DU);

  void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
};

} // end anonymous namespace

Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
                                 bool IsSigned, Instruction *Use) {
  // Set the debug location and conservative insertion point.
  IRBuilder<> Builder(Use);
  // Hoist the insertion point into loop preheaders as far as possible.
  for (const Loop *L = LI->getLoopFor(Use->getParent());
       L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper);
       L = L->getParentLoop())
    Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());

  return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
                    Builder.CreateZExt(NarrowOper, WideType);
}

/// Instantiate a wide operation to replace a narrow operation. This only needs
/// to handle operations that can evaluate to a SCEVAddRec. It can safely return
/// nullptr for any operation we decide not to clone.
Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU,
                                  const SCEVAddRecExpr *WideAR) {
  unsigned Opcode = DU.NarrowUse->getOpcode();
  switch (Opcode) {
  default:
    return nullptr;
  case Instruction::Add:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::Sub:
    return cloneArithmeticIVUser(DU, WideAR);

  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    return cloneBitwiseIVUser(DU);
  }
}

Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");

  // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
  // about the narrow operand yet so must insert a [sz]ext. It is probably loop
  // invariant and will be folded or hoisted. If it actually comes from a
  // widened IV, it should be removed during a future call to widenIVUse.
  bool IsSigned = getExtendKind(NarrowDef) == SignExtended;
  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      IsSigned, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      IsSigned, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());
  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}

Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU,
                                            const SCEVAddRecExpr *WideAR) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");

  unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;

  // We're trying to find X such that
  //
  //  Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
  //
  // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
  // and check using SCEV if any of them are correct.

  // Returns true if extending NonIVNarrowDef according to `SignExt` is a
  // correct solution to X.
  auto GuessNonIVOperand = [&](bool SignExt) {
    const SCEV *WideLHS;
    const SCEV *WideRHS;

    auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
      if (SignExt)
        return SE->getSignExtendExpr(S, Ty);
      return SE->getZeroExtendExpr(S, Ty);
    };

    if (IVOpIdx == 0) {
      WideLHS = SE->getSCEV(WideDef);
      const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
      WideRHS = GetExtend(NarrowRHS, WideType);
    } else {
      const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
      WideLHS = GetExtend(NarrowLHS, WideType);
      WideRHS = SE->getSCEV(WideDef);
    }

    // WideUse is "WideDef `op.wide` X" as described in the comment.
    const SCEV *WideUse = nullptr;

    switch (NarrowUse->getOpcode()) {
    default:
      llvm_unreachable("No other possibility!");

    case Instruction::Add:
      WideUse = SE->getAddExpr(WideLHS, WideRHS);
      break;

    case Instruction::Mul:
      WideUse = SE->getMulExpr(WideLHS, WideRHS);
      break;

    case Instruction::UDiv:
      WideUse = SE->getUDivExpr(WideLHS, WideRHS);
      break;

    case Instruction::Sub:
      WideUse = SE->getMinusSCEV(WideLHS, WideRHS);
      break;
    }

    return WideUse == WideAR;
  };

  bool SignExtend = getExtendKind(NarrowDef) == SignExtended;
  if (!GuessNonIVOperand(SignExtend)) {
    SignExtend = !SignExtend;
    if (!GuessNonIVOperand(SignExtend))
      return nullptr;
  }

  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      SignExtend, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      SignExtend, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());

  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}

WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
  auto It = ExtendKindMap.find(I);
  assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
  return It->second;
}

const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                                     unsigned OpCode) const {
  if (OpCode == Instruction::Add)
    return SE->getAddExpr(LHS, RHS);
  if (OpCode == Instruction::Sub)
    return SE->getMinusSCEV(LHS, RHS);
  if (OpCode == Instruction::Mul)
    return SE->getMulExpr(LHS, RHS);

  llvm_unreachable("Unsupported opcode.");
}

/// No-wrap operations can transfer sign extension of their result to their
/// operands. Generate the SCEV value for the widened operation without
/// actually modifying the IR yet. If the expression after extending the
/// operands is an AddRec for this loop, return the AddRec and the kind of
/// extension used.
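/// For example (illustrative): for 'add nsw i32 %iv, %k' with a sign-extended
/// narrow IV, sext(%iv + %k) distributes to (sext %iv) + (sext %k), which SCEV
/// folds to an AddRec of this loop, so the add can be widened in place.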
WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
  // Handle the common case of add<nsw/nuw>.
  const unsigned OpCode = DU.NarrowUse->getOpcode();
  // Only Add/Sub/Mul instructions are supported yet.
  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
      OpCode != Instruction::Mul)
    return {nullptr, Unknown};

  // One operand (NarrowDef) has already been extended to WideDef. Now determine
  // if extending the other will lead to a recurrence.
  const unsigned ExtendOperIdx =
      DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
  assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");

  const SCEV *ExtendOperExpr = nullptr;
  const OverflowingBinaryOperator *OBO =
      cast<OverflowingBinaryOperator>(DU.NarrowUse);
  ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
  if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
    ExtendOperExpr = SE->getSignExtendExpr(
        SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
    ExtendOperExpr = SE->getZeroExtendExpr(
        SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else
    return {nullptr, Unknown};

  // When creating this SCEV expr, don't apply the current operation's NSW or
  // NUW flags. This instruction may be guarded by control flow that the
  // no-wrap behavior depends on. Non-control-equivalent instructions can be
  // mapped to the same SCEV expression, and it would be incorrect to transfer
  // NSW/NUW semantics to those operations.
  const SCEV *lhs = SE->getSCEV(DU.WideDef);
  const SCEV *rhs = ExtendOperExpr;

  // Let's swap operands to the initial order for the case of non-commutative
  // operations, like SUB. See PR21014.
  if (ExtendOperIdx == 0)
    std::swap(lhs, rhs);
  const SCEVAddRecExpr *AddRec =
      dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));

  if (!AddRec || AddRec->getLoop() != L)
    return {nullptr, Unknown};

  return {AddRec, ExtKind};
}

/// Is this instruction potentially interesting for further simplification after
/// widening its type? In other words, can the extend be safely hoisted out of
/// the loop, with SCEV reducing the value to a recurrence on the same loop? If
/// so, return the extended recurrence and the kind of extension used. Otherwise
/// return {nullptr, Unknown}.
WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) {
  if (!SE->isSCEVable(DU.NarrowUse->getType()))
    return {nullptr, Unknown};

  const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
  if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
      SE->getTypeSizeInBits(WideType)) {
    // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
    // index. So don't follow this use.
    return {nullptr, Unknown};
  }

  const SCEV *WideExpr;
  ExtendKind ExtKind;
  if (DU.NeverNegative) {
    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
    if (isa<SCEVAddRecExpr>(WideExpr))
      ExtKind = SignExtended;
    else {
      WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
      ExtKind = ZeroExtended;
    }
  } else if (getExtendKind(DU.NarrowDef) == SignExtended) {
    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
    ExtKind = SignExtended;
  } else {
    WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
    ExtKind = ZeroExtended;
  }
  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
  if (!AddRec || AddRec->getLoop() != L)
    return {nullptr, Unknown};
  return {AddRec, ExtKind};
}

/// This IV user cannot be widened. Replace this use of the original narrow IV
/// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) {
  auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
  if (!InsertPt)
    return;
  LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user "
                    << *DU.NarrowUse << "\n");
  IRBuilder<> Builder(InsertPt);
  Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
}

/// If the narrow use is a compare instruction, then widen the compare
/// (and possibly the other operand). The extend operation is hoisted into the
/// loop preheader as far as possible.
bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
  ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
  if (!Cmp)
    return false;

  // We can legally widen the comparison in the following two cases:
  //
  //  - The signedness of the IV extension and comparison match
  //
  //  - The narrow IV is always positive (and thus its sign extension is equal
  //    to its zero extension). For instance, let's say we're zero extending
  //    %narrow for the following use
  //
  //      icmp slt i32 %narrow, %val   ... (A)
  //
  //    and %narrow is always positive. Then
  //
  //      (A) == icmp slt i32 sext(%narrow), sext(%val)
  //          == icmp slt i32 zext(%narrow), sext(%val)
  bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended;
  if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
    return false;

  Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
  unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
  unsigned IVWidth = SE->getTypeSizeInBits(WideType);
  assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");

  // Widen the compare instruction.
  auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
  if (!InsertPt)
    return false;
  IRBuilder<> Builder(InsertPt);
  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);

  // Widen the other operand of the compare, if necessary.
  if (CastWidth < IVWidth) {
    Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
    DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
  }
  return true;
}
1401
1402 /// If the narrow use is an instruction whose two operands are the defining
1403 /// instruction of DU and a load instruction, then we have the following:
1404 /// if the load is hoisted outside the loop, we never reach this function,
1405 /// because scalar evolution analysis works fine in widenIVUse with variables
1406 /// hoisted outside the loop, and efficient code is subsequently generated
1407 /// without emitting truncate instructions. But when the load is not hoisted
1408 /// (whether due to a limitation in alias analysis or due to a true legality
1409 /// constraint), scalar evolution cannot proceed with loop-variant values and
1410 /// inefficient code is generated. This function handles the non-hoisted load
1411 /// special case by making the optimization generate the same kind of code for
1412 /// hoisted and non-hoisted loads (widen the use and eliminate the sign/zero
1413 /// extend instruction). This matters especially when the induction variables
1414 /// affect the addressing mode in code generation.
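/// Illustrative sketch (hypothetical IR, assuming a sign-extended i32 IV %iv
/// with i64 twin %iv.wide):
///   %val = load i32, i32* %p          ; loop-variant, not hoistable
///   %add = add nsw i32 %iv, %val
///   %ext = sext i32 %add to i64
/// becomes
///   %val.wide = sext i32 %val to i64
///   %add.wide = add nsw i64 %iv.wide, %val.wide
/// and the original sext is eliminated.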
1415 bool WidenIV::widenWithVariantLoadUse(NarrowIVDefUse DU) {
1416 Instruction *NarrowUse = DU.NarrowUse;
1417 Instruction *NarrowDef = DU.NarrowDef;
1418 Instruction *WideDef = DU.WideDef;
1419
1420 // Handle the common case of add<nsw/nuw>
1421 const unsigned OpCode = NarrowUse->getOpcode();
1422 // Only Add/Sub/Mul instructions are supported.
1423 if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
1424 OpCode != Instruction::Mul)
1425 return false;
1426
1427 // The operand that is not defined by NarrowDef of DU. Let's call it the
1428 // other operand.
1429 unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == NarrowDef ? 1 : 0;
1430 assert(DU.NarrowUse->getOperand(1 - ExtendOperIdx) == DU.NarrowDef &&
1431 "bad DU");
1432
1433 const SCEV *ExtendOperExpr = nullptr;
1434 const OverflowingBinaryOperator *OBO =
1435 cast<OverflowingBinaryOperator>(NarrowUse);
1436 ExtendKind ExtKind = getExtendKind(NarrowDef);
1437 if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
1438 ExtendOperExpr = SE->getSignExtendExpr(
1439 SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
1440 else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
1441 ExtendOperExpr = SE->getZeroExtendExpr(
1442 SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
1443 else
1444 return false;
1445
1446 // We are interested in the other operand being a load instruction.
1447 // But, we should look into relaxing this restriction later on.
1448 auto *I = dyn_cast<Instruction>(NarrowUse->getOperand(ExtendOperIdx));
1449 if (I && I->getOpcode() != Instruction::Load)
1450 return false;
1451
1452 // Verify that the defining operand is an AddRec.
1453 const SCEV *Op1 = SE->getSCEV(WideDef);
1454 const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1);
1455 if (!AddRecOp1 || AddRecOp1->getLoop() != L)
1456 return false;
1457 // Verify that the other operand is an extend.
1458 if (ExtKind == SignExtended) {
1459 if (!isa<SCEVSignExtendExpr>(ExtendOperExpr))
1460 return false;
1461 } else {
1462 if (!isa<SCEVZeroExtendExpr>(ExtendOperExpr))
1463 return false;
1464 }
1465
1466 if (ExtKind == SignExtended) {
1467 for (Use &U : NarrowUse->uses()) {
1468 SExtInst *User = dyn_cast<SExtInst>(U.getUser());
1469 if (!User || User->getType() != WideType)
1470 return false;
1471 }
1472 } else { // ExtKind == ZeroExtended
1473 for (Use &U : NarrowUse->uses()) {
1474 ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
1475 if (!User || User->getType() != WideType)
1476 return false;
1477 }
1478 }
1479
1480 return true;
1481 }
1482
1483 /// Special Case for widening with variant Loads (see
1484 /// WidenIV::widenWithVariantLoadUse). This is the code generation part.
1485 void WidenIV::widenWithVariantLoadUseCodegen(NarrowIVDefUse DU) {
1486 Instruction *NarrowUse = DU.NarrowUse;
1487 Instruction *NarrowDef = DU.NarrowDef;
1488 Instruction *WideDef = DU.WideDef;
1489
1490 ExtendKind ExtKind = getExtendKind(NarrowDef);
1491
1492 LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
1493
1494 // Generate the widened use instruction.
1495 Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
1496 ? WideDef
1497 : createExtendInst(NarrowUse->getOperand(0), WideType,
1498 ExtKind, NarrowUse);
1499 Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
1500 ? WideDef
1501 : createExtendInst(NarrowUse->getOperand(1), WideType,
1502 ExtKind, NarrowUse);
1503
1504 auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1505 auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1506 NarrowBO->getName());
1507 IRBuilder<> Builder(NarrowUse);
1508 Builder.Insert(WideBO);
1509 WideBO->copyIRFlags(NarrowBO);
1510
1511 if (ExtKind == SignExtended)
1512 ExtendKindMap[NarrowUse] = SignExtended;
1513 else
1514 ExtendKindMap[NarrowUse] = ZeroExtended;
1515
1516 // Update the Use.
1517 if (ExtKind == SignExtended) {
1518 for (Use &U : NarrowUse->uses()) {
1519 SExtInst *User = dyn_cast<SExtInst>(U.getUser());
1520 if (User && User->getType() == WideType) {
1521 LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
1522 << *WideBO << "\n");
1523 ++NumElimExt;
1524 User->replaceAllUsesWith(WideBO);
1525 DeadInsts.emplace_back(User);
1526 }
1527 }
1528 } else { // ExtKind == ZeroExtended
1529 for (Use &U : NarrowUse->uses()) {
1530 ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
1531 if (User && User->getType() == WideType) {
1532 LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
1533 << *WideBO << "\n");
1534 ++NumElimExt;
1535 User->replaceAllUsesWith(WideBO);
1536 DeadInsts.emplace_back(User);
1537 }
1538 }
1539 }
1540 }
1541
1542 /// Determine whether an individual user of the narrow IV can be widened. If so,
1543 /// return the wide clone of the user.
1544 Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
1545 assert(ExtendKindMap.count(DU.NarrowDef) &&
1546 "Should already know the kind of extension used to widen NarrowDef");
1547
1548 // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
1549 if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
1550 if (LI->getLoopFor(UsePhi->getParent()) != L) {
1551 // For LCSSA phis, sink the truncate outside the loop.
1552 // After SimplifyCFG most loop exit targets have a single predecessor.
1553 // Otherwise fall back to a truncate within the loop.
1554 if (UsePhi->getNumOperands() != 1)
1555 truncateIVUse(DU, DT, LI);
1556 else {
1557 // Widening the PHI requires us to insert a trunc. The logical place
1558 // for this trunc is in the same BB as the PHI. This is not possible if
1559 // the BB is terminated by a catchswitch.
1560 if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator()))
1561 return nullptr;
1562
1563 PHINode *WidePhi =
1564 PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
1565 UsePhi);
1566 WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
1567 IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt());
1568 Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
1569 UsePhi->replaceAllUsesWith(Trunc);
1570 DeadInsts.emplace_back(UsePhi);
1571 LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to "
1572 << *WidePhi << "\n");
1573 }
1574 return nullptr;
1575 }
1576 }
1577
1578 // This narrow use can be widened by a sext if it's non-negative or its narrow
1579 // def was widened by a sext. Same for zext.
1580 auto canWidenBySExt = [&]() {
1581 return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended;
1582 };
1583 auto canWidenByZExt = [&]() {
1584 return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended;
1585 };
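// E.g. (sketch) a use 'sext i32 %narrow to i64' can be absorbed either when
// the narrow def was widened by sext, or when %narrow is provably
// non-negative, in which case sext and zext agree bit-for-bit.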
1586
1587 // Our raison d'etre! Eliminate sign and zero extension.
1588 if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) ||
1589 (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) {
1590 Value *NewDef = DU.WideDef;
1591 if (DU.NarrowUse->getType() != WideType) {
1592 unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
1593 unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1594 if (CastWidth < IVWidth) {
1595 // The cast isn't as wide as the IV, so insert a Trunc.
1596 IRBuilder<> Builder(DU.NarrowUse);
1597 NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
1598 }
1599 else {
1600 // A wider extend was hidden behind a narrower one. This may induce
1601 // another round of IV widening in which the intermediate IV becomes
1602 // dead. It should be very rare.
1603 LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
1604 << " not wide enough to subsume " << *DU.NarrowUse
1605 << "\n");
1606 DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1607 NewDef = DU.NarrowUse;
1608 }
1609 }
1610 if (NewDef != DU.NarrowUse) {
1611 LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
1612 << " replaced by " << *DU.WideDef << "\n");
1613 ++NumElimExt;
1614 DU.NarrowUse->replaceAllUsesWith(NewDef);
1615 DeadInsts.emplace_back(DU.NarrowUse);
1616 }
1617 // Now that the extend is gone, we want to expose its uses for potential
1618 // further simplification. We don't need to directly inform SimplifyIVUsers
1619 // of the new users, because their parent IV will be processed later as a
1620 // new loop phi. If we preserved IVUsers analysis, we would also want to
1621 // push the uses of WideDef here.
1622
1623 // No further widening is needed. The deceased [sz]ext had done it for us.
1624 return nullptr;
1625 }
1626
1627 // Does this user itself evaluate to a recurrence after widening?
1628 WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
1629 if (!WideAddRec.first)
1630 WideAddRec = getWideRecurrence(DU);
1631
1632 assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown));
1633 if (!WideAddRec.first) {
1634 // If use is a loop condition, try to promote the condition instead of
1635 // truncating the IV first.
1636 if (widenLoopCompare(DU))
1637 return nullptr;
1638
1639 // We are about to generate a truncate instruction that may hurt
1640 // performance because the scalar evolution expression computed earlier
1641 // in WideAddRec.first does not indicate a polynomial induction expression.
1642 // In that case, look at the operands of the use instruction to determine
1643 // if we can still widen the use instead of truncating its operand.
1644 if (widenWithVariantLoadUse(DU)) {
1645 widenWithVariantLoadUseCodegen(DU);
1646 return nullptr;
1647 }
1648
1649 // This user does not evaluate to a recurrence after widening, so don't
1650 // follow it. Instead insert a Trunc to kill off the original use,
1651 // eventually isolating the original narrow IV so it can be removed.
1652 truncateIVUse(DU, DT, LI);
1653 return nullptr;
1654 }
1655 // Assume block terminators cannot evaluate to a recurrence. We can't
1656 // insert a Trunc after a terminator if there happens to be a critical edge.
1657 assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
1658 "SCEV is not expected to evaluate a block terminator");
1659
1660 // Reuse the IV increment that SCEVExpander created as long as it dominates
1661 // NarrowUse.
1662 Instruction *WideUse = nullptr;
1663 if (WideAddRec.first == WideIncExpr &&
1664 Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
1665 WideUse = WideInc;
1666 else {
1667 WideUse = cloneIVUser(DU, WideAddRec.first);
1668 if (!WideUse)
1669 return nullptr;
1670 }
1671 // Evaluation of WideAddRec ensured that the narrow expression could be
1672 // extended outside the loop without overflow. This suggests that the wide use
1673 // evaluates to the same expression as the extended narrow use, but doesn't
1674 // absolutely guarantee it. Hence the following failsafe check. In rare cases
1675 // where it fails, we simply throw away the newly created wide use.
1676 if (WideAddRec.first != SE->getSCEV(WideUse)) {
1677 LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": "
1678 << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first
1679 << "\n");
1680 DeadInsts.emplace_back(WideUse);
1681 return nullptr;
1682 }
1683
1684 // If we reached this point then we are going to replace
1685 // DU.NarrowUse with WideUse, so reattach any DbgValues now.
1686 replaceAllDbgUsesWith(*DU.NarrowUse, *WideUse, *WideUse, *DT);
1687
1688 ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
1689 // Returning WideUse pushes it on the worklist.
1690 return WideUse;
1691 }
1692
1693 /// Add eligible users of NarrowDef to NarrowIVUsers.
1694 void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
1695 const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
1696 bool NonNegativeDef =
1697 SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
1698 SE->getConstant(NarrowSCEV->getType(), 0));
1699 for (User *U : NarrowDef->users()) {
1700 Instruction *NarrowUser = cast<Instruction>(U);
1701
1702 // Handle data flow merges and bizarre phi cycles.
1703 if (!Widened.insert(NarrowUser).second)
1704 continue;
1705
1706 bool NonNegativeUse = false;
1707 if (!NonNegativeDef) {
1708 // We might have control-dependent range information for this context.
1709 if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser))
1710 NonNegativeUse = RangeInfo->getSignedMin().isNonNegative();
1711 }
1712
1713 NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef,
1714 NonNegativeDef || NonNegativeUse);
1715 }
1716 }
1717
1718 /// Process a single induction variable. First use the SCEVExpander to create a
1719 /// wide induction variable that evaluates to the same recurrence as the
1720 /// original narrow IV. Then use a worklist to forward traverse the narrow IV's
1721 /// def-use chain. After widenIVUse has processed all interesting IV users, the
1722 /// narrow IV will be isolated for removal by DeleteDeadPHIs.
1723 ///
1724 /// It would be simpler to delete uses as they are processed, but we must avoid
1725 /// invalidating SCEV expressions.
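/// Sketch of the overall effect (hypothetical IR): a narrow recurrence
///   %iv = phi i32 [ %start, %preheader ], [ %iv.next, %latch ]
/// whose interesting uses are all extensions to i64 is shadowed by
///   %iv.wide = phi i64 [ %start.wide, %preheader ], [ %iv.wide.next, %latch ]
/// after which the extensions fold away and the narrow phi, once isolated,
/// is removed by DeleteDeadPHIs.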
1726 PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
1727 // Is this phi an induction variable?
1728 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
1729 if (!AddRec)
1730 return nullptr;
1731
1732 // Widen the induction variable expression.
1733 const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended
1734 ? SE->getSignExtendExpr(AddRec, WideType)
1735 : SE->getZeroExtendExpr(AddRec, WideType);
1736
1737 assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
1738 "Expect the new IV expression to preserve its type");
1739
1740 // Can the IV be extended outside the loop without overflow?
1741 AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
1742 if (!AddRec || AddRec->getLoop() != L)
1743 return nullptr;
1744
1745 // An AddRec must have loop-invariant operands. Since this AddRec is
1746 // materialized by a loop header phi, the expression cannot have any post-loop
1747 // operands, so they must dominate the loop header.
1748 assert(
1749 SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
1750 SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) &&
1751 "Loop header phi recurrence inputs do not dominate the loop");
1752
1753 // Iterate over IV uses (including transitive ones) looking for IV increments
1754 // of the form 'add nsw %iv, <const>'. For each increment and each use of
1755 // the increment, calculate control-dependent range information based on
1756 // dominating conditions inside of the loop (e.g. a range check inside of the
1757 // loop). Calculated ranges are stored in PostIncRangeInfos map.
1758 //
1759 // Control-dependent range information is later used to prove that a narrow
1760 // definition is not negative (see pushNarrowIVUsers). It's difficult to do
1761 // this on demand because when pushNarrowIVUsers needs this information some
1762 // of the dominating conditions might be already widened.
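// For instance (sketch): a dominating range check 'icmp slt i32 %i, %n' over
// an increment %i.next = add nsw i32 %i, 1 lets us bound %i.next at that
// context even though nothing is known about %i globally.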
1763 if (UsePostIncrementRanges)
1764 calculatePostIncRanges(OrigPhi);
1765
1766 // The rewriter provides a value for the desired IV expression. This may
1767 // either find an existing phi or materialize a new one. Either way, we
1768 // expect a well-formed cyclic phi-with-increments. i.e. any operand not part
1769 // of the phi-SCC dominates the loop entry.
1770 Instruction *InsertPt = &L->getHeader()->front();
1771 WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt));
1772
1773 // Remembering the WideIV increment generated by SCEVExpander allows
1774 // widenIVUse to reuse it when widening the narrow IV's increment. We don't
1775 // employ a general reuse mechanism because the call above is the only call to
1776 // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
1777 if (BasicBlock *LatchBlock = L->getLoopLatch()) {
1778 WideInc =
1779 cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
1780 WideIncExpr = SE->getSCEV(WideInc);
1781 // Propagate the debug location associated with the original loop increment
1782 // to the new (widened) increment.
1783 auto *OrigInc =
1784 cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
1785 WideInc->setDebugLoc(OrigInc->getDebugLoc());
1786 }
1787
1788 LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
1789 ++NumWidened;
1790
1791 // Traverse the def-use chain using a worklist starting at the original IV.
1792 assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state");
1793
1794 Widened.insert(OrigPhi);
1795 pushNarrowIVUsers(OrigPhi, WidePhi);
1796
1797 while (!NarrowIVUsers.empty()) {
1798 NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();
1799
1800 // Process a def-use edge. This may replace the use, so don't hold a
1801 // use_iterator across it.
1802 Instruction *WideUse = widenIVUse(DU, Rewriter);
1803
1804 // Follow all def-use edges from the previous narrow use.
1805 if (WideUse)
1806 pushNarrowIVUsers(DU.NarrowUse, WideUse);
1807
1808 // widenIVUse may have removed the def-use edge.
1809 if (DU.NarrowDef->use_empty())
1810 DeadInsts.emplace_back(DU.NarrowDef);
1811 }
1812
1813 // Attach any debug information to the new PHI.
1814 replaceAllDbgUsesWith(*OrigPhi, *WidePhi, *WidePhi, *DT);
1815
1816 return WidePhi;
1817 }
1818
1819 /// Calculates control-dependent range for the given def at the given context
1820 /// by looking at dominating conditions inside of the loop
1821 void WidenIV::calculatePostIncRange(Instruction *NarrowDef,
1822 Instruction *NarrowUser) {
1823 using namespace llvm::PatternMatch;
1824
1825 Value *NarrowDefLHS;
1826 const APInt *NarrowDefRHS;
1827 if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS),
1828 m_APInt(NarrowDefRHS))) ||
1829 !NarrowDefRHS->isNonNegative())
1830 return;
1831
1832 auto UpdateRangeFromCondition = [&] (Value *Condition,
1833 bool TrueDest) {
1834 CmpInst::Predicate Pred;
1835 Value *CmpRHS;
1836 if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS),
1837 m_Value(CmpRHS))))
1838 return;
1839
1840 CmpInst::Predicate P =
1841 TrueDest ? Pred : CmpInst::getInversePredicate(Pred);
1842
1843 auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS));
1844 auto CmpConstrainedLHSRange =
1845 ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange);
1846 auto NarrowDefRange = CmpConstrainedLHSRange.addWithNoWrap(
1847 *NarrowDefRHS, OverflowingBinaryOperator::NoSignedWrap);
1848
1849 updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange);
1850 };
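// Example (sketch): with NarrowDef = %x +nsw 4 and a dominating branch on
// 'icmp sgt i32 %x, 0' taken on its true edge, makeAllowedICmpRegion yields
// [1, SINT_MAX] for %x, and shifting that region by the +4 step with no
// signed wrap gives the post-increment range recorded for NarrowDef.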
1851
1852 auto UpdateRangeFromGuards = [&](Instruction *Ctx) {
1853 if (!HasGuards)
1854 return;
1855
1856 for (Instruction &I : make_range(Ctx->getIterator().getReverse(),
1857 Ctx->getParent()->rend())) {
1858 Value *C = nullptr;
1859 if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C))))
1860 UpdateRangeFromCondition(C, /*TrueDest=*/true);
1861 }
1862 };
1863
1864 UpdateRangeFromGuards(NarrowUser);
1865
1866 BasicBlock *NarrowUserBB = NarrowUser->getParent();
1867 // If NarrowUserBB is statically unreachable, asking dominator queries may
1868 // yield surprising results. (e.g. the block may not have a dom tree node)
1869 if (!DT->isReachableFromEntry(NarrowUserBB))
1870 return;
1871
1872 for (auto *DTB = (*DT)[NarrowUserBB]->getIDom();
1873 L->contains(DTB->getBlock());
1874 DTB = DTB->getIDom()) {
1875 auto *BB = DTB->getBlock();
1876 auto *TI = BB->getTerminator();
1877 UpdateRangeFromGuards(TI);
1878
1879 auto *BI = dyn_cast<BranchInst>(TI);
1880 if (!BI || !BI->isConditional())
1881 continue;
1882
1883 auto *TrueSuccessor = BI->getSuccessor(0);
1884 auto *FalseSuccessor = BI->getSuccessor(1);
1885
1886 auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) {
1887 return BBE.isSingleEdge() &&
1888 DT->dominates(BBE, NarrowUser->getParent());
1889 };
1890
1891 if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor)))
1892 UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true);
1893
1894 if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor)))
1895 UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false);
1896 }
1897 }
1898
1899 /// Calculates PostIncRangeInfos map for the given IV
1900 void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) {
1901 SmallPtrSet<Instruction *, 16> Visited;
1902 SmallVector<Instruction *, 6> Worklist;
1903 Worklist.push_back(OrigPhi);
1904 Visited.insert(OrigPhi);
1905
1906 while (!Worklist.empty()) {
1907 Instruction *NarrowDef = Worklist.pop_back_val();
1908
1909 for (Use &U : NarrowDef->uses()) {
1910 auto *NarrowUser = cast<Instruction>(U.getUser());
1911
1912 // Don't go looking outside the current loop.
1913 auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()];
1914 if (!NarrowUserLoop || !L->contains(NarrowUserLoop))
1915 continue;
1916
1917 if (!Visited.insert(NarrowUser).second)
1918 continue;
1919
1920 Worklist.push_back(NarrowUser);
1921
1922 calculatePostIncRange(NarrowDef, NarrowUser);
1923 }
1924 }
1925 }
1926
1927 //===----------------------------------------------------------------------===//
1928 // Live IV Reduction - Minimize IVs live across the loop.
1929 //===----------------------------------------------------------------------===//
1930
1931 //===----------------------------------------------------------------------===//
1932 // Simplification of IV users based on SCEV evaluation.
1933 //===----------------------------------------------------------------------===//
1934
1935 namespace {
1936
1937 class IndVarSimplifyVisitor : public IVVisitor {
1938 ScalarEvolution *SE;
1939 const TargetTransformInfo *TTI;
1940 PHINode *IVPhi;
1941
1942 public:
1943 WideIVInfo WI;
1944
1945 IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
1946 const TargetTransformInfo *TTI,
1947 const DominatorTree *DTree)
1948 : SE(SCEV), TTI(TTI), IVPhi(IV) {
1949 DT = DTree;
1950 WI.NarrowIV = IVPhi;
1951 }
1952
1953 // Implement the interface used by simplifyUsersOfIV.
1954 void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
1955 };
1956
1957 } // end anonymous namespace
1958
1959 /// Iteratively perform simplification on a worklist of IV users. Each
1960 /// successive simplification may push more users which may themselves be
1961 /// candidates for simplification.
1962 ///
1963 /// Sign/Zero extend elimination is interleaved with IV simplification.
1964 bool IndVarSimplify::simplifyAndExtend(Loop *L,
1965 SCEVExpander &Rewriter,
1966 LoopInfo *LI) {
1967 SmallVector<WideIVInfo, 8> WideIVs;
1968
1969 auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
1970 Intrinsic::getName(Intrinsic::experimental_guard));
1971 bool HasGuards = GuardDecl && !GuardDecl->use_empty();
1972
1973 SmallVector<PHINode*, 8> LoopPhis;
1974 for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
1975 LoopPhis.push_back(cast<PHINode>(I));
1976 }
1977 // Each round of simplification iterates through the SimplifyIVUsers worklist
1978 // for all current phis, then determines whether any IVs can be
1979 // widened. Widening adds new phis to LoopPhis, inducing another round of
1980 // simplification on the wide IVs.
1981 bool Changed = false;
1982 while (!LoopPhis.empty()) {
1983 // Evaluate as many IV expressions as possible before widening any IVs. This
1984 // forces SCEV to set no-wrap flags before evaluating sign/zero
1985 // extension. The first time SCEV attempts to normalize sign/zero extension,
1986 // the result becomes final. So for the most predictable results, we delay
1987 // evaluation of sign/zero extension until needed, and avoid running
1988 // other SCEV-based analyses prior to simplifyAndExtend.
1989 do {
1990 PHINode *CurrIV = LoopPhis.pop_back_val();
1991
1992 // Information about sign/zero extensions of CurrIV.
1993 IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
1994
1995 Changed |=
1996 simplifyUsersOfIV(CurrIV, SE, DT, LI, DeadInsts, Rewriter, &Visitor);
1997
1998 if (Visitor.WI.WidestNativeType) {
1999 WideIVs.push_back(Visitor.WI);
2000 }
2001 } while (!LoopPhis.empty());
2002
2003 for (; !WideIVs.empty(); WideIVs.pop_back()) {
2004 WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards);
2005 if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) {
2006 Changed = true;
2007 LoopPhis.push_back(WidePhi);
2008 }
2009 }
2010 }
2011 return Changed;
2012 }
2013
2014 //===----------------------------------------------------------------------===//
2015 // linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
2016 //===----------------------------------------------------------------------===//
2017
2018 /// Given a Value which is hoped to be part of an add recurrence in the given
2019 /// loop, return the associated Phi node if so. Otherwise, return null. Note
2020 /// that this is less general than SCEV's AddRec checking.
2021 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
2022 Instruction *IncI = dyn_cast<Instruction>(IncV);
2023 if (!IncI)
2024 return nullptr;
2025
2026 switch (IncI->getOpcode()) {
2027 case Instruction::Add:
2028 case Instruction::Sub:
2029 break;
2030 case Instruction::GetElementPtr:
2031 // An IV counter must preserve its type.
2032 if (IncI->getNumOperands() == 2)
2033 break;
2034 LLVM_FALLTHROUGH;
2035 default:
2036 return nullptr;
2037 }
2038
2039 PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
2040 if (Phi && Phi->getParent() == L->getHeader()) {
2041 if (L->isLoopInvariant(IncI->getOperand(1)))
2042 return Phi;
2043 return nullptr;
2044 }
2045 if (IncI->getOpcode() == Instruction::GetElementPtr)
2046 return nullptr;
2047
2048 // Allow add/sub to be commuted.
2049 Phi = dyn_cast<PHINode>(IncI->getOperand(1));
2050 if (Phi && Phi->getParent() == L->getHeader()) {
2051 if (L->isLoopInvariant(IncI->getOperand(0)))
2052 return Phi;
2053 }
2054 return nullptr;
2055 }
2056
2057 /// Whether the current loop exit test is based on this value. Currently this
2058 /// is limited to a direct use in the loop condition.
2059 static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
2060 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2061 ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
2062 // TODO: Allow non-icmp loop test.
2063 if (!ICmp)
2064 return false;
2065
2066 // TODO: Allow indirect use.
2067 return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
2068 }
2069
2070 /// linearFunctionTestReplace policy. Return true unless we can show that the
2071 /// current exit test is already sufficiently canonical.
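/// E.g. an exit test of the form 'icmp ne i32 %iv.next, %limit' with a
/// loop-invariant %limit and a simple unit-stride counter is already
/// canonical; inequality predicates, loop-variant operands, or a non-counter
/// LHS all trigger the rewrite.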
2072 static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
2073 assert(L->getLoopLatch() && "Must be in simplified form");
2074
2075 // Avoid converting a constant or loop invariant test back to a runtime
2076 // test. This is critical for when SCEV's cached ExitCount is less precise
2077 // than the current IR (such as after we've proven a particular exit is
2078 // actually dead and thus the BE count never reaches our ExitCount.)
2079 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2080 if (L->isLoopInvariant(BI->getCondition()))
2081 return false;
2082
2083 // Do LFTR to simplify the exit condition to an ICMP.
2084 ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
2085 if (!Cond)
2086 return true;
2087
2088 // Do LFTR to simplify the exit ICMP to EQ/NE
2089 ICmpInst::Predicate Pred = Cond->getPredicate();
2090 if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
2091 return true;
2092
2093 // Look for a loop invariant RHS
2094 Value *LHS = Cond->getOperand(0);
2095 Value *RHS = Cond->getOperand(1);
2096 if (!L->isLoopInvariant(RHS)) {
2097 if (!L->isLoopInvariant(LHS))
2098 return true;
2099 std::swap(LHS, RHS);
2100 }
2101 // Look for a simple IV counter LHS
2102 PHINode *Phi = dyn_cast<PHINode>(LHS);
2103 if (!Phi)
2104 Phi = getLoopPhiForCounter(LHS, L);
2105
2106 if (!Phi)
2107 return true;
2108
2109 // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
2110 int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
2111 if (Idx < 0)
2112 return true;
2113
2114 // Do LFTR if the exit condition's IV is *not* a simple counter.
2115 Value *IncV = Phi->getIncomingValue(Idx);
2116 return Phi != getLoopPhiForCounter(IncV, L);
2117 }
2118
2119 /// Return true if undefined behavior would provably be executed on the path to
2120 /// OnPathTo if Root produced a poison result. Note that this doesn't say
2121 /// anything about whether OnPathTo is actually executed or whether Root is
2122 /// actually poison. This can be used to assess whether a new use of Root can
2123 /// be added at a location which is control equivalent with OnPathTo (such as
2124 /// immediately before it) without introducing UB which didn't previously
2125 /// exist. Note that a false result conveys no information.
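/// Sketch: if Root already feeds an operation that is UB on poison (say, the
/// pointer operand of a store) and that operation must execute before
/// OnPathTo, then the program was already UB whenever Root is poison, so a
/// new use at OnPathTo introduces nothing fresh.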
2126 static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
2127 Instruction *OnPathTo,
2128 DominatorTree *DT) {
2129 // Basic approach is to assume Root is poison, propagate poison forward
2130 // through all users we can easily track, and then check whether any of those
2131 // users provably trigger UB and must execute before our exiting block might
2132 // exit.
2133
2134 // The set of all recursive users we've visited (which are assumed to all be
2135 // poison because of said visit)
2136 SmallSet<const Value *, 16> KnownPoison;
2137 SmallVector<const Instruction*, 16> Worklist;
2138 Worklist.push_back(Root);
2139 while (!Worklist.empty()) {
2140 const Instruction *I = Worklist.pop_back_val();
2141
2142 // If we know this must trigger UB on a path leading to our target.
2143 if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
2144 return true;
2145
2146 // If we can't analyze propagation through this instruction, just skip it
2147 // and transitive users. Safe as false is a conservative result.
2148 if (!propagatesFullPoison(I) && I != Root)
2149 continue;
2150
2151 if (KnownPoison.insert(I).second)
2152 for (const User *User : I->users())
2153 Worklist.push_back(cast<Instruction>(User));
2154 }
2155
2156 // Might be non-UB, or might have a path we couldn't prove must execute on
2157 // the way to the exiting bb.
2158 return false;
2159 }
2160
2161 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
2162 /// down to checking that all operands are constant and listing instructions
2163 /// that may hide undef.
2164 static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
2165 unsigned Depth) {
2166 if (isa<Constant>(V))
2167 return !isa<UndefValue>(V);
2168
2169 if (Depth >= 6)
2170 return false;
2171
2172 // Conservatively handle non-constant non-instructions. For example, Arguments
2173 // may be undef.
2174 Instruction *I = dyn_cast<Instruction>(V);
2175 if (!I)
2176 return false;
2177
2178 // Load and return values may be undef.
2179 if (I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
2180 return false;
2181
2182 // Optimistically handle other instructions.
2183 for (Value *Op : I->operands()) {
2184 if (!Visited.insert(Op).second)
2185 continue;
2186 if (!hasConcreteDefImpl(Op, Visited, Depth+1))
2187 return false;
2188 }
2189 return true;
2190 }
2191
2192 /// Return true if the given value is concrete. We must prove that undef can
2193 /// never reach it.
2194 ///
2195 /// TODO: If we decide that this is a good approach to checking for undef, we
2196 /// may factor it into a common location.
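/// E.g. a constant like 'i32 42' is concrete, while 'undef', a load result,
/// or a call result is not, since each may (transitively) carry undef.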
2197 static bool hasConcreteDef(Value *V) {
2198 SmallPtrSet<Value*, 8> Visited;
2199 Visited.insert(V);
2200 return hasConcreteDefImpl(V, Visited, 0);
2201 }
2202
2203 /// Return true if this IV has no uses other than the (soon to be rewritten)
2204 /// loop exit test, i.e. the IV is almost dead.
2205 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
2206 int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
2207 Value *IncV = Phi->getIncomingValue(LatchIdx);
2208
2209 for (User *U : Phi->users())
2210 if (U != Cond && U != IncV) return false;
2211
2212 for (User *U : IncV->users())
2213 if (U != Cond && U != Phi) return false;
2214 return true;
2215 }
2216
2217 /// Return true if the given phi is a "counter" in L. A counter is an
2218 /// add recurrence (of integer or pointer type) with an arbitrary start, and a
2219 /// step of 1. Note that L must have exactly one latch.
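/// Canonical shape (sketch):
///   %i = phi i32 [ %start, %preheader ], [ %i.next, %latch ]
///   %i.next = add i32 %i, 1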
2220 static bool isLoopCounter(PHINode* Phi, Loop *L,
2221 ScalarEvolution *SE) {
2222 assert(Phi->getParent() == L->getHeader());
2223 assert(L->getLoopLatch());
2224
2225 if (!SE->isSCEVable(Phi->getType()))
2226 return false;
2227
2228 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
2229 if (!AR || AR->getLoop() != L || !AR->isAffine())
2230 return false;
2231
2232 const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
2233 if (!Step || !Step->isOne())
2234 return false;
2235
2236 int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
2237 Value *IncV = Phi->getIncomingValue(LatchIdx);
2238 return (getLoopPhiForCounter(IncV, L) == Phi);
2239 }
2240
2241 /// Search the loop header for a loop counter (an add rec w/step of one)
2242 /// suitable for use by LFTR. If multiple counters are available, select the
2243 /// "best" one based on profitability heuristics.
2244 ///
2245 /// BECount may be an i8* pointer type. The pointer difference is already a
2246 /// valid count without scaling the address stride, so it remains a pointer
2247 /// expression as far as SCEV is concerned.
2248 static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
2249 const SCEV *BECount,
2250 ScalarEvolution *SE, DominatorTree *DT) {
2251 uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
2252
2253 Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();
2254
2255 // Loop over all of the PHI nodes, looking for a simple counter.
2256 PHINode *BestPhi = nullptr;
2257 const SCEV *BestInit = nullptr;
2258 BasicBlock *LatchBlock = L->getLoopLatch();
2259 assert(LatchBlock && "Must be in simplified form");
2260 const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2261
2262 for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
2263 PHINode *Phi = cast<PHINode>(I);
2264 if (!isLoopCounter(Phi, L, SE))
2265 continue;
2266
2267 // Avoid comparing an integer IV against a pointer Limit.
2268 if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
2269 continue;
2270
2271 const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
2272
2273 // AR may be a pointer type, while BECount is an integer type.
2274 // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
2275 // AR may not be a narrower type, or we may never exit.
2276 uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
2277 if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
2278 continue;
2279
2280 // Avoid reusing a potentially undef value to compute other values that may
2281 // have originally had a concrete definition.
2282 if (!hasConcreteDef(Phi)) {
2283 // We explicitly allow unknown phis as long as they are already used by
2284 // the loop exit test. This is legal since performing LFTR could not
2285 // increase the number of undef users.
2286 Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
2287 if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
2288 !isLoopExitTestBasedOn(IncPhi, ExitingBB))
2289 continue;
2290 }
2291
2292 // Avoid introducing undefined behavior due to poison which didn't exist in
2293 // the original program. (Annoyingly, the rules for poison and undef
2294 // propagation are distinct, so this does NOT cover the undef case above.)
2295 // We have to ensure that we don't introduce UB by introducing a use on an
2296 // iteration where said IV produces poison. Our strategy here differs for
2297 // pointers and integer IVs. For integers, we strip and reinfer as needed,
2298 // see code in linearFunctionTestReplace. For pointers, we restrict
2299 // transforms as there is no good way to reinfer inbounds once lost.
2300 if (!Phi->getType()->isIntegerTy() &&
2301 !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
2302 continue;
2303
2304 const SCEV *Init = AR->getStart();
2305
2306 if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
2307 // Don't force a live loop counter if another IV can be used.
2308 if (AlmostDeadIV(Phi, LatchBlock, Cond))
2309 continue;
2310
2311 // Prefer to count-from-zero. This is a more "canonical" counter form. It
2312 // also prefers integer to pointer IVs.
2313 if (BestInit->isZero() != Init->isZero()) {
2314 if (BestInit->isZero())
2315 continue;
2316 }
2317 // If two IVs both count from zero or both count from nonzero then the
2318 // narrower is likely a dead phi that has been widened. Use the wider phi
2319 // to allow the other to be eliminated.
2320 else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
2321 continue;
2322 }
2323 BestPhi = Phi;
2324 BestInit = Init;
2325 }
2326 return BestPhi;
2327 }
2328
2329 /// Insert an IR expression which computes the value held by the IV IndVar
2330 /// (which must be a loop counter w/unit stride) after the backedge of loop L
2331 /// is taken ExitCount times.
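/// E.g. (sketch) for a counter starting at %init whose backedge is taken
/// ExitCount = %n times, the pre-increment limit is %init + %n and the
/// post-increment limit is %init + %n + 1, both loop-invariant expressions
/// expanded before the exiting branch.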
2332 static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
2333 const SCEV *ExitCount, bool UsePostInc, Loop *L,
2334 SCEVExpander &Rewriter, ScalarEvolution *SE) {
2335 assert(isLoopCounter(IndVar, L, SE));
2336 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
2337 const SCEV *IVInit = AR->getStart();
2338
2339 // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter
2340 // finds a valid pointer IV. Zero extend ExitCount in order to materialize a
2341 // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
2342 // the existing GEPs whenever possible.
2343 if (IndVar->getType()->isPointerTy() &&
2344 !ExitCount->getType()->isPointerTy()) {
2345 // IVOffset will be the new GEP offset that is interpreted by GEP as a
2346 // signed value. ExitCount on the other hand represents the loop trip count,
2347 // which is an unsigned value. FindLoopCounter only allows induction
2348 // variables that have a positive unit stride of one. This means we don't
2349 // have to handle the case of negative offsets (yet) and just need to zero
2350 // extend ExitCount.
2351 Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
2352 const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy);
2353 if (UsePostInc)
2354 IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy));
2355
2356 // Expand the code for the iteration count.
2357 assert(SE->isLoopInvariant(IVOffset, L) &&
2358 "Computed iteration count is not loop invariant!");
2359
2360 // We could handle pointer IVs other than i8*, but we need to compensate for
2361 // gep index scaling.
2362 assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()),
2363 cast<PointerType>(IndVar->getType())
2364 ->getElementType())->isOne() &&
2365 "unit stride pointer IV must be i8*");
2366
2367 const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset);
2368 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2369 return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI);
2370 } else {
2371 // In any other case, convert both IVInit and ExitCount to integers before
2372 // comparing. This may result in SCEV expansion of pointers, but in practice
2373 // SCEV will fold the pointer arithmetic away as such:
2374 // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
2375 //
2376 // Valid Cases: (1) both integers is most common; (2) both may be pointers
2377 // for simple memset-style loops.
2378 //
2379 // IVInit integer and ExitCount pointer would only occur if a canonical IV
2380 // were generated on top of case #2, which is not expected.
2381
2382 assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
2383 // For unit stride, IVCount = Start + ExitCount with 2's complement
2384 // overflow.
2385
2386 // For integer IVs, truncate the IV before computing IVInit + BECount,
2387 // unless we know a priori that the limit must be a constant when evaluated
2388 // in the bitwidth of the IV. We prefer (potentially) keeping a truncate
2389 // of the IV in the loop over a (potentially) expensive expansion of the
2390 // widened exit count add(zext(add)) expression.
2391 if (SE->getTypeSizeInBits(IVInit->getType())
2392 > SE->getTypeSizeInBits(ExitCount->getType())) {
2393 if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount))
2394 ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType());
2395 else
2396 IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType());
2397 }
2398
2399 const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount);
2400
2401 if (UsePostInc)
2402 IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));
2403
2404 // Expand the code for the iteration count.
2405 assert(SE->isLoopInvariant(IVLimit, L) &&
2406 "Computed iteration count is not loop invariant!");
2407 // Ensure that we generate the same type as IndVar, or a smaller integer
2408 // type. In the presence of null pointer values, we have an integer type
2409 // SCEV expression (IVInit) for a pointer type IV value (IndVar).
2410 Type *LimitTy = ExitCount->getType()->isPointerTy() ?
2411 IndVar->getType() : ExitCount->getType();
2412 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2413 return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
2414 }
2415 }
2416
2417 /// This method rewrites the exit condition of the loop to be a canonical !=
2418 /// comparison against the incremented loop induction variable. This pass is
2419 /// able to rewrite the exit tests of any loop where the SCEV analysis can
2420 /// determine a loop-invariant trip count of the loop, which is actually a much
2421 /// broader range than just linear tests.
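/// Sketch of the rewrite (hypothetical IR): an exit test such as
///   %cmp = icmp slt i32 %i, %n
/// in a loop with a computable trip count becomes
///   %exitcond = icmp ne i32 %i.next, %limit
/// (or icmp eq, depending on which successor stays in the loop), where %limit
/// is the value produced by genLoopLimit.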
2422 bool IndVarSimplify::
2423 linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
2424 const SCEV *ExitCount,
2425 PHINode *IndVar, SCEVExpander &Rewriter) {
2426 assert(L->getLoopLatch() && "Loop no longer in simplified form?");
2427 assert(isLoopCounter(IndVar, L, SE));
2428 Instruction * const IncVar =
2429 cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));
2430
2431 // Initialize CmpIndVar to the preincremented IV.
2432 Value *CmpIndVar = IndVar;
2433 bool UsePostInc = false;
2434
2435 // If the exiting block is the same as the backedge block, we prefer to
2436 // compare against the post-incremented value, otherwise we must compare
2437 // against the preincremented value.
2438 if (ExitingBB == L->getLoopLatch()) {
2439 // For pointer IVs, we chose not to strip inbounds, which requires us to
2440 // avoid adding a potentially UB-introducing use. We need to either a) show
2441 // the loop test we're modifying is already in post-inc form, or b) show
2442 // that adding a use must not introduce UB.
2443 bool SafeToPostInc =
2444 IndVar->getType()->isIntegerTy() ||
2445 isLoopExitTestBasedOn(IncVar, ExitingBB) ||
2446 mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
2447 if (SafeToPostInc) {
2448 UsePostInc = true;
2449 CmpIndVar = IncVar;
2450 }
2451 }
2452
2453 // It may be necessary to drop nowrap flags on the incrementing instruction
2454 // if either LFTR moves from a pre-inc check to a post-inc check (in which
2455 // case the increment might have previously been poison on the last iteration
2456 // only) or if LFTR switches to a different IV that was previously dynamically
2457 // dead (and as such may be arbitrarily poison). We remove any nowrap flags
2458 // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
2459 // check), because the pre-inc addrec flags may be adopted from the original
2460 // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
2461 // TODO: This handling is inaccurate for one case: If we switch to a
2462 // dynamically dead IV that wraps on the first loop iteration only, which is
2463 // not covered by the post-inc addrec. (If the new IV was not dynamically
2464 // dead, it could not be poison on the first iteration in the first place.)
2465 if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
2466 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
2467 if (BO->hasNoUnsignedWrap())
2468 BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
2469 if (BO->hasNoSignedWrap())
2470 BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
2471 }
2472
2473 Value *ExitCnt = genLoopLimit(
2474 IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
2475 assert(ExitCnt->getType()->isPointerTy() ==
2476 IndVar->getType()->isPointerTy() &&
2477 "genLoopLimit missed a cast");
2478
2479 // Insert a new icmp_ne or icmp_eq instruction before the branch.
2480 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2481 ICmpInst::Predicate P;
2482 if (L->contains(BI->getSuccessor(0)))
2483 P = ICmpInst::ICMP_NE;
2484 else
2485 P = ICmpInst::ICMP_EQ;
2486
2487 IRBuilder<> Builder(BI);
2488
2489 // The new loop exit condition should reuse the debug location of the
2490 // original loop exit condition.
2491 if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
2492 Builder.SetCurrentDebugLocation(Cond->getDebugLoc());
2493
2494 // For integer IVs, if we evaluated the limit in the narrower bitwidth to
2495 // avoid the expensive expansion of the limit expression in the wider type,
2496 // emit a truncate to narrow the IV to the ExitCount type. This is safe
2497 // since we know (from the exit count bitwidth), that we can't self-wrap in
2498 // the narrower type.
2499 unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
2500 unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
2501 if (CmpIndVarSize > ExitCntSize) {
2502 assert(!CmpIndVar->getType()->isPointerTy() &&
2503 !ExitCnt->getType()->isPointerTy());
2504
2505 // Before resorting to actually inserting the truncate, use the same
2506 // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
2507 // the other side of the comparison instead. We still evaluate the limit
2508 // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
2509 // a truncate within it.
2510 bool Extended = false;
2511 const SCEV *IV = SE->getSCEV(CmpIndVar);
2512 const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
2513 ExitCnt->getType());
2514 const SCEV *ZExtTrunc =
2515 SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());
2516
2517 if (ZExtTrunc == IV) {
2518 Extended = true;
2519 ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
2520 "wide.trip.count");
2521 } else {
2522 const SCEV *SExtTrunc =
2523 SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
2524 if (SExtTrunc == IV) {
2525 Extended = true;
2526 ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
2527 "wide.trip.count");
2528 }
2529 }
2530
2531 if (Extended) {
2532 bool Discard;
2533 L->makeLoopInvariant(ExitCnt, Discard);
2534 } else
2535 CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
2536 "lftr.wideiv");
2537 }
2538 LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
2539 << " LHS:" << *CmpIndVar << '\n'
2540 << " op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
2541 << "\n"
2542 << " RHS:\t" << *ExitCnt << "\n"
2543 << "ExitCount:\t" << *ExitCount << "\n"
2544 << " was: " << *BI->getCondition() << "\n");
2545
2546 Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
2547 Value *OrigCond = BI->getCondition();
2548 // It's tempting to use replaceAllUsesWith here to fully replace the old
2549 // comparison, but that's not immediately safe, since users of the old
2550 // comparison may not be dominated by the new comparison. Instead, just
2551 // update the branch to use the new comparison; in the common case this
2552 // will make the old comparison dead.
2553 BI->setCondition(Cond);
2554 DeadInsts.push_back(OrigCond);
2555
2556 ++NumLFTR;
2557 return true;
2558 }
2559
2560 //===----------------------------------------------------------------------===//
2561 // sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
2562 //===----------------------------------------------------------------------===//
2563
2564 /// If there's a single exit block, sink any loop-invariant values that
2565 /// were defined in the preheader but not used inside the loop into the
2566 /// exit block to reduce register pressure in the loop.
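/// E.g. (sketch) an address computation defined in the preheader but used
/// only in the exit block is moved into the exit block, so it no longer
/// occupies a register across the whole loop body.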
2567 bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
2568 BasicBlock *ExitBlock = L->getExitBlock();
2569 if (!ExitBlock) return false;
2570
2571 BasicBlock *Preheader = L->getLoopPreheader();
2572 if (!Preheader) return false;
2573
2574 bool MadeAnyChanges = false;
2575 BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
2576 BasicBlock::iterator I(Preheader->getTerminator());
2577 while (I != Preheader->begin()) {
2578 --I;
2579 // New instructions were inserted at the end of the preheader.
2580 if (isa<PHINode>(I))
2581 break;
2582
2583 // Don't move instructions which might have side effects, since the side
2584 // effects need to complete before instructions inside the loop. Also don't
2585 // move instructions which might read memory, since the loop may modify
2586 // memory. Note that it's okay if the instruction might have undefined
2587 // behavior: LoopSimplify guarantees that the preheader dominates the exit
2588 // block.
2589 if (I->mayHaveSideEffects() || I->mayReadFromMemory())
2590 continue;
2591
2592 // Skip debug info intrinsics.
2593 if (isa<DbgInfoIntrinsic>(I))
2594 continue;
2595
2596 // Skip eh pad instructions.
2597 if (I->isEHPad())
2598 continue;
2599
2600 // Don't sink allocas: we never want to sink static allocas out of the
2601 // entry block, and correctly sinking dynamic allocas requires
2602 // checks for stacksave/stackrestore intrinsics.
2603 // FIXME: Refactor this check somehow?
2604 if (isa<AllocaInst>(I))
2605 continue;
2606
2607 // Determine if there is a use in or before the loop (direct or
2608 // otherwise).
2609 bool UsedInLoop = false;
2610 for (Use &U : I->uses()) {
2611 Instruction *User = cast<Instruction>(U.getUser());
2612 BasicBlock *UseBB = User->getParent();
2613 if (PHINode *P = dyn_cast<PHINode>(User)) {
2614 unsigned i =
2615 PHINode::getIncomingValueNumForOperand(U.getOperandNo());
2616 UseBB = P->getIncomingBlock(i);
2617 }
2618 if (UseBB == Preheader || L->contains(UseBB)) {
2619 UsedInLoop = true;
2620 break;
2621 }
2622 }
2623
2624 // If there is, the def must remain in the preheader.
2625 if (UsedInLoop)
2626 continue;
2627
2628 // Otherwise, sink it to the exit block.
2629 Instruction *ToMove = &*I;
2630 bool Done = false;
2631
2632 if (I != Preheader->begin()) {
2633 // Skip debug info intrinsics.
2634 do {
2635 --I;
2636 } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());
2637
2638 if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
2639 Done = true;
2640 } else {
2641 Done = true;
2642 }
2643
2644 MadeAnyChanges = true;
2645 ToMove->moveBefore(*ExitBlock, InsertPt);
2646 if (Done) break;
2647 InsertPt = ToMove->getIterator();
2648 }
2649
2650 return MadeAnyChanges;
2651 }
2652
2653 /// Return a symbolic upper bound for the backedge taken count of the loop.
2654 /// This is more general than getConstantMaxBackedgeTakenCount as it returns
2655 /// an arbitrary expression as opposed to only constants.
2656 /// TODO: Move into the ScalarEvolution class.
2657 static const SCEV* getMaxBackedgeTakenCount(ScalarEvolution &SE,
2658 DominatorTree &DT, Loop *L) {
2659 SmallVector<BasicBlock*, 16> ExitingBlocks;
2660 L->getExitingBlocks(ExitingBlocks);
2661
2662 // Form an expression for the maximum exit count possible for this loop. We
2663 // merge the max and exact information to approximate a version of
2664 // getConstantMaxBackedgeTakenCount which isn't restricted to just constants.
2665 SmallVector<const SCEV*, 4> ExitCounts;
2666 for (BasicBlock *ExitingBB : ExitingBlocks) {
2667 const SCEV *ExitCount = SE.getExitCount(L, ExitingBB);
2668 if (isa<SCEVCouldNotCompute>(ExitCount))
2669 ExitCount = SE.getExitCount(L, ExitingBB,
2670 ScalarEvolution::ConstantMaximum);
2671 if (!isa<SCEVCouldNotCompute>(ExitCount)) {
2672 assert(DT.dominates(ExitingBB, L->getLoopLatch()) &&
2673 "We should only have known counts for exiting blocks that "
2674 "dominate latch!");
2675 ExitCounts.push_back(ExitCount);
2676 }
2677 }
2678 if (ExitCounts.empty())
2679 return SE.getCouldNotCompute();
2680 return SE.getUMinFromMismatchedTypes(ExitCounts);
2681 }
2682
2683 bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
2684 SmallVector<BasicBlock*, 16> ExitingBlocks;
2685 L->getExitingBlocks(ExitingBlocks);
2686
2687 // Remove all exits which aren't both rewritable and analyzable.
2688 auto NewEnd = llvm::remove_if(ExitingBlocks,
2689 [&](BasicBlock *ExitingBB) {
2690 // If our exiting block exits multiple loops, we can only rewrite the
2691 // innermost one. Otherwise, we're changing how many times the innermost
2692 // loop runs before it exits.
2693 if (LI->getLoopFor(ExitingBB) != L)
2694 return true;
2695
2696 // Can't rewrite non-branch yet.
2697 BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
2698 if (!BI)
2699 return true;
2700
2701 // If already constant, nothing to do.
2702 if (isa<Constant>(BI->getCondition()))
2703 return true;
2704
2705 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2706 if (isa<SCEVCouldNotCompute>(ExitCount))
2707 return true;
2708 return false;
2709 });
2710 ExitingBlocks.erase(NewEnd, ExitingBlocks.end());
2711
2712 if (ExitingBlocks.empty())
2713 return false;
2714
2715 // Get a symbolic upper bound on the loop backedge taken count.
2716 const SCEV *MaxExitCount = getMaxBackedgeTakenCount(*SE, *DT, L);
2717 if (isa<SCEVCouldNotCompute>(MaxExitCount))
2718 return false;
2719
2720 // Visit our exit blocks in order of dominance. We know from the fact that
2721 // all remaining exits are analyzable that there must be a total dominance
2722 // order between them, as each must dominate the latch. The visit order only
2723 // matters for the provably equal case.
2724 llvm::sort(ExitingBlocks,
2725 [&](BasicBlock *A, BasicBlock *B) {
2726 // std::sort sorts in ascending order, so we want the inverse of
2727 // the normal dominance relation.
2728 if (DT->properlyDominates(A, B)) return true;
2729 if (DT->properlyDominates(B, A)) return false;
2730 llvm_unreachable("expected total dominance order!");
2731 });
2732 #ifndef NDEBUG
2733 for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
2734 assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
2735 }
2736 #endif
2737
  auto FoldExit = [&](BasicBlock *ExitingBB, bool IsTaken) {
    BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
    bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
    auto *OldCond = BI->getCondition();
    auto *NewCond = ConstantInt::get(OldCond->getType(),
                                     IsTaken ? ExitIfTrue : !ExitIfTrue);
    BI->setCondition(NewCond);
    if (OldCond->use_empty())
      DeadInsts.push_back(OldCond);
  };

  bool Changed = false;
  SmallSet<const SCEV*, 8> DominatingExitCounts;
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
    assert(!isa<SCEVCouldNotCompute>(ExitCount) && "checked above");

    // If we know we'd exit on the first iteration, rewrite the exit to
    // reflect this. This does not imply the loop must exit through this
    // exit; there may be an earlier one taken on the first iteration.
    // TODO: Given we know the backedge can't be taken, we should go ahead
    // and break it. Or at least, kill all the header phis and simplify.
    if (ExitCount->isZero()) {
      FoldExit(ExitingBB, true);
      Changed = true;
      continue;
    }
2765
2766 // If we end up with a pointer exit count, bail. Note that we can end up
2767 // with a pointer exit count for one exiting block, and not for another in
2768 // the same loop.
2769 if (!ExitCount->getType()->isIntegerTy() ||
2770 !MaxExitCount->getType()->isIntegerTy())
2771 continue;
2772
2773 Type *WiderType =
2774 SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
2775 ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType);
2776 MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType);
2777 assert(MaxExitCount->getType() == ExitCount->getType());

    // Can we prove that some other exit must be taken strictly before this
    // one?
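    // For instance (hypothetical, constant counts for simplicity): if another
    // exit runs out after 10 iterations while this one would need 20, the
    // merged bound above is at most 10; SCEV can typically prove 10 <u 20 at
    // entry, and this exit folds to never-taken.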
    if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT,
                                     MaxExitCount, ExitCount)) {
      FoldExit(ExitingBB, false);
      Changed = true;
      continue;
    }

    // As we run, keep track of which exit counts we've encountered. If we
    // find a duplicate, we've found an exit which would have been taken on
    // the same iteration as an earlier exit, but (from the visit order)
    // strictly follows it, and is thus dead.
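    // E.g. (illustrative): two exits both with computed exit count %n. The
    // first in dominance order is evaluated earlier on that final iteration,
    // so the second can never actually be taken and folds to never-taken.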
    if (!DominatingExitCounts.insert(ExitCount).second) {
      FoldExit(ExitingBB, false);
      Changed = true;
      continue;
    }

    // TODO: There might be another opportunity to leverage SCEV's reasoning
    // here. If we kept track of the min of dominating exits so far, we could
    // discharge exits with EC >= MDEC. This is less powerful than the
    // existing transform (since later exits aren't considered), but
    // potentially more powerful for any case where SCEV can prove a >=u b,
    // but neither a == b nor a >u b. Such a case is not currently known.
  }
  return Changed;
}

bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  bool Changed = false;

  // Finally, see if we can rewrite our exit conditions into a loop invariant
  // form. If we have a read-only loop, and we can tell that we must exit
  // down a path which does not need any of the values computed within the
  // loop, we can rewrite the loop to exit on the first iteration. Note that
  // this neither a) tells us the loop exits on the first iteration (unless
  // *all* exits are predicatable) nor b) tells us *which* exit might be
  // taken. This transformation looks a lot like a restricted form of dead
  // loop elimination, but restricted to read-only loops and without
  // necessarily needing to kill the loop entirely.
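  // Illustrative (hypothetical) example: a read-only loop with two counted
  // exits, 'i != n' and 'i != m', and no values live out of the loop. Each
  // in-loop test is replaced below by a loop-invariant comparison of that
  // exit's count against the exact backedge-taken count, so the loop leaves
  // down the correct exit on its first iteration.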
  if (!LoopPredication)
    return Changed;

  if (!SE->hasLoopInvariantBackedgeTakenCount(L))
    return Changed;

  // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
  // through *explicit* control flow. We have to eliminate the possibility of
  // implicit exits (see below) before we know it's truly exact.
  const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(ExactBTC) ||
      !SE->isLoopInvariant(ExactBTC, L) ||
      !isSafeToExpand(ExactBTC, *SE))
    return Changed;

  // If we end up with a pointer exit count, bail. It may be unsized.
  if (!ExactBTC->getType()->isIntegerTy())
    return Changed;

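  // BadExit, below, rejects exits we cannot predicate: an exit survives only
  // if it is a conditional branch out of just this loop, not already
  // constant, with an analyzable, loop-invariant, integer exit count and an
  // exit block free of phis.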
  auto BadExit = [&](BasicBlock *ExitingBB) {
    // If our exiting block exits multiple loops, we can only rewrite the
    // innermost one. Otherwise, we're changing how many times the innermost
    // loop runs before it exits.
    if (LI->getLoopFor(ExitingBB) != L)
      return true;

    // Can't rewrite non-branch yet.
    BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      return true;

    // If already constant, nothing to do.
    if (isa<Constant>(BI->getCondition()))
      return true;

    // If the exit block has phis, we need to be able to compute the values
    // within the loop which contains them. This assumes trivial LCSSA phis
    // have already been removed; TODO: generalize.
    BasicBlock *ExitBlock =
        BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
    if (!ExitBlock->phis().empty())
      return true;

    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
    assert(!isa<SCEVCouldNotCompute>(ExactBTC) &&
           "implied by having exact trip count");
    if (!SE->isLoopInvariant(ExitCount, L) ||
        !isSafeToExpand(ExitCount, *SE))
      return true;

    // If we end up with a pointer exit count, bail. It may be unsized.
    if (!ExitCount->getType()->isIntegerTy())
      return true;

    return false;
  };
2878
2879 // If we have any exits which can't be predicated themselves, than we can't
2880 // predicate any exit which isn't guaranteed to execute before it. Consider
2881 // two exits (a) and (b) which would both exit on the same iteration. If we
2882 // can predicate (b), but not (a), and (a) preceeds (b) along some path, then
2883 // we could convert a loop from exiting through (a) to one exiting through
2884 // (b). Note that this problem exists only for exits with the same exit
2885 // count, and we could be more aggressive when exit counts are known inequal.
2886 llvm::sort(ExitingBlocks,
2887 [&](BasicBlock *A, BasicBlock *B) {
2888 // std::sort sorts in ascending order, so we want the inverse of
2889 // the normal dominance relation, plus a tie breaker for blocks
2890 // unordered by dominance.
2891 if (DT->properlyDominates(A, B)) return true;
2892 if (DT->properlyDominates(B, A)) return false;
2893 return A->getName() < B->getName();
2894 });
  // Check to see if our exit blocks are a total order (i.e. a linear chain of
  // exits before the backedge). If they aren't, reasoning about reachability
  // is complicated and we choose not to for now.
  for (unsigned i = 1; i < ExitingBlocks.size(); i++)
    if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
      return Changed;

  // Given our sorted total order, we know that exit[j] must be evaluated
  // after all exit[i] such that j > i.
  for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
    if (BadExit(ExitingBlocks[i])) {
      ExitingBlocks.resize(i);
      break;
    }

  if (ExitingBlocks.empty())
    return Changed;

  // We rely on not being able to reach an exiting block on a later iteration
  // than its statically computed exit count. The implementation of
  // getExitCount currently has this invariant, but assert it here so that
  // breakage is obvious if this ever changes.
  assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
    return DT->dominates(ExitingBB, L->getLoopLatch());
  }));

  // At this point, ExitingBlocks consists of only those blocks which are
  // predicatable. Given that, we know we have at least one exit we can
  // predicate if the loop doesn't have side effects and doesn't have any
  // implicit exits (because then our exact BTC isn't actually exact).
  // @Reviewers - As structured, this is O(I^2) for loop nests. Any
  // suggestions on how to improve this? I can obviously bail out for outer
  // loops, but that seems less than ideal. MemorySSA can find memory writes,
  // is that enough for *all* side effects?
  for (BasicBlock *BB : L->blocks())
    for (auto &I : *BB)
      // TODO: isGuaranteedToTransferExecutionToSuccessor
      if (I.mayHaveSideEffects() || I.mayThrow())
        return Changed;

  // Finally, do the actual predication for all predicatable blocks. A couple
  // of notes here:
  // 1) We don't bother to constant fold dominated exits with identical exit
  //    counts; that's simply a form of CSE/equality propagation and we leave
  //    it for dedicated passes.
  // 2) We insert the comparison at the branch. Hoisting introduces
  //    additional legality constraints and we leave that to dedicated logic.
  //    We want to predicate even if we can't insert a loop invariant
  //    expression as peeling or unrolling will likely reduce the cost of the
  //    otherwise loop varying check.
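  // Sketch of the rewrite (hypothetical IR): for an exit whose count %ec is
  // not literally the same SCEV as the exact BTC %btc, the branch condition
  // becomes 'icmp ne %ec, %btc' (or 'icmp eq', depending on which successor
  // stays in the loop), with %ec and %btc expanded at the preheader; an exit
  // whose count *is* ExactBTC folds directly to a constant branch.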
  Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
  IRBuilder<> B(L->getLoopPreheader()->getTerminator());
  Value *ExactBTCV = nullptr; // Lazily generated if needed.
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);

    auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
    Value *NewCond;
    if (ExitCount == ExactBTC) {
      NewCond = L->contains(BI->getSuccessor(0)) ?
        B.getFalse() : B.getTrue();
    } else {
      Value *ECV = Rewriter.expandCodeFor(ExitCount);
      if (!ExactBTCV)
        ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
      Value *RHS = ExactBTCV;
      if (ECV->getType() != RHS->getType()) {
        Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
        ECV = B.CreateZExt(ECV, WiderTy);
        RHS = B.CreateZExt(RHS, WiderTy);
      }
      auto Pred = L->contains(BI->getSuccessor(0)) ?
        ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
      NewCond = B.CreateICmp(Pred, ECV, RHS);
    }
    Value *OldCond = BI->getCondition();
    BI->setCondition(NewCond);
    if (OldCond->use_empty())
      DeadInsts.push_back(OldCond);
    Changed = true;
  }

  return Changed;
}

//===----------------------------------------------------------------------===//
//  IndVarSimplify driver. Manage several subpasses of IV simplification.
//===----------------------------------------------------------------------===//

bool IndVarSimplify::run(Loop *L) {
  // We need (and expect!) the incoming loop to be in LCSSA.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "LCSSA required to run indvars!");
  bool Changed = false;

  // If LoopSimplify form is not available, stay out of trouble. Some notes:
  //  - LSR currently only supports LoopSimplify-form loops. Indvars'
  //    canonicalization can be a pessimization without LSR to "clean up"
  //    afterwards.
  //  - We depend on having a preheader; in particular,
  //    Loop::getCanonicalInductionVariable only supports loops with
  //    preheaders, and we're in trouble if we can't find the induction
  //    variable even when we've manually inserted one.
  //  - LFTR relies on having a single backedge.
  if (!L->isLoopSimplifyForm())
    return false;

#ifndef NDEBUG
  // Used below for a consistency check only
  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
#endif

  // If there are any floating-point recurrences, attempt to
  // transform them to use integer recurrences.
  Changed |= rewriteNonIntegerIVs(L);

  // Create a rewriter object which we'll use to transform the code with.
  SCEVExpander Rewriter(*SE, DL, "indvars");
#ifndef NDEBUG
  Rewriter.setDebugType(DEBUG_TYPE);
#endif
3016
3017 // Eliminate redundant IV users.
3018 //
3019 // Simplification works best when run before other consumers of SCEV. We
3020 // attempt to avoid evaluating SCEVs for sign/zero extend operations until
3021 // other expressions involving loop IVs have been evaluated. This helps SCEV
3022 // set no-wrap flags before normalizing sign/zero extension.
3023 Rewriter.disableCanonicalMode();
3024 Changed |= simplifyAndExtend(L, Rewriter, LI);
3025
3026 // Check to see if we can compute the final value of any expressions
3027 // that are recurrent in the loop, and substitute the exit values from the
3028 // loop into any instructions outside of the loop that use the final values
3029 // of the current expressions.
3030 if (ReplaceExitValue != NeverRepl)
3031 Changed |= rewriteLoopExitValues(L, Rewriter);
3032
3033 // Eliminate redundant IV cycles.
3034 NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
3035
3036 // Try to eliminate loop exits based on analyzeable exit counts
3037 if (optimizeLoopExits(L, Rewriter)) {
3038 Changed = true;
3039 // Given we've changed exit counts, notify SCEV
3040 SE->forgetLoop(L);
3041 }
3042
3043 // Try to form loop invariant tests for loop exits by changing how many
3044 // iterations of the loop run when that is unobservable.
3045 if (predicateLoopExits(L, Rewriter)) {
3046 Changed = true;
3047 // Given we've changed exit counts, notify SCEV
3048 SE->forgetLoop(L);
3049 }

  // If we have a trip count expression, rewrite the loop's exit condition
  // using it.
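  // For example (illustrative, hypothetical IR): linearFunctionTestReplace
  // can turn an exit test such as 'icmp slt i64 %iv, %n' into an equality
  // test like 'icmp ne i64 %iv.next, %exit.val' against the expanded exit
  // count, the canonical form subsequent analyses handle best.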
  if (!DisableLFTR) {
    SmallVector<BasicBlock*, 16> ExitingBlocks;
    L->getExitingBlocks(ExitingBlocks);
    for (BasicBlock *ExitingBB : ExitingBlocks) {
      // Can't rewrite non-branch yet.
      if (!isa<BranchInst>(ExitingBB->getTerminator()))
        continue;

      // If our exiting block exits multiple loops, we can only rewrite the
      // innermost one. Otherwise, we're changing how many times the
      // innermost loop runs before it exits.
      if (LI->getLoopFor(ExitingBB) != L)
        continue;

      if (!needsLFTR(L, ExitingBB))
        continue;

      const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
      if (isa<SCEVCouldNotCompute>(ExitCount))
        continue;

      // This was handled above, but as we form SCEVs, we can sometimes
      // refine existing ones; this allows exit counts to be folded to zero
      // which weren't when optimizeLoopExits saw them. Arguably, we should
      // iterate until stable to handle cases like this better.
      if (ExitCount->isZero())
        continue;

      PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
      if (!IndVar)
        continue;

      // Avoid high cost expansions. Note: This heuristic is questionable in
      // that our definition of "high cost" is not exactly principled.
      if (Rewriter.isHighCostExpansion(ExitCount, L))
        continue;

      // Check preconditions for proper SCEVExpander operation. SCEV does not
      // express SCEVExpander's dependencies, such as LoopSimplify. Instead
      // any pass that uses the SCEVExpander must do it. This does not work
      // well for loop passes because SCEVExpander makes assumptions about
      // all loops, while LoopPassManager only forces the current loop to be
      // simplified.
      //
      // FIXME: SCEV expansion has no way to bail out, so the caller must
      // explicitly check any assumptions made by SCEV. Brittle.
      const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
      if (!AR || AR->getLoop()->getLoopPreheader())
        Changed |= linearFunctionTestReplace(L, ExitingBB,
                                             ExitCount, IndVar,
                                             Rewriter);
    }
  }
  // Clear the rewriter cache, because values that are in the rewriter's cache
  // can be deleted in the loop below, causing the AssertingVH in the cache to
  // trigger.
  Rewriter.clear();

  // Now that we're done iterating through lists, clean up any instructions
  // which are now dead.
  while (!DeadInsts.empty())
    if (Instruction *Inst =
            dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
      Changed |= RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI);

  // The Rewriter may not be used from this point on.

  // Loop-invariant instructions in the preheader that aren't used in the
  // loop may be sunk below the loop to reduce register pressure.
  Changed |= sinkUnusedInvariants(L);

  // rewriteFirstIterationLoopExitValues does not rely on the computation of
  // trip count and therefore can further simplify exit values in addition to
  // rewriteLoopExitValues.
  Changed |= rewriteFirstIterationLoopExitValues(L);

  // Clean up dead instructions.
  Changed |= DeleteDeadPHIs(L->getHeader(), TLI);

  // Check a post-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");

  // Verify that LFTR and any other changes have not interfered with SCEV's
  // ability to compute trip count. We may have *changed* the exit count, but
  // only by reducing it.
#ifndef NDEBUG
  if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
    SE->forgetLoop(L);
    const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
    if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
        SE->getTypeSizeInBits(NewBECount->getType()))
      NewBECount = SE->getTruncateOrNoop(NewBECount,
                                         BackedgeTakenCount->getType());
    else
      BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
                                                 NewBECount->getType());
    assert(!SE->isKnownPredicate(ICmpInst::ICMP_ULT, BackedgeTakenCount,
                                 NewBECount) && "indvars must preserve SCEV");
  }
#endif

  return Changed;
}

PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
                                          LoopStandardAnalysisResults &AR,
                                          LPMUpdater &) {
  Function *F = L.getHeader()->getParent();
  const DataLayout &DL = F->getParent()->getDataLayout();

  IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI);
  if (!IVS.run(&L))
    return PreservedAnalyses::all();

  auto PA = getLoopPassPreservedAnalyses();
  PA.preserveSet<CFGAnalyses>();
  return PA;
}

namespace {

struct IndVarSimplifyLegacyPass : public LoopPass {
  static char ID; // Pass identification, replacement for typeid

  IndVarSimplifyLegacyPass() : LoopPass(ID) {
    initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
    if (skipLoop(L))
      return false;

    auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
    auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
    auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
    auto *TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr;
    auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
    auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
    const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();

    IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI);
    return IVS.run(L);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    getLoopAnalysisUsage(AU);
  }
};

} // end anonymous namespace

char IndVarSimplifyLegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
                      "Induction Variable Simplification", false, false)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
                    "Induction Variable Simplification", false, false)

Pass *llvm::createIndVarSimplifyPass() {
  return new IndVarSimplifyLegacyPass();
}
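
// Usage sketch (illustrative): with the legacy pass manager, this pass is
// created via createIndVarSimplifyPass(); with the new pass manager it runs
// as a loop pass, e.g. 'opt -passes=indvars test.ll'.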