• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===- llvm/Transforms/Vectorize/LoopVectorizationLegality.h ----*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This file defines the LoopVectorizationLegality class. Original code
11 /// in Loop Vectorizer has been moved out to its own file for modularity
12 /// and reusability.
13 ///
14 /// Currently, it works for innermost loop vectorization. Extending this to
15 /// outer loop vectorization is a TODO item.
16 ///
17 /// Also provides:
18 /// 1) LoopVectorizeHints class which keeps a number of loop annotations
19 /// locally for easy look up. It has the ability to write them back as
20 /// loop metadata, upon request.
21 /// 2) LoopVectorizationRequirements class for lazy bail out for the purpose
22 /// of reporting useful failure to vectorize message.
23 //
24 //===----------------------------------------------------------------------===//
25 
26 #ifndef LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H
27 #define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H
28 
29 #include "llvm/ADT/MapVector.h"
30 #include "llvm/Analysis/LoopAccessAnalysis.h"
31 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
32 #include "llvm/Support/TypeSize.h"
33 #include "llvm/Transforms/Utils/LoopUtils.h"
34 
35 namespace llvm {
36 
37 /// Utility class for getting and setting loop vectorizer hints in the form
38 /// of loop metadata.
39 /// This class keeps a number of loop annotations locally (as member variables)
40 /// and can, upon request, write them back as metadata on the loop. It will
41 /// initially scan the loop for existing metadata, and will update the local
42 /// values based on information in the loop.
43 /// We cannot write all values to metadata, as the mere presence of some info,
44 /// for example 'force', means a decision has been made. So, we need to be
45 /// careful NOT to add them if the user hasn't specifically asked so.
46 class LoopVectorizeHints {
47   enum HintKind {
48     HK_WIDTH,
49     HK_UNROLL,
50     HK_FORCE,
51     HK_ISVECTORIZED,
52     HK_PREDICATE,
53     HK_SCALABLE
54   };
55 
56   /// Hint - associates name and validation with the hint value.
57   struct Hint {
58     const char *Name;
59     unsigned Value; // This may have to change for non-numeric values.
60     HintKind Kind;
61 
HintHint62     Hint(const char *Name, unsigned Value, HintKind Kind)
63         : Name(Name), Value(Value), Kind(Kind) {}
64 
65     bool validate(unsigned Val);
66   };
67 
68   /// Vectorization width.
69   Hint Width;
70 
71   /// Vectorization interleave factor.
72   Hint Interleave;
73 
74   /// Vectorization forced
75   Hint Force;
76 
77   /// Already Vectorized
78   Hint IsVectorized;
79 
80   /// Vector Predicate
81   Hint Predicate;
82 
83   /// Says whether we should use fixed width or scalable vectorization.
84   Hint Scalable;
85 
86   /// Return the loop metadata prefix.
Prefix()87   static StringRef Prefix() { return "llvm.loop."; }
88 
89   /// True if there is any unsafe math in the loop.
90   bool PotentiallyUnsafe = false;
91 
92 public:
93   enum ForceKind {
94     FK_Undefined = -1, ///< Not selected.
95     FK_Disabled = 0,   ///< Forcing disabled.
96     FK_Enabled = 1,    ///< Forcing enabled.
97   };
98 
99   LoopVectorizeHints(const Loop *L, bool InterleaveOnlyWhenForced,
100                      OptimizationRemarkEmitter &ORE);
101 
102   /// Mark the loop L as already vectorized by setting the width to 1.
103   void setAlreadyVectorized();
104 
105   bool allowVectorization(Function *F, Loop *L,
106                           bool VectorizeOnlyWhenForced) const;
107 
108   /// Dumps all the hint information.
109   void emitRemarkWithHints() const;
110 
getWidth()111   ElementCount getWidth() const {
112     return ElementCount::get(Width.Value, isScalable());
113   }
getInterleave()114   unsigned getInterleave() const { return Interleave.Value; }
getIsVectorized()115   unsigned getIsVectorized() const { return IsVectorized.Value; }
getPredicate()116   unsigned getPredicate() const { return Predicate.Value; }
getForce()117   enum ForceKind getForce() const {
118     if ((ForceKind)Force.Value == FK_Undefined &&
119         hasDisableAllTransformsHint(TheLoop))
120       return FK_Disabled;
121     return (ForceKind)Force.Value;
122   }
123 
isScalable()124   bool isScalable() const { return Scalable.Value; }
125 
126   /// If hints are provided that force vectorization, use the AlwaysPrint
127   /// pass name to force the frontend to print the diagnostic.
128   const char *vectorizeAnalysisPassName() const;
129 
allowReordering()130   bool allowReordering() const {
131     // When enabling loop hints are provided we allow the vectorizer to change
132     // the order of operations that is given by the scalar loop. This is not
133     // enabled by default because can be unsafe or inefficient. For example,
134     // reordering floating-point operations will change the way round-off
135     // error accumulates in the loop.
136     ElementCount EC = getWidth();
137     return getForce() == LoopVectorizeHints::FK_Enabled ||
138            EC.getKnownMinValue() > 1;
139   }
140 
isPotentiallyUnsafe()141   bool isPotentiallyUnsafe() const {
142     // Avoid FP vectorization if the target is unsure about proper support.
143     // This may be related to the SIMD unit in the target not handling
144     // IEEE 754 FP ops properly, or bad single-to-double promotions.
145     // Otherwise, a sequence of vectorized loops, even without reduction,
146     // could lead to different end results on the destination vectors.
147     return getForce() != LoopVectorizeHints::FK_Enabled && PotentiallyUnsafe;
148   }
149 
setPotentiallyUnsafe()150   void setPotentiallyUnsafe() { PotentiallyUnsafe = true; }
151 
152 private:
153   /// Find hints specified in the loop metadata and update local values.
154   void getHintsFromMetadata();
155 
156   /// Checks string hint with one operand and set value if valid.
157   void setHint(StringRef Name, Metadata *Arg);
158 
159   /// The loop these hints belong to.
160   const Loop *TheLoop;
161 
162   /// Interface to emit optimization remarks.
163   OptimizationRemarkEmitter &ORE;
164 };
165 
166 /// This holds vectorization requirements that must be verified late in
167 /// the process. The requirements are set by legalize and costmodel. Once
168 /// vectorization has been determined to be possible and profitable the
169 /// requirements can be verified by looking for metadata or compiler options.
170 /// For example, some loops require FP commutativity which is only allowed if
171 /// vectorization is explicitly specified or if the fast-math compiler option
172 /// has been provided.
173 /// Late evaluation of these requirements allows helpful diagnostics to be
174 /// composed that tells the user what need to be done to vectorize the loop. For
175 /// example, by specifying #pragma clang loop vectorize or -ffast-math. Late
176 /// evaluation should be used only when diagnostics can generated that can be
177 /// followed by a non-expert user.
178 class LoopVectorizationRequirements {
179 public:
LoopVectorizationRequirements(OptimizationRemarkEmitter & ORE)180   LoopVectorizationRequirements(OptimizationRemarkEmitter &ORE) : ORE(ORE) {}
181 
addUnsafeAlgebraInst(Instruction * I)182   void addUnsafeAlgebraInst(Instruction *I) {
183     // First unsafe algebra instruction.
184     if (!UnsafeAlgebraInst)
185       UnsafeAlgebraInst = I;
186   }
187 
addRuntimePointerChecks(unsigned Num)188   void addRuntimePointerChecks(unsigned Num) { NumRuntimePointerChecks = Num; }
189 
190   bool doesNotMeet(Function *F, Loop *L, const LoopVectorizeHints &Hints);
191 
192 private:
193   unsigned NumRuntimePointerChecks = 0;
194   Instruction *UnsafeAlgebraInst = nullptr;
195 
196   /// Interface to emit optimization remarks.
197   OptimizationRemarkEmitter &ORE;
198 };
199 
200 /// LoopVectorizationLegality checks if it is legal to vectorize a loop, and
201 /// to what vectorization factor.
202 /// This class does not look at the profitability of vectorization, only the
203 /// legality. This class has two main kinds of checks:
204 /// * Memory checks - The code in canVectorizeMemory checks if vectorization
205 ///   will change the order of memory accesses in a way that will change the
206 ///   correctness of the program.
207 /// * Scalars checks - The code in canVectorizeInstrs and canVectorizeMemory
208 /// checks for a number of different conditions, such as the availability of a
209 /// single induction variable, that all types are supported and vectorize-able,
210 /// etc. This code reflects the capabilities of InnerLoopVectorizer.
211 /// This class is also used by InnerLoopVectorizer for identifying
212 /// induction variable and the different reduction variables.
213 class LoopVectorizationLegality {
214 public:
LoopVectorizationLegality(Loop * L,PredicatedScalarEvolution & PSE,DominatorTree * DT,TargetTransformInfo * TTI,TargetLibraryInfo * TLI,AAResults * AA,Function * F,std::function<const LoopAccessInfo & (Loop &)> * GetLAA,LoopInfo * LI,OptimizationRemarkEmitter * ORE,LoopVectorizationRequirements * R,LoopVectorizeHints * H,DemandedBits * DB,AssumptionCache * AC,BlockFrequencyInfo * BFI,ProfileSummaryInfo * PSI)215   LoopVectorizationLegality(
216       Loop *L, PredicatedScalarEvolution &PSE, DominatorTree *DT,
217       TargetTransformInfo *TTI, TargetLibraryInfo *TLI, AAResults *AA,
218       Function *F, std::function<const LoopAccessInfo &(Loop &)> *GetLAA,
219       LoopInfo *LI, OptimizationRemarkEmitter *ORE,
220       LoopVectorizationRequirements *R, LoopVectorizeHints *H, DemandedBits *DB,
221       AssumptionCache *AC, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI)
222       : TheLoop(L), LI(LI), PSE(PSE), TTI(TTI), TLI(TLI), DT(DT),
223         GetLAA(GetLAA), ORE(ORE), Requirements(R), Hints(H), DB(DB), AC(AC),
224         BFI(BFI), PSI(PSI) {}
225 
226   /// ReductionList contains the reduction descriptors for all
227   /// of the reductions that were found in the loop.
228   using ReductionList = MapVector<PHINode *, RecurrenceDescriptor>;
229 
230   /// InductionList saves induction variables and maps them to the
231   /// induction descriptor.
232   using InductionList = MapVector<PHINode *, InductionDescriptor>;
233 
234   /// RecurrenceSet contains the phi nodes that are recurrences other than
235   /// inductions and reductions.
236   using RecurrenceSet = SmallPtrSet<const PHINode *, 8>;
237 
238   /// Returns true if it is legal to vectorize this loop.
239   /// This does not mean that it is profitable to vectorize this
240   /// loop, only that it is legal to do so.
241   /// Temporarily taking UseVPlanNativePath parameter. If true, take
242   /// the new code path being implemented for outer loop vectorization
243   /// (should be functional for inner loop vectorization) based on VPlan.
244   /// If false, good old LV code.
245   bool canVectorize(bool UseVPlanNativePath);
246 
247   /// Return true if we can vectorize this loop while folding its tail by
248   /// masking, and mark all respective loads/stores for masking.
249   /// This object's state is only modified iff this function returns true.
250   bool prepareToFoldTailByMasking();
251 
252   /// Returns the primary induction variable.
getPrimaryInduction()253   PHINode *getPrimaryInduction() { return PrimaryInduction; }
254 
255   /// Returns the reduction variables found in the loop.
getReductionVars()256   ReductionList &getReductionVars() { return Reductions; }
257 
258   /// Returns the induction variables found in the loop.
getInductionVars()259   InductionList &getInductionVars() { return Inductions; }
260 
261   /// Return the first-order recurrences found in the loop.
getFirstOrderRecurrences()262   RecurrenceSet &getFirstOrderRecurrences() { return FirstOrderRecurrences; }
263 
264   /// Return the set of instructions to sink to handle first-order recurrences.
getSinkAfter()265   DenseMap<Instruction *, Instruction *> &getSinkAfter() { return SinkAfter; }
266 
267   /// Returns the widest induction type.
getWidestInductionType()268   Type *getWidestInductionType() { return WidestIndTy; }
269 
270   /// Returns True if V is a Phi node of an induction variable in this loop.
271   bool isInductionPhi(const Value *V);
272 
273   /// Returns True if V is a cast that is part of an induction def-use chain,
274   /// and had been proven to be redundant under a runtime guard (in other
275   /// words, the cast has the same SCEV expression as the induction phi).
276   bool isCastedInductionVariable(const Value *V);
277 
278   /// Returns True if V can be considered as an induction variable in this
279   /// loop. V can be the induction phi, or some redundant cast in the def-use
280   /// chain of the inducion phi.
281   bool isInductionVariable(const Value *V);
282 
283   /// Returns True if PN is a reduction variable in this loop.
isReductionVariable(PHINode * PN)284   bool isReductionVariable(PHINode *PN) { return Reductions.count(PN); }
285 
286   /// Returns True if Phi is a first-order recurrence in this loop.
287   bool isFirstOrderRecurrence(const PHINode *Phi);
288 
289   /// Return true if the block BB needs to be predicated in order for the loop
290   /// to be vectorized.
291   bool blockNeedsPredication(BasicBlock *BB);
292 
293   /// Check if this pointer is consecutive when vectorizing. This happens
294   /// when the last index of the GEP is the induction variable, or that the
295   /// pointer itself is an induction variable.
296   /// This check allows us to vectorize A[idx] into a wide load/store.
297   /// Returns:
298   /// 0 - Stride is unknown or non-consecutive.
299   /// 1 - Address is consecutive.
300   /// -1 - Address is consecutive, and decreasing.
301   /// NOTE: This method must only be used before modifying the original scalar
302   /// loop. Do not use after invoking 'createVectorizedLoopSkeleton' (PR34965).
303   int isConsecutivePtr(Value *Ptr);
304 
305   /// Returns true if the value V is uniform within the loop.
306   bool isUniform(Value *V);
307 
308   /// A uniform memory op is a load or store which accesses the same memory
309   /// location on all lanes.
isUniformMemOp(Instruction & I)310   bool isUniformMemOp(Instruction &I) {
311     Value *Ptr = getLoadStorePointerOperand(&I);
312     if (!Ptr)
313       return false;
314     // Note: There's nothing inherent which prevents predicated loads and
315     // stores from being uniform.  The current lowering simply doesn't handle
316     // it; in particular, the cost model distinguishes scatter/gather from
317     // scalar w/predication, and we currently rely on the scalar path.
318     return isUniform(Ptr) && !blockNeedsPredication(I.getParent());
319   }
320 
321   /// Returns the information that we collected about runtime memory check.
getRuntimePointerChecking()322   const RuntimePointerChecking *getRuntimePointerChecking() const {
323     return LAI->getRuntimePointerChecking();
324   }
325 
getLAI()326   const LoopAccessInfo *getLAI() const { return LAI; }
327 
getMaxSafeDepDistBytes()328   unsigned getMaxSafeDepDistBytes() { return LAI->getMaxSafeDepDistBytes(); }
329 
getMaxSafeVectorWidthInBits()330   uint64_t getMaxSafeVectorWidthInBits() const {
331     return LAI->getDepChecker().getMaxSafeVectorWidthInBits();
332   }
333 
hasStride(Value * V)334   bool hasStride(Value *V) { return LAI->hasStride(V); }
335 
336   /// Returns true if vector representation of the instruction \p I
337   /// requires mask.
isMaskRequired(const Instruction * I)338   bool isMaskRequired(const Instruction *I) { return (MaskedOp.count(I) != 0); }
339 
getNumStores()340   unsigned getNumStores() const { return LAI->getNumStores(); }
getNumLoads()341   unsigned getNumLoads() const { return LAI->getNumLoads(); }
342 
343   // Returns true if the NoNaN attribute is set on the function.
hasFunNoNaNAttr()344   bool hasFunNoNaNAttr() const { return HasFunNoNaNAttr; }
345 
346   /// Returns all assume calls in predicated blocks. They need to be dropped
347   /// when flattening the CFG.
getConditionalAssumes()348   const SmallPtrSetImpl<Instruction *> &getConditionalAssumes() const {
349     return ConditionalAssumes;
350   }
351 
352 private:
353   /// Return true if the pre-header, exiting and latch blocks of \p Lp and all
354   /// its nested loops are considered legal for vectorization. These legal
355   /// checks are common for inner and outer loop vectorization.
356   /// Temporarily taking UseVPlanNativePath parameter. If true, take
357   /// the new code path being implemented for outer loop vectorization
358   /// (should be functional for inner loop vectorization) based on VPlan.
359   /// If false, good old LV code.
360   bool canVectorizeLoopNestCFG(Loop *Lp, bool UseVPlanNativePath);
361 
362   /// Set up outer loop inductions by checking Phis in outer loop header for
363   /// supported inductions (int inductions). Return false if any of these Phis
364   /// is not a supported induction or if we fail to find an induction.
365   bool setupOuterLoopInductions();
366 
367   /// Return true if the pre-header, exiting and latch blocks of \p Lp
368   /// (non-recursive) are considered legal for vectorization.
369   /// Temporarily taking UseVPlanNativePath parameter. If true, take
370   /// the new code path being implemented for outer loop vectorization
371   /// (should be functional for inner loop vectorization) based on VPlan.
372   /// If false, good old LV code.
373   bool canVectorizeLoopCFG(Loop *Lp, bool UseVPlanNativePath);
374 
375   /// Check if a single basic block loop is vectorizable.
376   /// At this point we know that this is a loop with a constant trip count
377   /// and we only need to check individual instructions.
378   bool canVectorizeInstrs();
379 
380   /// When we vectorize loops we may change the order in which
381   /// we read and write from memory. This method checks if it is
382   /// legal to vectorize the code, considering only memory constrains.
383   /// Returns true if the loop is vectorizable
384   bool canVectorizeMemory();
385 
386   /// Return true if we can vectorize this loop using the IF-conversion
387   /// transformation.
388   bool canVectorizeWithIfConvert();
389 
390   /// Return true if we can vectorize this outer loop. The method performs
391   /// specific checks for outer loop vectorization.
392   bool canVectorizeOuterLoop();
393 
394   /// Return true if all of the instructions in the block can be speculatively
395   /// executed, and record the loads/stores that require masking. If's that
396   /// guard loads can be ignored under "assume safety" unless \p PreserveGuards
397   /// is true. This can happen when we introduces guards for which the original
398   /// "unguarded-loads are safe" assumption does not hold. For example, the
399   /// vectorizer's fold-tail transformation changes the loop to execute beyond
400   /// its original trip-count, under a proper guard, which should be preserved.
401   /// \p SafePtrs is a list of addresses that are known to be legal and we know
402   /// that we can read from them without segfault.
403   /// \p MaskedOp is a list of instructions that have to be transformed into
404   /// calls to the appropriate masked intrinsic when the loop is vectorized.
405   /// \p ConditionalAssumes is a list of assume instructions in predicated
406   /// blocks that must be dropped if the CFG gets flattened.
407   bool blockCanBePredicated(BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs,
408                             SmallPtrSetImpl<const Instruction *> &MaskedOp,
409                             SmallPtrSetImpl<Instruction *> &ConditionalAssumes,
410                             bool PreserveGuards = false) const;
411 
412   /// Updates the vectorization state by adding \p Phi to the inductions list.
413   /// This can set \p Phi as the main induction of the loop if \p Phi is a
414   /// better choice for the main induction than the existing one.
415   void addInductionPhi(PHINode *Phi, const InductionDescriptor &ID,
416                        SmallPtrSetImpl<Value *> &AllowedExit);
417 
418   /// If an access has a symbolic strides, this maps the pointer value to
419   /// the stride symbol.
getSymbolicStrides()420   const ValueToValueMap *getSymbolicStrides() {
421     // FIXME: Currently, the set of symbolic strides is sometimes queried before
422     // it's collected.  This happens from canVectorizeWithIfConvert, when the
423     // pointer is checked to reference consecutive elements suitable for a
424     // masked access.
425     return LAI ? &LAI->getSymbolicStrides() : nullptr;
426   }
427 
428   /// The loop that we evaluate.
429   Loop *TheLoop;
430 
431   /// Loop Info analysis.
432   LoopInfo *LI;
433 
434   /// A wrapper around ScalarEvolution used to add runtime SCEV checks.
435   /// Applies dynamic knowledge to simplify SCEV expressions in the context
436   /// of existing SCEV assumptions. The analysis will also add a minimal set
437   /// of new predicates if this is required to enable vectorization and
438   /// unrolling.
439   PredicatedScalarEvolution &PSE;
440 
441   /// Target Transform Info.
442   TargetTransformInfo *TTI;
443 
444   /// Target Library Info.
445   TargetLibraryInfo *TLI;
446 
447   /// Dominator Tree.
448   DominatorTree *DT;
449 
450   // LoopAccess analysis.
451   std::function<const LoopAccessInfo &(Loop &)> *GetLAA;
452 
453   // And the loop-accesses info corresponding to this loop.  This pointer is
454   // null until canVectorizeMemory sets it up.
455   const LoopAccessInfo *LAI = nullptr;
456 
457   /// Interface to emit optimization remarks.
458   OptimizationRemarkEmitter *ORE;
459 
460   //  ---  vectorization state --- //
461 
462   /// Holds the primary induction variable. This is the counter of the
463   /// loop.
464   PHINode *PrimaryInduction = nullptr;
465 
466   /// Holds the reduction variables.
467   ReductionList Reductions;
468 
469   /// Holds all of the induction variables that we found in the loop.
470   /// Notice that inductions don't need to start at zero and that induction
471   /// variables can be pointers.
472   InductionList Inductions;
473 
474   /// Holds all the casts that participate in the update chain of the induction
475   /// variables, and that have been proven to be redundant (possibly under a
476   /// runtime guard). These casts can be ignored when creating the vectorized
477   /// loop body.
478   SmallPtrSet<Instruction *, 4> InductionCastsToIgnore;
479 
480   /// Holds the phi nodes that are first-order recurrences.
481   RecurrenceSet FirstOrderRecurrences;
482 
483   /// Holds instructions that need to sink past other instructions to handle
484   /// first-order recurrences.
485   DenseMap<Instruction *, Instruction *> SinkAfter;
486 
487   /// Holds the widest induction type encountered.
488   Type *WidestIndTy = nullptr;
489 
490   /// Allowed outside users. This holds the variables that can be accessed from
491   /// outside the loop.
492   SmallPtrSet<Value *, 4> AllowedExit;
493 
494   /// Can we assume the absence of NaNs.
495   bool HasFunNoNaNAttr = false;
496 
497   /// Vectorization requirements that will go through late-evaluation.
498   LoopVectorizationRequirements *Requirements;
499 
500   /// Used to emit an analysis of any legality issues.
501   LoopVectorizeHints *Hints;
502 
503   /// The demanded bits analysis is used to compute the minimum type size in
504   /// which a reduction can be computed.
505   DemandedBits *DB;
506 
507   /// The assumption cache analysis is used to compute the minimum type size in
508   /// which a reduction can be computed.
509   AssumptionCache *AC;
510 
511   /// While vectorizing these instructions we have to generate a
512   /// call to the appropriate masked intrinsic
513   SmallPtrSet<const Instruction *, 8> MaskedOp;
514 
515   /// Assume instructions in predicated blocks must be dropped if the CFG gets
516   /// flattened.
517   SmallPtrSet<Instruction *, 8> ConditionalAssumes;
518 
519   /// BFI and PSI are used to check for profile guided size optimizations.
520   BlockFrequencyInfo *BFI;
521   ProfileSummaryInfo *PSI;
522 };
523 
524 } // namespace llvm
525 
526 #endif // LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H
527