//===- LoopAnalysis.cpp - Misc loop analysis routines -------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements miscellaneous loop analysis routines.
//
//===----------------------------------------------------------------------===//

#include "mlir/Analysis/LoopAnalysis.h"

#include "mlir/Analysis/AffineAnalysis.h"
#include "mlir/Analysis/AffineStructures.h"
#include "mlir/Analysis/NestedMatcher.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Affine/IR/AffineValueMap.h"
#include "mlir/Support/MathExtras.h"

#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallString.h"
#include <type_traits>

using namespace mlir;

/// Returns the trip count of the loop as an affine expression if the latter is
/// expressible as an affine expression, and nullptr otherwise. The trip count
/// expression is simplified before returning. This method only utilizes map
/// composition to construct lower and upper bounds before computing the trip
/// count expressions.
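///
/// Illustrative sketch (not from an actual test case): for a loop such as
///   affine.for %i = %lb to %ub step 4 { ... }
/// the resulting map would be roughly ()[s0, s1] -> ((s1 - s0) ceildiv 4) with
/// the lower/upper bound operands as map operands; the exact dim/symbol
/// positions and simplification depend on AffineValueMap::difference.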
void mlir::buildTripCountMapAndOperands(
    AffineForOp forOp, AffineMap *tripCountMap,
    SmallVectorImpl<Value> *tripCountOperands) {
  int64_t loopSpan;

  int64_t step = forOp.getStep();
  OpBuilder b(forOp.getOperation());

  if (forOp.hasConstantBounds()) {
    int64_t lb = forOp.getConstantLowerBound();
    int64_t ub = forOp.getConstantUpperBound();
    loopSpan = ub - lb;
    if (loopSpan < 0)
      loopSpan = 0;
    *tripCountMap = b.getConstantAffineMap(ceilDiv(loopSpan, step));
    tripCountOperands->clear();
    return;
  }
  auto lbMap = forOp.getLowerBoundMap();
  auto ubMap = forOp.getUpperBoundMap();
  if (lbMap.getNumResults() != 1) {
    *tripCountMap = AffineMap();
    return;
  }

  // Difference of each upper bound expression from the single lower bound
  // expression (divided by the step) provides the expressions for the trip
  // count map.
  AffineValueMap ubValueMap(ubMap, forOp.getUpperBoundOperands());

  SmallVector<AffineExpr, 4> lbSplatExpr(ubValueMap.getNumResults(),
                                         lbMap.getResult(0));
  auto lbMapSplat = AffineMap::get(lbMap.getNumDims(), lbMap.getNumSymbols(),
                                   lbSplatExpr, b.getContext());
  AffineValueMap lbSplatValueMap(lbMapSplat, forOp.getLowerBoundOperands());

  AffineValueMap tripCountValueMap;
  AffineValueMap::difference(ubValueMap, lbSplatValueMap, &tripCountValueMap);
  for (unsigned i = 0, e = tripCountValueMap.getNumResults(); i < e; ++i)
    tripCountValueMap.setResult(i,
                                tripCountValueMap.getResult(i).ceilDiv(step));

  *tripCountMap = tripCountValueMap.getAffineMap();
  tripCountOperands->assign(tripCountValueMap.getOperands().begin(),
                            tripCountValueMap.getOperands().end());
}

/// Returns the trip count of the loop if it's a constant, None otherwise. This
/// method uses affine expression analysis (in turn using getTripCount) and is
/// able to determine constant trip count in non-trivial cases.
// FIXME(mlir-team): this is really relying on buildTripCountMapAndOperands;
// being an analysis utility, it shouldn't. Replace with a version that just
// works with analysis structures (FlatAffineConstraints) and thus doesn't
// update the IR.
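// Illustrative sketch (not from a test): for
//   affine.for %i = 0 to 10 step 3 { ... }
// the trip count map folds to the constant 4 (i.e. (10 - 0) ceildiv 3), so 4
// is returned; bounds that don't fold to a constant yield None.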
Optional<uint64_t> mlir::getConstantTripCount(AffineForOp forOp) {
  SmallVector<Value, 4> operands;
  AffineMap map;
  buildTripCountMapAndOperands(forOp, &map, &operands);

  if (!map)
    return None;

  // Take the min if all trip counts are constant.
  Optional<uint64_t> tripCount;
  for (auto resultExpr : map.getResults()) {
    if (auto constExpr = resultExpr.dyn_cast<AffineConstantExpr>()) {
      if (tripCount.hasValue())
        tripCount = std::min(tripCount.getValue(),
                             static_cast<uint64_t>(constExpr.getValue()));
      else
        tripCount = constExpr.getValue();
    } else
      return None;
  }
  return tripCount;
}

/// Returns the greatest known integral divisor of the trip count. Affine
/// expression analysis is used (indirectly through getTripCount), and
/// this method is thus able to determine non-trivial divisors.
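/// Illustrative sketch (not from a test): if the trip count simplifies to the
/// expression 4 * s0 for some symbol s0, the value returned is 4 even though
/// the trip count itself is not a constant.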
uint64_t mlir::getLargestDivisorOfTripCount(AffineForOp forOp) {
  SmallVector<Value, 4> operands;
  AffineMap map;
  buildTripCountMapAndOperands(forOp, &map, &operands);

  if (!map)
    return 1;

  // The largest divisor of the trip count is the GCD of the individual largest
  // divisors.
  assert(map.getNumResults() >= 1 && "expected one or more results");
  Optional<uint64_t> gcd;
  for (auto resultExpr : map.getResults()) {
    uint64_t thisGcd;
    if (auto constExpr = resultExpr.dyn_cast<AffineConstantExpr>()) {
      uint64_t tripCount = constExpr.getValue();
      // 0 iteration loops (greatest divisor is 2^64 - 1).
      if (tripCount == 0)
        thisGcd = std::numeric_limits<uint64_t>::max();
      else
        // The greatest divisor is the trip count.
        thisGcd = tripCount;
    } else {
      // Trip count is not a known constant; use its largest known divisor.
      thisGcd = resultExpr.getLargestKnownDivisor();
    }
    if (gcd.hasValue())
      gcd = llvm::GreatestCommonDivisor64(gcd.getValue(), thisGcd);
    else
      gcd = thisGcd;
  }
  assert(gcd.hasValue() && "value expected per above logic");
  return gcd.getValue();
}

/// Given an induction variable `iv` of type AffineForOp and an access `index`
/// of type index, returns `true` if `index` is independent of `iv` and
/// false otherwise. The determination supports composition with at most one
/// AffineApplyOp. The 'at most one AffineApplyOp' comes from the fact that
/// the composition of AffineApplyOp needs to be canonicalized by construction
/// to avoid writing code that composes arbitrary numbers of AffineApplyOps
/// everywhere. To achieve this, at the very least, the compose-affine-apply
/// pass must have been run.
///
/// Prerequisites:
///   1. `iv` and `index` of the proper type;
///   2. at most one reachable AffineApplyOp from index;
///
/// Returns false in cases with more than one AffineApplyOp; this is
/// conservative.
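///
/// Illustrative sketch (hypothetical IR, not from a test): inside
///   affine.for %i = 0 to 100 {
///     %a = affine.apply affine_map<(d0)[s0] -> (d0 + s0)>(%i)[%n]
///     ...
///   }
/// the index %a is a function of %i and is thus not invariant, whereas a value
/// defined above the loop and not derived from %i is.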
static bool isAccessIndexInvariant(Value iv, Value index) {
  assert(isForInductionVar(iv) && "iv must be a AffineForOp");
  assert(index.getType().isa<IndexType>() && "index must be of IndexType");
  SmallVector<Operation *, 4> affineApplyOps;
  getReachableAffineApplyOps({index}, affineApplyOps);

  if (affineApplyOps.empty()) {
    // Pointer equality test because of Value pointer semantics.
    return index != iv;
  }

  if (affineApplyOps.size() > 1) {
    affineApplyOps[0]->emitRemark(
        "CompositionAffineMapsPass must have been run: there should be at most "
        "one AffineApplyOp, returning false conservatively.");
    return false;
  }

  auto composeOp = cast<AffineApplyOp>(affineApplyOps[0]);
  // We need yet another level of indirection because the `dim` index of the
  // access may not correspond to the `dim` index of composeOp.
  return !composeOp.getAffineValueMap().isFunctionOf(0, iv);
}

DenseSet<Value> mlir::getInvariantAccesses(Value iv, ArrayRef<Value> indices) {
  DenseSet<Value> res;
  for (unsigned idx = 0, n = indices.size(); idx < n; ++idx) {
    auto val = indices[idx];
    if (isAccessIndexInvariant(iv, val)) {
      res.insert(val);
    }
  }
  return res;
}

/// Given:
///   1. an induction variable `iv` of type AffineForOp;
///   2. a `memoryOp` of type const LoadOp& or const StoreOp&;
/// determines whether `memoryOp` has a contiguous access along `iv`. Contiguous
/// is defined as either invariant or varying only along a unique MemRef dim.
/// Upon success, the unique MemRef dim is written in `memRefDim` (or -1 to
/// convey the memRef access is invariant along `iv`).
///
/// Prerequisites:
///   1. `memRefDim` != nullptr;
///   2. `iv` of the proper type;
///   3. the MemRef accessed by `memoryOp` has no layout map or at most an
///      identity layout map.
///
/// Currently only supports no layoutMap or identity layoutMap in the MemRef.
/// Returns false if the MemRef has a non-identity layoutMap or more than 1
/// layoutMap. This is conservative.
///
// TODO: check strides.
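//
// Illustrative sketch (hypothetical, not from a test): for an access
//   affine.load %A[%i, %j] : memref<?x?xf32>
// with `iv` being the loop on %j, only the last memref dimension varies, so
// this succeeds with *memRefDim == 0; with `iv` being the loop on %i it
// succeeds with *memRefDim == 1; an access %A[%j, %j] varies along two
// dimensions for the loop on %j and makes this return false.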
template <typename LoadOrStoreOp>
static bool isContiguousAccess(Value iv, LoadOrStoreOp memoryOp,
                               int *memRefDim) {
  static_assert(
      llvm::is_one_of<LoadOrStoreOp, AffineLoadOp, AffineStoreOp>::value,
      "Must be called on either LoadOp or StoreOp");
  assert(memRefDim && "memRefDim == nullptr");
  auto memRefType = memoryOp.getMemRefType();

  auto layoutMap = memRefType.getAffineMaps();
  // TODO: remove dependence on Builder once we support non-identity layout map.
  Builder b(memoryOp.getContext());
  if (layoutMap.size() >= 2 ||
      (layoutMap.size() == 1 &&
       !(layoutMap[0] ==
         b.getMultiDimIdentityMap(layoutMap[0].getNumDims())))) {
    return memoryOp.emitError("NYI: non-trivial layoutMap"), false;
  }

  int uniqueVaryingIndexAlongIv = -1;
  auto accessMap = memoryOp.getAffineMap();
  SmallVector<Value, 4> mapOperands(memoryOp.getMapOperands());
  unsigned numDims = accessMap.getNumDims();
  for (unsigned i = 0, e = memRefType.getRank(); i < e; ++i) {
    // Gather map operands used in result expr 'i' into 'exprOperands'.
    SmallVector<Value, 4> exprOperands;
    auto resultExpr = accessMap.getResult(i);
    resultExpr.walk([&](AffineExpr expr) {
      if (auto dimExpr = expr.dyn_cast<AffineDimExpr>())
        exprOperands.push_back(mapOperands[dimExpr.getPosition()]);
      else if (auto symExpr = expr.dyn_cast<AffineSymbolExpr>())
        exprOperands.push_back(mapOperands[numDims + symExpr.getPosition()]);
    });
    // Check access invariance of each operand in 'exprOperands'.
    for (auto exprOperand : exprOperands) {
      if (!isAccessIndexInvariant(iv, exprOperand)) {
        if (uniqueVaryingIndexAlongIv != -1) {
          // 2+ varying indices -> do not vectorize along iv.
          return false;
        }
        uniqueVaryingIndexAlongIv = i;
      }
    }
  }

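  // Note that memRefDim is reported relative to the trailing (fastest varying)
  // memref dimension: a unique varying index in the last access map result
  // yields *memRefDim == 0.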
  if (uniqueVaryingIndexAlongIv == -1)
    *memRefDim = -1;
  else
    *memRefDim = memRefType.getRank() - (uniqueVaryingIndexAlongIv + 1);
  return true;
}

template <typename LoadOrStoreOp>
static bool isVectorElement(LoadOrStoreOp memoryOp) {
  auto memRefType = memoryOp.getMemRefType();
  return memRefType.getElementType().template isa<VectorType>();
}

using VectorizableOpFun = std::function<bool(AffineForOp, Operation &)>;

static bool
isVectorizableLoopBodyWithOpCond(AffineForOp loop,
                                 VectorizableOpFun isVectorizableOp,
                                 NestedPattern &vectorTransferMatcher) {
  auto *forOp = loop.getOperation();

  // No vectorization across conditionals for now.
  auto conditionals = matcher::If();
  SmallVector<NestedMatch, 8> conditionalsMatched;
  conditionals.match(forOp, &conditionalsMatched);
  if (!conditionalsMatched.empty()) {
    return false;
  }

  // No vectorization across unknown regions.
  auto regions = matcher::Op([](Operation &op) -> bool {
    return op.getNumRegions() != 0 && !isa<AffineIfOp, AffineForOp>(op);
  });
  SmallVector<NestedMatch, 8> regionsMatched;
  regions.match(forOp, &regionsMatched);
  if (!regionsMatched.empty()) {
    return false;
  }

  SmallVector<NestedMatch, 8> vectorTransfersMatched;
  vectorTransferMatcher.match(forOp, &vectorTransfersMatched);
  if (!vectorTransfersMatched.empty()) {
    return false;
  }

  auto loadAndStores = matcher::Op(matcher::isLoadOrStore);
  SmallVector<NestedMatch, 8> loadAndStoresMatched;
  loadAndStores.match(forOp, &loadAndStoresMatched);
  for (auto ls : loadAndStoresMatched) {
    auto *op = ls.getMatchedOperation();
    auto load = dyn_cast<AffineLoadOp>(op);
    auto store = dyn_cast<AffineStoreOp>(op);
    // Only scalar types are considered vectorizable; all loads/stores must be
    // vectorizable for a loop to qualify as vectorizable.
    // TODO: ponder whether we want to be more general here.
    bool vector = load ? isVectorElement(load) : isVectorElement(store);
    if (vector) {
      return false;
    }
    if (isVectorizableOp && !isVectorizableOp(loop, *op)) {
      return false;
    }
  }
  return true;
}

bool mlir::isVectorizableLoopBody(AffineForOp loop, int *memRefDim,
                                  NestedPattern &vectorTransferMatcher) {
  VectorizableOpFun fun([memRefDim](AffineForOp loop, Operation &op) {
    auto load = dyn_cast<AffineLoadOp>(op);
    auto store = dyn_cast<AffineStoreOp>(op);
    return load ? isContiguousAccess(loop.getInductionVar(), load, memRefDim)
                : isContiguousAccess(loop.getInductionVar(), store, memRefDim);
  });
  return isVectorizableLoopBodyWithOpCond(loop, fun, vectorTransferMatcher);
}

bool mlir::isVectorizableLoopBody(AffineForOp loop,
                                  NestedPattern &vectorTransferMatcher) {
  return isVectorizableLoopBodyWithOpCond(loop, nullptr, vectorTransferMatcher);
}

/// Checks whether SSA dominance would be violated if a for op's body
/// operations are shifted by the specified shifts. This method checks if a
/// 'def' and all its uses have the same shift factor.
// TODO: extend this to check for memory-based dependence violation when we have
// the support.
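//
// Illustrative sketch (hypothetical): if an operation in the body and one of
// its users inside the body are assigned different shifts, the use could end
// up shifted before its def, so this returns false; it returns true only when
// every such def/use pair shares the same shift.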
bool mlir::isOpwiseShiftValid(AffineForOp forOp, ArrayRef<uint64_t> shifts) {
  auto *forBody = forOp.getBody();
  assert(shifts.size() == forBody->getOperations().size());

  // Work backwards over the body of the block so that the shift of a use's
  // ancestor operation in the block gets recorded before it's looked up.
  DenseMap<Operation *, uint64_t> forBodyShift;
  for (auto it : llvm::enumerate(llvm::reverse(forBody->getOperations()))) {
    auto &op = it.value();

    // Get the index of the current operation; note that we are iterating in
    // reverse, so we need to fix it up.
    size_t index = shifts.size() - it.index() - 1;

    // Remember the shift of this operation.
    uint64_t shift = shifts[index];
    forBodyShift.try_emplace(&op, shift);

    // Validate the results of this operation if it were to be shifted.
    for (unsigned i = 0, e = op.getNumResults(); i < e; ++i) {
      Value result = op.getResult(i);
      for (auto *user : result.getUsers()) {
        // If an ancestor operation doesn't lie in the block of forOp,
        // there is no shift to check.
        if (auto *ancOp = forBody->findAncestorOpInBlock(*user)) {
          assert(forBodyShift.count(ancOp) > 0 && "ancestor expected in map");
          if (shift != forBodyShift[ancOp])
            return false;
        }
      }
    }
  }
  return true;
}