//===- VectorUtils.cpp - MLIR Utilities for VectorOps ------------------===//
//
// Part of the MLIR Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements utility methods for working with the Vector dialect.
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/Vector/VectorUtils.h"
#include "mlir/Analysis/LoopAnalysis.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/Dialect/Vector/VectorOps.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/IntegerSet.h"
#include "mlir/IR/Operation.h"
#include "mlir/Support/LLVM.h"
#include "mlir/Support/MathExtras.h"
#include <numeric>

#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SetVector.h"

using llvm::SetVector;

using namespace mlir;

/// Return the product of the elements of `basis` (i.e. the number of elements
/// in the hyper-rectangular space it spans), or `0` if `basis` is empty.
int64_t mlir::computeMaxLinearIndex(ArrayRef<int64_t> basis) {
  if (basis.empty())
    return 0;
  return std::accumulate(basis.begin(), basis.end(), int64_t(1),
                         std::multiplies<int64_t>());
}
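
// Editorial illustration (not in the original source), a worked usage sketch:
//   computeMaxLinearIndex({2, 3, 4}); // 2 * 3 * 4 = 24
//   computeMaxLinearIndex({});        // empty basis -> 0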

/// Given a shape with sizes greater than 0 along all dimensions,
/// return the distance, in number of elements, between a slice in a dimension
/// and the next slice in the same dimension.
/// e.g. shape[3, 4, 5] -> linearization_basis[20, 5, 1]
SmallVector<int64_t, 8> mlir::computeStrides(ArrayRef<int64_t> shape) {
  if (shape.empty())
    return {};
  SmallVector<int64_t, 8> tmp;
  tmp.reserve(shape.size());
  int64_t running = 1;
  for (auto size : llvm::reverse(shape)) {
    assert(size > 0 && "size must be positive");
    tmp.push_back(running);
    running *= size;
  }
  return SmallVector<int64_t, 8>(tmp.rbegin(), tmp.rend());
}

/// Given the `sizes` of slices to take along each dimension of `shape`,
/// return the strides, in number of slices, between a slice in a dimension
/// and the next slice in the same dimension.
SmallVector<int64_t, 4> mlir::computeStrides(ArrayRef<int64_t> shape,
                                             ArrayRef<int64_t> sizes) {
  int64_t rank = shape.size();
  // Compute the count for each dimension.
  SmallVector<int64_t, 4> sliceDimCounts(rank);
  for (int64_t r = 0; r < rank; ++r)
    sliceDimCounts[r] = ceilDiv(shape[r], sizes[r]);
  // Use that to compute the slice stride for each dimension.
  SmallVector<int64_t, 4> sliceStrides(rank);
  sliceStrides[rank - 1] = 1;
  for (int64_t r = rank - 2; r >= 0; --r)
    sliceStrides[r] = sliceStrides[r + 1] * sliceDimCounts[r + 1];
  return sliceStrides;
}
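
// Editorial illustration (not in the original source), a worked usage sketch:
//   computeStrides({4, 8}, {2, 4});
//   // Each dimension holds ceilDiv(4, 2) = 2 and ceilDiv(8, 4) = 2 slices,
//   // so the slice strides are {2, 1}.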

/// Return the linear index corresponding to the multi-dimensional `offsets`
/// with respect to `basis`.
int64_t mlir::linearize(ArrayRef<int64_t> offsets, ArrayRef<int64_t> basis) {
  assert(offsets.size() == basis.size());
  int64_t linearIndex = 0;
  for (unsigned idx = 0, e = basis.size(); idx < e; ++idx)
    linearIndex += offsets[idx] * basis[idx];
  return linearIndex;
}
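
// Editorial illustration (not in the original source), a worked usage sketch:
//   linearize({1, 2, 3}, {20, 5, 1}); // 1 * 20 + 2 * 5 + 3 * 1 = 33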

/// Given the strides in the slice space, return the multi-dimensional slice
/// offsets corresponding to the linear `index`. This is the inverse of
/// `linearize`.
SmallVector<int64_t, 4> mlir::delinearize(ArrayRef<int64_t> sliceStrides,
                                          int64_t index) {
  int64_t rank = sliceStrides.size();
  SmallVector<int64_t, 4> vectorOffsets(rank);
  for (int64_t r = 0; r < rank; ++r) {
    assert(sliceStrides[r] > 0);
    vectorOffsets[r] = index / sliceStrides[r];
    index %= sliceStrides[r];
  }
  return vectorOffsets;
}
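
// Editorial illustration (not in the original source), a worked usage sketch
// inverting the linearize example above:
//   delinearize({20, 5, 1}, 33); // 33 / 20 = 1, 13 / 5 = 2, 3 / 1 = 3
//                                // -> {1, 2, 3}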

/// Scale each vector slice offset by the corresponding slice size to obtain
/// element offsets.
SmallVector<int64_t, 4> mlir::computeElementOffsetsFromVectorSliceOffsets(
    ArrayRef<int64_t> sizes, ArrayRef<int64_t> vectorOffsets) {
  SmallVector<int64_t, 4> result;
  for (auto it : llvm::zip(vectorOffsets, sizes))
    result.push_back(std::get<0>(it) * std::get<1>(it));
  return result;
}
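
// Editorial illustration (not in the original source), a worked usage sketch:
//   computeElementOffsetsFromVectorSliceOffsets({2, 4}, {1, 1});
//   // {1 * 2, 1 * 4} = {2, 4}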

/// Compute the sizes of the slice starting at `elementOffsets`, clamping the
/// target `sizes` so that the slice does not run past the end of `shape`.
SmallVector<int64_t, 4>
mlir::computeSliceSizes(ArrayRef<int64_t> shape, ArrayRef<int64_t> sizes,
                        ArrayRef<int64_t> elementOffsets) {
  int64_t rank = shape.size();
  SmallVector<int64_t, 4> sliceSizes(rank);
  for (unsigned r = 0; r < rank; ++r)
    sliceSizes[r] = std::min(sizes[r], shape[r] - elementOffsets[r]);
  return sliceSizes;
}
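
// Editorial illustration (not in the original source), a worked usage sketch:
//   computeSliceSizes({4, 9}, {2, 4}, {2, 8});
//   // The last slice along dimension 1 is clamped at the shape boundary:
//   // {min(2, 4 - 2), min(4, 9 - 8)} = {2, 1}.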

/// Return the trailing dimensions of `superShape` divided elementwise by
/// `subShape`, with the remaining leading dimensions of `superShape` kept
/// as-is; return None if `subShape` does not divide `superShape` evenly.
Optional<SmallVector<int64_t, 4>> mlir::shapeRatio(ArrayRef<int64_t> superShape,
                                                   ArrayRef<int64_t> subShape) {
  if (superShape.size() < subShape.size()) {
    return Optional<SmallVector<int64_t, 4>>();
  }

  // Starting from the end, compute the integer divisors.
  std::vector<int64_t> result;
  result.reserve(superShape.size());
  int64_t superSize = 0, subSize = 0;
  for (auto it :
       llvm::zip(llvm::reverse(superShape), llvm::reverse(subShape))) {
    std::tie(superSize, subSize) = it;
    assert(superSize > 0 && "superSize must be > 0");
    assert(subSize > 0 && "subSize must be > 0");

    // If integral division does not occur, return and let the caller decide.
    if (superSize % subSize != 0)
      return None;
    result.push_back(superSize / subSize);
  }

  // At this point we computed the ratio (in reverse) for the common suffix of
  // the shapes. Fill with the remaining entries from the super-vector shape
  // (still in reverse).
  int commonSize = subShape.size();
  std::copy(superShape.rbegin() + commonSize, superShape.rend(),
            std::back_inserter(result));

  assert(result.size() == superShape.size() &&
         "super to sub shape ratio is not of the same size as the super rank");

  // Reverse again to get it back in the proper order and return.
  return SmallVector<int64_t, 4>{result.rbegin(), result.rend()};
}
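
// Editorial illustration (not in the original source), a worked usage sketch:
//   shapeRatio({3, 4, 8}, {2, 4}); // trailing dims: 4 / 2 = 2, 8 / 4 = 2;
//                                  // leading 3 is kept -> {3, 2, 2}
//   shapeRatio({3, 5}, {2});       // 5 % 2 != 0 -> None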

/// Return the ratio of the shapes of `superVectorType` to `subVectorType`.
Optional<SmallVector<int64_t, 4>> mlir::shapeRatio(VectorType superVectorType,
                                                   VectorType subVectorType) {
  assert(superVectorType.getElementType() == subVectorType.getElementType() &&
         "vector types must be of the same elemental type");
  return shapeRatio(superVectorType.getShape(), subVectorType.getShape());
}

/// Constructs a permutation map from memref indices to vector dimensions.
///
/// The implementation uses the knowledge of the mapping of enclosing loops to
/// vector dimensions. `enclosingLoopToVectorDim` carries this information as a
/// map with:
/// - keys representing "vectorized enclosing loops";
/// - values representing the corresponding vector dimension.
/// The algorithm traverses "vectorized enclosing loops" and extracts the
/// at-most-one MemRef index that varies along said loop. This index is
/// guaranteed to be at most one by construction: otherwise the MemRef is not
/// vectorizable.
/// If such a varying index is found, it is added to the permutation_map at the
/// proper vector dimension.
/// If no index is found to vary, 0 is added to the permutation_map; this
/// corresponds to a vector broadcast along that dimension.
///
/// Returns an empty AffineMap if `enclosingLoopToVectorDim` is empty,
/// signalling that no permutation map can be constructed given
/// `enclosingLoopToVectorDim`.
///
/// Examples can be found in the documentation of `makePermutationMap`, in the
/// header file.
static AffineMap makePermutationMap(
    ArrayRef<Value> indices,
    const DenseMap<Operation *, unsigned> &enclosingLoopToVectorDim) {
  if (enclosingLoopToVectorDim.empty())
    return AffineMap();
  MLIRContext *context =
      enclosingLoopToVectorDim.begin()->getFirst()->getContext();
  SmallVector<AffineExpr, 4> perm(enclosingLoopToVectorDim.size(),
                                  getAffineConstantExpr(0, context));

  for (auto kvp : enclosingLoopToVectorDim) {
    assert(kvp.second < perm.size());
    auto invariants = getInvariantAccesses(
        cast<AffineForOp>(kvp.first).getInductionVar(), indices);
    unsigned numIndices = indices.size();
    unsigned countInvariantIndices = 0;
    for (unsigned dim = 0; dim < numIndices; ++dim) {
      if (!invariants.count(indices[dim])) {
        assert(perm[kvp.second] == getAffineConstantExpr(0, context) &&
               "permutationMap already has an entry along dim");
        perm[kvp.second] = getAffineDimExpr(dim, context);
      } else {
        ++countInvariantIndices;
      }
    }
    assert((countInvariantIndices == numIndices ||
            countInvariantIndices == numIndices - 1) &&
           "Vectorization prerequisite violated: at most 1 index may vary "
           "along a vectorized loop");
  }
  return AffineMap::get(indices.size(), 0, perm, context);
}
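
// Editorial illustration (not in the original source): for an access
// A[%i, %c, %j] where the loops on %i and %j are vectorized to vector
// dimensions 0 and 1 and %c is invariant along both loops, index 0 varies
// along the %i loop and index 2 varies along the %j loop, yielding the
// permutation map (d0, d1, d2) -> (d0, d2).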

/// Implementation detail that walks up the parents and records the ones with
/// the specified type.
/// TODO: could also be implemented as a collect parents followed by a
/// filter and made available outside this file.
template <typename T>
static SetVector<Operation *> getParentsOfType(Operation *op) {
  SetVector<Operation *> res;
  auto *current = op;
  while (auto *parent = current->getParentOp()) {
    if (auto typedParent = dyn_cast<T>(parent)) {
      assert(res.count(parent) == 0 && "Already inserted");
      res.insert(parent);
    }
    current = parent;
  }
  return res;
}

/// Returns the enclosing AffineForOps, from closest to farthest.
static SetVector<Operation *> getEnclosingforOps(Operation *op) {
  return getParentsOfType<AffineForOp>(op);
}

AffineMap mlir::makePermutationMap(
    Operation *op, ArrayRef<Value> indices,
    const DenseMap<Operation *, unsigned> &loopToVectorDim) {
  DenseMap<Operation *, unsigned> enclosingLoopToVectorDim;
  auto enclosingLoops = getEnclosingforOps(op);
  for (auto *forInst : enclosingLoops) {
    auto it = loopToVectorDim.find(forInst);
    if (it != loopToVectorDim.end()) {
      enclosingLoopToVectorDim.insert(*it);
    }
  }
  return ::makePermutationMap(indices, enclosingLoopToVectorDim);
}

AffineMap mlir::getTransferMinorIdentityMap(MemRefType memRefType,
                                            VectorType vectorType) {
  int64_t elementVectorRank = 0;
  VectorType elementVectorType =
      memRefType.getElementType().dyn_cast<VectorType>();
  if (elementVectorType)
    elementVectorRank += elementVectorType.getRank();
  return AffineMap::getMinorIdentityMap(
      memRefType.getRank(), vectorType.getRank() - elementVectorRank,
      memRefType.getContext());
}
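
// Editorial illustration (not in the original source): for a
// memref<10x20x30xf32> and a vector<4x8xf32>, this returns the minor identity
// map (d0, d1, d2) -> (d1, d2). When the memref element type is itself a
// vector, the trailing element-vector dimensions are excluded from the map.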

bool matcher::operatesOnSuperVectorsOf(Operation &op,
                                       VectorType subVectorType) {
  // First, extract the vector type and distinguish between:
  // a. ops that *must* lower a super-vector (i.e. vector.transfer_read,
  // vector.transfer_write); and
  // b. ops that *may* lower a super-vector (all other ops).
  // The ops that *may* lower a super-vector only do so if the super-vector to
  // sub-vector ratio exists. The ops that *must* lower a super-vector are
  // explicitly checked for this property.
  /// TODO: there should be a single function for all ops to do this so we
  /// do not have to special case. Maybe a trait, or just a method, unclear atm.
  bool mustDivide = false;
  (void)mustDivide;
  VectorType superVectorType;
  if (auto transfer = dyn_cast<VectorTransferOpInterface>(op)) {
    superVectorType = transfer.getVectorType();
    mustDivide = true;
  } else if (op.getNumResults() == 0) {
    if (!isa<ReturnOp>(op)) {
      op.emitError("NYI: assuming only return operations can have 0 "
                   "results at this point");
    }
    return false;
  } else if (op.getNumResults() == 1) {
    if (auto v = op.getResult(0).getType().dyn_cast<VectorType>()) {
      superVectorType = v;
    } else {
      // Not a vector type.
      return false;
    }
  } else {
    // Not a vector.transfer and has more than 1 result, fail hard for now to
    // wake us up when something changes.
    op.emitError("NYI: operation has more than 1 result");
    return false;
  }

  // Get the ratio.
  auto ratio = shapeRatio(superVectorType, subVectorType);

  // Sanity check.
  assert((ratio.hasValue() || !mustDivide) &&
         "vector.transfer operation in which super-vector size is not an"
         " integer multiple of sub-vector size");

  // This catches cases that are not strictly required to have an integral
  // ratio but still are not divisible by the sub-vector shape.
  // This could be useful information if we wanted to reshape at the level of
  // the vector type (but we would have to look at the compute and distinguish
  // between parallel, reduction and possibly other cases).
  if (!ratio.hasValue()) {
    return false;
  }

  return true;
}
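
// Editorial illustration (not in the original source): an op with a single
// vector<4x8xf32> result operates on super-vectors of vector<2x4xf32> since
// the ratio {2, 2} exists, but not of vector<3x4xf32> since 4 % 3 != 0.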

/// Return true if we can prove that the transfer operations access disjoint
/// memory.
bool mlir::isDisjointTransferSet(VectorTransferOpInterface transferA,
                                 VectorTransferOpInterface transferB) {
  if (transferA.memref() != transferB.memref())
    return false;
  // For simplicity only look at transfers of the same type.
  if (transferA.getVectorType() != transferB.getVectorType())
    return false;
  unsigned rankOffset = transferA.getLeadingMemRefRank();
  for (unsigned i = 0, e = transferA.indices().size(); i < e; i++) {
    auto indexA = transferA.indices()[i].getDefiningOp<ConstantOp>();
    auto indexB = transferB.indices()[i].getDefiningOp<ConstantOp>();
    // If any of the indices are dynamic we cannot prove anything.
    if (!indexA || !indexB)
      continue;

    if (i < rankOffset) {
      // For leading dimensions, if we can prove that the indices are
      // different we know we are accessing disjoint slices.
      if (indexA.getValue().cast<IntegerAttr>().getInt() !=
          indexB.getValue().cast<IntegerAttr>().getInt())
        return true;
    } else {
      // For this dimension we slice a part of the memref, so we need to make
      // sure the intervals accessed don't overlap.
      int64_t distance =
          std::abs(indexA.getValue().cast<IntegerAttr>().getInt() -
                   indexB.getValue().cast<IntegerAttr>().getInt());
      if (distance >= transferA.getVectorType().getDimSize(i - rankOffset))
        return true;
    }
  }
  return false;
}
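
// Editorial illustration (not in the original source): two transfers of
// vector<4xf32> from the same memref at constant indices [0] and [4] are
// disjoint, since the index distance 4 is >= the vector size 4; at indices
// [0] and [3] the accessed intervals may overlap and the function
// conservatively returns false.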