//===- Utils.h - General analysis utilities ---------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This header file defines prototypes for various transformation utilities for
// memrefs and non-loop IR structures. These are not passes by themselves but
// are used either by passes, optimization sequences, or in turn by other
// transformation utilities.
//
//===----------------------------------------------------------------------===//

#ifndef MLIR_ANALYSIS_UTILS_H
#define MLIR_ANALYSIS_UTILS_H

#include "mlir/Analysis/AffineStructures.h"
#include "mlir/IR/AffineMap.h"
#include "mlir/IR/Block.h"
#include "mlir/IR/Location.h"
#include "mlir/Support/LLVM.h"
#include "llvm/ADT/SmallVector.h"
#include <memory>

namespace mlir {

class AffineForOp;
class Block;
class FlatAffineConstraints;
class Location;
struct MemRefAccess;
class Operation;
class Value;

/// Populates 'loops' with IVs of the loops surrounding 'op' ordered from
/// the outermost 'affine.for' operation to the innermost one.
// TODO: handle 'affine.if' ops.
void getLoopIVs(Operation &op, SmallVectorImpl<AffineForOp> *loops);

/// Populates 'ops' with the `affine.for` operations surrounding `op`, along
/// with any `affine.if` operations interleaved between these loops, ordered
/// from the outermost `affine.for` or `affine.if` operation to the innermost
/// one.
void getEnclosingAffineForAndIfOps(Operation &op,
                                   SmallVectorImpl<Operation *> *ops);

/// Returns the nesting depth of this operation, i.e., the number of loops
/// surrounding this operation.
unsigned getNestingDepth(Operation *op);
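
// A minimal usage sketch for the two utilities above (assuming `op` is an
// `Operation *` nested inside one or more 'affine.for' ops; `op` itself is
// not defined in this header):
//
//   SmallVector<AffineForOp, 4> ivs;
//   getLoopIVs(*op, &ivs);
//   // The loops collected are exactly the surrounding loops counted by
//   // getNestingDepth().
//   assert(ivs.size() == getNestingDepth(op));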

/// Returns in 'sequentialLoops' all sequential loops in the loop nest rooted
/// at 'forOp'.
void getSequentialLoops(AffineForOp forOp,
                        llvm::SmallDenseSet<Value, 8> *sequentialLoops);

/// ComputationSliceState aggregates loop IVs, loop bound AffineMaps and their
/// associated operands for a set of loops within a loop nest (typically the
/// set of loops surrounding a store operation). Loop bound AffineMaps which
/// are non-null represent slices of that loop's iteration space.
struct ComputationSliceState {
  // List of sliced loop IVs (ordered from outermost to innermost).
  // EX: 'ivs[i]' has lower bound 'lbs[i]' and upper bound 'ubs[i]'.
  SmallVector<Value, 4> ivs;
  // List of lower bound AffineMaps.
  SmallVector<AffineMap, 4> lbs;
  // List of upper bound AffineMaps.
  SmallVector<AffineMap, 4> ubs;
  // List of lower bound operands (lbOperands[i] are used by 'lbs[i]').
  std::vector<SmallVector<Value, 4>> lbOperands;
  // List of upper bound operands (ubOperands[i] are used by 'ubs[i]').
  std::vector<SmallVector<Value, 4>> ubOperands;
  // Slice loop nest insertion point in target loop nest.
  Block::iterator insertPoint;
  // Adds to 'cst' constraints which represent the slice bounds on 'ivs'
  // in 'this'. Specifically, the values in 'ivs' are added to 'cst' as dim
  // identifiers and the values in 'lb/ubOperands' are added as symbols.
  // Constraints are added for all loop IV bounds (dim or symbol), and
  // constraints are added for slice bounds in 'lbs'/'ubs'.
  // Returns failure if we cannot add loop bounds because of unsupported cases.
  LogicalResult getAsConstraints(FlatAffineConstraints *cst);

  // Clears all bounds and operands in slice state.
  void clearBounds();

  /// Returns true if the computation slice is empty.
  bool isEmpty() const { return ivs.empty(); }

  void dump() const;
};

/// Computes the computation slice loop bounds for one loop nest as affine maps
/// of the other loop nest's IVs and symbols, using 'dependenceConstraints'
/// computed between 'depSourceOp' and 'depSinkOp'.
/// If 'isBackwardSlice' is true, a backward slice is computed in which the
/// slice bounds of the loop nest surrounding 'depSourceOp' are computed in
/// terms of loop IVs and symbols of the loop nest surrounding 'depSinkOp' at
/// 'loopDepth'.
/// If 'isBackwardSlice' is false, a forward slice is computed in which the
/// slice bounds of the loop nest surrounding 'depSinkOp' are computed in terms
/// of loop IVs and symbols of the loop nest surrounding 'depSourceOp' at
/// 'loopDepth'.
/// The slice loop bounds and associated operands are returned in 'sliceState'.
//
// Backward slice example:
//
//    affine.for %i0 = 0 to 10 {
//      affine.store %cst, %0[%i0] : memref<100xf32>  // 'depSourceOp'
//    }
//    affine.for %i1 = 0 to 10 {
//      %v = affine.load %0[%i1] : memref<100xf32>    // 'depSinkOp'
//    }
//
//    // Backward computation slice of loop nest '%i0'.
//    affine.for %i0 = (d0) -> (d0)(%i1) to (d0) -> (d0 + 1)(%i1) {
//      affine.store %cst, %0[%i0] : memref<100xf32>  // 'depSourceOp'
//    }
//
// Forward slice example:
//
//    affine.for %i0 = 0 to 10 {
//      affine.store %cst, %0[%i0] : memref<100xf32>  // 'depSourceOp'
//    }
//    affine.for %i1 = 0 to 10 {
//      %v = affine.load %0[%i1] : memref<100xf32>    // 'depSinkOp'
//    }
//
//    // Forward computation slice of loop nest '%i1'.
//    affine.for %i1 = (d0) -> (d0)(%i0) to (d0) -> (d0 + 1)(%i0) {
//      %v = affine.load %0[%i1] : memref<100xf32>    // 'depSinkOp'
//    }
//
void getComputationSliceState(Operation *depSourceOp, Operation *depSinkOp,
                              FlatAffineConstraints *dependenceConstraints,
                              unsigned loopDepth, bool isBackwardSlice,
                              ComputationSliceState *sliceState);
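
// A hypothetical sketch of computing a backward slice between a dependent
// store/load pair (`storeOp` and `loadOp` are placeholders, not defined in
// this header; the dependence constraints are assumed to have been populated
// by a memref access dependence check such as the one in
// Analysis/AffineAnalysis.h):
//
//   FlatAffineConstraints depConstraints;
//   // ... run a dependence check at depth 1 between `storeOp` and `loadOp`,
//   // filling in `depConstraints` ...
//   ComputationSliceState sliceState;
//   getComputationSliceState(storeOp, loadOp, &depConstraints,
//                            /*loopDepth=*/1, /*isBackwardSlice=*/true,
//                            &sliceState);
//   // The slice bounds can then be re-expressed as constraints:
//   FlatAffineConstraints sliceCst;
//   if (failed(sliceState.getAsConstraints(&sliceCst))) {
//     // Unsupported bounds; bail out.
//   }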

/// Computes in 'sliceUnion' the union of all slice bounds computed at
/// 'loopDepth' between all dependent pairs of ops in 'opsA' and 'opsB'.
/// The parameter 'numCommonLoops' is the number of loops common to the
/// operations in 'opsA' and 'opsB'.
/// If 'isBackwardSlice' is true, computes slice bounds for the loop nest
/// surrounding ops in 'opsA', as a function of IVs and symbols of the loop
/// nest surrounding ops in 'opsB' at 'loopDepth'.
/// If 'isBackwardSlice' is false, computes slice bounds for the loop nest
/// surrounding ops in 'opsB', as a function of IVs and symbols of the loop
/// nest surrounding ops in 'opsA' at 'loopDepth'.
/// Returns 'success' if the union was computed, 'failure' otherwise.
// TODO: Change this API to take 'forOpA'/'forOpB'.
LogicalResult computeSliceUnion(ArrayRef<Operation *> opsA,
                                ArrayRef<Operation *> opsB, unsigned loopDepth,
                                unsigned numCommonLoops, bool isBackwardSlice,
                                ComputationSliceState *sliceUnion);

/// Creates a clone of the computation contained in the loop nest surrounding
/// 'srcOpInst', slices the iteration space of the source loop based on the
/// slice bounds in 'sliceState', and inserts the computation slice at the
/// beginning of the operation block of the loop at 'dstLoopDepth' in the loop
/// nest surrounding 'dstOpInst'. Returns the top-level loop of the computation
/// slice on success, nullptr otherwise.
// The loop depth is a crucial optimization choice that determines where to
// materialize the results of the backward slice, presenting a trade-off
// between storage and redundant computation in several cases.
// TODO: Support computation slices with common surrounding loops.
AffineForOp insertBackwardComputationSlice(Operation *srcOpInst,
                                           Operation *dstOpInst,
                                           unsigned dstLoopDepth,
                                           ComputationSliceState *sliceState);
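
// A hypothetical sketch of materializing a backward slice, continuing the
// placeholders above (`sliceState` would come from getComputationSliceState
// or computeSliceUnion):
//
//   AffineForOp sliceLoopNest = insertBackwardComputationSlice(
//       storeOp, loadOp, /*dstLoopDepth=*/1, &sliceState);
//   if (!sliceLoopNest) {
//     // Slice insertion failed; leave the IR unchanged.
//   }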

/// A region of a memref's data space; this is typically constructed by
/// analyzing load/store ops on this memref and the index space of loops
/// surrounding such ops.
// For example, the memref region for a load operation at loop depth = 1:
//
//    affine.for %i = 0 to 32 {
//      affine.for %ii = %i to (d0) -> (d0 + 8) (%i) {
//        affine.load %A[%ii]
//      }
//    }
//
// Region:  {memref = %A, write = false, {%i <= m0 <= %i + 7} }
// The last field is a 2-d FlatAffineConstraints symbolic in %i.
//
struct MemRefRegion {
  explicit MemRefRegion(Location loc) : loc(loc) {}

  /// Computes the memory region accessed by this memref with the region
  /// represented as constraints symbolic/parametric in 'loopDepth' loops
  /// surrounding 'op'. The computed region's 'cst' field has exactly as many
  /// dimensional identifiers as the rank of the memref, and *potentially*
  /// additional symbolic identifiers which could include any of the loop IVs
  /// surrounding 'op' up until 'loopDepth' and any additional Function symbols
  /// involved with the access (e.g., those appearing in affine.apply ops, loop
  /// bounds, etc.). If 'sliceState' is non-null, operands from 'sliceState'
  /// are added as symbols, and the following constraints are added to the
  /// system:
  /// *) Inequality constraints which represent loop bounds for 'sliceState'
  ///    operands which are loop IVs (these represent the destination loop IVs
  ///    of the slice, and are added as symbols to MemRefRegion's constraint
  ///    system).
  /// *) Inequality constraints for the slice bounds in 'sliceState', which
  ///    represent the bounds on the loop IVs in this constraint system w.r.t.
  ///    slice operands (which correspond to symbols).
  /// If 'addMemRefDimBounds' is true, constant upper/lower bounds
  /// [0, memref.getDimSize(i)) are added for each MemRef dimension 'i'.
  ///
  /// For example, the memref region for this operation at loopDepth = 1 will
  /// be:
  ///
  ///    affine.for %i = 0 to 32 {
  ///      affine.for %ii = %i to (d0) -> (d0 + 8) (%i) {
  ///        load %A[%ii]
  ///      }
  ///    }
  ///
  ///   {memref = %A, write = false, {%i <= m0 <= %i + 7} }
  /// The last field is a 2-d FlatAffineConstraints symbolic in %i.
  ///
  LogicalResult compute(Operation *op, unsigned loopDepth,
                        const ComputationSliceState *sliceState = nullptr,
                        bool addMemRefDimBounds = true);

  FlatAffineConstraints *getConstraints() { return &cst; }
  const FlatAffineConstraints *getConstraints() const { return &cst; }
  bool isWrite() const { return write; }
  void setWrite(bool flag) { write = flag; }

  /// Returns a constant upper bound on the number of elements in this region
  /// if bounded by a known constant (always possible for static shapes), None
  /// otherwise. Note that the symbols of the region are treated specially,
  /// i.e., the returned bounding constant holds for *any given* value of the
  /// symbol identifiers. The 'shape' vector is set to the corresponding
  /// dimension-wise bounds major to minor. We use int64_t instead of uint64_t
  /// since index types can be at most int64_t. `lbs` are set to the lower
  /// bounds for each of the rank dimensions, and `lbDivisors` contains the
  /// corresponding denominators for floorDivs.
  Optional<int64_t> getConstantBoundingSizeAndShape(
      SmallVectorImpl<int64_t> *shape = nullptr,
      std::vector<SmallVector<int64_t, 4>> *lbs = nullptr,
      SmallVectorImpl<int64_t> *lbDivisors = nullptr) const;

  /// Gets the lower and upper bound map for the dimensional identifier at
  /// `pos`.
  void getLowerAndUpperBound(unsigned pos, AffineMap &lbMap,
                             AffineMap &ubMap) const;

  /// A wrapper around FlatAffineConstraints::getConstantBoundOnDimSize().
  /// 'pos' corresponds to the position of the memref shape's dimension (major
  /// to minor) which matches 1:1 with the dimensional identifier positions in
  /// 'cst'.
  Optional<int64_t>
  getConstantBoundOnDimSize(unsigned pos,
                            SmallVectorImpl<int64_t> *lb = nullptr,
                            int64_t *lbFloorDivisor = nullptr) const {
    assert(pos < getRank() && "invalid position");
    return cst.getConstantBoundOnDimSize(pos, lb, lbFloorDivisor);
  }

  /// Returns the size of this MemRefRegion in bytes.
  Optional<int64_t> getRegionSize();

  /// A wrapper around FlatAffineConstraints::unionBoundingBox.
  LogicalResult unionBoundingBox(const MemRefRegion &other);

  /// Returns the rank of the memref that this region corresponds to.
  unsigned getRank() const;

  /// Memref that this region corresponds to.
  Value memref;

  /// Read or write.
  bool write;

  /// If there is more than one load/store op associated with the region, the
  /// location information would correspond to one of those ops.
  Location loc;

  /// Region (data space) of the memref accessed. This set will thus have at
  /// least as many dimensional identifiers as the shape dimensionality of the
  /// memref, and these are the leading dimensions of the set appearing in that
  /// order (major to minor / outermost to innermost). There may be additional
  /// identifiers since compute() is called with a specific loop depth, and
  /// thus the region is symbolic in the outer surrounding loops at that
  /// depth.
  // TODO: Replace this to exploit HyperRectangularSet.
  FlatAffineConstraints cst;
};
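
// A minimal sketch of computing and querying a memref region (assuming
// `loadOp` is an `Operation *` for an affine load, as in the example above):
//
//   MemRefRegion region(loadOp->getLoc());
//   if (failed(region.compute(loadOp, /*loopDepth=*/1)))
//     return; // Region could not be computed.
//   SmallVector<int64_t, 4> shape;
//   Optional<int64_t> numElements =
//       region.getConstantBoundingSizeAndShape(&shape);
//   // For the running example {%i <= m0 <= %i + 7}, `numElements` would be
//   // 8 and `shape` would be {8}.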

/// Returns the size of the memref data in bytes if it's statically shaped,
/// None otherwise.
Optional<uint64_t> getMemRefSizeInBytes(MemRefType memRefType);

/// Checks a load or store op for an out of bound access; returns failure if
/// the access is out of bounds along any of the dimensions, success otherwise.
/// Emits a diagnostic error (with location information) if emitError is true.
template <typename LoadOrStoreOpPointer>
LogicalResult boundCheckLoadOrStoreOp(LoadOrStoreOpPointer loadOrStoreOp,
                                      bool emitError = true);

/// Returns the number of surrounding loops common to both A and B.
unsigned getNumCommonSurroundingLoops(Operation &A, Operation &B);

/// Gets the memory footprint of all data touched in the specified memory space
/// in bytes; if the memory space is unspecified, considers all memory spaces.
Optional<int64_t> getMemoryFootprintBytes(AffineForOp forOp,
                                          int memorySpace = -1);

/// Returns true if `forOp` is a parallel loop.
bool isLoopParallel(AffineForOp forOp);

/// Simplifies the integer set by simplifying the underlying affine expressions
/// (through flattening and some simple inference) and dropping any duplicate
/// constraints. Returns the simplified integer set. This method runs in time
/// linear in the number of constraints.
IntegerSet simplifyIntegerSet(IntegerSet set);

/// Returns the innermost common loop depth for the set of operations in 'ops'.
unsigned getInnermostCommonLoopDepth(
    ArrayRef<Operation *> ops,
    SmallVectorImpl<AffineForOp> *surroundingLoops = nullptr);

} // end namespace mlir

#endif // MLIR_ANALYSIS_UTILS_H