1 //===- BufferOptimizations.cpp - pre-pass optimizations for bufferization -===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements logic for three optimization passes. The first two
10 // passes try to move alloc nodes out of blocks to reduce the number of
11 // allocations and copies during buffer deallocation. The third pass tries to
12 // convert heap-based allocations to stack-based allocations, if possible.
13
14 #include "PassDetail.h"
15 #include "mlir/IR/Operation.h"
16 #include "mlir/Interfaces/LoopLikeInterface.h"
17 #include "mlir/Pass/Pass.h"
18 #include "mlir/Transforms/BufferUtils.h"
19 #include "mlir/Transforms/Passes.h"
20
21 using namespace mlir;
22
23 /// Returns true if the given operation implements a known high-level region-
24 /// based control-flow interface.
isKnownControlFlowInterface(Operation * op)25 static bool isKnownControlFlowInterface(Operation *op) {
26 return isa<LoopLikeOpInterface, RegionBranchOpInterface>(op);
27 }
28
29 /// Check if the size of the allocation is less than the given size. The
30 /// transformation is only applied to small buffers since large buffers could
31 /// exceed the stack space.
isSmallAlloc(Value alloc,unsigned maximumSizeInBytes,unsigned bitwidthOfIndexType,unsigned maxRankOfAllocatedMemRef)32 static bool isSmallAlloc(Value alloc, unsigned maximumSizeInBytes,
33 unsigned bitwidthOfIndexType,
34 unsigned maxRankOfAllocatedMemRef) {
35 auto type = alloc.getType().dyn_cast<ShapedType>();
36 if (!type || !alloc.getDefiningOp<AllocOp>())
37 return false;
38 if (!type.hasStaticShape()) {
39 // Check if the dynamic shape dimension of the alloc is produced by RankOp.
40 // If this is the case, it is likely to be small. Furthermore, the dimension
41 // is limited to the maximum rank of the allocated memref to avoid large
42 // values by multiplying several small values.
43 if (type.getRank() <= maxRankOfAllocatedMemRef) {
44 return llvm::all_of(
45 alloc.getDefiningOp()->getOperands(),
46 [&](Value operand) { return operand.getDefiningOp<RankOp>(); });
47 }
48 return false;
49 }
50 // For index types, use the provided size, as the type does not know.
51 unsigned int bitwidth = type.getElementType().isIndex()
52 ? bitwidthOfIndexType
53 : type.getElementTypeBitWidth();
54 return type.getNumElements() * bitwidth <= maximumSizeInBytes * 8;
55 }
56
57 /// Checks whether the given aliases leave the allocation scope.
58 static bool
leavesAllocationScope(Region * parentRegion,const BufferAliasAnalysis::ValueSetT & aliases)59 leavesAllocationScope(Region *parentRegion,
60 const BufferAliasAnalysis::ValueSetT &aliases) {
61 for (Value alias : aliases) {
62 for (auto *use : alias.getUsers()) {
63 // If there is at least one alias that leaves the parent region, we know
64 // that this alias escapes the whole region and hence the associated
65 // allocation leaves allocation scope.
66 if (use->hasTrait<OpTrait::ReturnLike>() &&
67 use->getParentRegion() == parentRegion)
68 return true;
69 }
70 }
71 return false;
72 }
73
74 /// Checks, if an automated allocation scope for a given alloc value exists.
hasAllocationScope(Value alloc,const BufferAliasAnalysis & aliasAnalysis)75 static bool hasAllocationScope(Value alloc,
76 const BufferAliasAnalysis &aliasAnalysis) {
77 Region *region = alloc.getParentRegion();
78 do {
79 if (Operation *parentOp = region->getParentOp()) {
80 // Check if the operation is an automatic allocation scope and whether an
81 // alias leaves the scope. This means, an allocation yields out of
82 // this scope and can not be transformed in a stack-based allocation.
83 if (parentOp->hasTrait<OpTrait::AutomaticAllocationScope>() &&
84 !leavesAllocationScope(region, aliasAnalysis.resolve(alloc)))
85 return true;
86 // Check if the operation is a known control flow interface and break the
87 // loop to avoid transformation in loops. Furthermore skip transformation
88 // if the operation does not implement a RegionBeanchOpInterface.
89 if (BufferPlacementTransformationBase::isLoop(parentOp) ||
90 !isKnownControlFlowInterface(parentOp))
91 break;
92 }
93 } while ((region = region->getParentRegion()));
94 return false;
95 }
96
97 namespace {
98
99 //===----------------------------------------------------------------------===//
100 // BufferAllocationHoisting
101 //===----------------------------------------------------------------------===//
102
103 /// A base implementation compatible with the `BufferAllocationHoisting` class.
104 struct BufferAllocationHoistingStateBase {
105 /// A pointer to the current dominance info.
106 DominanceInfo *dominators;
107
108 /// The current allocation value.
109 Value allocValue;
110
111 /// The current placement block (if any).
112 Block *placementBlock;
113
114 /// Initializes the state base.
BufferAllocationHoistingStateBase__anonfc1d3c320211::BufferAllocationHoistingStateBase115 BufferAllocationHoistingStateBase(DominanceInfo *dominators, Value allocValue,
116 Block *placementBlock)
117 : dominators(dominators), allocValue(allocValue),
118 placementBlock(placementBlock) {}
119 };
120
121 /// Implements the actual hoisting logic for allocation nodes.
122 template <typename StateT>
123 class BufferAllocationHoisting : public BufferPlacementTransformationBase {
124 public:
BufferAllocationHoisting(Operation * op)125 BufferAllocationHoisting(Operation *op)
126 : BufferPlacementTransformationBase(op), dominators(op),
127 postDominators(op) {}
128
129 /// Moves allocations upwards.
hoist()130 void hoist() {
131 for (BufferPlacementAllocs::AllocEntry &entry : allocs) {
132 Value allocValue = std::get<0>(entry);
133 Operation *definingOp = allocValue.getDefiningOp();
134 assert(definingOp && "No defining op");
135 auto operands = definingOp->getOperands();
136 auto resultAliases = aliases.resolve(allocValue);
137 // Determine the common dominator block of all aliases.
138 Block *dominatorBlock =
139 findCommonDominator(allocValue, resultAliases, dominators);
140 // Init the initial hoisting state.
141 StateT state(&dominators, allocValue, allocValue.getParentBlock());
142 // Check for additional allocation dependencies to compute an upper bound
143 // for hoisting.
144 Block *dependencyBlock = nullptr;
145 if (!operands.empty()) {
146 // If this node has dependencies, check all dependent nodes with respect
147 // to a common post dominator. This ensures that all dependency values
148 // have been computed before allocating the buffer.
149 ValueSetT dependencies(std::next(operands.begin()), operands.end());
150 dependencyBlock = findCommonDominator(*operands.begin(), dependencies,
151 postDominators);
152 }
153
154 // Find the actual placement block and determine the start operation using
155 // an upper placement-block boundary. The idea is that placement block
156 // cannot be moved any further upwards than the given upper bound.
157 Block *placementBlock = findPlacementBlock(
158 state, state.computeUpperBound(dominatorBlock, dependencyBlock));
159 Operation *startOperation = BufferPlacementAllocs::getStartOperation(
160 allocValue, placementBlock, liveness);
161
162 // Move the alloc in front of the start operation.
163 Operation *allocOperation = allocValue.getDefiningOp();
164 allocOperation->moveBefore(startOperation);
165 }
166 }
167
168 private:
169 /// Finds a valid placement block by walking upwards in the CFG until we
170 /// either cannot continue our walk due to constraints (given by the StateT
171 /// implementation) or we have reached the upper-most dominator block.
findPlacementBlock(StateT & state,Block * upperBound)172 Block *findPlacementBlock(StateT &state, Block *upperBound) {
173 Block *currentBlock = state.placementBlock;
174 // Walk from the innermost regions/loops to the outermost regions/loops and
175 // find an appropriate placement block that satisfies the constraint of the
176 // current StateT implementation. Walk until we reach the upperBound block
177 // (if any).
178
179 // If we are not able to find a valid parent operation or an associated
180 // parent block, break the walk loop.
181 Operation *parentOp;
182 Block *parentBlock;
183 while ((parentOp = currentBlock->getParentOp()) &&
184 (parentBlock = parentOp->getBlock()) &&
185 (!upperBound ||
186 dominators.properlyDominates(upperBound, currentBlock))) {
187 // Try to find an immediate dominator and check whether the parent block
188 // is above the immediate dominator (if any).
189 DominanceInfoNode *idom = dominators.getNode(currentBlock)->getIDom();
190 if (idom && dominators.properlyDominates(parentBlock, idom->getBlock())) {
191 // If the current immediate dominator is below the placement block, move
192 // to the immediate dominator block.
193 currentBlock = idom->getBlock();
194 state.recordMoveToDominator(currentBlock);
195 } else {
196 // We have to move to our parent block since an immediate dominator does
197 // either not exist or is above our parent block. If we cannot move to
198 // our parent operation due to constraints given by the StateT
199 // implementation, break the walk loop. Furthermore, we should not move
200 // allocations out of unknown region-based control-flow operations.
201 if (!isKnownControlFlowInterface(parentOp) ||
202 !state.isLegalPlacement(parentOp))
203 break;
204 // Move to our parent block by notifying the current StateT
205 // implementation.
206 currentBlock = parentBlock;
207 state.recordMoveToParent(currentBlock);
208 }
209 }
210 // Return the finally determined placement block.
211 return state.placementBlock;
212 }
213
214 /// The dominator info to find the appropriate start operation to move the
215 /// allocs.
216 DominanceInfo dominators;
217
218 /// The post dominator info to move the dependent allocs in the right
219 /// position.
220 PostDominanceInfo postDominators;
221
222 /// The map storing the final placement blocks of a given alloc value.
223 llvm::DenseMap<Value, Block *> placementBlocks;
224 };
225
226 /// A state implementation compatible with the `BufferAllocationHoisting` class
227 /// that hoists allocations into dominator blocks while keeping them inside of
228 /// loops.
229 struct BufferAllocationHoistingState : BufferAllocationHoistingStateBase {
230 using BufferAllocationHoistingStateBase::BufferAllocationHoistingStateBase;
231
232 /// Computes the upper bound for the placement block search.
computeUpperBound__anonfc1d3c320211::BufferAllocationHoistingState233 Block *computeUpperBound(Block *dominatorBlock, Block *dependencyBlock) {
234 // If we do not have a dependency block, the upper bound is given by the
235 // dominator block.
236 if (!dependencyBlock)
237 return dominatorBlock;
238
239 // Find the "lower" block of the dominator and the dependency block to
240 // ensure that we do not move allocations above this block.
241 return dominators->properlyDominates(dominatorBlock, dependencyBlock)
242 ? dependencyBlock
243 : dominatorBlock;
244 }
245
246 /// Returns true if the given operation does not represent a loop.
isLegalPlacement__anonfc1d3c320211::BufferAllocationHoistingState247 bool isLegalPlacement(Operation *op) {
248 return !BufferPlacementTransformationBase::isLoop(op);
249 }
250
251 /// Sets the current placement block to the given block.
recordMoveToDominator__anonfc1d3c320211::BufferAllocationHoistingState252 void recordMoveToDominator(Block *block) { placementBlock = block; }
253
254 /// Sets the current placement block to the given block.
recordMoveToParent__anonfc1d3c320211::BufferAllocationHoistingState255 void recordMoveToParent(Block *block) { recordMoveToDominator(block); }
256 };
257
258 /// A state implementation compatible with the `BufferAllocationHoisting` class
259 /// that hoists allocations out of loops.
260 struct BufferAllocationLoopHoistingState : BufferAllocationHoistingStateBase {
261 using BufferAllocationHoistingStateBase::BufferAllocationHoistingStateBase;
262
263 /// Remembers the dominator block of all aliases.
264 Block *aliasDominatorBlock;
265
266 /// Computes the upper bound for the placement block search.
computeUpperBound__anonfc1d3c320211::BufferAllocationLoopHoistingState267 Block *computeUpperBound(Block *dominatorBlock, Block *dependencyBlock) {
268 aliasDominatorBlock = dominatorBlock;
269 // If there is a dependency block, we have to use this block as an upper
270 // bound to satisfy all allocation value dependencies.
271 return dependencyBlock ? dependencyBlock : nullptr;
272 }
273
274 /// Returns true if the given operation represents a loop and one of the
275 /// aliases caused the `aliasDominatorBlock` to be "above" the block of the
276 /// given loop operation. If this is the case, it indicates that the
277 /// allocation is passed via a back edge.
isLegalPlacement__anonfc1d3c320211::BufferAllocationLoopHoistingState278 bool isLegalPlacement(Operation *op) {
279 return BufferPlacementTransformationBase::isLoop(op) &&
280 !dominators->dominates(aliasDominatorBlock, op->getBlock());
281 }
282
283 /// Does not change the internal placement block, as we want to move
284 /// operations out of loops only.
recordMoveToDominator__anonfc1d3c320211::BufferAllocationLoopHoistingState285 void recordMoveToDominator(Block *block) {}
286
287 /// Sets the current placement block to the given block.
recordMoveToParent__anonfc1d3c320211::BufferAllocationLoopHoistingState288 void recordMoveToParent(Block *block) { placementBlock = block; }
289 };
290
291 //===----------------------------------------------------------------------===//
292 // BufferPlacementPromotion
293 //===----------------------------------------------------------------------===//
294
295 /// Promotes heap-based allocations to stack-based allocations (if possible).
296 class BufferPlacementPromotion : BufferPlacementTransformationBase {
297 public:
BufferPlacementPromotion(Operation * op)298 BufferPlacementPromotion(Operation *op)
299 : BufferPlacementTransformationBase(op) {}
300
301 /// Promote buffers to stack-based allocations.
promote(unsigned maximumSize,unsigned bitwidthOfIndexType,unsigned maxRankOfAllocatedMemRef)302 void promote(unsigned maximumSize, unsigned bitwidthOfIndexType,
303 unsigned maxRankOfAllocatedMemRef) {
304 for (BufferPlacementAllocs::AllocEntry &entry : allocs) {
305 Value alloc = std::get<0>(entry);
306 Operation *dealloc = std::get<1>(entry);
307 // Checking several requirements to transform an AllocOp into an AllocaOp.
308 // The transformation is done if the allocation is limited to a given
309 // size. Furthermore, a deallocation must not be defined for this
310 // allocation entry and a parent allocation scope must exist.
311 if (!isSmallAlloc(alloc, maximumSize, bitwidthOfIndexType,
312 maxRankOfAllocatedMemRef) ||
313 dealloc || !hasAllocationScope(alloc, aliases))
314 continue;
315
316 Operation *startOperation = BufferPlacementAllocs::getStartOperation(
317 alloc, alloc.getParentBlock(), liveness);
318 // Build a new alloca that is associated with its parent
319 // `AutomaticAllocationScope` determined during the initialization phase.
320 OpBuilder builder(startOperation);
321 Operation *allocOp = alloc.getDefiningOp();
322 Operation *alloca = builder.create<AllocaOp>(
323 alloc.getLoc(), alloc.getType().cast<MemRefType>(),
324 allocOp->getOperands());
325
326 // Replace the original alloc by a newly created alloca.
327 allocOp->replaceAllUsesWith(alloca);
328 allocOp->erase();
329 }
330 }
331 };
332
333 //===----------------------------------------------------------------------===//
334 // BufferOptimizationPasses
335 //===----------------------------------------------------------------------===//
336
337 /// The buffer hoisting pass that hoists allocation nodes into dominating
338 /// blocks.
339 struct BufferHoistingPass : BufferHoistingBase<BufferHoistingPass> {
340
runOnFunction__anonfc1d3c320211::BufferHoistingPass341 void runOnFunction() override {
342 // Hoist all allocations into dominator blocks.
343 BufferAllocationHoisting<BufferAllocationHoistingState> optimizer(
344 getFunction());
345 optimizer.hoist();
346 }
347 };
348
349 /// The buffer loop hoisting pass that hoists allocation nodes out of loops.
350 struct BufferLoopHoistingPass : BufferLoopHoistingBase<BufferLoopHoistingPass> {
351
runOnFunction__anonfc1d3c320211::BufferLoopHoistingPass352 void runOnFunction() override {
353 // Hoist all allocations out of loops.
354 BufferAllocationHoisting<BufferAllocationLoopHoistingState> optimizer(
355 getFunction());
356 optimizer.hoist();
357 }
358 };
359
360 /// The promote buffer to stack pass that tries to convert alloc nodes into
361 /// alloca nodes.
362 struct PromoteBuffersToStackPass
363 : PromoteBuffersToStackBase<PromoteBuffersToStackPass> {
364
PromoteBuffersToStackPass__anonfc1d3c320211::PromoteBuffersToStackPass365 PromoteBuffersToStackPass(unsigned maxAllocSizeInBytes,
366 unsigned bitwidthOfIndexType,
367 unsigned maxRankOfAllocatedMemRef) {
368 this->maxAllocSizeInBytes = maxAllocSizeInBytes;
369 this->bitwidthOfIndexType = bitwidthOfIndexType;
370 this->maxRankOfAllocatedMemRef = maxRankOfAllocatedMemRef;
371 }
372
runOnFunction__anonfc1d3c320211::PromoteBuffersToStackPass373 void runOnFunction() override {
374 // Move all allocation nodes and convert candidates into allocas.
375 BufferPlacementPromotion optimizer(getFunction());
376 optimizer.promote(this->maxAllocSizeInBytes, this->bitwidthOfIndexType,
377 this->maxRankOfAllocatedMemRef);
378 }
379 };
380
381 } // end anonymous namespace
382
createBufferHoistingPass()383 std::unique_ptr<Pass> mlir::createBufferHoistingPass() {
384 return std::make_unique<BufferHoistingPass>();
385 }
386
createBufferLoopHoistingPass()387 std::unique_ptr<Pass> mlir::createBufferLoopHoistingPass() {
388 return std::make_unique<BufferLoopHoistingPass>();
389 }
390
391 std::unique_ptr<Pass>
createPromoteBuffersToStackPass(unsigned maxAllocSizeInBytes,unsigned bitwidthOfIndexType,unsigned maxRankOfAllocatedMemRef)392 mlir::createPromoteBuffersToStackPass(unsigned maxAllocSizeInBytes,
393 unsigned bitwidthOfIndexType,
394 unsigned maxRankOfAllocatedMemRef) {
395 return std::make_unique<PromoteBuffersToStackPass>(
396 maxAllocSizeInBytes, bitwidthOfIndexType, maxRankOfAllocatedMemRef);
397 }
398