• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===- BufferOptimizations.cpp - pre-pass optimizations for bufferization -===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements logic for three optimization passes. The first two
10 // passes try to move alloc nodes out of blocks to reduce the number of
11 // allocations and copies during buffer deallocation. The third pass tries to
12 // convert heap-based allocations to stack-based allocations, if possible.
13 
14 #include "PassDetail.h"
15 #include "mlir/IR/Operation.h"
16 #include "mlir/Interfaces/LoopLikeInterface.h"
17 #include "mlir/Pass/Pass.h"
18 #include "mlir/Transforms/BufferUtils.h"
19 #include "mlir/Transforms/Passes.h"
20 
21 using namespace mlir;
22 
23 /// Returns true if the given operation implements a known high-level region-
24 /// based control-flow interface.
isKnownControlFlowInterface(Operation * op)25 static bool isKnownControlFlowInterface(Operation *op) {
26   return isa<LoopLikeOpInterface, RegionBranchOpInterface>(op);
27 }
28 
29 /// Check if the size of the allocation is less than the given size. The
30 /// transformation is only applied to small buffers since large buffers could
31 /// exceed the stack space.
isSmallAlloc(Value alloc,unsigned maximumSizeInBytes,unsigned bitwidthOfIndexType,unsigned maxRankOfAllocatedMemRef)32 static bool isSmallAlloc(Value alloc, unsigned maximumSizeInBytes,
33                          unsigned bitwidthOfIndexType,
34                          unsigned maxRankOfAllocatedMemRef) {
35   auto type = alloc.getType().dyn_cast<ShapedType>();
36   if (!type || !alloc.getDefiningOp<AllocOp>())
37     return false;
38   if (!type.hasStaticShape()) {
39     // Check if the dynamic shape dimension of the alloc is produced by RankOp.
40     // If this is the case, it is likely to be small. Furthermore, the dimension
41     // is limited to the maximum rank of the allocated memref to avoid large
42     // values by multiplying several small values.
43     if (type.getRank() <= maxRankOfAllocatedMemRef) {
44       return llvm::all_of(
45           alloc.getDefiningOp()->getOperands(),
46           [&](Value operand) { return operand.getDefiningOp<RankOp>(); });
47     }
48     return false;
49   }
50   // For index types, use the provided size, as the type does not know.
51   unsigned int bitwidth = type.getElementType().isIndex()
52                               ? bitwidthOfIndexType
53                               : type.getElementTypeBitWidth();
54   return type.getNumElements() * bitwidth <= maximumSizeInBytes * 8;
55 }
56 
57 /// Checks whether the given aliases leave the allocation scope.
58 static bool
leavesAllocationScope(Region * parentRegion,const BufferAliasAnalysis::ValueSetT & aliases)59 leavesAllocationScope(Region *parentRegion,
60                       const BufferAliasAnalysis::ValueSetT &aliases) {
61   for (Value alias : aliases) {
62     for (auto *use : alias.getUsers()) {
63       // If there is at least one alias that leaves the parent region, we know
64       // that this alias escapes the whole region and hence the associated
65       // allocation leaves allocation scope.
66       if (use->hasTrait<OpTrait::ReturnLike>() &&
67           use->getParentRegion() == parentRegion)
68         return true;
69     }
70   }
71   return false;
72 }
73 
74 /// Checks, if an automated allocation scope for a given alloc value exists.
hasAllocationScope(Value alloc,const BufferAliasAnalysis & aliasAnalysis)75 static bool hasAllocationScope(Value alloc,
76                                const BufferAliasAnalysis &aliasAnalysis) {
77   Region *region = alloc.getParentRegion();
78   do {
79     if (Operation *parentOp = region->getParentOp()) {
80       // Check if the operation is an automatic allocation scope and whether an
81       // alias leaves the scope. This means, an allocation yields out of
82       // this scope and can not be transformed in a stack-based allocation.
83       if (parentOp->hasTrait<OpTrait::AutomaticAllocationScope>() &&
84           !leavesAllocationScope(region, aliasAnalysis.resolve(alloc)))
85         return true;
86       // Check if the operation is a known control flow interface and break the
87       // loop to avoid transformation in loops. Furthermore skip transformation
88       // if the operation does not implement a RegionBeanchOpInterface.
89       if (BufferPlacementTransformationBase::isLoop(parentOp) ||
90           !isKnownControlFlowInterface(parentOp))
91         break;
92     }
93   } while ((region = region->getParentRegion()));
94   return false;
95 }
96 
97 namespace {
98 
99 //===----------------------------------------------------------------------===//
100 // BufferAllocationHoisting
101 //===----------------------------------------------------------------------===//
102 
103 /// A base implementation compatible with the `BufferAllocationHoisting` class.
104 struct BufferAllocationHoistingStateBase {
105   /// A pointer to the current dominance info.
106   DominanceInfo *dominators;
107 
108   /// The current allocation value.
109   Value allocValue;
110 
111   /// The current placement block (if any).
112   Block *placementBlock;
113 
114   /// Initializes the state base.
BufferAllocationHoistingStateBase__anonfc1d3c320211::BufferAllocationHoistingStateBase115   BufferAllocationHoistingStateBase(DominanceInfo *dominators, Value allocValue,
116                                     Block *placementBlock)
117       : dominators(dominators), allocValue(allocValue),
118         placementBlock(placementBlock) {}
119 };
120 
121 /// Implements the actual hoisting logic for allocation nodes.
122 template <typename StateT>
123 class BufferAllocationHoisting : public BufferPlacementTransformationBase {
124 public:
BufferAllocationHoisting(Operation * op)125   BufferAllocationHoisting(Operation *op)
126       : BufferPlacementTransformationBase(op), dominators(op),
127         postDominators(op) {}
128 
129   /// Moves allocations upwards.
hoist()130   void hoist() {
131     for (BufferPlacementAllocs::AllocEntry &entry : allocs) {
132       Value allocValue = std::get<0>(entry);
133       Operation *definingOp = allocValue.getDefiningOp();
134       assert(definingOp && "No defining op");
135       auto operands = definingOp->getOperands();
136       auto resultAliases = aliases.resolve(allocValue);
137       // Determine the common dominator block of all aliases.
138       Block *dominatorBlock =
139           findCommonDominator(allocValue, resultAliases, dominators);
140       // Init the initial hoisting state.
141       StateT state(&dominators, allocValue, allocValue.getParentBlock());
142       // Check for additional allocation dependencies to compute an upper bound
143       // for hoisting.
144       Block *dependencyBlock = nullptr;
145       if (!operands.empty()) {
146         // If this node has dependencies, check all dependent nodes with respect
147         // to a common post dominator. This ensures that all dependency values
148         // have been computed before allocating the buffer.
149         ValueSetT dependencies(std::next(operands.begin()), operands.end());
150         dependencyBlock = findCommonDominator(*operands.begin(), dependencies,
151                                               postDominators);
152       }
153 
154       // Find the actual placement block and determine the start operation using
155       // an upper placement-block boundary. The idea is that placement block
156       // cannot be moved any further upwards than the given upper bound.
157       Block *placementBlock = findPlacementBlock(
158           state, state.computeUpperBound(dominatorBlock, dependencyBlock));
159       Operation *startOperation = BufferPlacementAllocs::getStartOperation(
160           allocValue, placementBlock, liveness);
161 
162       // Move the alloc in front of the start operation.
163       Operation *allocOperation = allocValue.getDefiningOp();
164       allocOperation->moveBefore(startOperation);
165     }
166   }
167 
168 private:
169   /// Finds a valid placement block by walking upwards in the CFG until we
170   /// either cannot continue our walk due to constraints (given by the StateT
171   /// implementation) or we have reached the upper-most dominator block.
findPlacementBlock(StateT & state,Block * upperBound)172   Block *findPlacementBlock(StateT &state, Block *upperBound) {
173     Block *currentBlock = state.placementBlock;
174     // Walk from the innermost regions/loops to the outermost regions/loops and
175     // find an appropriate placement block that satisfies the constraint of the
176     // current StateT implementation. Walk until we reach the upperBound block
177     // (if any).
178 
179     // If we are not able to find a valid parent operation or an associated
180     // parent block, break the walk loop.
181     Operation *parentOp;
182     Block *parentBlock;
183     while ((parentOp = currentBlock->getParentOp()) &&
184            (parentBlock = parentOp->getBlock()) &&
185            (!upperBound ||
186             dominators.properlyDominates(upperBound, currentBlock))) {
187       // Try to find an immediate dominator and check whether the parent block
188       // is above the immediate dominator (if any).
189       DominanceInfoNode *idom = dominators.getNode(currentBlock)->getIDom();
190       if (idom && dominators.properlyDominates(parentBlock, idom->getBlock())) {
191         // If the current immediate dominator is below the placement block, move
192         // to the immediate dominator block.
193         currentBlock = idom->getBlock();
194         state.recordMoveToDominator(currentBlock);
195       } else {
196         // We have to move to our parent block since an immediate dominator does
197         // either not exist or is above our parent block. If we cannot move to
198         // our parent operation due to constraints given by the StateT
199         // implementation, break the walk loop. Furthermore, we should not move
200         // allocations out of unknown region-based control-flow operations.
201         if (!isKnownControlFlowInterface(parentOp) ||
202             !state.isLegalPlacement(parentOp))
203           break;
204         // Move to our parent block by notifying the current StateT
205         // implementation.
206         currentBlock = parentBlock;
207         state.recordMoveToParent(currentBlock);
208       }
209     }
210     // Return the finally determined placement block.
211     return state.placementBlock;
212   }
213 
214   /// The dominator info to find the appropriate start operation to move the
215   /// allocs.
216   DominanceInfo dominators;
217 
218   /// The post dominator info to move the dependent allocs in the right
219   /// position.
220   PostDominanceInfo postDominators;
221 
222   /// The map storing the final placement blocks of a given alloc value.
223   llvm::DenseMap<Value, Block *> placementBlocks;
224 };
225 
226 /// A state implementation compatible with the `BufferAllocationHoisting` class
227 /// that hoists allocations into dominator blocks while keeping them inside of
228 /// loops.
229 struct BufferAllocationHoistingState : BufferAllocationHoistingStateBase {
230   using BufferAllocationHoistingStateBase::BufferAllocationHoistingStateBase;
231 
232   /// Computes the upper bound for the placement block search.
computeUpperBound__anonfc1d3c320211::BufferAllocationHoistingState233   Block *computeUpperBound(Block *dominatorBlock, Block *dependencyBlock) {
234     // If we do not have a dependency block, the upper bound is given by the
235     // dominator block.
236     if (!dependencyBlock)
237       return dominatorBlock;
238 
239     // Find the "lower" block of the dominator and the dependency block to
240     // ensure that we do not move allocations above this block.
241     return dominators->properlyDominates(dominatorBlock, dependencyBlock)
242                ? dependencyBlock
243                : dominatorBlock;
244   }
245 
246   /// Returns true if the given operation does not represent a loop.
isLegalPlacement__anonfc1d3c320211::BufferAllocationHoistingState247   bool isLegalPlacement(Operation *op) {
248     return !BufferPlacementTransformationBase::isLoop(op);
249   }
250 
251   /// Sets the current placement block to the given block.
recordMoveToDominator__anonfc1d3c320211::BufferAllocationHoistingState252   void recordMoveToDominator(Block *block) { placementBlock = block; }
253 
254   /// Sets the current placement block to the given block.
recordMoveToParent__anonfc1d3c320211::BufferAllocationHoistingState255   void recordMoveToParent(Block *block) { recordMoveToDominator(block); }
256 };
257 
258 /// A state implementation compatible with the `BufferAllocationHoisting` class
259 /// that hoists allocations out of loops.
260 struct BufferAllocationLoopHoistingState : BufferAllocationHoistingStateBase {
261   using BufferAllocationHoistingStateBase::BufferAllocationHoistingStateBase;
262 
263   /// Remembers the dominator block of all aliases.
264   Block *aliasDominatorBlock;
265 
266   /// Computes the upper bound for the placement block search.
computeUpperBound__anonfc1d3c320211::BufferAllocationLoopHoistingState267   Block *computeUpperBound(Block *dominatorBlock, Block *dependencyBlock) {
268     aliasDominatorBlock = dominatorBlock;
269     // If there is a dependency block, we have to use this block as an upper
270     // bound to satisfy all allocation value dependencies.
271     return dependencyBlock ? dependencyBlock : nullptr;
272   }
273 
274   /// Returns true if the given operation represents a loop and one of the
275   /// aliases caused the `aliasDominatorBlock` to be "above" the block of the
276   /// given loop operation. If this is the case, it indicates that the
277   /// allocation is passed via a back edge.
isLegalPlacement__anonfc1d3c320211::BufferAllocationLoopHoistingState278   bool isLegalPlacement(Operation *op) {
279     return BufferPlacementTransformationBase::isLoop(op) &&
280            !dominators->dominates(aliasDominatorBlock, op->getBlock());
281   }
282 
283   /// Does not change the internal placement block, as we want to move
284   /// operations out of loops only.
recordMoveToDominator__anonfc1d3c320211::BufferAllocationLoopHoistingState285   void recordMoveToDominator(Block *block) {}
286 
287   /// Sets the current placement block to the given block.
recordMoveToParent__anonfc1d3c320211::BufferAllocationLoopHoistingState288   void recordMoveToParent(Block *block) { placementBlock = block; }
289 };
290 
291 //===----------------------------------------------------------------------===//
292 // BufferPlacementPromotion
293 //===----------------------------------------------------------------------===//
294 
295 /// Promotes heap-based allocations to stack-based allocations (if possible).
296 class BufferPlacementPromotion : BufferPlacementTransformationBase {
297 public:
BufferPlacementPromotion(Operation * op)298   BufferPlacementPromotion(Operation *op)
299       : BufferPlacementTransformationBase(op) {}
300 
301   /// Promote buffers to stack-based allocations.
promote(unsigned maximumSize,unsigned bitwidthOfIndexType,unsigned maxRankOfAllocatedMemRef)302   void promote(unsigned maximumSize, unsigned bitwidthOfIndexType,
303                unsigned maxRankOfAllocatedMemRef) {
304     for (BufferPlacementAllocs::AllocEntry &entry : allocs) {
305       Value alloc = std::get<0>(entry);
306       Operation *dealloc = std::get<1>(entry);
307       // Checking several requirements to transform an AllocOp into an AllocaOp.
308       // The transformation is done if the allocation is limited to a given
309       // size. Furthermore, a deallocation must not be defined for this
310       // allocation entry and a parent allocation scope must exist.
311       if (!isSmallAlloc(alloc, maximumSize, bitwidthOfIndexType,
312                         maxRankOfAllocatedMemRef) ||
313           dealloc || !hasAllocationScope(alloc, aliases))
314         continue;
315 
316       Operation *startOperation = BufferPlacementAllocs::getStartOperation(
317           alloc, alloc.getParentBlock(), liveness);
318       // Build a new alloca that is associated with its parent
319       // `AutomaticAllocationScope` determined during the initialization phase.
320       OpBuilder builder(startOperation);
321       Operation *allocOp = alloc.getDefiningOp();
322       Operation *alloca = builder.create<AllocaOp>(
323           alloc.getLoc(), alloc.getType().cast<MemRefType>(),
324           allocOp->getOperands());
325 
326       // Replace the original alloc by a newly created alloca.
327       allocOp->replaceAllUsesWith(alloca);
328       allocOp->erase();
329     }
330   }
331 };
332 
333 //===----------------------------------------------------------------------===//
334 // BufferOptimizationPasses
335 //===----------------------------------------------------------------------===//
336 
337 /// The buffer hoisting pass that hoists allocation nodes into dominating
338 /// blocks.
339 struct BufferHoistingPass : BufferHoistingBase<BufferHoistingPass> {
340 
runOnFunction__anonfc1d3c320211::BufferHoistingPass341   void runOnFunction() override {
342     // Hoist all allocations into dominator blocks.
343     BufferAllocationHoisting<BufferAllocationHoistingState> optimizer(
344         getFunction());
345     optimizer.hoist();
346   }
347 };
348 
349 /// The buffer loop hoisting pass that hoists allocation nodes out of loops.
350 struct BufferLoopHoistingPass : BufferLoopHoistingBase<BufferLoopHoistingPass> {
351 
runOnFunction__anonfc1d3c320211::BufferLoopHoistingPass352   void runOnFunction() override {
353     // Hoist all allocations out of loops.
354     BufferAllocationHoisting<BufferAllocationLoopHoistingState> optimizer(
355         getFunction());
356     optimizer.hoist();
357   }
358 };
359 
360 /// The promote buffer to stack pass that tries to convert alloc nodes into
361 /// alloca nodes.
362 struct PromoteBuffersToStackPass
363     : PromoteBuffersToStackBase<PromoteBuffersToStackPass> {
364 
PromoteBuffersToStackPass__anonfc1d3c320211::PromoteBuffersToStackPass365   PromoteBuffersToStackPass(unsigned maxAllocSizeInBytes,
366                             unsigned bitwidthOfIndexType,
367                             unsigned maxRankOfAllocatedMemRef) {
368     this->maxAllocSizeInBytes = maxAllocSizeInBytes;
369     this->bitwidthOfIndexType = bitwidthOfIndexType;
370     this->maxRankOfAllocatedMemRef = maxRankOfAllocatedMemRef;
371   }
372 
runOnFunction__anonfc1d3c320211::PromoteBuffersToStackPass373   void runOnFunction() override {
374     // Move all allocation nodes and convert candidates into allocas.
375     BufferPlacementPromotion optimizer(getFunction());
376     optimizer.promote(this->maxAllocSizeInBytes, this->bitwidthOfIndexType,
377                       this->maxRankOfAllocatedMemRef);
378   }
379 };
380 
381 } // end anonymous namespace
382 
createBufferHoistingPass()383 std::unique_ptr<Pass> mlir::createBufferHoistingPass() {
384   return std::make_unique<BufferHoistingPass>();
385 }
386 
createBufferLoopHoistingPass()387 std::unique_ptr<Pass> mlir::createBufferLoopHoistingPass() {
388   return std::make_unique<BufferLoopHoistingPass>();
389 }
390 
391 std::unique_ptr<Pass>
createPromoteBuffersToStackPass(unsigned maxAllocSizeInBytes,unsigned bitwidthOfIndexType,unsigned maxRankOfAllocatedMemRef)392 mlir::createPromoteBuffersToStackPass(unsigned maxAllocSizeInBytes,
393                                       unsigned bitwidthOfIndexType,
394                                       unsigned maxRankOfAllocatedMemRef) {
395   return std::make_unique<PromoteBuffersToStackPass>(
396       maxAllocSizeInBytes, bitwidthOfIndexType, maxRankOfAllocatedMemRef);
397 }
398