1 //===- AffineLoopNormalize.cpp - AffineLoopNormalize Pass -----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a normalizer for affine loop-like ops.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "PassDetail.h"
14 #include "mlir/Dialect/Affine/IR/AffineOps.h"
15 #include "mlir/Dialect/Affine/IR/AffineValueMap.h"
16 #include "mlir/Dialect/Affine/Passes.h"
17 #include "mlir/Dialect/Affine/Utils.h"
18 #include "mlir/IR/PatternMatch.h"
19 #include "mlir/Transforms/LoopUtils.h"
20
21 using namespace mlir;
22
/// Normalizes an affine.parallel op so that every lower bound is the constant
/// 0 and every step is 1. The original iteration value is recovered inside the
/// body via an affine.apply computing 'lb + iv * step', and all prior uses of
/// the induction variable are redirected to that apply.
void mlir::normalizeAffineParallel(AffineParallelOp op) {
  AffineMap lbMap = op.lowerBoundsMap();
  SmallVector<int64_t, 8> steps = op.getSteps();
  // No need to do any work if the parallel op is already normalized.
  bool isAlreadyNormalized =
      llvm::all_of(llvm::zip(steps, lbMap.getResults()), [](auto tuple) {
        int64_t step = std::get<0>(tuple);
        auto lbExpr =
            std::get<1>(tuple).template dyn_cast<AffineConstantExpr>();
        return lbExpr && lbExpr.getValue() == 0 && step == 1;
      });
  if (isAlreadyNormalized)
    return;

  // Capture the ranges ('ub - lb' per dimension) before any bounds are
  // rewritten below; they define the new upper bounds.
  AffineValueMap ranges = op.getRangesValueMap();
  auto builder = OpBuilder::atBlockBegin(op.getBody());
  auto zeroExpr = builder.getAffineConstantExpr(0);
  SmallVector<AffineExpr, 8> lbExprs;
  SmallVector<AffineExpr, 8> ubExprs;
  for (unsigned i = 0, e = steps.size(); i < e; ++i) {
    int64_t step = steps[i];

    // Adjust the lower bound to be 0.
    lbExprs.push_back(zeroExpr);

    // Adjust the upper bound expression: 'range / step'.
    AffineExpr ubExpr = ranges.getResult(i).ceilDiv(step);
    ubExprs.push_back(ubExpr);

    // Adjust the corresponding IV: 'lb + i * step'. The normalized IV is
    // appended as an extra dim at position 'nDims', after the original
    // lower-bound dims.
    BlockArgument iv = op.getBody()->getArgument(i);
    AffineExpr lbExpr = lbMap.getResult(i);
    unsigned nDims = lbMap.getNumDims();
    auto expr = lbExpr + builder.getAffineDimExpr(nDims) * step;
    auto map = AffineMap::get(/*dimCount=*/nDims + 1,
                              /*symbolCount=*/lbMap.getNumSymbols(), expr);

    // Use an 'affine.apply' op that will be simplified later in subsequent
    // canonicalizations.
    OperandRange lbOperands = op.getLowerBoundsOperands();
    OperandRange dimOperands = lbOperands.take_front(nDims);
    OperandRange symbolOperands = lbOperands.drop_front(nDims);
    // Operand order matches the map: lb dims, then the IV dim, then lb
    // symbols.
    SmallVector<Value, 8> applyOperands{dimOperands};
    applyOperands.push_back(iv);
    applyOperands.append(symbolOperands.begin(), symbolOperands.end());
    auto apply = builder.create<AffineApplyOp>(op.getLoc(), map, applyOperands);
    // Exclude the apply itself: it must keep consuming the raw IV.
    iv.replaceAllUsesExcept(apply, SmallPtrSet<Operation *, 1>{apply});
  }

  // All steps become 1, all lower bounds become the constant-0 map (no
  // operands), and the upper bounds take over the range operands.
  SmallVector<int64_t, 8> newSteps(op.getNumDims(), 1);
  op.setSteps(newSteps);
  auto newLowerMap = AffineMap::get(
      /*dimCount=*/0, /*symbolCount=*/0, lbExprs, op.getContext());
  op.setLowerBounds({}, newLowerMap);
  auto newUpperMap = AffineMap::get(ranges.getNumDims(), ranges.getNumSymbols(),
                                    ubExprs, op.getContext());
  op.setUpperBounds(ranges.getOperands(), newUpperMap);
}
81
82 /// Normalizes affine.for ops. If the affine.for op has only a single iteration
83 /// only then it is simply promoted, else it is normalized in the traditional
84 /// way, by converting the lower bound to zero and loop step to one. The upper
85 /// bound is set to the trip count of the loop. For now, original loops must
86 /// have lower bound with a single result only. There is no such restriction on
87 /// upper bounds.
normalizeAffineFor(AffineForOp op)88 static void normalizeAffineFor(AffineForOp op) {
89 if (succeeded(promoteIfSingleIteration(op)))
90 return;
91
92 // Check if the forop is already normalized.
93 if (op.hasConstantLowerBound() && (op.getConstantLowerBound() == 0) &&
94 (op.getStep() == 1))
95 return;
96
97 // Check if the lower bound has a single result only. Loops with a max lower
98 // bound can't be normalized without additional support like
99 // affine.execute_region's. If the lower bound does not have a single result
100 // then skip this op.
101 if (op.getLowerBoundMap().getNumResults() != 1)
102 return;
103
104 Location loc = op.getLoc();
105 OpBuilder opBuilder(op);
106 int64_t origLoopStep = op.getStep();
107
108 // Calculate upperBound for normalized loop.
109 SmallVector<Value, 4> ubOperands;
110 AffineBound lb = op.getLowerBound();
111 AffineBound ub = op.getUpperBound();
112 ubOperands.reserve(ub.getNumOperands() + lb.getNumOperands());
113 AffineMap origLbMap = lb.getMap();
114 AffineMap origUbMap = ub.getMap();
115
116 // Add dimension operands from upper/lower bound.
117 for (unsigned j = 0, e = origUbMap.getNumDims(); j < e; ++j)
118 ubOperands.push_back(ub.getOperand(j));
119 for (unsigned j = 0, e = origLbMap.getNumDims(); j < e; ++j)
120 ubOperands.push_back(lb.getOperand(j));
121
122 // Add symbol operands from upper/lower bound.
123 for (unsigned j = 0, e = origUbMap.getNumSymbols(); j < e; ++j)
124 ubOperands.push_back(ub.getOperand(origUbMap.getNumDims() + j));
125 for (unsigned j = 0, e = origLbMap.getNumSymbols(); j < e; ++j)
126 ubOperands.push_back(lb.getOperand(origLbMap.getNumDims() + j));
127
128 // Add original result expressions from lower/upper bound map.
129 SmallVector<AffineExpr, 1> origLbExprs(origLbMap.getResults().begin(),
130 origLbMap.getResults().end());
131 SmallVector<AffineExpr, 2> origUbExprs(origUbMap.getResults().begin(),
132 origUbMap.getResults().end());
133 SmallVector<AffineExpr, 4> newUbExprs;
134
135 // The original upperBound can have more than one result. For the new
136 // upperBound of this loop, take difference of all possible combinations of
137 // the ub results and lb result and ceildiv with the loop step. For e.g.,
138 //
139 // affine.for %i1 = 0 to min affine_map<(d0)[] -> (d0 + 32, 1024)>(%i0)
140 // will have an upperBound map as,
141 // affine_map<(d0)[] -> (((d0 + 32) - 0) ceildiv 1, (1024 - 0) ceildiv
142 // 1)>(%i0)
143 //
144 // Insert all combinations of upper/lower bound results.
145 for (unsigned i = 0, e = origUbExprs.size(); i < e; ++i) {
146 newUbExprs.push_back(
147 (origUbExprs[i] - origLbExprs[0]).ceilDiv(origLoopStep));
148 }
149
150 // Construct newUbMap.
151 AffineMap newUbMap =
152 AffineMap::get(origLbMap.getNumDims() + origUbMap.getNumDims(),
153 origLbMap.getNumSymbols() + origUbMap.getNumSymbols(),
154 newUbExprs, opBuilder.getContext());
155
156 // Normalize the loop.
157 op.setUpperBound(ubOperands, newUbMap);
158 op.setLowerBound({}, opBuilder.getConstantAffineMap(0));
159 op.setStep(1);
160
161 // Calculate the Value of new loopIV. Create affine.apply for the value of
162 // the loopIV in normalized loop.
163 opBuilder.setInsertionPointToStart(op.getBody());
164 SmallVector<Value, 4> lbOperands(lb.getOperands().begin(),
165 lb.getOperands().begin() +
166 lb.getMap().getNumDims());
167 // Add an extra dim operand for loopIV.
168 lbOperands.push_back(op.getInductionVar());
169 // Add symbol operands from lower bound.
170 for (unsigned j = 0, e = origLbMap.getNumSymbols(); j < e; ++j)
171 lbOperands.push_back(lb.getOperand(origLbMap.getNumDims() + j));
172
173 AffineExpr origIVExpr = opBuilder.getAffineDimExpr(lb.getMap().getNumDims());
174 AffineExpr newIVExpr = origIVExpr * origLoopStep + origLbMap.getResult(0);
175 AffineMap ivMap = AffineMap::get(origLbMap.getNumDims() + 1,
176 origLbMap.getNumSymbols(), newIVExpr);
177 Operation *newIV = opBuilder.create<AffineApplyOp>(loc, ivMap, lbOperands);
178 op.getInductionVar().replaceAllUsesExcept(newIV->getResult(0),
179 SmallPtrSet<Operation *, 1>{newIV});
180 }
181
182 namespace {
183
184 /// Normalize affine.parallel ops so that lower bounds are 0 and steps are 1.
185 /// As currently implemented, this pass cannot fail, but it might skip over ops
186 /// that are already in a normalized form.
187 struct AffineLoopNormalizePass
188 : public AffineLoopNormalizeBase<AffineLoopNormalizePass> {
189
runOnFunction__anon9f5e4f4f0211::AffineLoopNormalizePass190 void runOnFunction() override {
191 getFunction().walk([](Operation *op) {
192 if (auto affineParallel = dyn_cast<AffineParallelOp>(op))
193 normalizeAffineParallel(affineParallel);
194 else if (auto affineFor = dyn_cast<AffineForOp>(op))
195 normalizeAffineFor(affineFor);
196 });
197 }
198 };
199
200 } // namespace
201
/// Factory used by pass registration to construct the affine-loop-normalize
/// pass.
std::unique_ptr<OperationPass<FuncOp>> mlir::createAffineLoopNormalizePass() {
  return std::make_unique<AffineLoopNormalizePass>();
}
205