• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===- StdExpandDivs.cpp - Code to prepare Std for lowring Divs 0to LLVM  -===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file Std transformations to expand Divs operation to help for the
10 // lowering to LLVM. Currently implemented tranformations are Ceil and Floor
11 // for Signed Integers.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "PassDetail.h"
16 #include "mlir/Dialect/StandardOps/IR/Ops.h"
17 #include "mlir/Dialect/StandardOps/Transforms/Passes.h"
18 #include "mlir/IR/PatternMatch.h"
19 
20 using namespace mlir;
21 
22 namespace {
23 
24 /// Converts `atomic_rmw` that cannot be lowered to a simple atomic op with
25 /// AtomicRMWOpLowering pattern, e.g. with "minf" or "maxf" attributes, to
26 /// `generic_atomic_rmw` with the expanded code.
27 ///
28 /// %x = atomic_rmw "maxf" %fval, %F[%i] : (f32, memref<10xf32>) -> f32
29 ///
30 /// will be lowered to
31 ///
32 /// %x = std.generic_atomic_rmw %F[%i] : memref<10xf32> {
33 /// ^bb0(%current: f32):
34 ///   %cmp = cmpf "ogt", %current, %fval : f32
35 ///   %new_value = select %cmp, %current, %fval : f32
36 ///   atomic_yield %new_value : f32
37 /// }
38 struct AtomicRMWOpConverter : public OpRewritePattern<AtomicRMWOp> {
39 public:
40   using OpRewritePattern::OpRewritePattern;
41 
matchAndRewrite__anon9acad0e70111::AtomicRMWOpConverter42   LogicalResult matchAndRewrite(AtomicRMWOp op,
43                                 PatternRewriter &rewriter) const final {
44     CmpFPredicate predicate;
45     switch (op.kind()) {
46     case AtomicRMWKind::maxf:
47       predicate = CmpFPredicate::OGT;
48       break;
49     case AtomicRMWKind::minf:
50       predicate = CmpFPredicate::OLT;
51       break;
52     default:
53       return failure();
54     }
55 
56     auto loc = op.getLoc();
57     auto genericOp =
58         rewriter.create<GenericAtomicRMWOp>(loc, op.memref(), op.indices());
59     OpBuilder bodyBuilder =
60         OpBuilder::atBlockEnd(genericOp.getBody(), rewriter.getListener());
61 
62     Value lhs = genericOp.getCurrentValue();
63     Value rhs = op.value();
64     Value cmp = bodyBuilder.create<CmpFOp>(loc, predicate, lhs, rhs);
65     Value select = bodyBuilder.create<SelectOp>(loc, cmp, lhs, rhs);
66     bodyBuilder.create<AtomicYieldOp>(loc, select);
67 
68     rewriter.replaceOp(op, genericOp.getResult());
69     return success();
70   }
71 };
72 
73 /// Converts `memref_reshape` that has a target shape of a statically-known
74 /// size to `memref_reinterpret_cast`.
75 struct MemRefReshapeOpConverter : public OpRewritePattern<MemRefReshapeOp> {
76 public:
77   using OpRewritePattern::OpRewritePattern;
78 
matchAndRewrite__anon9acad0e70111::MemRefReshapeOpConverter79   LogicalResult matchAndRewrite(MemRefReshapeOp op,
80                                 PatternRewriter &rewriter) const final {
81     auto shapeType = op.shape().getType().cast<MemRefType>();
82     if (!shapeType.hasStaticShape())
83       return failure();
84 
85     int64_t rank = shapeType.cast<MemRefType>().getDimSize(0);
86     SmallVector<Value, 4> sizes, strides;
87     sizes.resize(rank);
88     strides.resize(rank);
89 
90     Location loc = op.getLoc();
91     Value stride = rewriter.create<ConstantIndexOp>(loc, 1);
92     for (int i = rank - 1; i >= 0; --i) {
93       Value index = rewriter.create<ConstantIndexOp>(loc, i);
94       Value size = rewriter.create<LoadOp>(loc, op.shape(), index);
95       if (!size.getType().isa<IndexType>())
96         size = rewriter.create<IndexCastOp>(loc, size, rewriter.getIndexType());
97       sizes[i] = size;
98       strides[i] = stride;
99       if (i > 0)
100         stride = rewriter.create<MulIOp>(loc, stride, size);
101     }
102     SmallVector<int64_t, 2> staticSizes(rank, ShapedType::kDynamicSize);
103     SmallVector<int64_t, 2> staticStrides(rank,
104                                           ShapedType::kDynamicStrideOrOffset);
105     rewriter.replaceOpWithNewOp<MemRefReinterpretCastOp>(
106         op, op.getType(), op.source(), /*staticOffset = */ 0, staticSizes,
107         staticStrides, /*offset=*/llvm::None, sizes, strides);
108     return success();
109   }
110 };
111 
112 /// Expands SignedCeilDivIOP (n, m) into
113 ///   1) x = (m > 0) ? -1 : 1
114 ///   2) (n*m>0) ? ((n+x) / m) + 1 : - (-n / m)
115 struct SignedCeilDivIOpConverter : public OpRewritePattern<SignedCeilDivIOp> {
116 public:
117   using OpRewritePattern::OpRewritePattern;
matchAndRewrite__anon9acad0e70111::SignedCeilDivIOpConverter118   LogicalResult matchAndRewrite(SignedCeilDivIOp op,
119                                 PatternRewriter &rewriter) const final {
120     Location loc = op.getLoc();
121     SignedCeilDivIOp signedCeilDivIOp = cast<SignedCeilDivIOp>(op);
122     Type type = signedCeilDivIOp.getType();
123     Value a = signedCeilDivIOp.lhs();
124     Value b = signedCeilDivIOp.rhs();
125     Value plusOne =
126         rewriter.create<ConstantOp>(loc, rewriter.getIntegerAttr(type, 1));
127     Value zero =
128         rewriter.create<ConstantOp>(loc, rewriter.getIntegerAttr(type, 0));
129     Value minusOne =
130         rewriter.create<ConstantOp>(loc, rewriter.getIntegerAttr(type, -1));
131     // Compute x = (b>0) ? -1 : 1.
132     Value compare = rewriter.create<CmpIOp>(loc, CmpIPredicate::sgt, b, zero);
133     Value x = rewriter.create<SelectOp>(loc, compare, minusOne, plusOne);
134     // Compute positive res: 1 + ((x+a)/b).
135     Value xPlusA = rewriter.create<AddIOp>(loc, x, a);
136     Value xPlusADivB = rewriter.create<SignedDivIOp>(loc, xPlusA, b);
137     Value posRes = rewriter.create<AddIOp>(loc, plusOne, xPlusADivB);
138     // Compute negative res: - ((-a)/b).
139     Value minusA = rewriter.create<SubIOp>(loc, zero, a);
140     Value minusADivB = rewriter.create<SignedDivIOp>(loc, minusA, b);
141     Value negRes = rewriter.create<SubIOp>(loc, zero, minusADivB);
142     // Result is (a*b>0) ? pos result : neg result.
143     // Note, we want to avoid using a*b because of possible overflow.
144     // The case that matters are a>0, a==0, a<0, b>0 and b<0. We do
145     // not particuliarly care if a*b<0 is true or false when b is zero
146     // as this will result in an illegal divide. So `a*b<0` can be reformulated
147     // as `(a<0 && b<0) || (a>0 && b>0)' or `(a<0 && b<0) || (a>0 && b>=0)'.
148     // We pick the first expression here.
149     Value aNeg = rewriter.create<CmpIOp>(loc, CmpIPredicate::slt, a, zero);
150     Value aPos = rewriter.create<CmpIOp>(loc, CmpIPredicate::sgt, a, zero);
151     Value bNeg = rewriter.create<CmpIOp>(loc, CmpIPredicate::slt, b, zero);
152     Value bPos = rewriter.create<CmpIOp>(loc, CmpIPredicate::sgt, b, zero);
153     Value firstTerm = rewriter.create<AndOp>(loc, aNeg, bNeg);
154     Value secondTerm = rewriter.create<AndOp>(loc, aPos, bPos);
155     Value compareRes = rewriter.create<OrOp>(loc, firstTerm, secondTerm);
156     Value res = rewriter.create<SelectOp>(loc, compareRes, posRes, negRes);
157     // Perform substitution and return success.
158     rewriter.replaceOp(op, {res});
159     return success();
160   }
161 };
162 
163 /// Expands SignedFloorDivIOP (n, m) into
164 ///   1)  x = (m<0) ? 1 : -1
165 ///   2)  return (n*m<0) ? - ((-n+x) / m) -1 : n / m
166 struct SignedFloorDivIOpConverter : public OpRewritePattern<SignedFloorDivIOp> {
167 public:
168   using OpRewritePattern::OpRewritePattern;
matchAndRewrite__anon9acad0e70111::SignedFloorDivIOpConverter169   LogicalResult matchAndRewrite(SignedFloorDivIOp op,
170                                 PatternRewriter &rewriter) const final {
171     Location loc = op.getLoc();
172     SignedFloorDivIOp signedFloorDivIOp = cast<SignedFloorDivIOp>(op);
173     Type type = signedFloorDivIOp.getType();
174     Value a = signedFloorDivIOp.lhs();
175     Value b = signedFloorDivIOp.rhs();
176     Value plusOne =
177         rewriter.create<ConstantOp>(loc, rewriter.getIntegerAttr(type, 1));
178     Value zero =
179         rewriter.create<ConstantOp>(loc, rewriter.getIntegerAttr(type, 0));
180     Value minusOne =
181         rewriter.create<ConstantOp>(loc, rewriter.getIntegerAttr(type, -1));
182     // Compute x = (b<0) ? 1 : -1.
183     Value compare = rewriter.create<CmpIOp>(loc, CmpIPredicate::slt, b, zero);
184     Value x = rewriter.create<SelectOp>(loc, compare, plusOne, minusOne);
185     // Compute negative res: -1 - ((x-a)/b).
186     Value xMinusA = rewriter.create<SubIOp>(loc, x, a);
187     Value xMinusADivB = rewriter.create<SignedDivIOp>(loc, xMinusA, b);
188     Value negRes = rewriter.create<SubIOp>(loc, minusOne, xMinusADivB);
189     // Compute positive res: a/b.
190     Value posRes = rewriter.create<SignedDivIOp>(loc, a, b);
191     // Result is (a*b<0) ? negative result : positive result.
192     // Note, we want to avoid using a*b because of possible overflow.
193     // The case that matters are a>0, a==0, a<0, b>0 and b<0. We do
194     // not particuliarly care if a*b<0 is true or false when b is zero
195     // as this will result in an illegal divide. So `a*b<0` can be reformulated
196     // as `(a>0 && b<0) || (a>0 && b<0)' or `(a>0 && b<0) || (a>0 && b<=0)'.
197     // We pick the first expression here.
198     Value aNeg = rewriter.create<CmpIOp>(loc, CmpIPredicate::slt, a, zero);
199     Value aPos = rewriter.create<CmpIOp>(loc, CmpIPredicate::sgt, a, zero);
200     Value bNeg = rewriter.create<CmpIOp>(loc, CmpIPredicate::slt, b, zero);
201     Value bPos = rewriter.create<CmpIOp>(loc, CmpIPredicate::sgt, b, zero);
202     Value firstTerm = rewriter.create<AndOp>(loc, aNeg, bPos);
203     Value secondTerm = rewriter.create<AndOp>(loc, aPos, bNeg);
204     Value compareRes = rewriter.create<OrOp>(loc, firstTerm, secondTerm);
205     Value res = rewriter.create<SelectOp>(loc, compareRes, negRes, posRes);
206     // Perform substitution and return success.
207     rewriter.replaceOp(op, {res});
208     return success();
209   }
210 };
211 
212 struct StdExpandOpsPass : public StdExpandOpsBase<StdExpandOpsPass> {
runOnFunction__anon9acad0e70111::StdExpandOpsPass213   void runOnFunction() override {
214     MLIRContext &ctx = getContext();
215 
216     OwningRewritePatternList patterns;
217     populateStdExpandOpsPatterns(&ctx, patterns);
218 
219     ConversionTarget target(getContext());
220 
221     target.addLegalDialect<StandardOpsDialect>();
222     target.addDynamicallyLegalOp<AtomicRMWOp>([](AtomicRMWOp op) {
223       return op.kind() != AtomicRMWKind::maxf &&
224              op.kind() != AtomicRMWKind::minf;
225     });
226     target.addDynamicallyLegalOp<MemRefReshapeOp>([](MemRefReshapeOp op) {
227       return !op.shape().getType().cast<MemRefType>().hasStaticShape();
228     });
229     target.addIllegalOp<SignedCeilDivIOp>();
230     target.addIllegalOp<SignedFloorDivIOp>();
231     if (failed(
232             applyPartialConversion(getFunction(), target, std::move(patterns))))
233       signalPassFailure();
234   }
235 };
236 
237 } // namespace
238 
populateStdExpandOpsPatterns(MLIRContext * context,OwningRewritePatternList & patterns)239 void mlir::populateStdExpandOpsPatterns(MLIRContext *context,
240                                         OwningRewritePatternList &patterns) {
241   patterns.insert<AtomicRMWOpConverter, MemRefReshapeOpConverter,
242                   SignedCeilDivIOpConverter, SignedFloorDivIOpConverter>(
243       context);
244 }
245 
createStdExpandOpsPass()246 std::unique_ptr<Pass> mlir::createStdExpandOpsPass() {
247   return std::make_unique<StdExpandOpsPass>();
248 }
249