1 //===- SCFToGPUPass.cpp - Convert a loop nest to a GPU kernel -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "mlir/Conversion/SCFToGPU/SCFToGPUPass.h"
10 #include "../PassDetail.h"
11 #include "mlir/Conversion/SCFToGPU/SCFToGPU.h"
12 #include "mlir/Dialect/Affine/IR/AffineOps.h"
13 #include "mlir/Dialect/GPU/GPUDialect.h"
14 #include "mlir/Dialect/SCF/SCF.h"
15 #include "mlir/Dialect/StandardOps/IR/Ops.h"
16 #include "mlir/Transforms/DialectConversion.h"
17
18 #include "llvm/ADT/ArrayRef.h"
19 #include "llvm/Support/CommandLine.h"
20
21 using namespace mlir;
22 using namespace mlir::scf;
23
24 namespace {
25 // A pass that traverses top-level loops in the function and converts them to
26 // GPU launch operations. Nested launches are not allowed, so this does not
27 // walk the function recursively to avoid considering nested loops.
28 struct ForLoopMapper : public ConvertAffineForToGPUBase<ForLoopMapper> {
29 ForLoopMapper() = default;
ForLoopMapper__anonf87d01d10111::ForLoopMapper30 ForLoopMapper(unsigned numBlockDims, unsigned numThreadDims) {
31 this->numBlockDims = numBlockDims;
32 this->numThreadDims = numThreadDims;
33 }
34
runOnFunction__anonf87d01d10111::ForLoopMapper35 void runOnFunction() override {
36 for (Operation &op : llvm::make_early_inc_range(getFunction().getOps())) {
37 if (auto forOp = dyn_cast<AffineForOp>(&op)) {
38 if (failed(convertAffineLoopNestToGPULaunch(forOp, numBlockDims,
39 numThreadDims)))
40 signalPassFailure();
41 }
42 }
43 }
44 };
45
46 struct ParallelLoopToGpuPass
47 : public ConvertParallelLoopToGpuBase<ParallelLoopToGpuPass> {
runOnOperation__anonf87d01d10111::ParallelLoopToGpuPass48 void runOnOperation() override {
49 OwningRewritePatternList patterns;
50 populateParallelLoopToGPUPatterns(patterns, &getContext());
51 ConversionTarget target(getContext());
52 target.addLegalDialect<StandardOpsDialect>();
53 target.addLegalDialect<AffineDialect>();
54 target.addLegalDialect<gpu::GPUDialect>();
55 target.addLegalDialect<scf::SCFDialect>();
56 configureParallelLoopToGPULegality(target);
57 if (failed(applyPartialConversion(getOperation(), target,
58 std::move(patterns))))
59 signalPassFailure();
60 }
61 };
62
63 } // namespace
64
65 std::unique_ptr<OperationPass<FuncOp>>
createAffineForToGPUPass(unsigned numBlockDims,unsigned numThreadDims)66 mlir::createAffineForToGPUPass(unsigned numBlockDims, unsigned numThreadDims) {
67 return std::make_unique<ForLoopMapper>(numBlockDims, numThreadDims);
68 }
createAffineForToGPUPass()69 std::unique_ptr<OperationPass<FuncOp>> mlir::createAffineForToGPUPass() {
70 return std::make_unique<ForLoopMapper>();
71 }
72
createParallelLoopToGpuPass()73 std::unique_ptr<Pass> mlir::createParallelLoopToGpuPass() {
74 return std::make_unique<ParallelLoopToGpuPass>();
75 }
76