1 //===- ConvertKernelFuncToBlob.cpp - MLIR GPU lowering passes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a pass to convert gpu kernel functions into a
10 // corresponding binary blob that can be executed on a GPU. Currently
11 // only translates the function itself but no dependencies.
12 //
13 //===----------------------------------------------------------------------===//
14
15 #include "mlir/Conversion/GPUCommon/GPUCommonPass.h"
16
17 #include "mlir/Dialect/GPU/GPUDialect.h"
18 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
19 #include "mlir/IR/Attributes.h"
20 #include "mlir/IR/Builders.h"
21 #include "mlir/IR/BuiltinOps.h"
22 #include "mlir/Pass/Pass.h"
23 #include "mlir/Pass/PassRegistry.h"
24 #include "mlir/Support/LogicalResult.h"
25
26 #include "llvm/ADT/Optional.h"
27 #include "llvm/ADT/Twine.h"
28 #include "llvm/IR/Constants.h"
29 #include "llvm/IR/LegacyPassManager.h"
30 #include "llvm/IR/Module.h"
31 #include "llvm/Support/Error.h"
32 #include "llvm/Support/Mutex.h"
33 #include "llvm/Support/TargetRegistry.h"
34 #include "llvm/Support/TargetSelect.h"
35 #include "llvm/Target/TargetMachine.h"
36
37 using namespace mlir;
38
39 namespace {
40
41 /// A pass converting tagged kernel modules to a blob with target instructions.
42 ///
43 /// If tagged as a kernel module, each contained function is translated to
44 /// user-specified IR. A user provided BlobGenerator then compiles the IR to
45 /// GPU binary code, which is then attached as an attribute to the function.
46 /// The function body is erased.
47 class GpuKernelToBlobPass
48 : public PassWrapper<GpuKernelToBlobPass, OperationPass<gpu::GPUModuleOp>> {
49 public:
GpuKernelToBlobPass(LoweringCallback loweringCallback,BlobGenerator blobGenerator,StringRef triple,StringRef targetChip,StringRef features,StringRef gpuBinaryAnnotation)50 GpuKernelToBlobPass(LoweringCallback loweringCallback,
51 BlobGenerator blobGenerator, StringRef triple,
52 StringRef targetChip, StringRef features,
53 StringRef gpuBinaryAnnotation)
54 : loweringCallback(loweringCallback), blobGenerator(blobGenerator),
55 triple(triple), targetChip(targetChip), features(features),
56 blobAnnotation(gpuBinaryAnnotation) {}
57
runOnOperation()58 void runOnOperation() override {
59 gpu::GPUModuleOp module = getOperation();
60
61 // Lower the module to an LLVM IR module using a separate context to enable
62 // multi-threaded processing.
63 llvm::LLVMContext llvmContext;
64 std::unique_ptr<llvm::Module> llvmModule =
65 loweringCallback(module, llvmContext, "LLVMDialectModule");
66 if (!llvmModule)
67 return signalPassFailure();
68
69 // Translate the llvm module to a target blob and attach the result as
70 // attribute to the module.
71 if (auto blobAttr = translateGPUModuleToBinaryAnnotation(
72 *llvmModule, module.getLoc(), module.getName()))
73 module.setAttr(blobAnnotation, blobAttr);
74 else
75 signalPassFailure();
76 }
77
78 private:
79 std::string translateModuleToISA(llvm::Module &module,
80 llvm::TargetMachine &targetMachine);
81
82 /// Converts llvmModule to a blob with target instructions using the
83 /// user-provided generator. Location is used for error reporting and name is
84 /// forwarded to the blob generator to use in its logging mechanisms.
85 OwnedBlob convertModuleToBlob(llvm::Module &llvmModule, Location loc,
86 StringRef name);
87
88 /// Translates llvmModule to a blob with target instructions and returns the
89 /// result as attribute.
90 StringAttr translateGPUModuleToBinaryAnnotation(llvm::Module &llvmModule,
91 Location loc, StringRef name);
92
93 LoweringCallback loweringCallback;
94 BlobGenerator blobGenerator;
95 llvm::Triple triple;
96 StringRef targetChip;
97 StringRef features;
98 StringRef blobAnnotation;
99 };
100
101 } // anonymous namespace
102
103 std::string
translateModuleToISA(llvm::Module & module,llvm::TargetMachine & targetMachine)104 GpuKernelToBlobPass::translateModuleToISA(llvm::Module &module,
105 llvm::TargetMachine &targetMachine) {
106 std::string targetISA;
107 {
108 llvm::raw_string_ostream stream(targetISA);
109 llvm::buffer_ostream pstream(stream);
110 llvm::legacy::PassManager codegenPasses;
111 targetMachine.addPassesToEmitFile(codegenPasses, pstream, nullptr,
112 llvm::CGFT_AssemblyFile);
113 codegenPasses.run(module);
114 }
115
116 return targetISA;
117 }
118
convertModuleToBlob(llvm::Module & llvmModule,Location loc,StringRef name)119 OwnedBlob GpuKernelToBlobPass::convertModuleToBlob(llvm::Module &llvmModule,
120 Location loc,
121 StringRef name) {
122 std::unique_ptr<llvm::TargetMachine> targetMachine;
123 {
124 std::string error;
125 const llvm::Target *target =
126 llvm::TargetRegistry::lookupTarget("", triple, error);
127 if (target == nullptr) {
128 emitError(loc, "cannot initialize target triple");
129 return {};
130 }
131 targetMachine.reset(target->createTargetMachine(triple.str(), targetChip,
132 features, {}, {}));
133 if (targetMachine == nullptr) {
134 emitError(loc, "connot initialize target machine");
135 return {};
136 }
137 }
138
139 llvmModule.setDataLayout(targetMachine->createDataLayout());
140
141 auto targetISA = translateModuleToISA(llvmModule, *targetMachine);
142
143 return blobGenerator(targetISA, loc, name);
144 }
145
translateGPUModuleToBinaryAnnotation(llvm::Module & llvmModule,Location loc,StringRef name)146 StringAttr GpuKernelToBlobPass::translateGPUModuleToBinaryAnnotation(
147 llvm::Module &llvmModule, Location loc, StringRef name) {
148 auto blob = convertModuleToBlob(llvmModule, loc, name);
149 if (!blob)
150 return {};
151 return StringAttr::get({blob->data(), blob->size()}, loc->getContext());
152 }
153
154 std::unique_ptr<OperationPass<gpu::GPUModuleOp>>
createConvertGPUKernelToBlobPass(LoweringCallback loweringCallback,BlobGenerator blobGenerator,StringRef triple,StringRef targetChip,StringRef features,StringRef gpuBinaryAnnotation)155 mlir::createConvertGPUKernelToBlobPass(LoweringCallback loweringCallback,
156 BlobGenerator blobGenerator,
157 StringRef triple, StringRef targetChip,
158 StringRef features,
159 StringRef gpuBinaryAnnotation) {
160 return std::make_unique<GpuKernelToBlobPass>(loweringCallback, blobGenerator,
161 triple, targetChip, features,
162 gpuBinaryAnnotation);
163 }
164