//===- ConvertKernelFuncToBlob.cpp - MLIR GPU lowering passes -------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file implements a pass to convert gpu kernel functions into a // corresponding binary blob that can be executed on a GPU. Currently // only translates the function itself but no dependencies. // //===----------------------------------------------------------------------===// #include "mlir/Conversion/GPUCommon/GPUCommonPass.h" #include "mlir/Dialect/GPU/GPUDialect.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/IR/Attributes.h" #include "mlir/IR/Builders.h" #include "mlir/IR/BuiltinOps.h" #include "mlir/Pass/Pass.h" #include "mlir/Pass/PassRegistry.h" #include "mlir/Support/LogicalResult.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/Twine.h" #include "llvm/IR/Constants.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Module.h" #include "llvm/Support/Error.h" #include "llvm/Support/Mutex.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/TargetSelect.h" #include "llvm/Target/TargetMachine.h" using namespace mlir; namespace { /// A pass converting tagged kernel modules to a blob with target instructions. /// /// If tagged as a kernel module, each contained function is translated to /// user-specified IR. A user provided BlobGenerator then compiles the IR to /// GPU binary code, which is then attached as an attribute to the function. /// The function body is erased. class GpuKernelToBlobPass : public PassWrapper> { public: GpuKernelToBlobPass(LoweringCallback loweringCallback, BlobGenerator blobGenerator, StringRef triple, StringRef targetChip, StringRef features, StringRef gpuBinaryAnnotation) : loweringCallback(loweringCallback), blobGenerator(blobGenerator), triple(triple), targetChip(targetChip), features(features), blobAnnotation(gpuBinaryAnnotation) {} void runOnOperation() override { gpu::GPUModuleOp module = getOperation(); // Lower the module to an LLVM IR module using a separate context to enable // multi-threaded processing. llvm::LLVMContext llvmContext; std::unique_ptr llvmModule = loweringCallback(module, llvmContext, "LLVMDialectModule"); if (!llvmModule) return signalPassFailure(); // Translate the llvm module to a target blob and attach the result as // attribute to the module. if (auto blobAttr = translateGPUModuleToBinaryAnnotation( *llvmModule, module.getLoc(), module.getName())) module.setAttr(blobAnnotation, blobAttr); else signalPassFailure(); } private: std::string translateModuleToISA(llvm::Module &module, llvm::TargetMachine &targetMachine); /// Converts llvmModule to a blob with target instructions using the /// user-provided generator. Location is used for error reporting and name is /// forwarded to the blob generator to use in its logging mechanisms. OwnedBlob convertModuleToBlob(llvm::Module &llvmModule, Location loc, StringRef name); /// Translates llvmModule to a blob with target instructions and returns the /// result as attribute. StringAttr translateGPUModuleToBinaryAnnotation(llvm::Module &llvmModule, Location loc, StringRef name); LoweringCallback loweringCallback; BlobGenerator blobGenerator; llvm::Triple triple; StringRef targetChip; StringRef features; StringRef blobAnnotation; }; } // anonymous namespace std::string GpuKernelToBlobPass::translateModuleToISA(llvm::Module &module, llvm::TargetMachine &targetMachine) { std::string targetISA; { llvm::raw_string_ostream stream(targetISA); llvm::buffer_ostream pstream(stream); llvm::legacy::PassManager codegenPasses; targetMachine.addPassesToEmitFile(codegenPasses, pstream, nullptr, llvm::CGFT_AssemblyFile); codegenPasses.run(module); } return targetISA; } OwnedBlob GpuKernelToBlobPass::convertModuleToBlob(llvm::Module &llvmModule, Location loc, StringRef name) { std::unique_ptr targetMachine; { std::string error; const llvm::Target *target = llvm::TargetRegistry::lookupTarget("", triple, error); if (target == nullptr) { emitError(loc, "cannot initialize target triple"); return {}; } targetMachine.reset(target->createTargetMachine(triple.str(), targetChip, features, {}, {})); if (targetMachine == nullptr) { emitError(loc, "connot initialize target machine"); return {}; } } llvmModule.setDataLayout(targetMachine->createDataLayout()); auto targetISA = translateModuleToISA(llvmModule, *targetMachine); return blobGenerator(targetISA, loc, name); } StringAttr GpuKernelToBlobPass::translateGPUModuleToBinaryAnnotation( llvm::Module &llvmModule, Location loc, StringRef name) { auto blob = convertModuleToBlob(llvmModule, loc, name); if (!blob) return {}; return StringAttr::get({blob->data(), blob->size()}, loc->getContext()); } std::unique_ptr> mlir::createConvertGPUKernelToBlobPass(LoweringCallback loweringCallback, BlobGenerator blobGenerator, StringRef triple, StringRef targetChip, StringRef features, StringRef gpuBinaryAnnotation) { return std::make_unique(loweringCallback, blobGenerator, triple, targetChip, features, gpuBinaryAnnotation); }