/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/compiler/xla/service/gpu/tests/mlir_gpu_test_base.h"

#include "llvm/IR/LLVMContext.h"
#include "mlir/IR/MLIRContext.h"  // from @llvm-project
#include "mlir/InitAllDialects.h"  // from @llvm-project
#include "mlir/Parser.h"  // from @llvm-project
#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/register.h"
#include "tensorflow/compiler/mlir/xla/type_to_shape.h"
#include "tensorflow/compiler/xla/debug_options_flags.h"
#include "tensorflow/compiler/xla/service/gpu/gpu_compiler.h"
#include "tensorflow/compiler/xla/service/gpu/target_constants.h"
#include "tensorflow/core/common_runtime/gpu/gpu_init.h"

namespace xla {
namespace gpu {

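// Sets up the backend on the GPU platform available in this build
// (CUDA or ROCm); all tests derived from this fixture share it.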
MlirGpuTestBase::MlirGpuTestBase() {
  se::Platform* platform =
      se::MultiPlatformManager::PlatformWithName(tensorflow::GpuPlatformName())
          .ConsumeValueOrDie();
  BackendOptions options;
  options.set_platform(platform);
  backend_ = xla::Backend::CreateBackend(options).ConsumeValueOrDie();
}

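// Compiles an LMHLO module into a GPU Executable, setting up the LLVM module
// and IR emitter context that CompileLmhloToExecutable expects.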
StatusOr<std::unique_ptr<Executable>> MlirGpuTestBase::CompileMlirModule(
    mlir::ModuleOp module, se::Stream* stream) {
  llvm::LLVMContext llvm_context;
  auto llvm_module = absl::make_unique<llvm::Module>("", llvm_context);
#if TENSORFLOW_USE_ROCM
  llvm_module->setTargetTriple(amdgpu::kTargetTriple);
  llvm_module->setDataLayout(amdgpu::kDataLayout);
#else
  llvm_module->setTargetTriple(nvptx::kTargetTriple);
  llvm_module->setDataLayout(nvptx::kDataLayout);
#endif

  se::StreamExecutor* stream_exec = stream->parent();
  GpuDeviceInfo gpu_device_info = GetGpuDeviceInfo(stream_exec);

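  // cuda_compute_capability() reports a major version of -1 when the device
  // is not a CUDA device; map that case to nullopt.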
  absl::optional<CudaComputeCapability> cuda_compute_capability =
      [&]() -> absl::optional<CudaComputeCapability> {
    CudaComputeCapability cuda_compute_capability;
    stream_exec->GetDeviceDescription().cuda_compute_capability(
        &cuda_compute_capability.cc_major, &cuda_compute_capability.cc_minor);
    if (cuda_compute_capability.cc_major == -1) {
      return absl::nullopt;
    }
    return cuda_compute_capability;
  }();

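  // The IR emitter context is built without an HloModule, buffer assignment,
  // profile index map, or MLIR context; only the platform name, device
  // information, and the LLVM module are supplied.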
  IrEmitterContext ir_emitter_context(
      /*hlo_module=*/nullptr, /*buffer_assignment=*/nullptr,
      backend_->platform()->Name(), gpu_device_info, cuda_compute_capability,
      /*profile_index_map=*/nullptr, /*mlir_context=*/nullptr,
      llvm_module.get());

  HloModuleConfig module_config;
  module_config.set_debug_options(DefaultDebugOptionsIgnoringFlags());
  return CompileLmhloToExecutable(
      static_cast<GpuCompiler*>(backend_->compiler()), module, "TestModule",
      module_config, Compiler::CompileOptions(), "main", stream_exec,
      std::move(llvm_module), &ir_emitter_context);
}

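// Compiles the given LMHLO module and runs it on `stream` with the given
// device buffers as arguments, returning the execution output.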
StatusOr<ExecutionOutput> MlirGpuTestBase::RunMlirModule(
    mlir::ModuleOp module, se::Stream* stream,
    absl::Span<const se::DeviceMemoryBase> arguments) {
  TF_ASSIGN_OR_RETURN(auto executable, CompileMlirModule(module, stream));

  ExecutableRunOptions executable_run_options;
  executable_run_options.set_stream(stream);
  executable_run_options.set_allocator(backend_->memory_allocator());
  ServiceExecutableRunOptions run_options(executable_run_options);
  std::vector<ExecutionInput> execution_inputs;

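  // Wrap each argument buffer as a U8[size] ExecutionInput. The buffers are
  // passed unowned, so the executable does not release them after the run.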
  for (auto arg : arguments) {
    Shape shape =
        ShapeUtil::MakeShape(xla::U8, {static_cast<int64>(arg.size())});
    execution_inputs.emplace_back(shape);
    execution_inputs.back().SetBuffer({}, MaybeOwningDeviceMemory(arg));
  }

  TF_ASSIGN_OR_RETURN(auto output,
                      executable->ExecuteAsyncOnStream(
                          &run_options, std::move(execution_inputs),
                          /*hlo_execution_profile=*/nullptr));

  TF_CHECK_OK(stream->BlockHostUntilDone());

  return std::move(output);
}

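// Copies the given host buffers to newly allocated device memory, runs the
// module, and copies each result buffer back into a host byte vector.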
StatusOr<std::vector<std::vector<uint8>>>
MlirGpuTestBase::RunMlirModuleWithHostBuffers(
    mlir::ModuleOp module, std::vector<absl::Span<uint8>> arguments) {
  auto* allocator = backend_->memory_allocator();
  std::vector<se::OwningDeviceMemory> owning_memory;
  owning_memory.reserve(arguments.size());
  for (auto host_buffer : arguments) {
    owning_memory.push_back(
        allocator
            ->Allocate(backend_->default_device_ordinal(), host_buffer.size())
            .ConsumeValueOrDie());
  }
  auto stream = backend_->BorrowStream(backend_->default_device_ordinal())
                    .ConsumeValueOrDie();
  std::vector<se::DeviceMemoryBase> args;
  for (int i = 0; i < owning_memory.size(); i++) {
    se::DeviceMemoryBase memory(*owning_memory[i]);
    stream->ThenMemcpy(&memory, static_cast<void*>(arguments[i].data()),
                       memory.size());
    args.push_back(memory);
  }
  TF_ASSIGN_OR_RETURN(ExecutionOutput output,
                      RunMlirModule(module, stream.get(), args));

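  // Copy every leaf buffer of the result back to the host.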
  std::vector<std::vector<uint8>> host_outputs;
  for (const auto& result : output.Result().buffers().leaves()) {
    host_outputs.emplace_back();
    host_outputs.back().resize(result.second.size());
    stream->ThenMemcpy(static_cast<void*>(host_outputs.back().data()),
                       result.second, result.second.size());
  }
  TF_CHECK_OK(stream->BlockHostUntilDone());
  return host_outputs;
}

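// Parses `module_text` into an MLIR module, loading the dialects used by the
// LMHLO GPU pipeline first. CHECK-fails if the text does not parse.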
mlir::OwningModuleRef MlirGpuTestBase::ParseMlirModule(
    absl::string_view module_text, mlir::MLIRContext& context) {
  context.loadDialect<mlir::lmhlo::LmhloDialect, mlir::mhlo::MhloDialect,
                      mlir::StandardOpsDialect,
                      mlir::lmhlo_gpu::LmhloGpuDialect>();

  mlir::OwningModuleRef module = parseSourceString(
      llvm::StringRef(module_text.data(), module_text.size()), &context);
  CHECK(module);
  return module;
}

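// Convenience wrapper: parses the module text and runs it with host buffers.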
StatusOr<std::vector<std::vector<uint8>>>
MlirGpuTestBase::RunMlirTextWithHostBuffers(
    absl::string_view module_text, std::vector<absl::Span<uint8>> arguments) {
  mlir::MLIRContext context;
  mlir::OwningModuleRef module = ParseMlirModule(module_text, context);
  return RunMlirModuleWithHostBuffers(*module, arguments);
}

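// Convenience wrapper: parses the module text and compiles it on a borrowed
// stream without running it.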
StatusOr<std::unique_ptr<Executable>> MlirGpuTestBase::CompileMlirText(
    absl::string_view module_text) {
  mlir::MLIRContext context;
  mlir::OwningModuleRef module = ParseMlirModule(module_text, context);
  auto stream = backend_->BorrowStream(backend_->default_device_ordinal())
                    .ConsumeValueOrDie();
  return CompileMlirModule(*module, stream.get());
}

}  // namespace gpu
}  // namespace xla