/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_GPU_COMPILER_H_
#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_GPU_COMPILER_H_

#include <memory>
#include <string>
#include <vector>

#include "mlir/IR/BuiltinOps.h"  // from @llvm-project
#include "tensorflow/compiler/xla/service/executable.h"
#include "tensorflow/compiler/xla/service/gpu/gpu_device_info.h"
#include "tensorflow/compiler/xla/service/gpu/gpu_executable.h"
#include "tensorflow/compiler/xla/service/gpu/ir_emitter_context.h"
#include "tensorflow/compiler/xla/service/hlo_dataflow_analysis.h"
#include "tensorflow/compiler/xla/service/hlo_module.h"
#include "tensorflow/compiler/xla/service/llvm_compiler.h"
#include "tensorflow/compiler/xla/statusor.h"
#include "tensorflow/compiler/xla/types.h"
#include "tensorflow/core/lib/hash/hash.h"
#include "tensorflow/core/platform/macros.h"
#include "tensorflow/core/platform/stream_executor_no_cuda.h"
#include "tensorflow/core/platform/thread_annotations.h"
#include "tensorflow/stream_executor/stream_executor_pimpl.h"

namespace xla {
namespace gpu {

// The GPU compiler generates efficient GPU executables.
class GpuCompiler : public LLVMCompiler {
 public:
  GpuCompiler(se::Platform::Id platform_id, const char* target_triple,
              const char* data_layout);
  ~GpuCompiler() override {}

  using LLVMCompiler::Compile;

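  // Runs HLO-level optimization passes on `module`. Code generation is done
  // separately in RunBackend.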
  StatusOr<std::unique_ptr<HloModule>> RunHloPasses(
      std::unique_ptr<HloModule> module, se::StreamExecutor* stream_exec,
      const CompileOptions& options) override;

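  // Runs HLO passes (when `optimize` is true) followed by buffer assignment,
  // returning both the resulting module and its buffer assignment.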
  StatusOr<
      std::tuple<std::unique_ptr<HloModule>, std::unique_ptr<BufferAssignment>>>
  RunHloPassesAndBufferAssignement(std::unique_ptr<HloModule> hlo_module,
                                   se::StreamExecutor* executor, bool optimize,
                                   const CompileOptions& options) override;

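  // Runs the GPU-generic HLO optimization pipeline on `hlo_module`.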
  Status OptimizeHloModule(HloModule* hlo_module,
                           se::StreamExecutor* stream_exec,
                           se::DeviceMemoryAllocator* device_allocator);

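  // Canonicalizes convolutions into a form the target's convolution libraries
  // accept; implemented by each platform-specific subclass.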
  virtual Status OptimizeHloConvolutionCanonicalization(
      HloModule* hlo_module, se::StreamExecutor* stream_exec,
      se::DeviceMemoryAllocator* device_allocator) = 0;

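  // Runs optimization passes that must happen after layout assignment.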
  virtual Status OptimizeHloPostLayoutAssignment(
      HloModule* hlo_module, se::StreamExecutor* stream_exec,
      se::DeviceMemoryAllocator* device_allocator);

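  // Returns the predicate that dataflow analysis uses to decide whether an
  // instruction may share a buffer with its operand. The default defers the
  // decision to the generic logic by returning absl::nullopt.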
  virtual HloDataflowAnalysis::CanShareBuffer GetCanShareBuffer() {
    return
        [](const HloInstruction*, const HloInstruction*,
           const ShapeIndex&) -> absl::optional<bool> { return absl::nullopt; };
  }

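  // Returns the version (e.g. the compute capability) of the GPU attached to
  // `stream_exec`.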
  virtual GpuVersion GetGpuVersion(se::StreamExecutor* stream_exec) = 0;

  // TODO(timshen): Replace `debug_module` with some portable debug information
  // that accommodates both HLO and MLIR.
  virtual StatusOr<std::pair<std::string, std::vector<uint8>>>
  CompileTargetBinary(const HloModuleConfig& module_config,
                      llvm::Module* llvm_module, GpuVersion gpu_version,
                      se::StreamExecutor* stream_exec, bool relocatable,
                      const HloModule* debug_module) = 0;

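  // Runs the passes that must run right before IR emission so that
  // `hlo_module` is in the form the IR emitters expect.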
  Status PrepareHloModuleForIrEmitting(HloModule* hlo_module);

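  // Compiles the already-optimized `module` into a GPU executable for the
  // device behind `stream_exec`.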
  StatusOr<std::unique_ptr<Executable>> RunBackend(
      std::unique_ptr<HloModule> module, se::StreamExecutor* stream_exec,
      const CompileOptions& options) override;

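  // Ahead-of-time compilation entry point of the Compiler interface.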
  StatusOr<std::vector<std::unique_ptr<AotCompilationResult>>>
  CompileAheadOfTime(std::unique_ptr<HloModuleGroup> module_group,
                     AotCompilationOptions const& options) override;

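  // Lowers `llvm_module` to a target binary (text and bytes) by delegating to
  // CompileTargetBinary.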
  StatusOr<std::pair<std::string, std::vector<uint8>>> CompileToTargetBinary(
      const HloModuleConfig& module_config,
      std::unique_ptr<llvm::Module> llvm_module,
      se::StreamExecutor* stream_exec, const CompileOptions& options,
      const HloModule* debug_module);

  se::Platform::Id PlatformId() const override { return platform_id_; }

  HloCostAnalysis::ShapeSizeFunction ShapeSizeBytesFunction() const override {
    // Capture just the pointer size, not the entire GpuCompiler object.
    return [pointer_size = pointer_size_](const Shape& shape) {
      return GetSizeOfShape(shape, pointer_size);
    };
  }

  static int64 GetSizeOfShape(const Shape& shape, int pointer_size) {
    if (shape.is_static() || shape.IsTuple()) {
      return ShapeUtil::ByteSizeOf(shape, pointer_size);
    }
    // Each dynamic dimension size is represented as an S32.
    int64_t metadata_size = sizeof(int32) * shape.dimensions_size();
    return ShapeUtil::ByteSizeOf(shape, pointer_size) + metadata_size;
  }

 private:
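  // Links separately compiled relocatable modules into a single device binary.
  // Subclasses that support device linking override this.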
  virtual StatusOr<std::vector<uint8>> LinkModules(
      se::StreamExecutor* stream_exec,
      std::vector<std::vector<uint8>> modules) {
    return Unimplemented("LinkModules is not implemented.");
  }

  se::Platform::Id platform_id_;

  // The triple that represents our target.
  const char* target_triple_;

  // The data layout of the emitted module.
  const char* data_layout_;

  // The size in bytes of a pointer. Used by ShapeSizeBytesFunction.
  const int64 pointer_size_;

  TF_DISALLOW_COPY_AND_ASSIGN(GpuCompiler);
};

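// Collects the device properties of the GPU behind `stream_exec` that code
// generation needs.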
GpuDeviceInfo GetGpuDeviceInfo(se::StreamExecutor* stream_exec);

// Compile `hlo_module` using XLA GPU and return the LLVM module thus generated.
// The GpuExecutable (and the Thunks that are part of it) are not returned.
StatusOr<std::unique_ptr<llvm::Module>> CompileModuleToLlvmIr(
    HloModule* hlo_module, llvm::LLVMContext* llvm_context,
    const std::string& target_triple, const std::string& data_layout,
    const std::string& platform_name, GpuDeviceInfo gpu_device_info,
    se::CudaComputeCapability cuda_compute_capability, int pointer_size);

// Compiles the given LMHLO module to an executable.
// ir_emitter_context should be partially populated: buffer_assignment
// or buffer_allocations should not be populated, while other fields should be
// populated (or left empty if that field is optional).
//
// NOTE: buffer_assignment will be gone from ir_emitter_context once the LMHLO
// transition is done.
StatusOr<std::unique_ptr<Executable>> CompileLmhloToExecutable(
    GpuCompiler* compiler, mlir::ModuleOp module, std::string module_name,
    const HloModuleConfig& module_config,
    const Compiler::CompileOptions& options,
    absl::string_view entry_function_name, se::StreamExecutor* stream_exec,
    std::unique_ptr<llvm::Module> llvm_module,
    IrEmitterContext* ir_emitter_context);

}  // namespace gpu
}  // namespace xla

#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_GPU_COMPILER_H_