1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_KERNEL_THUNK_H_ 17 #define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_KERNEL_THUNK_H_ 18 19 #include <memory> 20 #include <string> 21 #include <vector> 22 23 #include "absl/types/span.h" 24 #include "tensorflow/compiler/xla/service/buffer_assignment.h" 25 #include "tensorflow/compiler/xla/service/gpu/buffer_allocations.h" 26 #include "tensorflow/compiler/xla/service/gpu/hlo_execution_profiler.h" 27 #include "tensorflow/compiler/xla/service/gpu/launch_dimensions.h" 28 #include "tensorflow/compiler/xla/service/gpu/thunk.h" 29 #include "tensorflow/compiler/xla/service/hlo_instruction.h" 30 #include "tensorflow/compiler/xla/types.h" 31 #include "tensorflow/core/platform/mutex.h" 32 #include "tensorflow/core/platform/stream_executor_no_cuda.h" 33 #include "tensorflow/core/platform/thread_annotations.h" 34 35 namespace xla { 36 namespace gpu { 37 38 class GpuExecutable; 39 40 // This class stores everything that StreamExecutor needs for launching a 41 // kernel. It implements the ExecuteOnStream interface for GpuExecutable to 42 // invoke the corresponding kernel. 43 // 44 // This is thread-compatible. 45 class KernelThunk : public Thunk { 46 public: 47 // Constructs a thunk for the given kernel. 48 // 49 // `hlo_instruction` is as in Thunk. Other arguments are as the class members. 50 KernelThunk(ThunkInfo thunk_info, 51 absl::Span<const BufferAllocation* const> args, 52 const string& kernel_name); 53 KernelThunk(const KernelThunk&) = delete; 54 KernelThunk& operator=(const KernelThunk&) = delete; 55 ~KernelThunk() override = default; 56 kernel_name()57 const string& kernel_name() const { return kernel_name_; } 58 void SetLaunchDimensions(const LaunchDimensions& launch_dims); 59 60 Status Initialize(const GpuExecutable& executable, 61 se::StreamExecutor* executor) override; 62 Status ExecuteOnStream(const ExecuteParams& params) override; 63 64 private: 65 // Buffers passed to the kernel as arguments. 66 const std::vector<const BufferAllocation*> args_; 67 68 // Entry kernel name for the computation. 69 const string kernel_name_; 70 71 // The thread and block dimension used to launch the kernel. 72 // Will be set by IrEmitterUnnested. 73 LaunchDimensions launch_dimensions_; 74 75 mutable tensorflow::mutex mutex_; 76 77 // Loaded kernels for each `StreamExecutor`. Requires pointer stability of 78 // values. 79 std::unordered_map<se::StreamExecutor*, std::unique_ptr<se::KernelBase>> 80 kernel_cache_ TF_GUARDED_BY(mutex_); 81 }; 82 83 } // namespace gpu 84 } // namespace xla 85 86 #endif // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_KERNEL_THUNK_H_ 87