/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_COMPILER_XLA_EXECUTABLE_RUN_OPTIONS_H_
#define TENSORFLOW_COMPILER_XLA_EXECUTABLE_RUN_OPTIONS_H_

#include <functional>
#include <string>

#include "tensorflow/compiler/xla/types.h"

// These classes are forward declared so that ExecutableRunOptions can be
// linked into an XLA-compiled binary without having to link all of the
// pointed-to objects (e.g., for an ahead-of-time compiled CPU binary, the GPU
// tools don't need to be linked).
namespace stream_executor {
class Stream;
class Platform;
class DeviceMemoryAllocator;
}  // namespace stream_executor

namespace Eigen {
struct ThreadPoolDevice;
}  // namespace Eigen

namespace xla {

class DeviceAssignment;
class ExecutionProfile;

namespace gpu {
class GpuExecutableRunOptions;
}  // namespace gpu

// A unique identifier for a particular "logical execution" of an XLA model.
//
// A logical execution might encompass multiple executions of one or more
// HloModules. Runs that are part of the same logical execution can
// communicate via collective ops (e.g. kAllToAll), whereas runs that are part
// of different logical executions are isolated.
class RunId {
 public:
  // Creates a new, unique RunId.
  RunId();
  explicit RunId(int64_t value) : data_(value) {}

  RunId(const RunId&) = default;
  RunId& operator=(const RunId&) = default;
  friend bool operator==(const RunId& a, const RunId& b);
  std::string ToString() const;
  int64_t ToInt() const;

  template <typename H>
  friend H AbslHashValue(H h, const RunId& id) {
    return H::combine(std::move(h), id.data_);
  }

 private:
  int64_t data_;
};

// Callback used by the GPU backend only. This is a "one-sided" version of
// ThenDoHostCallback that enqueues a callback onto a stream. The difference
// from ThenDoHostCallback is that the device does not block waiting for the
// callback to complete; instead the callback is scheduled by the runtime.
// This functionality must be provided by the caller, and hence is provided in
// callback form.
using ThenExecuteFunction =
    std::function<void(stream_executor::Stream*, std::function<void()>)>;
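
// Illustrative sketch of a caller-provided ThenExecuteFunction. The
// `host_pool` object and its Schedule() method are hypothetical stand-ins for
// whatever mechanism the caller uses to run work asynchronously on the host:
//
//   ThenExecuteFunction then_execute =
//       [&host_pool](stream_executor::Stream* stream,
//                    std::function<void()> callback) {
//         // The device does not block on `callback`; it is handed off to a
//         // host thread and run whenever the runtime schedules it.
//         host_pool.Schedule(std::move(callback));
//       };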

// Class containing options for running a LocalExecutable.
class ExecutableRunOptions {
 public:
  // Specifies the allocator to use during execution.
  ExecutableRunOptions& set_allocator(
      stream_executor::DeviceMemoryAllocator* allocator);
  stream_executor::DeviceMemoryAllocator* allocator() const;

  // If set, this is the device to run the computation on. Valid
  // device_ordinal values are: 0 to # of devices - 1. These values are
  // identical to the device ordinal values used by StreamExecutor. The device
  // must be of the same type as the executable was compiled for. A value of
  // -1 indicates this option has not been set.
  ExecutableRunOptions& set_device_ordinal(int device_ordinal);
  int device_ordinal() const;

  // If set, this is the stream to run the computation on. The platform of the
  // stream must match the platform the executable was built for. A value of
  // nullptr indicates the option has not been set.
  ExecutableRunOptions& set_stream(stream_executor::Stream* stream);
  stream_executor::Stream* stream() const;

  // If set, this is the stream to perform any pre-computation transfers on.
  // The platform of the stream must match the platform the executable was
  // built for. A value of nullptr indicates the option has not been set.
  ExecutableRunOptions& set_host_to_device_stream(
      stream_executor::Stream* stream);
  stream_executor::Stream* host_to_device_stream() const;

  // Sets the thread pool device on which to run Eigen subcomputations.
  //
  // This field must be set for XLA:CPU models that call Eigen routines, but
  // may be null otherwise. Routines that use this field should always CHECK
  // (or TF_RET_CHECK) that it's not null before dereferencing it, so that
  // users get a clean crash rather than a segfault.
  //
  // Does not take ownership.
  ExecutableRunOptions& set_intra_op_thread_pool(
      const Eigen::ThreadPoolDevice* intra_op_thread_pool);
  const Eigen::ThreadPoolDevice* intra_op_thread_pool() const;

  // If set, profiling information is written to 'profile'.
  ExecutionProfile* execution_profile() const;
  ExecutableRunOptions& set_execution_profile(ExecutionProfile* profile);

  ExecutableRunOptions& set_device_assignment(
      const DeviceAssignment* device_assignment);
  const DeviceAssignment* device_assignment() const;

  ExecutableRunOptions& set_rng_seed(int rng_seed);
  int rng_seed() const;

  ExecutableRunOptions& set_launch_id(int32_t launch_id) {
    launch_id_ = launch_id;
    return *this;
  }

  int32_t launch_id() const { return launch_id_; }

  ExecutableRunOptions& set_run_id(RunId id);
  RunId run_id() const;

  // See documentation on ThenExecuteFunction.
  ExecutableRunOptions& set_then_execute_function(ThenExecuteFunction* f) {
    then_execute_function_ = f;
    return *this;
  }
  ThenExecuteFunction* then_execute_function() const {
    return then_execute_function_;
  }

  // GPU-backend specific options. These are kept out-of-line to avoid
  // bloating the size of this dependency for CPU-only AOT builds.
  ExecutableRunOptions& set_gpu_executable_run_options(
      const gpu::GpuExecutableRunOptions* gpu_executable_run_options);
  const gpu::GpuExecutableRunOptions* gpu_executable_run_options() const;

 private:
  stream_executor::DeviceMemoryAllocator* allocator_ = nullptr;
  int device_ordinal_ = -1;
  const DeviceAssignment* device_assignment_ = nullptr;
  stream_executor::Stream* stream_ = nullptr;
  const Eigen::ThreadPoolDevice* intra_op_thread_pool_ = nullptr;
  ExecutionProfile* execution_profile_ = nullptr;
  int rng_seed_ = 0;
  int32_t launch_id_ = 0;
  stream_executor::Stream* host_to_device_stream_ = nullptr;
  ThenExecuteFunction* then_execute_function_ = nullptr;
  RunId run_id_;
  const gpu::GpuExecutableRunOptions* gpu_executable_run_options_ = nullptr;
};
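
// Typical usage of ExecutableRunOptions (illustrative sketch only;
// `allocator`, `stream`, and `eigen_device` are assumed to have been created
// elsewhere by the caller, and the chosen values are arbitrary):
//
//   ExecutableRunOptions run_options;
//   run_options.set_allocator(allocator)
//       .set_device_ordinal(0)
//       .set_stream(stream)
//       .set_intra_op_thread_pool(eigen_device)
//       .set_rng_seed(42);
//
//   // Executions that should communicate via collective ops must share the
//   // same RunId; unrelated executions should each use a fresh RunId.
//   RunId run_id;
//   run_options.set_run_id(run_id);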

}  // namespace xla

#endif  // TENSORFLOW_COMPILER_XLA_EXECUTABLE_RUN_OPTIONS_H_