1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_BACKEND_H_ 17 #define TENSORFLOW_COMPILER_XLA_SERVICE_BACKEND_H_ 18 19 #include <map> 20 #include <memory> 21 #include <set> 22 #include <string> 23 #include <vector> 24 25 #include "absl/container/flat_hash_map.h" 26 #include "absl/strings/str_cat.h" 27 #include "absl/types/span.h" 28 #include "tensorflow/compiler/xla/service/compiler.h" 29 #include "tensorflow/compiler/xla/service/computation_placer.h" 30 #include "tensorflow/compiler/xla/service/stream_pool.h" 31 #include "tensorflow/compiler/xla/service/transfer_manager.h" 32 #include "tensorflow/compiler/xla/statusor.h" 33 #include "tensorflow/compiler/xla/types.h" 34 #include "tensorflow/core/platform/mutex.h" 35 #include "tensorflow/core/platform/stream_executor_no_cuda.h" 36 #include "tensorflow/core/platform/thread_annotations.h" 37 #include "tensorflow/stream_executor/device_memory_allocator.h" 38 39 namespace Eigen { 40 struct ThreadPoolDevice; 41 } 42 43 namespace xla { 44 45 // Options to configure the backend when it is created. 46 class BackendOptions { 47 public: 48 // Set the platform backing the backend, or nullptr for the default platform. 49 BackendOptions& set_platform(se::Platform* platform); 50 se::Platform* platform() const; 51 52 // Sets the thread pool size for parallel execution of an individual operator. 53 // The default value of -1 will result in initializing the thread pool with 54 // the number of threads equal to the number of cores in the system. 55 BackendOptions& set_intra_op_parallelism_threads(int num_threads); 56 int intra_op_parallelism_threads() const; 57 58 // Sets the allowed_devices for selectively constructing stream executors 59 // on the platform. 60 BackendOptions& set_allowed_devices( 61 const absl::optional<std::set<int>>& allowed_devices); 62 const absl::optional<std::set<int>>& allowed_devices() const; 63 64 private: 65 se::Platform* platform_ = nullptr; 66 int intra_op_parallelism_threads_ = -1; 67 absl::optional<std::set<int>> allowed_devices_; 68 }; 69 70 // Class which encapsulates an XLA backend. It includes everything necessary 71 // to compile and execute computations on a particular platform. 72 // 73 // It also offers a pooling API for creation/use of initialized streams: 74 // 75 // StreamPool::Ptr stream = backend->BorrowStream().ConsumeValueOrDie(); 76 class Backend { 77 public: 78 // Creates a new backend. 79 static StatusOr<std::unique_ptr<Backend>> CreateBackend( 80 const BackendOptions& options); 81 82 // Creates a backend for the default platform. The default platform is defined 83 // in PlatformUtil. 84 static StatusOr<std::unique_ptr<Backend>> CreateDefaultBackend(); 85 86 ~Backend(); 87 88 // Accessors for the various objects. platform()89 se::Platform* platform() const { return platform_; } compiler()90 Compiler* compiler() const { return compiler_; } memory_allocator()91 se::DeviceMemoryAllocator* memory_allocator() const { 92 return memory_allocator_.get(); 93 } shared_memory_allocator()94 std::shared_ptr<se::DeviceMemoryAllocator> shared_memory_allocator() const { 95 return memory_allocator_; 96 } transfer_manager()97 TransferManager* transfer_manager() const { return transfer_manager_; } computation_placer()98 ComputationPlacer* computation_placer() const { return computation_placer_; } 99 100 // Returns the number of devices of the platform type which are visible. Not 101 // all of these devices may be usable by XLA. device_count()102 int device_count() const { return stream_executors_.size(); } 103 104 // Returns the device ordinal number of the default device. 105 int default_device_ordinal() const; 106 107 // Returns stream executors of all supported devices for this backend. The 108 // executors are ordered by the device ordinal. stream_executors()109 const std::vector<se::StreamExecutor*>& stream_executors() const { 110 return stream_executors_; 111 } 112 113 // Returns the stream executor for the given device ordinal. 114 StatusOr<se::StreamExecutor*> stream_executor(int device_ordinal) const; 115 116 // Returns the stream executor for the default device ordinal. This stream 117 // executor can only be used when the number of computations is 1 (replication 118 // can be > 1). default_stream_executor()119 se::StreamExecutor* default_stream_executor() const { 120 CHECK(!stream_executors_.empty()); 121 return stream_executors_[0]; 122 } 123 124 // Borrows a stream for use by the caller, either by grabbing it from an 125 // internal pool, or by constructing/initializating it, and returns the result 126 // to the caller. 127 StatusOr<StreamPool::Ptr> BorrowStream(int device_ordinal); 128 StatusOr<StreamPool::Ptr> BorrowStream(se::StreamExecutor* executor); 129 130 // Returns a function to borrow a stream, as `BorrowStream` above does. 131 // Purely for convenience, the caller could rather make this anonymous 132 // function itself. StreamBorrower()133 std::function<StatusOr<StreamPool::Ptr>(int)> StreamBorrower() { 134 return [this](int device_ordinal) { return BorrowStream(device_ordinal); }; 135 } 136 137 // Returns whether the given device ordinal of the backend is supported. device_ordinal_supported(int device_ordinal)138 bool device_ordinal_supported(int device_ordinal) const { 139 return (device_ordinal >= 0 && device_ordinal < device_count() && 140 stream_executors_[device_ordinal] != nullptr); 141 } 142 143 // Return a string identifier for the given device, eg: "GPU:3". device_name(int device_ordinal)144 string device_name(int device_ordinal) const { 145 return absl::StrCat(platform_->Name(), ":", device_ordinal); 146 } 147 148 // Returns true if the devices with the given ordinals are equivalent from 149 // XLA's perspective. That is, an executable compiled for one device would 150 // be equivalent to an executable compiled for the other. 151 StatusOr<bool> devices_equivalent(int device_ordinal_a, int device_ordinal_b); 152 153 // For the host platform, returns the configured eigen threadpool device to be 154 // used for scheduling work. For other platforms, returns NULL. 155 const Eigen::ThreadPoolDevice* eigen_intra_op_thread_pool_device() const; 156 tensorflow::thread::ThreadPool* eigen_intra_op_thread_pool() const; 157 158 // Resets the devices associated with this backend. 159 Status ResetDevices(); 160 161 private: 162 Backend(se::Platform* platform, Compiler* compiler, 163 absl::Span<se::StreamExecutor* const> stream_executors, 164 TransferManager* transfer_manager, 165 ComputationPlacer* computation_placer, 166 int intra_op_parallelism_threads); 167 Backend(const Backend&) = delete; 168 Backend& operator=(const Backend&) = delete; 169 170 se::Platform* platform_; 171 Compiler* compiler_; 172 TransferManager* transfer_manager_; 173 ComputationPlacer* computation_placer_; 174 175 // Vector of stream executors. stream_executors_[0] is the default executor. 176 std::vector<se::StreamExecutor*> stream_executors_; 177 178 tensorflow::mutex mu_; 179 180 // Mapping from stream executor to stream pools, used by `BorrowStream` above. 181 absl::flat_hash_map<se::StreamExecutor*, std::unique_ptr<StreamPool>> 182 stream_pools_ TF_GUARDED_BY(mu_); 183 184 // The default memory allocator to use. 185 // This must be a shared_ptr, as this is passed all the way down to the 186 // cluster compilation. This allows asynchronous compilation to hold a 187 // referecence until the compilation is finished. 188 std::shared_ptr<se::StreamExecutorMemoryAllocator> memory_allocator_; 189 190 // For the CPU backend, an Eigen threadpool device for use by Eigen code. 191 struct IntraOpThreadPool; 192 std::unique_ptr<IntraOpThreadPool> intra_op_thread_pool_; 193 }; 194 195 } // namespace xla 196 197 #endif // TENSORFLOW_COMPILER_XLA_SERVICE_BACKEND_H_ 198