1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_BACKEND_H_ 17 #define TENSORFLOW_COMPILER_XLA_SERVICE_BACKEND_H_ 18 19 #include <map> 20 #include <memory> 21 #include <set> 22 #include <string> 23 #include <vector> 24 25 #include "absl/container/flat_hash_map.h" 26 #include "absl/strings/str_cat.h" 27 #include "absl/types/span.h" 28 #include "tensorflow/compiler/xla/service/compiler.h" 29 #include "tensorflow/compiler/xla/service/computation_placer.h" 30 #include "tensorflow/compiler/xla/service/stream_pool.h" 31 #include "tensorflow/compiler/xla/service/transfer_manager.h" 32 #include "tensorflow/compiler/xla/statusor.h" 33 #include "tensorflow/compiler/xla/types.h" 34 #include "tensorflow/core/platform/mutex.h" 35 #include "tensorflow/core/platform/stream_executor_no_cuda.h" 36 #include "tensorflow/core/platform/thread_annotations.h" 37 #include "tensorflow/stream_executor/device_memory_allocator.h" 38 39 namespace Eigen { 40 struct ThreadPoolDevice; 41 } 42 43 namespace xla { 44 45 // Options to configure the backend when it is created. 46 class BackendOptions { 47 public: 48 // Set the platform backing the backend, or nullptr for the default platform. 49 BackendOptions& set_platform(se::Platform* platform); 50 se::Platform* platform() const; 51 52 // Sets the thread pool size for parallel execution of an individual operator. 53 // The default value of -1 will result in initializing the thread pool with 54 // the number of threads equal to the number of cores in the system. 55 BackendOptions& set_intra_op_parallelism_threads(int num_threads); 56 int intra_op_parallelism_threads() const; 57 58 // Sets the allowed_devices for selectively constructing stream executors 59 // on the platform. 60 BackendOptions& set_allowed_devices( 61 const absl::optional<std::set<int>>& allowed_devices); 62 const absl::optional<std::set<int>>& allowed_devices() const; 63 64 private: 65 se::Platform* platform_ = nullptr; 66 int intra_op_parallelism_threads_ = -1; 67 absl::optional<std::set<int>> allowed_devices_; 68 }; 69 70 // Class which encapsulates an XLA backend. It includes everything necessary 71 // to compile and execute computations on a particular platform. 72 // 73 // It also offers a pooling API for creation/use of initialized streams: 74 // 75 // StreamPool::Ptr stream = backend->BorrowStream().ConsumeValueOrDie(); 76 class Backend { 77 public: 78 // Creates a new backend. 79 static StatusOr<std::unique_ptr<Backend>> CreateBackend( 80 const BackendOptions& options); 81 82 // Creates a backend for the default platform. The default platform is defined 83 // in PlatformUtil. 84 static StatusOr<std::unique_ptr<Backend>> CreateDefaultBackend(); 85 86 ~Backend(); 87 88 // Accessors for the various objects. platform()89 se::Platform* platform() const { return platform_; } compiler()90 Compiler* compiler() const { return compiler_; } memory_allocator()91 se::DeviceMemoryAllocator* memory_allocator() const { 92 return memory_allocator_.get(); 93 } transfer_manager()94 TransferManager* transfer_manager() const { return transfer_manager_; } computation_placer()95 ComputationPlacer* computation_placer() const { return computation_placer_; } 96 97 // Returns the number of devices of the platform type which are visible. Not 98 // all of these devices may be usable by XLA. device_count()99 int device_count() const { return stream_executors_.size(); } 100 101 // Returns the device ordinal number of the default device. 102 int default_device_ordinal() const; 103 104 // Returns stream executors of all supported devices for this backend. The 105 // executors are ordered by the device ordinal. stream_executors()106 const std::vector<se::StreamExecutor*>& stream_executors() const { 107 return stream_executors_; 108 } 109 110 // Returns the stream executor for the given device ordinal. 111 StatusOr<se::StreamExecutor*> stream_executor(int device_ordinal) const; 112 113 // Returns the stream executor for the default device ordinal. This stream 114 // executor can only be used when the number of computations is 1 (replication 115 // can be > 1). default_stream_executor()116 se::StreamExecutor* default_stream_executor() const { 117 CHECK(!stream_executors_.empty()); 118 return stream_executors_[0]; 119 } 120 121 // Borrows a stream for use by the caller, either by grabbing it from an 122 // internal pool, or by constructing/initializating it, and returns the result 123 // to the caller. 124 StatusOr<StreamPool::Ptr> BorrowStream(int device_ordinal); 125 StatusOr<StreamPool::Ptr> BorrowStream(se::StreamExecutor* executor); 126 127 // Returns a function to borrow a stream, as `BorrowStream` above does. 128 // Purely for convenience, the caller could rather make this anonymous 129 // function itself. StreamBorrower()130 std::function<StatusOr<StreamPool::Ptr>(int)> StreamBorrower() { 131 return [this](int device_ordinal) { return BorrowStream(device_ordinal); }; 132 } 133 134 // Returns whether the given device ordinal of the backend is supported. device_ordinal_supported(int device_ordinal)135 bool device_ordinal_supported(int device_ordinal) const { 136 return (device_ordinal >= 0 && device_ordinal < device_count() && 137 stream_executors_[device_ordinal] != nullptr); 138 } 139 140 // Return a string identifier for the given device, eg: "GPU:3". device_name(int device_ordinal)141 string device_name(int device_ordinal) const { 142 return absl::StrCat(platform_->Name(), ":", device_ordinal); 143 } 144 145 // Returns true if the devices with the given ordinals are equivalent from 146 // XLA's perspective. That is, an executable compiled for one device would 147 // be equivalent to an executable compiled for the other. 148 StatusOr<bool> devices_equivalent(int device_ordinal_a, int device_ordinal_b); 149 150 // For the host platform, returns the configured eigen threadpool device to be 151 // used for scheduling work. For other platforms, returns NULL. 152 const Eigen::ThreadPoolDevice* eigen_intra_op_thread_pool_device() const; 153 tensorflow::thread::ThreadPool* eigen_intra_op_thread_pool() const; 154 155 // Resets the devices associated with this backend. 156 Status ResetDevices(); 157 158 private: 159 Backend(se::Platform* platform, Compiler* compiler, 160 absl::Span<se::StreamExecutor* const> stream_executors, 161 TransferManager* transfer_manager, 162 ComputationPlacer* computation_placer, 163 int intra_op_parallelism_threads); 164 Backend(const Backend&) = delete; 165 Backend& operator=(const Backend&) = delete; 166 167 se::Platform* platform_; 168 Compiler* compiler_; 169 TransferManager* transfer_manager_; 170 ComputationPlacer* computation_placer_; 171 172 // Vector of stream executors. stream_executors_[0] is the default executor. 173 std::vector<se::StreamExecutor*> stream_executors_; 174 175 tensorflow::mutex mu_; 176 177 // Mapping from stream executor to stream pools, used by `BorrowStream` above. 178 absl::flat_hash_map<se::StreamExecutor*, std::unique_ptr<StreamPool>> 179 stream_pools_ TF_GUARDED_BY(mu_); 180 181 // The default memory allocator to use. 182 std::unique_ptr<se::StreamExecutorMemoryAllocator> memory_allocator_; 183 184 // For the CPU backend, an Eigen threadpool device for use by Eigen code. 185 struct IntraOpThreadPool; 186 std::unique_ptr<IntraOpThreadPool> intra_op_thread_pool_; 187 }; 188 189 } // namespace xla 190 191 #endif // TENSORFLOW_COMPILER_XLA_SERVICE_BACKEND_H_ 192