• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
15 
16 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_BACKEND_H_
17 #define TENSORFLOW_COMPILER_XLA_SERVICE_BACKEND_H_
18 
19 #include <map>
20 #include <memory>
21 #include <set>
22 #include <string>
23 #include <vector>
24 
25 #include "absl/container/flat_hash_map.h"
26 #include "absl/strings/str_cat.h"
27 #include "absl/types/span.h"
28 #include "tensorflow/compiler/xla/service/compiler.h"
29 #include "tensorflow/compiler/xla/service/computation_placer.h"
30 #include "tensorflow/compiler/xla/service/stream_pool.h"
31 #include "tensorflow/compiler/xla/service/transfer_manager.h"
32 #include "tensorflow/compiler/xla/statusor.h"
33 #include "tensorflow/compiler/xla/types.h"
34 #include "tensorflow/core/platform/mutex.h"
35 #include "tensorflow/core/platform/stream_executor_no_cuda.h"
36 #include "tensorflow/core/platform/thread_annotations.h"
37 #include "tensorflow/stream_executor/device_memory_allocator.h"
38 
39 namespace Eigen {
40 struct ThreadPoolDevice;
41 }
42 
43 namespace xla {
44 
45 // Options to configure the backend when it is created.
46 class BackendOptions {
47  public:
48   // Set the platform backing the backend, or nullptr for the default platform.
49   BackendOptions& set_platform(se::Platform* platform);
50   se::Platform* platform() const;
51 
52   // Sets the thread pool size for parallel execution of an individual operator.
53   // The default value of -1 will result in initializing the thread pool with
54   // the number of threads equal to the number of cores in the system.
55   BackendOptions& set_intra_op_parallelism_threads(int num_threads);
56   int intra_op_parallelism_threads() const;
57 
58   // Sets the allowed_devices for selectively constructing stream executors
59   // on the platform.
60   BackendOptions& set_allowed_devices(
61       const absl::optional<std::set<int>>& allowed_devices);
62   const absl::optional<std::set<int>>& allowed_devices() const;
63 
64  private:
65   se::Platform* platform_ = nullptr;
66   int intra_op_parallelism_threads_ = -1;
67   absl::optional<std::set<int>> allowed_devices_;
68 };
69 
70 // Class which encapsulates an XLA backend. It includes everything necessary
71 // to compile and execute computations on a particular platform.
72 //
73 // It also offers a pooling API for creation/use of initialized streams:
74 //
75 //    StreamPool::Ptr stream = backend->BorrowStream().ConsumeValueOrDie();
76 class Backend {
77  public:
78   // Creates a new backend.
79   static StatusOr<std::unique_ptr<Backend>> CreateBackend(
80       const BackendOptions& options);
81 
82   // Creates a backend for the default platform. The default platform is defined
83   // in PlatformUtil.
84   static StatusOr<std::unique_ptr<Backend>> CreateDefaultBackend();
85 
86   ~Backend();
87 
88   // Accessors for the various objects.
platform()89   se::Platform* platform() const { return platform_; }
compiler()90   Compiler* compiler() const { return compiler_; }
memory_allocator()91   se::DeviceMemoryAllocator* memory_allocator() const {
92     return memory_allocator_.get();
93   }
shared_memory_allocator()94   std::shared_ptr<se::DeviceMemoryAllocator> shared_memory_allocator() const {
95     return memory_allocator_;
96   }
transfer_manager()97   TransferManager* transfer_manager() const { return transfer_manager_; }
computation_placer()98   ComputationPlacer* computation_placer() const { return computation_placer_; }
99 
100   // Returns the number of devices of the platform type which are visible. Not
101   // all of these devices may be usable by XLA.
device_count()102   int device_count() const { return stream_executors_.size(); }
103 
104   // Returns the device ordinal number of the default device.
105   int default_device_ordinal() const;
106 
107   // Returns stream executors of all supported devices for this backend. The
108   // executors are ordered by the device ordinal.
stream_executors()109   const std::vector<se::StreamExecutor*>& stream_executors() const {
110     return stream_executors_;
111   }
112 
113   // Returns the stream executor for the given device ordinal.
114   StatusOr<se::StreamExecutor*> stream_executor(int device_ordinal) const;
115 
116   // Returns the stream executor for the default device ordinal. This stream
117   // executor can only be used when the number of computations is 1 (replication
118   // can be > 1).
default_stream_executor()119   se::StreamExecutor* default_stream_executor() const {
120     CHECK(!stream_executors_.empty());
121     return stream_executors_[0];
122   }
123 
124   // Borrows a stream for use by the caller, either by grabbing it from an
125   // internal pool, or by constructing/initializating it, and returns the result
126   // to the caller.
127   StatusOr<StreamPool::Ptr> BorrowStream(int device_ordinal);
128   StatusOr<StreamPool::Ptr> BorrowStream(se::StreamExecutor* executor);
129 
130   // Returns a function to borrow a stream, as `BorrowStream` above does.
131   // Purely for convenience, the caller could rather make this anonymous
132   // function itself.
StreamBorrower()133   std::function<StatusOr<StreamPool::Ptr>(int)> StreamBorrower() {
134     return [this](int device_ordinal) { return BorrowStream(device_ordinal); };
135   }
136 
137   // Returns whether the given device ordinal of the backend is supported.
device_ordinal_supported(int device_ordinal)138   bool device_ordinal_supported(int device_ordinal) const {
139     return (device_ordinal >= 0 && device_ordinal < device_count() &&
140             stream_executors_[device_ordinal] != nullptr);
141   }
142 
143   // Return a string identifier for the given device, eg: "GPU:3".
device_name(int device_ordinal)144   string device_name(int device_ordinal) const {
145     return absl::StrCat(platform_->Name(), ":", device_ordinal);
146   }
147 
148   // Returns true if the devices with the given ordinals are equivalent from
149   // XLA's perspective. That is, an executable compiled for one device would
150   // be equivalent to an executable compiled for the other.
151   StatusOr<bool> devices_equivalent(int device_ordinal_a, int device_ordinal_b);
152 
153   // For the host platform, returns the configured eigen threadpool device to be
154   // used for scheduling work. For other platforms, returns NULL.
155   const Eigen::ThreadPoolDevice* eigen_intra_op_thread_pool_device() const;
156   tensorflow::thread::ThreadPool* eigen_intra_op_thread_pool() const;
157 
158   // Resets the devices associated with this backend.
159   Status ResetDevices();
160 
161  private:
162   Backend(se::Platform* platform, Compiler* compiler,
163           absl::Span<se::StreamExecutor* const> stream_executors,
164           TransferManager* transfer_manager,
165           ComputationPlacer* computation_placer,
166           int intra_op_parallelism_threads);
167   Backend(const Backend&) = delete;
168   Backend& operator=(const Backend&) = delete;
169 
170   se::Platform* platform_;
171   Compiler* compiler_;
172   TransferManager* transfer_manager_;
173   ComputationPlacer* computation_placer_;
174 
175   // Vector of stream executors. stream_executors_[0] is the default executor.
176   std::vector<se::StreamExecutor*> stream_executors_;
177 
178   tensorflow::mutex mu_;
179 
180   // Mapping from stream executor to stream pools, used by `BorrowStream` above.
181   absl::flat_hash_map<se::StreamExecutor*, std::unique_ptr<StreamPool>>
182       stream_pools_ TF_GUARDED_BY(mu_);
183 
184   // The default memory allocator to use.
185   // This must be a shared_ptr, as this is passed all the way down to the
186   // cluster compilation. This allows asynchronous compilation to hold a
187   // referecence until the compilation is finished.
188   std::shared_ptr<se::StreamExecutorMemoryAllocator> memory_allocator_;
189 
190   // For the CPU backend, an Eigen threadpool device for use by Eigen code.
191   struct IntraOpThreadPool;
192   std::unique_ptr<IntraOpThreadPool> intra_op_thread_pool_;
193 };
194 
195 }  // namespace xla
196 
197 #endif  // TENSORFLOW_COMPILER_XLA_SERVICE_BACKEND_H_
198