/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_CORE_FRAMEWORK_DEVICE_BASE_H_
#define TENSORFLOW_CORE_FRAMEWORK_DEVICE_BASE_H_

#include <memory>
#include <string>
#include <vector>

#include "absl/base/macros.h"
#include "absl/strings/string_view.h"
#include "tensorflow/core/framework/device_attributes.pb.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/core/refcount.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/lib/core/stringpiece.h"
#include "tensorflow/core/platform/logging.h"

namespace Eigen {
struct ThreadPoolDevice;
#ifdef TENSORFLOW_USE_SYCL
struct SyclDevice;
#endif
}  // end namespace Eigen

namespace stream_executor {
class Stream;
}  // namespace stream_executor

namespace tensorflow {

class Device;
class DeviceAttributes;
class Env;
class EventMgr;
class OpKernelContext;
class ResourceMgr;
class ScopedAllocatorMgr;
class TensorProto;

namespace thread {
class ThreadPool;
}
// A wrapper for an Eigen Gpu Device that includes per-op state. The
// class is defined even for non-GPU devices since the
// OpKernelContext::Params structure wants to fill it in.
class PerOpGpuDevice {
 public:
  virtual ~PerOpGpuDevice() {}
  virtual const Eigen::GpuDevice& device() const = 0;
};
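
// Example (illustrative sketch, not the actual TensorFlow implementation): a
// GPU backend could satisfy this interface by wrapping an Eigen::GpuDevice it
// owns. The class and member names below are hypothetical, and the wrapped
// device is assumed to be constructed elsewhere with a valid stream:
//
//   class MyPerOpGpuDevice : public PerOpGpuDevice {
//    public:
//     const Eigen::GpuDevice& device() const override { return *device_; }
//
//    private:
//     Eigen::GpuDevice* device_;  // assumed: initialized by the device
//   };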

// A class that devices can subclass to pass around
// Device-specific context to OpKernels.
class DeviceContext : public core::RefCounted {
 public:
  ~DeviceContext() override {}
  virtual stream_executor::Stream* stream() const { return nullptr; }
  virtual void MaintainLifetimeOnStream(const Tensor* t,
                                        stream_executor::Stream* stream) const {
  }

  // "cpu_tensor" is a tensor on a CPU. Copies "cpu_tensor" into
  // "device_tensor", which is on a non-CPU device "device". "device_tensor"
  // must be allocated to be of the same size as "cpu_tensor".
  virtual void CopyCPUTensorToDevice(const Tensor* cpu_tensor, Device* device,
                                     Tensor* device_tensor, StatusCallback done,
                                     bool sync_dst_compute = true) const {
    done(errors::Internal("Unrecognized device type in CPU-to-device Copy"));
  }

  // Copies a tensor within this device.
  virtual void CopyTensorInSameDevice(const Tensor* input_tensor,
                                      Device* device, Tensor* output_tensor,
                                      StatusCallback done) const {
    done(errors::Unimplemented("Copy in same device not implemented."));
  }

  // "device_tensor" is a tensor on a non-CPU device.  Copies
  // "device_tensor" into "cpu_tensor".  "cpu_tensor" must be allocated
  // to be of the same size as "device_tensor".
  virtual void CopyDeviceTensorToCPU(const Tensor* device_tensor,
                                     StringPiece tensor_name, Device* device,
                                     Tensor* cpu_tensor, StatusCallback done) {
    done(errors::Internal("Unrecognized device type in device-to-CPU Copy"));
  }

  // If possible, wait for all events on *stream to complete, then execute
  // func. A non-OK Status is returned otherwise. The stream argument should
  // be the one provided by GpuDeviceInfo. This function is not applicable to
  // devices that don't provide such a value.
  virtual Status ThenExecute(Device* device, stream_executor::Stream* stream,
                             std::function<void()> func) {
    return errors::Internal("ThenExecute not supported by device");
  }
};
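
// Example (illustrative sketch): a caller holding a DeviceContext for a
// non-CPU device might bring a device tensor back to host memory like this.
// The variable names are hypothetical, and both tensors are assumed to be
// pre-allocated with matching sizes:
//
//   device_context->CopyDeviceTensorToCPU(
//       &device_tensor, "my_tensor", device, &cpu_tensor,
//       [](const Status& s) {
//         if (!s.ok()) LOG(ERROR) << "device-to-CPU copy failed: " << s;
//       });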

// map[i] is the DeviceContext* for the node with id i, if i < map.size().
typedef std::vector<DeviceContext*> DeviceContextMap;
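
// Example (illustrative): a bounds-checked lookup that falls back to nullptr
// for nodes with no registered context; `map` and `node_id` are hypothetical:
//
//   DeviceContext* dc =
//       (node_id < map.size()) ? map[node_id] : nullptr;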

class DeviceBase {
 public:
  explicit DeviceBase(Env* env) : env_(env) {}
  virtual ~DeviceBase();

  Env* env() const { return env_; }

  // Override this to return true for devices that require an Op's
  // compute method to save references to the temporary tensors it
  // allocates until the Op execution completes.
  virtual bool RequiresRecordingAccessedTensors() const { return false; }

  struct CpuWorkerThreads {
    int num_threads = 0;
    thread::ThreadPool* workers = nullptr;
  };

  // Does not take ownership.
  void set_tensorflow_cpu_worker_threads(CpuWorkerThreads* t) {
    cpu_worker_threads_ = t;
  }

  virtual const CpuWorkerThreads* tensorflow_cpu_worker_threads() const {
    CHECK(cpu_worker_threads_ != nullptr);
    return cpu_worker_threads_;
  }
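
  // Example (illustrative sketch): an op can spread CPU work across the
  // device's worker pool. thread::ThreadPool::Schedule enqueues a closure;
  // the per-shard bookkeeping here is hypothetical:
  //
  //   const CpuWorkerThreads* t = device->tensorflow_cpu_worker_threads();
  //   for (int i = 0; i < t->num_threads; ++i) {
  //     t->workers->Schedule([i]() { /* process shard i */ });
  //   }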

  // "stream" is used in special circumstances (such as the
  // constructors of Ops) where there is no available OpKernelContext.
  // "default_context" is used by OpKernelContext whenever a device does not
  // supply a DeviceContext for an op in TryGetDeviceContext() (e.g. when only
  // using a single stream).
  // "event_mgr" is used to delay deallocation of temporary GPU buffers.
  // TODO(pbar) Work out how to move this out of DeviceBase.
  // The GpuDeviceInfo name is an unfortunate legacy; it is used not only by
  // GPUs but also by TPU devices (to provide the default device context).
  struct GpuDeviceInfo {
    // Make sure all the defaults are NULL, so we can spot missing assignments.
    stream_executor::Stream* stream = nullptr;
    DeviceContext* default_context = nullptr;
    EventMgr* event_mgr = nullptr;
    int gpu_id = -1;
  };

  // Does not take ownership.
  void set_tensorflow_gpu_device_info(GpuDeviceInfo* g) {
    gpu_device_info_ = g;
  }

  virtual const GpuDeviceInfo* tensorflow_gpu_device_info() const {
    return gpu_device_info_;
  }
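
  // Example (illustrative sketch): callers that need the device's primary
  // stream should handle devices that never populate this struct:
  //
  //   const DeviceBase::GpuDeviceInfo* info =
  //       device->tensorflow_gpu_device_info();
  //   stream_executor::Stream* stream =
  //       (info != nullptr) ? info->stream : nullptr;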

  // The preferred thread pool for this device. If it is nullptr, the system
  // automatically assigns a thread pool for execution.
  virtual thread::ThreadPool* tensorflow_device_thread_pool() {
    return device_thread_pool_;
  }

  // Does not take ownership.
  void set_eigen_cpu_device(Eigen::ThreadPoolDevice* d);

#ifdef TENSORFLOW_USE_SYCL
  void set_eigen_sycl_device(Eigen::SyclDevice* d) { eigen_sycl_device_ = d; }
#endif

  // Return the Allocator implementation to use based on the allocator
  // attributes requested.  See allocator.h for more details.
  virtual Allocator* GetAllocator(AllocatorAttributes /*attr*/) {
    LOG(FATAL) << "GetAllocator() is not implemented.";
    return nullptr;
  }
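
  // Example (illustrative sketch): requesting an allocator for host-visible
  // memory. AllocatorAttributes and its set_on_host() setter are declared in
  // allocator.h:
  //
  //   AllocatorAttributes attr;
  //   attr.set_on_host(true);
  //   Allocator* allocator = device->GetAllocator(attr);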

  // This method is provided for backwards compatibility, and will be removed
  // in a future release.
  ABSL_DEPRECATED("Use `this->GetAllocator()` or `this->GetScopedAllocator()`.")
  Allocator* GetStepAllocator(AllocatorAttributes attr, ResourceMgr*) {
    return GetAllocator(attr);
  }

  // Return an Allocator prepared for use in particular places by graph
  // optimization.
  virtual Allocator* GetScopedAllocator(AllocatorAttributes attr,
                                        int64 step_id) {
    LOG(FATAL) << "Device does not implement GetScopedAllocator()";
    return nullptr;
  }

  virtual ScopedAllocatorMgr* GetScopedAllocatorMgr() const { return nullptr; }

  virtual bool has_eigen_cpu_device() const {
    return !eigen_cpu_devices_.empty();
  }

  virtual const Eigen::ThreadPoolDevice* eigen_cpu_device();

#ifdef TENSORFLOW_USE_SYCL
  virtual const Eigen::SyclDevice* eigen_sycl_device() const {
    CHECK(eigen_sycl_device_ != nullptr);
    return eigen_sycl_device_;
  }
#endif

  // Caller owns the return value. The OpKernelContext calls this even
  // for devices that do not implement an eigen_gpu_device. Overridden
  // by GPU devices to return a derived type.
  virtual PerOpGpuDevice* MakeGpuDevice() { return nullptr; }

  virtual DeviceBase* UnderlyingDevice() { return this; }
  virtual const DeviceBase* UnderlyingDevice() const { return this; }

  // This is overridden by GPU devices to reinitialize the derived
  // type returned by MakeGpuDevice.
  virtual Status ReinitializeGpuDevice(OpKernelContext* /*context*/,
                                       PerOpGpuDevice* /*device*/,
                                       DeviceContext* /*dc*/,
                                       Allocator* /*allocator*/) {
    return Status::OK();
  }

  // Unimplemented by default.
  virtual const DeviceAttributes& attributes() const;
  virtual int NumaNode() const { return attributes().locality().numa_node(); }
  virtual const string& name() const;

  // Materializes the given TensorProto into 'tensor' stored in Device
  // memory.  Most devices will want to override this.
  //
  // TODO(vrv): We should be able to put this function into
  // OpKernelContext and handle the copies from device memory via send
  // and receive nodes, instead of requiring that each device handle
  // the copies here as well as in copy ops.
  virtual Status MakeTensorFromProto(const TensorProto& tensor_proto,
                                     const AllocatorAttributes alloc_attrs,
                                     Tensor* tensor) {
    return errors::Internal("Device does not implement MakeTensorFromProto()");
  }
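
  // Example (illustrative sketch): materializing a TensorProto onto this
  // device; `proto` and `attr` are hypothetical and assumed to be populated
  // by the caller:
  //
  //   Tensor t;
  //   Status s = device->MakeTensorFromProto(proto, attr, &t);
  //   if (!s.ok()) { /* handle the error */ }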

  // Some devices (e.g. GPUs) may free device memory prior to its actual use
  // being completed on the assumption that subsequent allocations can only be
  // used serially with respect to pending uses.  If this function returns a
  // non-zero value it is the value of a device-specific counter such that any
  // device memory tagged with an earlier freed-at count is really unencumbered
  // by pending uses.  For this to be useful the device memory allocator must
  // be tagging deallocated memory chunks using the same counter.
  virtual uint64 SafeAllocFrontier(uint64 old_value) { return 0; }
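
  // Worked example (illustrative): if SafeAllocFrontier returns 42, a chunk
  // the allocator tagged as freed at count 41 is no longer encumbered by
  // pending uses, while a chunk tagged 42 or later may still be in use.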

  // Copies `input_tensor` to `output_tensor`, where both tensors are on this
  // device. This function assumes that `output_tensor` has already been
  // allocated with a buffer that is large enough to hold `input_tensor`'s
  // data. Calls `done` from a device-specific thread after the copy is
  // finished, which may be the same as the calling thread.
  //
  // NOTE(ayushd): This function is for TensorFlow internal use only.  Deep
  // copy is discouraged and should not be used in OpKernels.
  virtual void CopyTensorInSameDevice(const Tensor* input_tensor,
                                      Tensor* output_tensor,
                                      const DeviceContext* device_context,
                                      StatusCallback done) {
    done(errors::Internal("Device ", name(), " does not implement ",
                          "CopyTensorInSameDevice"));
  }

 protected:
  // Does not take ownership.
  void set_tensorflow_device_thread_pool(thread::ThreadPool* thread_pool) {
    device_thread_pool_ = thread_pool;
  }

 private:
  Env* const env_;
  CpuWorkerThreads* cpu_worker_threads_ = nullptr;
  // Set by GPUs as well as by TPU devices.
  GpuDeviceInfo* gpu_device_info_ = nullptr;
  thread::ThreadPool* device_thread_pool_ = nullptr;
  std::vector<Eigen::ThreadPoolDevice*> eigen_cpu_devices_;
#ifdef TENSORFLOW_USE_SYCL
  Eigen::SyclDevice* eigen_sycl_device_ = nullptr;
#endif
};

// Methods to create and check for symbolic execution devices. Such devices
// are mostly used for the TF-XLA bridge. TF should not treat these as normal
// devices.
void AddSymbolicExecutionDevice(absl::string_view device_name);
bool IsSymbolicExecutionDevice(absl::string_view device_name);
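
// Example (illustrative sketch; the device name below is hypothetical):
//
//   AddSymbolicExecutionDevice("/device:XLA_DUMMY:0");
//   CHECK(IsSymbolicExecutionDevice("/device:XLA_DUMMY:0"));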

}  // namespace tensorflow

#endif  // TENSORFLOW_CORE_FRAMEWORK_DEVICE_BASE_H_