• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_FRAMEWORK_DEVICE_BASE_H_
17 #define TENSORFLOW_FRAMEWORK_DEVICE_BASE_H_
18 
19 #include <memory>
20 #include <string>
21 #include <unordered_map>
22 
23 #include "tensorflow/core/framework/tensor.h"
24 #include "tensorflow/core/lib/core/errors.h"
25 #include "tensorflow/core/lib/core/refcount.h"
26 #include "tensorflow/core/lib/core/status.h"
27 #include "tensorflow/core/lib/core/stringpiece.h"
28 #include "tensorflow/core/platform/logging.h"
29 
// Forward declarations of the Eigen device types used by DeviceBase below;
// the full definitions live in Eigen's (unsupported) Tensor module headers.
namespace Eigen {
struct ThreadPoolDevice;
#ifdef TENSORFLOW_USE_SYCL
struct SyclDevice;
#endif
}  // end namespace Eigen
36 
// Forward declaration of the StreamExecutor stream type so this header does
// not need to pull in the full StreamExecutor headers.
namespace perftools {
namespace gputools {
class Stream;
}  // namespace gputools
}  // namespace perftools
42 
43 namespace tensorflow {
44 
// Forward declarations of TensorFlow types referenced only by pointer or
// reference below, keeping this header's include footprint small.
class Device;
class DeviceAttributes;
class Env;
class EventMgr;
class OpKernelContext;
class ResourceMgr;
class TensorProto;

namespace thread {
class ThreadPool;
}
56 
// A wrapper for an Eigen Gpu Device that includes per-op state. The
// class is defined even for non-GPU devices since the
// OpKernelContext::Params structure wants to fill it in.
class PerOpGpuDevice {
 public:
  virtual ~PerOpGpuDevice() {}
  // Returns the wrapped Eigen GPU device. Pure virtual: concrete (GPU)
  // subclasses supply the actual device object.
  virtual const Eigen::GpuDevice& device() const = 0;
};
65 
// A class that devices can subclass to pass around
// Device-specific context to OpKernels.
//
// Instances are reference-counted (core::RefCounted); every default
// implementation below is a safe no-op/failure so CPU-only devices need not
// override anything.
class DeviceContext : public core::RefCounted {
 public:
  ~DeviceContext() override {}

  // The compute stream associated with this context, if any. The default
  // returns nullptr (no stream).
  virtual perftools::gputools::Stream* stream() const { return nullptr; }

  // NOTE(review): presumably keeps "t" alive until work enqueued on "stream"
  // completes — confirm against GPU implementations. The default is a no-op.
  virtual void MaintainLifetimeOnStream(
      const Tensor* t, perftools::gputools::Stream* stream) const {}

  // "cpu_tensor" is a tensor on a CPU. Copies "cpu_tensor" into
  // "device_tensor" which is on a GPU device "device". "device_tensor"
  // must be allocated to be of the same size as "cpu_tensor".
  //
  // The default implementation reports an Internal error through "done";
  // device-specific contexts must override to support the copy.
  virtual void CopyCPUTensorToDevice(const Tensor* cpu_tensor, Device* device,
                                     Tensor* device_tensor,
                                     StatusCallback done) const {
    done(errors::Internal("Unrecognized device type in CPU-to-device Copy"));
  }

  // "device_tensor" is a tensor on a non-CPU device.  Copies
  // device_tensor into "cpu_tensor".  "cpu_tensor" must be allocated
  // to be of the same size as "device_tensor".
  //
  // The default implementation reports an Internal error through "done";
  // device-specific contexts must override to support the copy.
  virtual void CopyDeviceTensorToCPU(const Tensor* device_tensor,
                                     StringPiece tensor_name, Device* device,
                                     Tensor* cpu_tensor, StatusCallback done) {
    done(errors::Internal("Unrecognized device type in device-to-CPU Copy"));
  }
};
93 
94 // map[i] is the DeviceContext* for the node with id i, if i < map.size().
95 typedef std::vector<DeviceContext*> DeviceContextMap;
96 
// Common base class for device implementations. Holds per-device state
// (Env, worker thread pools, GPU info, Eigen devices) that is installed by
// the device at setup time via the set_* methods and queried by kernels at
// run time. All set_* methods store raw pointers without taking ownership.
class DeviceBase {
 public:
  explicit DeviceBase(Env* env) : env_(env) {}
  virtual ~DeviceBase();

  // The Env this device was constructed with. Never changes after
  // construction.
  Env* env() const { return env_; }

  // Override this to return true for devices that require an Op's
  // compute method to save references to the temporary tensors it
  // allocates until the Op execution completes
  virtual bool RequiresRecordingAccessedTensors() const { return false; }

  // CPU thread pool used for op execution, plus its size.
  struct CpuWorkerThreads {
    int num_threads = 0;
    thread::ThreadPool* workers = nullptr;
  };

  // Does not take ownership.
  void set_tensorflow_cpu_worker_threads(CpuWorkerThreads* t) {
    cpu_worker_threads_ = t;
  }

  // CHECK-fails if set_tensorflow_cpu_worker_threads() was never called.
  virtual const CpuWorkerThreads* tensorflow_cpu_worker_threads() const {
    CHECK(cpu_worker_threads_ != nullptr);
    return cpu_worker_threads_;
  }

  // "stream" is used in special circumstances (such as the
  // constructors of Ops) where there is no available OpKernelContext.
  // "default_context" is used by OpKernelContext whenever a device does not
  // supply a DeviceContext for an op in FillContextMap (e.g. when only
  // using a single stream.)
  // "event_mgr" is used to delay deallocation of temporary GPU buffers.
  // TODO(pbar) Work out how to move this out of DeviceBase.
  struct GpuDeviceInfo {
    // Make sure all the defaults are NULL, so we can spot missing assignments.
    perftools::gputools::Stream* stream = nullptr;
    DeviceContext* default_context = nullptr;
    EventMgr* event_mgr = nullptr;
    int gpu_id = -1;
  };

  // Does not take ownership.
  void set_tensorflow_gpu_device_info(GpuDeviceInfo* g) {
    gpu_device_info_ = g;
  }

  // Returns nullptr unless set_tensorflow_gpu_device_info() was called
  // (i.e. on non-GPU devices); callers must handle the null case.
  virtual const GpuDeviceInfo* tensorflow_gpu_device_info() const {
    return gpu_device_info_;
  }

  // The preferred thread pool for this device. If it is nullptr, the system
  // automatically assigns a thread pool for execution.
  virtual thread::ThreadPool* tensorflow_device_thread_pool() {
    return device_thread_pool_;
  }

  // Does not take ownership.
  void set_eigen_cpu_device(Eigen::ThreadPoolDevice* d) {
    eigen_cpu_device_ = d;
  }

#ifdef TENSORFLOW_USE_SYCL
  // Does not take ownership.
  void set_eigen_sycl_device(Eigen::SyclDevice* d) { eigen_sycl_device_ = d; }
#endif

  // Return the Allocator implementation to use based on the allocator
  // attributes requested.  See allocator.h for more details.
  // The base implementation LOG(FATAL)s: every concrete device must
  // override this.
  virtual Allocator* GetAllocator(AllocatorAttributes /*attr*/) {
    LOG(FATAL) << "GetAllocator() is not implemented.";
    return nullptr;
  }

  // Return the Allocator implementation to use based on the allocator
  // attributes requested and the supplied resource manager. By
  // default this ignores the resource manager and calls the base
  // implementation but devices can override if they want to consult
  // the resource manager when choosing the allocator.
  virtual Allocator* GetStepAllocator(AllocatorAttributes attr,
                                      ResourceMgr* /*step_resource_manager*/) {
    return GetAllocator(attr);
  }

  // CHECK-fails if set_eigen_cpu_device() was never called.
  virtual const Eigen::ThreadPoolDevice* eigen_cpu_device() {
    CHECK(eigen_cpu_device_ != nullptr);
    return eigen_cpu_device_;
  }

#ifdef TENSORFLOW_USE_SYCL
  // CHECK-fails if set_eigen_sycl_device() was never called.
  virtual const Eigen::SyclDevice* eigen_sycl_device() const {
    CHECK(eigen_sycl_device_ != nullptr);
    return eigen_sycl_device_;
  }
#endif

  // Caller owns the return value. The OpKernelContext calls this even
  // for devices that do not implement an eigen_gpu_device. Overridden
  // by GPU devices to return a derived type. The default returns nullptr.
  virtual PerOpGpuDevice* MakeGpuDevice() { return nullptr; }

  // Default to the device itself; wrapper devices may override to expose
  // the device they delegate to.
  virtual DeviceBase* UnderlyingDevice() { return this; }
  virtual const DeviceBase* UnderlyingDevice() const { return this; }

  // This is overridden by GPU devices to reinitialize the derived
  // type returned by MakeGpuDevice. The default is a no-op.
  virtual void ReinitializeGpuDevice(OpKernelContext* /*context*/,
                                     PerOpGpuDevice* /*device*/,
                                     DeviceContext* /*dc*/,
                                     Allocator* /*allocator*/) {}

  // Unimplemented by default
  virtual const DeviceAttributes& attributes() const;
  virtual const string& name() const;

  // Materializes the given TensorProto into 'tensor' stored in Device
  // memory.  Most devices will want to override this; the default returns
  // an Internal error.
  //
  // TODO(vrv): We should be able to put this function into
  // OpKernelContext and handle the copies from device memory via send
  // and receive nodes, instead of requiring that each device handle
  // the copies here as well as in copy ops.
  virtual Status MakeTensorFromProto(const TensorProto& tensor_proto,
                                     const AllocatorAttributes alloc_attrs,
                                     Tensor* tensor) {
    return errors::Internal("Device does not implement MakeTensorFromProto()");
  }

 protected:
  // Does not take ownership.
  void set_tensorflow_device_thread_pool(thread::ThreadPool* thread_pool) {
    device_thread_pool_ = thread_pool;
  }

 private:
  Env* const env_;
  // All of the pointers below are non-owning and default to nullptr so that
  // missing setup is detectable (see the CHECKs in the accessors above).
  CpuWorkerThreads* cpu_worker_threads_ = nullptr;
  GpuDeviceInfo* gpu_device_info_ = nullptr;
  thread::ThreadPool* device_thread_pool_ = nullptr;
  Eigen::ThreadPoolDevice* eigen_cpu_device_ = nullptr;
#ifdef TENSORFLOW_USE_SYCL
  Eigen::SyclDevice* eigen_sycl_device_ = nullptr;
#endif
};
240 
241 }  // namespace tensorflow
242 
243 #endif  // TENSORFLOW_FRAMEWORK_DEVICE_BASE_H_
244