/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_FRAMEWORK_DEVICE_BASE_H_
#define TENSORFLOW_FRAMEWORK_DEVICE_BASE_H_

#include <memory>
#include <string>
#include <unordered_map>

#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/core/refcount.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/lib/core/stringpiece.h"
#include "tensorflow/core/platform/logging.h"

// Forward declarations so this widely-included header does not need to pull
// in the Eigen headers.
namespace Eigen {
struct ThreadPoolDevice;
#ifdef TENSORFLOW_USE_SYCL
struct SyclDevice;
#endif
}  // end namespace Eigen

// Forward declaration of the StreamExecutor stream type.
namespace perftools {
namespace gputools {
class Stream;
}  // namespace gputools
}  // namespace perftools

namespace tensorflow {

class Device;
class DeviceAttributes;
class Env;
class EventMgr;
class OpKernelContext;
class ResourceMgr;
class TensorProto;

namespace thread {
class ThreadPool;
}

// A wrapper for an Eigen Gpu Device that includes per-op state. The
// class is defined even for non-GPU devices since the
// OpKernelContext::Params structure wants to fill it in.
60 class PerOpGpuDevice { 61 public: ~PerOpGpuDevice()62 virtual ~PerOpGpuDevice() {} 63 virtual const Eigen::GpuDevice& device() const = 0; 64 }; 65 66 // A class that devices can subclass to pass around 67 // Device-specific context to OpKernels. 68 class DeviceContext : public core::RefCounted { 69 public: ~DeviceContext()70 ~DeviceContext() override {} stream()71 virtual perftools::gputools::Stream* stream() const { return nullptr; } MaintainLifetimeOnStream(const Tensor * t,perftools::gputools::Stream * stream)72 virtual void MaintainLifetimeOnStream( 73 const Tensor* t, perftools::gputools::Stream* stream) const {} 74 75 // "cpu_tensor" is a tensor on a CPU. Copies "cpu_tensor" into 76 // "device_tensor" which is on a GPU device "device". "device_tensor" 77 // must be allocated to be of the same size as "cpu_tensor". CopyCPUTensorToDevice(const Tensor * cpu_tensor,Device * device,Tensor * device_tensor,StatusCallback done)78 virtual void CopyCPUTensorToDevice(const Tensor* cpu_tensor, Device* device, 79 Tensor* device_tensor, 80 StatusCallback done) const { 81 done(errors::Internal("Unrecognized device type in CPU-to-device Copy")); 82 } 83 84 // "device_tensor" is a tensor on a non-CPU device. Copies 85 // device_tensor into "cpu_tensor". "cpu_tensor" must be allocated 86 // to be of the same size as "device_tensor". CopyDeviceTensorToCPU(const Tensor * device_tensor,StringPiece tensor_name,Device * device,Tensor * cpu_tensor,StatusCallback done)87 virtual void CopyDeviceTensorToCPU(const Tensor* device_tensor, 88 StringPiece tensor_name, Device* device, 89 Tensor* cpu_tensor, StatusCallback done) { 90 done(errors::Internal("Unrecognized device type in device-to-CPU Copy")); 91 } 92 }; 93 94 // map[i] is the DeviceContext* for the node with id i, if i < map.size(). 
95 typedef std::vector<DeviceContext*> DeviceContextMap; 96 97 class DeviceBase { 98 public: DeviceBase(Env * env)99 explicit DeviceBase(Env* env) : env_(env) {} 100 virtual ~DeviceBase(); 101 env()102 Env* env() const { return env_; } 103 104 // Override this to return true for devices that require an Op's 105 // compute method to save references to the temporary tensors it 106 // allocates until the Op execution completes RequiresRecordingAccessedTensors()107 virtual bool RequiresRecordingAccessedTensors() const { return false; } 108 109 struct CpuWorkerThreads { 110 int num_threads = 0; 111 thread::ThreadPool* workers = nullptr; 112 }; 113 114 // Does not take ownership. set_tensorflow_cpu_worker_threads(CpuWorkerThreads * t)115 void set_tensorflow_cpu_worker_threads(CpuWorkerThreads* t) { 116 cpu_worker_threads_ = t; 117 } 118 tensorflow_cpu_worker_threads()119 virtual const CpuWorkerThreads* tensorflow_cpu_worker_threads() const { 120 CHECK(cpu_worker_threads_ != nullptr); 121 return cpu_worker_threads_; 122 } 123 124 // "stream" is used in special circumstances (such as the 125 // constructors of Ops) where there is no available OpKernelContext. 126 // "default_context" is used by OpKernelContext whenever a device does not 127 // supply a DeviceContext for an op in FillContextMap (e.g. when only 128 // using a single stream.) 129 // "event_mgr" is used to delay deallocation of temporary GPU buffers. 130 // TODO(pbar) Work out how to move this out of DeviceBase. 131 struct GpuDeviceInfo { 132 // Make sure all the defaults are NULL, so we can spot missing assignments. 133 perftools::gputools::Stream* stream = nullptr; 134 DeviceContext* default_context = nullptr; 135 EventMgr* event_mgr = nullptr; 136 int gpu_id = -1; 137 }; 138 139 // Does not take ownership. 
set_tensorflow_gpu_device_info(GpuDeviceInfo * g)140 void set_tensorflow_gpu_device_info(GpuDeviceInfo* g) { 141 gpu_device_info_ = g; 142 } 143 tensorflow_gpu_device_info()144 virtual const GpuDeviceInfo* tensorflow_gpu_device_info() const { 145 return gpu_device_info_; 146 } 147 148 // The preferred thread pool for this device. If it is nullptr, the system 149 // automatically assigns a thread pool for execution. tensorflow_device_thread_pool()150 virtual thread::ThreadPool* tensorflow_device_thread_pool() { 151 return device_thread_pool_; 152 } 153 154 // Does not take ownership. set_eigen_cpu_device(Eigen::ThreadPoolDevice * d)155 void set_eigen_cpu_device(Eigen::ThreadPoolDevice* d) { 156 eigen_cpu_device_ = d; 157 } 158 159 #ifdef TENSORFLOW_USE_SYCL set_eigen_sycl_device(Eigen::SyclDevice * d)160 void set_eigen_sycl_device(Eigen::SyclDevice* d) { eigen_sycl_device_ = d; } 161 #endif 162 163 // Return the Allocator implementation to use based on the allocator 164 // attributes requested. See allocator.h for more details. GetAllocator(AllocatorAttributes)165 virtual Allocator* GetAllocator(AllocatorAttributes /*attr*/) { 166 LOG(FATAL) << "GetAllocator() is not implemented."; 167 return nullptr; 168 } 169 170 // Return the Allocator implementation to use based on the allocator 171 // attributes requested and the supplied resource manager. By 172 // default this ignores the resource manager and calls the base 173 // implementation but devices can override if they want to consult 174 // the resource manager when choosing the allocator. 
GetStepAllocator(AllocatorAttributes attr,ResourceMgr *)175 virtual Allocator* GetStepAllocator(AllocatorAttributes attr, 176 ResourceMgr* /*step_resource_manager*/) { 177 return GetAllocator(attr); 178 } 179 eigen_cpu_device()180 virtual const Eigen::ThreadPoolDevice* eigen_cpu_device() { 181 CHECK(eigen_cpu_device_ != nullptr); 182 return eigen_cpu_device_; 183 } 184 185 #ifdef TENSORFLOW_USE_SYCL eigen_sycl_device()186 virtual const Eigen::SyclDevice* eigen_sycl_device() const { 187 CHECK(eigen_sycl_device_ != nullptr); 188 return eigen_sycl_device_; 189 } 190 #endif 191 192 // Caller owns the return value. The OpKernelContext calls this even 193 // for devices that do not implement an eigen_gpu_device. Overridden 194 // by GPU devices to return a derived type. MakeGpuDevice()195 virtual PerOpGpuDevice* MakeGpuDevice() { return nullptr; } 196 UnderlyingDevice()197 virtual DeviceBase* UnderlyingDevice() { return this; } UnderlyingDevice()198 virtual const DeviceBase* UnderlyingDevice() const { return this; } 199 200 // This is overridden by GPU devices to reinitialize the derived 201 // type returned by MakeGpuDevice. ReinitializeGpuDevice(OpKernelContext *,PerOpGpuDevice *,DeviceContext *,Allocator *)202 virtual void ReinitializeGpuDevice(OpKernelContext* /*context*/, 203 PerOpGpuDevice* /*device*/, 204 DeviceContext* /*dc*/, 205 Allocator* /*allocator*/) {} 206 207 // Unimplemented by default 208 virtual const DeviceAttributes& attributes() const; 209 virtual const string& name() const; 210 211 // Materializes the given TensorProto into 'tensor' stored in Device 212 // memory. Most devices will want to override this. 213 // 214 // TODO(vrv): We should be able to put this function into 215 // OpKernelContext and handle the copies from device memory via send 216 // and receive nodes, instead of requiring that each device handle 217 // the copies here as well as in copy ops. 
MakeTensorFromProto(const TensorProto & tensor_proto,const AllocatorAttributes alloc_attrs,Tensor * tensor)218 virtual Status MakeTensorFromProto(const TensorProto& tensor_proto, 219 const AllocatorAttributes alloc_attrs, 220 Tensor* tensor) { 221 return errors::Internal("Device does not implement MakeTensorFromProto()"); 222 } 223 224 protected: 225 // Does not take ownership. set_tensorflow_device_thread_pool(thread::ThreadPool * thread_pool)226 void set_tensorflow_device_thread_pool(thread::ThreadPool* thread_pool) { 227 device_thread_pool_ = thread_pool; 228 } 229 230 private: 231 Env* const env_; 232 CpuWorkerThreads* cpu_worker_threads_ = nullptr; 233 GpuDeviceInfo* gpu_device_info_ = nullptr; 234 thread::ThreadPool* device_thread_pool_ = nullptr; 235 Eigen::ThreadPoolDevice* eigen_cpu_device_ = nullptr; 236 #ifdef TENSORFLOW_USE_SYCL 237 Eigen::SyclDevice* eigen_sycl_device_ = nullptr; 238 #endif 239 }; 240 241 } // namespace tensorflow 242 243 #endif // TENSORFLOW_FRAMEWORK_DEVICE_BASE_H_ 244