/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_CORE_COMMON_RUNTIME_GPU_GPU_UTIL_H_
#define TENSORFLOW_CORE_COMMON_RUNTIME_GPU_GPU_UTIL_H_

#include "tensorflow/core/common_runtime/device.h"
#include "tensorflow/core/common_runtime/dma_helper.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/platform/stream_executor.h"

namespace tensorflow {

// Forward declarations; full definitions are not needed by this header.
class RecvTensorResponse;
class TensorProto;

// Static helpers for moving Tensor data between GPU and CPU memory,
// between GPUs, and for debugging GPU-resident tensors. All copy
// operations take a StatusCallback 'done' which is invoked with the
// result of the (possibly asynchronous) transfer.
class GPUUtil {
 public:
  // "tensor" is GPU-local.  "dev" is the hosting GPU.
  // "device_context" should be the context of the GPU "_Send" op
  // which provides the Tensor.
  // Sets all necessary fields of "proto" by transferring value
  // bytes from GPU to CPU RAM. "is_dead" indicates that the
  // tensor is dead with an uninit value.
  static void SetProtoFromGPU(const Tensor& tensor, Device* dev,
                              const DeviceContext* device_context,
                              TensorProto* proto, bool is_dead,
                              StatusCallback done);

  // Copies the data in 'gpu_tensor' into 'cpu_tensor'.
  // 'gpu_tensor''s backing memory must be on 'gpu_device' and
  // 'cpu_tensor' must be allocated to be of the same size as
  // 'gpu_tensor'. Synchronous: may block.
  static void CopyGPUTensorToCPU(Device* gpu_device,
                                 const DeviceContext* device_context,
                                 const Tensor* gpu_tensor, Tensor* cpu_tensor,
                                 StatusCallback done);

  // Blocks until all operations queued on the stream associated with
  // "gpu_device" at the time of the call have completed.  Returns any
  // error pending on the stream at completion.
  static Status Sync(Device* gpu_device);

  // Blocks until all operations queued on all streams associated with the
  // corresponding GPU device at the time of call have completed.
  // Returns any error pending on the stream at completion.
  static Status SyncAll(Device* gpu_device);

  // For debugging purpose, given a "device" and a "tensor" allocated
  // on the device, return a string printing each byte in the tensor
  // (up to a limit).  "device" can be either a CPU or a GPU device.
  static string MemoryDebugString(const Device* device, Tensor* tensor);

  // Map a Tensor as a DeviceMemory object wrapping the given typed
  // buffer.
  //
  // NOTE: will be removed soon, see StreamExecutorUtil::AsDeviceMemory
  // instead.
  template <typename T>
  static se::DeviceMemory<T> AsDeviceMemory(const Tensor& t) {
    // DMAHelper::base() returns a const void* to the tensor's buffer;
    // the cast chain strips constness and retypes it for StreamExecutor.
    T* ptr = reinterpret_cast<T*>(const_cast<void*>(DMAHelper::base(&t)));
    return se::DeviceMemory<T>(se::DeviceMemoryBase(ptr, t.TotalBytes()));
  }

  // Computes a checksum over the contents of "tensor", which is allocated
  // on "gpu_device".
  static uint64 Checksum(Device* gpu_device,
                         const DeviceContext* device_context,
                         const Tensor& tensor);

  // Computes a checksum over the contents of "tensor", which is allocated
  // in local CPU RAM.
  static uint64 Checksum(const Tensor& tensor);

  // Copies the data in 'cpu_tensor' into 'gpu_tensor', which must be
  // allocated on 'gpu_device' with the same size as 'cpu_tensor'.
  // Invokes 'done' with the result of the transfer.
  // 'sync_dst_compute' — NOTE(review): presumably controls whether the
  // copy synchronizes with the destination device's compute stream;
  // confirm against the implementation in gpu_util.cc.
  static void CopyCPUTensorToGPU(const Tensor* cpu_tensor,
                                 const DeviceContext* device_context,
                                 Device* gpu_device, Tensor* gpu_tensor,
                                 StatusCallback done, bool sync_dst_compute);

  // Copies 'input' (resident on device 'src') into 'output' (resident on
  // device 'dst'), using the given send/recv device contexts and allocator
  // attributes. 'dev_to_dev_stream_index' selects which device-to-device
  // copy stream to use — NOTE(review): stream-selection semantics are
  // defined by the implementation; verify there. Invokes 'done' when the
  // copy completes or fails.
  static void DeviceToDeviceCopy(
      DeviceContext* send_dev_context, DeviceContext* recv_dev_context,
      Device* src, Device* dst, AllocatorAttributes src_alloc_attr,
      AllocatorAttributes dst_alloc_attr, const Tensor* input, Tensor* output,
      int dev_to_dev_stream_index, StatusCallback done);

  // Deep-copying of GPU tensor on the same device.
  // 'src_gpu_tensor''s and 'dst_gpu_tensor''s backing memory must be on
  // 'gpu_device' and 'dst_cpu_tensor' must be allocated to be of the same
  // size as 'src_gpu_tensor'.
  static void CopyGPUTensorToSameGPU(Device* gpu_device,
                                     const DeviceContext* device_context,
                                     const Tensor* src_gpu_tensor,
                                     Tensor* dst_gpu_tensor,
                                     StatusCallback done);
};

}  // namespace tensorflow
#endif  // TENSORFLOW_CORE_COMMON_RUNTIME_GPU_GPU_UTIL_H_