/external/tensorflow/tensorflow/stream_executor/host/ |
D | host_gpu_executor.cc | 67 const DeviceMemoryBase &gpu_src, uint64 size) { in Memcpy() argument 70 void *src_mem = const_cast<void *>(gpu_src.opaque()); in Memcpy() 88 const DeviceMemoryBase &gpu_src, in MemcpyDeviceToDevice() argument 91 void *src_mem = const_cast<void *>(gpu_src.opaque()); in MemcpyDeviceToDevice() 138 const DeviceMemoryBase &gpu_src, in SynchronousMemcpy() argument 140 memcpy(host_dst, gpu_src.opaque(), size); in SynchronousMemcpy() 145 DeviceMemoryBase *gpu_dst, const DeviceMemoryBase &gpu_src, uint64 size) { in SynchronousMemcpyDeviceToDevice() argument 146 memcpy(gpu_dst->opaque(), gpu_src.opaque(), size); in SynchronousMemcpyDeviceToDevice()
|
D | host_gpu_executor.h | 75 bool Memcpy(Stream *stream, void *host_dst, const DeviceMemoryBase &gpu_src, 100 const DeviceMemoryBase &gpu_src, 103 const DeviceMemoryBase &gpu_src,
|
/external/tensorflow/tensorflow/stream_executor/cuda/ |
D | cuda_driver.cc | 1080 CUdeviceptr gpu_src, in SynchronousMemcpyD2H() argument 1084 CHECK(PointerIsValid(gpu_src)) in SynchronousMemcpyD2H() 1085 << "Source pointer is not actually on GPU: " << gpu_src; in SynchronousMemcpyD2H() 1089 CUresult res = cuMemcpyDtoH(host_dst, gpu_src, size); in SynchronousMemcpyD2H() 1095 absl::bit_cast<void*>(gpu_src), size, size)); in SynchronousMemcpyD2H() 1127 CUdeviceptr gpu_src, in SynchronousMemcpyD2D() argument 1131 CHECK(PointerIsValid(gpu_src)) in SynchronousMemcpyD2D() 1132 << "Source pointer is not actually on GPU: " << gpu_src; in SynchronousMemcpyD2D() 1136 CUresult res = cuMemcpyDtoD(gpu_dst, gpu_src, size); in SynchronousMemcpyD2D() 1142 absl::bit_cast<void*>(gpu_src), size, size)); in SynchronousMemcpyD2D() [all …]
|
D | cuda_gpu_executor.cc | 611 const DeviceMemoryBase& gpu_src, in SynchronousMemcpy() argument 614 AsCudaDevicePtr(gpu_src), size); in SynchronousMemcpy() 618 DeviceMemoryBase* gpu_dst, const DeviceMemoryBase& gpu_src, uint64 size) { in SynchronousMemcpyDeviceToDevice() argument 620 AsCudaDevicePtr(gpu_src), size); in SynchronousMemcpyDeviceToDevice() 656 const DeviceMemoryBase& gpu_src, uint64 size) { in Memcpy() argument 658 AsCudaDevicePtr(gpu_src), size, in Memcpy() 671 const DeviceMemoryBase& gpu_src, in MemcpyDeviceToDevice() argument 674 AsCudaDevicePtr(gpu_src), size, in MemcpyDeviceToDevice()
|
/external/tensorflow/tensorflow/stream_executor/rocm/ |
D | rocm_driver.cc | 892 GpuContext* context, void* host_dst, hipDeviceptr_t gpu_src, uint64 size) { in SynchronousMemcpyD2H() argument 894 hipError_t res = tensorflow::wrap::hipMemcpyDtoH(host_dst, gpu_src, size); in SynchronousMemcpyD2H() 900 absl::bit_cast<void*>(gpu_src), size, size)); in SynchronousMemcpyD2H() 925 GpuContext* context, hipDeviceptr_t gpu_dst, hipDeviceptr_t gpu_src, in SynchronousMemcpyD2D() argument 928 hipError_t res = tensorflow::wrap::hipMemcpyDtoD(gpu_dst, gpu_src, size); in SynchronousMemcpyD2D() 934 absl::bit_cast<void*>(gpu_src), size, size)); in SynchronousMemcpyD2D() 942 hipDeviceptr_t gpu_src, in AsynchronousMemcpyD2H() argument 947 tensorflow::wrap::hipMemcpyDtoHAsync(host_dst, gpu_src, size, stream); in AsynchronousMemcpyD2H() 952 ToString(res).c_str(), host_dst, absl::bit_cast<void*>(gpu_src), size, in AsynchronousMemcpyD2H() 957 << " bytes from " << absl::bit_cast<void*>(gpu_src) << " to " in AsynchronousMemcpyD2H() [all …]
|
D | rocm_gpu_executor.cc | 492 const DeviceMemoryBase& gpu_src, in SynchronousMemcpy() argument 495 AsROCmDevicePtr(gpu_src), size); in SynchronousMemcpy() 499 DeviceMemoryBase* gpu_dst, const DeviceMemoryBase& gpu_src, uint64 size) { in SynchronousMemcpyDeviceToDevice() argument 501 AsROCmDevicePtr(gpu_src), size); in SynchronousMemcpyDeviceToDevice() 537 const DeviceMemoryBase& gpu_src, uint64 size) { in Memcpy() argument 539 AsROCmDevicePtr(gpu_src), size, in Memcpy() 552 const DeviceMemoryBase& gpu_src, in MemcpyDeviceToDevice() argument 555 AsROCmDevicePtr(gpu_src), size, in MemcpyDeviceToDevice()
|
/external/tensorflow/tensorflow/stream_executor/ |
D | stream_executor_internal.h | 222 const DeviceMemoryBase &gpu_src, 225 DeviceMemoryBase *gpu_dst, const DeviceMemoryBase &gpu_src, 236 const DeviceMemoryBase &gpu_src, uint64 size) = 0; 240 const DeviceMemoryBase &gpu_src,
|
D | trace_listener.h | 61 const DeviceMemoryBase& gpu_src, in SynchronousMemcpyD2HBegin() argument
|
D | stream.h | 1680 Stream &ThenMemcpy(void *host_dst, const DeviceMemoryBase &gpu_src, 1694 Stream &ThenMemcpyD2H(const DeviceMemory<T> &gpu_src, in ThenMemcpyD2H() argument 1697 CHECK(gpu_src.size() == 0 || host_size >= gpu_src.size()); in ThenMemcpyD2H() 1698 return ThenMemcpy(host_dst.begin(), gpu_src, host_size); in ThenMemcpyD2H() 1715 Stream &ThenMemcpy(DeviceMemoryBase *gpu_dst, const DeviceMemoryBase &gpu_src, 1722 const DeviceMemoryBase &gpu_src, uint64 size) { in ThenMemcpyD2D() argument 1723 return ThenMemcpy(gpu_dst, gpu_src, size); in ThenMemcpyD2D()
|
D | stream.cc | 4780 Stream &Stream::ThenMemcpy(void *host_dst, const DeviceMemoryBase &gpu_src, in ThenMemcpy() argument 4782 VLOG_CALL(PARAM(host_dst), PARAM(gpu_src), PARAM(size)); in ThenMemcpy() 4785 CheckError(parent_->Memcpy(this, host_dst, gpu_src, size)); in ThenMemcpy() 4788 << " did not memcpy device-to-host; source: " << gpu_src.opaque(); in ThenMemcpy() 4807 const DeviceMemoryBase &gpu_src, uint64 size) { in ThenMemcpy() argument 4808 VLOG_CALL(PARAM(gpu_dst), PARAM(gpu_src), PARAM(size)); in ThenMemcpy() 4811 CheckError(parent_->MemcpyDeviceToDevice(this, gpu_dst, gpu_src, size)); in ThenMemcpy() 4814 << " did not memcpy gpu-to-gpu; source: " << &gpu_src; in ThenMemcpy()
|
/external/tensorflow/tensorflow/stream_executor/gpu/ |
D | gpu_executor.h | 131 const DeviceMemoryBase& gpu_src, 135 const DeviceMemoryBase& gpu_src, 145 bool Memcpy(Stream* stream, void* host_dst, const DeviceMemoryBase& gpu_src, 152 const DeviceMemoryBase& gpu_src,
|
D | gpu_driver.h | 281 GpuDevicePtr gpu_src, uint64 size); 287 GpuDevicePtr gpu_src, uint64 size); 293 GpuDevicePtr gpu_src, uint64 size, 299 GpuDevicePtr gpu_src, uint64 size,
|
/external/tensorflow/tensorflow/ |
D | tensorflow.bzl | 1332 for gpu_src in gpu_srcs: 1333 if gpu_src.endswith(".cc") and not gpu_src.endswith(".cu.cc"): 1335 .format(gpu_src))
|