/external/tensorflow/tensorflow/compiler/xla/service/interpreter/ |
D | executor.cc | 50 bool XlaInterpreterExecutor::Memcpy(Stream *stream, void *host_dst, in Memcpy() argument 53 AsExecutorStream(stream)->EnqueueTask([this, host_dst, dev_src, size]() { in Memcpy() 54 port::Status ok = SynchronousMemcpy(host_dst, dev_src, size); in Memcpy() 76 void *host_dst, const DeviceMemoryBase &dev_src, uint64 size) { in SynchronousMemcpy() argument 77 memcpy(host_dst, dev_src.opaque(), size); in SynchronousMemcpy()
|
D | executor.h | 83 bool Memcpy(Stream *stream, void *host_dst, const DeviceMemoryBase &pop_src, 119 port::Status SynchronousMemcpy(void *host_dst,
|
/external/tensorflow/tensorflow/stream_executor/host/ |
D | host_gpu_executor.cc | 66 bool HostExecutor::Memcpy(Stream *stream, void *host_dst, in Memcpy() argument 72 [host_dst, src_mem, size]() { memcpy(host_dst, src_mem, size); }); in Memcpy() 137 port::Status HostExecutor::SynchronousMemcpy(void *host_dst, in SynchronousMemcpy() argument 140 memcpy(host_dst, gpu_src.opaque(), size); in SynchronousMemcpy()
|
D | host_gpu_executor.h | 75 bool Memcpy(Stream *stream, void *host_dst, const DeviceMemoryBase &gpu_src, 99 port::Status SynchronousMemcpy(void *host_dst,
|
/external/tensorflow/tensorflow/stream_executor/ |
D | stream_executor_pimpl.cc | 638 bool StreamExecutor::SynchronousMemcpy(void *host_dst, in SynchronousMemcpy() argument 641 VLOG(1) << "Called StreamExecutor::SynchronousMemcpy(host_dst=" << host_dst in SynchronousMemcpy() 646 implementation_->SynchronousMemcpy(host_dst, device_src, size); in SynchronousMemcpy() 669 const DeviceMemoryBase &device_src, int64 size, void *host_dst) { in SynchronousMemcpyD2H() argument 672 << ", host_dst=" << host_dst << ")" << StackTraceIfVLOG10(); in SynchronousMemcpyD2H() 676 host_dst); in SynchronousMemcpyD2H() 678 result = implementation_->SynchronousMemcpy(host_dst, device_src, size); in SynchronousMemcpyD2H() 684 device_src.opaque(), host_dst, size, in SynchronousMemcpyD2H() 714 bool StreamExecutor::Memcpy(Stream *stream, void *host_dst, in Memcpy() argument 716 return implementation_->Memcpy(stream, host_dst, device_src, size); in Memcpy()
|
D | stream_executor_pimpl.h | 268 bool SynchronousMemcpy(void *host_dst, const DeviceMemoryBase &device_src, 288 int64 size, void *host_dst); 295 port::MutableArraySlice<T> host_dst) { in SynchronousMemcpyD2H() argument 296 auto host_size = host_dst.size() * sizeof(T); in SynchronousMemcpyD2H() 298 return SynchronousMemcpyD2H(device_src, host_size, host_dst.begin()); in SynchronousMemcpyD2H() 546 bool Memcpy(Stream *stream, void *host_dst,
|
D | trace_listener.h | 62 int64 size, void* host_dst) {} in SynchronousMemcpyD2HBegin() argument
|
D | stream_executor_internal.h | 221 virtual port::Status SynchronousMemcpy(void *host_dst, 235 virtual bool Memcpy(Stream *stream, void *host_dst,
|
D | stream.h | 678 void *host_dst, uint64 size); 686 port::MutableArraySlice<ElementType> host_dst) { in ThenMemcpyD2HQuantized() argument 689 host_dst.data(), host_dst.size() * sizeof(ElementType)); in ThenMemcpyD2HQuantized() 1680 Stream &ThenMemcpy(void *host_dst, const DeviceMemoryBase &gpu_src, 1695 port::MutableArraySlice<T> host_dst) { in ThenMemcpyD2H() argument 1696 auto host_size = host_dst.size() * sizeof(T); in ThenMemcpyD2H() 1698 return ThenMemcpy(host_dst.begin(), gpu_src, host_size); in ThenMemcpyD2H()
|
D | stream.cc | 1706 dnn::QuantizedActivationMode mode, void *host_dst, uint64 size) { in ThenMemcpyD2HQuantized() argument 1707 VLOG_CALL(PARAM(gpu_unquantized_src), PARAM(mode), PARAM(host_dst), in ThenMemcpyD2HQuantized() 1713 host_dst, size)); in ThenMemcpyD2HQuantized() 4780 Stream &Stream::ThenMemcpy(void *host_dst, const DeviceMemoryBase &gpu_src, in ThenMemcpy() argument 4782 VLOG_CALL(PARAM(host_dst), PARAM(gpu_src), PARAM(size)); in ThenMemcpy() 4785 CheckError(parent_->Memcpy(this, host_dst, gpu_src, size)); in ThenMemcpy()
|
D | dnn.h | 1997 QuantizedActivationMode mode, void* host_dst, int64 size) = 0;
|
/external/tensorflow/tensorflow/stream_executor/cuda/ |
D | cuda_driver.cc | 1079 void* host_dst, in SynchronousMemcpyD2H() argument 1086 CHECK(PointerIsValid(host_dst)) in SynchronousMemcpyD2H() 1087 << "Destination pointer is not actually on CPU: " << host_dst; in SynchronousMemcpyD2H() 1089 CUresult res = cuMemcpyDtoH(host_dst, gpu_src, size); in SynchronousMemcpyD2H() 1094 ToString(res).c_str(), host_dst, in SynchronousMemcpyD2H() 1098 << host_dst; in SynchronousMemcpyD2H() 1149 void* host_dst, in AsynchronousMemcpyD2H() argument 1157 CHECK(PointerIsValid(host_dst)) in AsynchronousMemcpyD2H() 1158 << "Destination pointer is not actually on CPU: " << host_dst; in AsynchronousMemcpyD2H() 1160 CUresult res = cuMemcpyDtoHAsync(host_dst, gpu_src, size, stream); in AsynchronousMemcpyD2H() [all …]
|
D | cuda_gpu_executor.cc | 610 port::Status GpuExecutor::SynchronousMemcpy(void* host_dst, in SynchronousMemcpy() argument 613 return GpuDriver::SynchronousMemcpyD2H(context_, host_dst, in SynchronousMemcpy() 655 bool GpuExecutor::Memcpy(Stream* stream, void* host_dst, in Memcpy() argument 657 return GpuDriver::AsynchronousMemcpyD2H(context_, host_dst, in Memcpy()
|
D | cuda_dnn.h | 527 dnn::QuantizedActivationMode mode, void* host_dst,
|
D | cuda_dnn.cc | 4076 dnn::QuantizedActivationMode mode, void* host_dst, int64 size) { in DoMemcpyD2HQuantized() argument
|
/external/tensorflow/tensorflow/stream_executor/rocm/ |
D | rocm_driver.cc | 892 GpuContext* context, void* host_dst, hipDeviceptr_t gpu_src, uint64 size) { in SynchronousMemcpyD2H() argument 894 hipError_t res = tensorflow::wrap::hipMemcpyDtoH(host_dst, gpu_src, size); in SynchronousMemcpyD2H() 899 ToString(res).c_str(), host_dst, in SynchronousMemcpyD2H() 903 << host_dst; in SynchronousMemcpyD2H() 941 void* host_dst, in AsynchronousMemcpyD2H() argument 947 tensorflow::wrap::hipMemcpyDtoHAsync(host_dst, gpu_src, size, stream); in AsynchronousMemcpyD2H() 952 ToString(res).c_str(), host_dst, absl::bit_cast<void*>(gpu_src), size, in AsynchronousMemcpyD2H() 958 << host_dst << " on stream " << stream; in AsynchronousMemcpyD2H()
|
D | rocm_gpu_executor.cc | 491 port::Status GpuExecutor::SynchronousMemcpy(void* host_dst, in SynchronousMemcpy() argument 494 return GpuDriver::SynchronousMemcpyD2H(context_, host_dst, in SynchronousMemcpy() 536 bool GpuExecutor::Memcpy(Stream* stream, void* host_dst, in Memcpy() argument 538 return GpuDriver::AsynchronousMemcpyD2H(context_, host_dst, in Memcpy()
|
D | rocm_dnn.h | 510 dnn::QuantizedActivationMode mode, void* host_dst,
|
D | rocm_dnn.cc | 3973 dnn::QuantizedActivationMode mode, void* host_dst, int64 size) { in DoMemcpyD2HQuantized() argument
|
/external/tensorflow/tensorflow/stream_executor/gpu/ |
D | gpu_executor.h | 130 port::Status SynchronousMemcpy(void* host_dst, 145 bool Memcpy(Stream* stream, void* host_dst, const DeviceMemoryBase& gpu_src,
|
D | gpu_driver.h | 280 static port::Status SynchronousMemcpyD2H(GpuContext* context, void* host_dst, 292 static bool AsynchronousMemcpyD2H(GpuContext* context, void* host_dst,
|