/external/tensorflow/tensorflow/stream_executor/
D | device_memory.h |
      57  class DeviceMemoryBase {
      62  explicit DeviceMemoryBase(void *opaque = nullptr, uint64 size = 0)
      75  bool operator<(const DeviceMemoryBase &other) const {
      95  bool IsSameAs(const DeviceMemoryBase &other) const {  in IsSameAs()
     122  class DeviceMemory final : public DeviceMemoryBase {
     125  DeviceMemory() : DeviceMemoryBase(nullptr, 0) {}  in DeviceMemory()
     130  explicit DeviceMemory(const DeviceMemoryBase &other)  in DeviceMemory()
     131  : DeviceMemoryBase(const_cast<DeviceMemoryBase &>(other).opaque(),  in DeviceMemory()
     156  DeviceMemoryBase::Reset(opaque, bytes);  in ResetFromByteSize()
     168  DeviceMemory(void *opaque, uint64 size) : DeviceMemoryBase(opaque, size) {}  in DeviceMemory()
     [all …]

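The two classes indexed above are StreamExecutor's basic handle on device allocations: DeviceMemoryBase is an untyped, non-owning (opaque pointer, size) pair, and DeviceMemory<T> overlays an element type on the same bytes. A minimal sketch of how the listed constructors compose; WrapViews is a hypothetical helper, and ptr/bytes are assumed to come from a real allocator (nothing here allocates or frees):

    #include <cassert>
    #include <cstdint>

    #include "tensorflow/stream_executor/device_memory.h"

    namespace se = stream_executor;

    // Wrap an already-allocated device region; both objects are views.
    void WrapViews(void *ptr, uint64_t bytes) {
      se::DeviceMemoryBase untyped(ptr, bytes);  // (opaque, size) pair
      se::DeviceMemory<float> typed(untyped);    // same bytes, typed as float

      assert(typed.IsSameAs(untyped));  // both alias the same region
      assert(typed.size() == bytes);    // size is still counted in bytes
    }
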
D | stream_executor_internal.h |
     198  virtual DeviceMemoryBase Allocate(uint64 size, int64 memory_space) = 0;
     199  DeviceMemoryBase Allocate(uint64 size) {  in Allocate()
     202  virtual void *GetSubBuffer(DeviceMemoryBase *parent, uint64 offset,
     204  virtual void Deallocate(DeviceMemoryBase *mem) = 0;
     219  virtual port::Status SynchronousMemZero(DeviceMemoryBase *location,
     221  virtual port::Status SynchronousMemSet(DeviceMemoryBase *location, int value,
     223  virtual port::Status SynchronousMemcpy(DeviceMemoryBase *gpu_dst,
     226  const DeviceMemoryBase &gpu_src,
     229  DeviceMemoryBase *gpu_dst, const DeviceMemoryBase &gpu_src,
     231  virtual port::Status MemZero(Stream *stream, DeviceMemoryBase *location,
     [all …]

D | stream_executor_pimpl.h |
     182  port::StatusOr<DeviceMemoryBase> GetUntypedSymbol(
     190  void Deallocate(DeviceMemoryBase *mem);
     237  port::Status SynchronousMemZero(DeviceMemoryBase *location,
     242  port::Status SynchronousMemSet(DeviceMemoryBase *location, int value,
     249  bool SynchronousMemcpy(DeviceMemoryBase *device_dst, const void *host_src,
     256  bool SynchronousMemcpy(void *host_dst, const DeviceMemoryBase &device_src,
     261  DeviceMemoryBase *device_dst);
     268  DeviceMemoryBase *device_dst) {  in SynchronousMemcpyH2D()
     275  port::Status SynchronousMemcpyD2H(const DeviceMemoryBase &device_src,
     291  bool SynchronousMemcpy(DeviceMemoryBase *device_dst,
     [all …]

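Together, the Allocate, SynchronousMemcpyH2D/D2H, and Deallocate entries above cover a full synchronous round trip through the StreamExecutor facade. A hedged sketch using only signatures visible in this listing; RoundTrip is a hypothetical helper and executor is assumed to be an already-initialized StreamExecutor*:

    #include <array>

    #include "tensorflow/stream_executor/stream_executor_pimpl.h"

    namespace se = stream_executor;

    // Copy four floats to the device and back; false on any failure.
    bool RoundTrip(se::StreamExecutor *executor) {
      std::array<float, 4> src = {1.f, 2.f, 3.f, 4.f};
      std::array<float, 4> dst = {};

      // memory_space 0 is the executor's default space.
      se::DeviceMemoryBase dev =
          executor->Allocate(sizeof(src), /*memory_space=*/0);
      if (dev.is_null()) return false;

      bool ok =
          executor->SynchronousMemcpyH2D(src.data(), sizeof(src), &dev).ok() &&
          executor->SynchronousMemcpyD2H(dev, sizeof(dst), dst.data()).ok();
      executor->Deallocate(&dev);
      return ok && dst == src;
    }
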
D | temporary_memory_manager.cc |
      31  DeviceMemoryBase device_memory = it->first;  in ForceDeallocateAll()
      37  const DeviceMemoryBase& device_memory, uint64 generation, bool must_exist) {  in MarkFinalized()
      55  DeviceMemoryBase device_memory = it->first;  in DeallocateFinalizedTemporaries()
      66  bool TemporaryMemoryManager::IsFinalized(const DeviceMemoryBase& device_memory,  in IsFinalized()
      82  bool TemporaryMemoryManager::HasAllocated(const DeviceMemoryBase& device_memory,  in HasAllocated()
      96  DeviceMemoryBase device_memory =  in AllocateArrayBase()

D | temporary_device_memory.h |
      62  DeviceMemoryBase* mutable_device_memory();
      65  const DeviceMemoryBase& device_memory() const;
      93  TemporaryDeviceMemoryBase(Stream* parent, DeviceMemoryBase device_memory,
      97  DeviceMemoryBase device_memory_;

D | stream_executor_pimpl.cc |
     210  void StreamExecutor::Deallocate(DeviceMemoryBase *mem) {  in Deallocate()
     279  const dnn::BatchDescriptor &input_descriptor, DeviceMemoryBase input_data,  in GetMIOpenConvolveAlgorithms()
     281  DeviceMemoryBase filter_data, const dnn::BatchDescriptor &output_descriptor,  in GetMIOpenConvolveAlgorithms()
     282  DeviceMemoryBase output_data,  in GetMIOpenConvolveAlgorithms()
     487  DeviceMemoryBase StreamExecutor::Allocate(uint64 size, int64 memory_space) {  in Allocate()
     494  return DeviceMemoryBase();  in Allocate()
     496  DeviceMemoryBase buf = implementation_->Allocate(size, memory_space);  in Allocate()
     505  port::StatusOr<DeviceMemoryBase> StreamExecutor::GetUntypedSymbol(  in GetUntypedSymbol()
     512  return DeviceMemoryBase(opaque, bytes);  in GetUntypedSymbol()
     591  port::Status StreamExecutor::SynchronousMemZero(DeviceMemoryBase *location,  in SynchronousMemZero()
     [all …]

/external/tensorflow/tensorflow/compiler/xla/service/gpu/
D | gpu_conv_runner.h |
      85  se::DeviceMemoryBase bias_buf;
      86  se::DeviceMemoryBase side_input_buf;  // nullable
      89  se::DeviceMemoryBase input_buf;
      90  se::DeviceMemoryBase filter_buf;
      91  se::DeviceMemoryBase output_buf;
     111  absl::Span<se::DeviceMemoryBase> operand_buffers,
     112  se::DeviceMemoryBase result_buffer,
     113  se::DeviceMemoryBase scratch_buf, se::Stream* stream,
     117  absl::Span<se::DeviceMemoryBase> operand_buffers,
     118  se::DeviceMemoryBase result_buffer,
     [all …]

D | cudnn_batchnorm_runner.h |
      43  const CudnnBatchNormConfig &config, se::DeviceMemoryBase operand,
      44  se::DeviceMemoryBase output, se::DeviceMemory<float> scale,
      49  const CudnnBatchNormConfig &config, se::DeviceMemoryBase operand,
      50  se::DeviceMemoryBase output_data, se::DeviceMemory<float> output_mean,
      55  const CudnnBatchNormConfig &config, se::DeviceMemoryBase operand,
      56  se::DeviceMemoryBase output_grad_data, se::DeviceMemoryBase grad_output,

D | buffer_allocations.cc |
      36  const std::set<se::DeviceMemoryBase>& live_addresses,  in TearDown()
      44  se::DeviceMemoryBase buffer_address = GetDeviceAddress(allocation.index());  in TearDown()
      60  se::DeviceMemoryBase BufferAllocations::GetDeviceAddress(  in GetDeviceAddress()
      67  se::DeviceMemoryBase& BufferAllocations::GetMutableDeviceAddress(  in GetMutableDeviceAddress()
      74  se::DeviceMemoryBase BufferAllocations::GetDeviceAddress(  in GetDeviceAddress()
      76  se::DeviceMemoryBase base = GetDeviceAddress(buffer_slice.index());  in GetDeviceAddress()
      79  return se::DeviceMemoryBase(  in GetDeviceAddress()

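The second GetDeviceAddress overload above (file lines 74-79) resolves a BufferAllocation::Slice by offsetting into its allocation's base address. The pointer arithmetic amounts to the following sketch; SliceOf is a hypothetical name, and base/offset/size are assumed to describe a valid slice:

    #include <cstdint>

    #include "tensorflow/stream_executor/device_memory.h"

    namespace se = stream_executor;

    // A sub-buffer is the base pointer advanced by the slice offset,
    // re-wrapped as a non-owning DeviceMemoryBase of the slice's size.
    se::DeviceMemoryBase SliceOf(se::DeviceMemoryBase base, uint64_t offset,
                                 uint64_t size) {
      return se::DeviceMemoryBase(static_cast<char *>(base.opaque()) + offset,
                                  size);
    }
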
D | buffer_allocations.h |
      38  BufferAllocations(absl::Span<se::DeviceMemoryBase const> buffers,  in BufferAllocations()
      58  se::DeviceMemoryBase GetDeviceAddress(
      62  se::DeviceMemoryBase& GetMutableDeviceAddress(
      67  se::DeviceMemoryBase GetDeviceAddress(
      72  Status TearDown(const std::set<se::DeviceMemoryBase>& live_addresses,
      89  std::vector<se::DeviceMemoryBase> buffers_;

D | cudnn_batchnorm_runner.cc |
      32  se::DeviceMemoryBase operand;
      41  se::DeviceMemoryBase output;
      49  se::DeviceMemoryBase output_data;
      57  se::DeviceMemoryBase output_grad_data;
      58  se::DeviceMemoryBase grad_output;
     115  const se::DeviceMemoryBase& operand,  in AssignCommonParams()
     227  const CudnnBatchNormConfig& config, se::DeviceMemoryBase operand,  in RunCudnnBatchNormForwardInference()
     228  se::DeviceMemoryBase output, se::DeviceMemory<float> scale,  in RunCudnnBatchNormForwardInference()
     256  const CudnnBatchNormConfig& config, se::DeviceMemoryBase operand,  in RunCudnnBatchNormForwardTraining()
     257  se::DeviceMemoryBase output_data, se::DeviceMemory<float> output_mean,  in RunCudnnBatchNormForwardTraining()
     [all …]

D | cudnn_batchnorm_thunk.cc |
      55  se::DeviceMemoryBase output_base =  in ExecuteOnStream()
      57  se::DeviceMemoryBase operand = buffer_allocations.GetDeviceAddress(operand_);  in ExecuteOnStream()
      92  se::DeviceMemoryBase operand = buffer_allocations.GetDeviceAddress(operand_);  in ExecuteOnStream()
      93  se::DeviceMemoryBase output_data =  in ExecuteOnStream()
     140  se::DeviceMemoryBase operand = buffer_allocations.GetDeviceAddress(operand_);  in ExecuteOnStream()
     141  se::DeviceMemoryBase output_grad_data =  in ExecuteOnStream()
     143  se::DeviceMemoryBase grad_output =  in ExecuteOnStream()

/external/tensorflow/tensorflow/compiler/xla/service/interpreter/
D | executor.h |
      49  using Args = absl::Span<const DeviceMemoryBase>;
      70  DeviceMemoryBase Allocate(uint64 size, int64 memory_space) override;
      71  void *GetSubBuffer(DeviceMemoryBase *parent, uint64 offset_bytes,
      73  void Deallocate(DeviceMemoryBase *mem) override;
      82  bool Memcpy(Stream *stream, void *host_dst, const DeviceMemoryBase &dev_src,
      84  bool Memcpy(Stream *stream, DeviceMemoryBase *dev_dst, const void *host_src,
      86  bool MemcpyDeviceToDevice(Stream *stream, DeviceMemoryBase *pop_dst,  in MemcpyDeviceToDevice()
      87  const DeviceMemoryBase &host_src,  in MemcpyDeviceToDevice()
      92  port::Status MemZero(Stream *stream, DeviceMemoryBase *location,  in MemZero()
      96  port::Status Memset(Stream *stream, DeviceMemoryBase *location, uint8 pattern,  in Memset()
     [all …]

D | executor.cc |
      36  DeviceMemoryBase XlaInterpreterExecutor::Allocate(uint64 size,  in Allocate()
      38  return DeviceMemoryBase(new char[size], size);  in Allocate()
      41  void *XlaInterpreterExecutor::GetSubBuffer(DeviceMemoryBase *parent,  in GetSubBuffer()
      47  void XlaInterpreterExecutor::Deallocate(DeviceMemoryBase *mem) {  in Deallocate()
      52  const DeviceMemoryBase &dev_src,  in Memcpy()
      61  bool XlaInterpreterExecutor::Memcpy(Stream *stream, DeviceMemoryBase *dev_dst,  in Memcpy()
      71  DeviceMemoryBase *dev_dst, const void *host_src, uint64 size) {  in SynchronousMemcpy()
      77  void *host_dst, const DeviceMemoryBase &dev_src, uint64 size) {  in SynchronousMemcpy()

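As Allocate at file line 38 shows, the interpreter's "device" memory is ordinary host memory (new char[size]), so its copy routines reduce to memcpy between opaque pointers. A sketch of that idea, not the file's exact code; HostBackedCopy is a hypothetical name:

    #include <cstdint>
    #include <cstring>

    #include "tensorflow/stream_executor/device_memory.h"

    namespace se = stream_executor;

    // Host-backed copy: dst->opaque() points at an ordinary char[]
    // allocation, so a synchronous "device" copy is just memcpy.
    bool HostBackedCopy(se::DeviceMemoryBase *dst, const void *src,
                        uint64_t size) {
      std::memcpy(dst->opaque(), src, size);
      return true;
    }
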
/external/tensorflow/tensorflow/stream_executor/host/
D | host_gpu_executor.h |
      63  DeviceMemoryBase Allocate(uint64 size, int64 memory_space) override;
      64  void *GetSubBuffer(DeviceMemoryBase *parent, uint64 offset_bytes,
      66  void Deallocate(DeviceMemoryBase *mem) override;
      75  bool Memcpy(Stream *stream, void *host_dst, const DeviceMemoryBase &gpu_src,
      77  bool Memcpy(Stream *stream, DeviceMemoryBase *gpu_dst, const void *host_src,
      79  bool MemcpyDeviceToDevice(Stream *stream, DeviceMemoryBase *gpu_dst,
      80  const DeviceMemoryBase &gpu_src,
      83  port::Status MemZero(Stream *stream, DeviceMemoryBase *location,
      85  port::Status Memset(Stream *stream, DeviceMemoryBase *location, uint8 pattern,
      87  port::Status Memset32(Stream *stream, DeviceMemoryBase *location,
     [all …]

D | host_gpu_executor.cc |
      69  DeviceMemoryBase HostExecutor::Allocate(uint64 size, int64 memory_space) {  in Allocate()
      74  return DeviceMemoryBase(  in Allocate()
      78  void *HostExecutor::GetSubBuffer(DeviceMemoryBase *parent, uint64 offset_bytes,  in GetSubBuffer()
      83  void HostExecutor::Deallocate(DeviceMemoryBase *mem) {  in Deallocate()
      87  port::Status HostExecutor::SynchronousMemZero(DeviceMemoryBase *location,  in SynchronousMemZero()
      93  port::Status HostExecutor::SynchronousMemSet(DeviceMemoryBase *location,  in SynchronousMemSet()
     100  const DeviceMemoryBase &gpu_src, uint64 size) {  in Memcpy()
     109  bool HostExecutor::Memcpy(Stream *stream, DeviceMemoryBase *gpu_dst,  in Memcpy()
     120  DeviceMemoryBase *gpu_dst,  in MemcpyDeviceToDevice()
     121  const DeviceMemoryBase &gpu_src,  in MemcpyDeviceToDevice()
     [all …]

/external/tensorflow/tensorflow/stream_executor/tpu/
D | tpu_executor.h |
      51  using DeviceMemoryBase = ::stream_executor::DeviceMemoryBase;  variable
      69  DeviceMemoryBase Allocate(uint64 size, int64 memory_space) override;
      88  void Deallocate(const DeviceMemoryBase& memory);
      90  void Deallocate(DeviceMemoryBase* memory) override;
     121  const ::stream_executor::DeviceMemoryBase& device_src,
     124  bool Memcpy(Stream* stream, ::stream_executor::DeviceMemoryBase* device_dst,
     128  ::stream_executor::DeviceMemoryBase* gpu_dst,
     129  const ::stream_executor::DeviceMemoryBase& host_src,
     135  Status SynchronousMemcpy(::stream_executor::DeviceMemoryBase* device_dst,
     138  void* host_dst, const ::stream_executor::DeviceMemoryBase& device_src,
     [all …]

/external/tensorflow/tensorflow/stream_executor/gpu/
D | gpu_executor.h |
      89  DeviceMemoryBase Allocate(uint64 size, int64 memory_space) override;
      91  void* GetSubBuffer(DeviceMemoryBase* mem, uint64 offset_bytes,
      94  void Deallocate(DeviceMemoryBase* mem) override;
     122  port::Status SynchronousMemZero(DeviceMemoryBase* location,
     125  port::Status SynchronousMemSet(DeviceMemoryBase* location, int value,
     128  port::Status SynchronousMemcpy(DeviceMemoryBase* gpu_dst,
     132  const DeviceMemoryBase& gpu_src,
     135  port::Status SynchronousMemcpyDeviceToDevice(DeviceMemoryBase* gpu_dst,
     136  const DeviceMemoryBase& gpu_src,
     139  port::Status MemZero(Stream* stream, DeviceMemoryBase* location,
     [all …]

/external/tensorflow/tensorflow/compiler/xla/service/
D | shaped_buffer.h |
      74  const se::DeviceMemoryBase& root_buffer() const {  in root_buffer()
      80  const se::DeviceMemoryBase& buffer(const ShapeIndex& index) const {  in buffer()
      85  void set_buffer(const se::DeviceMemoryBase& buffer, const ShapeIndex& index) {  in set_buffer()
      92  void set_buffers(ShapeTree<se::DeviceMemoryBase> buffers) {  in set_buffers()
     116  const ShapeTree<se::DeviceMemoryBase>& buffers() const { return buffers_; }  in buffers()
     117  ShapeTree<se::DeviceMemoryBase>& buffers() { return buffers_; }  in buffers()
     136  ShapeTree<se::DeviceMemoryBase> buffers_;
     188  *buffers_.mutable_element(index) = se::DeviceMemoryBase();  in set_buffer()

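ShapedBuffer pairs an XLA shape with a ShapeTree of device addresses, indexed by ShapeIndex through the set_buffer/buffer accessors above. A hedged sketch of populating one for a two-element tuple; AttachTupleBuffers is a hypothetical helper, and root/leaf0/leaf1 are assumed to be suitably sized device allocations:

    #include "tensorflow/compiler/xla/service/shaped_buffer.h"

    namespace se = stream_executor;

    // Store non-owning views at each ShapeIndex of the tuple.
    void AttachTupleBuffers(xla::ShapedBuffer *sb, se::DeviceMemoryBase root,
                            se::DeviceMemoryBase leaf0,
                            se::DeviceMemoryBase leaf1) {
      sb->set_buffer(root, /*index=*/{});    // the tuple's pointer table
      sb->set_buffer(leaf0, /*index=*/{0});  // element 0 payload
      sb->set_buffer(leaf1, /*index=*/{1});  // element 1 payload
    }
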
D | transfer_manager.h |
     174  const se::DeviceMemoryBase& dest,
     178  const se::DeviceMemoryBase& source,
     184  const se::DeviceMemoryBase& dest,
     188  const se::DeviceMemoryBase& source,
     276  const se::DeviceMemoryBase& device_buffer) const {  in CanBufferBeAccessedNow()
     302  se::Stream* stream, absl::Span<const se::DeviceMemoryBase> elements,
     303  const Shape& shape, se::DeviceMemoryBase* region) = 0;
     311  const se::DeviceMemoryBase& source,
     320  se::DeviceMemoryBase* destination);

D | maybe_owning_device_memory.h |
      33  explicit MaybeOwningDeviceMemory(tensorflow::se::DeviceMemoryBase unowned)  in MaybeOwningDeviceMemory()
      38  MaybeOwningDeviceMemory& operator=(tensorflow::se::DeviceMemoryBase unowned) {
      52  tensorflow::se::DeviceMemoryBase AsDeviceMemoryBase() const;
      69  tensorflow::se::DeviceMemoryBase>

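MaybeOwningDeviceMemory is a variant (file line 69) that holds either an owned allocation or an unowned DeviceMemoryBase; AsDeviceMemoryBase() yields a uniform view over both cases. A brief sketch of the unowned alternative, the only constructor visible in this listing; ViewThrough is a hypothetical name:

    #include "tensorflow/compiler/xla/service/maybe_owning_device_memory.h"

    namespace se = stream_executor;

    // Wrap a buffer owned elsewhere; the wrapper will not free it.
    se::DeviceMemoryBase ViewThrough(se::DeviceMemoryBase unowned_buf) {
      xla::MaybeOwningDeviceMemory wrapped(unowned_buf);  // unowned case
      return wrapped.AsDeviceMemoryBase();                // uniform view
    }
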
D | transfer_manager.cc |
     115  se::Stream* stream, const Shape& shape, const se::DeviceMemoryBase& source,  in TransferArrayFromDevice()
     144  const se::DeviceMemoryBase& dest,  in TransferArrayToDevice()
     159  const se::DeviceMemoryBase& dest,  in TransferArrayToDeviceAsync()
     180  se::Stream* stream, const Shape& shape, const se::DeviceMemoryBase& source,  in TransferArrayFromDevice()
     212  [&](const ShapeIndex& index, se::DeviceMemoryBase* buffer) {  in ReadDynamicShapes()
     302  se::DeviceMemoryBase device_memory = device_buffer.buffer(index);  in WriteTupleIndexTablesAsync()
     306  std::vector<se::DeviceMemoryBase> elements;  in WriteTupleIndexTablesAsync()
     328  se::DeviceMemoryBase device_memory = device_buffer.buffer({});  in WriteRootTupleIndexTable()
     332  std::vector<se::DeviceMemoryBase> elements;  in WriteRootTupleIndexTable()
     347  se::DeviceMemoryBase device_memory =  in WriteRootTupleIndexTable()
     [all …]

/external/tensorflow/tensorflow/core/kernels/
D | gpu_utils.h |
      53  se::DeviceMemoryBase WrapRedzoneBestEffort(se::RedzoneAllocator* rz_allocator,
      54  se::DeviceMemoryBase buffer);
      64  se::DeviceMemoryBase wrapped(const_cast<T*>(cuda_memory), size * sizeof(T));  in AsDeviceMemory()
     219  se::DeviceMemoryBase input_buffer,
     220  se::DeviceMemoryBase filter_buffer,
     221  se::DeviceMemoryBase output_buffer,
     231  se::dnn::DataType element_type, se::DeviceMemoryBase input_buffer,
     232  se::DeviceMemoryBase filter_buffer, se::DeviceMemoryBase output_buffer,
     233  se::DeviceMemoryBase bias_buffer, se::DeviceMemoryBase side_input_buffer,

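The AsDeviceMemory template at file line 64 shows the standard way kernel code reinterprets a raw device pointer as a typed StreamExecutor buffer; the const_cast mirrors the listing, since DeviceMemoryBase stores a mutable opaque pointer. Reproduced as a standalone sketch under the hypothetical name AsTypedDeviceMemory:

    #include <cstdint>

    #include "tensorflow/stream_executor/device_memory.h"

    namespace se = stream_executor;

    // View `count` elements of T at a raw device address as DeviceMemory<T>.
    // Assumes device_ptr really addresses count * sizeof(T) device bytes.
    template <typename T>
    se::DeviceMemory<T> AsTypedDeviceMemory(const T *device_ptr,
                                            uint64_t count) {
      se::DeviceMemoryBase wrapped(const_cast<T *>(device_ptr),
                                   count * sizeof(T));
      return se::DeviceMemory<T>(wrapped);
    }
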
D | gpu_utils.cc |
      40  se::DeviceMemoryBase WrapRedzoneBestEffort(se::RedzoneAllocator* rz_allocator,  in WrapRedzoneBestEffort()
      41  se::DeviceMemoryBase buffer) {  in WrapRedzoneBestEffort()
      56  return se::DeviceMemoryBase(output_rz_or.ValueOrDie());  in WrapRedzoneBestEffort()
     128  se::DeviceMemoryBase input_buffer,  in LogConvAutotuneResults()
     129  se::DeviceMemoryBase filter_buffer,  in LogConvAutotuneResults()
     130  se::DeviceMemoryBase output_buffer,  in LogConvAutotuneResults()
     171  se::dnn::DataType element_type, se::DeviceMemoryBase input_buffer,  in LogFusedConvForwardAutotuneResults()
     172  se::DeviceMemoryBase filter_buffer, se::DeviceMemoryBase output_buffer,  in LogFusedConvForwardAutotuneResults()
     173  se::DeviceMemoryBase bias_buffer, se::DeviceMemoryBase side_input_buffer,  in LogFusedConvForwardAutotuneResults()

/external/tensorflow/tensorflow/compiler/xla/pjrt/
D | tracked_device_buffer.cc |
     101  ShapeTree<se::DeviceMemoryBase>::iterator iterator =  in FromScopedShapedBuffer()
     103  std::vector<se::DeviceMemoryBase> buffers;  in FromScopedShapedBuffer()
     110  iterator->second = se::DeviceMemoryBase();  in FromScopedShapedBuffer()
     116  absl::Span<se::DeviceMemoryBase>(buffers), definition_events,  in FromScopedShapedBuffer()
     123  ShapeTree<se::DeviceMemoryBase>::iterator iterator =  in AsShapedBuffer()
     125  for (const se::DeviceMemoryBase& buf : device_memory_) {  in AsShapedBuffer()
     140  for (const se::DeviceMemoryBase& buf : device_memory_) {  in AddToInputAsImmutable()
     153  for (const se::DeviceMemoryBase& buf : device_memory_) {  in AddToInputAsDonated()
     165  absl::Span<se::DeviceMemoryBase const> device_memory,  in TrackedDeviceBuffer()
     178  for (const se::DeviceMemoryBase& buffer : device_memory_) {  in ~TrackedDeviceBuffer()

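FromScopedShapedBuffer above moves each device address out of the shape tree into a flat vector and resets the vacated slot (file line 110) so ownership transfers exactly once. The core loop is sketched below; TakeBuffers is a hypothetical name, and shaped_buffer is assumed to be one whose allocations the caller is adopting:

    #include <vector>

    #include "tensorflow/compiler/xla/service/shaped_buffer.h"

    namespace se = stream_executor;

    // Move every address out of the tree, leaving empty placeholders so
    // the original owner has nothing left to free.
    std::vector<se::DeviceMemoryBase> TakeBuffers(
        xla::ShapedBuffer *shaped_buffer) {
      std::vector<se::DeviceMemoryBase> buffers;
      for (auto &index_and_buffer : shaped_buffer->buffers()) {
        buffers.push_back(index_and_buffer.second);
        index_and_buffer.second = se::DeviceMemoryBase();  // clear the slot
      }
      return buffers;
    }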