Home
last modified time | relevance | path

Searched refs:DeviceMemory (Results 1 – 25 of 115) sorted by relevance

12345

/external/tensorflow/tensorflow/stream_executor/rocm/
Drocm_dnn.h103 const DeviceMemory<Eigen::half>& input_data,
105 const DeviceMemory<Eigen::half>& input_h_data,
107 const DeviceMemory<Eigen::half>& input_c_data,
108 const DeviceMemory<Eigen::half>& params,
110 DeviceMemory<Eigen::half>* output_data,
112 DeviceMemory<Eigen::half>* output_h_data,
114 DeviceMemory<Eigen::half>* output_c_data, bool is_training,
121 const DeviceMemory<float>& input_data,
123 const DeviceMemory<float>& input_h_data,
125 const DeviceMemory<float>& input_c_data,
[all …]
Drocm_blas.cc298 const DeviceMemory<T> &a) { in complex_cast()
311 DeviceMemory<T> *a) { in complex_cast()
445 const DeviceMemory<float> &x, int incx, in DoBlasAsum()
446 DeviceMemory<float> *result) { in DoBlasAsum()
453 const DeviceMemory<double> &x, int incx, in DoBlasAsum()
454 DeviceMemory<double> *result) { in DoBlasAsum()
461 const DeviceMemory<std::complex<float>> &x, int incx, in DoBlasAsum()
462 DeviceMemory<float> *result) { in DoBlasAsum()
469 const DeviceMemory<std::complex<double>> &x, int incx, in DoBlasAsum()
470 DeviceMemory<double> *result) { in DoBlasAsum()
[all …]
Drocm_dnn.cc1975 const DeviceMemory<T>& input_data, in ExtractAndCheckRnnForward()
1977 const DeviceMemory<T>& input_h_data, in ExtractAndCheckRnnForward()
1979 const DeviceMemory<T>& input_c_data, const DeviceMemory<T>& params, in ExtractAndCheckRnnForward()
1981 const DeviceMemory<T>& output_data, in ExtractAndCheckRnnForward()
1983 const DeviceMemory<T>& output_h_data, in ExtractAndCheckRnnForward()
1985 const DeviceMemory<T>& output_c_data, RnnModelDims* model_dims) { in ExtractAndCheckRnnForward()
2052 DeviceMemory<uint8>* workspace) { in CreateRnnWorkspace()
2074 *workspace = DeviceMemory<uint8>(); in CreateRnnWorkspace()
2085 const DeviceMemory<T>& input_data, in DoRnnForwardImpl()
2087 const DeviceMemory<T>& input_h_data, in DoRnnForwardImpl()
[all …]
Drocm_blas.h133 DeviceMemory<typename RocBlasTypeConversionHelper<T>::mapped_type>
157 const port::ArraySlice<DeviceMemory<T> *> &a_ptrs_to_wrappers, int lda,
158 const port::ArraySlice<DeviceMemory<T> *> &b_ptrs_to_wrappers, int ldb,
159 T beta, const port::ArraySlice<DeviceMemory<T> *> &c_ptrs_to_wrappers,
171 uint64 n, uint64 k, const CompT &alpha, const DeviceMemory<InT> &a,
172 int lda, const DeviceMemory<InT> &b, int ldb, const CompT &beta,
173 DeviceMemory<OutT> *c, int ldc, blas::ComputationType computation_type,
181 uint64 n, uint64 k, const ParamType &alpha, const DeviceMemory<T> &a,
182 int lda, const DeviceMemory<T> &b, int ldb, const ParamType &beta,
183 DeviceMemory<T> *c, int ldc, blas::ProfileResult *output_profile_result);
[all …]
/external/tensorflow/tensorflow/stream_executor/cuda/
Dcuda_dnn.h76 const DeviceMemory<Eigen::half>& input_data,
78 const DeviceMemory<Eigen::half>& input_h_data,
80 const DeviceMemory<Eigen::half>& input_c_data,
81 const DeviceMemory<Eigen::half>& params,
83 DeviceMemory<Eigen::half>* output_data,
85 DeviceMemory<Eigen::half>* output_h_data,
87 DeviceMemory<Eigen::half>* output_c_data, bool is_training,
94 const DeviceMemory<float>& input_data,
96 const DeviceMemory<float>& input_h_data,
98 const DeviceMemory<float>& input_c_data,
[all …]
Dcuda_blas.cc578 const DeviceMemory<float> &x, int incx, in DoBlasInternalImpl()
579 DeviceMemory<float> *result) { in DoBlasInternalImpl()
586 const DeviceMemory<double> &x, int incx, in DoBlasInternalImpl()
587 DeviceMemory<double> *result) { in DoBlasInternalImpl()
594 const DeviceMemory<std::complex<float>> &x, int incx, in DoBlasInternalImpl()
595 DeviceMemory<float> *result) { in DoBlasInternalImpl()
602 const DeviceMemory<std::complex<double>> &x, int incx, in DoBlasInternalImpl()
603 DeviceMemory<double> *result) { in DoBlasInternalImpl()
610 const DeviceMemory<float> &x, int incx, in DoBlasInternalImpl()
611 DeviceMemory<float> *y, int incy) { in DoBlasInternalImpl()
[all …]
Dcuda_dnn.cc1002 DeviceMemory<uint8> state_memory; in Create()
1600 const DeviceMemory<T>& input_data, in ExtractAndCheckRnnForward()
1602 const DeviceMemory<T>& input_h_data, in ExtractAndCheckRnnForward()
1604 const DeviceMemory<T>& input_c_data, const DeviceMemory<T>& params, in ExtractAndCheckRnnForward()
1606 const DeviceMemory<T>& output_data, in ExtractAndCheckRnnForward()
1608 const DeviceMemory<T>& output_h_data, in ExtractAndCheckRnnForward()
1610 const DeviceMemory<T>& output_c_data) { in ExtractAndCheckRnnForward()
1674 port::StatusOr<DeviceMemory<uint8>> CreateRnnWorkspace( in CreateRnnWorkspace()
1687 return DeviceMemory<uint8>(); in CreateRnnWorkspace()
1693 port::StatusOr<DeviceMemory<uint8>> CreateBatchNormForwardWorkspace( in CreateBatchNormForwardWorkspace()
[all …]
Dcuda_blas.h110 const port::ArraySlice<DeviceMemory<T> *> &a_array, int lda,
111 const port::ArraySlice<DeviceMemory<T> *> &b_array, int ldb, Scalar beta,
112 const port::ArraySlice<DeviceMemory<T> *> &c_array, int ldc,
120 const DeviceMemory<InT> &a, int lda, const DeviceMemory<InT> &b, int ldb,
121 const HostOrDeviceScalar<CompT> &beta, DeviceMemory<OutT> *c, int ldc,
129 uint64 n, uint64 k, const ParamType &alpha, const DeviceMemory<T> &a,
130 int lda, const DeviceMemory<T> &b, int ldb, const ParamType &beta,
131 DeviceMemory<T> *c, int ldc, blas::ProfileResult *output_profile_result);
137 const DeviceMemory<T> &a, int lda,
138 const DeviceMemory<T> &x, int incx,
[all …]
/external/tensorflow/tensorflow/stream_executor/
Dstream.h63 class DeviceMemory; variable
247 const DeviceMemory<float> &x, const DeviceMemory<float> &scale,
248 const DeviceMemory<float> &offset,
249 const DeviceMemory<float> &estimated_mean,
250 const DeviceMemory<float> &estimated_variance,
251 const DeviceMemory<float> &side_input, const dnn::BatchDescriptor &x_desc,
254 dnn::ActivationMode activation_mode, DeviceMemory<float> *y,
255 DeviceMemory<float> *batch_mean, DeviceMemory<float> *batch_var,
256 DeviceMemory<float> *saved_mean, DeviceMemory<float> *saved_inv_var,
262 const DeviceMemory<float> &y_backprop, const DeviceMemory<float> &x,
[all …]
Dblas.h61 class DeviceMemory; variable
251 const DeviceMemory<float> &x, int incx,
252 DeviceMemory<float> *result) = 0;
254 const DeviceMemory<double> &x, int incx,
255 DeviceMemory<double> *result) = 0;
257 const DeviceMemory<std::complex<float>> &x, int incx,
258 DeviceMemory<float> *result) = 0;
260 const DeviceMemory<std::complex<double>> &x, int incx,
261 DeviceMemory<double> *result) = 0;
265 const DeviceMemory<float> &x, int incx,
[all …]
Ddnn.h1012 Stream* stream, const DeviceMemory<float>& x, in DoBatchNormalizationForward()
1013 const DeviceMemory<float>& scale, const DeviceMemory<float>& offset, in DoBatchNormalizationForward()
1014 const DeviceMemory<float>& estimated_mean, in DoBatchNormalizationForward()
1015 const DeviceMemory<float>& estimated_variance, in DoBatchNormalizationForward()
1016 const DeviceMemory<float>& side_input, const dnn::BatchDescriptor& x_desc, in DoBatchNormalizationForward()
1019 dnn::ActivationMode activation_mode, DeviceMemory<float>* y, in DoBatchNormalizationForward()
1020 DeviceMemory<float>* batch_mean, DeviceMemory<float>* batch_var, in DoBatchNormalizationForward()
1021 DeviceMemory<float>* reserve_space_1, in DoBatchNormalizationForward()
1022 DeviceMemory<float>* reserve_space_2, bool is_training, in DoBatchNormalizationForward()
1031 Stream* stream, const DeviceMemory<Eigen::half>& x, in DoBatchNormalizationForward()
[all …]
Dstream.cc343 const DeviceMemory<float> &x, const DeviceMemory<float> &scale, in ThenBatchNormalizationForward()
344 const DeviceMemory<float> &offset, in ThenBatchNormalizationForward()
345 const DeviceMemory<float> &estimated_mean, in ThenBatchNormalizationForward()
346 const DeviceMemory<float> &estimated_variance, in ThenBatchNormalizationForward()
347 const DeviceMemory<float> &side_input, const dnn::BatchDescriptor &x_desc, in ThenBatchNormalizationForward()
350 dnn::ActivationMode activation_mode, DeviceMemory<float> *y, in ThenBatchNormalizationForward()
351 DeviceMemory<float> *batch_mean, DeviceMemory<float> *batch_var, in ThenBatchNormalizationForward()
352 DeviceMemory<float> *saved_mean, DeviceMemory<float> *saved_inv_var, in ThenBatchNormalizationForward()
371 const DeviceMemory<float> &y_backprop, const DeviceMemory<float> &x, in ThenBatchNormalizationBackward()
372 const DeviceMemory<float> &scale, const DeviceMemory<float> &mean, in ThenBatchNormalizationBackward()
[all …]
Dfft.h55 class DeviceMemory; variable
181 const DeviceMemory<std::complex<float>> &input,
182 DeviceMemory<std::complex<float>> *output) = 0;
184 const DeviceMemory<std::complex<double>> &input,
185 DeviceMemory<std::complex<double>> *output) = 0;
189 const DeviceMemory<float> &input,
190 DeviceMemory<std::complex<float>> *output) = 0;
192 const DeviceMemory<double> &input,
193 DeviceMemory<std::complex<double>> *output) = 0;
197 const DeviceMemory<std::complex<float>> &input,
[all …]
Drng.h29 class DeviceMemory; variable
54 DeviceMemory<float> *v) = 0;
56 DeviceMemory<double> *v) = 0;
58 DeviceMemory<std::complex<float>> *v) = 0;
60 DeviceMemory<std::complex<double>> *v) = 0;
65 DeviceMemory<float> *v) { in DoPopulateRandGaussian()
71 double stddev, DeviceMemory<double> *v) { in DoPopulateRandGaussian()
Ddevice_memory_allocator.h107 const DeviceMemory<ElemT> &cref() const { return wrapped_; } in cref()
112 DeviceMemory<ElemT> *ptr() { return &wrapped_; } in ptr()
113 const DeviceMemory<ElemT> *ptr() const { return &wrapped_; } in ptr()
118 const DeviceMemory<ElemT> &operator*() const { return cref(); }
119 DeviceMemory<ElemT> *operator->() { return ptr(); }
120 const DeviceMemory<ElemT> *operator->() const { return ptr(); }
130 DeviceMemory<ElemT> Release() { in Release()
131 DeviceMemory<ElemT> tmp = wrapped_; in Release()
132 wrapped_ = DeviceMemory<ElemT>{}; in Release()
145 DeviceMemory<ElemT> wrapped_; // Value we wrap with scoped-release.
[all …]
Ddevice_memory.h122 class DeviceMemory final : public DeviceMemoryBase {
125 DeviceMemory() : DeviceMemoryBase(nullptr, 0) {} in DeviceMemory() function
126 explicit DeviceMemory(std::nullptr_t) : DeviceMemory() {} in DeviceMemory() function
130 explicit DeviceMemory(const DeviceMemoryBase &other) in DeviceMemory() function
146 static DeviceMemory<ElemT> MakeFromByteSize(void *opaque, uint64 bytes) { in MakeFromByteSize()
147 return DeviceMemory<ElemT>(opaque, bytes); in MakeFromByteSize()
168 DeviceMemory(void *opaque, uint64 size) : DeviceMemoryBase(opaque, size) {} in DeviceMemory() function
/external/tensorflow/tensorflow/compiler/xla/service/gpu/
Dcudnn_batchnorm_runner.cc35 se::DeviceMemory<float> scale;
42 se::DeviceMemory<float> offset;
43 se::DeviceMemory<float> mean;
44 se::DeviceMemory<float> variance;
50 se::DeviceMemory<float> offset;
51 se::DeviceMemory<float> output_mean;
52 se::DeviceMemory<float> output_inv_stddev;
59 se::DeviceMemory<float> output_grad_scale;
60 se::DeviceMemory<float> output_grad_offset;
61 se::DeviceMemory<float> mean;
[all …]
Dcudnn_batchnorm_runner.h44 se::DeviceMemoryBase output, se::DeviceMemory<float> scale,
45 se::DeviceMemory<float> offset, se::DeviceMemory<float> mean,
46 se::DeviceMemory<float> variance, se::Stream *stream);
50 se::DeviceMemoryBase output_data, se::DeviceMemory<float> output_mean,
51 se::DeviceMemory<float> output_inv_stddev, se::DeviceMemory<float> scale,
52 se::DeviceMemory<float> offset, se::Stream *stream);
57 se::DeviceMemory<float> output_grad_scale,
58 se::DeviceMemory<float> output_grad_offset, se::DeviceMemory<float> scale,
59 se::DeviceMemory<float> mean, se::DeviceMemory<float> inv_stddev,
Dcudnn_batchnorm_thunk.cc58 se::DeviceMemory<float> scale(buffer_allocations.GetDeviceAddress(scale_)); in ExecuteOnStream()
59 se::DeviceMemory<float> offset(buffer_allocations.GetDeviceAddress(offset_)); in ExecuteOnStream()
60 se::DeviceMemory<float> mean(buffer_allocations.GetDeviceAddress(mean_)); in ExecuteOnStream()
61 se::DeviceMemory<float> variance( in ExecuteOnStream()
96 se::DeviceMemory<float> output_mean( in ExecuteOnStream()
98 se::DeviceMemory<float> output_inv_stddev( in ExecuteOnStream()
101 se::DeviceMemory<float> null_device_ptr(nullptr); in ExecuteOnStream()
107 se::DeviceMemory<float>(buffer_allocations.GetDeviceAddress(scale_)), in ExecuteOnStream()
108 se::DeviceMemory<float>(buffer_allocations.GetDeviceAddress(offset_)), in ExecuteOnStream()
145 se::DeviceMemory<float> output_grad_scale( in ExecuteOnStream()
[all …]
Dfft_thunk.cc40 StatusOr<se::DeviceMemory<uint8>> FftScratchAllocator::AllocateBytes( in AllocateBytes()
58 return se::DeviceMemory<uint8>(buffer_addr); in AllocateBytes()
183 se::DeviceMemory<complex64> input_data( in ExecuteOnStream()
185 se::DeviceMemory<complex64> output_data( in ExecuteOnStream()
191 se::DeviceMemory<complex128> input_data( in ExecuteOnStream()
193 se::DeviceMemory<complex128> output_data( in ExecuteOnStream()
199 se::DeviceMemory<complex64> input_data( in ExecuteOnStream()
201 se::DeviceMemory<complex64> output_data( in ExecuteOnStream()
213 se::DeviceMemory<complex128> input_data( in ExecuteOnStream()
215 se::DeviceMemory<complex128> output_data( in ExecuteOnStream()
[all …]
Dcholesky_thunk.cc79 se::DeviceMemory<int> info_data( in ExecuteOnStream()
84 context->Potrf(uplo_, n_, se::DeviceMemory<float>(a_data), n_, in ExecuteOnStream()
85 info_data, se::DeviceMemory<float>(workspace_data))); in ExecuteOnStream()
90 uplo_, n_, se::DeviceMemory<double>(a_data), n_, info_data, in ExecuteOnStream()
91 se::DeviceMemory<double>(workspace_data))); in ExecuteOnStream()
96 uplo_, n_, se::DeviceMemory<std::complex<float>>(a_data), n_, in ExecuteOnStream()
97 info_data, se::DeviceMemory<std::complex<float>>(workspace_data))); in ExecuteOnStream()
102 uplo_, n_, se::DeviceMemory<std::complex<double>>(a_data), n_, in ExecuteOnStream()
103 info_data, se::DeviceMemory<std::complex<double>>(workspace_data))); in ExecuteOnStream()
Dcusolver_context.h58 Status Potrf(se::blas::UpperLower uplo, int n, se::DeviceMemory<T> dev_A,
59 int lda, se::DeviceMemory<int> dev_lapack_info,
60 se::DeviceMemory<T> workspace) = delete;
81 se::blas::UpperLower uplo, int n, se::DeviceMemory<T> A, int lda, \
82 se::DeviceMemory<int> lapack_info, se::DeviceMemory<T> workspace);
103 Status Potrf(se::blas::UpperLower uplo, int n, se::DeviceMemory<T> dev_A,
104 int lda, se::DeviceMemory<int> dev_lapack_info,
105 se::DeviceMemory<T> workspace) {
/external/swiftshader/src/Vulkan/
DVkDeviceMemory.cpp62 class DeviceMemoryHostExternalBase : public DeviceMemory::ExternalBase
113 class ExternalMemoryHost : public vk::DeviceMemory::ExternalBase
293 DeviceMemory::DeviceMemory(const VkMemoryAllocateInfo *pAllocateInfo, void *mem, Device *pDevice) in DeviceMemory() function in vk::DeviceMemory
307 void DeviceMemory::destroy(const VkAllocationCallbacks *pAllocator) in destroy()
321 size_t DeviceMemory::ComputeRequiredAllocationSize(const VkMemoryAllocateInfo *pAllocateInfo) in ComputeRequiredAllocationSize()
328 VkResult DeviceMemory::allocate() in allocate()
356 VkResult DeviceMemory::map(VkDeviceSize pOffset, VkDeviceSize pSize, void **ppData) in map()
363 VkDeviceSize DeviceMemory::getCommittedMemoryInBytes() const in getCommittedMemoryInBytes()
368 void *DeviceMemory::getOffsetPointer(VkDeviceSize pOffset) const in getOffsetPointer()
374 bool DeviceMemory::checkExternalMemoryHandleType( in checkExternalMemoryHandleType()
[all …]
/external/tensorflow/tensorflow/stream_executor/gpu/
Dgpu_rng.h30 class DeviceMemory; variable
56 bool DoPopulateRandUniform(Stream* stream, DeviceMemory<float>* v) override;
57 bool DoPopulateRandUniform(Stream* stream, DeviceMemory<double>* v) override;
59 DeviceMemory<std::complex<float>>* v) override;
61 DeviceMemory<std::complex<double>>* v) override;
63 DeviceMemory<float>* v) override;
65 DeviceMemory<double>* v) override;
73 bool DoPopulateRandUniformInternal(Stream* stream, DeviceMemory<T>* v);
76 DeviceMemory<ElemT>* v, FuncT func);
Dredzone_allocator.cc63 port::StatusOr<DeviceMemory<uint8>> RedzoneAllocator::AllocateBytes( in AllocateBytes()
83 DeviceMemory<uint8> allocated_buffer_memory(*allocated_buffer); in AllocateBytes()
85 DeviceMemory<uint8> lhs_redzone = stream_->parent()->GetSubBuffer( in AllocateBytes()
88 DeviceMemory<uint8> data_chunk = stream_->parent()->GetSubBuffer( in AllocateBytes()
96 DeviceMemory<uint8> rhs_redzone_slop = stream_->parent()->GetSubBuffer( in AllocateBytes()
99 DeviceMemory<uint8> rhs_redzone_nonslop = stream_->parent()->GetSubBuffer( in AllocateBytes()
176 TypedKernel<DeviceMemory<uint8>, uint8, uint64, DeviceMemory<uint64>>;
216 const DeviceMemory<uint8>& redzone, in RunRedzoneChecker()
218 const DeviceMemory<uint64>& out_param, in RunRedzoneChecker()
252 const DeviceMemory<uint64>& out_param, in CheckRedzonesForBuffer()
[all …]

12345