/external/tensorflow/tensorflow/stream_executor/rocm/ |
D | rocm_dnn.h | 103 const DeviceMemory<Eigen::half>& input_data, 105 const DeviceMemory<Eigen::half>& input_h_data, 107 const DeviceMemory<Eigen::half>& input_c_data, 108 const DeviceMemory<Eigen::half>& params, 110 DeviceMemory<Eigen::half>* output_data, 112 DeviceMemory<Eigen::half>* output_h_data, 114 DeviceMemory<Eigen::half>* output_c_data, bool is_training, 121 const DeviceMemory<float>& input_data, 123 const DeviceMemory<float>& input_h_data, 125 const DeviceMemory<float>& input_c_data, [all …]
|
D | rocm_blas.cc | 298 const DeviceMemory<T> &a) { in complex_cast() 311 DeviceMemory<T> *a) { in complex_cast() 445 const DeviceMemory<float> &x, int incx, in DoBlasAsum() 446 DeviceMemory<float> *result) { in DoBlasAsum() 453 const DeviceMemory<double> &x, int incx, in DoBlasAsum() 454 DeviceMemory<double> *result) { in DoBlasAsum() 461 const DeviceMemory<std::complex<float>> &x, int incx, in DoBlasAsum() 462 DeviceMemory<float> *result) { in DoBlasAsum() 469 const DeviceMemory<std::complex<double>> &x, int incx, in DoBlasAsum() 470 DeviceMemory<double> *result) { in DoBlasAsum() [all …]
|
D | rocm_dnn.cc | 1975 const DeviceMemory<T>& input_data, in ExtractAndCheckRnnForward() 1977 const DeviceMemory<T>& input_h_data, in ExtractAndCheckRnnForward() 1979 const DeviceMemory<T>& input_c_data, const DeviceMemory<T>& params, in ExtractAndCheckRnnForward() 1981 const DeviceMemory<T>& output_data, in ExtractAndCheckRnnForward() 1983 const DeviceMemory<T>& output_h_data, in ExtractAndCheckRnnForward() 1985 const DeviceMemory<T>& output_c_data, RnnModelDims* model_dims) { in ExtractAndCheckRnnForward() 2052 DeviceMemory<uint8>* workspace) { in CreateRnnWorkspace() 2074 *workspace = DeviceMemory<uint8>(); in CreateRnnWorkspace() 2085 const DeviceMemory<T>& input_data, in DoRnnForwardImpl() 2087 const DeviceMemory<T>& input_h_data, in DoRnnForwardImpl() [all …]
|
D | rocm_blas.h | 133 DeviceMemory<typename RocBlasTypeConversionHelper<T>::mapped_type> 157 const port::ArraySlice<DeviceMemory<T> *> &a_ptrs_to_wrappers, int lda, 158 const port::ArraySlice<DeviceMemory<T> *> &b_ptrs_to_wrappers, int ldb, 159 T beta, const port::ArraySlice<DeviceMemory<T> *> &c_ptrs_to_wrappers, 171 uint64 n, uint64 k, const CompT &alpha, const DeviceMemory<InT> &a, 172 int lda, const DeviceMemory<InT> &b, int ldb, const CompT &beta, 173 DeviceMemory<OutT> *c, int ldc, blas::ComputationType computation_type, 181 uint64 n, uint64 k, const ParamType &alpha, const DeviceMemory<T> &a, 182 int lda, const DeviceMemory<T> &b, int ldb, const ParamType &beta, 183 DeviceMemory<T> *c, int ldc, blas::ProfileResult *output_profile_result); [all …]
|
/external/tensorflow/tensorflow/stream_executor/cuda/ |
D | cuda_dnn.h | 76 const DeviceMemory<Eigen::half>& input_data, 78 const DeviceMemory<Eigen::half>& input_h_data, 80 const DeviceMemory<Eigen::half>& input_c_data, 81 const DeviceMemory<Eigen::half>& params, 83 DeviceMemory<Eigen::half>* output_data, 85 DeviceMemory<Eigen::half>* output_h_data, 87 DeviceMemory<Eigen::half>* output_c_data, bool is_training, 94 const DeviceMemory<float>& input_data, 96 const DeviceMemory<float>& input_h_data, 98 const DeviceMemory<float>& input_c_data, [all …]
|
D | cuda_blas.cc | 578 const DeviceMemory<float> &x, int incx, in DoBlasInternalImpl() 579 DeviceMemory<float> *result) { in DoBlasInternalImpl() 586 const DeviceMemory<double> &x, int incx, in DoBlasInternalImpl() 587 DeviceMemory<double> *result) { in DoBlasInternalImpl() 594 const DeviceMemory<std::complex<float>> &x, int incx, in DoBlasInternalImpl() 595 DeviceMemory<float> *result) { in DoBlasInternalImpl() 602 const DeviceMemory<std::complex<double>> &x, int incx, in DoBlasInternalImpl() 603 DeviceMemory<double> *result) { in DoBlasInternalImpl() 610 const DeviceMemory<float> &x, int incx, in DoBlasInternalImpl() 611 DeviceMemory<float> *y, int incy) { in DoBlasInternalImpl() [all …]
|
D | cuda_dnn.cc | 1002 DeviceMemory<uint8> state_memory; in Create() 1600 const DeviceMemory<T>& input_data, in ExtractAndCheckRnnForward() 1602 const DeviceMemory<T>& input_h_data, in ExtractAndCheckRnnForward() 1604 const DeviceMemory<T>& input_c_data, const DeviceMemory<T>& params, in ExtractAndCheckRnnForward() 1606 const DeviceMemory<T>& output_data, in ExtractAndCheckRnnForward() 1608 const DeviceMemory<T>& output_h_data, in ExtractAndCheckRnnForward() 1610 const DeviceMemory<T>& output_c_data) { in ExtractAndCheckRnnForward() 1674 port::StatusOr<DeviceMemory<uint8>> CreateRnnWorkspace( in CreateRnnWorkspace() 1687 return DeviceMemory<uint8>(); in CreateRnnWorkspace() 1693 port::StatusOr<DeviceMemory<uint8>> CreateBatchNormForwardWorkspace( in CreateBatchNormForwardWorkspace() [all …]
|
D | cuda_blas.h | 110 const port::ArraySlice<DeviceMemory<T> *> &a_array, int lda, 111 const port::ArraySlice<DeviceMemory<T> *> &b_array, int ldb, Scalar beta, 112 const port::ArraySlice<DeviceMemory<T> *> &c_array, int ldc, 120 const DeviceMemory<InT> &a, int lda, const DeviceMemory<InT> &b, int ldb, 121 const HostOrDeviceScalar<CompT> &beta, DeviceMemory<OutT> *c, int ldc, 129 uint64 n, uint64 k, const ParamType &alpha, const DeviceMemory<T> &a, 130 int lda, const DeviceMemory<T> &b, int ldb, const ParamType &beta, 131 DeviceMemory<T> *c, int ldc, blas::ProfileResult *output_profile_result); 137 const DeviceMemory<T> &a, int lda, 138 const DeviceMemory<T> &x, int incx, [all …]
|
/external/tensorflow/tensorflow/stream_executor/ |
D | stream.h | 63 class DeviceMemory; variable 247 const DeviceMemory<float> &x, const DeviceMemory<float> &scale, 248 const DeviceMemory<float> &offset, 249 const DeviceMemory<float> &estimated_mean, 250 const DeviceMemory<float> &estimated_variance, 251 const DeviceMemory<float> &side_input, const dnn::BatchDescriptor &x_desc, 254 dnn::ActivationMode activation_mode, DeviceMemory<float> *y, 255 DeviceMemory<float> *batch_mean, DeviceMemory<float> *batch_var, 256 DeviceMemory<float> *saved_mean, DeviceMemory<float> *saved_inv_var, 262 const DeviceMemory<float> &y_backprop, const DeviceMemory<float> &x, [all …]
|
D | blas.h | 61 class DeviceMemory; variable 251 const DeviceMemory<float> &x, int incx, 252 DeviceMemory<float> *result) = 0; 254 const DeviceMemory<double> &x, int incx, 255 DeviceMemory<double> *result) = 0; 257 const DeviceMemory<std::complex<float>> &x, int incx, 258 DeviceMemory<float> *result) = 0; 260 const DeviceMemory<std::complex<double>> &x, int incx, 261 DeviceMemory<double> *result) = 0; 265 const DeviceMemory<float> &x, int incx, [all …]
|
D | dnn.h | 1012 Stream* stream, const DeviceMemory<float>& x, in DoBatchNormalizationForward() 1013 const DeviceMemory<float>& scale, const DeviceMemory<float>& offset, in DoBatchNormalizationForward() 1014 const DeviceMemory<float>& estimated_mean, in DoBatchNormalizationForward() 1015 const DeviceMemory<float>& estimated_variance, in DoBatchNormalizationForward() 1016 const DeviceMemory<float>& side_input, const dnn::BatchDescriptor& x_desc, in DoBatchNormalizationForward() 1019 dnn::ActivationMode activation_mode, DeviceMemory<float>* y, in DoBatchNormalizationForward() 1020 DeviceMemory<float>* batch_mean, DeviceMemory<float>* batch_var, in DoBatchNormalizationForward() 1021 DeviceMemory<float>* reserve_space_1, in DoBatchNormalizationForward() 1022 DeviceMemory<float>* reserve_space_2, bool is_training, in DoBatchNormalizationForward() 1031 Stream* stream, const DeviceMemory<Eigen::half>& x, in DoBatchNormalizationForward() [all …]
|
D | stream.cc | 343 const DeviceMemory<float> &x, const DeviceMemory<float> &scale, in ThenBatchNormalizationForward() 344 const DeviceMemory<float> &offset, in ThenBatchNormalizationForward() 345 const DeviceMemory<float> &estimated_mean, in ThenBatchNormalizationForward() 346 const DeviceMemory<float> &estimated_variance, in ThenBatchNormalizationForward() 347 const DeviceMemory<float> &side_input, const dnn::BatchDescriptor &x_desc, in ThenBatchNormalizationForward() 350 dnn::ActivationMode activation_mode, DeviceMemory<float> *y, in ThenBatchNormalizationForward() 351 DeviceMemory<float> *batch_mean, DeviceMemory<float> *batch_var, in ThenBatchNormalizationForward() 352 DeviceMemory<float> *saved_mean, DeviceMemory<float> *saved_inv_var, in ThenBatchNormalizationForward() 371 const DeviceMemory<float> &y_backprop, const DeviceMemory<float> &x, in ThenBatchNormalizationBackward() 372 const DeviceMemory<float> &scale, const DeviceMemory<float> &mean, in ThenBatchNormalizationBackward() [all …]
|
D | fft.h | 55 class DeviceMemory; variable 181 const DeviceMemory<std::complex<float>> &input, 182 DeviceMemory<std::complex<float>> *output) = 0; 184 const DeviceMemory<std::complex<double>> &input, 185 DeviceMemory<std::complex<double>> *output) = 0; 189 const DeviceMemory<float> &input, 190 DeviceMemory<std::complex<float>> *output) = 0; 192 const DeviceMemory<double> &input, 193 DeviceMemory<std::complex<double>> *output) = 0; 197 const DeviceMemory<std::complex<float>> &input, [all …]
|
D | rng.h | 29 class DeviceMemory; variable 54 DeviceMemory<float> *v) = 0; 56 DeviceMemory<double> *v) = 0; 58 DeviceMemory<std::complex<float>> *v) = 0; 60 DeviceMemory<std::complex<double>> *v) = 0; 65 DeviceMemory<float> *v) { in DoPopulateRandGaussian() 71 double stddev, DeviceMemory<double> *v) { in DoPopulateRandGaussian()
|
D | device_memory_allocator.h | 107 const DeviceMemory<ElemT> &cref() const { return wrapped_; } in cref() 112 DeviceMemory<ElemT> *ptr() { return &wrapped_; } in ptr() 113 const DeviceMemory<ElemT> *ptr() const { return &wrapped_; } in ptr() 118 const DeviceMemory<ElemT> &operator*() const { return cref(); } 119 DeviceMemory<ElemT> *operator->() { return ptr(); } 120 const DeviceMemory<ElemT> *operator->() const { return ptr(); } 130 DeviceMemory<ElemT> Release() { in Release() 131 DeviceMemory<ElemT> tmp = wrapped_; in Release() 132 wrapped_ = DeviceMemory<ElemT>{}; in Release() 145 DeviceMemory<ElemT> wrapped_; // Value we wrap with scoped-release. [all …]
|
D | device_memory.h | 122 class DeviceMemory final : public DeviceMemoryBase { 125 DeviceMemory() : DeviceMemoryBase(nullptr, 0) {} in DeviceMemory() function 126 explicit DeviceMemory(std::nullptr_t) : DeviceMemory() {} in DeviceMemory() function 130 explicit DeviceMemory(const DeviceMemoryBase &other) in DeviceMemory() function 146 static DeviceMemory<ElemT> MakeFromByteSize(void *opaque, uint64 bytes) { in MakeFromByteSize() 147 return DeviceMemory<ElemT>(opaque, bytes); in MakeFromByteSize() 168 DeviceMemory(void *opaque, uint64 size) : DeviceMemoryBase(opaque, size) {} in DeviceMemory() function
|
/external/tensorflow/tensorflow/compiler/xla/service/gpu/ |
D | cudnn_batchnorm_runner.cc | 35 se::DeviceMemory<float> scale; 42 se::DeviceMemory<float> offset; 43 se::DeviceMemory<float> mean; 44 se::DeviceMemory<float> variance; 50 se::DeviceMemory<float> offset; 51 se::DeviceMemory<float> output_mean; 52 se::DeviceMemory<float> output_inv_stddev; 59 se::DeviceMemory<float> output_grad_scale; 60 se::DeviceMemory<float> output_grad_offset; 61 se::DeviceMemory<float> mean; [all …]
|
D | cudnn_batchnorm_runner.h | 44 se::DeviceMemoryBase output, se::DeviceMemory<float> scale, 45 se::DeviceMemory<float> offset, se::DeviceMemory<float> mean, 46 se::DeviceMemory<float> variance, se::Stream *stream); 50 se::DeviceMemoryBase output_data, se::DeviceMemory<float> output_mean, 51 se::DeviceMemory<float> output_inv_stddev, se::DeviceMemory<float> scale, 52 se::DeviceMemory<float> offset, se::Stream *stream); 57 se::DeviceMemory<float> output_grad_scale, 58 se::DeviceMemory<float> output_grad_offset, se::DeviceMemory<float> scale, 59 se::DeviceMemory<float> mean, se::DeviceMemory<float> inv_stddev,
|
D | cudnn_batchnorm_thunk.cc | 58 se::DeviceMemory<float> scale(buffer_allocations.GetDeviceAddress(scale_)); in ExecuteOnStream() 59 se::DeviceMemory<float> offset(buffer_allocations.GetDeviceAddress(offset_)); in ExecuteOnStream() 60 se::DeviceMemory<float> mean(buffer_allocations.GetDeviceAddress(mean_)); in ExecuteOnStream() 61 se::DeviceMemory<float> variance( in ExecuteOnStream() 96 se::DeviceMemory<float> output_mean( in ExecuteOnStream() 98 se::DeviceMemory<float> output_inv_stddev( in ExecuteOnStream() 101 se::DeviceMemory<float> null_device_ptr(nullptr); in ExecuteOnStream() 107 se::DeviceMemory<float>(buffer_allocations.GetDeviceAddress(scale_)), in ExecuteOnStream() 108 se::DeviceMemory<float>(buffer_allocations.GetDeviceAddress(offset_)), in ExecuteOnStream() 145 se::DeviceMemory<float> output_grad_scale( in ExecuteOnStream() [all …]
|
D | fft_thunk.cc | 40 StatusOr<se::DeviceMemory<uint8>> FftScratchAllocator::AllocateBytes( in AllocateBytes() 58 return se::DeviceMemory<uint8>(buffer_addr); in AllocateBytes() 183 se::DeviceMemory<complex64> input_data( in ExecuteOnStream() 185 se::DeviceMemory<complex64> output_data( in ExecuteOnStream() 191 se::DeviceMemory<complex128> input_data( in ExecuteOnStream() 193 se::DeviceMemory<complex128> output_data( in ExecuteOnStream() 199 se::DeviceMemory<complex64> input_data( in ExecuteOnStream() 201 se::DeviceMemory<complex64> output_data( in ExecuteOnStream() 213 se::DeviceMemory<complex128> input_data( in ExecuteOnStream() 215 se::DeviceMemory<complex128> output_data( in ExecuteOnStream() [all …]
|
D | cholesky_thunk.cc | 79 se::DeviceMemory<int> info_data( in ExecuteOnStream() 84 context->Potrf(uplo_, n_, se::DeviceMemory<float>(a_data), n_, in ExecuteOnStream() 85 info_data, se::DeviceMemory<float>(workspace_data))); in ExecuteOnStream() 90 uplo_, n_, se::DeviceMemory<double>(a_data), n_, info_data, in ExecuteOnStream() 91 se::DeviceMemory<double>(workspace_data))); in ExecuteOnStream() 96 uplo_, n_, se::DeviceMemory<std::complex<float>>(a_data), n_, in ExecuteOnStream() 97 info_data, se::DeviceMemory<std::complex<float>>(workspace_data))); in ExecuteOnStream() 102 uplo_, n_, se::DeviceMemory<std::complex<double>>(a_data), n_, in ExecuteOnStream() 103 info_data, se::DeviceMemory<std::complex<double>>(workspace_data))); in ExecuteOnStream()
|
D | cusolver_context.h | 58 Status Potrf(se::blas::UpperLower uplo, int n, se::DeviceMemory<T> dev_A, 59 int lda, se::DeviceMemory<int> dev_lapack_info, 60 se::DeviceMemory<T> workspace) = delete; 81 se::blas::UpperLower uplo, int n, se::DeviceMemory<T> A, int lda, \ 82 se::DeviceMemory<int> lapack_info, se::DeviceMemory<T> workspace); 103 Status Potrf(se::blas::UpperLower uplo, int n, se::DeviceMemory<T> dev_A, 104 int lda, se::DeviceMemory<int> dev_lapack_info, 105 se::DeviceMemory<T> workspace) {
|
/external/swiftshader/src/Vulkan/ |
D | VkDeviceMemory.cpp | 62 class DeviceMemoryHostExternalBase : public DeviceMemory::ExternalBase 113 class ExternalMemoryHost : public vk::DeviceMemory::ExternalBase 293 DeviceMemory::DeviceMemory(const VkMemoryAllocateInfo *pAllocateInfo, void *mem, Device *pDevice) in DeviceMemory() function in vk::DeviceMemory 307 void DeviceMemory::destroy(const VkAllocationCallbacks *pAllocator) in destroy() 321 size_t DeviceMemory::ComputeRequiredAllocationSize(const VkMemoryAllocateInfo *pAllocateInfo) in ComputeRequiredAllocationSize() 328 VkResult DeviceMemory::allocate() in allocate() 356 VkResult DeviceMemory::map(VkDeviceSize pOffset, VkDeviceSize pSize, void **ppData) in map() 363 VkDeviceSize DeviceMemory::getCommittedMemoryInBytes() const in getCommittedMemoryInBytes() 368 void *DeviceMemory::getOffsetPointer(VkDeviceSize pOffset) const in getOffsetPointer() 374 bool DeviceMemory::checkExternalMemoryHandleType( in checkExternalMemoryHandleType() [all …]
|
/external/tensorflow/tensorflow/stream_executor/gpu/ |
D | gpu_rng.h | 30 class DeviceMemory; variable 56 bool DoPopulateRandUniform(Stream* stream, DeviceMemory<float>* v) override; 57 bool DoPopulateRandUniform(Stream* stream, DeviceMemory<double>* v) override; 59 DeviceMemory<std::complex<float>>* v) override; 61 DeviceMemory<std::complex<double>>* v) override; 63 DeviceMemory<float>* v) override; 65 DeviceMemory<double>* v) override; 73 bool DoPopulateRandUniformInternal(Stream* stream, DeviceMemory<T>* v); 76 DeviceMemory<ElemT>* v, FuncT func);
|
D | redzone_allocator.cc | 63 port::StatusOr<DeviceMemory<uint8>> RedzoneAllocator::AllocateBytes( in AllocateBytes() 83 DeviceMemory<uint8> allocated_buffer_memory(*allocated_buffer); in AllocateBytes() 85 DeviceMemory<uint8> lhs_redzone = stream_->parent()->GetSubBuffer( in AllocateBytes() 88 DeviceMemory<uint8> data_chunk = stream_->parent()->GetSubBuffer( in AllocateBytes() 96 DeviceMemory<uint8> rhs_redzone_slop = stream_->parent()->GetSubBuffer( in AllocateBytes() 99 DeviceMemory<uint8> rhs_redzone_nonslop = stream_->parent()->GetSubBuffer( in AllocateBytes() 176 TypedKernel<DeviceMemory<uint8>, uint8, uint64, DeviceMemory<uint64>>; 216 const DeviceMemory<uint8>& redzone, in RunRedzoneChecker() 218 const DeviceMemory<uint64>& out_param, in RunRedzoneChecker() 252 const DeviceMemory<uint64>& out_param, in CheckRedzonesForBuffer() [all …]
|