/external/tensorflow/tensorflow/stream_executor/rocm/ |
D | rocm_dnn.h | 66 const DeviceMemory<Eigen::half>& input_data, 68 const DeviceMemory<Eigen::half>& input_h_data, 70 const DeviceMemory<Eigen::half>& input_c_data, 71 const DeviceMemory<Eigen::half>& params, 73 DeviceMemory<Eigen::half>* output_data, 75 DeviceMemory<Eigen::half>* output_h_data, 77 DeviceMemory<Eigen::half>* output_c_data, bool is_training, 84 const DeviceMemory<float>& input_data, 86 const DeviceMemory<float>& input_h_data, 88 const DeviceMemory<float>& input_c_data, [all …]
|
D | rocm_blas.cc | 401 const DeviceMemory<float> &x, int incx, in DoBlasAsum() 402 DeviceMemory<float> *result) { in DoBlasAsum() 409 const DeviceMemory<double> &x, int incx, in DoBlasAsum() 410 DeviceMemory<double> *result) { in DoBlasAsum() 417 const DeviceMemory<std::complex<float>> &x, int incx, in DoBlasAsum() 418 DeviceMemory<float> *result) { in DoBlasAsum() 425 const DeviceMemory<std::complex<double>> &x, int incx, in DoBlasAsum() 426 DeviceMemory<double> *result) { in DoBlasAsum() 433 const DeviceMemory<float> &x, int incx, in DoBlasAxpy() 434 DeviceMemory<float> *y, int incy) { in DoBlasAxpy() [all …]
|
D | rocm_dnn.cc | 1894 const DeviceMemory<T>& input_data, in ExtractAndCheckRnnForward() 1896 const DeviceMemory<T>& input_h_data, in ExtractAndCheckRnnForward() 1898 const DeviceMemory<T>& input_c_data, const DeviceMemory<T>& params, in ExtractAndCheckRnnForward() 1900 const DeviceMemory<T>& output_data, in ExtractAndCheckRnnForward() 1902 const DeviceMemory<T>& output_h_data, in ExtractAndCheckRnnForward() 1904 const DeviceMemory<T>& output_c_data, RnnModelDims* model_dims) { in ExtractAndCheckRnnForward() 1971 DeviceMemory<uint8>* workspace) { in CreateRnnWorkspace() 1993 *workspace = DeviceMemory<uint8>(); in CreateRnnWorkspace() 2004 const DeviceMemory<T>& input_data, in DoRnnForwardImpl() 2006 const DeviceMemory<T>& input_h_data, in DoRnnForwardImpl() [all …]
|
D | rocm_blas.h | 106 const port::ArraySlice<DeviceMemory<T> *> &a_array, int lda, 107 const port::ArraySlice<DeviceMemory<T> *> &b_array, int ldb, T beta, 108 const port::ArraySlice<DeviceMemory<T> *> &c_array, int ldc, 120 uint64 n, uint64 k, const CompT &alpha, const DeviceMemory<InT> &a, 121 int lda, const DeviceMemory<InT> &b, int ldb, const CompT &beta, 122 DeviceMemory<OutT> *c, int ldc, blas::ComputationType computation_type, 130 uint64 n, uint64 k, const ParamType &alpha, const DeviceMemory<T> &a, 131 int lda, const DeviceMemory<T> &b, int ldb, const ParamType &beta, 132 DeviceMemory<T> *c, int ldc, blas::ProfileResult *output_profile_result); 138 const DeviceMemory<T> &a, int lda, [all …]
|
/external/tensorflow/tensorflow/stream_executor/cuda/ |
D | cuda_dnn.h | 75 const DeviceMemory<Eigen::half>& input_data, 77 const DeviceMemory<Eigen::half>& input_h_data, 79 const DeviceMemory<Eigen::half>& input_c_data, 80 const DeviceMemory<Eigen::half>& params, 82 DeviceMemory<Eigen::half>* output_data, 84 DeviceMemory<Eigen::half>* output_h_data, 86 DeviceMemory<Eigen::half>* output_c_data, bool is_training, 93 const DeviceMemory<float>& input_data, 95 const DeviceMemory<float>& input_h_data, 97 const DeviceMemory<float>& input_c_data, [all …]
|
D | cuda_blas.cc | 434 const DeviceMemory<float> &x, int incx, in DoBlasAsum() 435 DeviceMemory<float> *result) { in DoBlasAsum() 442 const DeviceMemory<double> &x, int incx, in DoBlasAsum() 443 DeviceMemory<double> *result) { in DoBlasAsum() 450 const DeviceMemory<std::complex<float>> &x, int incx, in DoBlasAsum() 451 DeviceMemory<float> *result) { in DoBlasAsum() 458 const DeviceMemory<std::complex<double>> &x, int incx, in DoBlasAsum() 459 DeviceMemory<double> *result) { in DoBlasAsum() 466 const DeviceMemory<float> &x, int incx, in DoBlasAxpy() 467 DeviceMemory<float> *y, int incy) { in DoBlasAxpy() [all …]
|
D | cuda_dnn.cc | 943 DeviceMemory<uint8> state_memory; in Create() 1414 const DeviceMemory<T>& input_data, in ExtractAndCheckRnnForward() 1416 const DeviceMemory<T>& input_h_data, in ExtractAndCheckRnnForward() 1418 const DeviceMemory<T>& input_c_data, const DeviceMemory<T>& params, in ExtractAndCheckRnnForward() 1420 const DeviceMemory<T>& output_data, in ExtractAndCheckRnnForward() 1422 const DeviceMemory<T>& output_h_data, in ExtractAndCheckRnnForward() 1424 const DeviceMemory<T>& output_c_data) { in ExtractAndCheckRnnForward() 1485 port::StatusOr<DeviceMemory<uint8>> CreateRnnWorkspace( in CreateRnnWorkspace() 1498 return DeviceMemory<uint8>(); in CreateRnnWorkspace() 1509 const DeviceMemory<T>& input_data, in DoRnnForwardImpl() [all …]
|
D | cuda_blas.h | 113 const port::ArraySlice<DeviceMemory<T> *> &a_array, int lda, 114 const port::ArraySlice<DeviceMemory<T> *> &b_array, int ldb, Scalar beta, 115 const port::ArraySlice<DeviceMemory<T> *> &c_array, int ldc, 123 const DeviceMemory<InT> &a, int lda, const DeviceMemory<InT> &b, int ldb, 124 const HostOrDeviceScalar<CompT> &beta, DeviceMemory<OutT> *c, int ldc, 132 uint64 n, uint64 k, const ParamType &alpha, const DeviceMemory<T> &a, 133 int lda, const DeviceMemory<T> &b, int ldb, const ParamType &beta, 134 DeviceMemory<T> *c, int ldc, blas::ProfileResult *output_profile_result); 140 const DeviceMemory<T> &a, int lda, 141 const DeviceMemory<T> &x, int incx, [all …]
|
D | cuda_rng.cc | 108 bool GpuRng::DoPopulateRandUniformInternal(Stream* stream, DeviceMemory<T>* v) { in DoPopulateRandUniformInternal() 144 bool GpuRng::DoPopulateRandUniform(Stream* stream, DeviceMemory<float>* v) { in DoPopulateRandUniform() 148 bool GpuRng::DoPopulateRandUniform(Stream* stream, DeviceMemory<double>* v) { in DoPopulateRandUniform() 153 DeviceMemory<std::complex<float>>* v) { in DoPopulateRandUniform() 158 DeviceMemory<std::complex<double>>* v) { in DoPopulateRandUniform() 165 DeviceMemory<ElemT>* v, in DoPopulateRandGaussianInternal() 188 DeviceMemory<float>* v) { in DoPopulateRandGaussian() 194 DeviceMemory<double>* v) { in DoPopulateRandGaussian()
|
/external/tensorflow/tensorflow/stream_executor/ |
D | stream.h | 62 class DeviceMemory; variable 233 const DeviceMemory<float> &x, const DeviceMemory<float> &scale, 234 const DeviceMemory<float> &offset, 235 const DeviceMemory<float> &estimated_mean, 236 const DeviceMemory<float> &estimated_variance, 239 DeviceMemory<float> *y, DeviceMemory<float> *batch_mean, 240 DeviceMemory<float> *batch_var, DeviceMemory<float> *saved_mean, 241 DeviceMemory<float> *saved_inv_var, bool is_training, 242 std::function<const DeviceMemory<float> &()> var_to_inv_var, 246 const DeviceMemory<float> &y_backprop, const DeviceMemory<float> &x, [all …]
|
D | blas.h | 60 class DeviceMemory; variable 182 const DeviceMemory<float> &x, int incx, 183 DeviceMemory<float> *result) = 0; 185 const DeviceMemory<double> &x, int incx, 186 DeviceMemory<double> *result) = 0; 188 const DeviceMemory<std::complex<float>> &x, int incx, 189 DeviceMemory<float> *result) = 0; 191 const DeviceMemory<std::complex<double>> &x, int incx, 192 DeviceMemory<double> *result) = 0; 196 const DeviceMemory<float> &x, int incx, [all …]
|
D | dnn.h | 991 Stream* stream, const DeviceMemory<float>& x, 992 const DeviceMemory<float>& scale, const DeviceMemory<float>& offset, 993 const DeviceMemory<float>& estimated_mean, 994 const DeviceMemory<float>& estimated_variance, 997 DeviceMemory<float>* y, DeviceMemory<float>* batch_mean, 998 DeviceMemory<float>* batch_var, DeviceMemory<float>* reserve_space_1, 999 DeviceMemory<float>* reserve_space_2, bool is_training, 1000 std::function<const DeviceMemory<float>&()> var_to_inv_var, 1008 Stream* stream, const DeviceMemory<Eigen::half>& x, 1009 const DeviceMemory<float>& scale, const DeviceMemory<float>& offset, [all …]
|
D | stream.cc | 340 const DeviceMemory<float> &x, const DeviceMemory<float> &scale, in ThenBatchNormalizationForward() 341 const DeviceMemory<float> &offset, in ThenBatchNormalizationForward() 342 const DeviceMemory<float> &estimated_mean, in ThenBatchNormalizationForward() 343 const DeviceMemory<float> &estimated_variance, in ThenBatchNormalizationForward() 346 DeviceMemory<float> *y, DeviceMemory<float> *batch_mean, in ThenBatchNormalizationForward() 347 DeviceMemory<float> *batch_var, DeviceMemory<float> *saved_mean, in ThenBatchNormalizationForward() 348 DeviceMemory<float> *saved_inv_var, bool is_training, in ThenBatchNormalizationForward() 349 std::function<const DeviceMemory<float> &()> var_to_inv_var, in ThenBatchNormalizationForward() 368 const DeviceMemory<float> &y_backprop, const DeviceMemory<float> &x, in ThenBatchNormalizationBackward() 369 const DeviceMemory<float> &scale, const DeviceMemory<float> &mean, in ThenBatchNormalizationBackward() [all …]
|
D | fft.h | 55 class DeviceMemory; variable 181 const DeviceMemory<std::complex<float>> &input, 182 DeviceMemory<std::complex<float>> *output) = 0; 184 const DeviceMemory<std::complex<double>> &input, 185 DeviceMemory<std::complex<double>> *output) = 0; 189 const DeviceMemory<float> &input, 190 DeviceMemory<std::complex<float>> *output) = 0; 192 const DeviceMemory<double> &input, 193 DeviceMemory<std::complex<double>> *output) = 0; 197 const DeviceMemory<std::complex<float>> &input, [all …]
|
D | device_memory.h | 119 class DeviceMemory final : public DeviceMemoryBase { 122 DeviceMemory() : DeviceMemoryBase(nullptr, 0) {} in DeviceMemory() function 123 DeviceMemory(std::nullptr_t) : DeviceMemory() {} in DeviceMemory() function 127 explicit DeviceMemory(const DeviceMemoryBase &other) in DeviceMemory() function 141 static DeviceMemory<ElemT> MakeFromByteSize(void *opaque, uint64 bytes) { in MakeFromByteSize() 142 return DeviceMemory<ElemT>(opaque, bytes); in MakeFromByteSize() 163 DeviceMemory(void *opaque, uint64 size) : DeviceMemoryBase(opaque, size) {} in DeviceMemory() function 232 const DeviceMemory<ElemT> &cref() const { return wrapped_; } in cref() 237 DeviceMemory<ElemT> *ptr() { return &wrapped_; } in ptr() 238 const DeviceMemory<ElemT> *ptr() const { return &wrapped_; } in ptr() [all …]
|
D | rng.h | 29 class DeviceMemory; variable 54 DeviceMemory<float> *v) = 0; 56 DeviceMemory<double> *v) = 0; 58 DeviceMemory<std::complex<float>> *v) = 0; 60 DeviceMemory<std::complex<double>> *v) = 0; 65 DeviceMemory<float> *v) { in DoPopulateRandGaussian() 71 double stddev, DeviceMemory<double> *v) { in DoPopulateRandGaussian()
|
D | stream_executor_pimpl.h | 123 DeviceMemory<T> AllocateArray(uint64 element_count); 134 DeviceMemory<T> AllocateScalar() { in AllocateScalar() 147 DeviceMemory<T> AllocateZeroed(); 164 DeviceMemory<T> AllocateSubBuffer(DeviceMemory<T> *parent, 170 ScopedDeviceMemory<T> AllocateOwnedSubBuffer(DeviceMemory<T> *parent, in AllocateOwnedSubBuffer() 187 port::StatusOr<DeviceMemory<T>> GetSymbol(const string &symbol_name, 294 port::Status SynchronousMemcpyD2H(const DeviceMemory<T> &device_src, in SynchronousMemcpyD2H() 764 inline DeviceMemory<T> StreamExecutor::AllocateArray(uint64 element_count) { in AllocateArray() 767 return DeviceMemory<T>::MakeFromByteSize(opaque, bytes); in AllocateArray() 771 inline port::StatusOr<DeviceMemory<T>> StreamExecutor::GetSymbol( in GetSymbol() [all …]
|
/external/tensorflow/tensorflow/compiler/xla/service/gpu/ |
D | cusolver_context.h | 50 Status Potrf(se::blas::UpperLower uplo, int n, se::DeviceMemory<float> dev_A, 51 int lda, se::DeviceMemory<int> dev_lapack_info, 52 se::DeviceMemory<float> workspace); 53 Status Potrf(se::blas::UpperLower uplo, int n, se::DeviceMemory<double> dev_A, 54 int lda, se::DeviceMemory<int> dev_lapack_info, 55 se::DeviceMemory<double> workspace); 57 se::DeviceMemory<std::complex<float>> dev_A, int lda, 58 se::DeviceMemory<int> dev_lapack_info, 59 se::DeviceMemory<std::complex<float>> workspace); 61 se::DeviceMemory<std::complex<double>> dev_A, int lda, [all …]
|
D | cudnn_batchnorm_thunk.cc | 109 se::DeviceMemory<float> output(buffer_allocations.GetDeviceAddress(output_)); in ExecuteOnStream() 112 se::DeviceMemory<float>(buffer_allocations.GetDeviceAddress(operand_)), in ExecuteOnStream() 113 se::DeviceMemory<float>(buffer_allocations.GetDeviceAddress(scale_)), in ExecuteOnStream() 114 se::DeviceMemory<float>(buffer_allocations.GetDeviceAddress(offset_)), in ExecuteOnStream() 115 se::DeviceMemory<float>(buffer_allocations.GetDeviceAddress(mean_)), in ExecuteOnStream() 116 se::DeviceMemory<float>(buffer_allocations.GetDeviceAddress(variance_)), in ExecuteOnStream() 174 se::DeviceMemory<float> output_data( in ExecuteOnStream() 176 se::DeviceMemory<float> output_mean( in ExecuteOnStream() 178 se::DeviceMemory<float> output_inv_stddev( in ExecuteOnStream() 181 se::DeviceMemory<float> null_device_ptr(nullptr); in ExecuteOnStream() [all …]
|
D | cholesky_thunk.cc | 83 se::DeviceMemory<int> info_data( in ExecuteOnStream() 88 context->Potrf(uplo_, n_, se::DeviceMemory<float>(a_data), n_, in ExecuteOnStream() 89 info_data, se::DeviceMemory<float>(workspace_data))); in ExecuteOnStream() 94 uplo_, n_, se::DeviceMemory<double>(a_data), n_, info_data, in ExecuteOnStream() 95 se::DeviceMemory<double>(workspace_data))); in ExecuteOnStream() 100 uplo_, n_, se::DeviceMemory<std::complex<float>>(a_data), n_, in ExecuteOnStream() 101 info_data, se::DeviceMemory<std::complex<float>>(workspace_data))); in ExecuteOnStream() 106 uplo_, n_, se::DeviceMemory<std::complex<double>>(a_data), n_, in ExecuteOnStream() 107 info_data, se::DeviceMemory<std::complex<double>>(workspace_data))); in ExecuteOnStream()
|
D | triangular_solve_thunk.cc | 96 se::DeviceMemory<float> b_data_typed(b_data); in ExecuteOnStream() 100 se::DeviceMemory<float>(a_data), lda, in ExecuteOnStream() 106 se::DeviceMemory<double> b_data_typed(b_data); in ExecuteOnStream() 110 se::DeviceMemory<double>(a_data), lda, in ExecuteOnStream() 116 se::DeviceMemory<std::complex<float>> b_data_typed(b_data); in ExecuteOnStream() 121 se::DeviceMemory<std::complex<float>>(a_data), in ExecuteOnStream() 127 se::DeviceMemory<std::complex<double>> b_data_typed(b_data); in ExecuteOnStream() 132 se::DeviceMemory<std::complex<double>>(a_data), in ExecuteOnStream()
|
D | fft_thunk.cc | 40 StatusOr<se::DeviceMemory<uint8>> FftScratchAllocator::AllocateBytes( in AllocateBytes() 58 return se::DeviceMemory<uint8>(buffer_addr); in AllocateBytes() 160 se::DeviceMemory<complex64> input_data( in ExecuteOnStream() 162 se::DeviceMemory<complex64> output_data( in ExecuteOnStream() 169 se::DeviceMemory<complex64> input_data( in ExecuteOnStream() 171 se::DeviceMemory<complex64> output_data( in ExecuteOnStream() 185 se::DeviceMemory<float> input_data( in ExecuteOnStream() 187 se::DeviceMemory<complex64> output_data( in ExecuteOnStream() 194 se::DeviceMemory<complex64> input_data( in ExecuteOnStream() 196 se::DeviceMemory<float> output_data( in ExecuteOnStream()
|
/external/swiftshader/src/Vulkan/ |
D | VkDeviceMemory.cpp | 22 DeviceMemory::DeviceMemory(const VkMemoryAllocateInfo* pCreateInfo, void* mem) : in DeviceMemory() function in vk::DeviceMemory 28 void DeviceMemory::destroy(const VkAllocationCallbacks* pAllocator) in destroy() 33 size_t DeviceMemory::ComputeRequiredAllocationSize(const VkMemoryAllocateInfo* pCreateInfo) in ComputeRequiredAllocationSize() 39 VkResult DeviceMemory::allocate() in allocate() 54 VkResult DeviceMemory::map(VkDeviceSize pOffset, VkDeviceSize pSize, void** ppData) in map() 61 VkDeviceSize DeviceMemory::getCommittedMemoryInBytes() const in getCommittedMemoryInBytes() 66 void* DeviceMemory::getOffsetPointer(VkDeviceSize pOffset) in getOffsetPointer()
|
D | VkDeviceMemory.hpp | 23 class DeviceMemory : public Object<DeviceMemory, VkDeviceMemory> class 26 DeviceMemory(const VkMemoryAllocateInfo* pCreateInfo, void* mem); 27 ~DeviceMemory() = delete; 44 static inline DeviceMemory* Cast(VkDeviceMemory object) in Cast() 46 return reinterpret_cast<DeviceMemory*>(object); in Cast()
|
/external/tensorflow/tensorflow/stream_executor/gpu/ |
D | gpu_rng.h | 31 class DeviceMemory; variable 57 bool DoPopulateRandUniform(Stream* stream, DeviceMemory<float>* v) override; 58 bool DoPopulateRandUniform(Stream* stream, DeviceMemory<double>* v) override; 60 DeviceMemory<std::complex<float>>* v) override; 62 DeviceMemory<std::complex<double>>* v) override; 64 DeviceMemory<float>* v) override; 66 DeviceMemory<double>* v) override; 74 bool DoPopulateRandUniformInternal(Stream* stream, DeviceMemory<T>* v); 77 DeviceMemory<ElemT>* v, FuncT func);
|