Home
last modified time | relevance | path

Searched defs:cuda_stream (Results 1 – 25 of 573) sorted by relevance

12345678910>>...23

/third_party/mindspore/mindspore-src/source/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_ops/
Dfft_with_size_impl.cu34 cudaStream_t cuda_stream) { in CalculateFFT()
44 const uint32_t &device_id, cudaStream_t cuda_stream) { in CalculateFFT()
54 cudaStream_t cuda_stream) { in CalculateIFFT()
64 const uint32_t &device_id, cudaStream_t cuda_stream) { in CalculateIFFT()
99 cudaStream_t cuda_stream) { in CalculateRFFT()
116 cudaStream_t cuda_stream) { in CalculateRFFT()
132 … cublasHandle_t scale_plan, const uint32_t &device_id, cudaStream_t cuda_stream) { in CalculateIRFFT()
152 cudaStream_t cuda_stream) { in CalculateIRFFT()
Dreal_to_complex_impl.cu21 __global__ void ToComplex(const size_t size, const T *input, T *output, cudaStream_t cuda_stream) { in ToComplex()
29 cudaError_t RealToComplex(const size_t size, const T *input, T *output, cudaStream_t cuda_stream) { in RealToComplex()
Dbatchnorm_fold_impl.cu52 …t CalUpdateRunningStd(int channel_size, double epsilon, T *running_std, cudaStream_t cuda_stream) { in CalUpdateRunningStd()
61 cudaError_t CalUpdateBatchStd(int channel_size, T *batch_std, cudaStream_t cuda_stream) { in CalUpdateBatchStd()
72 cudaStream_t cuda_stream) { in CalBatchNormFoldGrad()
85 cudaError_t ThrustFillWith(T *array, int size, T tofill, cudaStream_t cuda_stream) { in ThrustFillWith()
Dmish_impl.cu43 cudaStream_t cuda_stream) { in Mish()
50 cudaStream_t cuda_stream) { in Mish()
58 cudaStream_t cuda_stream) { in Mish()
Dsoftsign_impl.cu43 cudaStream_t cuda_stream) { in Softsign()
51 cudaStream_t cuda_stream) { in Softsign()
59 cudaStream_t cuda_stream) { in Softsign()
Dbessel_impl.cu483 cudaStream_t cuda_stream) { in CalBesselJ0()
490 cudaStream_t cuda_stream) { in CalBesselJ0()
497 cudaStream_t cuda_stream) { in CalBesselJ1()
504 cudaStream_t cuda_stream) { in CalBesselJ1()
511 cudaStream_t cuda_stream) { in CalBesselK0()
518 cudaStream_t cuda_stream) { in CalBesselK0e()
525 cudaStream_t cuda_stream) { in CalBesselK1()
532 cudaStream_t cuda_stream) { in CalBesselK1e()
539 cudaStream_t cuda_stream) { in CalBesselY0()
546 cudaStream_t cuda_stream) { in CalBesselY1()
[all …]
Dhistogram_fixed_width_impl.cu26 int64_t num_levels, cudaStream_t cuda_stream) { in HistogramFixedWidthKernel()
40 int64_t num_levels, cudaStream_t cuda_stream) { in CalHistogramFixedWidth()
Ddense_to_csr_sparse_matrix_gpu_kernel.cu32 …itIndices2D(const S *indices, S *row_indices, S *col_indices, int size, cudaStream_t cuda_stream) { in CallSplitIndices2D()
48 cudaStream_t cuda_stream) { in CallSplitIndices3D()
63 cudaStream_t cuda_stream) { in CallNNZPerBatch()
Drandom_op_impl.cu99 size_t count, cudaStream_t cuda_stream) { in StandardNormal()
107 cudaStream_t cuda_stream, bool *host_error_res) { in UniformInt()
117 size_t count, cudaStream_t cuda_stream) { in UniformReal()
124 cudaStream_t cuda_stream) { in TruncatedNormal()
132 T *output, size_t count, cudaStream_t cuda_stream) { in RandomPoisson()
140 cudaStream_t cuda_stream) { in StandardLaplace()
Dscatter_nd_functor_impl.cu77 … const T *updates, T *input, uint32_t device_id, cudaStream_t cuda_stream) { in CalScatterNdFunctor()
118 … std::complex<float> *input, uint32_t device_id, cudaStream_t cuda_stream) { in CalScatterNdFunctor()
129 … std::complex<float> *input, uint32_t device_id, cudaStream_t cuda_stream) { in CalScatterNdFunctor()
140 … std::complex<double> *input, uint32_t device_id, cudaStream_t cuda_stream) { in CalScatterNdFunctor()
151 … std::complex<double> *input, uint32_t device_id, cudaStream_t cuda_stream) { in CalScatterNdFunctor()
Dhsigmoid_impl.cu38 cudaError_t CalHSigmoid(const size_t &size, const T *input, T *output, cudaStream_t cuda_stream) { in CalHSigmoid()
44 …lHSigmoidGrad(const size_t &size, const T *dout, const T *x, T *output, cudaStream_t cuda_stream) { in CalHSigmoidGrad()
Ddropout_impl.cu34 cudaStream_t cuda_stream) { in DropoutForward()
49 cudaStream_t cuda_stream) { in DropoutBackward()
139 uint64_t seed_offset, cudaStream_t cuda_stream) { in FusedDropoutForward()
147 cudaStream_t cuda_stream) { in FusedDropoutForwardOnlyMask()
155 uint64_t seed_offset, cudaStream_t cuda_stream) { in FusedDropoutForwardOnlyOutput()
Dfake_learned_scale_quant_perlayer_impl.cu72 cudaStream_t cuda_stream) { in CalFakeLearnedScaleQuantPerLayer()
80 const bool neg_trunc, cudaStream_t cuda_stream) { in CalFakeLearnedScaleQuantPerLayerGrad()
88 cudaStream_t cuda_stream) { in CalLSQNudgePerLayer()
Deye_impl.cu47 cudaError_t BatchEye(const size_t size, const size_t dim, T *output_addr, cudaStream_t cuda_stream)… in BatchEye()
53 …(const size_t out_size, const int64_t nums, const int64_t cols, T *out, cudaStream_t cuda_stream) { in CudaEye()
Dfast_gelu_impl.cu60 cudaError_t FastGelu(size_t size, T *input_addr, T *output_addr, cudaStream_t cuda_stream) { in FastGelu()
66 cudaError_t FastGelu(size_t size, half *input_addr, half *output_addr, cudaStream_t cuda_stream) { in FastGelu()
118 …or_t FastGeluGradKernel(size_t size, T *dy_addr, T *x_addr, T *dx_addr, cudaStream_t cuda_stream) { in FastGeluGradKernel()
124 …GeluGradKernel(size_t size, half *dy_addr, half *x_addr, half *dx_addr, cudaStream_t cuda_stream) { in FastGeluGradKernel()
Dgelu_impl.cu55 cudaError_t Gelu(size_t size, const T *input_addr, T *output_addr, cudaStream_t cuda_stream, const … in Gelu()
61 cudaError_t Gelu(size_t size, const half *input_addr, half *output_addr, cudaStream_t cuda_stream, in Gelu()
119 cudaError_t GeluGradKernel(size_t size, T *dy_addr, T *x_addr, T *dx_addr, cudaStream_t cuda_stream, in GeluGradKernel()
127 …t GeluGradKernel(size_t size, half *dy_addr, half *x_addr, half *dx_addr, cudaStream_t cuda_stream, in GeluGradKernel()
Ddata_format_vec_permute_impl.cu41 const uint32_t &device_id, cudaStream_t cuda_stream) { in CalDataFormatVecPermute1D()
49 const uint32_t &device_id, cudaStream_t cuda_stream) { in CalDataFormatVecPermute2D()
Dfake_quant_perlayer_impl.cu94 … const float *nudge_max, const float *scale, cudaStream_t cuda_stream) { in CalFakeQuantPerLayer()
101 … const float *nudge_min, const float *nudge_max, cudaStream_t cuda_stream) { in CalFakeQuantPerLayerGrad()
109 cudaStream_t cuda_stream) { in CalNudgePerLayer()
Dcholesky_inverse_impl.cu42 cudaStream_t cuda_stream) { in CalCopyUpToLow()
49 cudaStream_t cuda_stream) { in CalCopyLowToUp()
Dlu_unpack_grad_impl.cu91 … const int64_t lu_data_width, const uint32_t &device_id, cudaStream_t cuda_stream) { in CalTrilExpendWidth()
100 … const int64_t lu_data_width, const uint32_t &device_id, cudaStream_t cuda_stream) { in CalTrilLower()
109 … const int64_t lu_data_width, const uint32_t &device_id, cudaStream_t cuda_stream) { in CalTriuExpendHeight()
118 … const int64_t lu_data_width, const uint32_t &device_id, cudaStream_t cuda_stream) { in CalTriuUpper()
Dtriangle_matrix_copy_impl.cu55 const size_t ldb, const size_t m, cudaStream_t cuda_stream) { in TriangleMatrixCopy()
76 cudaError_t MatrixCopy(const T *input, T *output, const size_t count, cudaStream_t cuda_stream) { in MatrixCopy()
/third_party/mindspore/mindspore-src/source/mindspore/ccsrc/plugin/device/gpu/kernel/
Dgpu_kernel_utils.h34 size_t *dev_shape, size_t *dev_axis, T *dst, cudaStream_t cuda_stream, in MatrixTransposeND()
51 cudaStream_t cuda_stream, const std::string &kernel_name) { in MatrixTransposeND()
59 … cuDoubleComplex *dst, cudaStream_t cuda_stream, const std::string &kernel_name) { in MatrixTransposeND()
/third_party/mindspore/mindspore-src/source/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/rl/
Drl_buffer_impl.cu121 … unsigned char *buffer, const unsigned char *exp, cudaStream_t cuda_stream) { in BufferAppend()
127 cudaStream_t cuda_stream) { in IncreaseCount()
133 cudaStream_t cuda_stream) { in ReMappingIndex()
139 unsigned char *out, cudaStream_t cuda_stream) { in BufferGetItem()
145 cudaStream_t cuda_stream) { in CheckBatchSize()
151 … const unsigned char *buffer, unsigned char *out, cudaStream_t cuda_stream) { in BufferSample()
/third_party/mindspore/mindspore-src/source/mindspore/ccsrc/plugin/device/gpu/kernel/rl/
Dtensors_queue_gpu_kernel.cc89 cudaStream_t cuda_stream = reinterpret_cast<cudaStream_t>(stream); in Launch() local
142 cudaStream_t cuda_stream = reinterpret_cast<cudaStream_t>(stream); in Launch() local
185 cudaStream_t cuda_stream = reinterpret_cast<cudaStream_t>(stream); in Launch() local
206 cudaStream_t cuda_stream = reinterpret_cast<cudaStream_t>(stream); in Launch() local
229 cudaStream_t cuda_stream = reinterpret_cast<cudaStream_t>(stream); in Launch() local
/third_party/mindspore/mindspore-src/source/mindspore/lite/src/extendrt/delegate/tensorrt/cuda_impl/
Dhash.cu46 const int hash_dim, cudaStream_t cuda_stream) { in DoHashSwapOut()
54 const int hash_dim, cudaStream_t cuda_stream) { in DoHashSwapIn()

12345678910>>...23