| /third_party/mindspore/mindspore-src/source/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_ops/ |
| D | fft_with_size_impl.cu | 34 cudaStream_t cuda_stream) { in CalculateFFT() 44 const uint32_t &device_id, cudaStream_t cuda_stream) { in CalculateFFT() 54 cudaStream_t cuda_stream) { in CalculateIFFT() 64 const uint32_t &device_id, cudaStream_t cuda_stream) { in CalculateIFFT() 99 cudaStream_t cuda_stream) { in CalculateRFFT() 116 cudaStream_t cuda_stream) { in CalculateRFFT() 132 … cublasHandle_t scale_plan, const uint32_t &device_id, cudaStream_t cuda_stream) { in CalculateIRFFT() 152 cudaStream_t cuda_stream) { in CalculateIRFFT()
|
| D | real_to_complex_impl.cu | 21 __global__ void ToComplex(const size_t size, const T *input, T *output, cudaStream_t cuda_stream) { in ToComplex() 29 cudaError_t RealToComplex(const size_t size, const T *input, T *output, cudaStream_t cuda_stream) { in RealToComplex()
|
| D | batchnorm_fold_impl.cu | 52 …t CalUpdateRunningStd(int channel_size, double epsilon, T *running_std, cudaStream_t cuda_stream) { in CalUpdateRunningStd() 61 cudaError_t CalUpdateBatchStd(int channel_size, T *batch_std, cudaStream_t cuda_stream) { in CalUpdateBatchStd() 72 cudaStream_t cuda_stream) { in CalBatchNormFoldGrad() 85 cudaError_t ThrustFillWith(T *array, int size, T tofill, cudaStream_t cuda_stream) { in ThrustFillWith()
|
| D | mish_impl.cu | 43 cudaStream_t cuda_stream) { in Mish() 50 cudaStream_t cuda_stream) { in Mish() 58 cudaStream_t cuda_stream) { in Mish()
|
| D | softsign_impl.cu | 43 cudaStream_t cuda_stream) { in Softsign() 51 cudaStream_t cuda_stream) { in Softsign() 59 cudaStream_t cuda_stream) { in Softsign()
|
| D | bessel_impl.cu | 483 cudaStream_t cuda_stream) { in CalBesselJ0() 490 cudaStream_t cuda_stream) { in CalBesselJ0() 497 cudaStream_t cuda_stream) { in CalBesselJ1() 504 cudaStream_t cuda_stream) { in CalBesselJ1() 511 cudaStream_t cuda_stream) { in CalBesselK0() 518 cudaStream_t cuda_stream) { in CalBesselK0e() 525 cudaStream_t cuda_stream) { in CalBesselK1() 532 cudaStream_t cuda_stream) { in CalBesselK1e() 539 cudaStream_t cuda_stream) { in CalBesselY0() 546 cudaStream_t cuda_stream) { in CalBesselY1() [all …]
|
| D | histogram_fixed_width_impl.cu | 26 int64_t num_levels, cudaStream_t cuda_stream) { in HistogramFixedWidthKernel() 40 int64_t num_levels, cudaStream_t cuda_stream) { in CalHistogramFixedWidth()
|
| D | dense_to_csr_sparse_matrix_gpu_kernel.cu | 32 …itIndices2D(const S *indices, S *row_indices, S *col_indices, int size, cudaStream_t cuda_stream) { in CallSplitIndices2D() 48 cudaStream_t cuda_stream) { in CallSplitIndices3D() 63 cudaStream_t cuda_stream) { in CallNNZPerBatch()
|
| D | random_op_impl.cu | 99 size_t count, cudaStream_t cuda_stream) { in StandardNormal() 107 cudaStream_t cuda_stream, bool *host_error_res) { in UniformInt() 117 size_t count, cudaStream_t cuda_stream) { in UniformReal() 124 cudaStream_t cuda_stream) { in TruncatedNormal() 132 T *output, size_t count, cudaStream_t cuda_stream) { in RandomPoisson() 140 cudaStream_t cuda_stream) { in StandardLaplace()
|
| D | scatter_nd_functor_impl.cu | 77 … const T *updates, T *input, uint32_t device_id, cudaStream_t cuda_stream) { in CalScatterNdFunctor() 118 … std::complex<float> *input, uint32_t device_id, cudaStream_t cuda_stream) { in CalScatterNdFunctor() 129 … std::complex<float> *input, uint32_t device_id, cudaStream_t cuda_stream) { in CalScatterNdFunctor() 140 … std::complex<double> *input, uint32_t device_id, cudaStream_t cuda_stream) { in CalScatterNdFunctor() 151 … std::complex<double> *input, uint32_t device_id, cudaStream_t cuda_stream) { in CalScatterNdFunctor()
|
| D | hsigmoid_impl.cu | 38 cudaError_t CalHSigmoid(const size_t &size, const T *input, T *output, cudaStream_t cuda_stream) { in CalHSigmoid() 44 …lHSigmoidGrad(const size_t &size, const T *dout, const T *x, T *output, cudaStream_t cuda_stream) { in CalHSigmoidGrad()
|
| D | dropout_impl.cu | 34 cudaStream_t cuda_stream) { in DropoutForward() 49 cudaStream_t cuda_stream) { in DropoutBackward() 139 uint64_t seed_offset, cudaStream_t cuda_stream) { in FusedDropoutForward() 147 cudaStream_t cuda_stream) { in FusedDropoutForwardOnlyMask() 155 uint64_t seed_offset, cudaStream_t cuda_stream) { in FusedDropoutForwardOnlyOutput()
|
| D | fake_learned_scale_quant_perlayer_impl.cu | 72 cudaStream_t cuda_stream) { in CalFakeLearnedScaleQuantPerLayer() 80 const bool neg_trunc, cudaStream_t cuda_stream) { in CalFakeLearnedScaleQuantPerLayerGrad() 88 cudaStream_t cuda_stream) { in CalLSQNudgePerLayer()
|
| D | eye_impl.cu | 47 cudaError_t BatchEye(const size_t size, const size_t dim, T *output_addr, cudaStream_t cuda_stream)… in BatchEye() 53 …(const size_t out_size, const int64_t nums, const int64_t cols, T *out, cudaStream_t cuda_stream) { in CudaEye()
|
| D | fast_gelu_impl.cu | 60 cudaError_t FastGelu(size_t size, T *input_addr, T *output_addr, cudaStream_t cuda_stream) { in FastGelu() 66 cudaError_t FastGelu(size_t size, half *input_addr, half *output_addr, cudaStream_t cuda_stream) { in FastGelu() 118 …or_t FastGeluGradKernel(size_t size, T *dy_addr, T *x_addr, T *dx_addr, cudaStream_t cuda_stream) { in FastGeluGradKernel() 124 …GeluGradKernel(size_t size, half *dy_addr, half *x_addr, half *dx_addr, cudaStream_t cuda_stream) { in FastGeluGradKernel()
|
| D | gelu_impl.cu | 55 cudaError_t Gelu(size_t size, const T *input_addr, T *output_addr, cudaStream_t cuda_stream, const … in Gelu() 61 cudaError_t Gelu(size_t size, const half *input_addr, half *output_addr, cudaStream_t cuda_stream, in Gelu() 119 cudaError_t GeluGradKernel(size_t size, T *dy_addr, T *x_addr, T *dx_addr, cudaStream_t cuda_stream, in GeluGradKernel() 127 …t GeluGradKernel(size_t size, half *dy_addr, half *x_addr, half *dx_addr, cudaStream_t cuda_stream, in GeluGradKernel()
|
| D | data_format_vec_permute_impl.cu | 41 const uint32_t &device_id, cudaStream_t cuda_stream) { in CalDataFormatVecPermute1D() 49 const uint32_t &device_id, cudaStream_t cuda_stream) { in CalDataFormatVecPermute2D()
|
| D | fake_quant_perlayer_impl.cu | 94 … const float *nudge_max, const float *scale, cudaStream_t cuda_stream) { in CalFakeQuantPerLayer() 101 … const float *nudge_min, const float *nudge_max, cudaStream_t cuda_stream) { in CalFakeQuantPerLayerGrad() 109 cudaStream_t cuda_stream) { in CalNudgePerLayer()
|
| D | cholesky_inverse_impl.cu | 42 cudaStream_t cuda_stream) { in CalCopyUpToLow() 49 cudaStream_t cuda_stream) { in CalCopyLowToUp()
|
| D | lu_unpack_grad_impl.cu | 91 … const int64_t lu_data_width, const uint32_t &device_id, cudaStream_t cuda_stream) { in CalTrilExpendWidth() 100 … const int64_t lu_data_width, const uint32_t &device_id, cudaStream_t cuda_stream) { in CalTrilLower() 109 … const int64_t lu_data_width, const uint32_t &device_id, cudaStream_t cuda_stream) { in CalTriuExpendHeight() 118 … const int64_t lu_data_width, const uint32_t &device_id, cudaStream_t cuda_stream) { in CalTriuUpper()
|
| D | triangle_matrix_copy_impl.cu | 55 const size_t ldb, const size_t m, cudaStream_t cuda_stream) { in TriangleMatrixCopy() 76 cudaError_t MatrixCopy(const T *input, T *output, const size_t count, cudaStream_t cuda_stream) { in MatrixCopy()
|
| /third_party/mindspore/mindspore-src/source/mindspore/ccsrc/plugin/device/gpu/kernel/ |
| D | gpu_kernel_utils.h | 34 size_t *dev_shape, size_t *dev_axis, T *dst, cudaStream_t cuda_stream, in MatrixTransposeND() 51 cudaStream_t cuda_stream, const std::string &kernel_name) { in MatrixTransposeND() 59 … cuDoubleComplex *dst, cudaStream_t cuda_stream, const std::string &kernel_name) { in MatrixTransposeND()
|
| /third_party/mindspore/mindspore-src/source/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/rl/ |
| D | rl_buffer_impl.cu | 121 … unsigned char *buffer, const unsigned char *exp, cudaStream_t cuda_stream) { in BufferAppend() 127 cudaStream_t cuda_stream) { in IncreaseCount() 133 cudaStream_t cuda_stream) { in ReMappingIndex() 139 unsigned char *out, cudaStream_t cuda_stream) { in BufferGetItem() 145 cudaStream_t cuda_stream) { in CheckBatchSize() 151 … const unsigned char *buffer, unsigned char *out, cudaStream_t cuda_stream) { in BufferSample()
|
| /third_party/mindspore/mindspore-src/source/mindspore/ccsrc/plugin/device/gpu/kernel/rl/ |
| D | tensors_queue_gpu_kernel.cc | 89 cudaStream_t cuda_stream = reinterpret_cast<cudaStream_t>(stream); in Launch() local 142 cudaStream_t cuda_stream = reinterpret_cast<cudaStream_t>(stream); in Launch() local 185 cudaStream_t cuda_stream = reinterpret_cast<cudaStream_t>(stream); in Launch() local 206 cudaStream_t cuda_stream = reinterpret_cast<cudaStream_t>(stream); in Launch() local 229 cudaStream_t cuda_stream = reinterpret_cast<cudaStream_t>(stream); in Launch() local
|
| /third_party/mindspore/mindspore-src/source/mindspore/lite/src/extendrt/delegate/tensorrt/cuda_impl/ |
| D | hash.cu | 46 const int hash_dim, cudaStream_t cuda_stream) { in DoHashSwapOut() 54 const int hash_dim, cudaStream_t cuda_stream) { in DoHashSwapIn()
|