/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/ |
D | unary_op_impl.cu | 409 void Exponential(const T *input, T *output, const size_t count, cudaStream_t cuda_stream) { in Exponential() 414 void Expm1(const T *input, T *output, const size_t count, cudaStream_t cuda_stream) { in Expm1() 419 void Logarithm(const T *input, T *output, const size_t count, cudaStream_t cuda_stream) { in Logarithm() 424 void Log1p(const T *input, T *output, const size_t count, cudaStream_t cuda_stream) { in Log1p() 429 void Erf(const T *input, T *output, const size_t count, cudaStream_t cuda_stream) { in Erf() 434 void Erfc(const T *input, T *output, const size_t count, cudaStream_t cuda_stream) { in Erfc() 439 void Negative(const T *input, T *output, const size_t count, cudaStream_t cuda_stream) { in Negative() 444 void Reciprocal(const T *input, T *output, const size_t count, cudaStream_t cuda_stream) { in Reciprocal() 449 void Square(const T *input, T *output, const size_t count, cudaStream_t cuda_stream) { in Square() 454 void Pow(const T *input, T *output, const size_t count, cudaStream_t cuda_stream) { in Pow() [all …]
|
D | batchnorm_fold_impl.cu | 53 void CalUpdateRunningStd(int channel_size, double epsilon, T* running_std, cudaStream_t cuda_stream… in CalUpdateRunningStd() 62 void CalUpdateBatchStd(int channel_size, T* batch_std, cudaStream_t cuda_stream) { in CalUpdateBatchStd() 72 cudaStream_t cuda_stream) { in CalBatchNormFoldGrad() 82 void ThrustFillWith(T* array, int size, T tofill, cudaStream_t cuda_stream) { in ThrustFillWith()
|
D | softplus_impl.cu | 37 void Softplus(const size_t size, const T *input_addr, T *output_addr, cudaStream_t cuda_stream) { in Softplus() 42 … Softplus(const size_t size, const half *input_addr, half *output_addr, cudaStream_t cuda_stream) { in Softplus() 65 …sGrad(const size_t size, const T *dy_addr, const T *x_addr, T *dx_addr, cudaStream_t cuda_stream) { in SoftplusGrad() 70 …st size_t size, const half *dy_addr, const half *x_addr, half *dx_addr, cudaStream_t cuda_stream) { in SoftplusGrad()
|
D | unary_op_grad_impl.cu | 126 … SqrtGrad(const T *input, const T *dout, T *output, const size_t count, cudaStream_t cuda_stream) { in SqrtGrad() 132 …RsqrtGrad(const T *input, const T *dout, T *output, const size_t count, cudaStream_t cuda_stream) { in RsqrtGrad() 138 … AsinGrad(const T *input, const T *dout, T *output, const size_t count, cudaStream_t cuda_stream) { in AsinGrad() 144 … ACosGrad(const T *input, const T *dout, T *output, const size_t count, cudaStream_t cuda_stream) { in ACosGrad() 150 … AtanGrad(const T *input, const T *dout, T *output, const size_t count, cudaStream_t cuda_stream) { in AtanGrad() 156 …AsinhGrad(const T *input, const T *dout, T *output, const size_t count, cudaStream_t cuda_stream) { in AsinhGrad() 162 …AcoshGrad(const T *input, const T *dout, T *output, const size_t count, cudaStream_t cuda_stream) { in AcoshGrad() 168 …rocalGrad(const T *input, const T *dout, T *output, const size_t count, cudaStream_t cuda_stream) { in ReciprocalGrad()
|
D | relu_impl.cu | 29 void CalReLU(int size, T *input_addr, T *output_addr, cudaStream_t cuda_stream) { in CalReLU() 57 void ReluV2(const size_t num, const T *x, T *y, uint32_t *mask, cudaStream_t cuda_stream) { in ReluV2() 70 … ReluGradV2(const size_t num, const T *dy, const uint32_t *mask, T *dx, cudaStream_t cuda_stream) { in ReluGradV2()
|
D | float_status_impl.cu | 109 void CalFloatStatus(const size_t size, const T* input, float* output, cudaStream_t cuda_stream) { in CalFloatStatus() 114 void CalIsNan(const size_t size, const T* input, bool* output, cudaStream_t cuda_stream) { in CalIsNan() 119 void CalIsInf(const size_t size, const T* input, bool* output, cudaStream_t cuda_stream) { in CalIsInf() 124 void CalIsFinite(const size_t size, const T* input, bool* output, cudaStream_t cuda_stream) { in CalIsFinite()
|
D | multinomial_impl.cu | 33 cudaStream_t cuda_stream) { in CheckZero() 49 void CheckNonNeg(const size_t size, const T *input, T *output, cudaStream_t cuda_stream) { in CheckNonNeg() 66 …ormInput(T *input, const size_t distributions, const size_t categories, cudaStream_t cuda_stream) { in NormInput() 106 size_t distributions, size_t categories, cudaStream_t cuda_stream) { in Multinomial()
|
D | hsigmoid_impl.cu | 37 void CalHSigmoid(const size_t &size, const T *input, T *output, cudaStream_t cuda_stream) { in CalHSigmoid() 42 …lHSigmoidGrad(const size_t &size, const T *dout, const T *x, T *output, cudaStream_t cuda_stream) { in CalHSigmoidGrad()
|
D | gelu_impl.cu | 55 void Gelu(size_t size, T *input_addr, T *output_addr, cudaStream_t cuda_stream) { in Gelu() 60 void Gelu(size_t size, half *input_addr, half *output_addr, cudaStream_t cuda_stream) { in Gelu() 115 void GeluGradKernel(size_t size, T *dy_addr, T *x_addr, T *dx_addr, cudaStream_t cuda_stream) { in GeluGradKernel() 120 …GeluGradKernel(size_t size, half *dy_addr, half *x_addr, half *dx_addr, cudaStream_t cuda_stream) { in GeluGradKernel()
|
D | fake_learned_scale_quant_perlayer_impl.cu | 73 cudaStream_t cuda_stream) { in CalFakeLearnedScaleQuantPerLayer() 81 cudaStream_t cuda_stream) { in CalFakeLearnedScaleQuantPerLayerGrad() 93 … float *input_div_alpha, float *input_quant, const bool neg_trunc, cudaStream_t cuda_stream) { in CalLSQNudgePerLayer()
|
D | random_op_impl.cu | 54 …int seed, int seed2, curandState *globalState, T *output, size_t count, cudaStream_t cuda_stream) { in StandardNormal() 70 T *input2, size_t input_size_2, T *output, size_t count, cudaStream_t cuda_stream) { in UniformInt() 89 …int seed, int seed2, curandState *globalState, T *output, size_t count, cudaStream_t cuda_stream) { in UniformReal()
|
D | fake_quant_perlayer_impl.cu | 92 const float *nudge_max, const float *scale, cudaStream_t cuda_stream) { in CalFakeQuantPerLayer() 99 … const float *nudge_min, const float *nudge_max, cudaStream_t cuda_stream) { in CalFakeQuantPerLayerGrad() 107 cudaStream_t cuda_stream) { in CalNudgePerLayer()
|
D | cross_entropy_impl.cu | 83 cudaStream_t cuda_stream) { in CrossEntropyWithSparse() 94 T *grad, cudaStream_t cuda_stream) { in CrossEntropyGradWithSparse() 101 T *dlogits, cudaStream_t cuda_stream) { in CrossEntropy()
|
D | hash_impl.cu | 46 const int hash_dim, cudaStream_t cuda_stream) { in DoHashSwapOut() 54 const int hash_dim, cudaStream_t cuda_stream) { in DoHashSwapIn()
|
D | hswish_impl.cu | 56 void CalHSwish(const size_t &size, const T *input, T *output, cudaStream_t cuda_stream) { in CalHSwish() 61 …CalHSwishGrad(const size_t &size, const T *dout, const T *x, T *output, cudaStream_t cuda_stream) { in CalHSwishGrad()
|
D | determinant_triangle_impl.cu | 30 void DetTriangle(T *input, T *output, size_t matrix_n_, size_t count, cudaStream_t cuda_stream) { in DetTriangle() 71 …CheckTriangle(T *input, int fill_mode_, size_t matrix_n_, size_t count, cudaStream_t cuda_stream) { in CheckTriangle()
|
D | add_relu_v2_impl.cu | 35 …eluV2(const size_t num, const T *x1, const T *x2, T *y, uint32_t *mask, cudaStream_t cuda_stream) { in AddReluV2() 48 …onst size_t num, const T *x1, const T *x2, const uint32_t *mask, T *dx, cudaStream_t cuda_stream) { in AddReluGradV2()
|
D | sparse_cross_entropy_cuda_impl.cu | 57 cudaStream_t cuda_stream) { in CalCrossEntropy() 64 cudaStream_t cuda_stream) { in CalCrossEntropyGrad()
|
D | momentum_impl.cu | 68 const S *momentum, bool use_nesterov, cudaStream_t cuda_stream) { in MomentumUpdateVariable() 87 cudaStream_t cuda_stream) { in FusedWeightDecayScaleMomentum() 105 const S *gradient, const T *momentum, cudaStream_t cuda_stream) { in FusedScaleMomentum() 124 … const T *learning_rate, const S *gradient, const T *momentum, cudaStream_t cuda_stream) { in FusedWeightDecayMomentum() 146 cudaStream_t cuda_stream) { in CombineFusedScaleMomentum() 171 … T **learning_rate, S **gradient, T **momentum, cudaStream_t cuda_stream) { in CombineFusedWeightDecayScaleMomentum()
|
D | fake_quant_perchannel_impl.cu | 70 cudaStream_t cuda_stream) { in CalNudgePerChannel() 113 cudaStream_t cuda_stream) { in CalFakeQuantPerChannel() 135 cudaStream_t cuda_stream) { in CalFakeQuantPerChannelGrad()
|
D | fake_learned_scale_quant_perchannel_impl.cu | 86 const int channel_num, cudaStream_t cuda_stream) { in CalFakeLearnedScaleQuantPerChannel() 94 … const bool neg_trunc, const int channel_num, cudaStream_t cuda_stream) { in CalFakeLearnedScaleQuantPerChannelGrad() 108 cudaStream_t cuda_stream) { in CalLSQNudgePerChannel()
|
D | batchnorm_fold2_impl.cu | 106 size_t N, size_t C, size_t H, size_t W, cudaStream_t cuda_stream) { in BatchNormFold2Forward() 119 size_t C, size_t H, size_t W, cudaStream_t cuda_stream) { in BatchNormFold2GradReduce() 134 … T *d_batch_mean, T *d_batch_std, size_t C, cudaStream_t cuda_stream) { in CalBatchNormFold2GradNotFreeze() 148 … T *d_batch_mean, T *d_batch_std, size_t C, cudaStream_t cuda_stream) { in CalBatchNormFold2GradFreeze() 163 size_t W, cudaStream_t cuda_stream) { in CalBatchNormFold2GradNotFreezeDxMul()
|
D | dynamic_range_impl.cu | 79 const int64_t max_output_size, cudaStream_t cuda_stream) { in CudaValidateInputAndInferShape() 86 … DynamicRangeErrorCode *error_code, const int64_t max_output_size, cudaStream_t cuda_stream) { in CalRange()
|
D | correction_mul_impl.cu | 50 cudaStream_t cuda_stream) { in CalCorrectionMul() 60 T* tmp, cudaStream_t cuda_stream) { in CalCorrectionMulGrad()
|
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/rl/ |
D | rl_buffer_impl.cu | 120 unsigned char *buffer, const unsigned char *exp, cudaStream_t cuda_stream) { in BufferAppend() 125 cudaStream_t cuda_stream) { in IncreaseCount() 129 …const int *count, const int *head, const int *origin_index, int *index, cudaStream_t cuda_stream) { in ReMappingIndex() 134 unsigned char *out, cudaStream_t cuda_stream) { in BufferGetItem() 139 cudaStream_t cuda_stream) { in CheckBatchSize() 144 unsigned char *out, cudaStream_t cuda_stream) { in BufferSample()
|