Home
last modified time | relevance | path

Searched defs:cuda_stream (Results 1 – 25 of 110) sorted by relevance

12345

/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/
Dunary_op_impl.cu409 void Exponential(const T *input, T *output, const size_t count, cudaStream_t cuda_stream) { in Exponential()
414 void Expm1(const T *input, T *output, const size_t count, cudaStream_t cuda_stream) { in Expm1()
419 void Logarithm(const T *input, T *output, const size_t count, cudaStream_t cuda_stream) { in Logarithm()
424 void Log1p(const T *input, T *output, const size_t count, cudaStream_t cuda_stream) { in Log1p()
429 void Erf(const T *input, T *output, const size_t count, cudaStream_t cuda_stream) { in Erf()
434 void Erfc(const T *input, T *output, const size_t count, cudaStream_t cuda_stream) { in Erfc()
439 void Negative(const T *input, T *output, const size_t count, cudaStream_t cuda_stream) { in Negative()
444 void Reciprocal(const T *input, T *output, const size_t count, cudaStream_t cuda_stream) { in Reciprocal()
449 void Square(const T *input, T *output, const size_t count, cudaStream_t cuda_stream) { in Square()
454 void Pow(const T *input, T *output, const size_t count, cudaStream_t cuda_stream) { in Pow()
[all …]
Dbatchnorm_fold_impl.cu53 void CalUpdateRunningStd(int channel_size, double epsilon, T* running_std, cudaStream_t cuda_stream in CalUpdateRunningStd()
62 void CalUpdateBatchStd(int channel_size, T* batch_std, cudaStream_t cuda_stream) { in CalUpdateBatchStd()
72 cudaStream_t cuda_stream) { in CalBatchNormFoldGrad()
82 void ThrustFillWith(T* array, int size, T tofill, cudaStream_t cuda_stream) { in ThrustFillWith()
Dsoftplus_impl.cu37 void Softplus(const size_t size, const T *input_addr, T *output_addr, cudaStream_t cuda_stream) { in Softplus()
42 … Softplus(const size_t size, const half *input_addr, half *output_addr, cudaStream_t cuda_stream) { in Softplus()
65 …sGrad(const size_t size, const T *dy_addr, const T *x_addr, T *dx_addr, cudaStream_t cuda_stream) { in SoftplusGrad()
70 …st size_t size, const half *dy_addr, const half *x_addr, half *dx_addr, cudaStream_t cuda_stream) { in SoftplusGrad()
Dunary_op_grad_impl.cu126 … SqrtGrad(const T *input, const T *dout, T *output, const size_t count, cudaStream_t cuda_stream) { in SqrtGrad()
132 …RsqrtGrad(const T *input, const T *dout, T *output, const size_t count, cudaStream_t cuda_stream) { in RsqrtGrad()
138 … AsinGrad(const T *input, const T *dout, T *output, const size_t count, cudaStream_t cuda_stream) { in AsinGrad()
144 … ACosGrad(const T *input, const T *dout, T *output, const size_t count, cudaStream_t cuda_stream) { in ACosGrad()
150 … AtanGrad(const T *input, const T *dout, T *output, const size_t count, cudaStream_t cuda_stream) { in AtanGrad()
156 …AsinhGrad(const T *input, const T *dout, T *output, const size_t count, cudaStream_t cuda_stream) { in AsinhGrad()
162 …AcoshGrad(const T *input, const T *dout, T *output, const size_t count, cudaStream_t cuda_stream) { in AcoshGrad()
168 …rocalGrad(const T *input, const T *dout, T *output, const size_t count, cudaStream_t cuda_stream) { in ReciprocalGrad()
Drelu_impl.cu29 void CalReLU(int size, T *input_addr, T *output_addr, cudaStream_t cuda_stream) { in CalReLU()
57 void ReluV2(const size_t num, const T *x, T *y, uint32_t *mask, cudaStream_t cuda_stream) { in ReluV2()
70 … ReluGradV2(const size_t num, const T *dy, const uint32_t *mask, T *dx, cudaStream_t cuda_stream) { in ReluGradV2()
Dfloat_status_impl.cu109 void CalFloatStatus(const size_t size, const T* input, float* output, cudaStream_t cuda_stream) { in CalFloatStatus()
114 void CalIsNan(const size_t size, const T* input, bool* output, cudaStream_t cuda_stream) { in CalIsNan()
119 void CalIsInf(const size_t size, const T* input, bool* output, cudaStream_t cuda_stream) { in CalIsInf()
124 void CalIsFinite(const size_t size, const T* input, bool* output, cudaStream_t cuda_stream) { in CalIsFinite()
Dmultinomial_impl.cu33 cudaStream_t cuda_stream) { in CheckZero()
49 void CheckNonNeg(const size_t size, const T *input, T *output, cudaStream_t cuda_stream) { in CheckNonNeg()
66 …ormInput(T *input, const size_t distributions, const size_t categories, cudaStream_t cuda_stream) { in NormInput()
106 size_t distributions, size_t categories, cudaStream_t cuda_stream) { in Multinomial()
Dhsigmoid_impl.cu37 void CalHSigmoid(const size_t &size, const T *input, T *output, cudaStream_t cuda_stream) { in CalHSigmoid()
42 …lHSigmoidGrad(const size_t &size, const T *dout, const T *x, T *output, cudaStream_t cuda_stream) { in CalHSigmoidGrad()
Dgelu_impl.cu55 void Gelu(size_t size, T *input_addr, T *output_addr, cudaStream_t cuda_stream) { in Gelu()
60 void Gelu(size_t size, half *input_addr, half *output_addr, cudaStream_t cuda_stream) { in Gelu()
115 void GeluGradKernel(size_t size, T *dy_addr, T *x_addr, T *dx_addr, cudaStream_t cuda_stream) { in GeluGradKernel()
120 …GeluGradKernel(size_t size, half *dy_addr, half *x_addr, half *dx_addr, cudaStream_t cuda_stream) { in GeluGradKernel()
Dfake_learned_scale_quant_perlayer_impl.cu73 cudaStream_t cuda_stream) { in CalFakeLearnedScaleQuantPerLayer()
81 cudaStream_t cuda_stream) { in CalFakeLearnedScaleQuantPerLayerGrad()
93 … float *input_div_alpha, float *input_quant, const bool neg_trunc, cudaStream_t cuda_stream) { in CalLSQNudgePerLayer()
Drandom_op_impl.cu54 …int seed, int seed2, curandState *globalState, T *output, size_t count, cudaStream_t cuda_stream) { in StandardNormal()
70 T *input2, size_t input_size_2, T *output, size_t count, cudaStream_t cuda_stream) { in UniformInt()
89 …int seed, int seed2, curandState *globalState, T *output, size_t count, cudaStream_t cuda_stream) { in UniformReal()
Dfake_quant_perlayer_impl.cu92 const float *nudge_max, const float *scale, cudaStream_t cuda_stream) { in CalFakeQuantPerLayer()
99 … const float *nudge_min, const float *nudge_max, cudaStream_t cuda_stream) { in CalFakeQuantPerLayerGrad()
107 cudaStream_t cuda_stream) { in CalNudgePerLayer()
Dcross_entropy_impl.cu83 cudaStream_t cuda_stream) { in CrossEntropyWithSparse()
94 T *grad, cudaStream_t cuda_stream) { in CrossEntropyGradWithSparse()
101 T *dlogits, cudaStream_t cuda_stream) { in CrossEntropy()
Dhash_impl.cu46 const int hash_dim, cudaStream_t cuda_stream) { in DoHashSwapOut()
54 const int hash_dim, cudaStream_t cuda_stream) { in DoHashSwapIn()
Dhswish_impl.cu56 void CalHSwish(const size_t &size, const T *input, T *output, cudaStream_t cuda_stream) { in CalHSwish()
61 …CalHSwishGrad(const size_t &size, const T *dout, const T *x, T *output, cudaStream_t cuda_stream) { in CalHSwishGrad()
Ddeterminant_triangle_impl.cu30 void DetTriangle(T *input, T *output, size_t matrix_n_, size_t count, cudaStream_t cuda_stream) { in DetTriangle()
71 …CheckTriangle(T *input, int fill_mode_, size_t matrix_n_, size_t count, cudaStream_t cuda_stream) { in CheckTriangle()
Dadd_relu_v2_impl.cu35 …eluV2(const size_t num, const T *x1, const T *x2, T *y, uint32_t *mask, cudaStream_t cuda_stream) { in AddReluV2()
48 …onst size_t num, const T *x1, const T *x2, const uint32_t *mask, T *dx, cudaStream_t cuda_stream) { in AddReluGradV2()
Dsparse_cross_entropy_cuda_impl.cu57 cudaStream_t cuda_stream) { in CalCrossEntropy()
64 cudaStream_t cuda_stream) { in CalCrossEntropyGrad()
Dmomentum_impl.cu68 const S *momentum, bool use_nesterov, cudaStream_t cuda_stream) { in MomentumUpdateVariable()
87 cudaStream_t cuda_stream) { in FusedWeightDecayScaleMomentum()
105 const S *gradient, const T *momentum, cudaStream_t cuda_stream) { in FusedScaleMomentum()
124 … const T *learning_rate, const S *gradient, const T *momentum, cudaStream_t cuda_stream) { in FusedWeightDecayMomentum()
146 cudaStream_t cuda_stream) { in CombineFusedScaleMomentum()
171 … T **learning_rate, S **gradient, T **momentum, cudaStream_t cuda_stream) { in CombineFusedWeightDecayScaleMomentum()
Dfake_quant_perchannel_impl.cu70 cudaStream_t cuda_stream) { in CalNudgePerChannel()
113 cudaStream_t cuda_stream) { in CalFakeQuantPerChannel()
135 cudaStream_t cuda_stream) { in CalFakeQuantPerChannelGrad()
Dfake_learned_scale_quant_perchannel_impl.cu86 const int channel_num, cudaStream_t cuda_stream) { in CalFakeLearnedScaleQuantPerChannel()
94 … const bool neg_trunc, const int channel_num, cudaStream_t cuda_stream) { in CalFakeLearnedScaleQuantPerChannelGrad()
108 cudaStream_t cuda_stream) { in CalLSQNudgePerChannel()
Dbatchnorm_fold2_impl.cu106 size_t N, size_t C, size_t H, size_t W, cudaStream_t cuda_stream) { in BatchNormFold2Forward()
119 size_t C, size_t H, size_t W, cudaStream_t cuda_stream) { in BatchNormFold2GradReduce()
134 … T *d_batch_mean, T *d_batch_std, size_t C, cudaStream_t cuda_stream) { in CalBatchNormFold2GradNotFreeze()
148 … T *d_batch_mean, T *d_batch_std, size_t C, cudaStream_t cuda_stream) { in CalBatchNormFold2GradFreeze()
163 size_t W, cudaStream_t cuda_stream) { in CalBatchNormFold2GradNotFreezeDxMul()
Ddynamic_range_impl.cu79 const int64_t max_output_size, cudaStream_t cuda_stream) { in CudaValidateInputAndInferShape()
86 … DynamicRangeErrorCode *error_code, const int64_t max_output_size, cudaStream_t cuda_stream) { in CalRange()
Dcorrection_mul_impl.cu50 cudaStream_t cuda_stream) { in CalCorrectionMul()
60 T* tmp, cudaStream_t cuda_stream) { in CalCorrectionMulGrad()
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/rl/
Drl_buffer_impl.cu120 unsigned char *buffer, const unsigned char *exp, cudaStream_t cuda_stream) { in BufferAppend()
125 cudaStream_t cuda_stream) { in IncreaseCount()
129 …const int *count, const int *head, const int *origin_index, int *index, cudaStream_t cuda_stream) { in ReMappingIndex()
134 unsigned char *out, cudaStream_t cuda_stream) { in BufferGetItem()
139 cudaStream_t cuda_stream) { in CheckBatchSize()
144 unsigned char *out, cudaStream_t cuda_stream) { in BufferSample()

12345