/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/ |
D | unary_op_impl.cu | 19 __global__ void ExponentialKernel(const T *input, T *output, const size_t count) { in ExponentialKernel() 26 __global__ void ExponentialKernel(const double *input, double *output, const size_t count) { in ExponentialKernel() 33 __global__ void ExponentialKernel(const half *input, half *output, const size_t count) { in ExponentialKernel() 40 __global__ void Expm1Kernel(const T *input, T *output, const size_t count) { in Expm1Kernel() 47 __global__ void Expm1Kernel(const double *input, double *output, const size_t count) { in Expm1Kernel() 54 __global__ void LogarithmKernel(const T *input, T *output, const size_t count) { in LogarithmKernel() 61 __global__ void LogarithmKernel(const double *input, double *output, const size_t count) { in LogarithmKernel() 68 __global__ void LogarithmKernel(const half *input, half *output, const size_t count) { in LogarithmKernel() 75 __global__ void Log1pKernel(const T *input, T *output, const size_t count) { in Log1pKernel() 82 __global__ void Log1pKernel(const double *input, double *output, const size_t count) { in Log1pKernel() [all …]
|
D | float_status_impl.cu | 21 __global__ void IsNan(const size_t size, const T* input, bool* out) { in IsNan() 32 __global__ void IsNan(const size_t size, const half* input, bool* out) { in IsNan() 44 __global__ void IsInf(const size_t size, const T* input, bool* out) { in IsInf() 55 __global__ void IsInf(const size_t size, const half* input, bool* out) { in IsInf() 67 __global__ void IsFinite(const size_t size, const T* input, bool* out) { in IsFinite() 78 __global__ void IsFinite(const size_t size, const half* input, bool* out) { in IsFinite() 90 __global__ void FloatStatus(const size_t size, const T* input, float* out) { in FloatStatus() 99 __global__ void FloatStatus(const size_t size, const half* input, float* out) { in FloatStatus()
|
D | square_sum_all_impl.cu | 21 __global__ void SquareSumAllKernel(const size_t size, const T* input_addr_0, const T* input_addr_1, in SquareSumAllKernel() 38 __global__ void SquareSumAllKernel(const size_t size, const float* input_addr_0, const float* input… in SquareSumAllKernel() 55 __global__ void AssignKernel(const size_t size, T* output_addr_0, T* output_addr_1, in AssignKernel() 65 __global__ void AssignKernel(const size_t size, float* output_addr_0, float* output_addr_1, in AssignKernel() 75 __global__ void InitOutput(const size_t size, T *output) { in InitOutput()
|
D | unary_op_grad_impl.cu | 20 __global__ void SqrtGradKernel(const T *input, const T *dout, T *output, const size_t count) { in SqrtGradKernel() 31 __global__ void RsqrtGradKernel(const T *input, const T *dout, T *output, const size_t count) { in RsqrtGradKernel() 43 __global__ void AsinGradKernel(const T *input, const T *dout, T *output, const size_t count) { in AsinGradKernel() 53 __global__ void AsinGradKernel(const half *input, const half *dout, half *output, const size_t coun… in AsinGradKernel() 63 __global__ void ACosGradKernel(const T *input, const T *dout, T *output, const size_t count) { in ACosGradKernel() 74 __global__ void ACosGradKernel(const half *input, const half *dout, half *output, const size_t coun… in ACosGradKernel() 85 __global__ void AtanGradKernel(const T *input, const T *dout, T *output, const size_t count) { in AtanGradKernel() 95 __global__ void AsinhGradKernel(const T *input, const T *dout, T *output, const size_t count) { in AsinhGradKernel() 105 __global__ void AcoshGradKernel(const T *input, const T *dout, T *output, const size_t count) { in AcoshGradKernel() 115 __global__ void ReciprocalGradKernel(const T *input, const T *dout, T *output, const size_t count) { in ReciprocalGradKernel()
|
D | ctcloss_impl.cu | 31 __global__ void CalculateFwdVarKernel(T *log_alpha_b, int *label_value_with_blank, T *softmax_probs, in CalculateFwdVarKernel() 90 __global__ void CalculateBwdVarKernel(T *log_beta_b, int *label_value_with_blank, T *softmax_probs, in CalculateBwdVarKernel() 157 __global__ void ProbInitKernel(T *prob_num, int size) { in ProbInitKernel() 163 __global__ void LogBInitKernel(T *log_b, int log_prob_size) { in LogBInitKernel() 170 __global__ void CTCLossKernel(T *log_alpha_b, T *log_beta_b, T *softmax_probs, int *label_value_wit… in CTCLossKernel() 221 __global__ void InnerSoftMaxKernel(const T *probs, T *softmax_probs, const int *sequence_length, in… in InnerSoftMaxKernel() 245 __global__ void GenLabelValuePCRKernel(int *label_value_sp, int *label_value_pcr, int *label_squenc… in GenLabelValuePCRKernel() 262 __global__ void UpdateLengthKernel(int *label_squence_length, int *cum_labels_length, int *max_labe… in UpdateLengthKernel() 306 __global__ void GenLabelWithBlankKernel(int *label_value, int *label_value_with_blank, int *label_s… in GenLabelWithBlankKernel() 336 __global__ void GenLabelValueKernel(int *label_value_sp, const int64_t *label_indices, const int *l… in GenLabelValueKernel() [all …]
|
D | adagrad_impl.cu | 30 __global__ void ApplyAdagradKernel(const size_t size, in ApplyAdagradKernel() 45 __global__ void ApplyAdagradKernel(const size_t size, in ApplyAdagradKernel() 60 __global__ void ApplyAdagradKernel(const size_t size, in ApplyAdagradKernel() 75 __global__ void ApplyAdagradKernel(const size_t size, in ApplyAdagradKernel() 90 __global__ void ApplyAdagradKernel(const size_t size, in ApplyAdagradKernel()
|
D | loss_with_reduction_impl.cu | 30 __global__ void Copy(T *loss, T *tmp_loss, int reduction, int input_size) { in Copy() 39 __global__ void CopyEqual(const T *src, T *dest, const int size) { in CopyEqual() 46 __global__ void AddTile(T *tmp_loss, int index) { in AddTile() 50 __global__ void PartialSum(T *tmp_loss, int stride) { in PartialSum() 77 __global__ void Divide(const T *numerator, const S *denominator, T *result) { in Divide() 82 __global__ void Divide(const float *numerator, const half *denominator, float *result) { in Divide() 89 __global__ void Divide(const half *numerator, const float *denominator, half *result) { in Divide() 125 __global__ void LossInitKernel(T *loss) { in LossInitKernel() 130 __global__ void InitZero(T *array, int size) { in InitZero() 137 __global__ void KLDivLossKernel(const int input_size, const int reduction, const T *input_x, const … in KLDivLossKernel() [all …]
|
D | random_choice_with_mask_impl.cu | 39 __global__ void InitArray(const int input_size, const int ceil_power2, const T *input, S *mask_buff… in InitArray() 57 __global__ void ReductionSum(T *g_idata, T *g_odata, size_t n) { in ReductionSum() 102 __global__ void Reshape2Index(const int input_size, const int input_shape_size, const int d1, const… in Reshape2Index() 127 __global__ void Copy(const T *src, T *dst, const int n) { in Copy() 134 __global__ void Sort(const int ceil_power2, T *rank_buff) { in Sort() 156 __global__ void SrandInit(const int ceil_power2, curandState *globalState, const int seedc) { in SrandInit() 163 __global__ void Shuffle(const int ceil_power2, curandState *globalState, T *rank_buff) { in Shuffle() 184 __global__ void MoveToOutput(const int input_shape_size, const int count, const T *input, S *output… in MoveToOutput()
|
D | gelu_impl.cu | 21 __global__ void GeluKernel(size_t size, T *input_addr, T *output_addr) { in GeluKernel() 34 __global__ void GeluKernel(size_t size, half *input_addr, half *output_addr) { in GeluKernel() 43 __global__ void GeluKernel(size_t size, half2 *input_addr, half2 *output_addr) { in GeluKernel() 70 __global__ void GeluGradKernel(size_t size, T *dy_addr, T *x_addr, T *dx_addr) { in GeluGradKernel() 87 __global__ void GeluGradKernel(size_t size, half2 *dy_addr, half2 *x_addr, half2 *dx_addr) { in GeluGradKernel() 103 __global__ void GeluGradKernel(size_t size, half *dy_addr, half *x_addr, half *dx_addr) { in GeluGradKernel()
|
D | softplus_impl.cu | 21 __global__ void SoftplusKernel(const size_t size, const T *input_addr, T *output_addr) { in SoftplusKernel() 29 __global__ void SoftplusKernel(const size_t size, const half *input_addr, half *output_addr) { in SoftplusKernel() 47 __global__ void SoftplusGradKernel(const size_t size, const T *dy_addr, const T *x_addr, T *dx_addr… in SoftplusGradKernel() 55 __global__ void SoftplusGradKernel(const size_t size, const half *dy_addr, const half *x_addr, half… in SoftplusGradKernel()
|
D | dropout_impl.cu | 21 __global__ void DropoutForwardKernel(const T *input, T *mask, T *output, float *mask_f, size_t num_… in DropoutForwardKernel() 31 __global__ void DropoutForwardKernel(const half *input, half *mask, half *output, float *mask_f, in DropoutForwardKernel() 47 __global__ void DropoutBackwardKernel(const T *dy, const T *mask, T *dx, size_t num_count, in DropoutBackwardKernel() 55 __global__ void DropoutBackwardKernel(const half *dy, const half *mask, half *dx, size_t num_count, in DropoutBackwardKernel()
|
D | nms_with_mask_impl.cu | 40 __global__ void MaskInit(int numSq, bool *row_mask) { in MaskInit() 49 __global__ void PopulateOutput(T *data_in, T *data_out, int *index_buff, const int num, int box_siz… in PopulateOutput() 100 __global__ void Preprocess(const int num, int *sel_idx, bool *sel_boxes, T *output, int box_size) { in Preprocess() 110 __global__ void NmsPass(const int num, const float IOU_value, T *output, bool *sel_boxes, int box_s… in NmsPass() 126 __global__ void ReducePass(const int num, bool *sel_boxes, bool *row_mask) { in ReducePass() 142 __global__ void NmsBitonicSortByKeyKernel(const int outer, const int inner, const int ceil_power2, … in NmsBitonicSortByKeyKernel()
|
D | bce_with_logits_loss_impl.cu | 22 __global__ void FillWithoutBroadcast(const size_t size, const T *src, T *dst) { in FillWithoutBroadcast() 30 __global__ void FillAndBroadcast(const size_t size, const size_t shape_size, const size_t *src_shap… in FillAndBroadcast() 58 __global__ void BCEWithLogitsLossMain(size_t size, const T *predict, const T *target, const T *shap… in BCEWithLogitsLossMain() 71 __global__ void BCEWithLogitsLossMain(size_t size, const half *predict, const half *target, in BCEWithLogitsLossMain() 84 __global__ void Mul(size_t size, const T *lhs, T *rhs) { in Mul()
|
D | multinomial_impl.cu | 21 __global__ void CheckZeroKernel(const size_t distributions, const size_t categories, const T *input… in CheckZeroKernel() 38 __global__ void CheckNonNegKernel(const size_t size, const T *input, T *out) { in CheckNonNegKernel() 54 __global__ void NormInputKernel(T *input, const size_t distributions, const size_t categories) { in NormInputKernel() 91 __global__ void MultinomialKernel(int seed, T *input, int num_sample, curandState *globalState, int… in MultinomialKernel()
|
D | batchnorm_fold2_impl.cu | 28 __global__ void BatchNormFold2Kernel(const T *x, const T *beta, const T *gamma, const T *batch_std,… in BatchNormFold2Kernel() 47 __global__ void BatchNormFold2GradReduce1(const T *dout, T *tmp, const T *x, T *tmp2, size_t N, siz… in BatchNormFold2GradReduce1() 59 __global__ void BatchNormFold2GradReduce2(const T *tmp, T *d_beta, const T *tmp2, T *reduce_x, size… in BatchNormFold2GradReduce2() 67 __global__ void BatchNormFold2GradNotFreeze(const T *d_beta, const T *reduce_x, const T *batch_mean… in BatchNormFold2GradNotFreeze() 79 __global__ void BatchNormFold2GradFreeze(const T *d_beta, const T *running_mean, const T *running_s… in BatchNormFold2GradFreeze() 87 __global__ void BatchNormFold2GradMul(const T *dout, const T *x, T *tmp_x, size_t NCHW) { in BatchNormFold2GradMul() 94 __global__ void DxMul(size_t N, size_t C, size_t HW, const T *batch_std, const T *running_std, T *d… in DxMul()
|
D | fake_learned_scale_quant_perlayer_impl.cu | 24 __global__ void FakeLearnedScaleQuantPerLayer(float *output, const int size, float *input_alpha, in FakeLearnedScaleQuantPerLayer() 33 __global__ void FakeLearnedScaleQuantPerLayerGrad(float *grad_input, float *grad_alpha, const float… in FakeLearnedScaleQuantPerLayerGrad() 55 __global__ void LSQNudgePerLayer(const float *input, const int size, float *input_alpha, float *inp… in LSQNudgePerLayer()
|
D | momentum_impl.cu | 19 __global__ void MomentumUpdateVariableKernel(const size_t size, T *variable, T *accumulation, const… in MomentumUpdateVariableKernel() 34 __global__ void MomentumUpdateVariableKernel(const size_t size, half *variable, half *accumulation, in MomentumUpdateVariableKernel() 51 __global__ void MomentumUpdateVariableKernel(const size_t size, float *variable, float *accumulatio… in MomentumUpdateVariableKernel() 74 __global__ void FusedMomentumWeightDecayScaleKernel(const size_t element_num, T *weight_decay, T *s… in FusedMomentumWeightDecayScaleKernel() 95 __global__ void FusedMomentumScaleKernel(const size_t element_num, T *scale, T *variable, T *accumu… in FusedMomentumScaleKernel() 113 __global__ void FusedWeightDecayMomentumKernel(const size_t element_num, T *weight_decay, T *variab… in FusedWeightDecayMomentumKernel() 133 __global__ void CombineFusedMomentumScaleKernel(const size_t num, const size_t *element_num, T **sc… in CombineFusedMomentumScaleKernel() 156 __global__ void CombineFusedMomentumWeightDecayScaleKernel(const size_t num, const size_t *element_… in CombineFusedMomentumWeightDecayScaleKernel()
|
D | cross_entropy_impl.cu | 24 __global__ void CrossEntropyWithSparseKernel(const T *logits, const S *labels, const size_t batch_s… in CrossEntropyWithSparseKernel() 40 __global__ void LargeBatchCrossEntropyWithSparseKernel(const T *logits, const S *labels, const size… in LargeBatchCrossEntropyWithSparseKernel() 54 __global__ void CrossEntropyGradWithSparseKernel(const T *logits, const S *labels, const size_t bat… in CrossEntropyGradWithSparseKernel() 68 __global__ void CrossEntropyKernel(const T *logits, const S *labels, const size_t batch_size, const… in CrossEntropyKernel()
|
D | correction_mul_impl.cu | 22 __global__ void CorrectionMul(const T* weight, const T* gamma, const T* running_std, const int batc… in CorrectionMul() 32 __global__ void Mul(int N, const T* a, const T* b, T* c) { in Mul() 40 __global__ void Reduce(int N, int CHW, const T* tmp, const T* running_std, T* d_gamma) { in Reduce()
|
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/ |
D | common_sponge.cuh | 217 __global__ static void construct_neighbor_list_kernel(int atom_numbers, int max_neighbor_numbers, i… in construct_neighbor_list_kernel() 225 __global__ static void construct_atom_near(int atom_numbers, int near_numbers, int *atom_serial, AT… in construct_atom_near() 254 __global__ static void construct_constrain_pair(int constrain_pair_numbers, const int *atom_i_seria… in construct_constrain_pair() 266 __global__ static void Copy_Crd_To_New_Crd_Start(const int atom_numbers, const UNSIGNED_INT_VECTOR … in Copy_Crd_To_New_Crd_Start() 285 __global__ static void Rand_Normal(const int float4_numbers, curandStatePhilox4_32_10_t *rand_state, in Rand_Normal() 293 __global__ static void Setup_Rand_Normal_Kernel(const int float4_numbers, curandStatePhilox4_32_10_… in Setup_Rand_Normal_Kernel() 303 __global__ static void Reset_List(const int element_numbers, int *list, const int replace_element) { in Reset_List() 310 __global__ static void Reset_List(const int element_numbers, float *list, const float replace_eleme… in Reset_List() 317 __global__ static void Sum_Of_List(const int element_numbers, const float *list, float *sum) { in Sum_Of_List() 329 __global__ static void Scale_List(const int element_numbers, float *list, float scaler) { in Scale_List() [all …]
|
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/rl/ |
D | rl_buffer_impl.cu | 23 __global__ void BufferAppendKernel(const int64_t capacity, const size_t size, const int *index, con… in BufferAppendKernel() 36 __global__ void IncreaseCountKernel(const int64_t capacity, const int exp_batch, int *count, int *h… in IncreaseCountKernel() 55 __global__ void ReMappingIndexKernel(const int *count, const int *head, const int *origin_index, in… in ReMappingIndexKernel() 73 __global__ void BufferGetItemKernel(const size_t size, const int *index, const size_t one_exp_len, in BufferGetItemKernel() 83 __global__ void CheckBatchSizeKernel(const int *count, const int *head, const size_t batch_size, in CheckBatchSizeKernel() 91 __global__ void BufferSampleKernel(const size_t size, const size_t one_element, const unsigned int … in BufferSampleKernel() 98 __global__ void SetupKernel(const int seed, curandState *state, const int size) { in SetupKernel() 104 __global__ void SrandUInt(const int size, curandState *globalState, unsigned int *value, unsigned i… in SrandUInt() 111 __global__ void SrandUniformInt(const int size, curandState *globalState, const int upBound, unsign… in SrandUniformInt()
|
/third_party/ffmpeg/libavfilter/ |
D | vf_thumbnail_cuda.cu | 25 __global__ void Thumbnail_uchar(cudaTextureObject_t uchar_tex, in Thumbnail_uchar() 37 __global__ void Thumbnail_uchar2(cudaTextureObject_t uchar2_tex, in Thumbnail_uchar2() 51 __global__ void Thumbnail_ushort(cudaTextureObject_t ushort_tex, in Thumbnail_ushort() 64 __global__ void Thumbnail_ushort2(cudaTextureObject_t ushort2_tex, in Thumbnail_ushort2()
|
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/neighbor_list/ |
D | neighbor_list_impl.cu | 27 static __global__ void Copy_List(const int element_numbers, const float *origin_list, float *list) { in Copy_List() 34 static __global__ void Crd_To_Uint_Crd(const int atom_numbers, float *scale_factor, const VECTOR *c… in Crd_To_Uint_Crd() 55 static __global__ void Crd_Periodic_Map(const int atom_numbers, VECTOR *crd, const float *box_lengt… in Crd_Periodic_Map() 86 static __global__ void Clear_Grid_Bucket(const int grid_numbers, int *atom_numbers_in_grid_bucket, in Clear_Grid_Bucket() 98 static __global__ void Find_Atom_In_Grid_Serial(const int atom_numbers, const float *grid_length_in… in Find_Atom_In_Grid_Serial() 113 static __global__ void Put_Atom_In_Grid_Bucket(const int atom_numbers, const int *atom_in_grid_seri… in Put_Atom_In_Grid_Bucket() 136 static __global__ void Find_atom_neighbors(const int atom_numbers, const UNSIGNED_INT_VECTOR *uint_… in Find_atom_neighbors() 179 static __global__ void Delete_Excluded_Atoms_Serial_In_Neighbor_List(const int atom_numbers, NEIGHB… in Delete_Excluded_Atoms_Serial_In_Neighbor_List() 220 static __global__ void construct_neighbor_list_kernel(int atom_numbers, int max_neighbor_numbers, i… in construct_neighbor_list_kernel() 228 static __global__ void copy_neighbor_list_atom_number(int atom_numbers, int max_neighbor_numbers, N… in copy_neighbor_list_atom_number() [all …]
|
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/ |
D | pme_common.cuh | 105 __global__ static void device_add(float *ene, float *factor, float *charge_sum) { in device_add() 109 __global__ static void PME_Atom_Near(const UNSIGNED_INT_VECTOR *uint_crd, int *PME_atom_near, const… in PME_Atom_Near() 154 __global__ static void PME_Q_Spread(int *PME_atom_near, const float *charge, const VECTOR *PME_frxy… in PME_Q_Spread() 196 __global__ static void PME_Direct_Energy(const int atom_numbers, const NEIGHBOR_LIST *nl, in PME_Direct_Energy() 241 __global__ static void PME_Direct_Atom_Energy(const int atom_numbers, const NEIGHBOR_LIST *nl, in PME_Direct_Atom_Energy() 284 __global__ static void PME_Energy_Product(const int element_number, const float *list1, const float… in PME_Energy_Product() 297 __global__ static void PME_BCFQ(cufftComplex *PME_FQ, float *PME_BC, int PME_Nfft) { in PME_BCFQ() 307 __global__ static void PME_Excluded_Energy_Correction(const int atom_numbers, const UNSIGNED_INT_VE… in PME_Excluded_Energy_Correction()
|
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/common/ |
D | atomcrdtocv_impl.cu | 27 __global__ void Record_Box_Map_Times(int atom_numbers, const float *crd, const float *old_crd, floa… in Record_Box_Map_Times() 50 __global__ void gen_nowarp_crd(int atom_numbers, const float *crd, float *box, int *box_map_times, … in gen_nowarp_crd() 59 __global__ void G_Radial(const int start_serial, const int end_serial, const float *crd, float *g_r… in G_Radial() 78 __global__ void G_Angular(const int start_serial, const int end_serial, const float *crd, float *g_… in G_Angular()
|