Home
last modified time | relevance | path

Searched refs:__global__ (Results 1 – 25 of 207) sorted by relevance

123456789

/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/
Dunary_op_impl.cu19 __global__ void ExponentialKernel(const T *input, T *output, const size_t count) { in ExponentialKernel()
26 __global__ void ExponentialKernel(const double *input, double *output, const size_t count) { in ExponentialKernel()
33 __global__ void ExponentialKernel(const half *input, half *output, const size_t count) { in ExponentialKernel()
40 __global__ void Expm1Kernel(const T *input, T *output, const size_t count) { in Expm1Kernel()
47 __global__ void Expm1Kernel(const double *input, double *output, const size_t count) { in Expm1Kernel()
54 __global__ void LogarithmKernel(const T *input, T *output, const size_t count) { in LogarithmKernel()
61 __global__ void LogarithmKernel(const double *input, double *output, const size_t count) { in LogarithmKernel()
68 __global__ void LogarithmKernel(const half *input, half *output, const size_t count) { in LogarithmKernel()
75 __global__ void Log1pKernel(const T *input, T *output, const size_t count) { in Log1pKernel()
82 __global__ void Log1pKernel(const double *input, double *output, const size_t count) { in Log1pKernel()
[all …]
Dfloat_status_impl.cu21 __global__ void IsNan(const size_t size, const T* input, bool* out) { in IsNan()
32 __global__ void IsNan(const size_t size, const half* input, bool* out) { in IsNan()
44 __global__ void IsInf(const size_t size, const T* input, bool* out) { in IsInf()
55 __global__ void IsInf(const size_t size, const half* input, bool* out) { in IsInf()
67 __global__ void IsFinite(const size_t size, const T* input, bool* out) { in IsFinite()
78 __global__ void IsFinite(const size_t size, const half* input, bool* out) { in IsFinite()
90 __global__ void FloatStatus(const size_t size, const T* input, float* out) { in FloatStatus()
99 __global__ void FloatStatus(const size_t size, const half* input, float* out) { in FloatStatus()
Dsquare_sum_all_impl.cu21 __global__ void SquareSumAllKernel(const size_t size, const T* input_addr_0, const T* input_addr_1, in SquareSumAllKernel()
38 __global__ void SquareSumAllKernel(const size_t size, const float* input_addr_0, const float* input… in SquareSumAllKernel()
55 __global__ void AssignKernel(const size_t size, T* output_addr_0, T* output_addr_1, in AssignKernel()
65 __global__ void AssignKernel(const size_t size, float* output_addr_0, float* output_addr_1, in AssignKernel()
75 __global__ void InitOutput(const size_t size, T *output) { in InitOutput()
Dunary_op_grad_impl.cu20 __global__ void SqrtGradKernel(const T *input, const T *dout, T *output, const size_t count) { in SqrtGradKernel()
31 __global__ void RsqrtGradKernel(const T *input, const T *dout, T *output, const size_t count) { in RsqrtGradKernel()
43 __global__ void AsinGradKernel(const T *input, const T *dout, T *output, const size_t count) { in AsinGradKernel()
53 __global__ void AsinGradKernel(const half *input, const half *dout, half *output, const size_t coun… in AsinGradKernel()
63 __global__ void ACosGradKernel(const T *input, const T *dout, T *output, const size_t count) { in ACosGradKernel()
74 __global__ void ACosGradKernel(const half *input, const half *dout, half *output, const size_t coun… in ACosGradKernel()
85 __global__ void AtanGradKernel(const T *input, const T *dout, T *output, const size_t count) { in AtanGradKernel()
95 __global__ void AsinhGradKernel(const T *input, const T *dout, T *output, const size_t count) { in AsinhGradKernel()
105 __global__ void AcoshGradKernel(const T *input, const T *dout, T *output, const size_t count) { in AcoshGradKernel()
115 __global__ void ReciprocalGradKernel(const T *input, const T *dout, T *output, const size_t count) { in ReciprocalGradKernel()
Dctcloss_impl.cu31 __global__ void CalculateFwdVarKernel(T *log_alpha_b, int *label_value_with_blank, T *softmax_probs, in CalculateFwdVarKernel()
90 __global__ void CalculateBwdVarKernel(T *log_beta_b, int *label_value_with_blank, T *softmax_probs, in CalculateBwdVarKernel()
157 __global__ void ProbInitKernel(T *prob_num, int size) { in ProbInitKernel()
163 __global__ void LogBInitKernel(T *log_b, int log_prob_size) { in LogBInitKernel()
170 __global__ void CTCLossKernel(T *log_alpha_b, T *log_beta_b, T *softmax_probs, int *label_value_wit… in CTCLossKernel()
221 __global__ void InnerSoftMaxKernel(const T *probs, T *softmax_probs, const int *sequence_length, in… in InnerSoftMaxKernel()
245 __global__ void GenLabelValuePCRKernel(int *label_value_sp, int *label_value_pcr, int *label_squenc… in GenLabelValuePCRKernel()
262 __global__ void UpdateLengthKernel(int *label_squence_length, int *cum_labels_length, int *max_labe… in UpdateLengthKernel()
306 __global__ void GenLabelWithBlankKernel(int *label_value, int *label_value_with_blank, int *label_s… in GenLabelWithBlankKernel()
336 __global__ void GenLabelValueKernel(int *label_value_sp, const int64_t *label_indices, const int *l… in GenLabelValueKernel()
[all …]
Dadagrad_impl.cu30 __global__ void ApplyAdagradKernel(const size_t size, in ApplyAdagradKernel()
45 __global__ void ApplyAdagradKernel(const size_t size, in ApplyAdagradKernel()
60 __global__ void ApplyAdagradKernel(const size_t size, in ApplyAdagradKernel()
75 __global__ void ApplyAdagradKernel(const size_t size, in ApplyAdagradKernel()
90 __global__ void ApplyAdagradKernel(const size_t size, in ApplyAdagradKernel()
Dloss_with_reduction_impl.cu30 __global__ void Copy(T *loss, T *tmp_loss, int reduction, int input_size) { in Copy()
39 __global__ void CopyEqual(const T *src, T *dest, const int size) { in CopyEqual()
46 __global__ void AddTile(T *tmp_loss, int index) { in AddTile()
50 __global__ void PartialSum(T *tmp_loss, int stride) { in PartialSum()
77 __global__ void Divide(const T *numerator, const S *denominator, T *result) { in Divide()
82 __global__ void Divide(const float *numerator, const half *denominator, float *result) { in Divide()
89 __global__ void Divide(const half *numerator, const float *denominator, half *result) { in Divide()
125 __global__ void LossInitKernel(T *loss) { in LossInitKernel()
130 __global__ void InitZero(T *array, int size) { in InitZero()
137 __global__ void KLDivLossKernel(const int input_size, const int reduction, const T *input_x, const … in KLDivLossKernel()
[all …]
Drandom_choice_with_mask_impl.cu39 __global__ void InitArray(const int input_size, const int ceil_power2, const T *input, S *mask_buff… in InitArray()
57 __global__ void ReductionSum(T *g_idata, T *g_odata, size_t n) { in ReductionSum()
102 __global__ void Reshape2Index(const int input_size, const int input_shape_size, const int d1, const… in Reshape2Index()
127 __global__ void Copy(const T *src, T *dst, const int n) { in Copy()
134 __global__ void Sort(const int ceil_power2, T *rank_buff) { in Sort()
156 __global__ void SrandInit(const int ceil_power2, curandState *globalState, const int seedc) { in SrandInit()
163 __global__ void Shuffle(const int ceil_power2, curandState *globalState, T *rank_buff) { in Shuffle()
184 __global__ void MoveToOutput(const int input_shape_size, const int count, const T *input, S *output… in MoveToOutput()
Dgelu_impl.cu21 __global__ void GeluKernel(size_t size, T *input_addr, T *output_addr) { in GeluKernel()
34 __global__ void GeluKernel(size_t size, half *input_addr, half *output_addr) { in GeluKernel()
43 __global__ void GeluKernel(size_t size, half2 *input_addr, half2 *output_addr) { in GeluKernel()
70 __global__ void GeluGradKernel(size_t size, T *dy_addr, T *x_addr, T *dx_addr) { in GeluGradKernel()
87 __global__ void GeluGradKernel(size_t size, half2 *dy_addr, half2 *x_addr, half2 *dx_addr) { in GeluGradKernel()
103 __global__ void GeluGradKernel(size_t size, half *dy_addr, half *x_addr, half *dx_addr) { in GeluGradKernel()
Ddropout_impl.cu21 __global__ void DropoutForwardKernel(const T *input, T *mask, T *output, float *mask_f, size_t num_… in DropoutForwardKernel()
31 __global__ void DropoutForwardKernel(const half *input, half *mask, half *output, float *mask_f, in DropoutForwardKernel()
47 __global__ void DropoutBackwardKernel(const T *dy, const T *mask, T *dx, size_t num_count, in DropoutBackwardKernel()
55 __global__ void DropoutBackwardKernel(const half *dy, const half *mask, half *dx, size_t num_count, in DropoutBackwardKernel()
Dsoftplus_impl.cu21 __global__ void SoftplusKernel(const size_t size, const T *input_addr, T *output_addr) { in SoftplusKernel()
29 __global__ void SoftplusKernel(const size_t size, const half *input_addr, half *output_addr) { in SoftplusKernel()
47 __global__ void SoftplusGradKernel(const size_t size, const T *dy_addr, const T *x_addr, T *dx_addr… in SoftplusGradKernel()
55 __global__ void SoftplusGradKernel(const size_t size, const half *dy_addr, const half *x_addr, half… in SoftplusGradKernel()
Dnms_with_mask_impl.cu40 __global__ void MaskInit(int numSq, bool *row_mask) { in MaskInit()
49 __global__ void PopulateOutput(T *data_in, T *data_out, int *index_buff, const int num, int box_siz… in PopulateOutput()
100 __global__ void Preprocess(const int num, int *sel_idx, bool *sel_boxes, T *output, int box_size) { in Preprocess()
110 __global__ void NmsPass(const int num, const float IOU_value, T *output, bool *sel_boxes, int box_s… in NmsPass()
126 __global__ void ReducePass(const int num, bool *sel_boxes, bool *row_mask) { in ReducePass()
142 __global__ void NmsBitonicSortByKeyKernel(const int outer, const int inner, const int ceil_power2, … in NmsBitonicSortByKeyKernel()
Dbce_with_logits_loss_impl.cu22 __global__ void FillWithoutBroadcast(const size_t size, const T *src, T *dst) { in FillWithoutBroadcast()
30 __global__ void FillAndBroadcast(const size_t size, const size_t shape_size, const size_t *src_shap… in FillAndBroadcast()
58 __global__ void BCEWithLogitsLossMain(size_t size, const T *predict, const T *target, const T *shap… in BCEWithLogitsLossMain()
71 __global__ void BCEWithLogitsLossMain(size_t size, const half *predict, const half *target, in BCEWithLogitsLossMain()
84 __global__ void Mul(size_t size, const T *lhs, T *rhs) { in Mul()
Dmultinomial_impl.cu21 __global__ void CheckZeroKernel(const size_t distributions, const size_t categories, const T *input… in CheckZeroKernel()
38 __global__ void CheckNonNegKernel(const size_t size, const T *input, T *out) { in CheckNonNegKernel()
54 __global__ void NormInputKernel(T *input, const size_t distributions, const size_t categories) { in NormInputKernel()
91 __global__ void MultinomialKernel(int seed, T *input, int num_sample, curandState *globalState, int… in MultinomialKernel()
Dbatchnorm_fold2_impl.cu28 __global__ void BatchNormFold2Kernel(const T *x, const T *beta, const T *gamma, const T *batch_std,… in BatchNormFold2Kernel()
47 __global__ void BatchNormFold2GradReduce1(const T *dout, T *tmp, const T *x, T *tmp2, size_t N, siz… in BatchNormFold2GradReduce1()
59 __global__ void BatchNormFold2GradReduce2(const T *tmp, T *d_beta, const T *tmp2, T *reduce_x, size… in BatchNormFold2GradReduce2()
67 __global__ void BatchNormFold2GradNotFreeze(const T *d_beta, const T *reduce_x, const T *batch_mean… in BatchNormFold2GradNotFreeze()
79 __global__ void BatchNormFold2GradFreeze(const T *d_beta, const T *running_mean, const T *running_s… in BatchNormFold2GradFreeze()
87 __global__ void BatchNormFold2GradMul(const T *dout, const T *x, T *tmp_x, size_t NCHW) { in BatchNormFold2GradMul()
94 __global__ void DxMul(size_t N, size_t C, size_t HW, const T *batch_std, const T *running_std, T *d… in DxMul()
Dfake_learned_scale_quant_perlayer_impl.cu24 __global__ void FakeLearnedScaleQuantPerLayer(float *output, const int size, float *input_alpha, in FakeLearnedScaleQuantPerLayer()
33 __global__ void FakeLearnedScaleQuantPerLayerGrad(float *grad_input, float *grad_alpha, const float… in FakeLearnedScaleQuantPerLayerGrad()
55 __global__ void LSQNudgePerLayer(const float *input, const int size, float *input_alpha, float *inp… in LSQNudgePerLayer()
Dmomentum_impl.cu19 __global__ void MomentumUpdateVariableKernel(const size_t size, T *variable, T *accumulation, const… in MomentumUpdateVariableKernel()
34 __global__ void MomentumUpdateVariableKernel(const size_t size, half *variable, half *accumulation, in MomentumUpdateVariableKernel()
51 __global__ void MomentumUpdateVariableKernel(const size_t size, float *variable, float *accumulatio… in MomentumUpdateVariableKernel()
74 __global__ void FusedMomentumWeightDecayScaleKernel(const size_t element_num, T *weight_decay, T *s… in FusedMomentumWeightDecayScaleKernel()
95 __global__ void FusedMomentumScaleKernel(const size_t element_num, T *scale, T *variable, T *accumu… in FusedMomentumScaleKernel()
113 __global__ void FusedWeightDecayMomentumKernel(const size_t element_num, T *weight_decay, T *variab… in FusedWeightDecayMomentumKernel()
133 __global__ void CombineFusedMomentumScaleKernel(const size_t num, const size_t *element_num, T **sc… in CombineFusedMomentumScaleKernel()
156 __global__ void CombineFusedMomentumWeightDecayScaleKernel(const size_t num, const size_t *element_… in CombineFusedMomentumWeightDecayScaleKernel()
Dcross_entropy_impl.cu24 __global__ void CrossEntropyWithSparseKernel(const T *logits, const S *labels, const size_t batch_s… in CrossEntropyWithSparseKernel()
40 __global__ void LargeBatchCrossEntropyWithSparseKernel(const T *logits, const S *labels, const size… in LargeBatchCrossEntropyWithSparseKernel()
54 __global__ void CrossEntropyGradWithSparseKernel(const T *logits, const S *labels, const size_t bat… in CrossEntropyGradWithSparseKernel()
68 __global__ void CrossEntropyKernel(const T *logits, const S *labels, const size_t batch_size, const… in CrossEntropyKernel()
Dfake_quant_perlayer_impl.cu22 __global__ void FakeQuantPerLayer(const float *input, float *output, const int size, const float *n… in FakeQuantPerLayer()
45 __global__ void FakeQuantPerLayerGrad(const float *input, const float *gradient, float *output, con… in FakeQuantPerLayerGrad()
57 __global__ void NudgeMinMaxPerLayer(float *input_min, float *input_max, const float quant_min, cons… in NudgeMinMaxPerLayer()
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/
Dcommon_sponge.cuh217 __global__ static void construct_neighbor_list_kernel(int atom_numbers, int max_neighbor_numbers, i… in construct_neighbor_list_kernel()
225 __global__ static void construct_atom_near(int atom_numbers, int near_numbers, int *atom_serial, AT… in construct_atom_near()
254 __global__ static void construct_constrain_pair(int constrain_pair_numbers, const int *atom_i_seria… in construct_constrain_pair()
266 __global__ static void Copy_Crd_To_New_Crd_Start(const int atom_numbers, const UNSIGNED_INT_VECTOR … in Copy_Crd_To_New_Crd_Start()
285 __global__ static void Rand_Normal(const int float4_numbers, curandStatePhilox4_32_10_t *rand_state, in Rand_Normal()
293 __global__ static void Setup_Rand_Normal_Kernel(const int float4_numbers, curandStatePhilox4_32_10_… in Setup_Rand_Normal_Kernel()
303 __global__ static void Reset_List(const int element_numbers, int *list, const int replace_element) { in Reset_List()
310 __global__ static void Reset_List(const int element_numbers, float *list, const float replace_eleme… in Reset_List()
317 __global__ static void Sum_Of_List(const int element_numbers, const float *list, float *sum) { in Sum_Of_List()
329 __global__ static void Scale_List(const int element_numbers, float *list, float scaler) { in Scale_List()
[all …]
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/rl/
Drl_buffer_impl.cu23 __global__ void BufferAppendKernel(const int64_t capacity, const size_t size, const int *index, con… in BufferAppendKernel()
36 __global__ void IncreaseCountKernel(const int64_t capacity, const int exp_batch, int *count, int *h… in IncreaseCountKernel()
55 __global__ void ReMappingIndexKernel(const int *count, const int *head, const int *origin_index, in… in ReMappingIndexKernel()
73 __global__ void BufferGetItemKernel(const size_t size, const int *index, const size_t one_exp_len, in BufferGetItemKernel()
83 __global__ void CheckBatchSizeKernel(const int *count, const int *head, const size_t batch_size, in CheckBatchSizeKernel()
91 __global__ void BufferSampleKernel(const size_t size, const size_t one_element, const unsigned int … in BufferSampleKernel()
98 __global__ void SetupKernel(const int seed, curandState *state, const int size) { in SetupKernel()
104 __global__ void SrandUInt(const int size, curandState *globalState, unsigned int *value, unsigned i… in SrandUInt()
111 __global__ void SrandUniformInt(const int size, curandState *globalState, const int upBound, unsign… in SrandUniformInt()
/third_party/ffmpeg/libavfilter/
Dvf_thumbnail_cuda.cu25 __global__ void Thumbnail_uchar(cudaTextureObject_t uchar_tex, in Thumbnail_uchar()
37 __global__ void Thumbnail_uchar2(cudaTextureObject_t uchar2_tex, in Thumbnail_uchar2()
51 __global__ void Thumbnail_ushort(cudaTextureObject_t ushort_tex, in Thumbnail_ushort()
64 __global__ void Thumbnail_ushort2(cudaTextureObject_t ushort2_tex, in Thumbnail_ushort2()
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/neighbor_list/
Dneighbor_list_impl.cu27 static __global__ void Copy_List(const int element_numbers, const float *origin_list, float *list) { in Copy_List()
34 static __global__ void Crd_To_Uint_Crd(const int atom_numbers, float *scale_factor, const VECTOR *c… in Crd_To_Uint_Crd()
55 static __global__ void Crd_Periodic_Map(const int atom_numbers, VECTOR *crd, const float *box_lengt… in Crd_Periodic_Map()
86 static __global__ void Clear_Grid_Bucket(const int grid_numbers, int *atom_numbers_in_grid_bucket, in Clear_Grid_Bucket()
98 static __global__ void Find_Atom_In_Grid_Serial(const int atom_numbers, const float *grid_length_in… in Find_Atom_In_Grid_Serial()
113 static __global__ void Put_Atom_In_Grid_Bucket(const int atom_numbers, const int *atom_in_grid_seri… in Put_Atom_In_Grid_Bucket()
136 static __global__ void Find_atom_neighbors(const int atom_numbers, const UNSIGNED_INT_VECTOR *uint_… in Find_atom_neighbors()
179 static __global__ void Delete_Excluded_Atoms_Serial_In_Neighbor_List(const int atom_numbers, NEIGHB… in Delete_Excluded_Atoms_Serial_In_Neighbor_List()
220 static __global__ void construct_neighbor_list_kernel(int atom_numbers, int max_neighbor_numbers, i… in construct_neighbor_list_kernel()
228 static __global__ void copy_neighbor_list_atom_number(int atom_numbers, int max_neighbor_numbers, N… in copy_neighbor_list_atom_number()
[all …]
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/
Dpme_common.cuh105 __global__ static void device_add(float *ene, float *factor, float *charge_sum) { in device_add()
109 __global__ static void PME_Atom_Near(const UNSIGNED_INT_VECTOR *uint_crd, int *PME_atom_near, const… in PME_Atom_Near()
154 __global__ static void PME_Q_Spread(int *PME_atom_near, const float *charge, const VECTOR *PME_frxy… in PME_Q_Spread()
196 __global__ static void PME_Direct_Energy(const int atom_numbers, const NEIGHBOR_LIST *nl, in PME_Direct_Energy()
241 __global__ static void PME_Direct_Atom_Energy(const int atom_numbers, const NEIGHBOR_LIST *nl, in PME_Direct_Atom_Energy()
284 __global__ static void PME_Energy_Product(const int element_number, const float *list1, const float… in PME_Energy_Product()
297 __global__ static void PME_BCFQ(cufftComplex *PME_FQ, float *PME_BC, int PME_Nfft) { in PME_BCFQ()
307 __global__ static void PME_Excluded_Energy_Correction(const int atom_numbers, const UNSIGNED_INT_VE… in PME_Excluded_Energy_Correction()
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/common/
Datomcrdtocv_impl.cu27 __global__ void Record_Box_Map_Times(int atom_numbers, const float *crd, const float *old_crd, floa… in Record_Box_Map_Times()
50 __global__ void gen_nowarp_crd(int atom_numbers, const float *crd, float *box, int *box_map_times, … in gen_nowarp_crd()
59 __global__ void G_Radial(const int start_serial, const int end_serial, const float *crd, float *g_r… in G_Radial()
78 __global__ void G_Angular(const int start_serial, const int end_serial, const float *crd, float *g_… in G_Angular()

123456789