__global__ (reference) in projects: third_party

Project(s)

Full Search
Definition
Symbol
File Path
History
Type

Searched refs: __global__ (Results 1 – 25 of 207) sorted by relevance

1 2 3 4 5 6 7 8 9

/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/
D	unary_op_impl.cu	19 __global__ void ExponentialKernel(const T input, T output, const size_t count) { in ExponentialKernel() 26 __global__ void ExponentialKernel(const double input, double output, const size_t count) { in ExponentialKernel() 33 __global__ void ExponentialKernel(const half input, half output, const size_t count) { in ExponentialKernel() 40 __global__ void Expm1Kernel(const T input, T output, const size_t count) { in Expm1Kernel() 47 __global__ void Expm1Kernel(const double input, double output, const size_t count) { in Expm1Kernel() 54 __global__ void LogarithmKernel(const T input, T output, const size_t count) { in LogarithmKernel() 61 __global__ void LogarithmKernel(const double input, double output, const size_t count) { in LogarithmKernel() 68 __global__ void LogarithmKernel(const half input, half output, const size_t count) { in LogarithmKernel() 75 __global__ void Log1pKernel(const T input, T output, const size_t count) { in Log1pKernel() 82 __global__ void Log1pKernel(const double input, double output, const size_t count) { in Log1pKernel() [all …]
D	float_status_impl.cu	21 __global__ void IsNan(const size_t size, const T* input, bool* out) { in IsNan() 32 __global__ void IsNan(const size_t size, const half* input, bool* out) { in IsNan() 44 __global__ void IsInf(const size_t size, const T* input, bool* out) { in IsInf() 55 __global__ void IsInf(const size_t size, const half* input, bool* out) { in IsInf() 67 __global__ void IsFinite(const size_t size, const T* input, bool* out) { in IsFinite() 78 __global__ void IsFinite(const size_t size, const half* input, bool* out) { in IsFinite() 90 __global__ void FloatStatus(const size_t size, const T* input, float* out) { in FloatStatus() 99 __global__ void FloatStatus(const size_t size, const half* input, float* out) { in FloatStatus()
D	square_sum_all_impl.cu	21 __global__ void SquareSumAllKernel(const size_t size, const T* input_addr_0, const T* input_addr_1, in SquareSumAllKernel() 38 __global__ void SquareSumAllKernel(const size_t size, const float* input_addr_0, const float* input… in SquareSumAllKernel() 55 __global__ void AssignKernel(const size_t size, T* output_addr_0, T* output_addr_1, in AssignKernel() 65 __global__ void AssignKernel(const size_t size, float* output_addr_0, float* output_addr_1, in AssignKernel() 75 __global__ void InitOutput(const size_t size, T *output) { in InitOutput()
D	unary_op_grad_impl.cu	20 __global__ void SqrtGradKernel(const T input, const T dout, T output, const size_t count) { in SqrtGradKernel() 31 __global__ void RsqrtGradKernel(const T input, const T dout, T output, const size_t count) { in RsqrtGradKernel() 43 __global__ void AsinGradKernel(const T input, const T dout, T output, const size_t count) { in AsinGradKernel() 53 __global__ void AsinGradKernel(const half input, const half dout, half output, const size_t coun… in AsinGradKernel() 63 __global__ void ACosGradKernel(const T input, const T dout, T output, const size_t count) { in ACosGradKernel() 74 __global__ void ACosGradKernel(const half input, const half dout, half output, const size_t coun… in ACosGradKernel() 85 __global__ void AtanGradKernel(const T input, const T dout, T output, const size_t count) { in AtanGradKernel() 95 __global__ void AsinhGradKernel(const T input, const T dout, T output, const size_t count) { in AsinhGradKernel() 105 __global__ void AcoshGradKernel(const T input, const T dout, T output, const size_t count) { in AcoshGradKernel() 115 __global__ void ReciprocalGradKernel(const T input, const T dout, T output, const size_t count) { in ReciprocalGradKernel()
D	ctcloss_impl.cu	31 __global__ void CalculateFwdVarKernel(T log_alpha_b, int label_value_with_blank, T softmax_probs, in CalculateFwdVarKernel() 90 __global__ void CalculateBwdVarKernel(T log_beta_b, int label_value_with_blank, T softmax_probs, in CalculateBwdVarKernel() 157 __global__ void ProbInitKernel(T prob_num, int size) { in ProbInitKernel() 163 __global__ void LogBInitKernel(T log_b, int log_prob_size) { in LogBInitKernel() 170 __global__ void CTCLossKernel(T log_alpha_b, T log_beta_b, T softmax_probs, int label_value_wit… in CTCLossKernel() 221 __global__ void InnerSoftMaxKernel(const T probs, T softmax_probs, const int sequence_length, in… in InnerSoftMaxKernel() 245 __global__ void GenLabelValuePCRKernel(int label_value_sp, int label_value_pcr, int label_squenc… in GenLabelValuePCRKernel() 262 __global__ void UpdateLengthKernel(int label_squence_length, int cum_labels_length, int max_labe… in UpdateLengthKernel() 306 __global__ void GenLabelWithBlankKernel(int label_value, int label_value_with_blank, int label_s… in GenLabelWithBlankKernel() 336 __global__ void GenLabelValueKernel(int label_value_sp, const int64_t label_indices, const int *l… in GenLabelValueKernel() [all …]
D	adagrad_impl.cu	30 __global__ void ApplyAdagradKernel(const size_t size, in ApplyAdagradKernel() 45 __global__ void ApplyAdagradKernel(const size_t size, in ApplyAdagradKernel() 60 __global__ void ApplyAdagradKernel(const size_t size, in ApplyAdagradKernel() 75 __global__ void ApplyAdagradKernel(const size_t size, in ApplyAdagradKernel() 90 __global__ void ApplyAdagradKernel(const size_t size, in ApplyAdagradKernel()
D	loss_with_reduction_impl.cu	30 __global__ void Copy(T loss, T tmp_loss, int reduction, int input_size) { in Copy() 39 __global__ void CopyEqual(const T src, T dest, const int size) { in CopyEqual() 46 __global__ void AddTile(T tmp_loss, int index) { in AddTile() 50 __global__ void PartialSum(T tmp_loss, int stride) { in PartialSum() 77 __global__ void Divide(const T numerator, const S denominator, T result) { in Divide() 82 __global__ void Divide(const float numerator, const half denominator, float result) { in Divide() 89 __global__ void Divide(const half numerator, const float denominator, half result) { in Divide() 125 __global__ void LossInitKernel(T loss) { in LossInitKernel() 130 __global__ void InitZero(T array, int size) { in InitZero() 137 __global__ void KLDivLossKernel(const int input_size, const int reduction, const T input_x, const … in KLDivLossKernel() [all …]
D	random_choice_with_mask_impl.cu	39 __global__ void InitArray(const int input_size, const int ceil_power2, const T input, S mask_buff… in InitArray() 57 __global__ void ReductionSum(T g_idata, T g_odata, size_t n) { in ReductionSum() 102 __global__ void Reshape2Index(const int input_size, const int input_shape_size, const int d1, const… in Reshape2Index() 127 __global__ void Copy(const T src, T dst, const int n) { in Copy() 134 __global__ void Sort(const int ceil_power2, T rank_buff) { in Sort() 156 __global__ void SrandInit(const int ceil_power2, curandState globalState, const int seedc) { in SrandInit() 163 __global__ void Shuffle(const int ceil_power2, curandState globalState, T rank_buff) { in Shuffle() 184 __global__ void MoveToOutput(const int input_shape_size, const int count, const T input, S output… in MoveToOutput()
D	gelu_impl.cu	21 __global__ void GeluKernel(size_t size, T input_addr, T output_addr) { in GeluKernel() 34 __global__ void GeluKernel(size_t size, half input_addr, half output_addr) { in GeluKernel() 43 __global__ void GeluKernel(size_t size, half2 input_addr, half2 output_addr) { in GeluKernel() 70 __global__ void GeluGradKernel(size_t size, T dy_addr, T x_addr, T dx_addr) { in GeluGradKernel() 87 __global__ void GeluGradKernel(size_t size, half2 dy_addr, half2 x_addr, half2 dx_addr) { in GeluGradKernel() 103 __global__ void GeluGradKernel(size_t size, half dy_addr, half x_addr, half *dx_addr) { in GeluGradKernel()
D	dropout_impl.cu	21 __global__ void DropoutForwardKernel(const T input, T mask, T output, float mask_f, size_t num_… in DropoutForwardKernel() 31 __global__ void DropoutForwardKernel(const half input, half mask, half output, float mask_f, in DropoutForwardKernel() 47 __global__ void DropoutBackwardKernel(const T dy, const T mask, T dx, size_t num_count, in DropoutBackwardKernel() 55 __global__ void DropoutBackwardKernel(const half dy, const half mask, half dx, size_t num_count, in DropoutBackwardKernel()
D	softplus_impl.cu	21 __global__ void SoftplusKernel(const size_t size, const T input_addr, T output_addr) { in SoftplusKernel() 29 __global__ void SoftplusKernel(const size_t size, const half input_addr, half output_addr) { in SoftplusKernel() 47 __global__ void SoftplusGradKernel(const size_t size, const T dy_addr, const T x_addr, T dx_addr… in SoftplusGradKernel() 55 __global__ void SoftplusGradKernel(const size_t size, const half dy_addr, const half *x_addr, half… in SoftplusGradKernel()
D	nms_with_mask_impl.cu	40 __global__ void MaskInit(int numSq, bool row_mask) { in MaskInit() 49 __global__ void PopulateOutput(T data_in, T data_out, int index_buff, const int num, int box_siz… in PopulateOutput() 100 __global__ void Preprocess(const int num, int sel_idx, bool sel_boxes, T output, int box_size) { in Preprocess() 110 __global__ void NmsPass(const int num, const float IOU_value, T output, bool sel_boxes, int box_s… in NmsPass() 126 __global__ void ReducePass(const int num, bool sel_boxes, bool *row_mask) { in ReducePass() 142 __global__ void NmsBitonicSortByKeyKernel(const int outer, const int inner, const int ceil_power2, … in NmsBitonicSortByKeyKernel()
D	bce_with_logits_loss_impl.cu	22 __global__ void FillWithoutBroadcast(const size_t size, const T src, T dst) { in FillWithoutBroadcast() 30 __global__ void FillAndBroadcast(const size_t size, const size_t shape_size, const size_t src_shap… in FillAndBroadcast() 58 __global__ void BCEWithLogitsLossMain(size_t size, const T predict, const T target, const T shap… in BCEWithLogitsLossMain() 71 __global__ void BCEWithLogitsLossMain(size_t size, const half predict, const half target, in BCEWithLogitsLossMain() 84 __global__ void Mul(size_t size, const T lhs, T rhs) { in Mul()
D	multinomial_impl.cu	21 __global__ void CheckZeroKernel(const size_t distributions, const size_t categories, const T input… in CheckZeroKernel() 38 __global__ void CheckNonNegKernel(const size_t size, const T input, T out) { in CheckNonNegKernel() 54 __global__ void NormInputKernel(T input, const size_t distributions, const size_t categories) { in NormInputKernel() 91 __global__ void MultinomialKernel(int seed, T input, int num_sample, curandState globalState, int… in MultinomialKernel()
D	batchnorm_fold2_impl.cu	28 __global__ void BatchNormFold2Kernel(const T x, const T beta, const T gamma, const T batch_std,… in BatchNormFold2Kernel() 47 __global__ void BatchNormFold2GradReduce1(const T dout, T tmp, const T x, T tmp2, size_t N, siz… in BatchNormFold2GradReduce1() 59 __global__ void BatchNormFold2GradReduce2(const T tmp, T d_beta, const T tmp2, T reduce_x, size… in BatchNormFold2GradReduce2() 67 __global__ void BatchNormFold2GradNotFreeze(const T d_beta, const T reduce_x, const T batch_mean… in BatchNormFold2GradNotFreeze() 79 __global__ void BatchNormFold2GradFreeze(const T d_beta, const T running_mean, const T running_s… in BatchNormFold2GradFreeze() 87 __global__ void BatchNormFold2GradMul(const T dout, const T x, T tmp_x, size_t NCHW) { in BatchNormFold2GradMul() 94 __global__ void DxMul(size_t N, size_t C, size_t HW, const T batch_std, const T running_std, T d… in DxMul()
D	fake_learned_scale_quant_perlayer_impl.cu	24 __global__ void FakeLearnedScaleQuantPerLayer(float output, const int size, float input_alpha, in FakeLearnedScaleQuantPerLayer() 33 __global__ void FakeLearnedScaleQuantPerLayerGrad(float grad_input, float grad_alpha, const float… in FakeLearnedScaleQuantPerLayerGrad() 55 __global__ void LSQNudgePerLayer(const float input, const int size, float input_alpha, float *inp… in LSQNudgePerLayer()
D	momentum_impl.cu	19 __global__ void MomentumUpdateVariableKernel(const size_t size, T variable, T accumulation, const… in MomentumUpdateVariableKernel() 34 __global__ void MomentumUpdateVariableKernel(const size_t size, half variable, half accumulation, in MomentumUpdateVariableKernel() 51 __global__ void MomentumUpdateVariableKernel(const size_t size, float variable, float accumulatio… in MomentumUpdateVariableKernel() 74 __global__ void FusedMomentumWeightDecayScaleKernel(const size_t element_num, T weight_decay, T s… in FusedMomentumWeightDecayScaleKernel() 95 __global__ void FusedMomentumScaleKernel(const size_t element_num, T scale, T variable, T accumu… in FusedMomentumScaleKernel() 113 __global__ void FusedWeightDecayMomentumKernel(const size_t element_num, T weight_decay, T variab… in FusedWeightDecayMomentumKernel() 133 __global__ void CombineFusedMomentumScaleKernel(const size_t num, const size_t element_num, T *sc… in CombineFusedMomentumScaleKernel() 156 __global__ void CombineFusedMomentumWeightDecayScaleKernel(const size_t num, const size_t element_… in CombineFusedMomentumWeightDecayScaleKernel()
D	cross_entropy_impl.cu	24 __global__ void CrossEntropyWithSparseKernel(const T logits, const S labels, const size_t batch_s… in CrossEntropyWithSparseKernel() 40 __global__ void LargeBatchCrossEntropyWithSparseKernel(const T logits, const S labels, const size… in LargeBatchCrossEntropyWithSparseKernel() 54 __global__ void CrossEntropyGradWithSparseKernel(const T logits, const S labels, const size_t bat… in CrossEntropyGradWithSparseKernel() 68 __global__ void CrossEntropyKernel(const T logits, const S labels, const size_t batch_size, const… in CrossEntropyKernel()
D	fake_quant_perlayer_impl.cu	22 __global__ void FakeQuantPerLayer(const float input, float output, const int size, const float n… in FakeQuantPerLayer() 45 __global__ void FakeQuantPerLayerGrad(const float input, const float gradient, float output, con… in FakeQuantPerLayerGrad() 57 __global__ void NudgeMinMaxPerLayer(float input_min, float input_max, const float quant_min, cons… in NudgeMinMaxPerLayer()
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/
D	common_sponge.cuh	217 __global__ static void construct_neighbor_list_kernel(int atom_numbers, int max_neighbor_numbers, i… in construct_neighbor_list_kernel() 225 __global__ static void construct_atom_near(int atom_numbers, int near_numbers, int atom_serial, AT… in construct_atom_near() 254 __global__ static void construct_constrain_pair(int constrain_pair_numbers, const int atom_i_seria… in construct_constrain_pair() 266 __global__ static void Copy_Crd_To_New_Crd_Start(const int atom_numbers, const UNSIGNED_INT_VECTOR … in Copy_Crd_To_New_Crd_Start() 285 __global__ static void Rand_Normal(const int float4_numbers, curandStatePhilox4_32_10_t rand_state, in Rand_Normal() 293 __global__ static void Setup_Rand_Normal_Kernel(const int float4_numbers, curandStatePhilox4_32_10_… in Setup_Rand_Normal_Kernel() 303 __global__ static void Reset_List(const int element_numbers, int list, const int replace_element) { in Reset_List() 310 __global__ static void Reset_List(const int element_numbers, float list, const float replace_eleme… in Reset_List() 317 __global__ static void Sum_Of_List(const int element_numbers, const float list, float sum) { in Sum_Of_List() 329 __global__ static void Scale_List(const int element_numbers, float list, float scaler) { in Scale_List() [all …]
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/rl/
D	rl_buffer_impl.cu	23 __global__ void BufferAppendKernel(const int64_t capacity, const size_t size, const int index, con… in BufferAppendKernel() 36 __global__ void IncreaseCountKernel(const int64_t capacity, const int exp_batch, int count, int h… in IncreaseCountKernel() 55 __global__ void ReMappingIndexKernel(const int count, const int head, const int origin_index, in… in ReMappingIndexKernel() 73 __global__ void BufferGetItemKernel(const size_t size, const int index, const size_t one_exp_len, in BufferGetItemKernel() 83 __global__ void CheckBatchSizeKernel(const int count, const int head, const size_t batch_size, in CheckBatchSizeKernel() 91 __global__ void BufferSampleKernel(const size_t size, const size_t one_element, const unsigned int … in BufferSampleKernel() 98 __global__ void SetupKernel(const int seed, curandState state, const int size) { in SetupKernel() 104 __global__ void SrandUInt(const int size, curandState globalState, unsigned int value, unsigned i… in SrandUInt() 111 __global__ void SrandUniformInt(const int size, curandState *globalState, const int upBound, unsign… in SrandUniformInt()
/third_party/ffmpeg/libavfilter/
D	vf_thumbnail_cuda.cu	25 __global__ void Thumbnail_uchar(cudaTextureObject_t uchar_tex, in Thumbnail_uchar() 37 __global__ void Thumbnail_uchar2(cudaTextureObject_t uchar2_tex, in Thumbnail_uchar2() 51 __global__ void Thumbnail_ushort(cudaTextureObject_t ushort_tex, in Thumbnail_ushort() 64 __global__ void Thumbnail_ushort2(cudaTextureObject_t ushort2_tex, in Thumbnail_ushort2()
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/neighbor_list/
D	neighbor_list_impl.cu	27 static __global__ void Copy_List(const int element_numbers, const float origin_list, float list) { in Copy_List() 34 static __global__ void Crd_To_Uint_Crd(const int atom_numbers, float scale_factor, const VECTOR c… in Crd_To_Uint_Crd() 55 static __global__ void Crd_Periodic_Map(const int atom_numbers, VECTOR crd, const float box_lengt… in Crd_Periodic_Map() 86 static __global__ void Clear_Grid_Bucket(const int grid_numbers, int atom_numbers_in_grid_bucket, in Clear_Grid_Bucket() 98 static __global__ void Find_Atom_In_Grid_Serial(const int atom_numbers, const float grid_length_in… in Find_Atom_In_Grid_Serial() 113 static __global__ void Put_Atom_In_Grid_Bucket(const int atom_numbers, const int atom_in_grid_seri… in Put_Atom_In_Grid_Bucket() 136 static __global__ void Find_atom_neighbors(const int atom_numbers, const UNSIGNED_INT_VECTOR uint_… in Find_atom_neighbors() 179 static __global__ void Delete_Excluded_Atoms_Serial_In_Neighbor_List(const int atom_numbers, NEIGHB… in Delete_Excluded_Atoms_Serial_In_Neighbor_List() 220 static __global__ void construct_neighbor_list_kernel(int atom_numbers, int max_neighbor_numbers, i… in construct_neighbor_list_kernel() 228 static __global__ void copy_neighbor_list_atom_number(int atom_numbers, int max_neighbor_numbers, N… in copy_neighbor_list_atom_number() [all …]
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/
D	pme_common.cuh	105 __global__ static void device_add(float ene, float factor, float charge_sum) { in device_add() 109 __global__ static void PME_Atom_Near(const UNSIGNED_INT_VECTOR uint_crd, int PME_atom_near, const… in PME_Atom_Near() 154 __global__ static void PME_Q_Spread(int PME_atom_near, const float charge, const VECTOR PME_frxy… in PME_Q_Spread() 196 __global__ static void PME_Direct_Energy(const int atom_numbers, const NEIGHBOR_LIST nl, in PME_Direct_Energy() 241 __global__ static void PME_Direct_Atom_Energy(const int atom_numbers, const NEIGHBOR_LIST nl, in PME_Direct_Atom_Energy() 284 __global__ static void PME_Energy_Product(const int element_number, const float list1, const float… in PME_Energy_Product() 297 __global__ static void PME_BCFQ(cufftComplex PME_FQ, float *PME_BC, int PME_Nfft) { in PME_BCFQ() 307 __global__ static void PME_Excluded_Energy_Correction(const int atom_numbers, const UNSIGNED_INT_VE… in PME_Excluded_Energy_Correction()
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/common/
D	atomcrdtocv_impl.cu	27 __global__ void Record_Box_Map_Times(int atom_numbers, const float crd, const float old_crd, floa… in Record_Box_Map_Times() 50 __global__ void gen_nowarp_crd(int atom_numbers, const float crd, float box, int box_map_times, … in gen_nowarp_crd() 59 __global__ void G_Radial(const int start_serial, const int end_serial, const float crd, float g_r… in G_Radial() 78 __global__ void G_Angular(const int start_serial, const int end_serial, const float crd, float *g_… in G_Angular()

1 2 3 4 5 6 7 8 9