
Searched refs:blockDim (Results 1 – 25 of 210) sorted by relevance


/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/
unary_op_impl.cu
20 for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (count); i += blockDim.x * gridDim.x) { in ExponentialKernel()
27 for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (count); i += blockDim.x * gridDim.x) { in ExponentialKernel()
34 for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (count); i += blockDim.x * gridDim.x) { in ExponentialKernel()
41 for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (count); i += blockDim.x * gridDim.x) { in Expm1Kernel()
48 for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (count); i += blockDim.x * gridDim.x) { in Expm1Kernel()
55 for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (count); i += blockDim.x * gridDim.x) { in LogarithmKernel()
62 for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (count); i += blockDim.x * gridDim.x) { in LogarithmKernel()
69 for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (count); i += blockDim.x * gridDim.x) { in LogarithmKernel()
76 for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (count); i += blockDim.x * gridDim.x) { in Log1pKernel()
83 for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (count); i += blockDim.x * gridDim.x) { in Log1pKernel()
[all …]
float_status_impl.cu
22 …for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < (size); pos += blockDim.x * gridDim… in IsNan()
33 …for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < (size); pos += blockDim.x * gridDim… in IsNan()
45 …for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < (size); pos += blockDim.x * gridDim… in IsInf()
56 …for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < (size); pos += blockDim.x * gridDim… in IsInf()
68 …for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < (size); pos += blockDim.x * gridDim… in IsFinite()
79 …for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < (size); pos += blockDim.x * gridDim… in IsFinite()
91 …for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < (size); pos += blockDim.x * gridDim… in FloatStatus()
100 …for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < (size); pos += blockDim.x * gridDim… in FloatStatus()
loss_with_reduction_impl.cu
40 for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += blockDim.x * gridDim.x) { in CopyEqual()
51 for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < stride; i += blockDim.x * gridDim.x) { in PartialSum()
131 for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += blockDim.x * gridDim.x) { in InitZero()
141 … for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < input_size; i += blockDim.x * gridDim.x) { in KLDivLossKernel()
147 … for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < input_size; i += blockDim.x * gridDim.x) { in KLDivLossKernel()
181 … for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < input_size; i += blockDim.x * gridDim.x) { in KLDivLossGradKernel()
191 … for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < input_size; i += blockDim.x * gridDim.x) { in KLDivLossGradKernel()
212 … for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < input_size; i += blockDim.x * gridDim.x) { in BinaryCrossEntropyLossKernel()
218 … for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < input_size; i += blockDim.x * gridDim.x) { in BinaryCrossEntropyLossKernel()
223 … for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < input_size; i += blockDim.x * gridDim.x) { in BinaryCrossEntropyLossKernel()
[all …]
square_sum_all_impl.cu
23 for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += gridDim.x * blockDim.x) { in SquareSumAllKernel()
40 for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += gridDim.x * blockDim.x) { in SquareSumAllKernel()
57 for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += gridDim.x * blockDim.x) { in AssignKernel()
67 for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += gridDim.x * blockDim.x) { in AssignKernel()
77 for (size_t id = blockIdx.x * blockDim.x + threadIdx.x; id < size; id += blockDim.x * gridDim.x) { in InitOutput()
unary_op_grad_impl.cu
21 for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (count); i += blockDim.x * gridDim.x) { in SqrtGradKernel()
32 for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (count); i += blockDim.x * gridDim.x) { in RsqrtGradKernel()
44 for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (count); i += blockDim.x * gridDim.x) { in AsinGradKernel()
54 for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (count); i += blockDim.x * gridDim.x) { in AsinGradKernel()
64 for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (count); i += blockDim.x * gridDim.x) { in ACosGradKernel()
75 for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (count); i += blockDim.x * gridDim.x) { in ACosGradKernel()
86 for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (count); i += blockDim.x * gridDim.x) { in AtanGradKernel()
96 for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (count); i += blockDim.x * gridDim.x) { in AsinhGradKernel()
106 for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (count); i += blockDim.x * gridDim.x) { in AcoshGradKernel()
116 for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < count; i += blockDim.x * gridDim.x) { in ReciprocalGradKernel()
momentum_impl.cu
22 … for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (size); i += blockDim.x * gridDim.x) { in MomentumUpdateVariableKernel()
27 … for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (size); i += blockDim.x * gridDim.x) { in MomentumUpdateVariableKernel()
38 … for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (size); i += blockDim.x * gridDim.x) { in MomentumUpdateVariableKernel()
44 … for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (size); i += blockDim.x * gridDim.x) { in MomentumUpdateVariableKernel()
55 … for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (size); i += blockDim.x * gridDim.x) { in MomentumUpdateVariableKernel()
60 … for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (size); i += blockDim.x * gridDim.x) { in MomentumUpdateVariableKernel()
77 …for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (element_num); i += blockDim.x * gridDi… in FusedMomentumWeightDecayScaleKernel()
97 …for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (element_num); i += blockDim.x * gridDi… in FusedMomentumScaleKernel()
115 …for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (element_num); i += blockDim.x * gridDi… in FusedWeightDecayMomentumKernel()
136 …for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (element_num[idx]); i += blockDim.x * g… in CombineFusedMomentumScaleKernel()
[all …]
prelu_grad_impl.cu
24 …for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < size; pos += blockDim.x * gridDim.x… in CalPReLUGradKernel()
25 size_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; in CalPReLUGradKernel()
27 size_t index = channel_id * blockDim.x * gridDim.x + thread_id; in CalPReLUGradKernel()
37 …for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < dw_array_size; i += blockDim.x * gridDi… in InitDwArrayData()
44 …for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < weight_size; i += blockDim.x * gridDim.… in ComputeDwData()
adagrad_impl.cu
36 for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += gridDim.x * blockDim.x) { in ApplyAdagradKernel()
51 for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += gridDim.x * blockDim.x) { in ApplyAdagradKernel()
66 for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += gridDim.x * blockDim.x) { in ApplyAdagradKernel()
81 for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += gridDim.x * blockDim.x) { in ApplyAdagradKernel()
96 for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += gridDim.x * blockDim.x) { in ApplyAdagradKernel()
gelu_impl.cu
26 …for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < size; pos += blockDim.x * gridDim.x… in GeluKernel()
35 …for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < size; pos += blockDim.x * gridDim.x… in GeluKernel()
44 …for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < size; pos += blockDim.x * gridDim.x… in GeluKernel()
77 …for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < (size); pos += blockDim.x * gridDim… in GeluGradKernel()
88 …for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < (size); pos += blockDim.x * gridDim… in GeluGradKernel()
104 …for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < (size); pos += blockDim.x * gridDim… in GeluGradKernel()
batchnorm_fold2_impl.cu
34 …for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < num_count; i += blockDim.x * gridDim.x)… in BatchNormFold2Kernel()
39 …for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < num_count; i += blockDim.x * gridDim.x)… in BatchNormFold2Kernel()
50 for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < N * C; i += blockDim.x * gridDim.x) { in BatchNormFold2GradReduce1()
60 for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < C; i += blockDim.x * gridDim.x) { in BatchNormFold2GradReduce2()
70 for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < C; i += blockDim.x * gridDim.x) { in BatchNormFold2GradNotFreeze()
81 for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < C; i += blockDim.x * gridDim.x) { in BatchNormFold2GradFreeze()
88 for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < NCHW; i += blockDim.x * gridDim.x) { in BatchNormFold2GradMul()
97 …for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < num_count; i += blockDim.x * gridDim.x)… in DxMul()
dropout_impl.cu
24 …for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < num_count; i += blockDim.x * gridDim.x)… in DropoutForwardKernel()
34 …for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < num_count; i += blockDim.x * gridDim.x)… in DropoutForwardKernel()
50 …for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < num_count; i += blockDim.x * gridDim.x)… in DropoutBackwardKernel()
58 …for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < num_count; i += blockDim.x * gridDim.x)… in DropoutBackwardKernel()
softplus_impl.cu
22 …for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < size; pos += blockDim.x * gridDim.x… in SoftplusKernel()
30 …for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < size; pos += blockDim.x * gridDim.x… in SoftplusKernel()
48 …for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < (size); pos += blockDim.x * gridDim… in SoftplusGradKernel()
56 …for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < (size); pos += blockDim.x * gridDim… in SoftplusGradKernel()
random_choice_with_mask_impl.cu
40 …for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < ceil_power2; pos += blockDim.x * gr… in InitArray()
106 …for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < input_size; pos += blockDim.x * gri… in Reshape2Index()
128 for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < n; pos += blockDim.x * gridDim.x) { in Copy()
137 for (size_t tid = threadIdx.x; tid < ceil_power2; tid += blockDim.x) { in Sort()
157 …for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < ceil_power2; i += blockDim.x * gridDim.… in SrandInit()
168 for (size_t tid = threadIdx.x; tid < ceil_power2; tid += blockDim.x) { in Shuffle()
190 for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < count; i += blockDim.x * gridDim.x) { in MoveToOutput()
203 for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < count; i += blockDim.x * gridDim.x) { in MoveToOutput()
nms_with_mask_impl.cu
41 …for (int mat_pos = blockIdx.x * blockDim.x + threadIdx.x; mat_pos < numSq; mat_pos += blockDim.x *… in MaskInit()
51 …for (int box_num = blockIdx.x * blockDim.x + threadIdx.x; box_num < num; box_num += blockDim.x * g… in PopulateOutput()
101 …for (int box_num = blockIdx.x * blockDim.x + threadIdx.x; box_num < num; box_num += blockDim.x * g… in Preprocess()
113 for (int mask_index = blockIdx.x * blockDim.x + threadIdx.x; mask_index < num * num; in NmsPass()
114 mask_index += blockDim.x * gridDim.x) { in NmsPass()
133 for (int j = blockIdx.x * blockDim.x + threadIdx.x; j < num; j += blockDim.x * gridDim.x) { in ReducePass()
144 for (int i = threadIdx.x; i < ceil_power2; i += blockDim.x) { in NmsBitonicSortByKeyKernel()
152 for (size_t tid = threadIdx.x; tid < ceil_power2; tid += blockDim.x) { in NmsBitonicSortByKeyKernel()
bce_with_logits_loss_impl.cu
23 …for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < size; pos += blockDim.x * gridDim.x… in FillWithoutBroadcast()
33 …for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < size; pos += blockDim.x * gridDim.x… in FillAndBroadcast()
60 …for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < size; pos += blockDim.x * gridDim.x… in BCEWithLogitsLossMain()
73 …for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < size; pos += blockDim.x * gridDim.x… in BCEWithLogitsLossMain()
85 …for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < size; pos += blockDim.x * gridDim.x… in Mul()
multinomial_impl.cu
23 …for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < (distributions); pos += blockDim.x … in CheckZeroKernel()
40 …for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < (size); pos += blockDim.x * gridDim… in CheckNonNegKernel()
56 …for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < (size); pos += blockDim.x * gridDim… in NormInputKernel()
94 for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < count; i += blockDim.x * gridDim.x) { in MultinomialKernel()
ctcloss_impl.cu
35 for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < batch; i += blockDim.x * gridDim.x) { in CalculateFwdVarKernel()
94 for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < batch; i += blockDim.x * gridDim.x) { in CalculateBwdVarKernel()
158 for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += blockDim.x * gridDim.x) { in ProbInitKernel()
164 …for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < log_prob_size; i += blockDim.x * gridDim.x… in LogBInitKernel()
174 for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < batch; i += blockDim.x * gridDim.x) { in CTCLossKernel()
223 …for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < batch * max_time; i += blockDim.x * gridDi… in InnerSoftMaxKernel()
247 for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < batch; i += blockDim.x * gridDim.x) { in GenLabelValuePCRKernel()
308 for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < batch; i += blockDim.x * gridDim.x) { in GenLabelWithBlankKernel()
338 for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += blockDim.x * gridDim.x) { in GenLabelValueKernel()
349 for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += blockDim.x * gridDim.x) { in LabelValueInitKernel()
[all …]
fake_learned_scale_quant_perlayer_impl.cu
26 for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += blockDim.x * gridDim.x) { in FakeLearnedScaleQuantPerLayer()
39 for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += blockDim.x * gridDim.x) { in FakeLearnedScaleQuantPerLayerGrad()
60 for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += blockDim.x * gridDim.x) { in LSQNudgePerLayer()
correction_mul_impl.cu
24 …for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < batchsize * chw; i += blockDim.x * gridDim… in CorrectionMul()
33 for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < N; i += blockDim.x * gridDim.x) { in Mul()
41 for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < N; i += blockDim.x * gridDim.x) { in Reduce()
fake_learned_scale_quant_perchannel_impl.cu
30 for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += blockDim.x * gridDim.x) { in FakeLearnedScaleQuantPerChannel()
46 for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += blockDim.x * gridDim.x) { in FakeLearnedScaleQuantPerChannelGrad()
72 for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += blockDim.x * gridDim.x) { in LSQNudgePerChannel()
cumsum_impl.cu
22 size_t step = blockDim.x * gridDim.x; in Copy()
23 …for (size_t write_index = blockIdx.x * blockDim.x + threadIdx.x; write_index < size; write_index +… in Copy()
33 size_t step = blockDim.x * gridDim.x; in LeftMoveSum()
34 …for (size_t write_index = blockIdx.x * blockDim.x + threadIdx.x; write_index < num; write_index +=… in LeftMoveSum()
55 size_t step = blockDim.x * gridDim.x; in RightMoveSum()
56 …for (size_t write_index = blockIdx.x * blockDim.x + threadIdx.x; write_index < num; write_index +=… in RightMoveSum()
76 size_t step = blockDim.x * gridDim.x; in CumSumKernelReverse()
77 …for (size_t write_index = blockIdx.x * blockDim.x + threadIdx.x; write_index < num; write_index +=… in CumSumKernelReverse()
98 size_t step = blockDim.x * gridDim.x; in CumSumKernel()
99 …for (size_t write_index = blockIdx.x * blockDim.x + threadIdx.x; write_index < num; write_index +=… in CumSumKernel()
cumprod_impl.cu
22 size_t step = blockDim.x * gridDim.x; in Copy()
23 …for (size_t write_index = blockIdx.x * blockDim.x + threadIdx.x; write_index < size; write_index +… in Copy()
33 size_t step = blockDim.x * gridDim.x; in LeftMoveProd()
34 …for (size_t write_index = blockIdx.x * blockDim.x + threadIdx.x; write_index < num; write_index +=… in LeftMoveProd()
55 size_t step = blockDim.x * gridDim.x; in RightMoveProd()
56 …for (size_t write_index = blockIdx.x * blockDim.x + threadIdx.x; write_index < num; write_index +=… in RightMoveProd()
76 size_t step = blockDim.x * gridDim.x; in CumProdKernelReverse()
77 …for (size_t write_index = blockIdx.x * blockDim.x + threadIdx.x; write_index < num; write_index +=… in CumProdKernelReverse()
98 size_t step = blockDim.x * gridDim.x; in CumProdKernel()
99 …for (size_t write_index = blockIdx.x * blockDim.x + threadIdx.x; write_index < num; write_index +=… in CumProdKernel()
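Almost every MindSpore hit above is an instance of the standard CUDA grid-stride loop: each thread starts at its global index blockIdx.x * blockDim.x + threadIdx.x and advances by the total thread count blockDim.x * gridDim.x until it passes the element count. A minimal, self-contained sketch of that idiom (the ScaleKernel name and the launch configuration are illustrative assumptions, not code from the files listed here):

    #include <cuda_runtime.h>
    #include <cstdio>

    // Hypothetical example: scale every element using a grid-stride loop.
    __global__ void ScaleKernel(float *data, float factor, size_t count) {
      for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < count; i += blockDim.x * gridDim.x) {
        data[i] *= factor;
      }
    }

    int main() {
      const size_t count = 1 << 20;
      float *data = nullptr;
      cudaMallocManaged(&data, count * sizeof(float));
      for (size_t i = 0; i < count; ++i) data[i] = 1.0f;

      // Launch far fewer threads than elements; the stride loop covers the rest.
      ScaleKernel<<<256, 256>>>(data, 2.0f, count);
      cudaDeviceSynchronize();

      printf("data[0] = %f\n", data[0]);  // expected 2.0
      cudaFree(data);
      return 0;
    }

Because the stride equals the number of launched threads, the kernel stays correct for any grid size, so a caller can cap the block count instead of launching one thread per element.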
/third_party/ffmpeg/libavfilter/
vf_thumbnail_cuda.cu
28 int x = blockIdx.x * blockDim.x + threadIdx.x; in Thumbnail_uchar()
29 int y = blockIdx.y * blockDim.y + threadIdx.y; in Thumbnail_uchar()
40 int x = blockIdx.x * blockDim.x + threadIdx.x; in Thumbnail_uchar2()
41 int y = blockIdx.y * blockDim.y + threadIdx.y; in Thumbnail_uchar2()
54 int x = blockIdx.x * blockDim.x + threadIdx.x; in Thumbnail_ushort()
55 int y = blockIdx.y * blockDim.y + threadIdx.y; in Thumbnail_ushort()
67 int x = blockIdx.x * blockDim.x + threadIdx.x; in Thumbnail_ushort2()
68 int y = blockIdx.y * blockDim.y + threadIdx.y; in Thumbnail_ushort2()
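The FFmpeg thumbnail hits use a different idiom: a 2D launch in which each thread maps to one (x, y) pixel and returns early when it falls outside the image, because the grid is rounded up to whole blocks. A hedged sketch of that pattern (FillKernel, the image size, and the launch shape are assumptions for illustration, not the actual filter code):

    #include <cuda_runtime.h>

    // Hypothetical 2D kernel: one thread per pixel, with a bounds check because
    // the grid is rounded up to whole blocks.
    __global__ void FillKernel(unsigned char *dst, int width, int height, unsigned char value) {
      int x = blockIdx.x * blockDim.x + threadIdx.x;
      int y = blockIdx.y * blockDim.y + threadIdx.y;
      if (x < width && y < height) {
        dst[y * width + x] = value;
      }
    }

    int main() {
      const int width = 1920, height = 1080;
      unsigned char *dst = nullptr;
      cudaMalloc(reinterpret_cast<void **>(&dst), static_cast<size_t>(width) * height);

      // Round the grid up so every pixel is covered by exactly one thread.
      dim3 block(16, 16);
      dim3 grid((width + block.x - 1) / block.x, (height + block.y - 1) / block.y);
      FillKernel<<<grid, block>>>(dst, width, height, 128);
      cudaDeviceSynchronize();

      cudaFree(dst);
      return 0;
    }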
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/rl/
rl_buffer_impl.cu
26 for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += gridDim.x * blockDim.x) { in BufferAppendKernel()
78 for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += gridDim.x * blockDim.x) { in BufferGetItemKernel()
93 for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += gridDim.x * blockDim.x) { in BufferSampleKernel()
99 for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += gridDim.x * blockDim.x) { in SetupKernel()
105 for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += gridDim.x * blockDim.x) { in SrandUInt()
112 for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += gridDim.x * blockDim.x) { in SrandUniformInt()
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/
common_sponge.cuh
219 …for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < atom_numbers; i += gridDim.x * blockDim.x)… in construct_neighbor_list_kernel()
226 …for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < atom_numbers; i += gridDim.x * blockDim.x)… in construct_atom_near()
257 int atom_i = blockDim.x * blockIdx.x + threadIdx.x; in construct_constrain_pair()
269 int atom_i = blockDim.x * blockIdx.x + threadIdx.x; in Copy_Crd_To_New_Crd_Start()
287 int i = blockDim.x * blockIdx.x + threadIdx.x; in Rand_Normal()
295 int id = threadIdx.x + blockIdx.x * blockDim.x; in Setup_Rand_Normal_Kernel()
304 int i = blockDim.x * blockIdx.x + threadIdx.x; in Reset_List()
311 int i = blockDim.x * blockIdx.x + threadIdx.x; in Reset_List()
323 for (int i = threadIdx.x; i < element_numbers; i = i + blockDim.x) { in Sum_Of_List()
330 int i = blockDim.x * blockIdx.x + threadIdx.x; in Scale_List()
[all …]
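The sponge headers mix two further idioms visible in the snippets above: a plain one-thread-per-element index with no loop (int atom_i = blockDim.x * blockIdx.x + threadIdx.x;) and a single-block, thread-stride reduction such as the Sum_Of_List hit, where threads step through the list by blockDim.x alone. A minimal sketch of the latter, assuming a hypothetical SumOfListKernel and a power-of-two block size (not the MindSpore sponge code):

    #include <cuda_runtime.h>
    #include <cstdio>

    // Hypothetical single-block sum: each thread strides over the list by
    // blockDim.x, then the block combines its partial sums in shared memory.
    __global__ void SumOfListKernel(const float *list, float *sum, int element_numbers) {
      extern __shared__ float partial[];
      float local = 0.0f;
      for (int i = threadIdx.x; i < element_numbers; i += blockDim.x) {
        local += list[i];
      }
      partial[threadIdx.x] = local;
      __syncthreads();

      // Tree reduction within the single block (blockDim.x must be a power of two).
      for (int stride = blockDim.x / 2; stride > 0; stride /= 2) {
        if (threadIdx.x < stride) {
          partial[threadIdx.x] += partial[threadIdx.x + stride];
        }
        __syncthreads();
      }
      if (threadIdx.x == 0) {
        *sum = partial[0];
      }
    }

    int main() {
      const int n = 1000;
      float *list = nullptr, *sum = nullptr;
      cudaMallocManaged(&list, n * sizeof(float));
      cudaMallocManaged(&sum, sizeof(float));
      for (int i = 0; i < n; ++i) list[i] = 1.0f;

      const int threads = 256;
      SumOfListKernel<<<1, threads, threads * sizeof(float)>>>(list, sum, n);
      cudaDeviceSynchronize();
      printf("sum = %f\n", *sum);  // expected 1000.0

      cudaFree(list);
      cudaFree(sum);
      return 0;
    }

Running in a single block keeps the reduction simple at the cost of parallelism, which is acceptable when the element count is small.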
