/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/ |
D | unary_op_impl.cu |
      20  for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (count); i += blockDim.x * gridDim.x) {   in ExponentialKernel()
      27  for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (count); i += blockDim.x * gridDim.x) {   in ExponentialKernel()
      34  for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (count); i += blockDim.x * gridDim.x) {   in ExponentialKernel()
      41  for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (count); i += blockDim.x * gridDim.x) {   in Expm1Kernel()
      48  for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (count); i += blockDim.x * gridDim.x) {   in Expm1Kernel()
      55  for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (count); i += blockDim.x * gridDim.x) {   in LogarithmKernel()
      62  for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (count); i += blockDim.x * gridDim.x) {   in LogarithmKernel()
      69  for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (count); i += blockDim.x * gridDim.x) {   in LogarithmKernel()
      76  for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (count); i += blockDim.x * gridDim.x) {   in Log1pKernel()
      83  for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (count); i += blockDim.x * gridDim.x) {   in Log1pKernel()
      [all …]
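Every match in this file (and in most of the files below) is the same grid-stride loop idiom: each thread starts at its global index and advances by the total number of launched threads, so a single launch covers any element count. A minimal sketch of the pattern, with a purely illustrative element-wise body and launch configuration (not copied from unary_op_impl.cu):

    // Grid-stride loop sketch; kernel name, body, and launch parameters are assumptions.
    __global__ void ExpSketchKernel(const float *input, float *output, const size_t count) {
      // Thread t handles elements t, t + stride, t + 2*stride, ... where
      // stride = blockDim.x * gridDim.x is the total number of launched threads.
      for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < count; i += blockDim.x * gridDim.x) {
        output[i] = expf(input[i]);
      }
    }
    // Typical launch: enough blocks to cover count once; the stride loop absorbs the
    // remainder if the grid is capped at a smaller size.
    // ExpSketchKernel<<<(count + 255) / 256, 256, 0, stream>>>(input, output, count);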
|
D | float_status_impl.cu |
      22  …for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < (size); pos += blockDim.x * gridDim…   in IsNan()
      33  …for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < (size); pos += blockDim.x * gridDim…   in IsNan()
      45  …for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < (size); pos += blockDim.x * gridDim…   in IsInf()
      56  …for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < (size); pos += blockDim.x * gridDim…   in IsInf()
      68  …for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < (size); pos += blockDim.x * gridDim…   in IsFinite()
      79  …for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < (size); pos += blockDim.x * gridDim…   in IsFinite()
      91  …for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < (size); pos += blockDim.x * gridDim…   in FloatStatus()
     100  …for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < (size); pos += blockDim.x * gridDim…   in FloatStatus()
|
D | loss_with_reduction_impl.cu |
      40  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += blockDim.x * gridDim.x) {   in CopyEqual()
      51  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < stride; i += blockDim.x * gridDim.x) {   in PartialSum()
     131  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += blockDim.x * gridDim.x) {   in InitZero()
     141  … for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < input_size; i += blockDim.x * gridDim.x) {   in KLDivLossKernel()
     147  … for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < input_size; i += blockDim.x * gridDim.x) {   in KLDivLossKernel()
     181  … for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < input_size; i += blockDim.x * gridDim.x) {   in KLDivLossGradKernel()
     191  … for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < input_size; i += blockDim.x * gridDim.x) {   in KLDivLossGradKernel()
     212  … for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < input_size; i += blockDim.x * gridDim.x) {   in BinaryCrossEntropyLossKernel()
     218  … for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < input_size; i += blockDim.x * gridDim.x) {   in BinaryCrossEntropyLossKernel()
     223  … for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < input_size; i += blockDim.x * gridDim.x) {   in BinaryCrossEntropyLossKernel()
      [all …]
|
D | square_sum_all_impl.cu |
      23  for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += gridDim.x * blockDim.x) {   in SquareSumAllKernel()
      40  for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += gridDim.x * blockDim.x) {   in SquareSumAllKernel()
      57  for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += gridDim.x * blockDim.x) {   in AssignKernel()
      67  for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += gridDim.x * blockDim.x) {   in AssignKernel()
      77  for (size_t id = blockIdx.x * blockDim.x + threadIdx.x; id < size; id += blockDim.x * gridDim.x) {   in InitOutput()
|
D | unary_op_grad_impl.cu |
      21  for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (count); i += blockDim.x * gridDim.x) {   in SqrtGradKernel()
      32  for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (count); i += blockDim.x * gridDim.x) {   in RsqrtGradKernel()
      44  for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (count); i += blockDim.x * gridDim.x) {   in AsinGradKernel()
      54  for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (count); i += blockDim.x * gridDim.x) {   in AsinGradKernel()
      64  for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (count); i += blockDim.x * gridDim.x) {   in ACosGradKernel()
      75  for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (count); i += blockDim.x * gridDim.x) {   in ACosGradKernel()
      86  for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (count); i += blockDim.x * gridDim.x) {   in AtanGradKernel()
      96  for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (count); i += blockDim.x * gridDim.x) {   in AsinhGradKernel()
     106  for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (count); i += blockDim.x * gridDim.x) {   in AcoshGradKernel()
     116  for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < count; i += blockDim.x * gridDim.x) {   in ReciprocalGradKernel()
|
D | momentum_impl.cu |
      22  … for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (size); i += blockDim.x * gridDim.x) {   in MomentumUpdateVariableKernel()
      27  … for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (size); i += blockDim.x * gridDim.x) {   in MomentumUpdateVariableKernel()
      38  … for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (size); i += blockDim.x * gridDim.x) {   in MomentumUpdateVariableKernel()
      44  … for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (size); i += blockDim.x * gridDim.x) {   in MomentumUpdateVariableKernel()
      55  … for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (size); i += blockDim.x * gridDim.x) {   in MomentumUpdateVariableKernel()
      60  … for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (size); i += blockDim.x * gridDim.x) {   in MomentumUpdateVariableKernel()
      77  …for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (element_num); i += blockDim.x * gridDi…   in FusedMomentumWeightDecayScaleKernel()
      97  …for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (element_num); i += blockDim.x * gridDi…   in FusedMomentumScaleKernel()
     115  …for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (element_num); i += blockDim.x * gridDi…   in FusedWeightDecayMomentumKernel()
     136  …for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (element_num[idx]); i += blockDim.x * g…   in CombineFusedMomentumScaleKernel()
      [all …]
|
D | prelu_grad_impl.cu |
      24  …for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < size; pos += blockDim.x * gridDim.x…   in CalPReLUGradKernel()
      25  size_t thread_id = blockIdx.x * blockDim.x + threadIdx.x;   in CalPReLUGradKernel()
      27  size_t index = channel_id * blockDim.x * gridDim.x + thread_id;   in CalPReLUGradKernel()
      37  …for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < dw_array_size; i += blockDim.x * gridDi…   in InitDwArrayData()
      44  …for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < weight_size; i += blockDim.x * gridDim.…   in ComputeDwData()
|
D | adagrad_impl.cu |
      36  for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += gridDim.x * blockDim.x) {   in ApplyAdagradKernel()
      51  for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += gridDim.x * blockDim.x) {   in ApplyAdagradKernel()
      66  for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += gridDim.x * blockDim.x) {   in ApplyAdagradKernel()
      81  for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += gridDim.x * blockDim.x) {   in ApplyAdagradKernel()
      96  for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += gridDim.x * blockDim.x) {   in ApplyAdagradKernel()
|
D | gelu_impl.cu |
      26  …for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < size; pos += blockDim.x * gridDim.x…   in GeluKernel()
      35  …for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < size; pos += blockDim.x * gridDim.x…   in GeluKernel()
      44  …for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < size; pos += blockDim.x * gridDim.x…   in GeluKernel()
      77  …for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < (size); pos += blockDim.x * gridDim…   in GeluGradKernel()
      88  …for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < (size); pos += blockDim.x * gridDim…   in GeluGradKernel()
     104  …for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < (size); pos += blockDim.x * gridDim…   in GeluGradKernel()
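The GeluKernel/GeluGradKernel matches use the same grid-stride traversal; only the per-element body differs. As a hedged sketch, assuming the usual tanh approximation of GELU rather than the erf form (the body itself is not visible in the listing):

    // GELU forward sketch inside a grid-stride loop (tanh approximation assumed).
    __global__ void GeluSketchKernel(size_t size, const float *x, float *y) {
      for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < size; pos += blockDim.x * gridDim.x) {
        float v = x[pos];
        // 0.7978845608 ~= sqrt(2/pi); GELU(x) ~= 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3)))
        float t = tanhf(0.7978845608f * (v + 0.044715f * v * v * v));
        y[pos] = 0.5f * v * (1.0f + t);
      }
    }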
|
D | batchnorm_fold2_impl.cu |
      34  …for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < num_count; i += blockDim.x * gridDim.x)…   in BatchNormFold2Kernel()
      39  …for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < num_count; i += blockDim.x * gridDim.x)…   in BatchNormFold2Kernel()
      50  for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < N * C; i += blockDim.x * gridDim.x) {   in BatchNormFold2GradReduce1()
      60  for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < C; i += blockDim.x * gridDim.x) {   in BatchNormFold2GradReduce2()
      70  for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < C; i += blockDim.x * gridDim.x) {   in BatchNormFold2GradNotFreeze()
      81  for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < C; i += blockDim.x * gridDim.x) {   in BatchNormFold2GradFreeze()
      88  for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < NCHW; i += blockDim.x * gridDim.x) {   in BatchNormFold2GradMul()
      97  …for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < num_count; i += blockDim.x * gridDim.x)…   in DxMul()
|
D | dropout_impl.cu |
      24  …for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < num_count; i += blockDim.x * gridDim.x)…   in DropoutForwardKernel()
      34  …for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < num_count; i += blockDim.x * gridDim.x)…   in DropoutForwardKernel()
      50  …for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < num_count; i += blockDim.x * gridDim.x)…   in DropoutBackwardKernel()
      58  …for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < num_count; i += blockDim.x * gridDim.x)…   in DropoutBackwardKernel()
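The dropout forward and backward matches are again plain grid-stride loops over num_count elements. A sketch of what a forward body of this shape typically does, assuming a precomputed buffer of uniform [0,1) samples (how the real kernel obtains its randomness is not visible here):

    // Inverted-dropout forward sketch; rand_f is an assumed, precomputed uniform sample buffer.
    __global__ void DropoutForwardSketch(const float *input, float *mask, float *output,
                                         size_t num_count, float keep_prob, const float *rand_f) {
      const float scale = 1.0f / keep_prob;
      for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < num_count; i += blockDim.x * gridDim.x) {
        float keep = (rand_f[i] < keep_prob) ? 1.0f : 0.0f;
        mask[i] = keep;                       // saved for the backward pass
        output[i] = input[i] * keep * scale;  // scale kept activations by 1/keep_prob
      }
    }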
|
D | softplus_impl.cu |
      22  …for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < size; pos += blockDim.x * gridDim.x…   in SoftplusKernel()
      30  …for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < size; pos += blockDim.x * gridDim.x…   in SoftplusKernel()
      48  …for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < (size); pos += blockDim.x * gridDim…   in SoftplusGradKernel()
      56  …for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < (size); pos += blockDim.x * gridDim…   in SoftplusGradKernel()
|
D | random_choice_with_mask_impl.cu |
      40  …for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < ceil_power2; pos += blockDim.x * gr…   in InitArray()
     106  …for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < input_size; pos += blockDim.x * gri…   in Reshape2Index()
     128  for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < n; pos += blockDim.x * gridDim.x) {   in Copy()
     137  for (size_t tid = threadIdx.x; tid < ceil_power2; tid += blockDim.x) {   in Sort()
     157  …for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < ceil_power2; i += blockDim.x * gridDim.…   in SrandInit()
     168  for (size_t tid = threadIdx.x; tid < ceil_power2; tid += blockDim.x) {   in Shuffle()
     190  for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < count; i += blockDim.x * gridDim.x) {   in MoveToOutput()
     203  for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < count; i += blockDim.x * gridDim.x) {   in MoveToOutput()
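Two of these matches break the grid-stride pattern: Sort() at line 137 and Shuffle() at line 168 stride by blockDim.x only, with no blockIdx term. That shape is used when a kernel runs as a single block so that __syncthreads() can order its phases (here, bitonic-sort style steps). A minimal sketch of that block-stride shape, with a placeholder body:

    // Block-stride sketch: assumes a single-block launch over a power-of-two workspace.
    __global__ void BlockStrideSketch(int *keys, size_t ceil_power2) {
      // Each thread walks the workspace in steps of blockDim.x; because all cooperating
      // threads live in one block, __syncthreads() can separate successive phases.
      for (size_t tid = threadIdx.x; tid < ceil_power2; tid += blockDim.x) {
        keys[tid] = static_cast<int>(tid);  // placeholder per-element work
      }
      __syncthreads();
      // ... the next phase (e.g. one compare-and-swap step of a bitonic sort) would go here ...
    }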
|
D | nms_with_mask_impl.cu |
      41  …for (int mat_pos = blockIdx.x * blockDim.x + threadIdx.x; mat_pos < numSq; mat_pos += blockDim.x *…   in MaskInit()
      51  …for (int box_num = blockIdx.x * blockDim.x + threadIdx.x; box_num < num; box_num += blockDim.x * g…   in PopulateOutput()
     101  …for (int box_num = blockIdx.x * blockDim.x + threadIdx.x; box_num < num; box_num += blockDim.x * g…   in Preprocess()
     113  for (int mask_index = blockIdx.x * blockDim.x + threadIdx.x; mask_index < num * num;   in NmsPass()
     114  mask_index += blockDim.x * gridDim.x) {   in NmsPass()
     133  for (int j = blockIdx.x * blockDim.x + threadIdx.x; j < num; j += blockDim.x * gridDim.x) {   in ReducePass()
     144  for (int i = threadIdx.x; i < ceil_power2; i += blockDim.x) {   in NmsBitonicSortByKeyKernel()
     152  for (size_t tid = threadIdx.x; tid < ceil_power2; tid += blockDim.x) {   in NmsBitonicSortByKeyKernel()
|
D | bce_with_logits_loss_impl.cu |
      23  …for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < size; pos += blockDim.x * gridDim.x…   in FillWithoutBroadcast()
      33  …for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < size; pos += blockDim.x * gridDim.x…   in FillAndBroadcast()
      60  …for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < size; pos += blockDim.x * gridDim.x…   in BCEWithLogitsLossMain()
      73  …for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < size; pos += blockDim.x * gridDim.x…   in BCEWithLogitsLossMain()
      85  …for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < size; pos += blockDim.x * gridDim.x…   in Mul()
|
D | multinomial_impl.cu |
      23  …for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < (distributions); pos += blockDim.x …   in CheckZeroKernel()
      40  …for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < (size); pos += blockDim.x * gridDim…   in CheckNonNegKernel()
      56  …for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < (size); pos += blockDim.x * gridDim…   in NormInputKernel()
      94  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < count; i += blockDim.x * gridDim.x) {   in MultinomialKernel()
|
D | ctcloss_impl.cu |
      35  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < batch; i += blockDim.x * gridDim.x) {   in CalculateFwdVarKernel()
      94  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < batch; i += blockDim.x * gridDim.x) {   in CalculateBwdVarKernel()
     158  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += blockDim.x * gridDim.x) {   in ProbInitKernel()
     164  …for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < log_prob_size; i += blockDim.x * gridDim.x…   in LogBInitKernel()
     174  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < batch; i += blockDim.x * gridDim.x) {   in CTCLossKernel()
     223  …for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < batch * max_time; i += blockDim.x * gridDi…   in InnerSoftMaxKernel()
     247  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < batch; i += blockDim.x * gridDim.x) {   in GenLabelValuePCRKernel()
     308  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < batch; i += blockDim.x * gridDim.x) {   in GenLabelWithBlankKernel()
     338  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += blockDim.x * gridDim.x) {   in GenLabelValueKernel()
     349  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += blockDim.x * gridDim.x) {   in LabelValueInitKernel()
      [all …]
|
D | fake_learned_scale_quant_perlayer_impl.cu |
      26  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += blockDim.x * gridDim.x) {   in FakeLearnedScaleQuantPerLayer()
      39  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += blockDim.x * gridDim.x) {   in FakeLearnedScaleQuantPerLayerGrad()
      60  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += blockDim.x * gridDim.x) {   in LSQNudgePerLayer()
|
D | correction_mul_impl.cu |
      24  …for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < batchsize * chw; i += blockDim.x * gridDim…   in CorrectionMul()
      33  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < N; i += blockDim.x * gridDim.x) {   in Mul()
      41  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < N; i += blockDim.x * gridDim.x) {   in Reduce()
|
D | fake_learned_scale_quant_perchannel_impl.cu |
      30  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += blockDim.x * gridDim.x) {   in FakeLearnedScaleQuantPerChannel()
      46  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += blockDim.x * gridDim.x) {   in FakeLearnedScaleQuantPerChannelGrad()
      72  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += blockDim.x * gridDim.x) {   in LSQNudgePerChannel()
|
D | cumsum_impl.cu |
      22  size_t step = blockDim.x * gridDim.x;   in Copy()
      23  …for (size_t write_index = blockIdx.x * blockDim.x + threadIdx.x; write_index < size; write_index +…   in Copy()
      33  size_t step = blockDim.x * gridDim.x;   in LeftMoveSum()
      34  …for (size_t write_index = blockIdx.x * blockDim.x + threadIdx.x; write_index < num; write_index +=…   in LeftMoveSum()
      55  size_t step = blockDim.x * gridDim.x;   in RightMoveSum()
      56  …for (size_t write_index = blockIdx.x * blockDim.x + threadIdx.x; write_index < num; write_index +=…   in RightMoveSum()
      76  size_t step = blockDim.x * gridDim.x;   in CumSumKernelReverse()
      77  …for (size_t write_index = blockIdx.x * blockDim.x + threadIdx.x; write_index < num; write_index +=…   in CumSumKernelReverse()
      98  size_t step = blockDim.x * gridDim.x;   in CumSumKernel()
      99  …for (size_t write_index = blockIdx.x * blockDim.x + threadIdx.x; write_index < num; write_index +=…   in CumSumKernel()
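cumsum_impl.cu (and cumprod_impl.cu below) hoist the stride into a local step variable and then grid-stride over the independent scan columns; each iteration owns one column and scans it serially along the cumulated axis. A sketch of that structure, with the index arithmetic assumed for illustration rather than copied from the file:

    // Per-column cumulative-sum sketch. num = dim0 * dim2 independent columns; each thread
    // scans its column of length dim1 serially. Assumed layout: stride = dim1 * dim2, stride2 = dim2.
    __global__ void CumSumSketch(const float *input, float *output, size_t dim0, size_t dim1,
                                 size_t dim2, size_t stride, size_t stride2) {
      size_t num = dim0 * dim2;
      size_t step = blockDim.x * gridDim.x;  // hoisted grid stride, as in the matches above
      for (size_t write_index = blockIdx.x * blockDim.x + threadIdx.x; write_index < num;
           write_index += step) {
        size_t offset = (write_index / dim2) * stride + (write_index % dim2);
        float running = 0.0f;
        for (size_t j = 0; j < dim1; ++j) {  // serial scan along the cumulated axis
          running += input[offset + j * stride2];
          output[offset + j * stride2] = running;
        }
      }
    }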
|
D | cumprod_impl.cu |
      22  size_t step = blockDim.x * gridDim.x;   in Copy()
      23  …for (size_t write_index = blockIdx.x * blockDim.x + threadIdx.x; write_index < size; write_index +…   in Copy()
      33  size_t step = blockDim.x * gridDim.x;   in LeftMoveProd()
      34  …for (size_t write_index = blockIdx.x * blockDim.x + threadIdx.x; write_index < num; write_index +=…   in LeftMoveProd()
      55  size_t step = blockDim.x * gridDim.x;   in RightMoveProd()
      56  …for (size_t write_index = blockIdx.x * blockDim.x + threadIdx.x; write_index < num; write_index +=…   in RightMoveProd()
      76  size_t step = blockDim.x * gridDim.x;   in CumProdKernelReverse()
      77  …for (size_t write_index = blockIdx.x * blockDim.x + threadIdx.x; write_index < num; write_index +=…   in CumProdKernelReverse()
      98  size_t step = blockDim.x * gridDim.x;   in CumProdKernel()
      99  …for (size_t write_index = blockIdx.x * blockDim.x + threadIdx.x; write_index < num; write_index +=…   in CumProdKernel()
|
/third_party/ffmpeg/libavfilter/ |
D | vf_thumbnail_cuda.cu |
      28  int x = blockIdx.x * blockDim.x + threadIdx.x;   in Thumbnail_uchar()
      29  int y = blockIdx.y * blockDim.y + threadIdx.y;   in Thumbnail_uchar()
      40  int x = blockIdx.x * blockDim.x + threadIdx.x;   in Thumbnail_uchar2()
      41  int y = blockIdx.y * blockDim.y + threadIdx.y;   in Thumbnail_uchar2()
      54  int x = blockIdx.x * blockDim.x + threadIdx.x;   in Thumbnail_ushort()
      55  int y = blockIdx.y * blockDim.y + threadIdx.y;   in Thumbnail_ushort()
      67  int x = blockIdx.x * blockDim.x + threadIdx.x;   in Thumbnail_ushort2()
      68  int y = blockIdx.y * blockDim.y + threadIdx.y;   in Thumbnail_ushort2()
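The ffmpeg thumbnail kernels use a different shape: a 2D grid with one thread per pixel and a bounds check instead of a stride loop. A sketch of that layout (the histogram-update body and parameter names are assumptions, not copied from vf_thumbnail_cuda.cu):

    // One thread per pixel from a 2D grid; out-of-range threads simply do nothing.
    __global__ void ThumbnailSketch(int *histogram, const unsigned char *src,
                                    int src_width, int src_height, int src_pitch) {
      int x = blockIdx.x * blockDim.x + threadIdx.x;
      int y = blockIdx.y * blockDim.y + threadIdx.y;
      if (x < src_width && y < src_height) {
        atomicAdd(&histogram[src[y * src_pitch + x]], 1);  // assumed body: 256-bin per-value histogram
      }
    }
    // Launch with a 2D grid covering the frame, e.g.:
    // dim3 block(16, 16);
    // dim3 grid((src_width + block.x - 1) / block.x, (src_height + block.y - 1) / block.y);
    // ThumbnailSketch<<<grid, block>>>(histogram, src, src_width, src_height, src_pitch);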
|
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/rl/ |
D | rl_buffer_impl.cu |
      26  for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += gridDim.x * blockDim.x) {   in BufferAppendKernel()
      78  for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += gridDim.x * blockDim.x) {   in BufferGetItemKernel()
      93  for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += gridDim.x * blockDim.x) {   in BufferSampleKernel()
      99  for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += gridDim.x * blockDim.x) {   in SetupKernel()
     105  for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += gridDim.x * blockDim.x) {   in SrandUInt()
     112  for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += gridDim.x * blockDim.x) {   in SrandUniformInt()
|
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/ |
D | common_sponge.cuh |
     219  …for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < atom_numbers; i += gridDim.x * blockDim.x)…   in construct_neighbor_list_kernel()
     226  …for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < atom_numbers; i += gridDim.x * blockDim.x)…   in construct_atom_near()
     257  int atom_i = blockDim.x * blockIdx.x + threadIdx.x;   in construct_constrain_pair()
     269  int atom_i = blockDim.x * blockIdx.x + threadIdx.x;   in Copy_Crd_To_New_Crd_Start()
     287  int i = blockDim.x * blockIdx.x + threadIdx.x;   in Rand_Normal()
     295  int id = threadIdx.x + blockIdx.x * blockDim.x;   in Setup_Rand_Normal_Kernel()
     304  int i = blockDim.x * blockIdx.x + threadIdx.x;   in Reset_List()
     311  int i = blockDim.x * blockIdx.x + threadIdx.x;   in Reset_List()
     323  for (int i = threadIdx.x; i < element_numbers; i = i + blockDim.x) {   in Sum_Of_List()
     330  int i = blockDim.x * blockIdx.x + threadIdx.x;   in Scale_List()
      [all …]
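The SPONGE helpers mix three shapes: grid-stride loops (construct_neighbor_list_kernel, construct_atom_near), plain one-thread-per-element kernels (Reset_List, Scale_List, Copy_Crd_To_New_Crd_Start and friends, which rely on the launch covering every index), and a single-block strided accumulation in Sum_Of_List. Sketches of the latter two shapes, with illustrative bodies and an assumed reduction tail:

    // One thread per element, no stride loop: correct only if at least element_numbers
    // threads are launched (bounds check shown; the body is illustrative).
    __global__ void ResetListSketch(int element_numbers, float *list, float value) {
      int i = blockDim.x * blockIdx.x + threadIdx.x;
      if (i < element_numbers) {
        list[i] = value;
      }
    }

    // Sum_Of_List-style single-block accumulation: each thread sums a strided subset, then a
    // shared-memory tree reduction combines the partials (reduction tail is an assumption;
    // requires blockDim.x to be a power of two no larger than 256 here).
    __global__ void SumOfListSketch(int element_numbers, const float *list, float *sum) {
      __shared__ float partial[256];
      float acc = 0.0f;
      for (int i = threadIdx.x; i < element_numbers; i += blockDim.x) {
        acc += list[i];
      }
      partial[threadIdx.x] = acc;
      __syncthreads();
      for (int offset = blockDim.x / 2; offset > 0; offset >>= 1) {
        if (threadIdx.x < offset) {
          partial[threadIdx.x] += partial[threadIdx.x + offset];
        }
        __syncthreads();
      }
      if (threadIdx.x == 0) {
        sum[0] = partial[0];
      }
    }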
|