| /third_party/mindspore/mindspore-src/source/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_ops/ |
| D | bincount_impl.cu | 24 const int64_t outer_size) { in BincountNoWeight() 45 const int64_t outer_size) {} in BincountNoWeight() 49 const int64_t outer_size) {} in BincountNoWeight() 65 … const int64_t threads_size, const int64_t outer_size, const bool has_weights) { in BincountMem() 87 … const int64_t threads_size, const int64_t outer_size, const uint32_t &device_id, in CalBincount() 106 … const bool has_weights, const int64_t threads_size, const int64_t outer_size, in CalBincount() 121 … const bool has_weights, const int64_t threads_size, const int64_t outer_size, in CalBincount()
|
| D | maxunpool2d_grad_impl.cu | 23 … const int64_t outputHeight, const int64_t outputWidth, const int64_t outer_size, in MaxUnpool2DGradNCHW() 37 … const int64_t outputWidth, const int64_t outputChannel, const int64_t outer_size, in MaxUnpool2DGradNHWC() 50 … const std::vector<int64_t> grad_shape, T *output, const int64_t outer_size, in CalMaxUnpool2DGrad()
|
| D | maxunpool3d_grad_impl.cu | 23 … const int64_t outputWidth, const int64_t outer_size, T *output) { in MaxUnpool3DGradNCDHW() 37 … const int64_t outputChannel, const int64_t outer_size, T *output) { in MaxUnpool3DGradNDHWC() 49 … const std::vector<int64_t> grad_shape, T *output, const int64_t outer_size, in CalMaxUnpool3DGrad()
|
| D | index_add_impl.cu | 20 …exAddAtomic(T *dst, const int *index, const T *src, const size_t src_size, const size_t outer_size, in IndexAddAtomic() 39 …id IndexAdd(T *dst, const int *index, const T *src, const size_t src_size, const size_t outer_size, in IndexAdd() 58 cudaError_t CalIndexAdd(T *dst, const int *index, const T *src, const size_t outer_size, const size… in CalIndexAdd()
|
| D | argmax_impl.cu | 19 __global__ void Argmax(const T *input, const S bound, const size_t outer_size, const size_t inner_s… in Argmax() 39 cudaError_t CalArgmax(const T *input, const S bound, const size_t outer_size, const size_t inner_si… in CalArgmax()
|
| D | argmin_impl.cu | 19 __global__ void Argmin(const T *input, const S bound, const size_t outer_size, const size_t inner_s… in Argmin() 39 cudaError_t CalArgmin(const T *input, const S bound, const size_t outer_size, const size_t inner_si… in CalArgmin()
|
| D | sparse_segment_impl.cu | 39 … size_t outer_size, size_t inner_size, size_t output_dim0, R *y_ptr) { in SparseSegmentSumKernel() 67 … size_t outer_size, size_t inner_size, size_t output_dim0, float *y_ptr) { in SparseSegmentSumKernel() 96 … size_t outer_size, size_t inner_size, size_t output_dim0, half *y_ptr) { in SparseSegmentSumKernel() 124 … size_t outer_size, size_t inner_size, size_t output_dim0, R *y_ptr) { in SparseSegmentSqrtNKernel() 153 … size_t outer_size, size_t inner_size, size_t output_dim0, float *y_ptr) { in SparseSegmentSqrtNKernel() 183 … size_t outer_size, size_t inner_size, size_t output_dim0, half *y_ptr) { in SparseSegmentSqrtNKernel() 245 … const S *segment_ids_ptr, size_t *segment_pos_ptr, size_t outer_size, in CalSparseSegmentCombination()
|
| D | sparse_segment_grad_impl.cu | 24 size_t outer_size) { in SparseSegmentPosKernel() 40 … const size_t *indices_pos_ptr, size_t outer_size, size_t inner_size, in SparseSegmentSumGradKernel() 66 … const size_t *indices_pos_ptr, size_t outer_size, size_t inner_size, in SparseSegmentSumGradKernel() 92 … const size_t *indices_pos_ptr, size_t outer_size, size_t inner_size, in SparseSegmentSqrtNGradKernel() 119 … const size_t *indices_pos_ptr, size_t outer_size, size_t inner_size, in SparseSegmentSqrtNGradKernel() 178 … const S *segment_ids_ptr, size_t *indices_pos_ptr, size_t outer_size, in CalSparseSegmentGradCombination()
|
| D | fractional_pool_impl.cu | 24 const int64_t outer_size) { in Fractionalmaxpool() 60 const int64_t outer_size) { in Fractionalavgpool() 95 … const int64_t outer_size, const uint32_t &device_id, cudaStream_t cuda_stream) { in CalFractionalmaxpool() 106 … const int64_t outer_size, const uint32_t &device_id, cudaStream_t cuda_stream) { in CalFractionalavgpool()
|
| D | sparse_segment_mean_grad_impl.cu | 24 size_t outer_size) { in SparseSegmentPosKernel() 40 … const size_t *indices_pos_ptr, size_t outer_size, size_t inner_size, in SparseSegmentMeanGradKernel() 67 … size_t *indices_pos_ptr, size_t outer_size, size_t inner_size, size_t idx_seg_size, in CalSparseSegmentMeanGrad()
|
| D | dilation2d_backprop_filter_impl.cu | 23 __global__ void InitOutput(T *output, const int64_t outer_size) { in InitOutput() 38 const int64_t pad_left, const int64_t outer_size) { in Dilation2DBackpropFilter() 80 const int64_t pad_left, const int64_t outer_size) { in Dilation2DBackpropFilter() 122 … const int64_t outer_size, const uint32_t &device_id, cudaStream_t cuda_stream) { in CalDilation2DBackpropFilter()
|
| D | general_reduction_impl.cu | 50 __global__ void ThreadReduction(bool small, size_t outer_size, size_t bound, size_t inner_size, con… in ThreadReduction() 77 __global__ void WarpReduction(bool small, size_t outer_size, size_t bound, size_t inner_size, const… in WarpReduction() 122 __global__ void Warp4Reduction(bool small, size_t outer_size, size_t bound, size_t inner_size, cons… in Warp4Reduction() 208 __global__ void BlockReduction(bool small, size_t outer_size, size_t bound, size_t inner_size, cons… in BlockReduction() 289 void GeneralReductionImpl(bool small, size_t outer_size, size_t bound, size_t inner_size, const T *… in GeneralReductionImpl() 316 __global__ void ThreadReduction1(bool small, size_t outer_size, size_t bound, size_t inner_size, co… in ThreadReduction1() 344 __global__ void WarpReduction1(bool small, size_t outer_size, size_t bound, size_t inner_size, cons… in WarpReduction1() 390 __global__ void Warp4Reduction1(bool small, size_t outer_size, size_t bound, size_t inner_size, con… in Warp4Reduction1() 477 __global__ void BlockReduction1(bool small, size_t outer_size, size_t bound, size_t inner_size, con… in BlockReduction1() 559 void GeneralReductionImpl1(bool small, size_t outer_size, size_t bound, size_t inner_size, const ha… in GeneralReductionImpl1()
|
| D | fractionalmaxpool3dgradwithfixedksize_impl.cu | 21 __global__ void InitOutput(T *output, const int64_t outer_size) { in InitOutput() 48 … const int64_t outer_size, const int64_t out_backprop_size, in CalFractionalmaxpool3dgradwithfixedksize()
|
| D | topk_impl.cu | 106 inline __device__ void TopKStep(const int &outer_size, const int &inner_size, const T *input, T *ou… in TopKStep() 185 __global__ void TopKBlock(int outer_size, int inner_size, const T *input, T *output, S *output_inde… in TopKBlock() 214 cudaError_t FastTopK(const int outer_size, const int inner_size, const T *input, S k_cut, T *output… in FastTopK()
|
| D | fractional_pool_grad_impl.cu | 21 __global__ void InitOutput(T *output, const int64_t outer_size) { in InitOutput() 105 … const int64_t backprop_size, const int64_t outer_size, const uint32_t &device_id, in CalFractionalmaxpoolgrad() 120 … const int64_t backprop_size, const int64_t outer_size, const uint32_t &device_id, in CalFractionalavgpoolgrad()
|
| D | maxunpool2d_impl.cu | 20 __global__ void InitMaxUnpool2D(const int64_t outer_size, T *output) { in InitMaxUnpool2D() 65 … const std::vector<int64_t> output_shape, T *output, const int64_t outer_size, in CalMaxUnpool2D()
|
| D | maxunpool3d_impl.cu | 19 __global__ void InitMaxUnpool3D(const int64_t outer_size, T *output) { in InitMaxUnpool3D() 63 … const std::vector<int64_t> output_shape, T *output, const int64_t outer_size, in CalMaxUnpool3D()
|
| D | sparse_segment_mean_with_num_segments_impl.cu | 63 … size_t outer_size, size_t inner_size, size_t segment_size) { in SparseSegmentMeanWithNumSegmentsKernel() 129 size_t outer_size, int *ret_flag, size_t indices_size) { in InputValidCheck() 149 …const IndexType *num_segments_ptr, size_t *segment_pos_ptr, DataType *y_ptr, size_t outer_size, si… in CalSparseSegmentMeanWithNumSegments()
|
| D | fractional_max_pool_with_fixed_ksize_impl.cu | 48 const int64_t outer_size) { in Fractionalmaxpoolwithfixedksize() 82 const int64_t outer_size, const uint32_t &device_id, in CalFractionalmaxpoolwithfixedksize()
|
| D | triplet_margin_loss_impl.cu | 56 … const size_t bound, const size_t outer_size, const size_t inner_size, in PairwiseDistance() 119 … const size_t *bound_list, const size_t bound, const size_t outer_size, in PairwiseDistance() 145 … const size_t *bound_list, const size_t bound, const size_t outer_size, in PairwiseDistance() 186 … const size_t *bound_list, const size_t bound, const size_t outer_size, in PairwiseDistance() 213 … const int64_t *tensor_shapes, const int64_t *dst_shape, const size_t outer_size, in CalTripletMarginLoss()
|
| /third_party/mindspore/mindspore-src/source/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/fp16/ |
| D | reduce_fp16.c | 23 int ReduceMeanFp16(int outer_size, int inner_size, int axis_size, const float16_t *src_data, float1… in ReduceMeanFp16() 48 int ReduceMaxFp16(int outer_size, int inner_size, int axis_size, const float16_t *src_data, float16… in ReduceMaxFp16() 70 int ReduceMinFp16(int outer_size, int inner_size, int axis_size, const float16_t *src_data, float16… in ReduceMinFp16() 95 int ReduceProdFp16(int outer_size, int inner_size, int axis_size, const float16_t *src_data, float1… in ReduceProdFp16() 120 int ReduceSumFp16(int outer_size, int inner_size, int axis_size, const float16_t *src_data, float16… in ReduceSumFp16() 160 int ReduceL2NormFp16(int outer_size, int inner_size, int axis_size, const float16_t *src_data, floa… in ReduceL2NormFp16()
|
| /third_party/mindspore/mindspore-src/source/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/int8/ |
| D | reduce_int8.c | 176 int ReduceMeanInt8(const int outer_size, const int inner_size, const int axis_size, const int32_t *… in ReduceMeanInt8() 210 int ReduceMeanLastAxis(const int outer_size, const int inner_size, const int axis_size, const int32… in ReduceMeanLastAxis() 251 int ReduceSumInt8(const int outer_size, const int inner_size, const int axis_size, const int32_t *s… in ReduceSumInt8() 283 int ReduceSumLastAxis(const int outer_size, const int inner_size, const int axis_size, const int32_… in ReduceSumLastAxis() 323 int ReduceMaxLastAxis(const int outer_size, const int inner_size, const int axis_size, const int32_… in ReduceMaxLastAxis() 359 int ReduceMaxInt8(const int outer_size, const int inner_size, const int axis_size, const int32_t *s… in ReduceMaxInt8() 382 int ReduceMinLastAxis(const int outer_size, const int inner_size, const int axis_size, const int32_… in ReduceMinLastAxis() 420 int ReduceMinInt8(const int outer_size, const int inner_size, const int axis_size, const int32_t *s… in ReduceMinInt8() 442 int ReduceProdLastAxis(const int outer_size, const int inner_size, const int axis_size, const int32… in ReduceProdLastAxis() 485 int ReduceProdInt8(const int outer_size, const int inner_size, const int axis_size, const int32_t *… in ReduceProdInt8() [all …]
|
| D | gather_int8.c | 22 int GatherInt8Int32Index(const int8_t *in_data, int8_t *out_data, int outer_size, int inner_size, i… in GatherInt8Int32Index() 46 int GatherInt8Int64Index(const int8_t *in_data, int8_t *out_data, int outer_size, int inner_size, i… in GatherInt8Int64Index()
|
| D | dynamic_gather_int8.c | 20 void DynamicGather(const int8_t *input, int outer_size, int inner_size, int limit, const int32_t *i… in DynamicGather() 49 void DynamicGatherForFp16(const int8_t *input, int outer_size, int inner_size, int limit, const int… in DynamicGatherForFp16()
|
| /third_party/mindspore/mindspore-src/source/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/ |
| D | index_fill_gpu_kernel.cc | 63 bool IndexFillGpuKernelMod::GetSizeInfo(KernelTensor *address_ptr, int64_t &outer_size, int64_t &di… in GetSizeInfo() 140 int64_t dim_size, outer_size, inner_size; in LaunchKernel() local
|