| /third_party/mindspore/mindspore-src/source/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_ops/ |
| D | mish_impl.cu | 21 __global__ void MishKernel(const size_t size, const T *input_addr, T *output_addr) { in MishKernel() 28 __global__ void MishKernel(const size_t size, const half *input_addr, half *output_addr) { in MishKernel() 35 __global__ void MishKernel(const size_t size, const double *input_addr, double *output_addr) { in MishKernel() 42 cudaError_t Mish(const size_t size, const T *input_addr, T *output_addr, const uint32_t &device_id, in Mish() 49 cudaError_t Mish(const size_t size, const half *input_addr, half *output_addr, const uint32_t &devi… in Mish() 57 cudaError_t Mish(const size_t size, const double *input_addr, double *output_addr, const uint32_t &… in Mish()
|
| D | softsign_impl.cu | 21 __global__ void SoftsignKernel(const size_t size, const T *input_addr, T *output_addr) { in SoftsignKernel() 28 __global__ void SoftsignKernel(const size_t size, const half *input_addr, half *output_addr) { in SoftsignKernel() 35 __global__ void SoftsignKernel(const size_t size, const double *input_addr, double *output_addr) { in SoftsignKernel() 42 cudaError_t Softsign(const size_t size, const T *input_addr, T *output_addr, const uint32_t &device… in Softsign() 50 cudaError_t Softsign(const size_t size, const half *input_addr, half *output_addr, const uint32_t &… in Softsign() 58 cudaError_t Softsign(const size_t size, const double *input_addr, double *output_addr, const uint32… in Softsign()
|
| D | sparse_matrix_transpose_impl.cu | 22 __global__ void ConjKernel(const size_t input_size, cuComplex *input_addr) { in ConjKernel() 29 __global__ void ConjKernel(const size_t input_size, cuDoubleComplex *input_addr) { in ConjKernel() 36 cudaError_t Conj(const size_t input_size, cuComplex *input_addr, cudaStream_t stream) { in Conj() 41 cudaError_t Conj(const size_t input_size, cuDoubleComplex *input_addr, cudaStream_t stream) { in Conj()
|
| D | correlate_impl.cu | 21 __global__ void Conv1D(const T *input_addr, const T *kernel_addr, T *output_addr, const size_t out_… in Conv1D() 35 __global__ void Conj(const T *input_addr, T *output_addr, const size_t input_size) { in Conj() 43 __global__ void Reverse(const T *input_addr, T *output_addr, const size_t *input_size_d, size_t inp… in Reverse() 51 cudaError_t CorrelateCalc(const T *input_addr, const T *kernel_addr, T *output_addr, const size_t i… in CorrelateCalc() 77 cudaError_t CalConj(const T *input_addr, T *output_addr, const size_t input_size, const uint32_t &d… in CalConj() 84 cudaError_t CalReverse1D(const T *input_addr, T *output_addr, const size_t *input_size_d, size_t in… in CalReverse1D()
|
| D | convert_gradient_impl.cu | 21 … const size_t batchwidth, const size_t width, T *input_addr, T *output_addr) { in ConvertGradientKernel() 35 … const size_t batchwidth, const size_t width, T *input_addr, T *output_addr) { in ConvertGradientBackKernel() 50 const size_t width, T *input_addr, T *output_addr) { in ConvertGradientBackKernel() 68 … const size_t width, T *input_addr, T *output_addr, cudaStream_t cuda_stream) { in ConvertGradient() 76 … const size_t batchwidth, const size_t width, T *input_addr, T *output_addr, in ConvertGradientBack() 85 … const size_t ori_w, const size_t batchwidth, const size_t width, T *input_addr, in ConvertGradientBack()
|
| D | data_format_dim_map_impl.cu | 21 __global__ void DataFormatDimMapKernel(size_t size, T *input_addr, T *output_addr, int32_t *dim_map… in DataFormatDimMapKernel() 28 cudaError_t DataFormatDimMap(size_t size, T *input_addr, T *output_addr, int32_t *dim_map, cudaStre… in DataFormatDimMap()
|
| D | matrix_split_impl.cu | 20 … void MatrixSplitKernel(const size_t size, const size_t split_dim, const size_t dim, T *input_addr, in MatrixSplitKernel() 33 T *input_addr, T *output_addr) { in MatrixSplitKernel() 55 cudaError_t MatrixSplit(const size_t size, const size_t split_dim, const size_t dim, T *input_addr,… in MatrixSplit()
|
| D | fast_gelu_impl.cu | 21 __global__ void FastGeluKernel(size_t size, T *input_addr, T *output_addr) { in FastGeluKernel() 33 __global__ void FastGeluKernel(size_t size, half *input_addr, half *output_addr) { in FastGeluKernel() 43 __global__ void FastGeluKernel(size_t size, half2 *input_addr, half2 *output_addr) { in FastGeluKernel() 60 cudaError_t FastGelu(size_t size, T *input_addr, T *output_addr, cudaStream_t cuda_stream) { in FastGelu() 66 cudaError_t FastGelu(size_t size, half *input_addr, half *output_addr, cudaStream_t cuda_stream) { in FastGelu()
|
| D | gelu_impl.cu | 21 __global__ void GeluKernel(size_t size, const T *input_addr, T *output_addr) { in GeluKernel() 34 __global__ void GeluKernel(size_t size, const half *input_addr, half *output_addr) { in GeluKernel() 43 __global__ void GeluKernel(size_t size, const half2 *input_addr, half2 *output_addr) { in GeluKernel() 55 cudaError_t Gelu(size_t size, const T *input_addr, T *output_addr, cudaStream_t cuda_stream, const … in Gelu() 61 cudaError_t Gelu(size_t size, const half *input_addr, half *output_addr, cudaStream_t cuda_stream, in Gelu()
|
| D | matrix_combine_impl.cu | 21 const size_t dst_width, T *input_addr, T *output_addr) { in MatrixCombineKernel() 34 … const size_t dst_width, const size_t res_width, const size_t batch, T *input_addr, in MatrixCombineKernel() 57 … const size_t residual, const size_t res_width, const size_t batch, T *input_addr, in MatrixCombine()
|
| D | softplus_impl.cu | 25 __global__ void SoftplusKernel(const size_t size, const T threshold, const T *input_addr, T *output… in SoftplusKernel() 33 __global__ void SoftplusKernel(const size_t size, const half threshold, const half *input_addr, hal… in SoftplusKernel() 43 cudaError_t Softplus(const size_t size, const T *input_addr, T *output_addr, cudaStream_t cuda_stre… in Softplus()
|
| D | unsorted_segment_sum.cu | 22 T *input_addr, S *ids_addr, T *output_addr) { in UnsortedSegmentSumCal() 47 T *input_addr, S *ids_addr, T *output_addr, cudaStream_t stream, in UnsortedSegmentSum()
|
| D | unsorted_segment_prod.cu | 22 T *input_addr, S *ids_addr, T *output_addr) { in UnsortedSegmentProdCal() 47 T *input_addr, S *ids_addr, T *output_addr, cudaStream_t stream, in UnsortedSegmentProd()
|
| D | unsorted_segment_max.cu | 34 T *input_addr, S *ids_addr, T *output_addr) { in UnsortedSegmentMaxCal() 60 T *input_addr, S *ids_addr, T *output_addr, cudaStream_t stream, in UnsortedSegmentMax()
|
| D | unsorted_segment_min.cu | 34 T *input_addr, S *ids_addr, T *output_addr) { in UnsortedSegmentMinCal() 60 T *input_addr, S *ids_addr, T *output_addr, cudaStream_t stream, in UnsortedSegmentMin()
|
| /third_party/mindspore/mindspore-src/source/mindspore/lite/src/extendrt/delegate/tensorrt/cuda_impl/ |
| D | cast.cu | 22 __device__ __forceinline__ void CastBase(const S *input_addr, T *output_addr) { in CastBase() 27 __global__ void CastKernel(const int input_size, const S *input_addr, T *output_addr) { in CastKernel() 34 void Cast(const int input_size, const S *input_addr, T *output_addr, cudaStream_t stream) { in Cast()
|
| /third_party/mindspore/mindspore-src/source/mindspore/lite/src/extendrt/delegate/tensorrt/distribution/ |
| D | distribution_collective.cc | 27 int DistributionCollective::ReduceScatterWrapper(const void *input_addr, void *output_addr, size_t … in ReduceScatterWrapper() 33 int DistributionCollective::AllReduceWrapper(const void *input_addr, void *output_addr, size_t coun… in AllReduceWrapper() 39 int DistributionCollective::AllGatherWrapper(const void *input_addr, void *output_addr, size_t coun… in AllGatherWrapper()
|
| D | distribution_collective_impl.cc | 36 int DistributionCollective::ReduceScatterWrapper(const void *input_addr, void *output_addr, size_t … in ReduceScatterWrapper() 55 int DistributionCollective::AllReduceWrapper(const void *input_addr, void *output_addr, size_t coun… in AllReduceWrapper() 74 int DistributionCollective::AllGatherWrapper(const void *input_addr, void *output_addr, size_t coun… in AllGatherWrapper()
|
| /third_party/mindspore/mindspore-src/source/mindspore/ccsrc/plugin/device/cpu/kernel/ |
| D | rolling_cpu_kernel.cc | 57 S Var(const T *input_addr, const size_t *ids, size_t start, size_t end) const { in Var() argument 162 reduceMethod_ = [](const T *input_addr, const size_t *ids, size_t start, size_t end) { in MethodSwitch() 173 reduceMethod_ = [](const T *input_addr, const size_t *ids, size_t start, size_t end) { in MethodSwitch() 184 reduceMethod_ = [](const T *input_addr, const size_t *ids, size_t start, size_t end) { in MethodSwitch() 193 reduceMethod_ = [](const T *input_addr, const size_t *ids, size_t start, size_t end) { in MethodSwitch() 202 reduceMethod_ = [this](const T *input_addr, const size_t *ids, size_t start, size_t end) { in MethodSwitch() 207 reduceMethod_ = [this](const T *input_addr, const size_t *ids, size_t start, size_t end) { in MethodSwitch() 221 auto input_addr = reinterpret_cast<T *>(inputs[kIndex0]->device_ptr()); in RunFunc() local
|
| D | stridedslice_cpu_kernel.cc | 175 common::Status StridedSliceCpuKernelMod::RunTaskOnOuter(const uint8_t *input_addr, uint8_t *output_… in RunTaskOnOuter() 193 common::Status StridedSliceCpuKernelMod::RunTaskOnSplitAxis(const uint8_t *input_addr, uint8_t *out… in RunTaskOnSplitAxis() 209 void StridedSliceCpuKernelMod::ParallelRun(const uint8_t *input_addr, uint8_t *output_addr, int thr… in ParallelRun() 238 auto input_addr = reinterpret_cast<uint8_t *>(inputs[0]->device_ptr()); in LaunchKernel() local
|
| /third_party/mindspore/mindspore-src/source/mindspore/ccsrc/plugin/device/gpu/kernel/nccl/ |
| D | nccl_gpu_kernel.cc | 41 bool NcclGpuKernelMod::AllReduce(const void *input_addr, void *output_addr, size_t count, ncclDataT… in AllReduce() 49 bool NcclGpuKernelMod::AllGather(const void *input_addr, void *output_addr, size_t count, ncclDataT… in AllGather() 57 bool NcclGpuKernelMod::ReduceScatter(const void *input_addr, void *output_addr, size_t count, ncclD… in ReduceScatter() 66 bool NcclGpuKernelMod::Broadcast(const void *input_addr, void *output_addr, size_t count, ncclDataT… in Broadcast()
|
| D | nccl_collective_gpu_kernel.h | 145 T *input_addr = GetDeviceAddress<T>(inputs, 0); in LaunchAllReduce() local 153 T *input_addr = GetDeviceAddress<T>(inputs, 0); in LaunchAllGather() local 161 T *input_addr = GetDeviceAddress<T>(inputs, 0); in LaunchReduceScatter() local 169 T *input_addr = nullptr; in LaunchBroadcast() local
|
| /third_party/mindspore/mindspore-src/source/mindspore/ccsrc/plugin/device/gpu/hal/device/distribution/ |
| D | collective_wrapper.cc | 37 ncclResult_t AllReduce(const void *input_addr, void *output_addr, size_t count, ncclDataType_t data… in AllReduce() 42 ncclResult_t AllGather(const void *input_addr, void *output_addr, size_t count, ncclDataType_t data… in AllGather() 47 ncclResult_t ReduceScatter(const void *input_addr, void *output_addr, size_t count, ncclDataType_t … in ReduceScatter() 52 ncclResult_t Broadcast(const void *input_addr, void *output_addr, size_t count, ncclDataType_t data… in Broadcast()
|
| D | nccl_wrapper.cc | 57 ncclResult_t NCCLWrapper::AllReduce(const void *input_addr, void *output_addr, size_t count, ncclDa… in AllReduce() 65 ncclResult_t NCCLWrapper::AllGather(const void *input_addr, void *output_addr, size_t count, ncclDa… in AllGather() 73 ncclResult_t NCCLWrapper::ReduceScatter(const void *input_addr, void *output_addr, size_t count, in ReduceScatter() 82 ncclResult_t NCCLWrapper::Broadcast(const void *input_addr, void *output_addr, size_t count, ncclDa… in Broadcast()
|
| /third_party/mindspore/mindspore-src/source/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/ |
| D | pooling_cpu_kernel_nnacl.cc | 307 CTask PoolingCpuKernelNnaclMod::KernelAvgPool(T *input_addr, T *output_addr) { in KernelAvgPool() 361 CTask PoolingCpuKernelNnaclMod::KernelMaxPool(T *input_addr, T *output_addr) { in KernelMaxPool() 397 void PoolingCpuKernelNnaclMod::LaunchTransposeFp32(float *input_addr, float *output_addr, int plane… in LaunchTransposeFp32() 408 void PoolingCpuKernelNnaclMod::LaunchPoolingChannelLastFp32(float *input_addr, float *transpose_out… in LaunchPoolingChannelLastFp32() 434 T *input_addr = reinterpret_cast<T *>(inputs[kIndex0]->device_ptr()); in LaunchKernel() local 459 float *input_addr = reinterpret_cast<float *>(inputs[kIndex0]->device_ptr()); in Launch() local
|