Home
last modified time | relevance | path

Searched defs:input_addr (Results 1 – 25 of 94) sorted by relevance

1234

/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/
Dcast_impl.cu25 __device__ __forceinline__ void CastBase(const S *input_addr, T *output_addr) { in CastBase()
30 __device__ __forceinline__ void CastBase(const half *input_addr, uint64_t *output_addr) { in CastBase()
34 __device__ __forceinline__ void CastBase(const half *input_addr, int64_t *output_addr) { in CastBase()
38 __device__ __forceinline__ void CastBase(const half *input_addr, uint32_t *output_addr) { in CastBase()
42 __device__ __forceinline__ void CastBase(const half *input_addr, int32_t *output_addr) { in CastBase()
46 __device__ __forceinline__ void CastBase(const half *input_addr, uint16_t *output_addr) { in CastBase()
50 __device__ __forceinline__ void CastBase(const half *input_addr, int16_t *output_addr) { in CastBase()
54 __device__ __forceinline__ void CastBase(const half *input_addr, uint8_t *output_addr) { in CastBase()
58 __device__ __forceinline__ void CastBase(const half *input_addr, int8_t *output_addr) { in CastBase()
63 __device__ __forceinline__ void CastBase(const uint64_t *input_addr, half *output_addr) { in CastBase()
[all …]
Dconvert_gradient_impl.cu21 … const size_t batchwidth, const size_t width, T *input_addr, T *output_addr) { in ConvertGradientKernel()
35 … const size_t batchwidth, const size_t width, T *input_addr, T *output_addr) { in ConvertGradientBackKernel()
50 const size_t width, T *input_addr, T *output_addr) { in ConvertGradientBackKernel()
68 const size_t width, T *input_addr, T *output_addr, cudaStream_t cuda_stream) { in ConvertGradient()
75 … const size_t width, T *input_addr, T *output_addr, cudaStream_t cuda_stream) { in ConvertGradientBack()
82 … const size_t ori_w, const size_t batchwidth, const size_t width, T *input_addr, T *output_addr, in ConvertGradientBack()
Dsoftplus_impl.cu21 __global__ void SoftplusKernel(const size_t size, const T *input_addr, T *output_addr) { in SoftplusKernel()
29 __global__ void SoftplusKernel(const size_t size, const half *input_addr, half *output_addr) { in SoftplusKernel()
37 void Softplus(const size_t size, const T *input_addr, T *output_addr, cudaStream_t cuda_stream) { in Softplus()
42 void Softplus(const size_t size, const half *input_addr, half *output_addr, cudaStream_t cuda_strea… in Softplus()
Dmatrix_split_impl.cu20 … void MatrixSplitKernel(const size_t size, const size_t split_dim, const size_t dim, T *input_addr, in MatrixSplitKernel()
33 T *input_addr, T *output_addr) { in MatrixSplitKernel()
55 void MatrixSplit(const size_t size, const size_t split_dim, const size_t dim, T *input_addr, T *out… in MatrixSplit()
Dgelu_impl.cu21 __global__ void GeluKernel(size_t size, T *input_addr, T *output_addr) { in GeluKernel()
34 __global__ void GeluKernel(size_t size, half *input_addr, half *output_addr) { in GeluKernel()
43 __global__ void GeluKernel(size_t size, half2 *input_addr, half2 *output_addr) { in GeluKernel()
55 void Gelu(size_t size, T *input_addr, T *output_addr, cudaStream_t cuda_stream) { in Gelu()
60 void Gelu(size_t size, half *input_addr, half *output_addr, cudaStream_t cuda_stream) { in Gelu()
Dmatrix_combine_impl.cu21 const size_t dst_width, T *input_addr, T *output_addr) { in MatrixCombineKernel()
34 … const size_t dst_width, const size_t res_width, const size_t batch, T *input_addr, in MatrixCombineKernel()
57 … const size_t residual, const size_t res_width, const size_t batch, T *input_addr, T *output_addr, in MatrixCombine()
Dunsorted_segment_sum.cu22 T* input_addr, S* ids_addr, T* output_addr) { in UnsortedSegmentSum()
39 T* input_addr, S* ids_addr, T* output_addr, cudaStream_t stream) { in UnsortedSegmentSum()
Drelu_impl.cu22 __global__ void CalReLUKernel(int size, T *input_addr, T *output_addr) { in CalReLUKernel()
29 void CalReLU(int size, T *input_addr, T *output_addr, cudaStream_t cuda_stream) { in CalReLU()
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/
Drolling_cpu_kernel.cc103 reduceMethod_ = [](const T *input_addr, const size_t *ids, size_t start, size_t end) { in MethodSwitch()
114 reduceMethod_ = [](const T *input_addr, const size_t *ids, size_t start, size_t end) { in MethodSwitch()
125 reduceMethod_ = [](const T *input_addr, const size_t *ids, size_t start, size_t end) { in MethodSwitch()
134 reduceMethod_ = [](const T *input_addr, const size_t *ids, size_t start, size_t end) { in MethodSwitch()
143 reduceMethod_ = [this](const T *input_addr, const size_t *ids, size_t start, size_t end) { in MethodSwitch()
148 reduceMethod_ = [this](const T *input_addr, const size_t *ids, size_t start, size_t end) { in MethodSwitch()
160 auto input_addr = reinterpret_cast<T *>(inputs[0]->addr); in Launch() local
Dstridedslice_cpu_kernel.cc165 int StridedSliceCPUKernel::RunTaskOnOuter(const uint8_t *input_addr, uint8_t *output_addr, int star… in RunTaskOnOuter()
180 int StridedSliceCPUKernel::RunTaskOnSplitAxis(const uint8_t *input_addr, uint8_t *output_addr, int … in RunTaskOnSplitAxis()
194 void StridedSliceCPUKernel::ParallelRun(const uint8_t *input_addr, uint8_t *output_addr, int thread… in ParallelRun()
223 auto input_addr = reinterpret_cast<uint8_t *>(inputs[0]->addr); in Launch() local
Dl2_normalize_cpu_kernel.cc51 void L2NormalizeCPUKernel<T>::CalcDenominator(const T *input_addr, const size_t reduce_size, const … in CalcDenominator()
97 void L2NormalizeCPUKernel<T>::CalcOutput(const T *input_addr, const std::vector<size_t> reduce_shap… in CalcOutput()
132 auto input_addr = reinterpret_cast<T *>(inputs[0]->addr); in Launch() local
Dtranspose_cpu_kernel.cc88 const auto *input_addr = reinterpret_cast<T *>(inputs[0]->addr); in LaunchKernel() local
130 void TransposeCPUFwdKernel::ParallelRun(const T *input_addr, T *output_addr, const int *output_shap… in ParallelRun()
Dembedding_look_up_cpu_kernel.cc33 void LookUpTableTask(const float *input_addr, const T *indices_addr, float *output_addr, size_t ind… in LookUpTableTask()
105 const auto *input_addr = reinterpret_cast<float *>(inputs[0]->addr); in LaunchKernel() local
Dl2loss_cpu_kernel.cc42 auto input_addr = reinterpret_cast<T *>(inputs[0]->addr); in Launch() local
Dallgather_cpu_kernel.cc42 auto *input_addr = reinterpret_cast<float *>(inputs[0]->addr); in Launch() local
Drank_cpu_kernel.cc136 void RankCpuKernel<T>::Launch1DInt(const T *input_addr, size_t *sort_idx, T *values, const AxisIter… in Launch1DInt()
174 void RankCpuKernel<T>::Launch1DFloat(const T *input_addr, size_t *sort_idx, T *values, bool *is_nan, in Launch1DFloat()
252 auto input_addr = reinterpret_cast<T *>(inputs[0]->addr); in Launch() local
Dreduce_scatter_cpu_kernel.cc55 auto *input_addr = reinterpret_cast<float *>(inputs[0]->addr); in Launch() local
Dunsorted_segment_sum_cpu_kernel.cc56 void *input_addr = inputs[0]->addr; in Launch() local
Dbroadcast_to_cpu_kernel.cc66 const auto *input_addr = reinterpret_cast<T *>(inputs[0]->addr); in Launch() local
Drolling_cpu_kernel.h52 S Var(const T *input_addr, const size_t *ids, size_t start, size_t end) const { in Var()
/third_party/mindspore/mindspore/ccsrc/runtime/device/gpu/distribution/
Dcollective_wrapper.cc37 ncclResult_t AllReduce(const void *input_addr, void *output_addr, size_t count, ncclDataType_t data… in AllReduce()
42 ncclResult_t AllGather(const void *input_addr, void *output_addr, size_t count, ncclDataType_t data… in AllGather()
47 ncclResult_t ReduceScatter(const void *input_addr, void *output_addr, size_t count, ncclDataType_t … in ReduceScatter()
52 ncclResult_t Broadcast(const void *input_addr, void *output_addr, size_t count, ncclDataType_t data… in Broadcast()
Dnccl_wrapper.cc49 ncclResult_t NCCLWrapper::AllReduce(const void *input_addr, void *output_addr, size_t count, ncclDa… in AllReduce()
57 ncclResult_t NCCLWrapper::AllGather(const void *input_addr, void *output_addr, size_t count, ncclDa… in AllGather()
65 ncclResult_t NCCLWrapper::ReduceScatter(const void *input_addr, void *output_addr, size_t count, in ReduceScatter()
74 ncclResult_t NCCLWrapper::Broadcast(const void *input_addr, void *output_addr, size_t count, ncclDa… in Broadcast()
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/nccl/
Dnccl_collective_gpu_kernel.h158 T *input_addr = GetDeviceAddress<T>(inputs, 0); in LaunchAllReduce() local
171 T *input_addr = GetDeviceAddress<T>(inputs, 0); in LaunchAllGather() local
184 T *input_addr = GetDeviceAddress<T>(inputs, 0); in LaunchReduceScatter() local
198 T *input_addr = GetDeviceAddress<T>(inputs, 0); in LaunchBroadcast() local
/third_party/mindspore/mindspore/ccsrc/runtime/device/ascend/
Dascend_launch_atomic_clean.h35 void SetInputAddr(uint8_t *input_addr) override { input_addr_ = input_addr; } in SetInputAddr()
Dascend_launch_transdata.h41 void SetInputAddr(uint8_t *input_addr) override { input_addr_ = input_addr; } in SetInputAddr()

1234