/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/ |
D | cast_impl.cu | 104 void Cast(const int input_size, const S *input_addr, T *output_addr, cudaStream_t stream) { in Cast() 108 …oid Cast(const int input_size, const int8_t *input_addr, int8_t *output_addr, cudaStream_t stream); 109 …id Cast(const int input_size, const int8_t *input_addr, int16_t *output_addr, cudaStream_t stream); 110 …id Cast(const int input_size, const int8_t *input_addr, int32_t *output_addr, cudaStream_t stream); 111 …id Cast(const int input_size, const int8_t *input_addr, int64_t *output_addr, cudaStream_t stream); 112 …id Cast(const int input_size, const int8_t *input_addr, uint8_t *output_addr, cudaStream_t stream); 113 …d Cast(const int input_size, const int8_t *input_addr, uint16_t *output_addr, cudaStream_t stream); 114 …d Cast(const int input_size, const int8_t *input_addr, uint32_t *output_addr, cudaStream_t stream); 115 …d Cast(const int input_size, const int8_t *input_addr, uint64_t *output_addr, cudaStream_t stream); 116 template void Cast(const int input_size, const int8_t *input_addr, float *output_addr, cudaStream_t… [all …]
|
D | unary_op_impl.cu | 409 void Exponential(const T *input, T *output, const size_t count, cudaStream_t cuda_stream) { in Exponential() 414 void Expm1(const T *input, T *output, const size_t count, cudaStream_t cuda_stream) { in Expm1() 419 void Logarithm(const T *input, T *output, const size_t count, cudaStream_t cuda_stream) { in Logarithm() 424 void Log1p(const T *input, T *output, const size_t count, cudaStream_t cuda_stream) { in Log1p() 429 void Erf(const T *input, T *output, const size_t count, cudaStream_t cuda_stream) { in Erf() 434 void Erfc(const T *input, T *output, const size_t count, cudaStream_t cuda_stream) { in Erfc() 439 void Negative(const T *input, T *output, const size_t count, cudaStream_t cuda_stream) { in Negative() 444 void Reciprocal(const T *input, T *output, const size_t count, cudaStream_t cuda_stream) { in Reciprocal() 449 void Square(const T *input, T *output, const size_t count, cudaStream_t cuda_stream) { in Square() 454 void Pow(const T *input, T *output, const size_t count, cudaStream_t cuda_stream) { in Pow() [all …]
|
D | unary_op_impl.cuh | 23 void Exponential(const T *input, T *output, const size_t count, cudaStream_t cuda_stream); 25 void Expm1(const T *input, T *output, const size_t count, cudaStream_t cuda_stream); 27 void Logarithm(const T *input, T *output, const size_t count, cudaStream_t cuda_stream); 29 void Log1p(const T *input, T *output, const size_t count, cudaStream_t cuda_stream); 31 void Erf(const T *input, T *output, const size_t count, cudaStream_t cuda_stream); 33 void Erfc(const T *input, T *output, const size_t count, cudaStream_t cuda_stream); 35 void Negative(const T *input, T *output, const size_t count, cudaStream_t cuda_stream); 37 void Reciprocal(const T *input, T *output, const size_t count, cudaStream_t cuda_stream); 39 void Square(const T *input, T *output, const size_t count, cudaStream_t cuda_stream); 41 void Sqrt(const T *input, T *output, const size_t count, cudaStream_t cuda_stream); [all …]
|
D | relu_impl.cu | 29 void CalReLU(int size, T *input_addr, T *output_addr, cudaStream_t cuda_stream) { in CalReLU() 33 template void CalReLU(int size, double *input_addr, double *output_addr, cudaStream_t cuda_stream); 34 template void CalReLU(int size, float *input_addr, float *output_addr, cudaStream_t cuda_stream); 35 template void CalReLU(int size, half *input_addr, half *output_addr, cudaStream_t cuda_stream); 36 template void CalReLU(int size, int8_t *input_addr, int8_t *output_addr, cudaStream_t cuda_stream); 37 template void CalReLU(int size, int16_t *input_addr, int16_t *output_addr, cudaStream_t cuda_stream… 38 template void CalReLU(int size, int32_t *input_addr, int32_t *output_addr, cudaStream_t cuda_stream… 39 template void CalReLU(int size, int64_t *input_addr, int64_t *output_addr, cudaStream_t cuda_stream… 40 template void CalReLU(int size, uint8_t *input_addr, uint8_t *output_addr, cudaStream_t cuda_stream… 57 void ReluV2(const size_t num, const T *x, T *y, uint32_t *mask, cudaStream_t cuda_stream) { in ReluV2() [all …]
|
D | gather.cu | 46 const size_t dim_after_axis, cudaStream_t stream) { in Gather() 56 cudaStream_t stream); 60 cudaStream_t stream); 64 cudaStream_t stream); 68 cudaStream_t stream); 72 cudaStream_t stream); 76 cudaStream_t stream); 80 cudaStream_t stream); 84 cudaStream_t stream); 88 cudaStream_t stream); [all …]
|
D | unary_op_grad_impl.cu | 126 void SqrtGrad(const T *input, const T *dout, T *output, const size_t count, cudaStream_t cuda_strea… in SqrtGrad() 132 void RsqrtGrad(const T *input, const T *dout, T *output, const size_t count, cudaStream_t cuda_stre… in RsqrtGrad() 138 void AsinGrad(const T *input, const T *dout, T *output, const size_t count, cudaStream_t cuda_strea… in AsinGrad() 144 void ACosGrad(const T *input, const T *dout, T *output, const size_t count, cudaStream_t cuda_strea… in ACosGrad() 150 void AtanGrad(const T *input, const T *dout, T *output, const size_t count, cudaStream_t cuda_strea… in AtanGrad() 156 void AsinhGrad(const T *input, const T *dout, T *output, const size_t count, cudaStream_t cuda_stre… in AsinhGrad() 162 void AcoshGrad(const T *input, const T *dout, T *output, const size_t count, cudaStream_t cuda_stre… in AcoshGrad() 168 void ReciprocalGrad(const T *input, const T *dout, T *output, const size_t count, cudaStream_t cuda… in ReciprocalGrad() 174 cudaStream_t cuda_stream); 176 cudaStream_t cuda_stream); [all …]
|
D | oneslike_impl.cu | 30 void CalOnesLike(const size_t size, const T* input, T* output, cudaStream_t cuda_stream) { in CalOnesLike() 35 …OnesLike<double>(const size_t size, const double* input, double* output, cudaStream_t cuda_stream); 36 template void CalOnesLike<float>(const size_t size, const float* input, float* output, cudaStream_t… 37 template void CalOnesLike<half>(const size_t size, const half* input, half* output, cudaStream_t cu… 38 …OnesLike<int8_t>(const size_t size, const int8_t* input, int8_t* output, cudaStream_t cuda_stream); 39 …sLike<int16_t>(const size_t size, const int16_t* input, int16_t* output, cudaStream_t cuda_stream); 40 …sLike<int32_t>(const size_t size, const int32_t* input, int32_t* output, cudaStream_t cuda_stream); 41 …sLike<int64_t>(const size_t size, const int64_t* input, int64_t* output, cudaStream_t cuda_stream); 42 …sLike<uint8_t>(const size_t size, const uint8_t* input, uint8_t* output, cudaStream_t cuda_stream); 44 cudaStream_t cuda_stream); [all …]
|
D | gather_grad.cu | 58 cudaStream_t stream) { in GatherGrad() 72 cudaStream_t stream); 76 cudaStream_t stream); 80 cudaStream_t stream); 84 cudaStream_t stream); 88 cudaStream_t stream); 92 cudaStream_t stream); 96 cudaStream_t stream); 100 cudaStream_t stream); 104 cudaStream_t stream); [all …]
|
D | gathernd.cu | 50 const size_t &indices_dim1, S *batch_indices, S *batch_strides, cudaStream_t stream) { in GatherNd() 59 int *batch_strides, cudaStream_t stream); 62 int *batch_strides, cudaStream_t stream); 65 int *batch_strides, cudaStream_t stream); 68 int *batch_strides, cudaStream_t stream); 71 int *batch_strides, cudaStream_t stream); 75 cudaStream_t stream); 78 int *batch_strides, cudaStream_t stream); 82 cudaStream_t stream); 85 int *batch_strides, cudaStream_t stream); [all …]
|
D | gatherv2.cu | 43 size_t input_dim1, cudaStream_t stream) { in GatherV2() 51 size_t output_dim2, size_t input_dim1, cudaStream_t stream); 53 … size_t output_dim1, size_t output_dim2, size_t input_dim1, cudaStream_t stream); 55 size_t output_dim2, size_t input_dim1, cudaStream_t stream); 57 … size_t output_dim1, size_t output_dim2, size_t input_dim1, cudaStream_t stream); 59 size_t output_dim2, size_t input_dim1, cudaStream_t stream); 61 … size_t output_dim1, size_t output_dim2, size_t input_dim1, cudaStream_t stream); 63 size_t output_dim2, size_t input_dim1, cudaStream_t stream); 65 size_t output_dim2, size_t input_dim1, cudaStream_t stream); 67 … size_t output_dim1, size_t output_dim2, size_t input_dim1, cudaStream_t stream); [all …]
|
D | embedding_lookup_impl.cu | 31 size_t input_dim1, int64_t offset, cudaStream_t stream) { in CalEmbeddingLookup() 43 cudaStream_t stream); 46 int64_t offset, cudaStream_t stream); 49 cudaStream_t stream); 52 int64_t offset, cudaStream_t stream); 55 cudaStream_t stream); 58 int64_t offset, cudaStream_t stream); 61 cudaStream_t stream); 64 int64_t offset, cudaStream_t stream); 67 int64_t offset, cudaStream_t stream); [all …]
|
D | unpack.cu | 36 cudaStream_t cuda_stream) { in UnpackKernel() 44 cudaStream_t cuda_stream); 47 cudaStream_t cuda_stream); 50 cudaStream_t cuda_stream); 53 cudaStream_t cuda_stream); 56 cudaStream_t cuda_stream); 59 cudaStream_t cuda_stream); 62 cudaStream_t cuda_stream); 65 cudaStream_t cuda_stream); 68 cudaStream_t cuda_stream); [all …]
|
D | pack.cu | 35 cudaStream_t cuda_stream) { in PackKernel() 43 cudaStream_t cuda_stream); 46 cudaStream_t cuda_stream); 49 cudaStream_t cuda_stream); 52 cudaStream_t cuda_stream); 55 cudaStream_t cuda_stream); 58 cudaStream_t cuda_stream); 61 cudaStream_t cuda_stream); 64 cudaStream_t cuda_stream); 67 cudaStream_t cuda_stream); [all …]
|
D | slice_impl.cu | 157 void FillDeviceArray(const size_t input_size, T *addr, const float value, cudaStream_t cuda_stream)… in FillDeviceArray() 162 …onst size_t s1, const size_t l1, const size_t d1, const T *input, T *output, cudaStream_t stream) { in Slice1DKernel() 167 const T *input, T *output, cudaStream_t stream) { in Slice2DKernel() 172 …onst size_t d1, const size_t d2, const size_t d3, const T *input, T *output, cudaStream_t stream) { in Slice3DKernel() 178 const T *input, T *output, cudaStream_t stream) { in Slice4DKernel() 185 …onst size_t d3, const size_t d4, const size_t d5, const T *input, T *output, cudaStream_t stream) { in Slice5DKernel() 193 const T *input, T *output, cudaStream_t stream) { in Slice6DKernel() 202 …onst size_t d5, const size_t d6, const size_t d7, const T *input, T *output, cudaStream_t stream) { in Slice7DKernel() 210 const size_t d3, const size_t d4, const T *dy, T *dx, cudaStream_t stream) { in CalSlice4DGrad() 243 T *output, cudaStream_t cuda_stream) { in StridedSlice() [all …]
|
D | ctcloss_impl.cuh | 23 … int *cum_labels_length, bool ignore_longer_outputs_than_inputs, cudaStream_t stream); 28 … int *cum_labels_length, bool ignore_longer_outputs_than_inputs, cudaStream_t stream); 32 cudaStream_t stream); 35 int *max_labels_length, int batch, cudaStream_t stream); 38 … int *precum_labels_length, int *cum_labels_length, int batch, int blank, cudaStream_t stream); 42 int batch, cudaStream_t stream); 45 … int *max_labels_length, const int64_t *label_indices, int batch, int size, cudaStream_t stream); 46 …ateMaxSequence(const int *sequence_length, int *max_labels_length, int batch, cudaStream_t stream); 50 … T *cost, T *grads, T *prob_num, bool ignore_longer_outputs_than_inputs, cudaStream_t stream);
|
D | relu_grad_impl.cu | 28 void CalReLUGrad(int size, T *dy, T *y, T *dx, cudaStream_t cuda_stream) { in CalReLUGrad() 33 template void CalReLUGrad(int size, double *dy, double *y, double *dx, cudaStream_t cuda_stream); 34 template void CalReLUGrad(int size, float *dy, float *y, float *dx, cudaStream_t cuda_stream); 35 template void CalReLUGrad(int size, half *dy, half *y, half *dx, cudaStream_t cuda_stream); 36 template void CalReLUGrad(int size, int8_t *dy, int8_t *y, int8_t *dx, cudaStream_t cuda_stream); 37 template void CalReLUGrad(int size, int16_t *dy, int16_t *y, int16_t *dx, cudaStream_t cuda_stream); 38 template void CalReLUGrad(int size, int32_t *dy, int32_t *y, int32_t *dx, cudaStream_t cuda_stream); 39 template void CalReLUGrad(int size, int64_t *dy, int64_t *y, int64_t *dx, cudaStream_t cuda_stream); 40 template void CalReLUGrad(int size, uint8_t *dy, uint8_t *y, uint8_t *dx, cudaStream_t cuda_stream);
|
D | unary_op_grad_impl.cuh | 22 void SqrtGrad(const T *input, const T *dout, T *output, const size_t count, cudaStream_t cuda_strea… 24 void RsqrtGrad(const T *input, const T *dout, T *output, const size_t count, cudaStream_t cuda_stre… 26 void AsinGrad(const T *input, const T *dout, T *output, const size_t count, cudaStream_t cuda_strea… 28 void ACosGrad(const T *input, const T *dout, T *output, const size_t count, cudaStream_t cuda_strea… 30 void AtanGrad(const T *input, const T *dout, T *output, const size_t count, cudaStream_t cuda_strea… 32 void AsinhGrad(const T *input, const T *dout, T *output, const size_t count, cudaStream_t cuda_stre… 34 void AcoshGrad(const T *input, const T *dout, T *output, const size_t count, cudaStream_t cuda_stre… 36 void ReciprocalGrad(const T *input, const T *dout, T *output, const size_t count, cudaStream_t cuda…
|
D | scatter_nd.cu | 52 S *work_shape, cudaStream_t stream) { in ScatterNd() 62 cudaStream_t stream); 66 … int64_t *indices_stride, int64_t *work_shape, cudaStream_t stream); 70 cudaStream_t stream); 74 … int64_t *indices_stride, int64_t *work_shape, cudaStream_t stream); 78 cudaStream_t stream); 82 cudaStream_t stream); 86 cudaStream_t stream); 90 cudaStream_t stream); 95 cudaStream_t stream); [all …]
|
D | pad_impl.cuh | 25 float pad_value, T* output, cudaStream_t cuda_stream); 29 const int pad_left, T* dx, cudaStream_t cuda_stream); 33 float pad_value, T* output, cudaStream_t cuda_stream); 37 const int pad_left, T* output, cudaStream_t cuda_stream); 40 … const int *paddings, const int input_size, const size_t input_rank, cudaStream_t cuda_stream); 45 T* output, cudaStream_t cuda_stream); 50 cudaStream_t cuda_stream); 55 const float pad_value, T *output, cudaStream_t cuda_stream); 60 cudaStream_t cuda_stream);
|
D | scatter_nd_functor_impl.cu | 101 cudaStream_t cuda_stream) { in CalScatterNdFunctor() 121 … const double *updates, double *input, cudaStream_t cuda_stream); 125 … const double *updates, double *input, cudaStream_t cuda_stream); 129 … const float *updates, float *input, cudaStream_t cuda_stream); 133 … const float *updates, float *input, cudaStream_t cuda_stream); 137 … const half *updates, half *input, cudaStream_t cuda_stream); 141 … const half *updates, half *input, cudaStream_t cuda_stream); 145 … const int32_t *updates, int32_t *input, cudaStream_t cuda_stream); 149 … const int32_t *updates, int32_t *input, cudaStream_t cuda_stream); 153 … const int16_t *updates, int16_t *input, cudaStream_t cuda_stream); [all …]
|
D | reverse_sequence_impl.cu | 89 … size_t *input_shape_cum_ptr, size_t shape_size, T *output, cudaStream_t cuda_stream) { in CalReverseSequence() 99 … size_t shape_size, int8_t *output, cudaStream_t cuda_stream); 103 … size_t shape_size, int8_t *output, cudaStream_t cuda_stream); 107 … size_t shape_size, int16_t *output, cudaStream_t cuda_stream); 111 … size_t shape_size, int16_t *output, cudaStream_t cuda_stream); 115 … size_t shape_size, int *output, cudaStream_t cuda_stream); 119 … size_t shape_size, int *output, cudaStream_t cuda_stream); 123 … size_t shape_size, int64_t *output, cudaStream_t cuda_stream); 127 … size_t shape_size, int64_t *output, cudaStream_t cuda_stream); 131 … size_t shape_size, half *output, cudaStream_t cuda_stream); [all …]
|
D | slice_impl.cuh | 25 void SliceKernel(const T *input, T *output, const size_t output_size, cudaStream_t cuda_stream, S..… 30 const size_t d3, const size_t d4, const T *dy, T *dx, cudaStream_t stream); 33 …const size_t s1, const size_t l1, const size_t d1, const T *input, T *output, cudaStream_t stream); 37 const T *input, T *output, cudaStream_t stream); 41 …const size_t d1, const size_t d2, const size_t d3, const T *input, T *output, cudaStream_t stream); 46 const T *input, T *output, cudaStream_t stream); 51 …const size_t d3, const size_t d4, const size_t d5, const T *input, T *output, cudaStream_t stream); 57 const T *input, T *output, cudaStream_t stream); 63 …const size_t d5, const size_t d6, const size_t d7, const T *input, T *output, cudaStream_t stream); 68 T *output, cudaStream_t cuda_stream); [all …]
|
/third_party/boost/boost/fiber/cuda/ |
D | waitfor.hpp | 37 static void trampoline( cudaStream_t st, cudaError_t status, void * vp) { in trampoline() 44 single_stream_rendezvous( cudaStream_t st) { in single_stream_rendezvous() 54 void notify( cudaStream_t st, cudaError_t status) noexcept { in notify() 63 std::tuple< cudaStream_t, cudaError_t > wait() { in wait() 72 cudaStream_t st_{}; 79 many_streams_rendezvous( std::initializer_list< cudaStream_t > l) : in many_streams_rendezvous() 82 for ( cudaStream_t st : stx_) { 93 void notify( cudaStream_t st, cudaError_t status) noexcept { in notify() 103 std::vector< std::tuple< cudaStream_t, cudaError_t > > wait() { in wait() 112 std::set< cudaStream_t > stx_; [all …]
|
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/math/ |
D | unary_op_gpu_kernel.h | 95 …(input_addr, output_addr, inputs[0]->size / sizeof(T), reinterpret_cast<cudaStream_t>(stream_ptr)); in Launch() 99 …Expm1(input_addr, output_addr, inputs[0]->size / sizeof(T), reinterpret_cast<cudaStream_t>(stream_… in Launch() 103 …(input_addr, output_addr, inputs[0]->size / sizeof(T), reinterpret_cast<cudaStream_t>(stream_ptr)); in Launch() 107 …Log1p(input_addr, output_addr, inputs[0]->size / sizeof(T), reinterpret_cast<cudaStream_t>(stream_… in Launch() 111 …Erf(input_addr, output_addr, inputs[0]->size / sizeof(T), reinterpret_cast<cudaStream_t>(stream_pt… in Launch() 115 …Erfc(input_addr, output_addr, inputs[0]->size / sizeof(T), reinterpret_cast<cudaStream_t>(stream_p… in Launch() 119 …(input_addr, output_addr, inputs[0]->size / sizeof(T), reinterpret_cast<cudaStream_t>(stream_ptr)); in Launch() 123 …(input_addr, output_addr, inputs[0]->size / sizeof(T), reinterpret_cast<cudaStream_t>(stream_ptr)); in Launch() 127 …Square(input_addr, output_addr, inputs[0]->size / sizeof(T), reinterpret_cast<cudaStream_t>(stream… in Launch() 131 …Sqrt(input_addr, output_addr, inputs[0]->size / sizeof(T), reinterpret_cast<cudaStream_t>(stream_p… in Launch() [all …]
|
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/rl/ |
D | rl_buffer_impl.cuh | 22 unsigned char *buffer, const unsigned char *exp, cudaStream_t cuda_stream); 24 cudaStream_t cuda_stream); 25 …(const int *count, const int *head, const int *origin_index, int *index, cudaStream_t cuda_stream); 27 unsigned char *out, cudaStream_t cuda_stream); 29 cudaStream_t cuda_stream); 31 unsigned char *out, cudaStream_t cuda_stream); 32 …t int size, curandState *globalState, unsigned int *value, unsigned int *key, cudaStream_t stream); 33 void RandInit(const int size, const int seed, curandState *state, cudaStream_t stream); 35 cudaStream_t cuda_stream);
|