Home
last modified time | relevance | path

Searched refs:cudaStream_t (Results 1 – 25 of 679) sorted by relevance

12345678910>>...28

/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/
Dcast_impl.cu104 void Cast(const int input_size, const S *input_addr, T *output_addr, cudaStream_t stream) { in Cast()
108 …oid Cast(const int input_size, const int8_t *input_addr, int8_t *output_addr, cudaStream_t stream);
109 …id Cast(const int input_size, const int8_t *input_addr, int16_t *output_addr, cudaStream_t stream);
110 …id Cast(const int input_size, const int8_t *input_addr, int32_t *output_addr, cudaStream_t stream);
111 …id Cast(const int input_size, const int8_t *input_addr, int64_t *output_addr, cudaStream_t stream);
112 …id Cast(const int input_size, const int8_t *input_addr, uint8_t *output_addr, cudaStream_t stream);
113 …d Cast(const int input_size, const int8_t *input_addr, uint16_t *output_addr, cudaStream_t stream);
114 …d Cast(const int input_size, const int8_t *input_addr, uint32_t *output_addr, cudaStream_t stream);
115 …d Cast(const int input_size, const int8_t *input_addr, uint64_t *output_addr, cudaStream_t stream);
116 template void Cast(const int input_size, const int8_t *input_addr, float *output_addr, cudaStream_t
[all …]
Dunary_op_impl.cu409 void Exponential(const T *input, T *output, const size_t count, cudaStream_t cuda_stream) { in Exponential()
414 void Expm1(const T *input, T *output, const size_t count, cudaStream_t cuda_stream) { in Expm1()
419 void Logarithm(const T *input, T *output, const size_t count, cudaStream_t cuda_stream) { in Logarithm()
424 void Log1p(const T *input, T *output, const size_t count, cudaStream_t cuda_stream) { in Log1p()
429 void Erf(const T *input, T *output, const size_t count, cudaStream_t cuda_stream) { in Erf()
434 void Erfc(const T *input, T *output, const size_t count, cudaStream_t cuda_stream) { in Erfc()
439 void Negative(const T *input, T *output, const size_t count, cudaStream_t cuda_stream) { in Negative()
444 void Reciprocal(const T *input, T *output, const size_t count, cudaStream_t cuda_stream) { in Reciprocal()
449 void Square(const T *input, T *output, const size_t count, cudaStream_t cuda_stream) { in Square()
454 void Pow(const T *input, T *output, const size_t count, cudaStream_t cuda_stream) { in Pow()
[all …]
Dunary_op_impl.cuh23 void Exponential(const T *input, T *output, const size_t count, cudaStream_t cuda_stream);
25 void Expm1(const T *input, T *output, const size_t count, cudaStream_t cuda_stream);
27 void Logarithm(const T *input, T *output, const size_t count, cudaStream_t cuda_stream);
29 void Log1p(const T *input, T *output, const size_t count, cudaStream_t cuda_stream);
31 void Erf(const T *input, T *output, const size_t count, cudaStream_t cuda_stream);
33 void Erfc(const T *input, T *output, const size_t count, cudaStream_t cuda_stream);
35 void Negative(const T *input, T *output, const size_t count, cudaStream_t cuda_stream);
37 void Reciprocal(const T *input, T *output, const size_t count, cudaStream_t cuda_stream);
39 void Square(const T *input, T *output, const size_t count, cudaStream_t cuda_stream);
41 void Sqrt(const T *input, T *output, const size_t count, cudaStream_t cuda_stream);
[all …]
Drelu_impl.cu29 void CalReLU(int size, T *input_addr, T *output_addr, cudaStream_t cuda_stream) { in CalReLU()
33 template void CalReLU(int size, double *input_addr, double *output_addr, cudaStream_t cuda_stream);
34 template void CalReLU(int size, float *input_addr, float *output_addr, cudaStream_t cuda_stream);
35 template void CalReLU(int size, half *input_addr, half *output_addr, cudaStream_t cuda_stream);
36 template void CalReLU(int size, int8_t *input_addr, int8_t *output_addr, cudaStream_t cuda_stream);
37 template void CalReLU(int size, int16_t *input_addr, int16_t *output_addr, cudaStream_t cuda_stream…
38 template void CalReLU(int size, int32_t *input_addr, int32_t *output_addr, cudaStream_t cuda_stream…
39 template void CalReLU(int size, int64_t *input_addr, int64_t *output_addr, cudaStream_t cuda_stream…
40 template void CalReLU(int size, uint8_t *input_addr, uint8_t *output_addr, cudaStream_t cuda_stream…
57 void ReluV2(const size_t num, const T *x, T *y, uint32_t *mask, cudaStream_t cuda_stream) { in ReluV2()
[all …]
Dgather.cu46 const size_t dim_after_axis, cudaStream_t stream) { in Gather()
56 cudaStream_t stream);
60 cudaStream_t stream);
64 cudaStream_t stream);
68 cudaStream_t stream);
72 cudaStream_t stream);
76 cudaStream_t stream);
80 cudaStream_t stream);
84 cudaStream_t stream);
88 cudaStream_t stream);
[all …]
Dunary_op_grad_impl.cu126 void SqrtGrad(const T *input, const T *dout, T *output, const size_t count, cudaStream_t cuda_strea… in SqrtGrad()
132 void RsqrtGrad(const T *input, const T *dout, T *output, const size_t count, cudaStream_t cuda_stre… in RsqrtGrad()
138 void AsinGrad(const T *input, const T *dout, T *output, const size_t count, cudaStream_t cuda_strea… in AsinGrad()
144 void ACosGrad(const T *input, const T *dout, T *output, const size_t count, cudaStream_t cuda_strea… in ACosGrad()
150 void AtanGrad(const T *input, const T *dout, T *output, const size_t count, cudaStream_t cuda_strea… in AtanGrad()
156 void AsinhGrad(const T *input, const T *dout, T *output, const size_t count, cudaStream_t cuda_stre… in AsinhGrad()
162 void AcoshGrad(const T *input, const T *dout, T *output, const size_t count, cudaStream_t cuda_stre… in AcoshGrad()
168 void ReciprocalGrad(const T *input, const T *dout, T *output, const size_t count, cudaStream_t cuda… in ReciprocalGrad()
174 cudaStream_t cuda_stream);
176 cudaStream_t cuda_stream);
[all …]
Doneslike_impl.cu30 void CalOnesLike(const size_t size, const T* input, T* output, cudaStream_t cuda_stream) { in CalOnesLike()
35 …OnesLike<double>(const size_t size, const double* input, double* output, cudaStream_t cuda_stream);
36 template void CalOnesLike<float>(const size_t size, const float* input, float* output, cudaStream_t
37 template void CalOnesLike<half>(const size_t size, const half* input, half* output, cudaStream_t cu…
38 …OnesLike<int8_t>(const size_t size, const int8_t* input, int8_t* output, cudaStream_t cuda_stream);
39 …sLike<int16_t>(const size_t size, const int16_t* input, int16_t* output, cudaStream_t cuda_stream);
40 …sLike<int32_t>(const size_t size, const int32_t* input, int32_t* output, cudaStream_t cuda_stream);
41 …sLike<int64_t>(const size_t size, const int64_t* input, int64_t* output, cudaStream_t cuda_stream);
42 …sLike<uint8_t>(const size_t size, const uint8_t* input, uint8_t* output, cudaStream_t cuda_stream);
44 cudaStream_t cuda_stream);
[all …]
Dgather_grad.cu58 cudaStream_t stream) { in GatherGrad()
72 cudaStream_t stream);
76 cudaStream_t stream);
80 cudaStream_t stream);
84 cudaStream_t stream);
88 cudaStream_t stream);
92 cudaStream_t stream);
96 cudaStream_t stream);
100 cudaStream_t stream);
104 cudaStream_t stream);
[all …]
Dgathernd.cu50 const size_t &indices_dim1, S *batch_indices, S *batch_strides, cudaStream_t stream) { in GatherNd()
59 int *batch_strides, cudaStream_t stream);
62 int *batch_strides, cudaStream_t stream);
65 int *batch_strides, cudaStream_t stream);
68 int *batch_strides, cudaStream_t stream);
71 int *batch_strides, cudaStream_t stream);
75 cudaStream_t stream);
78 int *batch_strides, cudaStream_t stream);
82 cudaStream_t stream);
85 int *batch_strides, cudaStream_t stream);
[all …]
Dgatherv2.cu43 size_t input_dim1, cudaStream_t stream) { in GatherV2()
51 size_t output_dim2, size_t input_dim1, cudaStream_t stream);
53 … size_t output_dim1, size_t output_dim2, size_t input_dim1, cudaStream_t stream);
55 size_t output_dim2, size_t input_dim1, cudaStream_t stream);
57 … size_t output_dim1, size_t output_dim2, size_t input_dim1, cudaStream_t stream);
59 size_t output_dim2, size_t input_dim1, cudaStream_t stream);
61 … size_t output_dim1, size_t output_dim2, size_t input_dim1, cudaStream_t stream);
63 size_t output_dim2, size_t input_dim1, cudaStream_t stream);
65 size_t output_dim2, size_t input_dim1, cudaStream_t stream);
67 … size_t output_dim1, size_t output_dim2, size_t input_dim1, cudaStream_t stream);
[all …]
Dembedding_lookup_impl.cu31 size_t input_dim1, int64_t offset, cudaStream_t stream) { in CalEmbeddingLookup()
43 cudaStream_t stream);
46 int64_t offset, cudaStream_t stream);
49 cudaStream_t stream);
52 int64_t offset, cudaStream_t stream);
55 cudaStream_t stream);
58 int64_t offset, cudaStream_t stream);
61 cudaStream_t stream);
64 int64_t offset, cudaStream_t stream);
67 int64_t offset, cudaStream_t stream);
[all …]
Dunpack.cu36 cudaStream_t cuda_stream) { in UnpackKernel()
44 cudaStream_t cuda_stream);
47 cudaStream_t cuda_stream);
50 cudaStream_t cuda_stream);
53 cudaStream_t cuda_stream);
56 cudaStream_t cuda_stream);
59 cudaStream_t cuda_stream);
62 cudaStream_t cuda_stream);
65 cudaStream_t cuda_stream);
68 cudaStream_t cuda_stream);
[all …]
Dpack.cu35 cudaStream_t cuda_stream) { in PackKernel()
43 cudaStream_t cuda_stream);
46 cudaStream_t cuda_stream);
49 cudaStream_t cuda_stream);
52 cudaStream_t cuda_stream);
55 cudaStream_t cuda_stream);
58 cudaStream_t cuda_stream);
61 cudaStream_t cuda_stream);
64 cudaStream_t cuda_stream);
67 cudaStream_t cuda_stream);
[all …]
Dslice_impl.cu157 void FillDeviceArray(const size_t input_size, T *addr, const float value, cudaStream_t cuda_stream)… in FillDeviceArray()
162 …onst size_t s1, const size_t l1, const size_t d1, const T *input, T *output, cudaStream_t stream) { in Slice1DKernel()
167 const T *input, T *output, cudaStream_t stream) { in Slice2DKernel()
172 …onst size_t d1, const size_t d2, const size_t d3, const T *input, T *output, cudaStream_t stream) { in Slice3DKernel()
178 const T *input, T *output, cudaStream_t stream) { in Slice4DKernel()
185 …onst size_t d3, const size_t d4, const size_t d5, const T *input, T *output, cudaStream_t stream) { in Slice5DKernel()
193 const T *input, T *output, cudaStream_t stream) { in Slice6DKernel()
202 …onst size_t d5, const size_t d6, const size_t d7, const T *input, T *output, cudaStream_t stream) { in Slice7DKernel()
210 const size_t d3, const size_t d4, const T *dy, T *dx, cudaStream_t stream) { in CalSlice4DGrad()
243 T *output, cudaStream_t cuda_stream) { in StridedSlice()
[all …]
Dctcloss_impl.cuh23 … int *cum_labels_length, bool ignore_longer_outputs_than_inputs, cudaStream_t stream);
28 … int *cum_labels_length, bool ignore_longer_outputs_than_inputs, cudaStream_t stream);
32 cudaStream_t stream);
35 int *max_labels_length, int batch, cudaStream_t stream);
38 … int *precum_labels_length, int *cum_labels_length, int batch, int blank, cudaStream_t stream);
42 int batch, cudaStream_t stream);
45 … int *max_labels_length, const int64_t *label_indices, int batch, int size, cudaStream_t stream);
46 …ateMaxSequence(const int *sequence_length, int *max_labels_length, int batch, cudaStream_t stream);
50 … T *cost, T *grads, T *prob_num, bool ignore_longer_outputs_than_inputs, cudaStream_t stream);
Drelu_grad_impl.cu28 void CalReLUGrad(int size, T *dy, T *y, T *dx, cudaStream_t cuda_stream) { in CalReLUGrad()
33 template void CalReLUGrad(int size, double *dy, double *y, double *dx, cudaStream_t cuda_stream);
34 template void CalReLUGrad(int size, float *dy, float *y, float *dx, cudaStream_t cuda_stream);
35 template void CalReLUGrad(int size, half *dy, half *y, half *dx, cudaStream_t cuda_stream);
36 template void CalReLUGrad(int size, int8_t *dy, int8_t *y, int8_t *dx, cudaStream_t cuda_stream);
37 template void CalReLUGrad(int size, int16_t *dy, int16_t *y, int16_t *dx, cudaStream_t cuda_stream);
38 template void CalReLUGrad(int size, int32_t *dy, int32_t *y, int32_t *dx, cudaStream_t cuda_stream);
39 template void CalReLUGrad(int size, int64_t *dy, int64_t *y, int64_t *dx, cudaStream_t cuda_stream);
40 template void CalReLUGrad(int size, uint8_t *dy, uint8_t *y, uint8_t *dx, cudaStream_t cuda_stream);
Dunary_op_grad_impl.cuh22 void SqrtGrad(const T *input, const T *dout, T *output, const size_t count, cudaStream_t cuda_strea…
24 void RsqrtGrad(const T *input, const T *dout, T *output, const size_t count, cudaStream_t cuda_stre…
26 void AsinGrad(const T *input, const T *dout, T *output, const size_t count, cudaStream_t cuda_strea…
28 void ACosGrad(const T *input, const T *dout, T *output, const size_t count, cudaStream_t cuda_strea…
30 void AtanGrad(const T *input, const T *dout, T *output, const size_t count, cudaStream_t cuda_strea…
32 void AsinhGrad(const T *input, const T *dout, T *output, const size_t count, cudaStream_t cuda_stre…
34 void AcoshGrad(const T *input, const T *dout, T *output, const size_t count, cudaStream_t cuda_stre…
36 void ReciprocalGrad(const T *input, const T *dout, T *output, const size_t count, cudaStream_t cuda…
Dscatter_nd.cu52 S *work_shape, cudaStream_t stream) { in ScatterNd()
62 cudaStream_t stream);
66 … int64_t *indices_stride, int64_t *work_shape, cudaStream_t stream);
70 cudaStream_t stream);
74 … int64_t *indices_stride, int64_t *work_shape, cudaStream_t stream);
78 cudaStream_t stream);
82 cudaStream_t stream);
86 cudaStream_t stream);
90 cudaStream_t stream);
95 cudaStream_t stream);
[all …]
Dpad_impl.cuh25 float pad_value, T* output, cudaStream_t cuda_stream);
29 const int pad_left, T* dx, cudaStream_t cuda_stream);
33 float pad_value, T* output, cudaStream_t cuda_stream);
37 const int pad_left, T* output, cudaStream_t cuda_stream);
40 … const int *paddings, const int input_size, const size_t input_rank, cudaStream_t cuda_stream);
45 T* output, cudaStream_t cuda_stream);
50 cudaStream_t cuda_stream);
55 const float pad_value, T *output, cudaStream_t cuda_stream);
60 cudaStream_t cuda_stream);
Dscatter_nd_functor_impl.cu101 cudaStream_t cuda_stream) { in CalScatterNdFunctor()
121 … const double *updates, double *input, cudaStream_t cuda_stream);
125 … const double *updates, double *input, cudaStream_t cuda_stream);
129 … const float *updates, float *input, cudaStream_t cuda_stream);
133 … const float *updates, float *input, cudaStream_t cuda_stream);
137 … const half *updates, half *input, cudaStream_t cuda_stream);
141 … const half *updates, half *input, cudaStream_t cuda_stream);
145 … const int32_t *updates, int32_t *input, cudaStream_t cuda_stream);
149 … const int32_t *updates, int32_t *input, cudaStream_t cuda_stream);
153 … const int16_t *updates, int16_t *input, cudaStream_t cuda_stream);
[all …]
Dreverse_sequence_impl.cu89 … size_t *input_shape_cum_ptr, size_t shape_size, T *output, cudaStream_t cuda_stream) { in CalReverseSequence()
99 … size_t shape_size, int8_t *output, cudaStream_t cuda_stream);
103 … size_t shape_size, int8_t *output, cudaStream_t cuda_stream);
107 … size_t shape_size, int16_t *output, cudaStream_t cuda_stream);
111 … size_t shape_size, int16_t *output, cudaStream_t cuda_stream);
115 … size_t shape_size, int *output, cudaStream_t cuda_stream);
119 … size_t shape_size, int *output, cudaStream_t cuda_stream);
123 … size_t shape_size, int64_t *output, cudaStream_t cuda_stream);
127 … size_t shape_size, int64_t *output, cudaStream_t cuda_stream);
131 … size_t shape_size, half *output, cudaStream_t cuda_stream);
[all …]
Dslice_impl.cuh25 void SliceKernel(const T *input, T *output, const size_t output_size, cudaStream_t cuda_stream, S..…
30 const size_t d3, const size_t d4, const T *dy, T *dx, cudaStream_t stream);
33 …const size_t s1, const size_t l1, const size_t d1, const T *input, T *output, cudaStream_t stream);
37 const T *input, T *output, cudaStream_t stream);
41 …const size_t d1, const size_t d2, const size_t d3, const T *input, T *output, cudaStream_t stream);
46 const T *input, T *output, cudaStream_t stream);
51 …const size_t d3, const size_t d4, const size_t d5, const T *input, T *output, cudaStream_t stream);
57 const T *input, T *output, cudaStream_t stream);
63 …const size_t d5, const size_t d6, const size_t d7, const T *input, T *output, cudaStream_t stream);
68 T *output, cudaStream_t cuda_stream);
[all …]
/third_party/boost/boost/fiber/cuda/
Dwaitfor.hpp37 static void trampoline( cudaStream_t st, cudaError_t status, void * vp) { in trampoline()
44 single_stream_rendezvous( cudaStream_t st) { in single_stream_rendezvous()
54 void notify( cudaStream_t st, cudaError_t status) noexcept { in notify()
63 std::tuple< cudaStream_t, cudaError_t > wait() { in wait()
72 cudaStream_t st_{};
79 many_streams_rendezvous( std::initializer_list< cudaStream_t > l) : in many_streams_rendezvous()
82 for ( cudaStream_t st : stx_) {
93 void notify( cudaStream_t st, cudaError_t status) noexcept { in notify()
103 std::vector< std::tuple< cudaStream_t, cudaError_t > > wait() { in wait()
112 std::set< cudaStream_t > stx_;
[all …]
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/math/
Dunary_op_gpu_kernel.h95 …(input_addr, output_addr, inputs[0]->size / sizeof(T), reinterpret_cast<cudaStream_t>(stream_ptr)); in Launch()
99 …Expm1(input_addr, output_addr, inputs[0]->size / sizeof(T), reinterpret_cast<cudaStream_t>(stream_… in Launch()
103 …(input_addr, output_addr, inputs[0]->size / sizeof(T), reinterpret_cast<cudaStream_t>(stream_ptr)); in Launch()
107 …Log1p(input_addr, output_addr, inputs[0]->size / sizeof(T), reinterpret_cast<cudaStream_t>(stream_… in Launch()
111 …Erf(input_addr, output_addr, inputs[0]->size / sizeof(T), reinterpret_cast<cudaStream_t>(stream_pt… in Launch()
115 …Erfc(input_addr, output_addr, inputs[0]->size / sizeof(T), reinterpret_cast<cudaStream_t>(stream_p… in Launch()
119 …(input_addr, output_addr, inputs[0]->size / sizeof(T), reinterpret_cast<cudaStream_t>(stream_ptr)); in Launch()
123 …(input_addr, output_addr, inputs[0]->size / sizeof(T), reinterpret_cast<cudaStream_t>(stream_ptr)); in Launch()
127 …Square(input_addr, output_addr, inputs[0]->size / sizeof(T), reinterpret_cast<cudaStream_t>(stream… in Launch()
131 …Sqrt(input_addr, output_addr, inputs[0]->size / sizeof(T), reinterpret_cast<cudaStream_t>(stream_p… in Launch()
[all …]
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/rl/
Drl_buffer_impl.cuh22 unsigned char *buffer, const unsigned char *exp, cudaStream_t cuda_stream);
24 cudaStream_t cuda_stream);
25 …(const int *count, const int *head, const int *origin_index, int *index, cudaStream_t cuda_stream);
27 unsigned char *out, cudaStream_t cuda_stream);
29 cudaStream_t cuda_stream);
31 unsigned char *out, cudaStream_t cuda_stream);
32 …t int size, curandState *globalState, unsigned int *value, unsigned int *key, cudaStream_t stream);
33 void RandInit(const int size, const int seed, curandState *state, cudaStream_t stream);
35 cudaStream_t cuda_stream);

12345678910>>...28