/third_party/boost/libs/fiber/examples/cuda/ |
D | multiple_streams.cu | 62 … cudaMemcpyAsync( dev_a0, host_a + i, size * sizeof( int), cudaMemcpyHostToDevice, stream0); in main() 63 … cudaMemcpyAsync( dev_a1, host_a + i + size, size * sizeof( int), cudaMemcpyHostToDevice, stream1); in main() 64 … cudaMemcpyAsync( dev_b0, host_b + i, size * sizeof( int), cudaMemcpyHostToDevice, stream0); in main() 65 … cudaMemcpyAsync( dev_b1, host_b + i + size, size * sizeof( int), cudaMemcpyHostToDevice, stream1); in main()
|
D | single_stream.cu | 56 … cudaMemcpyAsync( dev_a, host_a + i, size * sizeof( int), cudaMemcpyHostToDevice, stream); in main() 57 … cudaMemcpyAsync( dev_b, host_b + i, size * sizeof( int), cudaMemcpyHostToDevice, stream); in main()
|
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/math/ |
D | cholesky_solve_gpu_kernel.h | 71 cudaMemcpyHostToDevice, reinterpret_cast<cudaStream_t>(stream_ptr)), in Launch() 76 … cudaMemcpyHostToDevice, reinterpret_cast<cudaStream_t>(stream_ptr)), in Launch() 80 … cudaMemcpyHostToDevice, reinterpret_cast<cudaStream_t>(stream_ptr)), in Launch() 99 … cudaMemcpyHostToDevice, reinterpret_cast<cudaStream_t>(stream_ptr)), in Launch() 103 … cudaMemcpyHostToDevice, reinterpret_cast<cudaStream_t>(stream_ptr)), in Launch()
|
D | cast_all_gpu_kernel.h | 55 …cudaMemcpyAsync(inputs_dev, in_addr.get(), sizeof(T *) * num_input_, cudaMemcpyHostToDevice, strea… in Launch() 59 …cudaMemcpyAsync(outputs_dev, out_addr.get(), sizeof(S *) * num_input_, cudaMemcpyHostToDevice, str… in Launch() 62 …udaMemcpyAsync(size_dev, size_.get(), sizeof(size_t) * num_input_, cudaMemcpyHostToDevice, stream), in Launch()
|
D | cholesky_trsm_solve_gpu_kernel.h | 141 cudaMemcpyHostToDevice, reinterpret_cast<cudaStream_t>(stream_ptr)), in LaunchNonSplitMatrix() 146 … cudaMemcpyHostToDevice, reinterpret_cast<cudaStream_t>(stream_ptr)), in LaunchNonSplitMatrix() 150 … cudaMemcpyHostToDevice, reinterpret_cast<cudaStream_t>(stream_ptr)), in LaunchNonSplitMatrix() 179 … cudaMemcpyHostToDevice, reinterpret_cast<cudaStream_t>(stream_ptr)), in LaunchSplitMatrix() 183 … cudaMemcpyHostToDevice, reinterpret_cast<cudaStream_t>(stream_ptr)), in LaunchSplitMatrix()
|
D | matrix_inverse_gpu_kernel.h | 64 … cudaMemcpyHostToDevice, reinterpret_cast<cudaStream_t>(stream_ptr)), in Launch() 68 … cudaMemcpyHostToDevice, reinterpret_cast<cudaStream_t>(stream_ptr)), in Launch()
|
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/ |
D | reverse_v2_gpu_kernel.h | 52 … cudaMemcpyHostToDevice, reinterpret_cast<cudaStream_t>(stream_ptr)), in Launch() 57 … cudaMemcpyHostToDevice, reinterpret_cast<cudaStream_t>(stream_ptr)), in Launch() 61 … cudaMemcpyAsync(axis_device, &axis_[0], workspace_size_list_[2], cudaMemcpyHostToDevice, in Launch()
|
D | extract_image_patches_gpu_kernel.h | 60 … cudaMemcpyAsync(input_shape, &input_shape_[0], shape_size, cudaMemcpyHostToDevice, in Launch() 64 … cudaMemcpyAsync(input_to_nhwc_axis, &to_nhwc_axis[0], shape_size, cudaMemcpyHostToDevice, in Launch() 68 … cudaMemcpyAsync(t_output_shape, &t_output_shape_[0], shape_size, cudaMemcpyHostToDevice, in Launch() 73 … cudaMemcpyHostToDevice, reinterpret_cast<cudaStream_t>(stream_ptr)), in Launch()
|
D | sort_gpu_kernel.h | 61 … cudaMemcpyHostToDevice, reinterpret_cast<cudaStream_t>(stream_ptr)), in Launch() 65 … cudaMemcpyAsync(perm_device, &perm_[0], workspace_size_list_[3], cudaMemcpyHostToDevice, in Launch() 98 … cudaMemcpyHostToDevice, reinterpret_cast<cudaStream_t>(stream_ptr)), in Launch()
|
D | tile_gpu_kernel.h | 49 … cudaMemcpyHostToDevice, reinterpret_cast<cudaStream_t>(stream_ptr)), in Launch() 54 cudaMemcpyHostToDevice, reinterpret_cast<cudaStream_t>(stream_ptr)), in Launch()
|
D | transpose_gpu_kernel.h | 48 … cudaMemcpyAsync(input_shape, &input_shape_[0], workspace_size_, cudaMemcpyHostToDevice, in Launch() 52 … cudaMemcpyAsync(input_axis, &input_axis_[0], workspace_size_, cudaMemcpyHostToDevice, in Launch()
|
D | concatv2_gpu_kernel.h | 63 … cudaMemcpyHostToDevice, reinterpret_cast<cudaStream_t>(stream_ptr)), in Launch() 67 … cudaMemcpyHostToDevice, reinterpret_cast<cudaStream_t>(stream_ptr)), in Launch()
|
D | gathernd_gpu_kernel.h | 60 … cudaMemcpyHostToDevice, reinterpret_cast<cudaStream_t>(stream_ptr)), in Launch() 64 … cudaMemcpyHostToDevice, reinterpret_cast<cudaStream_t>(stream_ptr)), in Launch()
|
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/ |
D | pad_gpu_kernel.h | 58 … cudaMemcpyHostToDevice, reinterpret_cast<cudaStream_t>(stream_ptr)), in Launch() 63 … cudaMemcpyHostToDevice, reinterpret_cast<cudaStream_t>(stream_ptr)), in Launch() 68 … cudaMemcpyHostToDevice, reinterpret_cast<cudaStream_t>(stream_ptr)), in Launch()
|
D | local_response_norm_gpu_kernel.h | 64 … cudaMemcpyAsync(ws_input_shape, &input_shape_[0], shape_size, cudaMemcpyHostToDevice, in Launch() 69 … cudaMemcpyHostToDevice, reinterpret_cast<cudaStream_t>(stream_ptr)), in Launch() 72 … cudaMemcpyAsync(ws_to_nhwc_axis, &to_nhwc_axis[0], shape_size, cudaMemcpyHostToDevice, in Launch() 76 … cudaMemcpyAsync(ws_to_nchw_axis, &to_nchw_axis[0], shape_size, cudaMemcpyHostToDevice, in Launch()
|
D | bce_with_logits_loss_kernel.h | 53 … cudaMemcpyHostToDevice, reinterpret_cast<cudaStream_t>(stream_ptr)), in Launch() 57 … cudaMemcpyHostToDevice, reinterpret_cast<cudaStream_t>(stream_ptr)), in Launch() 62 cudaMemcpyHostToDevice, reinterpret_cast<cudaStream_t>(stream_ptr)), in Launch()
|
D | local_response_norm_grad_gpu_kernel.h | 70 … cudaMemcpyAsync(ws_input_shape, &input_shape_[0], shape_size, cudaMemcpyHostToDevice, in Launch() 75 … cudaMemcpyHostToDevice, reinterpret_cast<cudaStream_t>(stream_ptr)), in Launch() 78 … cudaMemcpyAsync(ws_to_nhwc_axis, &to_nhwc_axis[0], shape_size, cudaMemcpyHostToDevice, in Launch() 82 … cudaMemcpyAsync(ws_to_nchw_axis, &to_nchw_axis[0], shape_size, cudaMemcpyHostToDevice, in Launch()
|
D | softmax_grad_gpu_kernel.h | 79 … cudaMemcpyAsync(input_shape, &input_shape_[0], workspace_size_, cudaMemcpyHostToDevice, in Launch() 84 … cudaMemcpyHostToDevice, reinterpret_cast<cudaStream_t>(stream_ptr)), in Launch() 88 … cudaMemcpyHostToDevice, reinterpret_cast<cudaStream_t>(stream_ptr)), in Launch()
|
D | softmax_gpu_kernel.h | 77 … cudaMemcpyAsync(input_shape, &input_shape_[0], workspace_size_, cudaMemcpyHostToDevice, in Launch() 82 … cudaMemcpyHostToDevice, reinterpret_cast<cudaStream_t>(stream_ptr)), in Launch() 86 … cudaMemcpyHostToDevice, reinterpret_cast<cudaStream_t>(stream_ptr)), in Launch()
|
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/random/ |
D | random_categorical_gpu_kernel.h | 55 … host_cdf.get(), sizeof(double *) * batch_size_, cudaMemcpyHostToDevice, in Launch() 78 … cudaMemcpyHostToDevice, reinterpret_cast<cudaStream_t>(stream_ptr)), in Launch() 83 … host_rand.get(), sizeof(double *) * batch_size_, cudaMemcpyHostToDevice, in Launch()
|
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/ |
D | fake_learned_scale_quant_perlayer_grad_gpu_kernel.cc | 106 … cudaMemcpyAsync(grad_alpha, alpha_no_grad, sizeof(float), cudaMemcpyHostToDevice, in Launch() 115 … cudaMemcpyAsync(grad_alpha, alpha_no_grad, sizeof(float), cudaMemcpyHostToDevice, in Launch()
|
D | fake_learned_scale_quant_perchannel_grad_gpu_kernel.cc | 112 … cudaMemcpyHostToDevice, reinterpret_cast<cudaStream_t>(stream_ptr)), in Launch() 121 … cudaMemcpyHostToDevice, reinterpret_cast<cudaStream_t>(stream_ptr)), in Launch()
|
/third_party/mindspore/tests/ut/cpp/stub/runtime/ |
D | cuda_runtime_api.h | 26 cudaMemcpyHostToDevice = 1, enumerator
|
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/neighbor_list/ |
D | neighbor_list_update_kernel.h | 97 …aMemcpyAsync(d_bucket, h_bucket.data(), sizeof(GRID_BUCKET) * grid_numbers, cudaMemcpyHostToDevice, in Launch() 99 …pyAsync(d_gpointer, h_gpointer.data(), sizeof(GRID_POINTER) * grid_numbers, cudaMemcpyHostToDevice, in Launch()
|
D | neighbor_list_update_new_kernel.h | 98 …aMemcpyAsync(d_bucket, h_bucket.data(), sizeof(GRID_BUCKET) * grid_numbers, cudaMemcpyHostToDevice, in Launch() 100 …pyAsync(d_gpointer, h_gpointer.data(), sizeof(GRID_POINTER) * grid_numbers, cudaMemcpyHostToDevice, in Launch()
|