/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/ |
D | ctcloss_gpu_kernel.h | 156 …e_, cudaMemcpyAsync(&max_sequence, max_labels_length, sizeof(int), cudaMemcpyDeviceToHost, stream), in LaunchFirstHalf() 168 …de_, cudaMemcpyAsync(&batch_label, max_labels_length, sizeof(int), cudaMemcpyDeviceToHost, stream), in LaunchFirstHalf() 182 …mcpyAsync(&max_labels_length_host, max_labels_length, sizeof(int), cudaMemcpyDeviceToHost, stream), in LaunchFirstHalf() 264 cudaMemcpyDeviceToHost, stream), in MemManageForCus()
|
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/ |
D | topk_gpu_kernel.h | 53 …cudaMemcpyAsync(&k_cut, k, sizeof(S), cudaMemcpyDeviceToHost, reinterpret_cast<cudaStream_t>(strea… in Launch() 76 …cudaMemcpyAsync(&k_cut, k, sizeof(S), cudaMemcpyDeviceToHost, reinterpret_cast<cudaStream_t>(strea… in Launch()
|
D | dynamic_range_gpu_kernel.h | 59 … cudaMemcpyDeviceToHost, reinterpret_cast<cudaStream_t>(stream_ptr)), in Launch() 66 … cudaMemcpyDeviceToHost, reinterpret_cast<cudaStream_t>(stream_ptr)), in Launch()
|
D | embedding_lookup_gpu_kernel.h | 51 … cudaMemcpyDeviceToHost, reinterpret_cast<cudaStream_t>(stream_ptr)), in Launch()
|
D | gatherv2_gpu_kernel.h | 51 … cudaMemcpyAsync(&axis_, axis_device_address, sizeof(int64_t), cudaMemcpyDeviceToHost, in Launch()
|
/third_party/boost/libs/fiber/examples/cuda/ |
D | multiple_streams.cu | 68 … cudaMemcpyAsync( host_c + i, dev_c0, size * sizeof( int), cudaMemcpyDeviceToHost, stream0); in main() 69 … cudaMemcpyAsync( host_c + i + size, dev_c1, size * sizeof( int), cudaMemcpyDeviceToHost, stream1); in main()
|
D | single_stream.cu | 59 … cudaMemcpyAsync( host_c + i, dev_c, size * sizeof( int), cudaMemcpyDeviceToHost, stream); in main()
|
/third_party/mindspore/tests/ut/cpp/stub/runtime/ |
D | cuda_runtime_api.h | 27 cudaMemcpyDeviceToHost = 2, enumerator
|
/third_party/mindspore/mindspore/ccsrc/runtime/device/gpu/ |
D | cuda_driver.cc | 86 auto ret = cudaMemcpy(dst, src, size, cudaMemcpyDeviceToHost); in CopyDeviceMemToHost() 106 auto ret = cudaMemcpyAsync(dst, src, size, cudaMemcpyDeviceToHost, (cudaStream_t)stream); in CopyDeviceMemToHostAsync()
|
/third_party/mindspore/mindspore/ccsrc/ps/ps_cache/gpu/ |
D | gpu_ps_cache.cc | 80 … cudaMemcpyAsync(dst, src, size, cudaMemcpyDeviceToHost, reinterpret_cast<cudaStream_t>(stream_)), in CopyDeviceMemToHost()
|
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/ |
D | pme_excluded_force_update_kernel.h | 65 …cudaMemcpyAsync(&beta, d_beta, sizeof(float), cudaMemcpyDeviceToHost, reinterpret_cast<cudaStream_… in Launch()
|
D | pme_reciprocal_force_update_kernel.h | 145 cudaMemcpyAsync(&h_beta, d_beta, sizeof(float), cudaMemcpyDeviceToHost, in Launch()
|
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/random/ |
D | randperm_gpu_kernel.h | 48 … cudaMemcpyAsync(&n, input_device, sizeof(int32_t), cudaMemcpyDeviceToHost, in Launch()
|
D | uniform_candidate_sampler_gpu_kernel.h | 67 … cudaMemcpyDeviceToHost, reinterpret_cast<cudaStream_t>(stream_ptr)), in Launch()
|
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/rl/ |
D | buffer_sample_gpu_kernel.cc | 99 … cudaMemcpyAsync(&k_num, count_addr, sizeof(int), cudaMemcpyDeviceToHost, cuda_stream), in Launch()
|
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/ |
D | batchnorm_fold_grad_gpu_kernel.h | 65 … cudaMemcpyAsync(current_step_host, current_step, sizeof(int), cudaMemcpyDeviceToHost, in Launch()
|
D | batchnorm_fold2_grad_gpu_kernel.h | 73 … cudaMemcpyAsync(current_step_host, global_step, sizeof(int32_t), cudaMemcpyDeviceToHost, in Launch()
|
D | batchnorm_fold_gpu_kernel.h | 66 … cudaMemcpyAsync(current_step_host, current_step, sizeof(int), cudaMemcpyDeviceToHost, in Launch()
|
/third_party/mindspore/mindspore/lite/src/delegate/tensorrt/ |
D | tensorrt_allocator.cc | 115 cudaMemcpyKind kind = is_host2device ? cudaMemcpyHostToDevice : cudaMemcpyDeviceToHost; in SyncMemInHostAndDevice()
|
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/lj/ |
D | lj_force_with_pme_direct_force_update_kernel.h | 82 cudaMemcpyAsync(&pme_beta, d_beta, sizeof(float), cudaMemcpyDeviceToHost, in Launch()
|
D | lj_force_with_virial_energy_update_kernel.h | 90 cudaMemcpyAsync(&pme_beta, d_beta, sizeof(float), cudaMemcpyDeviceToHost, in Launch()
|
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/simple_constrain/ |
D | constrain_kernel.h | 100 cudaMemcpyAsync(&need_pressure, d_need_pressure, sizeof(int), cudaMemcpyDeviceToHost, in Launch()
|
/third_party/boost/libs/fiber/doc/ |
D | cuda.qbk | 60 … cudaMemcpyAsync( host_c + i, dev_c, size * sizeof( int), cudaMemcpyDeviceToHost, stream);
|
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/neighbor_list/ |
D | neighbor_list_impl.cu | 313 cudaMemcpyAsync(h_refresh_sign.data(), refresh_sign, sizeof(int), cudaMemcpyDeviceToHost, stream); in Refresh_Neighbor_List_Half() 453 …mcpyAsync(refresh_count_list.data(), d_refresh_count, sizeof(int), cudaMemcpyDeviceToHost, stream); in NeighborListUpdate() 529 cudaMemcpyAsync(h_refresh_sign.data(), refresh_sign, sizeof(int), cudaMemcpyDeviceToHost, stream); in Refresh_Neighbor_List()
|
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/debug/ |
D | print_gpu_kernel.h | 76 … cudaMemcpyDeviceToHost, reinterpret_cast<cudaStream_t>(stream_ptr)), in Launch()
|