Home
last modified time | relevance | path

Searched refs:cudaMemcpyDeviceToHost (Results 1 – 25 of 26) sorted by relevance

12

/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/
Dctcloss_gpu_kernel.h156 …e_, cudaMemcpyAsync(&max_sequence, max_labels_length, sizeof(int), cudaMemcpyDeviceToHost, stream), in LaunchFirstHalf()
168 …de_, cudaMemcpyAsync(&batch_label, max_labels_length, sizeof(int), cudaMemcpyDeviceToHost, stream), in LaunchFirstHalf()
182 …mcpyAsync(&max_labels_length_host, max_labels_length, sizeof(int), cudaMemcpyDeviceToHost, stream), in LaunchFirstHalf()
264 cudaMemcpyDeviceToHost, stream), in MemManageForCus()
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/
Dtopk_gpu_kernel.h53 …cudaMemcpyAsync(&k_cut, k, sizeof(S), cudaMemcpyDeviceToHost, reinterpret_cast<cudaStream_t>(strea… in Launch()
76 …cudaMemcpyAsync(&k_cut, k, sizeof(S), cudaMemcpyDeviceToHost, reinterpret_cast<cudaStream_t>(strea… in Launch()
Ddynamic_range_gpu_kernel.h59cudaMemcpyDeviceToHost, reinterpret_cast<cudaStream_t>(stream_ptr)), in Launch()
66cudaMemcpyDeviceToHost, reinterpret_cast<cudaStream_t>(stream_ptr)), in Launch()
Dembedding_lookup_gpu_kernel.h51cudaMemcpyDeviceToHost, reinterpret_cast<cudaStream_t>(stream_ptr)), in Launch()
Dgatherv2_gpu_kernel.h51 … cudaMemcpyAsync(&axis_, axis_device_address, sizeof(int64_t), cudaMemcpyDeviceToHost, in Launch()
/third_party/boost/libs/fiber/examples/cuda/
Dmultiple_streams.cu68 … cudaMemcpyAsync( host_c + i, dev_c0, size * sizeof( int), cudaMemcpyDeviceToHost, stream0); in main()
69 … cudaMemcpyAsync( host_c + i + size, dev_c1, size * sizeof( int), cudaMemcpyDeviceToHost, stream1); in main()
Dsingle_stream.cu59 … cudaMemcpyAsync( host_c + i, dev_c, size * sizeof( int), cudaMemcpyDeviceToHost, stream); in main()
/third_party/mindspore/tests/ut/cpp/stub/runtime/
Dcuda_runtime_api.h27 cudaMemcpyDeviceToHost = 2, enumerator
/third_party/mindspore/mindspore/ccsrc/runtime/device/gpu/
Dcuda_driver.cc86 auto ret = cudaMemcpy(dst, src, size, cudaMemcpyDeviceToHost); in CopyDeviceMemToHost()
106 auto ret = cudaMemcpyAsync(dst, src, size, cudaMemcpyDeviceToHost, (cudaStream_t)stream); in CopyDeviceMemToHostAsync()
/third_party/mindspore/mindspore/ccsrc/ps/ps_cache/gpu/
Dgpu_ps_cache.cc80 … cudaMemcpyAsync(dst, src, size, cudaMemcpyDeviceToHost, reinterpret_cast<cudaStream_t>(stream_)), in CopyDeviceMemToHost()
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/
Dpme_excluded_force_update_kernel.h65 …cudaMemcpyAsync(&beta, d_beta, sizeof(float), cudaMemcpyDeviceToHost, reinterpret_cast<cudaStream_… in Launch()
Dpme_reciprocal_force_update_kernel.h145 cudaMemcpyAsync(&h_beta, d_beta, sizeof(float), cudaMemcpyDeviceToHost, in Launch()
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/random/
Drandperm_gpu_kernel.h48 … cudaMemcpyAsync(&n, input_device, sizeof(int32_t), cudaMemcpyDeviceToHost, in Launch()
Duniform_candidate_sampler_gpu_kernel.h67cudaMemcpyDeviceToHost, reinterpret_cast<cudaStream_t>(stream_ptr)), in Launch()
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/rl/
Dbuffer_sample_gpu_kernel.cc99 … cudaMemcpyAsync(&k_num, count_addr, sizeof(int), cudaMemcpyDeviceToHost, cuda_stream), in Launch()
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/
Dbatchnorm_fold_grad_gpu_kernel.h65 … cudaMemcpyAsync(current_step_host, current_step, sizeof(int), cudaMemcpyDeviceToHost, in Launch()
Dbatchnorm_fold2_grad_gpu_kernel.h73 … cudaMemcpyAsync(current_step_host, global_step, sizeof(int32_t), cudaMemcpyDeviceToHost, in Launch()
Dbatchnorm_fold_gpu_kernel.h66 … cudaMemcpyAsync(current_step_host, current_step, sizeof(int), cudaMemcpyDeviceToHost, in Launch()
/third_party/mindspore/mindspore/lite/src/delegate/tensorrt/
Dtensorrt_allocator.cc115 cudaMemcpyKind kind = is_host2device ? cudaMemcpyHostToDevice : cudaMemcpyDeviceToHost; in SyncMemInHostAndDevice()
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/lj/
Dlj_force_with_pme_direct_force_update_kernel.h82 cudaMemcpyAsync(&pme_beta, d_beta, sizeof(float), cudaMemcpyDeviceToHost, in Launch()
Dlj_force_with_virial_energy_update_kernel.h90 cudaMemcpyAsync(&pme_beta, d_beta, sizeof(float), cudaMemcpyDeviceToHost, in Launch()
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/simple_constrain/
Dconstrain_kernel.h100 cudaMemcpyAsync(&need_pressure, d_need_pressure, sizeof(int), cudaMemcpyDeviceToHost, in Launch()
/third_party/boost/libs/fiber/doc/
Dcuda.qbk60 … cudaMemcpyAsync( host_c + i, dev_c, size * sizeof( int), cudaMemcpyDeviceToHost, stream);
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/neighbor_list/
Dneighbor_list_impl.cu313 cudaMemcpyAsync(h_refresh_sign.data(), refresh_sign, sizeof(int), cudaMemcpyDeviceToHost, stream); in Refresh_Neighbor_List_Half()
453 …mcpyAsync(refresh_count_list.data(), d_refresh_count, sizeof(int), cudaMemcpyDeviceToHost, stream); in NeighborListUpdate()
529 cudaMemcpyAsync(h_refresh_sign.data(), refresh_sign, sizeof(int), cudaMemcpyDeviceToHost, stream); in Refresh_Neighbor_List()
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/debug/
Dprint_gpu_kernel.h76cudaMemcpyDeviceToHost, reinterpret_cast<cudaStream_t>(stream_ptr)), in Launch()

12