Home
last modified time | relevance | path

Searched refs:getCurrentCUDAStream (Results 1 – 25 of 162) sorted by relevance

1234567

/external/pytorch/aten/src/ATen/test/
Dcuda_stream_test.cpp79 at::cuda::CUDAStream curStream = at::cuda::getCurrentCUDAStream(); in TEST()
86 curStream = at::cuda::getCurrentCUDAStream(); in TEST()
95 cur_thread_stream = {at::cuda::getCurrentCUDAStream()}; in thread_fun()
109 at::cuda::CUDAStream cur_stream = at::cuda::getCurrentCUDAStream(); in TEST()
153 ASSERT_EQ_CUDA(at::cuda::getCurrentCUDAStream(1), streams1[1]); in TEST()
158 ASSERT_EQ_CUDA(at::cuda::getCurrentCUDAStream(1), streams1[0]); in TEST()
165 ASSERT_EQ_CUDA(at::cuda::getCurrentCUDAStream(1), streams1[0]); in TEST()
169 ASSERT_EQ_CUDA(at::cuda::getCurrentCUDAStream(0), streams0[0]); in TEST()
205 ASSERT_EQ_CUDA(s0, at::cuda::getCurrentCUDAStream()); in TEST()
208 ASSERT_EQ_CUDA(s1, at::cuda::getCurrentCUDAStream()); in TEST()
[all …]
Dcuda_caching_host_allocator_test.cpp24 at::cuda::getCurrentCUDAStream())); in TEST()
39 at::cuda::getCurrentCUDAStream())); in TEST()
52 at::cuda::getCurrentCUDAStream())); in TEST()
62 at::cuda::getCurrentCUDAStream())); in TEST()
109 at::cuda::getCurrentCUDAStream())); in TEST()
123 at::cuda::getCurrentCUDAStream())); in TEST()
139 ptr, ctx, at::cuda::getCurrentCUDAStream())); in TEST()
144 ptr, ctx, at::cuda::getCurrentCUDAStream())); in TEST()
/external/pytorch/aten/src/ATen/cuda/
Dcub.cuh152 begin_bit, end_bit, c10::cuda::getCurrentCUDAStream()); in segmented_sort_pairs()
157 begin_bit, end_bit, c10::cuda::getCurrentCUDAStream()); in segmented_sort_pairs()
175 …alues_in, keys_out_, values_out, num_selected, num_input_items, c10::cuda::getCurrentCUDAStream()); in unique_by_key()
236 at::cuda::getCurrentCUDAStream()); in inclusive_scan()
248 at::cuda::getCurrentCUDAStream()); in inclusive_scan()
257 impl::transform_vals<<<1, 1, 0, at::cuda::getCurrentCUDAStream()>>>( in inclusive_scan()
280 at::cuda::getCurrentCUDAStream()); in inclusive_scan()
288 at::cuda::getCurrentCUDAStream()); in inclusive_scan()
304 at::cuda::getCurrentCUDAStream()); in exclusive_scan()
317 at::cuda::getCurrentCUDAStream()); in exclusive_scan()
[all …]
DCUDAGraph.cpp68 : capture_stream_(at::cuda::getCurrentCUDAStream()) { in CUDAGraph()
98 auto stream = at::cuda::getCurrentCUDAStream(); in capture_begin()
154 auto stream = at::cuda::getCurrentCUDAStream(); in capture_end()
234 AT_CUDA_CHECK(cudaGraphLaunch(graph_exec_, at::cuda::getCurrentCUDAStream())); in replay()
DSleep.cu32 spin_kernel<<<grid, block, 0, c10::cuda::getCurrentCUDAStream()>>>(cycles); in sleep()
64 flush_icache_kernel<<<grid, block, 0, c10::cuda::getCurrentCUDAStream()>>>(); in flush_icache()
Dcub-RadixSortKeys.cu31 c10::cuda::getCurrentCUDAStream()); in radix_sort_keys()
40 c10::cuda::getCurrentCUDAStream()); in radix_sort_keys()
Dcub-RadixSortPairs.cu43 c10::cuda::getCurrentCUDAStream()); in radix_sort_pairs_impl()
54 c10::cuda::getCurrentCUDAStream()); in radix_sort_pairs_impl()
DCUDAEvent.h106 void record() { record(getCurrentCUDAStream()); } in record()
178 createEvent(getCurrentCUDAStream().device_index()); in ipc_handle()
/external/pytorch/aten/src/ATen/native/cuda/linalg/
DMagmaUtils.h32 at::cuda::getCurrentCUDAStream(), in MAGMAQueue()
72 auto stream = at::cuda::getCurrentCUDAStream(); in MagmaStreamSyncGuard()
80 if (at::cuda::getCurrentCUDAStream() != default_stream) { in noexcept()
/external/pytorch/aten/src/ATen/native/cuda/
DMultiTensorApply.cuh180 at::cuda::getCurrentCUDAStream()>>>( in multi_tensor_apply()
212 at::cuda::getCurrentCUDAStream()>>>(tensorListMeta, callable, args...); in multi_tensor_apply()
263 at::cuda::getCurrentCUDAStream()>>>( in multi_tensor_apply()
292 at::cuda::getCurrentCUDAStream()>>>(tensorListMeta, callable, args...); in multi_tensor_apply()
345 at::cuda::getCurrentCUDAStream()>>>( in multi_tensor_apply_for_fused_optimizer()
374 at::cuda::getCurrentCUDAStream()>>>(tensorListMeta, callable, args...); in multi_tensor_apply_for_fused_optimizer()
DLegacyThrustHelpers.cu24 const cudaStream_t stream = at::cuda::getCurrentCUDAStream(); in index_put_with_sort_kernel_thrust_helper()
48 cudaStream_t stream = at::cuda::getCurrentCUDAStream(); in embedding_dense_backward_cuda_scan()
90 auto stream = at::cuda::getCurrentCUDAStream(); in embedding_backward_cuda_kernel_unique_by_key()
DAdaptiveMaxPooling2d.cu251 at::cuda::getCurrentCUDAStream()>>>( in TORCH_IMPL_FUNC()
300 at::cuda::getCurrentCUDAStream()>>>( in TORCH_IMPL_FUNC()
385 at::cuda::getCurrentCUDAStream()>>>( in TORCH_IMPL_FUNC()
400 at::cuda::getCurrentCUDAStream()>>>( in TORCH_IMPL_FUNC()
446 at::cuda::getCurrentCUDAStream()>>>( in TORCH_IMPL_FUNC()
461 at::cuda::getCurrentCUDAStream()>>>( in TORCH_IMPL_FUNC()
DSortStable.cu113 auto stream = at::cuda::getCurrentCUDAStream(); in segmented_sort_large_segments()
151 auto stream = c10::cuda::getCurrentCUDAStream(); in segmented_sort_pairs_by_full_sort()
178 at::cuda::getCurrentCUDAStream()>>>( in segmented_sort_pairs_by_full_sort()
199 auto stream = c10::cuda::getCurrentCUDAStream(); in segmented_sort_pairs()
DSegmentReduce.cu339 at::cuda::getCurrentCUDAStream()>>>( in _segment_reduce_lengths_offsets_backward_cuda_kernel()
473 at::cuda::getCurrentCUDAStream()>>>( in _segment_reduce_lengths_offsets_cuda_kernel()
504 at::cuda::getCurrentCUDAStream()); in _segment_reduce_lengths_offsets_cuda_kernel()
516 at::cuda::getCurrentCUDAStream()); in _segment_reduce_lengths_offsets_cuda_kernel()
522 at::cuda::getCurrentCUDAStream()>>>( in _segment_reduce_lengths_offsets_cuda_kernel()
540 at::cuda::getCurrentCUDAStream()); in _segment_reduce_lengths_offsets_cuda_kernel()
552 at::cuda::getCurrentCUDAStream()); in _segment_reduce_lengths_offsets_cuda_kernel()
564 at::cuda::getCurrentCUDAStream()); in _segment_reduce_lengths_offsets_cuda_kernel()
DTensorModeKernel.cu29 auto stream = at::cuda::getCurrentCUDAStream(); in operator ()()
99 auto stream = at::cuda::getCurrentCUDAStream(); in operator ()()
171 auto stream = at::cuda::getCurrentCUDAStream(); in calculate_mode()
214 <<<grid, num_threads, memsize, at::cuda::getCurrentCUDAStream()>>>( in handle_fused_mode()
DMultiLabelMarginCriterion.cu233 <<<blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>( in multilabel_margin_loss_forward_out_cuda_template()
258 <<<blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>( in multilabel_margin_loss_forward_out_cuda_template()
284 <<<blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>( in multilabel_margin_loss_forward_out_cuda_template()
343 <<<blocks, threads, 0, c10::cuda::getCurrentCUDAStream()>>>( in multilabel_margin_loss_backward_cuda_out_template()
372 <<<blocks, threads, 0, c10::cuda::getCurrentCUDAStream()>>>( in multilabel_margin_loss_backward_cuda_out_template()
DCopy.cu195 CUDAStream copy_stream = getCurrentCUDAStream(src_device.index()); in copy_device_to_device()
205 dst_ready.record(getCurrentCUDAStream(dst_device.index())); in copy_device_to_device()
251 src_ready.block(getCurrentCUDAStream(dst_device.index())); in copy_device_to_device()
360 CUDAStream stream = getCurrentCUDAStream(); in copy_kernel_cuda()
DDropout.cu260 <<<grid, dim_block, 0, at::cuda::getCurrentCUDAStream()>>>( in launcher()
276 <<<grid, dim_block, 0, at::cuda::getCurrentCUDAStream()>>>( in launcher()
290 <<<grid, dim_block, 0, at::cuda::getCurrentCUDAStream()>>>( in launcher()
306 at::cuda::getCurrentCUDAStream()>>>( in launcher()
319 at::cuda::getCurrentCUDAStream()>>>( in launcher()
DLoss.cu316 at::cuda::getCurrentCUDAStream()>>>( in nll_loss_forward_out_cuda_template()
360 <<<1, 1, 0, at::cuda::getCurrentCUDAStream()>>>( in nll_loss_forward_out_cuda_template()
389 at::cuda::getCurrentCUDAStream()>>>( in nll_loss_forward_out_cuda_template()
520 at::cuda::getCurrentCUDAStream()>>>( in nll_loss_backward_out_cuda_template()
546 <<<1, 1, 0, at::cuda::getCurrentCUDAStream()>>>( in nll_loss_backward_out_cuda_template()
571 <<<1, NLL_LOSS_THREADS, 0, at::cuda::getCurrentCUDAStream()>>>( in nll_loss_backward_out_cuda_template()
DNLLLoss2d.cu276 at::cuda::getCurrentCUDAStream()>>>( in nll_loss2d_forward_out_cuda_template()
331 at::cuda::getCurrentCUDAStream()>>>( in nll_loss2d_forward_out_cuda_template()
345 <<<1, 1, 0, at::cuda::getCurrentCUDAStream()>>>( in nll_loss2d_forward_out_cuda_template()
415 at::cuda::getCurrentCUDAStream()>>>( in nll_loss2d_backward_out_cuda_template()
450 at::cuda::getCurrentCUDAStream()>>>( in nll_loss2d_backward_out_cuda_template()
DWeightNorm.cu365 cudaStream_t stream = at::cuda::getCurrentCUDAStream(); in weight_norm_cuda()
396 cudaStream_t stream = at::cuda::getCurrentCUDAStream(); in weight_norm_cuda()
454 cudaStream_t stream = at::cuda::getCurrentCUDAStream(); in weight_norm_backward_cuda()
487 cudaStream_t stream = at::cuda::getCurrentCUDAStream(); in weight_norm_backward_cuda()
DSort.cu79 const auto stream = at::cuda::getCurrentCUDAStream(); in sort()
143 const auto stream = at::cuda::getCurrentCUDAStream(); in sort()
264 const auto stream = at::cuda::getCurrentCUDAStream(); in fixed_size_sort()
/external/pytorch/aten/src/ATen/cuda/tunable/
DStreamTimer.cpp28 AT_CUDA_CHECK(cudaEventRecord(start_, at::cuda::getCurrentCUDAStream())); in Start()
32 AT_CUDA_CHECK(cudaEventRecord(end_, at::cuda::getCurrentCUDAStream())); in End()
/external/pytorch/torch/csrc/
DCudaIPCTypes.cpp176 event_, c10::cuda::getCurrentCUDAStream(device.index()))); in CudaIPCSentData()
179 auto stream = c10::cuda::getCurrentCUDAStream(device.index()); in CudaIPCSentData()
187 auto stream = c10::cuda::getCurrentCUDAStream(device.index()); in CudaIPCSentData()
/external/pytorch/torch/csrc/distributed/c10d/quantization/
Dquantization_gpu.cu91 at::cuda::getCurrentCUDAStream()>>>( in _float_to_bfloat16_cuda()
139 at::cuda::getCurrentCUDAStream()>>>( in _bfloat16_to_float_cuda()

1234567