/external/pytorch/aten/src/ATen/test/ |
D | cuda_stream_test.cpp | 79 at::cuda::CUDAStream curStream = at::cuda::getCurrentCUDAStream(); in TEST() 86 curStream = at::cuda::getCurrentCUDAStream(); in TEST() 95 cur_thread_stream = {at::cuda::getCurrentCUDAStream()}; in thread_fun() 109 at::cuda::CUDAStream cur_stream = at::cuda::getCurrentCUDAStream(); in TEST() 153 ASSERT_EQ_CUDA(at::cuda::getCurrentCUDAStream(1), streams1[1]); in TEST() 158 ASSERT_EQ_CUDA(at::cuda::getCurrentCUDAStream(1), streams1[0]); in TEST() 165 ASSERT_EQ_CUDA(at::cuda::getCurrentCUDAStream(1), streams1[0]); in TEST() 169 ASSERT_EQ_CUDA(at::cuda::getCurrentCUDAStream(0), streams0[0]); in TEST() 205 ASSERT_EQ_CUDA(s0, at::cuda::getCurrentCUDAStream()); in TEST() 208 ASSERT_EQ_CUDA(s1, at::cuda::getCurrentCUDAStream()); in TEST() [all …]
|
D | cuda_caching_host_allocator_test.cpp | 24 at::cuda::getCurrentCUDAStream())); in TEST() 39 at::cuda::getCurrentCUDAStream())); in TEST() 52 at::cuda::getCurrentCUDAStream())); in TEST() 62 at::cuda::getCurrentCUDAStream())); in TEST() 109 at::cuda::getCurrentCUDAStream())); in TEST() 123 at::cuda::getCurrentCUDAStream())); in TEST() 139 ptr, ctx, at::cuda::getCurrentCUDAStream())); in TEST() 144 ptr, ctx, at::cuda::getCurrentCUDAStream())); in TEST()
|
/external/pytorch/aten/src/ATen/cuda/ |
D | cub.cuh | 152 begin_bit, end_bit, c10::cuda::getCurrentCUDAStream()); in segmented_sort_pairs() 157 begin_bit, end_bit, c10::cuda::getCurrentCUDAStream()); in segmented_sort_pairs() 175 …alues_in, keys_out_, values_out, num_selected, num_input_items, c10::cuda::getCurrentCUDAStream()); in unique_by_key() 236 at::cuda::getCurrentCUDAStream()); in inclusive_scan() 248 at::cuda::getCurrentCUDAStream()); in inclusive_scan() 257 impl::transform_vals<<<1, 1, 0, at::cuda::getCurrentCUDAStream()>>>( in inclusive_scan() 280 at::cuda::getCurrentCUDAStream()); in inclusive_scan() 288 at::cuda::getCurrentCUDAStream()); in inclusive_scan() 304 at::cuda::getCurrentCUDAStream()); in exclusive_scan() 317 at::cuda::getCurrentCUDAStream()); in exclusive_scan() [all …]
|
D | CUDAGraph.cpp | 68 : capture_stream_(at::cuda::getCurrentCUDAStream()) { in CUDAGraph() 98 auto stream = at::cuda::getCurrentCUDAStream(); in capture_begin() 154 auto stream = at::cuda::getCurrentCUDAStream(); in capture_end() 234 AT_CUDA_CHECK(cudaGraphLaunch(graph_exec_, at::cuda::getCurrentCUDAStream())); in replay()
|
D | Sleep.cu | 32 spin_kernel<<<grid, block, 0, c10::cuda::getCurrentCUDAStream()>>>(cycles); in sleep() 64 flush_icache_kernel<<<grid, block, 0, c10::cuda::getCurrentCUDAStream()>>>(); in flush_icache()
|
D | cub-RadixSortKeys.cu | 31 c10::cuda::getCurrentCUDAStream()); in radix_sort_keys() 40 c10::cuda::getCurrentCUDAStream()); in radix_sort_keys()
|
D | cub-RadixSortPairs.cu | 43 c10::cuda::getCurrentCUDAStream()); in radix_sort_pairs_impl() 54 c10::cuda::getCurrentCUDAStream()); in radix_sort_pairs_impl()
|
D | CUDAEvent.h | 106 void record() { record(getCurrentCUDAStream()); } in record() 178 createEvent(getCurrentCUDAStream().device_index()); in ipc_handle()
|
/external/pytorch/aten/src/ATen/native/cuda/linalg/ |
D | MagmaUtils.h | 32 at::cuda::getCurrentCUDAStream(), in MAGMAQueue() 72 auto stream = at::cuda::getCurrentCUDAStream(); in MagmaStreamSyncGuard() 80 if (at::cuda::getCurrentCUDAStream() != default_stream) { in noexcept()
|
/external/pytorch/aten/src/ATen/native/cuda/ |
D | MultiTensorApply.cuh | 180 at::cuda::getCurrentCUDAStream()>>>( in multi_tensor_apply() 212 at::cuda::getCurrentCUDAStream()>>>(tensorListMeta, callable, args...); in multi_tensor_apply() 263 at::cuda::getCurrentCUDAStream()>>>( in multi_tensor_apply() 292 at::cuda::getCurrentCUDAStream()>>>(tensorListMeta, callable, args...); in multi_tensor_apply() 345 at::cuda::getCurrentCUDAStream()>>>( in multi_tensor_apply_for_fused_optimizer() 374 at::cuda::getCurrentCUDAStream()>>>(tensorListMeta, callable, args...); in multi_tensor_apply_for_fused_optimizer()
|
D | LegacyThrustHelpers.cu | 24 const cudaStream_t stream = at::cuda::getCurrentCUDAStream(); in index_put_with_sort_kernel_thrust_helper() 48 cudaStream_t stream = at::cuda::getCurrentCUDAStream(); in embedding_dense_backward_cuda_scan() 90 auto stream = at::cuda::getCurrentCUDAStream(); in embedding_backward_cuda_kernel_unique_by_key()
|
D | AdaptiveMaxPooling2d.cu | 251 at::cuda::getCurrentCUDAStream()>>>( in TORCH_IMPL_FUNC() 300 at::cuda::getCurrentCUDAStream()>>>( in TORCH_IMPL_FUNC() 385 at::cuda::getCurrentCUDAStream()>>>( in TORCH_IMPL_FUNC() 400 at::cuda::getCurrentCUDAStream()>>>( in TORCH_IMPL_FUNC() 446 at::cuda::getCurrentCUDAStream()>>>( in TORCH_IMPL_FUNC() 461 at::cuda::getCurrentCUDAStream()>>>( in TORCH_IMPL_FUNC()
|
D | SortStable.cu | 113 auto stream = at::cuda::getCurrentCUDAStream(); in segmented_sort_large_segments() 151 auto stream = c10::cuda::getCurrentCUDAStream(); in segmented_sort_pairs_by_full_sort() 178 at::cuda::getCurrentCUDAStream()>>>( in segmented_sort_pairs_by_full_sort() 199 auto stream = c10::cuda::getCurrentCUDAStream(); in segmented_sort_pairs()
|
D | SegmentReduce.cu | 339 at::cuda::getCurrentCUDAStream()>>>( in _segment_reduce_lengths_offsets_backward_cuda_kernel() 473 at::cuda::getCurrentCUDAStream()>>>( in _segment_reduce_lengths_offsets_cuda_kernel() 504 at::cuda::getCurrentCUDAStream()); in _segment_reduce_lengths_offsets_cuda_kernel() 516 at::cuda::getCurrentCUDAStream()); in _segment_reduce_lengths_offsets_cuda_kernel() 522 at::cuda::getCurrentCUDAStream()>>>( in _segment_reduce_lengths_offsets_cuda_kernel() 540 at::cuda::getCurrentCUDAStream()); in _segment_reduce_lengths_offsets_cuda_kernel() 552 at::cuda::getCurrentCUDAStream()); in _segment_reduce_lengths_offsets_cuda_kernel() 564 at::cuda::getCurrentCUDAStream()); in _segment_reduce_lengths_offsets_cuda_kernel()
|
D | TensorModeKernel.cu | 29 auto stream = at::cuda::getCurrentCUDAStream(); in operator ()() 99 auto stream = at::cuda::getCurrentCUDAStream(); in operator ()() 171 auto stream = at::cuda::getCurrentCUDAStream(); in calculate_mode() 214 <<<grid, num_threads, memsize, at::cuda::getCurrentCUDAStream()>>>( in handle_fused_mode()
|
D | MultiLabelMarginCriterion.cu | 233 <<<blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>( in multilabel_margin_loss_forward_out_cuda_template() 258 <<<blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>( in multilabel_margin_loss_forward_out_cuda_template() 284 <<<blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>( in multilabel_margin_loss_forward_out_cuda_template() 343 <<<blocks, threads, 0, c10::cuda::getCurrentCUDAStream()>>>( in multilabel_margin_loss_backward_cuda_out_template() 372 <<<blocks, threads, 0, c10::cuda::getCurrentCUDAStream()>>>( in multilabel_margin_loss_backward_cuda_out_template()
|
D | Copy.cu | 195 CUDAStream copy_stream = getCurrentCUDAStream(src_device.index()); in copy_device_to_device() 205 dst_ready.record(getCurrentCUDAStream(dst_device.index())); in copy_device_to_device() 251 src_ready.block(getCurrentCUDAStream(dst_device.index())); in copy_device_to_device() 360 CUDAStream stream = getCurrentCUDAStream(); in copy_kernel_cuda()
|
D | Dropout.cu | 260 <<<grid, dim_block, 0, at::cuda::getCurrentCUDAStream()>>>( in launcher() 276 <<<grid, dim_block, 0, at::cuda::getCurrentCUDAStream()>>>( in launcher() 290 <<<grid, dim_block, 0, at::cuda::getCurrentCUDAStream()>>>( in launcher() 306 at::cuda::getCurrentCUDAStream()>>>( in launcher() 319 at::cuda::getCurrentCUDAStream()>>>( in launcher()
|
D | Loss.cu | 316 at::cuda::getCurrentCUDAStream()>>>( in nll_loss_forward_out_cuda_template() 360 <<<1, 1, 0, at::cuda::getCurrentCUDAStream()>>>( in nll_loss_forward_out_cuda_template() 389 at::cuda::getCurrentCUDAStream()>>>( in nll_loss_forward_out_cuda_template() 520 at::cuda::getCurrentCUDAStream()>>>( in nll_loss_backward_out_cuda_template() 546 <<<1, 1, 0, at::cuda::getCurrentCUDAStream()>>>( in nll_loss_backward_out_cuda_template() 571 <<<1, NLL_LOSS_THREADS, 0, at::cuda::getCurrentCUDAStream()>>>( in nll_loss_backward_out_cuda_template()
|
D | NLLLoss2d.cu | 276 at::cuda::getCurrentCUDAStream()>>>( in nll_loss2d_forward_out_cuda_template() 331 at::cuda::getCurrentCUDAStream()>>>( in nll_loss2d_forward_out_cuda_template() 345 <<<1, 1, 0, at::cuda::getCurrentCUDAStream()>>>( in nll_loss2d_forward_out_cuda_template() 415 at::cuda::getCurrentCUDAStream()>>>( in nll_loss2d_backward_out_cuda_template() 450 at::cuda::getCurrentCUDAStream()>>>( in nll_loss2d_backward_out_cuda_template()
|
D | WeightNorm.cu | 365 cudaStream_t stream = at::cuda::getCurrentCUDAStream(); in weight_norm_cuda() 396 cudaStream_t stream = at::cuda::getCurrentCUDAStream(); in weight_norm_cuda() 454 cudaStream_t stream = at::cuda::getCurrentCUDAStream(); in weight_norm_backward_cuda() 487 cudaStream_t stream = at::cuda::getCurrentCUDAStream(); in weight_norm_backward_cuda()
|
D | Sort.cu | 79 const auto stream = at::cuda::getCurrentCUDAStream(); in sort() 143 const auto stream = at::cuda::getCurrentCUDAStream(); in sort() 264 const auto stream = at::cuda::getCurrentCUDAStream(); in fixed_size_sort()
|
/external/pytorch/aten/src/ATen/cuda/tunable/ |
D | StreamTimer.cpp | 28 AT_CUDA_CHECK(cudaEventRecord(start_, at::cuda::getCurrentCUDAStream())); in Start() 32 AT_CUDA_CHECK(cudaEventRecord(end_, at::cuda::getCurrentCUDAStream())); in End()
|
/external/pytorch/torch/csrc/ |
D | CudaIPCTypes.cpp | 176 event_, c10::cuda::getCurrentCUDAStream(device.index()))); in CudaIPCSentData() 179 auto stream = c10::cuda::getCurrentCUDAStream(device.index()); in CudaIPCSentData() 187 auto stream = c10::cuda::getCurrentCUDAStream(device.index()); in CudaIPCSentData()
|
/external/pytorch/torch/csrc/distributed/c10d/quantization/ |
D | quantization_gpu.cu | 91 at::cuda::getCurrentCUDAStream()>>>( in _float_to_bfloat16_cuda() 139 at::cuda::getCurrentCUDAStream()>>>( in _bfloat16_to_float_cuda()
|