/external/pytorch/torch/csrc/jit/cuda/ |
D | cuda.h | 13 class CUDAStream final : public CustomClassHolder { 15 CUDAStream( 20 stream_ = std::make_unique<c10::cuda::CUDAStream>( 24 CUDAStream(c10::cuda::CUDAStream s) { in CUDAStream() function 25 stream_ = std::make_unique<c10::cuda::CUDAStream>(s); in CUDAStream() 41 void waitStream(const c10::intrusive_ptr<CUDAStream>& stream); 60 std::unique_ptr<c10::cuda::CUDAStream> stream_; 104 void record(const c10::intrusive_ptr<CUDAStream>& stream); 109 void wait(const c10::intrusive_ptr<CUDAStream>& stream); 112 void recordInternal(CUDAStream* stream); [all …]
|
/external/pytorch/c10/cuda/ |
D | CUDAStream.h | 60 class C10_CUDA_API CUDAStream { 66 explicit CUDAStream(Stream stream) : stream_(stream) { in CUDAStream() function 73 explicit CUDAStream(Unchecked, Stream stream) : stream_(stream) {} in CUDAStream() function 75 bool operator==(const CUDAStream& other) const noexcept { 79 bool operator!=(const CUDAStream& other) const noexcept { 165 static CUDAStream unpack3( in unpack3() 169 return CUDAStream(Stream::unpack3(stream_id, device_index, device_type)); in unpack3() 212 C10_API CUDAStream 215 C10_API CUDAStream 225 C10_API CUDAStream [all …]
|
D | CUDAGuard.h | 178 CUDAStream original_stream() const { in original_stream() 179 return CUDAStream(CUDAStream::UNCHECKED, guard_.original_stream()); in original_stream() 184 CUDAStream current_stream() const { in current_stream() 185 return CUDAStream(CUDAStream::UNCHECKED, guard_.current_stream()); in current_stream() 242 std::optional<CUDAStream> original_stream() const { in original_stream() 245 return std::make_optional(CUDAStream(CUDAStream::UNCHECKED, r.value())); in original_stream() 254 std::optional<CUDAStream> current_stream() const { in current_stream() 257 return std::make_optional(CUDAStream(CUDAStream::UNCHECKED, r.value())); in current_stream() 275 explicit CUDAMultiStreamGuard(ArrayRef<CUDAStream> streams) in CUDAMultiStreamGuard() 291 static std::vector<Stream> unwrapStreams(ArrayRef<CUDAStream> cudaStreams) { in unwrapStreams() [all …]
|
D | CUDAStream.cpp | 257 CUDAStream CUDAStreamForId(DeviceIndex device_index, StreamId stream_id) { in CUDAStreamForId() 258 return CUDAStream( in CUDAStreamForId() 259 CUDAStream::UNCHECKED, in CUDAStreamForId() 269 cudaStream_t CUDAStream::stream() const { in stream() 315 CUDAStream getStreamFromPool(const int priority, DeviceIndex device_index) { in getStreamFromPool() 340 CUDAStream getStreamFromPool(const bool isHighPriority, DeviceIndex device) { in getStreamFromPool() 346 CUDAStream getStreamFromExternal( in getStreamFromExternal() 353 CUDAStream getDefaultCUDAStream(DeviceIndex device_index) { in getDefaultCUDAStream() 363 CUDAStream getCurrentCUDAStream(DeviceIndex device_index) { in getCurrentCUDAStream() 373 void setCurrentCUDAStream(CUDAStream stream) { in setCurrentCUDAStream() [all …]
|
/external/pytorch/aten/src/ATen/test/ |
D | cuda_stream_test.cpp | 40 at::cuda::CUDAStream copyStream = at::cuda::getStreamFromPool(); in TEST() 56 at::cuda::CUDAStream moveStream = at::cuda::getStreamFromPool(); in TEST() 75 at::cuda::CUDAStream myStream = at::cuda::getStreamFromPool(); in TEST() 79 at::cuda::CUDAStream curStream = at::cuda::getCurrentCUDAStream(); in TEST() 84 at::cuda::CUDAStream defaultStream = at::cuda::getDefaultCUDAStream(); in TEST() 92 void thread_fun(std::optional<at::cuda::CUDAStream>& cur_thread_stream) { in thread_fun() 102 std::optional<at::cuda::CUDAStream> s0, s1; in TEST() 109 at::cuda::CUDAStream cur_stream = at::cuda::getCurrentCUDAStream(); in TEST() 110 at::cuda::CUDAStream default_stream = at::cuda::getDefaultCUDAStream(); in TEST() 128 std::vector<at::cuda::CUDAStream> streams0 = { in TEST() [all …]
|
/external/pytorch/torch/csrc/cuda/ |
D | nccl.h | 115 using stream_list = std::vector<std::optional<at::cuda::CUDAStream>>; 167 at::cuda::CUDAStream& stream, 180 at::cuda::CUDAStream& stream, 188 at::cuda::CUDAStream& stream); 200 at::cuda::CUDAStream& stream); 206 at::cuda::CUDAStream& stream); 211 at::cuda::CUDAStream stream, 217 at::cuda::CUDAStream stream,
|
D | Stream.cpp | 61 at::cuda::CUDAStream stream = (stream_id || device_index || device_type) in THCPStream_pynew() 62 ? at::cuda::CUDAStream::unpack3( in THCPStream_pynew() 76 new (&self->cuda_stream) at::cuda::CUDAStream(stream); in THCPStream_pynew() 83 self->cuda_stream.~CUDAStream(); in THCPStream_dealloc() 104 at::cuda::CUDAStream::priority_range(); in THCPStream_priority_range()
|
D | utils.cpp | 9 std::vector<std::optional<at::cuda::CUDAStream>> 21 std::vector<std::optional<at::cuda::CUDAStream>> streams; in THPUtils_PySequence_to_CUDAStreamList() 28 streams.emplace_back(at::cuda::CUDAStream::unpack3( in THPUtils_PySequence_to_CUDAStreamList()
|
D | comm.h | 31 const std::optional<std::vector<std::optional<at::cuda::CUDAStream>>>& 39 const std::optional<std::vector<std::optional<at::cuda::CUDAStream>>>&
|
D | python_comm.cpp | 52 std::optional<std::vector<std::optional<at::cuda::CUDAStream>>> in initCommMethods() 73 std::optional<std::vector<std::optional<at::cuda::CUDAStream>>> in initCommMethods()
|
D | Stream.h | 10 at::cuda::CUDAStream cuda_stream;
|
/external/pytorch/torch/csrc/jit/runtime/ |
D | register_cuda_ops.cpp | 45 auto st = make_custom_class<torch::jit::CUDAStream>(s); in __anon3d06fc340202() 56 auto st = make_custom_class<torch::jit::CUDAStream>(s); in __anon3d06fc340302() 68 auto st = make_custom_class<torch::jit::CUDAStream>(s); in __anon3d06fc340402() 79 auto st = make_custom_class<torch::jit::CUDAStream>(s); in __anon3d06fc340502() 142 auto s = v.toCustomClass<torch::jit::CUDAStream>(); in __anon3d06fc340c02() 158 auto unpacked = c10::cuda::CUDAStream::unpack3( in __anon3d06fc340c02()
|
/external/pytorch/docs/cpp/source/notes/ |
D | tensor_cuda_stream.rst | 5 The PyTorch C++ API supports CUDA streams with the CUDAStream class and useful helper functions to … 6 You can find them in `CUDAStream.h`_. This note provides more details on how to use Pytorch C++ CUD… 21 CUDAStream getStreamFromPool(const bool isHighPriority = false, DeviceIndex device = -1); 32 CUDAStream getDefaultCUDAStream(DeviceIndex device_index = -1); 42 CUDAStream getCurrentCUDAStream(DeviceIndex device_index = -1); 60 void setCurrentCUDAStream(CUDAStream stream); 87 at::cuda::CUDAStream myStream = at::cuda::getStreamFromPool(); 94 at::cuda::CUDAStream defaultStream = at::cuda::getDefaultCUDAStream(); 108 at::cuda::CUDAStream myStream = at::cuda::getStreamFromPool(false, 0); 133 at::cuda::CUDAStream myStream0 = at::cuda::getStreamFromPool(false, 0); [all …]
|
/external/pytorch/c10/cuda/impl/ |
D | CUDAGuardImpl.h | 74 CUDAStream cs(s); in exchangeStream() 137 CUDAStream cuda_stream{stream}; in record() 165 CUDAStream cuda_stream{stream}; in block() 200 CUDAStream cuda_stream{stream}; in queryStream() 205 CUDAStream cuda_stream{stream}; in synchronizeStream() 224 CUDAStream cuda_stream{stream}; in recordDataPtrOnStream()
|
/external/tensorflow/tensorflow/compiler/xla/stream_executor/cuda/ |
D | cuda_stream.h | 27 using CUDAStream = gpu::GpuStream; variable 29 inline CUDAStream* AsCUDAStream(Stream* stream) { in AsCUDAStream()
|
/external/pytorch/torch/csrc/distributed/c10d/ |
D | intra_node_comm.hpp | 84 at::cuda::CUDAStream& stream); 88 at::cuda::CUDAStream& stream); 92 at::cuda::CUDAStream& stream);
|
D | ProcessGroupNCCL.cpp | 246 at::cuda::CUDAStream& ncclStream) { in syncStream() 3309 [](at::cuda::CUDAStream&, in checkForNCCLErrorsInternal() 3311 [](at::cuda::CUDAStream&, in checkForNCCLErrorsInternal() argument 3331 [](at::cuda::CUDAStream&, in checkForNCCLErrorsInternal() argument 3333 [](at::cuda::CUDAStream&) {}, in checkForNCCLErrorsInternal() argument 3355 at::cuda::CUDAStream& stream) { in checkForNCCLErrorsInternal() 3386 [](at::cuda::CUDAStream& ncclStream, in checkForNCCLErrorsInternal() 3388 [&](at::cuda::CUDAStream& ncclStream, in checkForNCCLErrorsInternal() 3419 at::cuda::CUDAStream& stream) { in checkForNCCLErrorsInternal() 3518 at::cuda::CUDAStream& stream) { in checkForNCCLErrorsInternal() [all …]
|
D | NanCheck.hpp | 12 void checkForNan(const at::Tensor& tensor, at::cuda::CUDAStream& stream);
|
/external/pytorch/torch/csrc/inductor/aoti_runner/ |
D | model_container_runner_cuda.cpp | 21 at::cuda::CUDAStream cuda_stream = c10::cuda::getCurrentCUDAStream(); in run() 28 at::cuda::CUDAStream cuda_stream) { in run_with_cuda_stream()
|
/external/pytorch/torch/csrc/autograd/functions/ |
D | comm.h | 21 std::optional<std::vector<std::optional<at::cuda::CUDAStream>>> streams = 31 std::optional<std::vector<std::optional<at::cuda::CUDAStream>>> streams_;
|
/external/pytorch/aten/src/ATen/cuda/ |
D | CachingHostAllocator.cpp | 69 using Block = HostBlock<CUDAStream>; 72 : public CachingHostAllocatorImpl<CUDAStream, EventPool::Event> { 109 CUDAStream stream) override { in record_stream() 255 at::cuda::CUDAStream stream) { in CachingHostAllocator_recordEvent()
|
D | CUDAEvent.h | 108 void recordOnce(const CUDAStream& stream) { in recordOnce() 113 void record(const CUDAStream& stream) { in record() 134 void block(const CUDAStream& stream) { in block()
|
D | CachingHostAllocator.h | 28 c10::cuda::CUDAStream stream);
|
/external/pytorch/torch/csrc/distributed/rpc/ |
D | tensorpipe_cuda.cpp | 80 at::cuda::CUDAStream(getStreamForDevice(streams, storage.device())); in prepareTensorForSending() 104 at::cuda::CUDAStream stream(getStreamForDevice(streams, device)); in allocateTensorForReceiving()
|
/external/pytorch/test/cpp/c10d/ |
D | CUDATest.hpp | 14 EXPORT_TEST_API void cudaSleep(at::cuda::CUDAStream& stream, uint64_t clocks);
|