Home
last modified time | relevance | path

Searched refs:CUDAStream (Results 1 – 25 of 55) sorted by relevance

123

/external/pytorch/torch/csrc/jit/cuda/
Dcuda.h13 class CUDAStream final : public CustomClassHolder {
15 CUDAStream(
20 stream_ = std::make_unique<c10::cuda::CUDAStream>(
24 CUDAStream(c10::cuda::CUDAStream s) { in CUDAStream() function
25 stream_ = std::make_unique<c10::cuda::CUDAStream>(s); in CUDAStream()
41 void waitStream(const c10::intrusive_ptr<CUDAStream>& stream);
60 std::unique_ptr<c10::cuda::CUDAStream> stream_;
104 void record(const c10::intrusive_ptr<CUDAStream>& stream);
109 void wait(const c10::intrusive_ptr<CUDAStream>& stream);
112 void recordInternal(CUDAStream* stream);
[all …]
/external/pytorch/c10/cuda/
DCUDAStream.h60 class C10_CUDA_API CUDAStream {
66 explicit CUDAStream(Stream stream) : stream_(stream) { in CUDAStream() function
73 explicit CUDAStream(Unchecked, Stream stream) : stream_(stream) {} in CUDAStream() function
75 bool operator==(const CUDAStream& other) const noexcept {
79 bool operator!=(const CUDAStream& other) const noexcept {
165 static CUDAStream unpack3( in unpack3()
169 return CUDAStream(Stream::unpack3(stream_id, device_index, device_type)); in unpack3()
212 C10_API CUDAStream
215 C10_API CUDAStream
225 C10_API CUDAStream
[all …]
DCUDAGuard.h178 CUDAStream original_stream() const { in original_stream()
179 return CUDAStream(CUDAStream::UNCHECKED, guard_.original_stream()); in original_stream()
184 CUDAStream current_stream() const { in current_stream()
185 return CUDAStream(CUDAStream::UNCHECKED, guard_.current_stream()); in current_stream()
242 std::optional<CUDAStream> original_stream() const { in original_stream()
245 return std::make_optional(CUDAStream(CUDAStream::UNCHECKED, r.value())); in original_stream()
254 std::optional<CUDAStream> current_stream() const { in current_stream()
257 return std::make_optional(CUDAStream(CUDAStream::UNCHECKED, r.value())); in current_stream()
275 explicit CUDAMultiStreamGuard(ArrayRef<CUDAStream> streams) in CUDAMultiStreamGuard()
291 static std::vector<Stream> unwrapStreams(ArrayRef<CUDAStream> cudaStreams) { in unwrapStreams()
[all …]
DCUDAStream.cpp257 CUDAStream CUDAStreamForId(DeviceIndex device_index, StreamId stream_id) { in CUDAStreamForId()
258 return CUDAStream( in CUDAStreamForId()
259 CUDAStream::UNCHECKED, in CUDAStreamForId()
269 cudaStream_t CUDAStream::stream() const { in stream()
315 CUDAStream getStreamFromPool(const int priority, DeviceIndex device_index) { in getStreamFromPool()
340 CUDAStream getStreamFromPool(const bool isHighPriority, DeviceIndex device) { in getStreamFromPool()
346 CUDAStream getStreamFromExternal( in getStreamFromExternal()
353 CUDAStream getDefaultCUDAStream(DeviceIndex device_index) { in getDefaultCUDAStream()
363 CUDAStream getCurrentCUDAStream(DeviceIndex device_index) { in getCurrentCUDAStream()
373 void setCurrentCUDAStream(CUDAStream stream) { in setCurrentCUDAStream()
[all …]
/external/pytorch/aten/src/ATen/test/
Dcuda_stream_test.cpp40 at::cuda::CUDAStream copyStream = at::cuda::getStreamFromPool(); in TEST()
56 at::cuda::CUDAStream moveStream = at::cuda::getStreamFromPool(); in TEST()
75 at::cuda::CUDAStream myStream = at::cuda::getStreamFromPool(); in TEST()
79 at::cuda::CUDAStream curStream = at::cuda::getCurrentCUDAStream(); in TEST()
84 at::cuda::CUDAStream defaultStream = at::cuda::getDefaultCUDAStream(); in TEST()
92 void thread_fun(std::optional<at::cuda::CUDAStream>& cur_thread_stream) { in thread_fun()
102 std::optional<at::cuda::CUDAStream> s0, s1; in TEST()
109 at::cuda::CUDAStream cur_stream = at::cuda::getCurrentCUDAStream(); in TEST()
110 at::cuda::CUDAStream default_stream = at::cuda::getDefaultCUDAStream(); in TEST()
128 std::vector<at::cuda::CUDAStream> streams0 = { in TEST()
[all …]
/external/pytorch/torch/csrc/cuda/
Dnccl.h115 using stream_list = std::vector<std::optional<at::cuda::CUDAStream>>;
167 at::cuda::CUDAStream& stream,
180 at::cuda::CUDAStream& stream,
188 at::cuda::CUDAStream& stream);
200 at::cuda::CUDAStream& stream);
206 at::cuda::CUDAStream& stream);
211 at::cuda::CUDAStream stream,
217 at::cuda::CUDAStream stream,
DStream.cpp61 at::cuda::CUDAStream stream = (stream_id || device_index || device_type) in THCPStream_pynew()
62 ? at::cuda::CUDAStream::unpack3( in THCPStream_pynew()
76 new (&self->cuda_stream) at::cuda::CUDAStream(stream); in THCPStream_pynew()
83 self->cuda_stream.~CUDAStream(); in THCPStream_dealloc()
104 at::cuda::CUDAStream::priority_range(); in THCPStream_priority_range()
Dutils.cpp9 std::vector<std::optional<at::cuda::CUDAStream>>
21 std::vector<std::optional<at::cuda::CUDAStream>> streams; in THPUtils_PySequence_to_CUDAStreamList()
28 streams.emplace_back(at::cuda::CUDAStream::unpack3( in THPUtils_PySequence_to_CUDAStreamList()
Dcomm.h31 const std::optional<std::vector<std::optional<at::cuda::CUDAStream>>>&
39 const std::optional<std::vector<std::optional<at::cuda::CUDAStream>>>&
Dpython_comm.cpp52 std::optional<std::vector<std::optional<at::cuda::CUDAStream>>> in initCommMethods()
73 std::optional<std::vector<std::optional<at::cuda::CUDAStream>>> in initCommMethods()
DStream.h10 at::cuda::CUDAStream cuda_stream;
/external/pytorch/torch/csrc/jit/runtime/
Dregister_cuda_ops.cpp45 auto st = make_custom_class<torch::jit::CUDAStream>(s); in __anon3d06fc340202()
56 auto st = make_custom_class<torch::jit::CUDAStream>(s); in __anon3d06fc340302()
68 auto st = make_custom_class<torch::jit::CUDAStream>(s); in __anon3d06fc340402()
79 auto st = make_custom_class<torch::jit::CUDAStream>(s); in __anon3d06fc340502()
142 auto s = v.toCustomClass<torch::jit::CUDAStream>(); in __anon3d06fc340c02()
158 auto unpacked = c10::cuda::CUDAStream::unpack3( in __anon3d06fc340c02()
/external/pytorch/docs/cpp/source/notes/
Dtensor_cuda_stream.rst5 The PyTorch C++ API supports CUDA streams with the CUDAStream class and useful helper functions to …
6 You can find them in `CUDAStream.h`_. This note provides more details on how to use Pytorch C++ CUD…
21 CUDAStream getStreamFromPool(const bool isHighPriority = false, DeviceIndex device = -1);
32 CUDAStream getDefaultCUDAStream(DeviceIndex device_index = -1);
42 CUDAStream getCurrentCUDAStream(DeviceIndex device_index = -1);
60 void setCurrentCUDAStream(CUDAStream stream);
87 at::cuda::CUDAStream myStream = at::cuda::getStreamFromPool();
94 at::cuda::CUDAStream defaultStream = at::cuda::getDefaultCUDAStream();
108 at::cuda::CUDAStream myStream = at::cuda::getStreamFromPool(false, 0);
133 at::cuda::CUDAStream myStream0 = at::cuda::getStreamFromPool(false, 0);
[all …]
/external/pytorch/c10/cuda/impl/
DCUDAGuardImpl.h74 CUDAStream cs(s); in exchangeStream()
137 CUDAStream cuda_stream{stream}; in record()
165 CUDAStream cuda_stream{stream}; in block()
200 CUDAStream cuda_stream{stream}; in queryStream()
205 CUDAStream cuda_stream{stream}; in synchronizeStream()
224 CUDAStream cuda_stream{stream}; in recordDataPtrOnStream()
/external/tensorflow/tensorflow/compiler/xla/stream_executor/cuda/
Dcuda_stream.h27 using CUDAStream = gpu::GpuStream; variable
29 inline CUDAStream* AsCUDAStream(Stream* stream) { in AsCUDAStream()
/external/pytorch/torch/csrc/distributed/c10d/
Dintra_node_comm.hpp84 at::cuda::CUDAStream& stream);
88 at::cuda::CUDAStream& stream);
92 at::cuda::CUDAStream& stream);
DProcessGroupNCCL.cpp246 at::cuda::CUDAStream& ncclStream) { in syncStream()
3309 [](at::cuda::CUDAStream&, in checkForNCCLErrorsInternal()
3311 [](at::cuda::CUDAStream&, in checkForNCCLErrorsInternal() argument
3331 [](at::cuda::CUDAStream&, in checkForNCCLErrorsInternal() argument
3333 [](at::cuda::CUDAStream&) {}, in checkForNCCLErrorsInternal() argument
3355 at::cuda::CUDAStream& stream) { in checkForNCCLErrorsInternal()
3386 [](at::cuda::CUDAStream& ncclStream, in checkForNCCLErrorsInternal()
3388 [&](at::cuda::CUDAStream& ncclStream, in checkForNCCLErrorsInternal()
3419 at::cuda::CUDAStream& stream) { in checkForNCCLErrorsInternal()
3518 at::cuda::CUDAStream& stream) { in checkForNCCLErrorsInternal()
[all …]
DNanCheck.hpp12 void checkForNan(const at::Tensor& tensor, at::cuda::CUDAStream& stream);
/external/pytorch/torch/csrc/inductor/aoti_runner/
Dmodel_container_runner_cuda.cpp21 at::cuda::CUDAStream cuda_stream = c10::cuda::getCurrentCUDAStream(); in run()
28 at::cuda::CUDAStream cuda_stream) { in run_with_cuda_stream()
/external/pytorch/torch/csrc/autograd/functions/
Dcomm.h21 std::optional<std::vector<std::optional<at::cuda::CUDAStream>>> streams =
31 std::optional<std::vector<std::optional<at::cuda::CUDAStream>>> streams_;
/external/pytorch/aten/src/ATen/cuda/
DCachingHostAllocator.cpp69 using Block = HostBlock<CUDAStream>;
72 : public CachingHostAllocatorImpl<CUDAStream, EventPool::Event> {
109 CUDAStream stream) override { in record_stream()
255 at::cuda::CUDAStream stream) { in CachingHostAllocator_recordEvent()
DCUDAEvent.h108 void recordOnce(const CUDAStream& stream) { in recordOnce()
113 void record(const CUDAStream& stream) { in record()
134 void block(const CUDAStream& stream) { in block()
DCachingHostAllocator.h28 c10::cuda::CUDAStream stream);
/external/pytorch/torch/csrc/distributed/rpc/
Dtensorpipe_cuda.cpp80 at::cuda::CUDAStream(getStreamForDevice(streams, storage.device())); in prepareTensorForSending()
104 at::cuda::CUDAStream stream(getStreamForDevice(streams, device)); in allocateTensorForReceiving()
/external/pytorch/test/cpp/c10d/
DCUDATest.hpp14 EXPORT_TEST_API void cudaSleep(at::cuda::CUDAStream& stream, uint64_t clocks);

123