Home
last modified time | relevance | path

Searched refs:compute_stream (Results 1 – 12 of 12) sorted by relevance

/external/tensorflow/tensorflow/core/kernels/
nccl_ops.cc:108 auto* compute_stream = c->op_device_context()->stream(); in ComputeAsync() local
111 compute_stream->parent(), compute_stream, gpu_info, input, output, in ComputeAsync()
139 auto* compute_stream = c->op_device_context()->stream(); in ComputeAsync() local
142 compute_stream->parent(), compute_stream, gpu_info, &c->input(0), in ComputeAsync()
175 auto* compute_stream = c->op_device_context()->stream(); in ComputeAsync() local
178 compute_stream->parent(), compute_stream, gpu_info, input, output, in ComputeAsync()
209 auto* compute_stream = c->op_device_context()->stream(); in ComputeAsync() local
212 compute_stream->parent(), compute_stream, gpu_info, &c->input(0), in ComputeAsync()
245 auto* compute_stream = c->op_device_context()->stream(); in ComputeAsync() local
248 compute_stream->parent(), compute_stream, gpu_info, in ComputeAsync()
/external/pytorch/torch/distributed/tensor/parallel/
_data_parallel_utils.py:11 def sync_grad_hook(grad, *, device_handle=None, compute_stream=None): argument
13 if compute_stream is not None:
14 with device_handle.stream(compute_stream):
32 def _unflatten_tensor(tensor, spec, *, device_handle=None, compute_stream=None): argument
48 compute_stream=compute_stream,
fsdp.py:327 self.compute_stream = None
342 stream = self.compute_stream or self.device_handle.current_stream()
353 compute_stream=self.compute_stream,
/external/tensorflow/tensorflow/core/nccl/
collective_communicator.cc:89 auto* compute_stream = col_ctx->op_ctx->op_device_context()->stream(); in Enqueue() local
93 compute_stream->parent(), compute_stream, gpu_info, col_ctx->input, in Enqueue()
/external/tensorflow/tensorflow/core/common_runtime/device/
device_event_mgr_test.cc:159 se::Stream* compute_stream() { return gpu_->stream_->compute; } in compute_stream() function in tensorflow::GPUDeviceTestHelper
382 gpu_helper_->h2d_stream()->ThenWaitFor(gpu_helper_->compute_stream()); in DoAddChain()
393 gpu_helper_->compute_stream()->ThenWaitFor(gpu_helper_->h2d_stream()); in DoAddChain()
396 gpu_helper_->compute_stream(), [times, r]() { in DoAddChain()
406 gpu_helper_->event_mgr()->ThenExecute(gpu_helper_->compute_stream(), in DoAddChain()
413 gpu_helper_->compute_stream(), [times, r]() { in DoAddChain()
417 gpu_helper_->d2h_stream()->ThenWaitFor(gpu_helper_->compute_stream()); in DoAddChain()
/external/tensorflow/tensorflow/compiler/xla/pjrt/
pjrt_stream_executor_client.cc:285 if (stream != local_device->compute_stream()) { in StallStreamOnError()
286 local_device->compute_stream()->ThenWaitFor(stream); in StallStreamOnError()
394 copy_stream->ThenWaitFor(local_device->compute_stream()); in AllocateDestinationBuffer()
398 local_device->compute_stream()->parent(), dst_buffer)); in AllocateDestinationBuffer()
414 local_device->compute_stream())); in AllocateDestinationBuffer()
416 std::move(event), local_device->compute_stream()); in AllocateDestinationBuffer()
438 tuple_table_stream->ThenWaitFor(local_device->compute_stream()); in AllocateDestinationBuffer()
441 local_device->compute_stream()->parent(), dst_buffer)); in AllocateDestinationBuffer()
1201 local_device_state->compute_stream()) && in Release()
1447 transfer_stream, dst_local_device->compute_stream(), input_buffer, in CopyToDeviceHelper()
[all …]
gpu_device.cc:94 ordinal_and_device.second->compute_stream() in CreateCudaAsyncAllocator()
98 ordinal_and_device.second->compute_stream()); in CreateCudaAsyncAllocator()
259 ordinal_and_device.second->compute_stream()); in CreateBFCAllocator()
local_device_state.h:107 se::Stream* compute_stream() const { return compute_stream_.get(); } in compute_stream() function
/external/tensorflow/tensorflow/compiler/jit/
xla_device_context.cc:88 std::shared_ptr&lt;se::Stream&gt; compute_stream, in XlaDeviceContext() argument
95 : stream_(std::move(compute_stream)), in XlaDeviceContext()
xla_device_context.h:57 std::shared_ptr&lt;se::Stream&gt; compute_stream,
/external/tensorflow/tensorflow/core/common_runtime/gpu/
gpu_device.h:236 se::Stream* compute_stream, in GPUKernelTracker() argument
241 stream_(compute_stream), in GPUKernelTracker()
/external/pytorch/torch/distributed/fsdp/
_runtime_utils.py:223 fsdp_state._fsdp_extension.compute_stream = root_state._default_stream
255 state._fsdp_extension.compute_stream = state._default_stream