/external/tensorflow/tensorflow/core/kernels/ |
D | nccl_ops.cc | 108 auto* compute_stream = c->op_device_context()->stream(); in ComputeAsync() local 111 compute_stream->parent(), compute_stream, gpu_info, input, output, in ComputeAsync() 139 auto* compute_stream = c->op_device_context()->stream(); in ComputeAsync() local 142 compute_stream->parent(), compute_stream, gpu_info, &c->input(0), in ComputeAsync() 175 auto* compute_stream = c->op_device_context()->stream(); in ComputeAsync() local 178 compute_stream->parent(), compute_stream, gpu_info, input, output, in ComputeAsync() 209 auto* compute_stream = c->op_device_context()->stream(); in ComputeAsync() local 212 compute_stream->parent(), compute_stream, gpu_info, &c->input(0), in ComputeAsync() 245 auto* compute_stream = c->op_device_context()->stream(); in ComputeAsync() local 248 compute_stream->parent(), compute_stream, gpu_info, in ComputeAsync()
|
/external/pytorch/torch/distributed/tensor/parallel/ |
D | _data_parallel_utils.py | 11 def sync_grad_hook(grad, *, device_handle=None, compute_stream=None): argument 13 if compute_stream is not None: 14 with device_handle.stream(compute_stream): 32 def _unflatten_tensor(tensor, spec, *, device_handle=None, compute_stream=None): argument 48 compute_stream=compute_stream,
|
D | fsdp.py | 327 self.compute_stream = None 342 stream = self.compute_stream or self.device_handle.current_stream() 353 compute_stream=self.compute_stream,
|
/external/tensorflow/tensorflow/core/nccl/ |
D | collective_communicator.cc | 89 auto* compute_stream = col_ctx->op_ctx->op_device_context()->stream(); in Enqueue() local 93 compute_stream->parent(), compute_stream, gpu_info, col_ctx->input, in Enqueue()
|
/external/tensorflow/tensorflow/core/common_runtime/device/ |
D | device_event_mgr_test.cc | 159 se::Stream* compute_stream() { return gpu_->stream_->compute; } in compute_stream() function in tensorflow::GPUDeviceTestHelper 382 gpu_helper_->h2d_stream()->ThenWaitFor(gpu_helper_->compute_stream()); in DoAddChain() 393 gpu_helper_->compute_stream()->ThenWaitFor(gpu_helper_->h2d_stream()); in DoAddChain() 396 gpu_helper_->compute_stream(), [times, r]() { in DoAddChain() 406 gpu_helper_->event_mgr()->ThenExecute(gpu_helper_->compute_stream(), in DoAddChain() 413 gpu_helper_->compute_stream(), [times, r]() { in DoAddChain() 417 gpu_helper_->d2h_stream()->ThenWaitFor(gpu_helper_->compute_stream()); in DoAddChain()
|
/external/tensorflow/tensorflow/compiler/xla/pjrt/ |
D | pjrt_stream_executor_client.cc | 285 if (stream != local_device->compute_stream()) { in StallStreamOnError() 286 local_device->compute_stream()->ThenWaitFor(stream); in StallStreamOnError() 394 copy_stream->ThenWaitFor(local_device->compute_stream()); in AllocateDestinationBuffer() 398 local_device->compute_stream()->parent(), dst_buffer)); in AllocateDestinationBuffer() 414 local_device->compute_stream())); in AllocateDestinationBuffer() 416 std::move(event), local_device->compute_stream()); in AllocateDestinationBuffer() 438 tuple_table_stream->ThenWaitFor(local_device->compute_stream()); in AllocateDestinationBuffer() 441 local_device->compute_stream()->parent(), dst_buffer)); in AllocateDestinationBuffer() 1201 local_device_state->compute_stream()) && in Release() 1447 transfer_stream, dst_local_device->compute_stream(), input_buffer, in CopyToDeviceHelper() [all …]
|
D | gpu_device.cc | 94 ordinal_and_device.second->compute_stream() in CreateCudaAsyncAllocator() 98 ordinal_and_device.second->compute_stream()); in CreateCudaAsyncAllocator() 259 ordinal_and_device.second->compute_stream()); in CreateBFCAllocator()
|
D | local_device_state.h | 107 se::Stream* compute_stream() const { return compute_stream_.get(); } in compute_stream() function
|
/external/tensorflow/tensorflow/compiler/jit/ |
D | xla_device_context.cc | 88 std::shared_ptr<se::Stream> compute_stream, in XlaDeviceContext() argument 95 : stream_(std::move(compute_stream)), in XlaDeviceContext()
|
D | xla_device_context.h | 57 std::shared_ptr<se::Stream> compute_stream,
|
/external/tensorflow/tensorflow/core/common_runtime/gpu/ |
D | gpu_device.h | 236 se::Stream* compute_stream, in GPUKernelTracker() argument 241 stream_(compute_stream), in GPUKernelTracker()
|
/external/pytorch/torch/distributed/fsdp/ |
D | _runtime_utils.py | 223 fsdp_state._fsdp_extension.compute_stream = root_state._default_stream 255 state._fsdp_extension.compute_stream = state._default_stream
|