Home
last modified time | relevance | path

Searched refs:cu_stream (Results 1 – 10 of 10) sorted by relevance

/external/tensorflow/tensorflow/core/kernels/
Dgpu_prim_helpers.h110 const auto& cu_stream = GetGpuStream(context); variable
115 size, /*begin_bit=*/0, /*end_bit=*/num_bits, cu_stream);
119 size, /*begin_bit=*/0, /*end_bit=*/num_bits, cu_stream);
136 cu_stream);
141 cu_stream);
183 const auto& cu_stream = GetGpuStream(context); in GpuInclusivePrefixSum() local
186 input, output, size, cu_stream); in GpuInclusivePrefixSum()
199 size, cu_stream); in GpuInclusivePrefixSum()
219 const auto& cu_stream = GetGpuStream(context); in GpuSegmentedReduce() local
223 segment_offsets + 1, reduce_op, initial_value, cu_stream); in GpuSegmentedReduce()
[all …]
Dreduction_gpu_kernels.cu.h651 const gpuStream_t& cu_stream) {
657 num_blocks, num_threads, 0, cu_stream, in, out,
679 num_blocks, num_threads, 0, cu_stream, in,
688 TF_RED_WARPSIZE, 0, cu_stream,
698 out, in_size, op, init, cu_stream);
719 const gpuStream_t& cu_stream) {
726 threads_per_block, 0, cu_stream, in, out,
742 transform_iter + 1, op, init, cu_stream);
763 const gpuStream_t& cu_stream) {
783 grid_dim, block_dim, 0, cu_stream, in, out,
[all …]
Dwhere_op_gpu.cu.h140 const auto& cu_stream = GetGpuStream(ctx);
153 /*stream*/ cu_stream);
172 /*stream*/ cu_stream);
267 const auto& cu_stream = GetGpuStream(ctx);
290 /*stream*/ cu_stream);
310 /*stream*/ cu_stream);
Ddynamic_partition_op_gpu.cu.cc328 const auto& cu_stream = GetGpuStream(c); in RadixSort() local
342 indices_in_ptr, indices_out_ptr, N, 0, sizeof(int32) * 8, cu_stream); in RadixSort()
354 0, sizeof(int32) * 8, cu_stream); in RadixSort()
361 const auto& cu_stream = GetGpuStream(c); in CountAndSortParts() local
418 aggregates_out_it, num_runs_ptr, reduction_op, N, cu_stream); in CountAndSortParts()
433 N, cu_stream); in CountAndSortParts()
Dtopk_op_gpu.h448 const auto& cu_stream = GetGpuStream(ctx); in LaunchSortKernel() local
519 /* stream */ cu_stream); in LaunchSortKernel()
544 /* stream */ cu_stream); in LaunchSortKernel()
586 const auto& cu_stream = GetGpuStream(context);
587 auto err = impl::LaunchTopKKernel(cu_stream, /* num_shards */ 0,
Dsoftmax_op_gpu.cu.cc213 const auto& cu_stream = GetGpuStream(context); in Compute() local
258 numThreadsPerBlock, 0, cu_stream, in Compute()
270 numThreadsPerBlock, 0, cu_stream, in Compute()
/external/tensorflow/tensorflow/core/nccl/
Dnccl_manager.cc705 const cudaStream_t* cu_stream = reinterpret_cast<const cudaStream_t*>( in LoopKernelLaunches() local
745 << " cuda_stream " << cu_stream; in LoopKernelLaunches()
754 nccl_comm, *cu_stream); in LoopKernelLaunches()
782 << " cuda_stream " << cu_stream; in LoopKernelLaunches()
791 collective->root_rank, nccl_comm, *cu_stream); in LoopKernelLaunches()
807 collective->root_rank, nccl_comm, *cu_stream); in LoopKernelLaunches()
820 << " cuda_stream " << cu_stream; in LoopKernelLaunches()
828 data_type, nccl_comm, *cu_stream); in LoopKernelLaunches()
/external/tensorflow/tensorflow/core/kernels/rnn/
Dlstm_ops_gpu.cu.cc240 const auto& cu_stream = GetGpuStream(ctx); in LSTMBlockCellFpropWithCUDA() local
250 TF_CHECK_OK(GpuLaunchKernel(concat_xh<T>, grid_dim, block_dim, 0, cu_stream, in LSTMBlockCellFpropWithCUDA()
272 cu_stream, gates.data(), b.data(), cs_prev.data(), wci.data(), in LSTMBlockCellFpropWithCUDA()
279 cu_stream, gates.data(), b.data(), cs_prev.data(), wci.data(), in LSTMBlockCellFpropWithCUDA()
381 const auto& cu_stream = GetGpuStream(ctx); in LSTMBlockCellBpropWithCUDA() local
388 lstm_gates_bprop<T, gate_layout>, grid_dim_2d, block_dim_2d, 0, cu_stream, in LSTMBlockCellBpropWithCUDA()
/external/tensorflow/tensorflow/core/kernels/sparse/
Dkernels_gpu.cu.cc56 const auto& cu_stream = GetGpuStream(c); in operator ()() local
85 /*stream*/ cu_stream); in operator ()()
109 /*stream*/ cu_stream); in operator ()()
/external/tensorflow/tensorflow/compiler/xla/stream_executor/cuda/
Dcuda_dnn.cc215 CUstream cu_stream = stream ? AsGpuStreamValue(stream) : cudaStreamLegacy; in GetHandle() local
216 if (!current_stream_ || cu_stream != *current_stream_) { in GetHandle()
217 current_stream_ = cu_stream; in GetHandle()
218 const auto status = cudnnSetStream(handle_, cu_stream); in GetHandle()
225 CUstream cu_stream = AsGpuStreamValue(stream); in NotifyStreamDestroyed() local
227 if (current_stream_ && cu_stream == *current_stream_) { in NotifyStreamDestroyed()