/external/tensorflow/tensorflow/core/kernels/ |
D | gpu_prim_helpers.h | 110 const auto& cu_stream = GetGpuStream(context); variable 115 size, /*begin_bit=*/0, /*end_bit=*/num_bits, cu_stream); 119 size, /*begin_bit=*/0, /*end_bit=*/num_bits, cu_stream); 136 cu_stream); 141 cu_stream); 183 const auto& cu_stream = GetGpuStream(context); in GpuInclusivePrefixSum() local 186 input, output, size, cu_stream); in GpuInclusivePrefixSum() 199 size, cu_stream); in GpuInclusivePrefixSum() 219 const auto& cu_stream = GetGpuStream(context); in GpuSegmentedReduce() local 223 segment_offsets + 1, reduce_op, initial_value, cu_stream); in GpuSegmentedReduce() [all …]
|
D | reduction_gpu_kernels.cu.h | 651 const gpuStream_t& cu_stream) { 657 num_blocks, num_threads, 0, cu_stream, in, out, 679 num_blocks, num_threads, 0, cu_stream, in, 688 TF_RED_WARPSIZE, 0, cu_stream, 698 out, in_size, op, init, cu_stream); 719 const gpuStream_t& cu_stream) { 726 threads_per_block, 0, cu_stream, in, out, 742 transform_iter + 1, op, init, cu_stream); 763 const gpuStream_t& cu_stream) { 783 grid_dim, block_dim, 0, cu_stream, in, out, [all …]
|
D | where_op_gpu.cu.h | 140 const auto& cu_stream = GetGpuStream(ctx); 153 /*stream*/ cu_stream); 172 /*stream*/ cu_stream); 267 const auto& cu_stream = GetGpuStream(ctx); 290 /*stream*/ cu_stream); 310 /*stream*/ cu_stream);
|
D | dynamic_partition_op_gpu.cu.cc | 328 const auto& cu_stream = GetGpuStream(c); in RadixSort() local 342 indices_in_ptr, indices_out_ptr, N, 0, sizeof(int32) * 8, cu_stream); in RadixSort() 354 0, sizeof(int32) * 8, cu_stream); in RadixSort() 361 const auto& cu_stream = GetGpuStream(c); in CountAndSortParts() local 418 aggregates_out_it, num_runs_ptr, reduction_op, N, cu_stream); in CountAndSortParts() 433 N, cu_stream); in CountAndSortParts()
|
D | topk_op_gpu.h | 448 const auto& cu_stream = GetGpuStream(ctx); in LaunchSortKernel() local 519 /* stream */ cu_stream); in LaunchSortKernel() 544 /* stream */ cu_stream); in LaunchSortKernel() 586 const auto& cu_stream = GetGpuStream(context); 587 auto err = impl::LaunchTopKKernel(cu_stream, /* num_shards */ 0,
|
D | softmax_op_gpu.cu.cc | 213 const auto& cu_stream = GetGpuStream(context); in Compute() local 258 numThreadsPerBlock, 0, cu_stream, in Compute() 270 numThreadsPerBlock, 0, cu_stream, in Compute()
|
/external/tensorflow/tensorflow/core/nccl/ |
D | nccl_manager.cc | 705 const cudaStream_t* cu_stream = reinterpret_cast<const cudaStream_t*>( in LoopKernelLaunches() local 745 << " cuda_stream " << cu_stream; in LoopKernelLaunches() 754 nccl_comm, *cu_stream); in LoopKernelLaunches() 782 << " cuda_stream " << cu_stream; in LoopKernelLaunches() 791 collective->root_rank, nccl_comm, *cu_stream); in LoopKernelLaunches() 807 collective->root_rank, nccl_comm, *cu_stream); in LoopKernelLaunches() 820 << " cuda_stream " << cu_stream; in LoopKernelLaunches() 828 data_type, nccl_comm, *cu_stream); in LoopKernelLaunches()
|
/external/tensorflow/tensorflow/core/kernels/rnn/ |
D | lstm_ops_gpu.cu.cc | 240 const auto& cu_stream = GetGpuStream(ctx); in LSTMBlockCellFpropWithCUDA() local 250 TF_CHECK_OK(GpuLaunchKernel(concat_xh<T>, grid_dim, block_dim, 0, cu_stream, in LSTMBlockCellFpropWithCUDA() 272 cu_stream, gates.data(), b.data(), cs_prev.data(), wci.data(), in LSTMBlockCellFpropWithCUDA() 279 cu_stream, gates.data(), b.data(), cs_prev.data(), wci.data(), in LSTMBlockCellFpropWithCUDA() 381 const auto& cu_stream = GetGpuStream(ctx); in LSTMBlockCellBpropWithCUDA() local 388 lstm_gates_bprop<T, gate_layout>, grid_dim_2d, block_dim_2d, 0, cu_stream, in LSTMBlockCellBpropWithCUDA()
|
/external/tensorflow/tensorflow/core/kernels/sparse/ |
D | kernels_gpu.cu.cc | 56 const auto& cu_stream = GetGpuStream(c); in operator ()() local 85 /*stream*/ cu_stream); in operator ()() 109 /*stream*/ cu_stream); in operator ()()
|
/external/tensorflow/tensorflow/compiler/xla/stream_executor/cuda/ |
D | cuda_dnn.cc | 215 CUstream cu_stream = stream ? AsGpuStreamValue(stream) : cudaStreamLegacy; in GetHandle() local 216 if (!current_stream_ || cu_stream != *current_stream_) { in GetHandle() 217 current_stream_ = cu_stream; in GetHandle() 218 const auto status = cudnnSetStream(handle_, cu_stream); in GetHandle() 225 CUstream cu_stream = AsGpuStreamValue(stream); in NotifyStreamDestroyed() local 227 if (current_stream_ && cu_stream == *current_stream_) { in NotifyStreamDestroyed()
|