Searched refs:BlockDim (Results 1 – 15 of 15) sorted by relevance
/external/tensorflow/tensorflow/core/kernels/ |
D | scan_ops_gpu.h | 160 template <typename T, typename Op, int BlockDim = 128, int ItemsPerThread = 4> 163 typedef cub::BlockLoad<T, BlockDim, ItemsPerThread, cub::BLOCK_LOAD_TRANSPOSE> in scan_kernel() 165 typedef cub::BlockStore<T, BlockDim, ItemsPerThread, in scan_kernel() 168 typedef cub::BlockScan<T, BlockDim> BlockScan; in scan_kernel() 187 block_offset += BlockDim * ItemsPerThread) { in scan_kernel() 188 int valid_items = min(BlockDim * ItemsPerThread, in scan_kernel()
|
/external/tensorflow/tensorflow/stream_executor/ |
D | launch_dim.h | 66 struct BlockDim : public Dim3D { struct 67 explicit BlockDim(uint64 x = 1, uint64 y = 1, uint64 z = 1)
|
D | device_description.h | 79 const BlockDim &block_dim_limit() const { return block_dim_limit_; } in block_dim_limit() 184 BlockDim block_dim_limit_; 250 void set_block_dim_limit(const BlockDim &value) { in set_block_dim_limit()
|
D | trace_listener.h | 50 const BlockDim& block_dims, in LaunchSubmit()
|
D | device_description.cc | 73 const BlockDim &block_dim = block_dim_limit(); in ToMap()
|
D | stream_executor_pimpl.h | 453 const BlockDim &block_dims, const KernelBase &kernel, 867 inline Stream &Stream::ThenLaunch(ThreadDim thread_dims, BlockDim block_dims, in ThenLaunch()
|
D | stream_executor_internal.h | 192 const BlockDim &block_dims, const KernelBase &k, in Launch()
|
D | stream_executor_pimpl.cc | 480 const BlockDim &block_dims, in Launch()
|
D | stream.h | 171 Stream &ThenLaunch(ThreadDim thread_dims, BlockDim block_dims,
|
/external/tensorflow/tensorflow/stream_executor/gpu/ |
D | gpu_executor.h | 73 const BlockDim& block_dims, const KernelBase& k, 206 bool FillBlockDimLimit(BlockDim* block_dim_limit) const; 270 const BlockDim& block_dims);
|
/external/tensorflow/tensorflow/compiler/xla/service/gpu/ |
D | kernel_thunk.cc | 104 se::BlockDim(launch_dimensions.block_count()), *kernel, in ExecuteOnStream()
|
/external/tensorflow/tensorflow/stream_executor/rocm/ |
D | rocm_gpu_executor.cc | 287 const BlockDim& block_dims, const KernelBase& kernel, in Launch() 421 const BlockDim& block_dims) { in VlogOccupancyInfo() 819 bool GpuExecutor::FillBlockDimLimit(BlockDim* block_dim_limit) const { in FillBlockDimLimit() 927 BlockDim block_dim_limit; in PopulateDeviceDescription()
|
/external/tensorflow/tensorflow/stream_executor/cuda/ |
D | cuda_gpu_executor.cc | 421 const BlockDim& block_dims, const KernelBase& kernel, in Launch() 468 const BlockDim& block_dims) { in VlogOccupancyInfo() 917 bool GpuExecutor::FillBlockDimLimit(BlockDim* block_dim_limit) const { in FillBlockDimLimit() 1102 BlockDim block_dim_limit; in PopulateDeviceDescription()
|
/external/tensorflow/tensorflow/compiler/xla/service/interpreter/ |
D | executor.h | 66 const BlockDim &block_dims, const KernelBase &kernel, in Launch()
|
/external/tensorflow/tensorflow/stream_executor/host/ |
D | host_gpu_executor.h | 58 const BlockDim &block_dims, const KernelBase &kernel, in Launch()
|