Home
last modified time | relevance | path

Searched refs:BlockDim (Results 1 – 18 of 18) sorted by relevance

/external/tensorflow/tensorflow/core/kernels/
Dscan_ops_gpu.h169 template <typename T, typename Op, int BlockDim = 128, int ItemsPerThread = 4>
170 __launch_bounds__(BlockDim) __global__ in __launch_bounds__() argument
173 typedef gpuprim::BlockLoad<T, BlockDim, ItemsPerThread, in __launch_bounds__()
176 typedef gpuprim::BlockStore<T, BlockDim, ItemsPerThread, in __launch_bounds__()
179 typedef gpuprim::BlockScan<T, BlockDim> BlockScan; in __launch_bounds__()
198 block_offset += BlockDim * ItemsPerThread) { in __launch_bounds__()
199 int valid_items = min(BlockDim * ItemsPerThread, in __launch_bounds__()
/external/tensorflow/tensorflow/stream_executor/
Dlaunch_dim.h66 struct BlockDim : public Dim3D { struct
67 explicit BlockDim(uint64 x = 1, uint64 y = 1, uint64 z = 1)
Ddevice_description.h79 const BlockDim &block_dim_limit() const { return block_dim_limit_; } in block_dim_limit()
187 BlockDim block_dim_limit_;
256 void set_block_dim_limit(const BlockDim &value) { in set_block_dim_limit()
Dtrace_listener.h50 const BlockDim& block_dims, in LaunchSubmit()
Ddevice_description.cc74 const BlockDim &block_dim = block_dim_limit(); in ToMap()
Dstream_executor_internal.h191 const BlockDim &block_dims, const KernelBase &k, in Launch()
Dstream_executor_pimpl.h475 const BlockDim &block_dims, const KernelBase &kernel,
Dstream_executor_pimpl.cc466 const BlockDim &block_dims, in Launch()
Dstream.h185 Stream &ThenLaunch(ThreadDim thread_dims, BlockDim block_dims,
2119 inline Stream &Stream::ThenLaunch(ThreadDim thread_dims, BlockDim block_dims, in ThenLaunch()
/external/tensorflow/tensorflow/stream_executor/gpu/
Dgpu_executor.h73 const BlockDim& block_dims, const KernelBase& k,
268 const BlockDim& block_dims);
Dredzone_allocator.cc228 stream->ThenLaunch(ThreadDim(threads_per_block), BlockDim(block_count), in RunRedzoneChecker()
/external/tensorflow/tensorflow/stream_executor/rocm/
Drocm_gpu_executor.cc297 const BlockDim& block_dims, in Launch()
421 const BlockDim& block_dims) { in VlogOccupancyInfo()
753 bool FillBlockDimLimit(GpuDeviceHandle device, BlockDim* block_dim_limit) { in FillBlockDimLimit()
885 BlockDim block_dim_limit; in CreateDeviceDescription()
/external/llvm-project/clang/lib/CodeGen/
DCGCUDANV.cpp301 Address BlockDim = in emitDeviceStubBodyNew() local
310 /*blockDim=*/BlockDim.getType(), in emitDeviceStubBodyNew()
317 {GridDim.getPointer(), BlockDim.getPointer(), in emitDeviceStubBodyNew()
326 LaunchKernelArgs.add(RValue::getAggregate(BlockDim), Dim3Ty); in emitDeviceStubBodyNew()
/external/tensorflow/tensorflow/stream_executor/cuda/
Dcuda_gpu_executor.cc401 const BlockDim& block_dims, in Launch()
441 const BlockDim& block_dims) { in VlogOccupancyInfo()
848 bool FillBlockDimLimit(GpuDeviceHandle device, BlockDim* block_dim_limit) { in FillBlockDimLimit()
1048 BlockDim block_dim_limit; in CreateDeviceDescription()
/external/tensorflow/tensorflow/stream_executor/host/
Dhost_gpu_executor.h58 const BlockDim &block_dims, const KernelBase &kernel, in Launch()
/external/tensorflow/tensorflow/compiler/xla/service/interpreter/
Dexecutor.h65 const BlockDim &block_dims, const KernelBase &kernel, in Launch()
/external/tensorflow/tensorflow/compiler/xla/service/gpu/
Dbuffer_comparator.cc625 se::BlockDim(block_counts.x, block_counts.y, block_counts.z), in DeviceCompare()
Dstream_executor_util.cc222 se::BlockDim(block_counts.x, block_counts.y, block_counts.z), kernel, in ExecuteKernelOnStream()