/external/tensorflow/tensorflow/core/kernels/ |
D | scan_ops_gpu.h | 169 template <typename T, typename Op, int BlockDim = 128, int ItemsPerThread = 4> 170 __launch_bounds__(BlockDim) __global__ in __launch_bounds__() argument 173 typedef gpuprim::BlockLoad<T, BlockDim, ItemsPerThread, in __launch_bounds__() 176 typedef gpuprim::BlockStore<T, BlockDim, ItemsPerThread, in __launch_bounds__() 179 typedef gpuprim::BlockScan<T, BlockDim> BlockScan; in __launch_bounds__() 198 block_offset += BlockDim * ItemsPerThread) { in __launch_bounds__() 199 int valid_items = min(BlockDim * ItemsPerThread, in __launch_bounds__()
|
/external/tensorflow/tensorflow/stream_executor/ |
D | launch_dim.h | 66 struct BlockDim : public Dim3D { struct 67 explicit BlockDim(uint64 x = 1, uint64 y = 1, uint64 z = 1)
|
D | device_description.h | 79 const BlockDim &block_dim_limit() const { return block_dim_limit_; } in block_dim_limit() 187 BlockDim block_dim_limit_; 256 void set_block_dim_limit(const BlockDim &value) { in set_block_dim_limit()
|
D | trace_listener.h | 50 const BlockDim& block_dims, in LaunchSubmit()
|
D | device_description.cc | 74 const BlockDim &block_dim = block_dim_limit(); in ToMap()
|
D | stream_executor_internal.h | 191 const BlockDim &block_dims, const KernelBase &k, in Launch()
|
D | stream_executor_pimpl.h | 475 const BlockDim &block_dims, const KernelBase &kernel,
|
D | stream_executor_pimpl.cc | 466 const BlockDim &block_dims, in Launch()
|
D | stream.h | 185 Stream &ThenLaunch(ThreadDim thread_dims, BlockDim block_dims, 2119 inline Stream &Stream::ThenLaunch(ThreadDim thread_dims, BlockDim block_dims, in ThenLaunch()
|
/external/tensorflow/tensorflow/stream_executor/gpu/ |
D | gpu_executor.h | 73 const BlockDim& block_dims, const KernelBase& k, 268 const BlockDim& block_dims);
|
D | redzone_allocator.cc | 228 stream->ThenLaunch(ThreadDim(threads_per_block), BlockDim(block_count), in RunRedzoneChecker()
|
/external/tensorflow/tensorflow/stream_executor/rocm/ |
D | rocm_gpu_executor.cc | 297 const BlockDim& block_dims, in Launch() 421 const BlockDim& block_dims) { in VlogOccupancyInfo() 753 bool FillBlockDimLimit(GpuDeviceHandle device, BlockDim* block_dim_limit) { in FillBlockDimLimit() 885 BlockDim block_dim_limit; in CreateDeviceDescription()
|
/external/llvm-project/clang/lib/CodeGen/ |
D | CGCUDANV.cpp | 301 Address BlockDim = in emitDeviceStubBodyNew() local 310 /*blockDim=*/BlockDim.getType(), in emitDeviceStubBodyNew() 317 {GridDim.getPointer(), BlockDim.getPointer(), in emitDeviceStubBodyNew() 326 LaunchKernelArgs.add(RValue::getAggregate(BlockDim), Dim3Ty); in emitDeviceStubBodyNew()
|
/external/tensorflow/tensorflow/stream_executor/cuda/ |
D | cuda_gpu_executor.cc | 401 const BlockDim& block_dims, in Launch() 441 const BlockDim& block_dims) { in VlogOccupancyInfo() 848 bool FillBlockDimLimit(GpuDeviceHandle device, BlockDim* block_dim_limit) { in FillBlockDimLimit() 1048 BlockDim block_dim_limit; in CreateDeviceDescription()
|
/external/tensorflow/tensorflow/stream_executor/host/ |
D | host_gpu_executor.h | 58 const BlockDim &block_dims, const KernelBase &kernel, in Launch()
|
/external/tensorflow/tensorflow/compiler/xla/service/interpreter/ |
D | executor.h | 65 const BlockDim &block_dims, const KernelBase &kernel, in Launch()
|
/external/tensorflow/tensorflow/compiler/xla/service/gpu/ |
D | buffer_comparator.cc | 625 se::BlockDim(block_counts.x, block_counts.y, block_counts.z), in DeviceCompare()
|
D | stream_executor_util.cc | 222 se::BlockDim(block_counts.x, block_counts.y, block_counts.z), kernel, in ExecuteKernelOnStream()
|