Home
last modified time | relevance | path

Searched refs:getThreadsGrid (Results 1 – 8 of 8) sorted by relevance

/external/pytorch/aten/src/ATen/native/sparse/cuda/
DSparseSemiStructuredApplyDense.cu35 static CUTLASS_HOST_DEVICE dim3 getThreadsGrid() {
41 gmem_threads_masks += blockIdx.y * getThreadsGrid().y + threadIdx.y;
42 int64_t strideX = gridDim.y * getThreadsGrid().y;
44 (blockIdx.x * getThreadsGrid().x + threadIdx.x) * strideX;
148 TORCH_CHECK(threads_masks.size(0) == p.getBlocksGrid().x * p.getThreadsGrid().x); in _sparse_semi_structured_apply_dense()
149 TORCH_CHECK(threads_masks.size(1) == p.getBlocksGrid().y * p.getThreadsGrid().y); in _sparse_semi_structured_apply_dense()
169 <<<p.getBlocksGrid(), p.getThreadsGrid(), smem_bytes, stream>>>(p); in _sparse_semi_structured_apply_dense()
172 <<<p.getBlocksGrid(), p.getThreadsGrid(), smem_bytes, stream>>>(p); in _sparse_semi_structured_apply_dense()
DSparseSemiSturcturedApply.cu69 threads_masks.size(0) == p.getBlocksGrid().x * p.getThreadsGrid().x);
71 threads_masks.size(1) == p.getBlocksGrid().y * p.getThreadsGrid().y);
81 p.getThreadsGrid(),
DSparseSemiStructuredTile.cu244 {p.getBlocksGrid().x * p.getThreadsGrid().x,
245 p.getBlocksGrid().y * p.getThreadsGrid().y,
260 p.getThreadsGrid(),
DSparseSemiStructuredPack.h152 static CUTLASS_HOST_DEVICE dim3 getThreadsGrid() { in getThreadsGrid() function
158 gmem_threads_masks += blockIdx.y * getThreadsGrid().y + threadIdx.y; in getCurrentThreadIndices()
159 int64_t strideX = gridDim.y * getThreadsGrid().y; in getCurrentThreadIndices()
161 (blockIdx.x * getThreadsGrid().x + threadIdx.x) * strideX; in getCurrentThreadIndices()
/external/pytorch/aten/src/ATen/native/transformers/cuda/
Dattention_backward.cu717 kernel_fn<<<p.getBlocksGrid(), p.getThreadsGrid(), smem_bytes, stream>>>(p); in _efficient_attention_backward()
Dattention.cu1290 kernel_fn<<<blocks, p.getThreadsGrid(), smem_bytes, stream>>>(p); in _efficient_attention_forward()
/external/pytorch/aten/src/ATen/native/transformers/cuda/mem_eff_attention/
Dkernel_forward.h342 __host__ dim3 getThreadsGrid() const { in getThreadsGrid() function
Dkernel_backward.h853 __host__ dim3 getThreadsGrid() const { in getThreadsGrid() function