Home
last modified time | relevance | path

Searched defs:n_block (Results 1 – 8 of 8) sorted by relevance

/external/ComputeLibrary/src/core/NEON/kernels/arm_gemm/
Dgemm_hybrid_quantized.hpp113 unsigned int n_block = args._cfg->outer_block_size; in compute_n_block() local
135 unsigned int n_block = (scaled_l2_size - k_block_area) / (sizeof(Toi) * k_block); in compute_n_block() local
Dgemm_hybrid.hpp100 unsigned int n_block = args._cfg->outer_block_size; in compute_n_block() local
Dgemm_hybrid_quantized_inline.hpp120 …unsigned int n_block = (((L2_size * 9) / 10) - (k_block * sizeof(Toi) * (strategy::out_width() + s… in compute_n_block() local
Dgemv_pretransposed.hpp97 unsigned int n_block=0; member in arm_gemm::GemvPretransposed
Dgemm_hybrid_indirect.hpp367 unsigned int n_block = iceildiv(args._Nsize, columns_needed); local
/external/pytorch/aten/src/ATen/native/transformers/cuda/flash_attn/
Dflash_bwd_preprocess_kernel.h145 const int n_block = blockIdx.x; in clear_dKVaccum() local
277 const int n_block = blockIdx.x; in convert_dKV() local
Dflash_bwd_kernel.h80 …mpute_dq_dk_dv_1colblock(const Params &params, const int bidb, const int bidh, const int n_block) { in compute_dq_dk_dv_1colblock()
805 for (int n_block = n_block_max - 2; n_block > 0; n_block--) { in compute_dq_dk_dv() local
823 …for (int n_block = blockIdx.x; n_block < (params.seqlen_k + Kernel_traits::kBlockN - 1) / Kernel_t… in compute_dq_dk_dv_seqk_parallel() local
Dflash_fwd_kernel.h249 int n_block = n_block_max - 1; in compute_attn_1rowblock() local
712 for (int n_block = n_block_max - 1; n_block >= n_block_copy_min; n_block--) { in compute_attn_1rowblock_splitkv() local
802 int n_block = n_block_max - 1; in compute_attn_1rowblock_splitkv() local