Lines Matching refs:hsg_config
122 struct hsg_config hsg_config = variable
225 uint32_t const bs_keys = hsg_config.block.smem_bs / (hsg_config.type.words * sizeof(uint32_t)); in hsg_config_init_shared()
227 hsg_config.warp.skpw_bs = bs_keys / hsg_merge[0].warps; in hsg_config_init_shared()
240 uint32_t const bs_threads = merge->warps << hsg_config.warp.lanes_log2; in hsg_merge_levels_init_shared()
241 …uint32_t const bs_keys = hsg_config.block.smem_bs / (hsg_config.type.words * sizeof(uint32_t)… in hsg_merge_levels_init_shared()
243 … uint32_t const bs_kpt_mod = (bs_kpt / hsg_config.block.warps_mod) * hsg_config.block.warps_mod; in hsg_merge_levels_init_shared()
254 merge->rows_bs = MIN_MACRO(bs_rows_even, hsg_config.thread.regs); in hsg_merge_levels_init_shared()
269 uint32_t const bc_warps_min = MAX_MACRO(merge->warps,hsg_config.block.warps_min); in hsg_merge_levels_init_shared()
270 uint32_t const bc_threads = bc_warps_min << hsg_config.warp.lanes_log2; in hsg_merge_levels_init_shared()
271 …uint32_t const bc_block_rd = (((hsg_config.block.smem_bc * bc_warps_min) / hsg_config.block.warp… in hsg_merge_levels_init_shared()
272 hsg_config.block.smem_quantum) * hsg_config.block.smem_quantum; in hsg_merge_levels_init_shared()
273 uint32_t const bc_block_max = MAX_MACRO(bc_block_rd,hsg_config.block.smem_min); in hsg_merge_levels_init_shared()
274 uint32_t const bc_block_smem = MIN_MACRO(bc_block_max,hsg_config.block.smem_bs); in hsg_merge_levels_init_shared()
277 uint32_t const bc_keys = bc_block_smem / (hsg_config.type.words * sizeof(uint32_t)); in hsg_merge_levels_init_shared()
279 … uint32_t const bc_kpt_mod = (bc_kpt / hsg_config.block.warps_mod) * hsg_config.block.warps_mod; in hsg_merge_levels_init_shared()
281 merge->rows_bc = MIN_MACRO(bc_kpt_mod, hsg_config.thread.regs); in hsg_merge_levels_init_shared()
416 if (n_max <= (merge->rows_bs + hsg_config.thread.xtra)) in hsg_merge_levels_hint()
421 hsg_config.thread.xtra = n_max - merge->rows_bs; in hsg_merge_levels_hint()
423 uint32_t const r_total = hsg_config.thread.regs + hsg_config.thread.xtra; in hsg_merge_levels_hint()
424 uint32_t const r_limit = (hsg_config.type.words == 1) ? 120 : 58; in hsg_merge_levels_hint()
429 hsg_config.thread.regs, in hsg_merge_levels_hint()
430 hsg_config.thread.xtra); in hsg_merge_levels_hint()
436 hsg_config.thread.regs, in hsg_merge_levels_hint()
437 hsg_config.thread.xtra, in hsg_merge_levels_hint()
517 uint32_t const idx = hsg_config.thread.regs / 2 - 1; in hsg_thread_sort()
590 uint32_t const n = hsg_config.thread.regs; in hsg_warp_half_network()
623 uint32_t const n = hsg_config.thread.regs; in hsg_warp_flip_network()
649 uint32_t const n = hsg_config.thread.regs; in hsg_bx_warp_load()
661 uint32_t const n = hsg_config.thread.regs; in hsg_bx_warp_store()
712 ops = hsg_thread_merge(ops,hsg_config.thread.regs); in hsg_warp_half()
728 uint32_t const level = hsg_config.warp.lanes; in hsg_warp_merge()
764 …uint32_t const scale = net_even >= hsg_config.block.warps_min ? 1 : hsg_config.block.warps_min / n… in hsg_bc_half_merge_level()
777 ops = hsg_op(ops,BC_REG_GLOBAL_LOAD_LEFT(ll,gmem_base+(ll-1)*hsg_config.thread.regs,0)); in hsg_bc_half_merge_level()
806 uint32_t const warps = MAX_MACRO(merge->warps,hsg_config.block.warps_min); in hsg_bc_half_merge()
819 for (uint32_t r_lo = 1; r_lo <= hsg_config.thread.regs; r_lo += s_max) in hsg_bc_half_merge()
822 uint32_t const r_rem = hsg_config.thread.regs + 1 - r_lo; in hsg_bc_half_merge()
973 uint32_t const r_mid = hsg_config.thread.regs/2 + 1; in hsg_bs_flip_merge()
979 uint32_t r_hi = hsg_config.thread.regs + 1 - r_lo; in hsg_bs_flip_merge()
1014 ops = hsg_warp_half(ops,hsg_config.warp.lanes); in hsg_bs_flip_merge()
1141 ops = hsg_warp_half(ops,hsg_config.warp.lanes); in hsg_bc_clean()
1351 …for (uint32_t scale_log2=hsg_config.merge.flip.lo; scale_log2<=hsg_config.merge.flip.hi; scale_log… in hsg_xm_merge_all()
1357 …for (uint32_t scale_log2=hsg_config.merge.half.lo; scale_log2<=hsg_config.merge.half.hi; scale_log… in hsg_xm_merge_all()
1371 struct hsg_config const * const config, in hsg_op_translate_depth()
1401 struct hsg_config const * const config, in hsg_op_translate()
1441 hsg_config.block.smem_min = atoi(optarg); in main()
1445 hsg_config.block.smem_quantum = atoi(optarg); in main()
1449 hsg_config.block.smem_bs = atoi(optarg); in main()
1452 if (hsg_config.block.smem_bc == UINT32_MAX) in main()
1453 hsg_config.block.smem_bc = hsg_config.block.smem_bs; in main()
1457 hsg_config.block.smem_bc = atoi(optarg); in main()
1461 hsg_config.warp.lanes = atoi(optarg); in main()
1462 hsg_config.warp.lanes_log2 = msb_idx_u32(hsg_config.warp.lanes); in main()
1481 if (hsg_config.block.warps_max == UINT32_MAX) in main()
1482 hsg_config.block.warps_max = pow2_ru_u32(warps); in main()
1488 hsg_config.block.warps_max = atoi(optarg); in main()
1493 hsg_config.block.warps_min = atoi(optarg); in main()
1498 hsg_config.block.warps_mod = atoi(optarg); in main()
1511 hsg_config.thread.regs = regs; in main()
1516 hsg_config.thread.xtra = atoi(optarg); in main()
1520 hsg_config.type.words = atoi(optarg); in main()
1524 hsg_config.merge.flip.lo = atoi(optarg); in main()
1528 hsg_config.merge.flip.hi = atoi(optarg); in main()
1532 hsg_config.merge.half.lo = atoi(optarg); in main()
1536 hsg_config.merge.half.hi = atoi(optarg); in main()
1540 hsg_config.merge.flip.warps = atoi(optarg); in main()
1544 hsg_config.merge.half.warps = atoi(optarg); in main()
1605 hsg_config.type.words, in main()
1606 hsg_config.block.smem_bs, in main()
1607 hsg_config.block.smem_bc, in main()
1608 hsg_config.thread.regs, in main()
1674 hsg_op_translate(hsg_target_pfn,&target,&hsg_config,hsg_merge,ops_begin); in main()