Lines Matching +full:vm +full:- +full:other
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
40 {"force-waitcnt", DEBUG_FORCE_WAITCNT},
72 program->stage = stage; in init_program()
73 program->config = config; in init_program()
74 program->info = *info; in init_program()
75 program->gfx_level = gfx_level; in init_program()
78 case GFX6: program->family = CHIP_TAHITI; break; in init_program()
79 case GFX7: program->family = CHIP_BONAIRE; break; in init_program()
80 case GFX8: program->family = CHIP_POLARIS10; break; in init_program()
81 case GFX9: program->family = CHIP_VEGA10; break; in init_program()
82 case GFX10: program->family = CHIP_NAVI10; break; in init_program()
83 default: program->family = CHIP_UNKNOWN; break; in init_program()
86 program->family = family; in init_program()
88 program->wave_size = info->wave_size; in init_program()
89 program->lane_mask = program->wave_size == 32 ? s1 : s2; in init_program()
91 program->dev.lds_encoding_granule = gfx_level >= GFX11 && stage == fragment_fs ? 1024 : in init_program()
93 program->dev.lds_alloc_granule = gfx_level >= GFX10_3 ? 1024 : program->dev.lds_encoding_granule; in init_program()
94 program->dev.lds_limit = gfx_level >= GFX7 ? 65536 : 32768; in init_program()
95 /* apparently gfx702 also has 16-bank LDS but I can't find a family for that */ in init_program()
96 program->dev.has_16bank_lds = family == CHIP_KABINI || family == CHIP_STONEY; in init_program()
98 program->dev.vgpr_limit = 256; in init_program()
99 program->dev.physical_vgprs = 256; in init_program()
100 program->dev.vgpr_alloc_granule = 4; in init_program()
103 program->dev.physical_sgprs = 5120; /* doesn't matter as long as it's at least 128 * 40 */ in init_program()
104 program->dev.physical_vgprs = program->wave_size == 32 ? 1024 : 512; in init_program()
105 program->dev.sgpr_alloc_granule = 128; in init_program()
106 program->dev.sgpr_limit = in init_program()
107 108; /* includes VCC, which can be treated as s[106-107] on GFX10+ */ in init_program()
109 program->dev.vgpr_alloc_granule = program->wave_size == 32 ? 16 : 8; in init_program()
111 program->dev.vgpr_alloc_granule = program->wave_size == 32 ? 8 : 4; in init_program()
112 } else if (program->gfx_level >= GFX8) { in init_program()
113 program->dev.physical_sgprs = 800; in init_program()
114 program->dev.sgpr_alloc_granule = 16; in init_program()
115 program->dev.sgpr_limit = 102; in init_program()
117 program->dev.sgpr_alloc_granule = 96; /* workaround hardware bug */ in init_program()
119 program->dev.physical_sgprs = 512; in init_program()
120 program->dev.sgpr_alloc_granule = 8; in init_program()
121 program->dev.sgpr_limit = 104; in init_program()
124 program->dev.max_wave64_per_simd = 10; in init_program()
125 if (program->gfx_level >= GFX10_3) in init_program()
126 program->dev.max_wave64_per_simd = 16; in init_program()
127 else if (program->gfx_level == GFX10) in init_program()
128 program->dev.max_wave64_per_simd = 20; in init_program()
129 else if (program->family >= CHIP_POLARIS10 && program->family <= CHIP_VEGAM) in init_program()
130 program->dev.max_wave64_per_simd = 8; in init_program()
132 program->dev.simd_per_cu = program->gfx_level >= GFX10 ? 2 : 4; in init_program()
134 switch (program->family) { in init_program()
141 case CHIP_RENOIR: program->dev.xnack_enabled = true; break; in init_program()
145 program->dev.sram_ecc_enabled = program->family == CHIP_ARCTURUS; in init_program()
147 program->dev.has_fast_fma32 = program->gfx_level >= GFX9; in init_program()
148 if (program->family == CHIP_TAHITI || program->family == CHIP_CARRIZO || in init_program()
149 program->family == CHIP_HAWAII) in init_program()
150 program->dev.has_fast_fma32 = true; in init_program()
151 program->dev.has_mac_legacy32 = program->gfx_level <= GFX7 || program->gfx_level >= GFX10; in init_program()
153 program->dev.fused_mad_mix = program->gfx_level >= GFX10; in init_program()
154 if (program->family == CHIP_VEGA12 || program->family == CHIP_VEGA20 || in init_program()
155 program->family == CHIP_ARCTURUS || program->family == CHIP_ALDEBARAN) in init_program()
156 program->dev.fused_mad_mix = true; in init_program()
158 if (program->gfx_level >= GFX11) { in init_program()
159 program->dev.scratch_global_offset_min = -4096; in init_program()
160 program->dev.scratch_global_offset_max = 4095; in init_program()
161 } else if (program->gfx_level >= GFX10 || program->gfx_level == GFX8) { in init_program()
162 program->dev.scratch_global_offset_min = -2048; in init_program()
163 program->dev.scratch_global_offset_max = 2047; in init_program()
164 } else if (program->gfx_level == GFX9) { in init_program()
165 /* The minimum is actually -4096, but negative offsets are broken when SADDR is used. */ in init_program()
166 program->dev.scratch_global_offset_min = 0; in init_program()
167 program->dev.scratch_global_offset_max = 4095; in init_program()
170 program->wgp_mode = wgp_mode; in init_program()
172 program->progress = CompilationProgress::after_isel; in init_program()
174 program->next_fp_mode.preserve_signed_zero_inf_nan32 = false; in init_program()
175 program->next_fp_mode.preserve_signed_zero_inf_nan16_64 = false; in init_program()
176 program->next_fp_mode.must_flush_denorms32 = false; in init_program()
177 program->next_fp_mode.must_flush_denorms16_64 = false; in init_program()
178 program->next_fp_mode.care_about_round32 = false; in init_program()
179 program->next_fp_mode.care_about_round16_64 = false; in init_program()
180 program->next_fp_mode.denorm16_64 = fp_denorm_keep; in init_program()
181 program->next_fp_mode.denorm32 = 0; in init_program()
182 program->next_fp_mode.round16_64 = fp_round_ne; in init_program()
183 program->next_fp_mode.round32 = fp_round_ne; in init_program()
189 switch (instr->format) { in get_sync_info()
190 case Format::SMEM: return instr->smem().sync; in get_sync_info()
191 case Format::MUBUF: return instr->mubuf().sync; in get_sync_info()
192 case Format::MIMG: return instr->mimg().sync; in get_sync_info()
193 case Format::MTBUF: return instr->mtbuf().sync; in get_sync_info()
196 case Format::SCRATCH: return instr->flatlike().sync; in get_sync_info()
197 case Format::DS: return instr->ds().sync; in get_sync_info()
205 if (!instr->isVALU()) in can_use_SDWA()
208 if (gfx_level < GFX8 || gfx_level >= GFX11 || instr->isDPP() || instr->isVOP3P()) in can_use_SDWA()
211 if (instr->isSDWA()) in can_use_SDWA()
214 if (instr->isVOP3()) { in can_use_SDWA()
215 VOP3_instruction& vop3 = instr->vop3(); in can_use_SDWA()
216 if (instr->format == Format::VOP3) in can_use_SDWA()
218 if (vop3.clamp && instr->isVOPC() && gfx_level != GFX8) in can_use_SDWA()
224 if (!pre_ra && instr->definitions.size() >= 2) in can_use_SDWA()
227 for (unsigned i = 1; i < instr->operands.size(); i++) { in can_use_SDWA()
228 if (instr->operands[i].isLiteral()) in can_use_SDWA()
230 if (gfx_level < GFX9 && !instr->operands[i].isOfType(RegType::vgpr)) in can_use_SDWA()
235 if (!instr->definitions.empty() && instr->definitions[0].bytes() > 4 && !instr->isVOPC()) in can_use_SDWA()
238 if (!instr->operands.empty()) { in can_use_SDWA()
239 if (instr->operands[0].isLiteral()) in can_use_SDWA()
241 if (gfx_level < GFX9 && !instr->operands[0].isOfType(RegType::vgpr)) in can_use_SDWA()
243 if (instr->operands[0].bytes() > 4) in can_use_SDWA()
245 if (instr->operands.size() > 1 && instr->operands[1].bytes() > 4) in can_use_SDWA()
249 bool is_mac = instr->opcode == aco_opcode::v_mac_f32 || instr->opcode == aco_opcode::v_mac_f16 || in can_use_SDWA()
250 instr->opcode == aco_opcode::v_fmac_f32 || instr->opcode == aco_opcode::v_fmac_f16; in can_use_SDWA()
256 if (!pre_ra && instr->isVOPC() && gfx_level == GFX8) in can_use_SDWA()
258 if (!pre_ra && instr->operands.size() >= 3 && !is_mac) in can_use_SDWA()
261 return instr->opcode != aco_opcode::v_madmk_f32 && instr->opcode != aco_opcode::v_madak_f32 && in can_use_SDWA()
262 instr->opcode != aco_opcode::v_madmk_f16 && instr->opcode != aco_opcode::v_madak_f16 && in can_use_SDWA()
263 instr->opcode != aco_opcode::v_readfirstlane_b32 && in can_use_SDWA()
264 instr->opcode != aco_opcode::v_clrexcp && instr->opcode != aco_opcode::v_swap_b32; in can_use_SDWA()
271 if (instr->isSDWA()) in convert_to_SDWA()
276 (Format)(((uint16_t)tmp->format & ~(uint16_t)Format::VOP3) | (uint16_t)Format::SDWA); in convert_to_SDWA()
277 instr.reset(create_instruction<SDWA_instruction>(tmp->opcode, format, tmp->operands.size(), in convert_to_SDWA()
278 tmp->definitions.size())); in convert_to_SDWA()
279 std::copy(tmp->operands.cbegin(), tmp->operands.cend(), instr->operands.begin()); in convert_to_SDWA()
280 std::copy(tmp->definitions.cbegin(), tmp->definitions.cend(), instr->definitions.begin()); in convert_to_SDWA()
282 SDWA_instruction& sdwa = instr->sdwa(); in convert_to_SDWA()
284 if (tmp->isVOP3()) { in convert_to_SDWA()
285 VOP3_instruction& vop3 = tmp->vop3(); in convert_to_SDWA()
292 for (unsigned i = 0; i < instr->operands.size(); i++) { in convert_to_SDWA()
297 sdwa.sel[i] = SubdwordSel(instr->operands[i].bytes(), 0, false); in convert_to_SDWA()
300 sdwa.dst_sel = SubdwordSel(instr->definitions[0].bytes(), 0, false); in convert_to_SDWA()
302 if (instr->definitions[0].getTemp().type() == RegType::sgpr && gfx_level == GFX8) in convert_to_SDWA()
303 instr->definitions[0].setFixed(vcc); in convert_to_SDWA()
304 if (instr->definitions.size() >= 2) in convert_to_SDWA()
305 instr->definitions[1].setFixed(vcc); in convert_to_SDWA()
306 if (instr->operands.size() >= 3) in convert_to_SDWA()
307 instr->operands[2].setFixed(vcc); in convert_to_SDWA()
309 instr->pass_flags = tmp->pass_flags; in convert_to_SDWA()
317 assert(instr->isVALU() && !instr->operands.empty()); in can_use_DPP()
319 if (instr->isDPP()) in can_use_DPP()
320 return instr->isDPP8() == dpp8; in can_use_DPP()
322 if (instr->operands.size() && instr->operands[0].isLiteral()) in can_use_DPP()
325 if (instr->isSDWA()) in can_use_DPP()
328 if (!pre_ra && (instr->isVOPC() || instr->definitions.size() > 1) && in can_use_DPP()
329 instr->definitions.back().physReg() != vcc) in can_use_DPP()
332 if (!pre_ra && instr->operands.size() >= 3 && instr->operands[2].physReg() != vcc) in can_use_DPP()
335 if (instr->isVOP3()) { in can_use_DPP()
336 const VOP3_instruction* vop3 = &instr->vop3(); in can_use_DPP()
337 if (vop3->clamp || vop3->omod || vop3->opsel) in can_use_DPP()
341 if (instr->format == Format::VOP3) in can_use_DPP()
343 if (instr->operands.size() > 1 && !instr->operands[1].isOfType(RegType::vgpr)) in can_use_DPP()
347 /* there are more cases but those all take 64-bit inputs */ in can_use_DPP()
348 return instr->opcode != aco_opcode::v_madmk_f32 && instr->opcode != aco_opcode::v_madak_f32 && in can_use_DPP()
349 instr->opcode != aco_opcode::v_madmk_f16 && instr->opcode != aco_opcode::v_madak_f16 && in can_use_DPP()
350 instr->opcode != aco_opcode::v_readfirstlane_b32 && in can_use_DPP()
351 instr->opcode != aco_opcode::v_cvt_f64_i32 && in can_use_DPP()
352 instr->opcode != aco_opcode::v_cvt_f64_f32 && instr->opcode != aco_opcode::v_cvt_f64_u32; in can_use_DPP()
358 if (instr->isDPP()) in convert_to_DPP()
362 Format format = (Format)(((uint32_t)tmp->format & ~(uint32_t)Format::VOP3) | in convert_to_DPP()
365 instr.reset(create_instruction<DPP8_instruction>(tmp->opcode, format, tmp->operands.size(), in convert_to_DPP()
366 tmp->definitions.size())); in convert_to_DPP()
368 instr.reset(create_instruction<DPP16_instruction>(tmp->opcode, format, tmp->operands.size(), in convert_to_DPP()
369 tmp->definitions.size())); in convert_to_DPP()
370 std::copy(tmp->operands.cbegin(), tmp->operands.cend(), instr->operands.begin()); in convert_to_DPP()
371 for (unsigned i = 0; i < instr->definitions.size(); i++) in convert_to_DPP()
372 instr->definitions[i] = tmp->definitions[i]; in convert_to_DPP()
375 DPP8_instruction* dpp = &instr->dpp8(); in convert_to_DPP()
377 dpp->lane_sel[i] = i; in convert_to_DPP()
379 DPP16_instruction* dpp = &instr->dpp16(); in convert_to_DPP()
380 dpp->dpp_ctrl = dpp_quad_perm(0, 1, 2, 3); in convert_to_DPP()
381 dpp->row_mask = 0xf; in convert_to_DPP()
382 dpp->bank_mask = 0xf; in convert_to_DPP()
384 if (tmp->isVOP3()) { in convert_to_DPP()
385 const VOP3_instruction* vop3 = &tmp->vop3(); in convert_to_DPP()
386 memcpy(dpp->neg, vop3->neg, sizeof(dpp->neg)); in convert_to_DPP()
387 memcpy(dpp->abs, vop3->abs, sizeof(dpp->abs)); in convert_to_DPP()
391 if (instr->isVOPC() || instr->definitions.size() > 1) in convert_to_DPP()
392 instr->definitions.back().setFixed(vcc); in convert_to_DPP()
394 if (instr->operands.size() >= 3) in convert_to_DPP()
395 instr->operands[2].setFixed(vcc); in convert_to_DPP()
397 instr->pass_flags = tmp->pass_flags; in convert_to_DPP()
438 case aco_opcode::v_cvt_pknorm_u16_f16: return idx != -1; in can_use_opsel()
501 default: return gfx_level >= GFX10 && can_use_opsel(gfx_level, op, -1); in instr_is_16bit()
565 if (instr->isVALU()) { in needs_exec_mask()
566 return instr->opcode != aco_opcode::v_readlane_b32 && in needs_exec_mask()
567 instr->opcode != aco_opcode::v_readlane_b32_e64 && in needs_exec_mask()
568 instr->opcode != aco_opcode::v_writelane_b32 && in needs_exec_mask()
569 instr->opcode != aco_opcode::v_writelane_b32_e64; in needs_exec_mask()
572 if (instr->isVMEM() || instr->isFlatLike()) in needs_exec_mask()
575 if (instr->isSALU() || instr->isBranch() || instr->isSMEM() || instr->isBarrier()) in needs_exec_mask()
576 return instr->reads_exec(); in needs_exec_mask()
578 if (instr->isPseudo()) { in needs_exec_mask()
579 switch (instr->opcode) { in needs_exec_mask()
585 for (Definition def : instr->definitions) { in needs_exec_mask()
589 return instr->reads_exec(); in needs_exec_mask()
596 case aco_opcode::p_init_scratch: return instr->reads_exec(); in needs_exec_mask()
617 info->ordered = aco_opcode::num_opcodes; in get_cmp_info()
618 info->unordered = aco_opcode::num_opcodes; in get_cmp_info()
619 info->swapped = aco_opcode::num_opcodes; in get_cmp_info()
620 info->inverse = aco_opcode::num_opcodes; in get_cmp_info()
621 info->f32 = aco_opcode::num_opcodes; in get_cmp_info()
623 // clang-format off in get_cmp_info()
627 info->ordered = aco_opcode::v_cmp_##ord##_f##sz; \ in get_cmp_info()
628 info->unordered = aco_opcode::v_cmp_n##unord##_f##sz; \ in get_cmp_info()
629 info->swapped = op == aco_opcode::v_cmp_##ord##_f##sz ? aco_opcode::v_cmp_##ord_swap##_f##sz \ in get_cmp_info()
631 info->inverse = op == aco_opcode::v_cmp_n##unord##_f##sz ? aco_opcode::v_cmp_##unord##_f##sz \ in get_cmp_info()
633 info->f32 = op == aco_opcode::v_cmp_##ord##_f##sz ? aco_opcode::v_cmp_##ord##_f32 \ in get_cmp_info()
635 info->vcmpx = op == aco_opcode::v_cmp_##ord##_f##sz ? aco_opcode::v_cmpx_##ord##_f##sz \ in get_cmp_info()
637 info->size = sz; \ in get_cmp_info()
653 info->f32 = aco_opcode::v_cmp_u_f32; \ in get_cmp_info()
654 info->swapped = aco_opcode::v_cmp_u_f##sz; \ in get_cmp_info()
655 info->inverse = aco_opcode::v_cmp_o_f##sz; \ in get_cmp_info()
656 info->vcmpx = aco_opcode::v_cmpx_u_f##sz; \ in get_cmp_info()
657 info->size = sz; \ in get_cmp_info()
660 info->f32 = aco_opcode::v_cmp_o_f32; \ in get_cmp_info()
661 info->swapped = aco_opcode::v_cmp_o_f##sz; \ in get_cmp_info()
662 info->inverse = aco_opcode::v_cmp_u_f##sz; \ in get_cmp_info()
663 info->vcmpx = aco_opcode::v_cmpx_o_f##sz; \ in get_cmp_info()
664 info->size = sz; \ in get_cmp_info()
672 info->swapped = aco_opcode::v_cmp_##swap##_##type##sz; \ in get_cmp_info()
673 info->inverse = aco_opcode::v_cmp_##inv##_##type##sz; \ in get_cmp_info()
674 info->vcmpx = aco_opcode::v_cmpx_##op##_##type##sz; \ in get_cmp_info()
675 info->size = sz; \ in get_cmp_info()
694 info->vcmpx = aco_opcode::v_cmpx_class_f##sz; \ in get_cmp_info()
695 info->size = sz; \ in get_cmp_info()
701 // clang-format on in get_cmp_info()
758 if (instr->isDPP()) in can_swap_operands()
761 if (instr->operands[0].isConstant() || in can_swap_operands()
762 (instr->operands[0].isTemp() && instr->operands[0].getTemp().type() == RegType::sgpr)) in can_swap_operands()
765 switch (instr->opcode) { in can_swap_operands()
792 case aco_opcode::v_min_u16_e64: *new_op = instr->opcode; return true; in can_swap_operands()
800 if (get_cmp_info(instr->opcode, &info) && info.swapped != aco_opcode::num_opcodes) { in can_swap_operands()
809 wait_imm::wait_imm() : vm(unset_counter), exp(unset_counter), lgkm(unset_counter), vs(unset_counter) in wait_imm()
812 : vm(vm_), exp(exp_), lgkm(lgkm_), vs(vs_) in wait_imm()
817 vm = packed & 0xf; in wait_imm()
819 vm |= (packed >> 10) & 0x30; in wait_imm()
836 assert(vm == unset_counter || vm <= 0x3f); in pack()
837 imm = ((vm & 0x3f) << 10) | ((lgkm & 0x3f) << 4) | (exp & 0x7); in pack()
842 assert(vm == unset_counter || vm <= 0x3f); in pack()
843 imm = ((vm & 0x30) << 10) | ((lgkm & 0x3f) << 8) | ((exp & 0x7) << 4) | (vm & 0xf); in pack()
847 assert(vm == unset_counter || vm <= 0x3f); in pack()
848 imm = ((vm & 0x30) << 10) | ((lgkm & 0xf) << 8) | ((exp & 0x7) << 4) | (vm & 0xf); in pack()
852 assert(vm == unset_counter || vm <= 0xf); in pack()
853 imm = ((lgkm & 0xf) << 8) | ((exp & 0x7) << 4) | (vm & 0xf); in pack()
856 if (gfx_level < GFX9 && vm == wait_imm::unset_counter) in pack()
857 imm |= 0xc000; /* should have no effect on pre-GFX9 and now we won't have to worry about the in pack()
860 imm |= 0x3000; /* should have no effect on pre-GFX10 and now we won't have to worry about the in pack()
866 wait_imm::combine(const wait_imm& other) in combine() argument
868 bool changed = other.vm < vm || other.exp < exp || other.lgkm < lgkm || other.vs < vs; in combine()
869 vm = std::min(vm, other.vm); in combine()
870 exp = std::min(exp, other.exp); in combine()
871 lgkm = std::min(lgkm, other.lgkm); in combine()
872 vs = std::min(vs, other.vs); in combine()
879 return vm == unset_counter && exp == unset_counter && lgkm == unset_counter && in empty()
888 a->isMUBUF() ? a->mubuf().vtx_binding : (a->isMTBUF() ? a->mtbuf().vtx_binding : 0); in should_form_clause()
890 b->isMUBUF() ? b->mubuf().vtx_binding : (b->isMTBUF() ? b->mtbuf().vtx_binding : 0); in should_form_clause()
894 if (a->format != b->format) in should_form_clause()
898 if (a->isFlatLike()) in should_form_clause()
900 if (a->isSMEM() && a->operands[0].bytes() == 8 && b->operands[0].bytes() == 8) in should_form_clause()
906 if (a->isVMEM() || a->isSMEM()) in should_form_clause()
907 return a->operands[0].tempId() == b->operands[0].tempId(); in should_form_clause()