• Home
  • Raw
  • Download

Lines Matching +full:vega +full:- +full:format

56        : program(program_), gfx_level(program->gfx_level), symbols(symbols_)  in asm_context()
68 int subvector_begin_pos = -1;
74 unsigned addr_dwords = instr->operands.size() - 3; in get_mimg_nsa_dwords()
76 if (instr->operands[3 + i].physReg() != in get_mimg_nsa_dwords()
77 instr->operands[3 + (i - 1)].physReg().advance(instr->operands[3 + (i - 1)].bytes())) in get_mimg_nsa_dwords()
78 return DIV_ROUND_UP(addr_dwords - 1, 4); in get_mimg_nsa_dwords()
86 switch (instr->opcode) { in get_vopd_opy_start()
128 uint8_t mask = get_gfx11_true16_mask(instr->opcode); in needs_vop3_gfx11()
133 if (instr->operands[i].physReg().reg() >= (256 + 128)) in needs_vop3_gfx11()
136 if ((mask & 0x8) && instr->definitions[0].physReg().reg() >= (256 + 128)) in needs_vop3_gfx11()
144 /* lower remaining pseudo-instructions */ in emit_instruction()
145 if (instr->opcode == aco_opcode::p_constaddr_getpc) { in emit_instruction()
146 ctx.constaddrs[instr->operands[0].constantValue()].getpc_end = out.size() + 1; in emit_instruction()
148 instr->opcode = aco_opcode::s_getpc_b64; in emit_instruction()
149 instr->operands.pop_back(); in emit_instruction()
150 } else if (instr->opcode == aco_opcode::p_constaddr_addlo) { in emit_instruction()
151 ctx.constaddrs[instr->operands[2].constantValue()].add_literal = out.size() + 1; in emit_instruction()
153 instr->opcode = aco_opcode::s_add_u32; in emit_instruction()
154 instr->operands.pop_back(); in emit_instruction()
155 assert(instr->operands[1].isConstant()); in emit_instruction()
157 instr->operands[1] = Operand::literal32(instr->operands[1].constantValue()); in emit_instruction()
158 } else if (instr->opcode == aco_opcode::p_resumeaddr_getpc) { in emit_instruction()
159 ctx.resumeaddrs[instr->operands[0].constantValue()].getpc_end = out.size() + 1; in emit_instruction()
161 instr->opcode = aco_opcode::s_getpc_b64; in emit_instruction()
162 instr->operands.pop_back(); in emit_instruction()
163 } else if (instr->opcode == aco_opcode::p_resumeaddr_addlo) { in emit_instruction()
164 ctx.resumeaddrs[instr->operands[2].constantValue()].add_literal = out.size() + 1; in emit_instruction()
166 instr->opcode = aco_opcode::s_add_u32; in emit_instruction()
167 instr->operands.pop_back(); in emit_instruction()
168 assert(instr->operands[1].isConstant()); in emit_instruction()
170 instr->operands[1] = Operand::literal32(instr->operands[1].constantValue()); in emit_instruction()
171 } else if (instr->opcode == aco_opcode::p_load_symbol) { in emit_instruction()
172 assert(instr->operands[0].isConstant()); in emit_instruction()
176 info.id = (enum aco_symbol_id)instr->operands[0].constantValue(); in emit_instruction()
178 ctx.symbols->push_back(info); in emit_instruction()
180 instr->opcode = aco_opcode::s_mov_b32; in emit_instruction()
182 instr->operands[0] = Operand::literal32(0); in emit_instruction()
186 if ((instr->isVOP1() || instr->isVOP2() || instr->isVOPC()) && !instr->isVOP3() && in emit_instruction()
188 instr->format = asVOP3(instr->format); in emit_instruction()
189 if (instr->opcode == aco_opcode::v_fmaak_f16) { in emit_instruction()
190 instr->opcode = aco_opcode::v_fma_f16; in emit_instruction()
191 instr->format = (Format)((uint32_t)instr->format & ~(uint32_t)Format::VOP2); in emit_instruction()
192 } else if (instr->opcode == aco_opcode::v_fmamk_f16) { in emit_instruction()
193 instr->valu().swapOperands(1, 2); in emit_instruction()
194 instr->opcode = aco_opcode::v_fma_f16; in emit_instruction()
195 instr->format = (Format)((uint32_t)instr->format & ~(uint32_t)Format::VOP2); in emit_instruction()
199 uint32_t opcode = ctx.opcode[(int)instr->opcode]; in emit_instruction()
200 if (opcode == (uint32_t)-1) { in emit_instruction()
217 switch (instr->format) { in emit_instruction()
218 case Format::SOP2: { in emit_instruction()
221 encoding |= !instr->definitions.empty() ? reg(ctx, instr->definitions[0]) << 16 : 0; in emit_instruction()
222 encoding |= instr->operands.size() >= 2 ? reg(ctx, instr->operands[1]) << 8 : 0; in emit_instruction()
223 encoding |= !instr->operands.empty() ? reg(ctx, instr->operands[0]) : 0; in emit_instruction()
227 case Format::SOPK: { in emit_instruction()
228 SOPK_instruction& sopk = instr->sopk(); in emit_instruction()
230 if (instr->opcode == aco_opcode::s_subvector_loop_begin) { in emit_instruction()
232 assert(ctx.subvector_begin_pos == -1); in emit_instruction()
234 } else if (instr->opcode == aco_opcode::s_subvector_loop_end) { in emit_instruction()
236 assert(ctx.subvector_begin_pos != -1); in emit_instruction()
238 out[ctx.subvector_begin_pos] |= (out.size() - ctx.subvector_begin_pos); in emit_instruction()
240 sopk.imm = (uint16_t)(ctx.subvector_begin_pos - (int)out.size()); in emit_instruction()
241 ctx.subvector_begin_pos = -1; in emit_instruction()
246 encoding |= !instr->definitions.empty() && !(instr->definitions[0].physReg() == scc) in emit_instruction()
247 ? reg(ctx, instr->definitions[0]) << 16 in emit_instruction()
248 : !instr->operands.empty() && instr->operands[0].physReg() <= 127 in emit_instruction()
249 ? reg(ctx, instr->operands[0]) << 16 in emit_instruction()
255 case Format::SOP1: { in emit_instruction()
257 encoding |= !instr->definitions.empty() ? reg(ctx, instr->definitions[0]) << 16 : 0; in emit_instruction()
259 encoding |= !instr->operands.empty() ? reg(ctx, instr->operands[0]) : 0; in emit_instruction()
263 case Format::SOPC: { in emit_instruction()
266 encoding |= instr->operands.size() == 2 ? reg(ctx, instr->operands[1]) << 8 : 0; in emit_instruction()
267 encoding |= !instr->operands.empty() ? reg(ctx, instr->operands[0]) : 0; in emit_instruction()
271 case Format::SOPP: { in emit_instruction()
272 SOPP_instruction& sopp = instr->sopp(); in emit_instruction()
276 if (sopp.block != -1) { in emit_instruction()
283 case Format::SMEM: { in emit_instruction()
284 SMEM_instruction& smem = instr->smem(); in emit_instruction()
285 bool soe = instr->operands.size() >= (!instr->definitions.empty() ? 3 : 4); in emit_instruction()
286 bool is_load = !instr->definitions.empty(); in emit_instruction()
292 encoding |= instr->definitions.size() ? reg(ctx, instr->definitions[0]) << 15 : 0; in emit_instruction()
293 encoding |= instr->operands.size() ? (reg(ctx, instr->operands[0]) >> 1) << 9 : 0; in emit_instruction()
294 if (instr->operands.size() >= 2) { in emit_instruction()
295 if (!instr->operands[1].isConstant()) { in emit_instruction()
296 encoding |= reg(ctx, instr->operands[1]); in emit_instruction()
297 } else if (instr->operands[1].constantValue() >= 1024) { in emit_instruction()
300 encoding |= instr->operands[1].constantValue() >> 2; in emit_instruction()
306 if (instr->operands.size() >= 2 && instr->operands[1].isConstant() && in emit_instruction()
307 instr->operands[1].constantValue() >= 1024) in emit_instruction()
308 out.push_back(instr->operands[1].constantValue() >> 2); in emit_instruction()
314 assert(!smem.dlc); /* Device-level coherent is not supported on GFX9 and lower */ in emit_instruction()
318 assert(!smem.nv); /* Non-volatile is not supported on GFX10 */ in emit_instruction()
326 if (instr->operands.size() >= 2) in emit_instruction()
327 encoding |= instr->operands[1].isConstant() ? 1 << 17 : 0; /* IMM - immediate enable */ in emit_instruction()
333 if (is_load || instr->operands.size() >= 3) { /* SDATA */ in emit_instruction()
334 encoding |= (is_load ? reg(ctx, instr->definitions[0]) : reg(ctx, instr->operands[2])) in emit_instruction()
337 if (instr->operands.size() >= 1) { /* SBASE */ in emit_instruction()
338 encoding |= reg(ctx, instr->operands[0]) >> 1; in emit_instruction()
350 if (instr->operands.size() >= 2) { in emit_instruction()
351 const Operand& op_off1 = instr->operands[1]; in emit_instruction()
366 const Operand& op_off2 = instr->operands.back(); in emit_instruction()
379 case Format::VOP2: { in emit_instruction()
380 VALU_instruction& valu = instr->valu(); in emit_instruction()
383 encoding |= reg(ctx, instr->definitions[0], 8) << 17; in emit_instruction()
385 encoding |= reg(ctx, instr->operands[1], 8) << 9; in emit_instruction()
387 encoding |= reg(ctx, instr->operands[0]); in emit_instruction()
392 case Format::VOP1: { in emit_instruction()
393 VALU_instruction& valu = instr->valu(); in emit_instruction()
395 if (!instr->definitions.empty()) { in emit_instruction()
396 encoding |= reg(ctx, instr->definitions[0], 8) << 17; in emit_instruction()
400 if (!instr->operands.empty()) { in emit_instruction()
401 encoding |= reg(ctx, instr->operands[0]); in emit_instruction()
407 case Format::VOPC: { in emit_instruction()
408 VALU_instruction& valu = instr->valu(); in emit_instruction()
411 encoding |= reg(ctx, instr->operands[1], 8) << 9; in emit_instruction()
413 encoding |= reg(ctx, instr->operands[0]); in emit_instruction()
418 case Format::VINTRP: { in emit_instruction()
419 VINTRP_instruction& interp = instr->vintrp(); in emit_instruction()
422 if (instr->opcode == aco_opcode::v_interp_p1ll_f16 || in emit_instruction()
423 instr->opcode == aco_opcode::v_interp_p1lv_f16 || in emit_instruction()
424 instr->opcode == aco_opcode::v_interp_p2_legacy_f16 || in emit_instruction()
425 instr->opcode == aco_opcode::v_interp_p2_f16) { in emit_instruction()
435 encoding |= reg(ctx, instr->definitions[0], 8); in emit_instruction()
441 encoding |= reg(ctx, instr->operands[0]) << 9; in emit_instruction()
442 if (instr->opcode == aco_opcode::v_interp_p2_f16 || in emit_instruction()
443 instr->opcode == aco_opcode::v_interp_p2_legacy_f16 || in emit_instruction()
444 instr->opcode == aco_opcode::v_interp_p1lv_f16) { in emit_instruction()
445 encoding |= reg(ctx, instr->operands[2]) << 18; in emit_instruction()
450 encoding = (0b110101 << 26); /* Vega ISA doc says 110010 but it's wrong */ in emit_instruction()
456 encoding |= reg(ctx, instr->definitions[0], 8) << 18; in emit_instruction()
460 if (instr->opcode == aco_opcode::v_interp_mov_f32) in emit_instruction()
461 encoding |= (0x3 & instr->operands[0].constantValue()); in emit_instruction()
463 encoding |= reg(ctx, instr->operands[0], 8); in emit_instruction()
468 case Format::VINTERP_INREG: { in emit_instruction()
469 VINTERP_inreg_instruction& interp = instr->vinterp_inreg(); in emit_instruction()
471 encoding |= reg(ctx, instr->definitions[0], 8); in emit_instruction()
479 for (unsigned i = 0; i < instr->operands.size(); i++) in emit_instruction()
480 encoding |= reg(ctx, instr->operands[i]) << (i * 9); in emit_instruction()
486 case Format::VOPD: { in emit_instruction()
487 VOPD_instruction& vopd = instr->vopd(); in emit_instruction()
489 encoding |= reg(ctx, instr->operands[0]); in emit_instruction()
490 if (instr->opcode != aco_opcode::v_dual_mov_b32) in emit_instruction()
491 encoding |= reg(ctx, instr->operands[1], 8) << 9; in emit_instruction()
498 encoding = reg(ctx, instr->operands[opy_start]); in emit_instruction()
500 encoding |= reg(ctx, instr->operands[opy_start + 1], 8) << 9; in emit_instruction()
501 encoding |= (reg(ctx, instr->definitions[1], 8) >> 1) << 17; in emit_instruction()
502 encoding |= reg(ctx, instr->definitions[0], 8) << 24; in emit_instruction()
506 case Format::DS: { in emit_instruction()
507 DS_instruction& ds = instr->ds(); in emit_instruction()
520 if (!instr->definitions.empty()) in emit_instruction()
521 encoding |= reg(ctx, instr->definitions[0], 8) << 24; in emit_instruction()
522 if (instr->operands.size() >= 3 && instr->operands[2].physReg() != m0) in emit_instruction()
523 encoding |= reg(ctx, instr->operands[2], 8) << 16; in emit_instruction()
524 if (instr->operands.size() >= 2 && instr->operands[1].physReg() != m0) in emit_instruction()
525 encoding |= reg(ctx, instr->operands[1], 8) << 8; in emit_instruction()
526 if (!instr->operands[0].isUndefined()) in emit_instruction()
527 encoding |= reg(ctx, instr->operands[0], 8); in emit_instruction()
531 case Format::LDSDIR: { in emit_instruction()
532 LDSDIR_instruction& dir = instr->ldsdir(); in emit_instruction()
538 encoding |= reg(ctx, instr->definitions[0], 8); in emit_instruction()
542 case Format::MUBUF: { in emit_instruction()
543 MUBUF_instruction& mubuf = instr->mubuf(); in emit_instruction()
559 assert(!mubuf.dlc); /* Device-level coherent is not supported on GFX9 and lower */ in emit_instruction()
573 encoding |= reg(ctx, instr->operands[2]) << 24; in emit_instruction()
581 encoding |= (reg(ctx, instr->operands[0]) >> 2) << 16; in emit_instruction()
582 if (instr->operands.size() > 3 && !mubuf.lds) in emit_instruction()
583 encoding |= reg(ctx, instr->operands[3], 8) << 8; in emit_instruction()
585 encoding |= reg(ctx, instr->definitions[0], 8) << 8; in emit_instruction()
586 encoding |= reg(ctx, instr->operands[1], 8); in emit_instruction()
590 case Format::MTBUF: { in emit_instruction()
591 MTBUF_instruction& mtbuf = instr->mtbuf(); in emit_instruction()
610 encoding |= (img_format << 19); /* Handles both the GFX10 FORMAT and the old NFMT+DFMT */ in emit_instruction()
615 encoding |= (opcode & 0x07) << 16; /* 3 LSBs of 4-bit OPCODE */ in emit_instruction()
621 encoding |= reg(ctx, instr->operands[2]) << 24; in emit_instruction()
630 encoding |= (reg(ctx, instr->operands[0]) >> 2) << 16; in emit_instruction()
631 if (instr->operands.size() > 3) in emit_instruction()
632 encoding |= reg(ctx, instr->operands[3], 8) << 8; in emit_instruction()
634 encoding |= reg(ctx, instr->definitions[0], 8) << 8; in emit_instruction()
635 encoding |= reg(ctx, instr->operands[1], 8); in emit_instruction()
638 encoding |= (((opcode & 0x08) >> 3) << 21); /* MSB of 4-bit OPCODE */ in emit_instruction()
644 case Format::MIMG: { in emit_instruction()
648 MIMG_instruction& mimg = instr->mimg(); in emit_instruction()
672 assert(!mimg.dlc); /* Device-level coherent is not supported on GFX9 and lower */ in emit_instruction()
688 encoding = reg(ctx, instr->operands[3], 8); /* VADDR */ in emit_instruction()
689 if (!instr->definitions.empty()) { in emit_instruction()
690 encoding |= reg(ctx, instr->definitions[0], 8) << 8; /* VDATA */ in emit_instruction()
691 } else if (!instr->operands[2].isUndefined()) { in emit_instruction()
692 encoding |= reg(ctx, instr->operands[2], 8) << 8; /* VDATA */ in emit_instruction()
694 encoding |= (0x1F & (reg(ctx, instr->operands[0]) >> 2)) << 16; /* T# (resource) */ in emit_instruction()
698 if (!instr->operands[1].isUndefined()) in emit_instruction()
699 encoding |= (0x1F & (reg(ctx, instr->operands[1]) >> 2)) << 26; /* sampler */ in emit_instruction()
704 if (!instr->operands[1].isUndefined()) in emit_instruction()
705 encoding |= (0x1F & (reg(ctx, instr->operands[1]) >> 2)) << 21; /* sampler */ in emit_instruction()
719 for (unsigned i = 0; i < instr->operands.size() - 4u; i++) in emit_instruction()
720 nsa[i / 4] |= reg(ctx, instr->operands[4 + i], 8) << (i % 4 * 8); in emit_instruction()
724 case Format::FLAT: in emit_instruction()
725 case Format::SCRATCH: in emit_instruction()
726 case Format::GLOBAL: { in emit_instruction()
727 FLAT_instruction& flat = instr->flatlike(); in emit_instruction()
731 if (instr->isFlat()) in emit_instruction()
734 assert(flat.offset >= -4096 && flat.offset < 4096); in emit_instruction()
736 } else if (ctx.gfx_level <= GFX8 || instr->isFlat()) { in emit_instruction()
737 /* GFX10 has a 12-bit immediate OFFSET field, in emit_instruction()
742 assert(flat.offset >= -2048 && flat.offset <= 2047); in emit_instruction()
745 if (instr->isScratch()) in emit_instruction()
747 else if (instr->isGlobal()) in emit_instruction()
759 encoding = reg(ctx, instr->operands[0], 8); in emit_instruction()
760 if (!instr->definitions.empty()) in emit_instruction()
761 encoding |= reg(ctx, instr->definitions[0], 8) << 24; in emit_instruction()
762 if (instr->operands.size() >= 3) in emit_instruction()
763 encoding |= reg(ctx, instr->operands[2], 8) << 8; in emit_instruction()
764 if (!instr->operands[1].isUndefined()) { in emit_instruction()
765 assert(ctx.gfx_level >= GFX10 || instr->operands[1].physReg() != 0x7F); in emit_instruction()
766 assert(instr->format != Format::FLAT); in emit_instruction()
767 encoding |= reg(ctx, instr->operands[1], 8) << 16; in emit_instruction()
768 } else if (instr->format != Format::FLAT || in emit_instruction()
774 (instr->format == Format::SCRATCH && instr->operands[0].isUndefined())) in emit_instruction()
779 if (ctx.gfx_level >= GFX11 && instr->isScratch()) in emit_instruction()
780 encoding |= !instr->operands[0].isUndefined() ? 1 << 23 : 0; in emit_instruction()
786 case Format::EXP: { in emit_instruction()
787 Export_instruction& exp = instr->exp(); in emit_instruction()
812 case Format::PSEUDO: in emit_instruction()
813 case Format::PSEUDO_BARRIER: in emit_instruction()
814 if (instr->opcode != aco_opcode::p_unit_test) in emit_instruction()
818 if (instr->isDPP16()) { in emit_instruction()
820 DPP16_instruction& dpp = instr->dpp16(); in emit_instruction()
823 Operand dpp_op = instr->operands[0]; in emit_instruction()
824 instr->operands[0] = Operand(PhysReg{250}, v1); in emit_instruction()
825 instr->format = (Format)((uint16_t)instr->format & ~(uint16_t)Format::DPP16); in emit_instruction()
837 encoding |= dpp.opsel[0] && !instr->isVOP3() ? 128 : 0; in emit_instruction()
840 } else if (instr->isDPP8()) { in emit_instruction()
842 DPP8_instruction& dpp = instr->dpp8(); in emit_instruction()
845 Operand dpp_op = instr->operands[0]; in emit_instruction()
846 instr->operands[0] = Operand(PhysReg{233u + dpp.fetch_inactive}, v1); in emit_instruction()
847 instr->format = (Format)((uint16_t)instr->format & ~(uint16_t)Format::DPP8); in emit_instruction()
850 encoding |= dpp.opsel[0] && !instr->isVOP3() ? 128 : 0; in emit_instruction()
854 } else if (instr->isVOP3()) { in emit_instruction()
855 VALU_instruction& vop3 = instr->valu(); in emit_instruction()
857 if (instr->isVOP2()) { in emit_instruction()
859 } else if (instr->isVOP1()) { in emit_instruction()
864 } else if (instr->isVOPC()) { in emit_instruction()
866 } else if (instr->isVINTRP()) { in emit_instruction()
892 if (instr->definitions.size() == 2 && instr->isVOPC()) in emit_instruction()
893 assert(ctx.gfx_level <= GFX9 && instr->definitions[1].physReg() == exec); in emit_instruction()
894 else if (instr->definitions.size() == 2) in emit_instruction()
895 encoding |= reg(ctx, instr->definitions[1]) << 8; in emit_instruction()
896 encoding |= reg(ctx, instr->definitions[0], 8); in emit_instruction()
899 if (instr->opcode == aco_opcode::v_interp_mov_f32) { in emit_instruction()
900 encoding = 0x3 & instr->operands[0].constantValue(); in emit_instruction()
901 } else if (instr->opcode == aco_opcode::v_writelane_b32_e64) { in emit_instruction()
902 encoding |= reg(ctx, instr->operands[0]) << 0; in emit_instruction()
903 encoding |= reg(ctx, instr->operands[1]) << 9; in emit_instruction()
906 for (unsigned i = 0; i < instr->operands.size(); i++) in emit_instruction()
907 encoding |= reg(ctx, instr->operands[i]) << (i * 9); in emit_instruction()
914 } else if (instr->isVOP3P()) { in emit_instruction()
915 VALU_instruction& vop3 = instr->valu(); in emit_instruction()
932 encoding |= reg(ctx, instr->definitions[0], 8); in emit_instruction()
935 for (unsigned i = 0; i < instr->operands.size(); i++) in emit_instruction()
936 encoding |= reg(ctx, instr->operands[i]) << (i * 9); in emit_instruction()
941 } else if (instr->isSDWA()) { in emit_instruction()
943 SDWA_instruction& sdwa = instr->sdwa(); in emit_instruction()
946 Operand sdwa_op = instr->operands[0]; in emit_instruction()
947 instr->operands[0] = Operand(PhysReg{249}, v1); in emit_instruction()
948 instr->format = (Format)((uint16_t)instr->format & ~(uint16_t)Format::SDWA); in emit_instruction()
953 if (instr->isVOPC()) { in emit_instruction()
954 if (instr->definitions[0].physReg() != in emit_instruction()
955 (ctx.gfx_level >= GFX10 && is_cmpx(instr->opcode) ? exec : vcc)) { in emit_instruction()
956 encoding |= reg(ctx, instr->definitions[0]) << 8; in emit_instruction()
961 encoding |= sdwa.dst_sel.to_sdwa_sel(instr->definitions[0].physReg().byte()) << 8; in emit_instruction()
963 if (instr->definitions[0].bytes() < 4) /* dst_preserve */ in emit_instruction()
975 if (instr->operands.size() >= 2) { in emit_instruction()
976 encoding |= sdwa.sel[1].to_sdwa_sel(instr->operands[1].physReg().byte()) << 24; in emit_instruction()
984 if (instr->operands.size() >= 2) in emit_instruction()
985 encoding |= (instr->operands[1].physReg() < 256) << 31; in emit_instruction()
988 unreachable("unimplemented instruction format"); in emit_instruction()
994 for (const Operand& op : instr->operands) { in emit_instruction()
1024 for (Block& block : program->blocks) { in fix_exports()
1029 if ((*it)->isEXP()) { in fix_exports()
1030 Export_instruction& exp = (*it)->exp(); in fix_exports()
1031 if (program->stage.hw == AC_HW_VERTEX_SHADER || in fix_exports()
1032 program->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER) { in fix_exports()
1044 } else if ((*it)->definitions.size() && (*it)->definitions[0].physReg() == exec) { in fix_exports()
1052 bool may_skip_export = program->stage.hw == AC_HW_PIXEL_SHADER && program->gfx_level >= GFX10; in fix_exports()
1056 bool is_vertex_or_ngg = (program->stage.hw == AC_HW_VERTEX_SHADER || in fix_exports()
1057 program->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER); in fix_exports()
1072 for (Block& block : ctx.program->blocks) { in insert_code()
1079 [insert_before](const auto& branch) -> bool in insert_code()
1084 branch_it->first += insert_count; in insert_code()
1121 [&ctx](const auto& branch) -> bool { in fix_branches_gfx10()
1122 return ((int)ctx.program->blocks[branch.second->block].offset - branch.first - 1) == in fix_branches_gfx10()
1131 insert_code(ctx, out, buggy_branch_it->first + 1, 1, &s_nop_0); in fix_branches_gfx10()
1143 if (branch->definitions.empty()) { in emit_long_jump()
1144 assert(ctx.program->blocks[branch->block].kind & block_kind_discard_early_exit); in emit_long_jump()
1147 def = branch->definitions[0]; in emit_long_jump()
1157 if (branch->opcode != aco_opcode::s_branch) { in emit_long_jump()
1160 switch (branch->opcode) { in emit_long_jump()
1169 instr.reset(bld.sopp(inv, -1, 6)); in emit_long_jump()
1180 branch->pass_flags = out.size(); in emit_long_jump()
1182 /* s_addc_u32 for high 32 bits not needed because the program is in a 32-bit VA range */ in emit_long_jump()
1206 int offset = (int)ctx.program->blocks[branch.second->block].offset - branch.first - 1; in fix_branches()
1207 if ((offset < INT16_MIN || offset > INT16_MAX) && !branch.second->pass_flags) { in fix_branches()
1210 ctx.program->blocks[branch.second->block].offset < (unsigned)branch.first; in fix_branches()
1214 insert_code(ctx, out, branch.first + 1, long_jump.size() - 1, long_jump.data() + 1); in fix_branches()
1220 if (branch.second->pass_flags) { in fix_branches()
1221 int after_getpc = branch.first + branch.second->pass_flags - 2; in fix_branches()
1222 offset = (int)ctx.program->blocks[branch.second->block].offset - after_getpc; in fix_branches()
1223 out[branch.first + branch.second->pass_flags - 1] = offset * 4; in fix_branches()
1237 out[info.add_literal] += (out.size() - info.getpc_end) * 4u; in fix_constaddrs()
1243 ctx.symbols->push_back(sym); in fix_constaddrs()
1248 const Block& block = ctx.program->blocks[out[info.add_literal]]; in fix_constaddrs()
1250 out[info.add_literal] = (block.offset - info.getpc_end) * 4u; in fix_constaddrs()
1261 block.loop_nest_depth < ctx.loop_header->loop_nest_depth) { in align_block()
1266 const unsigned loop_num_cl = DIV_ROUND_UP(block.offset - loop_header->offset, 16); in align_block()
1272 ctx.program->gfx_level >= GFX10_3 && loop_num_cl > 1 && loop_num_cl <= 3; in align_block()
1277 aco_ptr<Instruction> instr(bld.sopp(aco_opcode::s_inst_prefetch, -1, prefetch_mode)); in align_block()
1279 insert_code(ctx, code, loop_header->offset, nops.size(), nops.data()); in align_block()
1282 instr->sopp().imm = 0x3; in align_block()
1286 const unsigned loop_start_cl = loop_header->offset >> 4; in align_block()
1287 const unsigned loop_end_cl = (block.offset - 1) >> 4; in align_block()
1292 const bool align_loop = loop_end_cl - loop_start_cl >= loop_num_cl && in align_block()
1293 (loop_num_cl == 1 || change_prefetch || loop_header->offset % 16 > 8); in align_block()
1297 nops.resize(16 - (loop_header->offset % 16), 0xbf800000u); in align_block()
1298 insert_code(ctx, code, loop_header->offset, nops.size(), nops.data()); in align_block()
1303 /* In case of nested loops, only handle the inner-most loops in order in align_block()
1305 * Also ignore loops without back-edge. in align_block()
1325 (program->stage.sw == SWStage::VS || program->stage.sw == SWStage::TES) && in emit_program()
1326 program->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER && in emit_program()
1327 program->info.merged_shader_compiled_separately; in emit_program()
1330 if (!program->is_prolog && !program->info.has_epilog && !is_separately_compiled_ngg_vs_or_es && in emit_program()
1331 (program->stage.hw == AC_HW_VERTEX_SHADER || program->stage.hw == AC_HW_PIXEL_SHADER || in emit_program()
1332 program->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER)) in emit_program()
1335 for (Block& block : program->blocks) { in emit_program()
1345 /* Add end-of-code markers for the UMR disassembler. */ in emit_program()
1351 while (program->constant_data.size() % 4u) in emit_program()
1352 program->constant_data.push_back(0); in emit_program()
1354 code.insert(code.end(), (uint32_t*)program->constant_data.data(), in emit_program()
1355 (uint32_t*)(program->constant_data.data() + program->constant_data.size())); in emit_program()
1357 program->config->scratch_bytes_per_wave = in emit_program()
1358 align(program->config->scratch_bytes_per_wave, program->dev.scratch_alloc_granule); in emit_program()