Lines Matching +full:flat +full:- +full:cache
4 * SPDX-License-Identifier: MIT
39 uint32_t loop_header = -1u;
45 : program(program_), gfx_level(program->gfx_level), symbols(symbols_) in asm_context()
59 int subvector_begin_pos = -1;
65 unsigned addr_dwords = instr->operands.size() - 3; in get_mimg_nsa_dwords()
67 if (instr->operands[3 + i].physReg() != in get_mimg_nsa_dwords()
68 instr->operands[3 + (i - 1)].physReg().advance(instr->operands[3 + (i - 1)].bytes())) in get_mimg_nsa_dwords()
69 return DIV_ROUND_UP(addr_dwords - 1, 4); in get_mimg_nsa_dwords()
77 switch (instr->opcode) { in get_vopd_opy_start()
119 uint8_t mask = get_gfx11_true16_mask(instr->opcode); in needs_vop3_gfx11()
124 if (instr->operands[i].physReg().reg() >= (256 + 128)) in needs_vop3_gfx11()
127 if ((mask & 0x8) && instr->definitions[0].physReg().reg() >= (256 + 128)) in needs_vop3_gfx11()
136 uint32_t scope = instr.cache.gfx12.scope; in get_gfx12_cpol()
137 uint32_t th = instr.cache.gfx12.temporal_hint; in get_gfx12_cpol()
144 uint32_t opcode = ctx.opcode[(int)instr->opcode]; in emit_sop2_instruction()
148 encoding |= !instr->definitions.empty() ? reg(ctx, instr->definitions[0]) << 16 : 0; in emit_sop2_instruction()
149 encoding |= instr->operands.size() >= 2 ? reg(ctx, instr->operands[1]) << 8 : 0; in emit_sop2_instruction()
150 encoding |= !instr->operands.empty() ? reg(ctx, instr->operands[0]) : 0; in emit_sop2_instruction()
157 uint32_t opcode = ctx.opcode[(int)instr->opcode]; in emit_sopk_instruction()
158 const SALU_instruction& sopk = instr->salu(); in emit_sopk_instruction()
162 if (instr->opcode == aco_opcode::s_subvector_loop_begin) { in emit_sopk_instruction()
164 assert(ctx.subvector_begin_pos == -1); in emit_sopk_instruction()
166 } else if (instr->opcode == aco_opcode::s_subvector_loop_end) { in emit_sopk_instruction()
168 assert(ctx.subvector_begin_pos != -1); in emit_sopk_instruction()
170 out[ctx.subvector_begin_pos] |= (out.size() - ctx.subvector_begin_pos); in emit_sopk_instruction()
172 imm = (uint16_t)(ctx.subvector_begin_pos - (int)out.size()); in emit_sopk_instruction()
173 ctx.subvector_begin_pos = -1; in emit_sopk_instruction()
178 encoding |= !instr->definitions.empty() && !(instr->definitions[0].physReg() == scc) in emit_sopk_instruction()
179 ? reg(ctx, instr->definitions[0]) << 16 in emit_sopk_instruction()
180 : !instr->operands.empty() && instr->operands[0].physReg() <= 127 in emit_sopk_instruction()
181 ? reg(ctx, instr->operands[0]) << 16 in emit_sopk_instruction()
190 uint32_t opcode = ctx.opcode[(int)instr->opcode]; in emit_sop1_instruction()
193 encoding |= !instr->definitions.empty() ? reg(ctx, instr->definitions[0]) << 16 : 0; in emit_sop1_instruction()
195 encoding |= !instr->operands.empty() ? reg(ctx, instr->operands[0]) : 0; in emit_sop1_instruction()
202 uint32_t opcode = ctx.opcode[(int)instr->opcode]; in emit_sopc_instruction()
206 encoding |= instr->operands.size() == 2 ? reg(ctx, instr->operands[1]) << 8 : 0; in emit_sopc_instruction()
207 encoding |= !instr->operands.empty() ? reg(ctx, instr->operands[0]) : 0; in emit_sopc_instruction()
215 uint32_t opcode = ctx.opcode[(int)instr->opcode]; in emit_sopp_instruction()
216 const SALU_instruction& sopp = instr->salu(); in emit_sopp_instruction()
221 if (!force_imm && instr_info.classes[(int)instr->opcode] == instr_class::branch) { in emit_sopp_instruction()
233 uint32_t opcode = ctx.opcode[(int)instr->opcode]; in emit_smem_instruction()
234 const SMEM_instruction& smem = instr->smem(); in emit_smem_instruction()
235 bool glc = smem.cache.value & ac_glc; in emit_smem_instruction()
236 bool dlc = smem.cache.value & ac_dlc; in emit_smem_instruction()
238 bool soe = instr->operands.size() >= (!instr->definitions.empty() ? 3 : 4); in emit_smem_instruction()
239 bool is_load = !instr->definitions.empty(); in emit_smem_instruction()
245 encoding |= instr->definitions.size() ? reg(ctx, instr->definitions[0]) << 15 : 0; in emit_smem_instruction()
246 encoding |= instr->operands.size() ? (reg(ctx, instr->operands[0]) >> 1) << 9 : 0; in emit_smem_instruction()
247 if (instr->operands.size() >= 2) { in emit_smem_instruction()
248 if (!instr->operands[1].isConstant()) { in emit_smem_instruction()
249 encoding |= reg(ctx, instr->operands[1]); in emit_smem_instruction()
250 } else if (instr->operands[1].constantValue() >= 1024) { in emit_smem_instruction()
253 encoding |= instr->operands[1].constantValue() >> 2; in emit_smem_instruction()
259 if (instr->operands.size() >= 2 && instr->operands[1].isConstant() && in emit_smem_instruction()
260 instr->operands[1].constantValue() >= 1024) in emit_smem_instruction()
261 out.push_back(instr->operands[1].constantValue() >> 2); in emit_smem_instruction()
267 assert(!dlc); /* Device-level coherent is not supported on GFX9 and lower */ in emit_smem_instruction()
284 if (instr->operands.size() >= 2) in emit_smem_instruction()
285 encoding |= instr->operands[1].isConstant() ? 1 << 17 : 0; /* IMM - immediate enable */ in emit_smem_instruction()
291 if (is_load || instr->operands.size() >= 3) { /* SDATA */ in emit_smem_instruction()
292 encoding |= (is_load ? reg(ctx, instr->definitions[0]) : reg(ctx, instr->operands[2])) << 6; in emit_smem_instruction()
294 if (instr->operands.size() >= 1) { /* SBASE */ in emit_smem_instruction()
295 encoding |= reg(ctx, instr->operands[0]) >> 1; in emit_smem_instruction()
307 if (instr->operands.size() >= 2) { in emit_smem_instruction()
308 const Operand& op_off1 = instr->operands[1]; in emit_smem_instruction()
323 const Operand& op_off2 = instr->operands.back(); in emit_smem_instruction()
339 uint32_t opcode = ctx.opcode[(int)instr->opcode]; in emit_vop2_instruction()
340 const VALU_instruction& valu = instr->valu(); in emit_vop2_instruction()
344 encoding |= reg(ctx, instr->definitions[0], 8) << 17; in emit_vop2_instruction()
346 encoding |= reg(ctx, instr->operands[1], 8) << 9; in emit_vop2_instruction()
348 encoding |= reg(ctx, instr->operands[0]); in emit_vop2_instruction()
356 uint32_t opcode = ctx.opcode[(int)instr->opcode]; in emit_vop1_instruction()
357 const VALU_instruction& valu = instr->valu(); in emit_vop1_instruction()
360 if (!instr->definitions.empty()) { in emit_vop1_instruction()
361 encoding |= reg(ctx, instr->definitions[0], 8) << 17; in emit_vop1_instruction()
365 if (!instr->operands.empty()) { in emit_vop1_instruction()
366 encoding |= reg(ctx, instr->operands[0]); in emit_vop1_instruction()
375 uint32_t opcode = ctx.opcode[(int)instr->opcode]; in emit_vopc_instruction()
376 const VALU_instruction& valu = instr->valu(); in emit_vopc_instruction()
380 encoding |= reg(ctx, instr->operands[1], 8) << 9; in emit_vopc_instruction()
382 encoding |= reg(ctx, instr->operands[0]); in emit_vopc_instruction()
390 uint32_t opcode = ctx.opcode[(int)instr->opcode]; in emit_vintrp_instruction()
391 const VINTRP_instruction& interp = instr->vintrp(); in emit_vintrp_instruction()
394 if (instr->opcode == aco_opcode::v_interp_p1ll_f16 || in emit_vintrp_instruction()
395 instr->opcode == aco_opcode::v_interp_p1lv_f16 || in emit_vintrp_instruction()
396 instr->opcode == aco_opcode::v_interp_p2_legacy_f16 || in emit_vintrp_instruction()
397 instr->opcode == aco_opcode::v_interp_p2_f16 || in emit_vintrp_instruction()
398 instr->opcode == aco_opcode::v_interp_p2_hi_f16) { in emit_vintrp_instruction()
407 unsigned opsel = instr->opcode == aco_opcode::v_interp_p2_hi_f16 ? 0x8 : 0; in emit_vintrp_instruction()
411 encoding |= reg(ctx, instr->definitions[0], 8); in emit_vintrp_instruction()
418 encoding |= reg(ctx, instr->operands[0]) << 9; in emit_vintrp_instruction()
419 if (instr->opcode == aco_opcode::v_interp_p2_f16 || in emit_vintrp_instruction()
420 instr->opcode == aco_opcode::v_interp_p2_hi_f16 || in emit_vintrp_instruction()
421 instr->opcode == aco_opcode::v_interp_p2_legacy_f16 || in emit_vintrp_instruction()
422 instr->opcode == aco_opcode::v_interp_p1lv_f16) { in emit_vintrp_instruction()
423 encoding |= reg(ctx, instr->operands[2]) << 18; in emit_vintrp_instruction()
434 encoding |= reg(ctx, instr->definitions[0], 8) << 18; in emit_vintrp_instruction()
438 if (instr->opcode == aco_opcode::v_interp_mov_f32) in emit_vintrp_instruction()
439 encoding |= (0x3 & instr->operands[0].constantValue()); in emit_vintrp_instruction()
441 encoding |= reg(ctx, instr->operands[0], 8); in emit_vintrp_instruction()
450 uint32_t opcode = ctx.opcode[(int)instr->opcode]; in emit_vinterp_inreg_instruction()
451 const VINTERP_inreg_instruction& interp = instr->vinterp_inreg(); in emit_vinterp_inreg_instruction()
454 encoding |= reg(ctx, instr->definitions[0], 8); in emit_vinterp_inreg_instruction()
462 for (unsigned i = 0; i < instr->operands.size(); i++) in emit_vinterp_inreg_instruction()
463 encoding |= reg(ctx, instr->operands[i]) << (i * 9); in emit_vinterp_inreg_instruction()
472 uint32_t opcode = ctx.opcode[(int)instr->opcode]; in emit_vopd_instruction()
473 const VOPD_instruction& vopd = instr->vopd(); in emit_vopd_instruction()
476 encoding |= reg(ctx, instr->operands[0]); in emit_vopd_instruction()
477 if (instr->opcode != aco_opcode::v_dual_mov_b32) in emit_vopd_instruction()
478 encoding |= reg(ctx, instr->operands[1], 8) << 9; in emit_vopd_instruction()
485 encoding = reg(ctx, instr->operands[opy_start]); in emit_vopd_instruction()
487 encoding |= reg(ctx, instr->operands[opy_start + 1], 8) << 9; in emit_vopd_instruction()
488 encoding |= (reg(ctx, instr->definitions[1], 8) >> 1) << 17; in emit_vopd_instruction()
489 encoding |= reg(ctx, instr->definitions[0], 8) << 24; in emit_vopd_instruction()
496 uint32_t opcode = ctx.opcode[(int)instr->opcode]; in emit_ds_instruction()
497 const DS_instruction& ds = instr->ds(); in emit_ds_instruction()
511 if (!instr->definitions.empty()) in emit_ds_instruction()
512 encoding |= reg(ctx, instr->definitions[0], 8) << 24; in emit_ds_instruction()
513 for (unsigned i = 0; i < MIN2(instr->operands.size(), 3); i++) { in emit_ds_instruction()
514 const Operand& op = instr->operands[i]; in emit_ds_instruction()
524 uint32_t opcode = ctx.opcode[(int)instr->opcode]; in emit_ldsdir_instruction()
525 const LDSDIR_instruction& dir = instr->ldsdir(); in emit_ldsdir_instruction()
534 encoding |= reg(ctx, instr->definitions[0], 8); in emit_ldsdir_instruction()
541 uint32_t opcode = ctx.opcode[(int)instr->opcode]; in emit_mubuf_instruction()
542 const MUBUF_instruction& mubuf = instr->mubuf(); in emit_mubuf_instruction()
543 bool glc = mubuf.cache.value & ac_glc; in emit_mubuf_instruction()
544 bool slc = mubuf.cache.value & ac_slc; in emit_mubuf_instruction()
545 bool dlc = mubuf.cache.value & ac_dlc; in emit_mubuf_instruction()
562 assert(!dlc); /* Device-level coherent is not supported on GFX9 and lower */ in emit_mubuf_instruction()
576 encoding |= reg(ctx, instr->operands[2]) << 24; in emit_mubuf_instruction()
584 encoding |= (reg(ctx, instr->operands[0]) >> 2) << 16; in emit_mubuf_instruction()
585 if (instr->operands.size() > 3 && !mubuf.lds) in emit_mubuf_instruction()
586 encoding |= reg(ctx, instr->operands[3], 8) << 8; in emit_mubuf_instruction()
588 encoding |= reg(ctx, instr->definitions[0], 8) << 8; in emit_mubuf_instruction()
589 encoding |= reg(ctx, instr->operands[1], 8); in emit_mubuf_instruction()
596 uint32_t opcode = ctx.opcode[(int)instr->opcode]; in emit_mubuf_instruction_gfx12()
597 const MUBUF_instruction& mubuf = instr->mubuf(); in emit_mubuf_instruction_gfx12()
602 if (instr->operands[2].isConstant()) { in emit_mubuf_instruction_gfx12()
603 assert(instr->operands[2].constantValue() == 0); in emit_mubuf_instruction_gfx12()
606 encoding |= reg(ctx, instr->operands[2]); in emit_mubuf_instruction_gfx12()
612 if (instr->operands.size() > 3) in emit_mubuf_instruction_gfx12()
613 encoding |= reg(ctx, instr->operands[3], 8); in emit_mubuf_instruction_gfx12()
615 encoding |= reg(ctx, instr->definitions[0], 8); in emit_mubuf_instruction_gfx12()
616 encoding |= reg(ctx, instr->operands[0]) << 9; in emit_mubuf_instruction_gfx12()
624 if (!instr->operands[1].isUndefined()) in emit_mubuf_instruction_gfx12()
625 encoding |= reg(ctx, instr->operands[1], 8); in emit_mubuf_instruction_gfx12()
633 uint32_t opcode = ctx.opcode[(int)instr->opcode]; in emit_mtbuf_instruction()
634 const MTBUF_instruction& mtbuf = instr->mtbuf(); in emit_mtbuf_instruction()
635 bool glc = mtbuf.cache.value & ac_glc; in emit_mtbuf_instruction()
636 bool slc = mtbuf.cache.value & ac_slc; in emit_mtbuf_instruction()
637 bool dlc = mtbuf.cache.value & ac_dlc; in emit_mtbuf_instruction()
649 encoding |= (opcode & 0x07) << 16; /* 3 LSBs of 4-bit OPCODE */ in emit_mtbuf_instruction()
666 encoding |= reg(ctx, instr->operands[2]) << 24; in emit_mtbuf_instruction()
675 encoding |= (((opcode & 0x08) >> 3) << 21); /* MSB of 4-bit OPCODE */ in emit_mtbuf_instruction()
677 encoding |= (reg(ctx, instr->operands[0]) >> 2) << 16; in emit_mtbuf_instruction()
678 if (instr->operands.size() > 3) in emit_mtbuf_instruction()
679 encoding |= reg(ctx, instr->operands[3], 8) << 8; in emit_mtbuf_instruction()
681 encoding |= reg(ctx, instr->definitions[0], 8) << 8; in emit_mtbuf_instruction()
682 encoding |= reg(ctx, instr->operands[1], 8); in emit_mtbuf_instruction()
689 uint32_t opcode = ctx.opcode[(int)instr->opcode]; in emit_mtbuf_instruction_gfx12()
690 const MTBUF_instruction& mtbuf = instr->mtbuf(); in emit_mtbuf_instruction_gfx12()
697 if (instr->operands[2].isConstant()) { in emit_mtbuf_instruction_gfx12()
698 assert(instr->operands[2].constantValue() == 0); in emit_mtbuf_instruction_gfx12()
701 encoding |= reg(ctx, instr->operands[2]); in emit_mtbuf_instruction_gfx12()
707 if (instr->operands.size() > 3) in emit_mtbuf_instruction_gfx12()
708 encoding |= reg(ctx, instr->operands[3], 8); in emit_mtbuf_instruction_gfx12()
710 encoding |= reg(ctx, instr->definitions[0], 8); in emit_mtbuf_instruction_gfx12()
711 encoding |= reg(ctx, instr->operands[0]) << 9; in emit_mtbuf_instruction_gfx12()
719 encoding |= reg(ctx, instr->operands[1], 8); in emit_mtbuf_instruction_gfx12()
727 uint32_t opcode = ctx.opcode[(int)instr->opcode]; in emit_mimg_instruction()
728 const MIMG_instruction& mimg = instr->mimg(); in emit_mimg_instruction()
729 bool glc = mimg.cache.value & ac_glc; in emit_mimg_instruction()
730 bool slc = mimg.cache.value & ac_slc; in emit_mimg_instruction()
731 bool dlc = mimg.cache.value & ac_dlc; in emit_mimg_instruction()
759 assert(!dlc); /* Device-level coherent is not supported on GFX9 and lower */ in emit_mimg_instruction()
774 encoding = reg(ctx, instr->operands[3], 8); /* VADDR */ in emit_mimg_instruction()
775 if (!instr->definitions.empty()) { in emit_mimg_instruction()
776 encoding |= reg(ctx, instr->definitions[0], 8) << 8; /* VDATA */ in emit_mimg_instruction()
777 } else if (!instr->operands[2].isUndefined()) { in emit_mimg_instruction()
778 encoding |= reg(ctx, instr->operands[2], 8) << 8; /* VDATA */ in emit_mimg_instruction()
780 encoding |= (0x1F & (reg(ctx, instr->operands[0]) >> 2)) << 16; /* T# (resource) */ in emit_mimg_instruction()
784 if (!instr->operands[1].isUndefined()) in emit_mimg_instruction()
785 encoding |= (0x1F & (reg(ctx, instr->operands[1]) >> 2)) << 26; /* sampler */ in emit_mimg_instruction()
790 if (!instr->operands[1].isUndefined()) in emit_mimg_instruction()
791 encoding |= (0x1F & (reg(ctx, instr->operands[1]) >> 2)) << 21; /* sampler */ in emit_mimg_instruction()
805 for (unsigned i = 0; i < instr->operands.size() - 4u; i++) in emit_mimg_instruction()
806 nsa[i / 4] |= reg(ctx, instr->operands[4 + i], 8) << (i % 4 * 8); in emit_mimg_instruction()
813 uint32_t opcode = ctx.opcode[(int)instr->opcode]; in emit_mimg_instruction_gfx12()
814 const MIMG_instruction& mimg = instr->mimg(); in emit_mimg_instruction_gfx12()
816 bool vsample = !instr->operands[1].isUndefined() || instr->opcode == aco_opcode::image_msaa_load; in emit_mimg_instruction_gfx12()
833 for (unsigned i = 3; i < instr->operands.size(); i++) in emit_mimg_instruction_gfx12()
834 vaddr[i - 3] = reg(ctx, instr->operands[i], 8); in emit_mimg_instruction_gfx12()
835 unsigned num_vaddr = instr->operands.size() - 3; in emit_mimg_instruction_gfx12()
836 for (unsigned i = 0; i < MIN2(instr->operands.back().size() - 1, 5 - num_vaddr); i++) in emit_mimg_instruction_gfx12()
837 vaddr[num_vaddr + i] = reg(ctx, instr->operands.back(), 8) + i + 1; in emit_mimg_instruction_gfx12()
840 if (!instr->definitions.empty()) in emit_mimg_instruction_gfx12()
841 encoding |= reg(ctx, instr->definitions[0], 8); /* VDATA */ in emit_mimg_instruction_gfx12()
842 else if (!instr->operands[2].isUndefined()) in emit_mimg_instruction_gfx12()
843 encoding |= reg(ctx, instr->operands[2], 8); /* VDATA */ in emit_mimg_instruction_gfx12()
844 encoding |= reg(ctx, instr->operands[0]) << 9; /* T# (resource) */ in emit_mimg_instruction_gfx12()
847 if (instr->opcode != aco_opcode::image_msaa_load) in emit_mimg_instruction_gfx12()
848 encoding |= reg(ctx, instr->operands[1]) << 23; /* sampler */ in emit_mimg_instruction_gfx12()
865 uint32_t opcode = ctx.opcode[(int)instr->opcode]; in emit_flatlike_instruction()
866 const FLAT_instruction& flat = instr->flatlike(); in emit_flatlike_instruction() local
867 bool glc = flat.cache.value & ac_glc; in emit_flatlike_instruction()
868 bool slc = flat.cache.value & ac_slc; in emit_flatlike_instruction()
869 bool dlc = flat.cache.value & ac_dlc; in emit_flatlike_instruction()
874 if (instr->isFlat()) in emit_flatlike_instruction()
875 assert(flat.offset <= 0xfff); in emit_flatlike_instruction()
877 assert(flat.offset >= -4096 && flat.offset < 4096); in emit_flatlike_instruction()
878 encoding |= flat.offset & 0x1fff; in emit_flatlike_instruction()
879 } else if (ctx.gfx_level <= GFX8 || instr->isFlat()) { in emit_flatlike_instruction()
880 /* GFX10 has a 12-bit immediate OFFSET field, in emit_flatlike_instruction()
883 assert(flat.offset == 0); in emit_flatlike_instruction()
885 assert(flat.offset >= -2048 && flat.offset <= 2047); in emit_flatlike_instruction()
886 encoding |= flat.offset & 0xfff; in emit_flatlike_instruction()
888 if (instr->isScratch()) in emit_flatlike_instruction()
890 else if (instr->isGlobal()) in emit_flatlike_instruction()
892 encoding |= flat.lds ? 1 << 13 : 0; in emit_flatlike_instruction()
896 assert(!flat.nv); in emit_flatlike_instruction()
902 encoding = reg(ctx, instr->operands[0], 8); in emit_flatlike_instruction()
903 if (!instr->definitions.empty()) in emit_flatlike_instruction()
904 encoding |= reg(ctx, instr->definitions[0], 8) << 24; in emit_flatlike_instruction()
905 if (instr->operands.size() >= 3) in emit_flatlike_instruction()
906 encoding |= reg(ctx, instr->operands[2], 8) << 8; in emit_flatlike_instruction()
907 if (!instr->operands[1].isUndefined()) { in emit_flatlike_instruction()
908 assert(ctx.gfx_level >= GFX10 || instr->operands[1].physReg() != 0x7F); in emit_flatlike_instruction()
909 assert(instr->format != Format::FLAT); in emit_flatlike_instruction()
910 encoding |= reg(ctx, instr->operands[1], 8) << 16; in emit_flatlike_instruction()
911 } else if (instr->format != Format::FLAT || in emit_flatlike_instruction()
912 ctx.gfx_level >= GFX10) { /* SADDR is actually used with FLAT on GFX10 */ in emit_flatlike_instruction()
917 (instr->isScratch() && instr->operands[0].isUndefined() && ctx.gfx_level < GFX11)) in emit_flatlike_instruction()
922 if (ctx.gfx_level >= GFX11 && instr->isScratch()) in emit_flatlike_instruction()
923 encoding |= !instr->operands[0].isUndefined() ? 1 << 23 : 0; in emit_flatlike_instruction()
925 encoding |= flat.nv ? 1 << 23 : 0; in emit_flatlike_instruction()
933 uint32_t opcode = ctx.opcode[(int)instr->opcode]; in emit_flatlike_instruction_gfx12()
934 const FLAT_instruction& flat = instr->flatlike(); in emit_flatlike_instruction_gfx12() local
935 assert(!flat.lds); in emit_flatlike_instruction_gfx12()
939 if (!instr->operands[1].isUndefined()) { in emit_flatlike_instruction_gfx12()
940 assert(!instr->isFlat()); in emit_flatlike_instruction_gfx12()
941 encoding |= reg(ctx, instr->operands[1]); in emit_flatlike_instruction_gfx12()
945 if (instr->isScratch()) in emit_flatlike_instruction_gfx12()
947 else if (instr->isGlobal()) in emit_flatlike_instruction_gfx12()
952 if (!instr->definitions.empty()) in emit_flatlike_instruction_gfx12()
953 encoding |= reg(ctx, instr->definitions[0], 8); in emit_flatlike_instruction_gfx12()
954 if (instr->isScratch()) in emit_flatlike_instruction_gfx12()
955 encoding |= !instr->operands[0].isUndefined() ? 1 << 17 : 0; in emit_flatlike_instruction_gfx12()
956 encoding |= get_gfx12_cpol(flat) << 18; in emit_flatlike_instruction_gfx12()
957 if (instr->operands.size() >= 3) in emit_flatlike_instruction_gfx12()
958 encoding |= reg(ctx, instr->operands[2], 8) << 23; in emit_flatlike_instruction_gfx12()
962 if (!instr->operands[0].isUndefined()) in emit_flatlike_instruction_gfx12()
963 encoding |= reg(ctx, instr->operands[0], 8); in emit_flatlike_instruction_gfx12()
964 encoding |= (flat.offset & 0x00ffffff) << 8; in emit_flatlike_instruction_gfx12()
971 const Export_instruction& exp = instr->exp(); in emit_exp_instruction()
1002 DPP16_instruction& dpp = instr->dpp16(); in emit_dpp16_instruction()
1005 Operand dpp_op = instr->operands[0]; in emit_dpp16_instruction()
1006 instr->operands[0] = Operand(PhysReg{250}, v1); in emit_dpp16_instruction()
1007 instr->format = (Format)((uint16_t)instr->format & ~(uint16_t)Format::DPP16); in emit_dpp16_instruction()
1009 instr->format = (Format)((uint16_t)instr->format | (uint16_t)Format::DPP16); in emit_dpp16_instruction()
1010 instr->operands[0] = dpp_op; in emit_dpp16_instruction()
1022 encoding |= dpp.opsel[0] && !instr->isVOP3() ? 128 : 0; in emit_dpp16_instruction()
1030 DPP8_instruction& dpp = instr->dpp8(); in emit_dpp8_instruction()
1033 Operand dpp_op = instr->operands[0]; in emit_dpp8_instruction()
1034 instr->operands[0] = Operand(PhysReg{233u + dpp.fetch_inactive}, v1); in emit_dpp8_instruction()
1035 instr->format = (Format)((uint16_t)instr->format & ~(uint16_t)Format::DPP8); in emit_dpp8_instruction()
1037 instr->format = (Format)((uint16_t)instr->format | (uint16_t)Format::DPP8); in emit_dpp8_instruction()
1038 instr->operands[0] = dpp_op; in emit_dpp8_instruction()
1041 encoding |= dpp.opsel[0] && !instr->isVOP3() ? 128 : 0; in emit_dpp8_instruction()
1049 uint32_t opcode = ctx.opcode[(int)instr->opcode]; in emit_vop3_instruction()
1050 const VALU_instruction& vop3 = instr->valu(); in emit_vop3_instruction()
1052 if (instr->isVOP2()) { in emit_vop3_instruction()
1054 } else if (instr->isVOP1()) { in emit_vop3_instruction()
1059 } else if (instr->isVOPC()) { in emit_vop3_instruction()
1061 } else if (instr->isVINTRP()) { in emit_vop3_instruction()
1087 if (instr->definitions.size() == 2 && instr->isVOPC()) in emit_vop3_instruction()
1088 assert(ctx.gfx_level <= GFX9 && instr->definitions[1].physReg() == exec); in emit_vop3_instruction()
1089 else if (instr->definitions.size() == 2 && instr->opcode != aco_opcode::v_swap_b16) in emit_vop3_instruction()
1090 encoding |= reg(ctx, instr->definitions[1]) << 8; in emit_vop3_instruction()
1091 encoding |= reg(ctx, instr->definitions[0], 8); in emit_vop3_instruction()
1095 unsigned num_ops = instr->operands.size(); in emit_vop3_instruction()
1097 if (instr->opcode == aco_opcode::v_writelane_b32_e64) in emit_vop3_instruction()
1099 else if (instr->opcode == aco_opcode::v_swap_b16) in emit_vop3_instruction()
1103 encoding |= reg(ctx, instr->operands[i]) << (i * 9); in emit_vop3_instruction()
1113 uint32_t opcode = ctx.opcode[(int)instr->opcode]; in emit_vop3p_instruction()
1114 const VALU_instruction& vop3 = instr->valu(); in emit_vop3p_instruction()
1131 encoding |= reg(ctx, instr->definitions[0], 8); in emit_vop3p_instruction()
1134 for (unsigned i = 0; i < instr->operands.size(); i++) in emit_vop3p_instruction()
1135 encoding |= reg(ctx, instr->operands[i]) << (i * 9); in emit_vop3p_instruction()
1146 SDWA_instruction& sdwa = instr->sdwa(); in emit_sdwa_instruction()
1149 Operand sdwa_op = instr->operands[0]; in emit_sdwa_instruction()
1150 instr->operands[0] = Operand(PhysReg{249}, v1); in emit_sdwa_instruction()
1151 instr->format = (Format)((uint16_t)instr->format & ~(uint16_t)Format::SDWA); in emit_sdwa_instruction()
1153 instr->format = (Format)((uint16_t)instr->format | (uint16_t)Format::SDWA); in emit_sdwa_instruction()
1154 instr->operands[0] = sdwa_op; in emit_sdwa_instruction()
1158 if (instr->isVOPC()) { in emit_sdwa_instruction()
1159 if (instr->definitions[0].physReg() != in emit_sdwa_instruction()
1160 (ctx.gfx_level >= GFX10 && is_cmpx(instr->opcode) ? exec : vcc)) { in emit_sdwa_instruction()
1161 encoding |= reg(ctx, instr->definitions[0]) << 8; in emit_sdwa_instruction()
1166 encoding |= sdwa.dst_sel.to_sdwa_sel(instr->definitions[0].physReg().byte()) << 8; in emit_sdwa_instruction()
1168 if (instr->definitions[0].bytes() < 4) /* dst_preserve */ in emit_sdwa_instruction()
1180 if (instr->operands.size() >= 2) { in emit_sdwa_instruction()
1181 encoding |= sdwa.sel[1].to_sdwa_sel(instr->operands[1].physReg().byte()) << 24; in emit_sdwa_instruction()
1189 if (instr->operands.size() >= 2) in emit_sdwa_instruction()
1190 encoding |= (instr->operands[1].physReg() < 256) << 31; in emit_sdwa_instruction()
1197 /* lower remaining pseudo-instructions */ in emit_instruction()
1198 if (instr->opcode == aco_opcode::p_constaddr_getpc) { in emit_instruction()
1199 ctx.constaddrs[instr->operands[0].constantValue()].getpc_end = out.size() + 1; in emit_instruction()
1201 instr->opcode = aco_opcode::s_getpc_b64; in emit_instruction()
1202 instr->operands.pop_back(); in emit_instruction()
1203 } else if (instr->opcode == aco_opcode::p_constaddr_addlo) { in emit_instruction()
1204 ctx.constaddrs[instr->operands[2].constantValue()].add_literal = out.size() + 1; in emit_instruction()
1206 instr->opcode = aco_opcode::s_add_u32; in emit_instruction()
1207 instr->operands.pop_back(); in emit_instruction()
1208 assert(instr->operands[1].isConstant()); in emit_instruction()
1210 instr->operands[1] = Operand::literal32(instr->operands[1].constantValue()); in emit_instruction()
1211 } else if (instr->opcode == aco_opcode::p_resumeaddr_getpc) { in emit_instruction()
1212 ctx.resumeaddrs[instr->operands[0].constantValue()].getpc_end = out.size() + 1; in emit_instruction()
1214 instr->opcode = aco_opcode::s_getpc_b64; in emit_instruction()
1215 instr->operands.pop_back(); in emit_instruction()
1216 } else if (instr->opcode == aco_opcode::p_resumeaddr_addlo) { in emit_instruction()
1217 ctx.resumeaddrs[instr->operands[2].constantValue()].add_literal = out.size() + 1; in emit_instruction()
1219 instr->opcode = aco_opcode::s_add_u32; in emit_instruction()
1220 instr->operands.pop_back(); in emit_instruction()
1221 assert(instr->operands[1].isConstant()); in emit_instruction()
1223 instr->operands[1] = Operand::literal32(instr->operands[1].constantValue()); in emit_instruction()
1224 } else if (instr->opcode == aco_opcode::p_load_symbol) { in emit_instruction()
1225 assert(instr->operands[0].isConstant()); in emit_instruction()
1229 info.id = (enum aco_symbol_id)instr->operands[0].constantValue(); in emit_instruction()
1231 ctx.symbols->push_back(info); in emit_instruction()
1233 instr->opcode = aco_opcode::s_mov_b32; in emit_instruction()
1235 instr->operands[0] = Operand::literal32(0); in emit_instruction()
1236 } else if (instr->opcode == aco_opcode::p_debug_info) { in emit_instruction()
1237 assert(instr->operands[0].isConstant()); in emit_instruction()
1238 uint32_t index = instr->operands[0].constantValue(); in emit_instruction()
1239 ctx.program->debug_info[index].offset = (out.size() - 1) * 4; in emit_instruction()
1244 if ((instr->isVOP1() || instr->isVOP2() || instr->isVOPC()) && !instr->isVOP3() && in emit_instruction()
1246 instr->format = asVOP3(instr->format); in emit_instruction()
1247 if (instr->opcode == aco_opcode::v_fmaak_f16) { in emit_instruction()
1248 instr->opcode = aco_opcode::v_fma_f16; in emit_instruction()
1249 instr->format = (Format)((uint32_t)instr->format & ~(uint32_t)Format::VOP2); in emit_instruction()
1250 } else if (instr->opcode == aco_opcode::v_fmamk_f16) { in emit_instruction()
1251 instr->valu().swapOperands(1, 2); in emit_instruction()
1252 instr->opcode = aco_opcode::v_fma_f16; in emit_instruction()
1253 instr->format = (Format)((uint32_t)instr->format & ~(uint32_t)Format::VOP2); in emit_instruction()
1257 uint32_t opcode = ctx.opcode[(int)instr->opcode]; in emit_instruction()
1258 if (opcode == (uint32_t)-1) { in emit_instruction()
1275 switch (instr->format) { in emit_instruction()
1353 case Format::FLAT: in emit_instruction()
1368 if (instr->opcode != aco_opcode::p_unit_test) in emit_instruction()
1372 if (instr->isDPP16()) { in emit_instruction()
1375 } else if (instr->isDPP8()) { in emit_instruction()
1378 } else if (instr->isVOP3()) { in emit_instruction()
1380 } else if (instr->isVOP3P()) { in emit_instruction()
1382 } else if (instr->isSDWA()) { in emit_instruction()
1391 for (const Operand& op : instr->operands) { in emit_instruction()
1421 for (Block& block : program->blocks) { in fix_exports()
1426 if ((*it)->isEXP()) { in fix_exports()
1427 Export_instruction& exp = (*it)->exp(); in fix_exports()
1428 if (program->stage.hw == AC_HW_VERTEX_SHADER || in fix_exports()
1429 program->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER) { in fix_exports()
1441 } else if ((*it)->definitions.size() && (*it)->definitions[0].physReg() == exec) { in fix_exports()
1449 bool may_skip_export = program->stage.hw == AC_HW_PIXEL_SHADER && program->gfx_level >= GFX10; in fix_exports()
1453 bool is_vertex_or_ngg = (program->stage.hw == AC_HW_VERTEX_SHADER || in fix_exports()
1454 program->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER); in fix_exports()
1469 for (Block& block : ctx.program->blocks) { in insert_code()
1514 ctx.branches.begin(), ctx.branches.end(), [&](const branch_info& branch) -> bool in fix_branches_gfx10()
1515 { return ((int)ctx.program->blocks[branch.target].offset - branch.pos - 1) == 0x3f; }); in fix_branches_gfx10()
1521 insert_code(ctx, out, buggy_branch_it->pos + 1, 1, &s_nop_0); in fix_branches_gfx10()
1532 Block* new_block = ctx.program->create_and_insert_block(); in chain_branches()
1537 /* Re-direct original branch to new block (offset). */ in chain_branches()
1539 branch.target = new_block->index; in chain_branches()
1547 const int half_dist = (INT16_MAX - 31) / 2; in chain_branches()
1548 const unsigned upper_start = MIN2(ctx.program->blocks[target].offset, branch.pos) + half_dist; in chain_branches()
1550 const unsigned lower_end = MAX2(ctx.program->blocks[target].offset, branch.pos) - half_dist; in chain_branches()
1551 const unsigned lower_start = lower_end - half_dist; in chain_branches()
1553 for (unsigned i = 0; i < ctx.program->blocks.size() - 1; i++) { in chain_branches()
1554 Block& block = ctx.program->blocks[i]; in chain_branches()
1555 Block& next = ctx.program->blocks[i + 1]; in chain_branches()
1566 block.instructions.back()->opcode == aco_opcode::s_branch) { in chain_branches()
1578 while (ctx.program->blocks[insertion_block_idx + 1].offset < upper_end) in chain_branches()
1581 insert_at = ctx.program->blocks[insertion_block_idx].offset; in chain_branches()
1582 auto it = ctx.program->blocks[insertion_block_idx].instructions.begin(); in chain_branches()
1586 while (skip-- > 0 || insert_at < upper_start) { in chain_branches()
1587 Instruction* instr = (it++)->get(); in chain_branches()
1588 if (instr->isSOPP()) { in chain_branches()
1589 if (instr->opcode == aco_opcode::s_clause) in chain_branches()
1590 skip = instr->salu().imm + 1; in chain_branches()
1591 else if (instr->opcode == aco_opcode::s_delay_alu) in chain_branches()
1592 skip = ((instr->salu().imm >> 4) & 0x7) + 1; in chain_branches()
1593 else if (instr->opcode == aco_opcode::s_branch) in chain_branches()
1606 bld.reset(&ctx.program->blocks[insertion_block_idx].instructions, it); in chain_branches()
1608 bld.reset(&ctx.program->blocks[insertion_block_idx - 1].instructions); in chain_branches()
1613 if (ctx.program->gfx_level == GFX10) { in chain_branches()
1628 new_block->offset = block_offset; in chain_branches()
1632 ctx.branches.push_back({block_offset - 1, skip_branch_target}); in chain_branches()
1649 int offset = (int)ctx.program->blocks[branch.target].offset - branch.pos - 1; in fix_branches()
1667 out[info.add_literal] += (out.size() - info.getpc_end) * 4u; in fix_constaddrs()
1673 ctx.symbols->push_back(sym); in fix_constaddrs()
1678 const Block& block = ctx.program->blocks[out[info.add_literal]]; in fix_constaddrs()
1680 out[info.add_literal] = (block.offset - info.getpc_end) * 4u; in fix_constaddrs()
1688 if (ctx.loop_header != -1u && in align_block()
1689 block.loop_nest_depth < ctx.program->blocks[ctx.loop_header].loop_nest_depth) { in align_block()
1690 assert(ctx.loop_exit != -1u); in align_block()
1691 Block& loop_header = ctx.program->blocks[ctx.loop_header]; in align_block()
1692 Block& loop_exit = ctx.program->blocks[ctx.loop_exit]; in align_block()
1693 ctx.loop_header = -1u; in align_block()
1694 ctx.loop_exit = -1u; in align_block()
1697 const unsigned loop_num_cl = DIV_ROUND_UP(block.offset - loop_header.offset, 16); in align_block()
1699 /* On GFX10.3+, change the prefetch mode if the loop fits into 2 or 3 cache lines. in align_block()
1702 const bool change_prefetch = ctx.program->gfx_level >= GFX10_3 && in align_block()
1703 ctx.program->gfx_level <= GFX11 && loop_num_cl > 1 && in align_block()
1707 Builder bld(ctx.program, &ctx.program->blocks[loop_header.linear_preds[0]]); in align_block()
1724 const unsigned loop_end_cl = (block.offset - 1) >> 4; in align_block()
1726 /* Align the loop if it fits into the fetched cache lines or if we can in align_block()
1727 * reduce the number of cache lines with less than 8 NOPs. in align_block()
1729 const bool align_loop = loop_end_cl - loop_start_cl >= loop_num_cl && in align_block()
1734 nops.resize(16 - (loop_header.offset % 16), 0xbf800000u); in align_block()
1740 /* In case of nested loops, only handle the inner-most loops in order in align_block()
1742 * Also ignore loops without back-edge. in align_block()
1746 ctx.loop_exit = -1u; in align_block()
1752 * This works, because control flow always re-converges after loops. in align_block()
1754 if (ctx.loop_header != -1u && ctx.loop_exit == -1u) { in align_block()
1756 Block& succ = ctx.program->blocks[succ_idx]; in align_block()
1757 if (succ.loop_nest_depth < ctx.program->blocks[ctx.loop_header].loop_nest_depth) in align_block()
1762 /* align resume shaders with cache line */ in align_block()
1777 (program->stage.sw == SWStage::VS || program->stage.sw == SWStage::TES) && in emit_program()
1778 program->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER && in emit_program()
1779 program->info.merged_shader_compiled_separately; in emit_program()
1782 if (!program->is_prolog && !program->info.ps.has_epilog && in emit_program()
1784 (program->stage.hw == AC_HW_VERTEX_SHADER || program->stage.hw == AC_HW_PIXEL_SHADER || in emit_program()
1785 program->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER)) in emit_program()
1788 for (Block& block : program->blocks) { in emit_program()
1798 /* Add end-of-code markers for the UMR disassembler. */ in emit_program()
1804 while (program->constant_data.size() % 4u) in emit_program()
1805 program->constant_data.push_back(0); in emit_program()
1807 code.insert(code.end(), (uint32_t*)program->constant_data.data(), in emit_program()
1808 (uint32_t*)(program->constant_data.data() + program->constant_data.size())); in emit_program()
1810 program->config->scratch_bytes_per_wave = in emit_program()
1811 align(program->config->scratch_bytes_per_wave, program->dev.scratch_alloc_granule); in emit_program()