aco_assembler.cpp - OpenGrok cross reference for /third_party/mesa3d/src/amd/compiler/aco

Lines Matching +full:flat +full:- +full:cache
4  * SPDX-License-Identifier: MIT
39    uint32_t loop_header = -1u;
45        : program(program_), gfx_level(program->gfx_level), symbols(symbols_)  in asm_context()
59    int subvector_begin_pos = -1;
65    unsigned addr_dwords = instr->operands.size() - 3;  in get_mimg_nsa_dwords()
67       if (instr->operands[3 + i].physReg() !=  in get_mimg_nsa_dwords()
68           instr->operands[3 + (i - 1)].physReg().advance(instr->operands[3 + (i - 1)].bytes()))  in get_mimg_nsa_dwords()
69          return DIV_ROUND_UP(addr_dwords - 1, 4);  in get_mimg_nsa_dwords()
77    switch (instr->opcode) {  in get_vopd_opy_start()
119    uint8_t mask = get_gfx11_true16_mask(instr->opcode);  in needs_vop3_gfx11()
124       if (instr->operands[i].physReg().reg() >= (256 + 128))  in needs_vop3_gfx11()
127    if ((mask & 0x8) && instr->definitions[0].physReg().reg() >= (256 + 128))  in needs_vop3_gfx11()
136    uint32_t scope = instr.cache.gfx12.scope;  in get_gfx12_cpol()
137    uint32_t th = instr.cache.gfx12.temporal_hint;  in get_gfx12_cpol()
144    uint32_t opcode = ctx.opcode[(int)instr->opcode];  in emit_sop2_instruction()
148    encoding |= !instr->definitions.empty() ? reg(ctx, instr->definitions[0]) << 16 : 0;  in emit_sop2_instruction()
149    encoding |= instr->operands.size() >= 2 ? reg(ctx, instr->operands[1]) << 8 : 0;  in emit_sop2_instruction()
150    encoding |= !instr->operands.empty() ? reg(ctx, instr->operands[0]) : 0;  in emit_sop2_instruction()
157    uint32_t opcode = ctx.opcode[(int)instr->opcode];  in emit_sopk_instruction()
158    const SALU_instruction& sopk = instr->salu();  in emit_sopk_instruction()
162    if (instr->opcode == aco_opcode::s_subvector_loop_begin) {  in emit_sopk_instruction()
164       assert(ctx.subvector_begin_pos == -1);  in emit_sopk_instruction()
166    } else if (instr->opcode == aco_opcode::s_subvector_loop_end) {  in emit_sopk_instruction()
168       assert(ctx.subvector_begin_pos != -1);  in emit_sopk_instruction()
170       out[ctx.subvector_begin_pos] |= (out.size() - ctx.subvector_begin_pos);  in emit_sopk_instruction()
172       imm = (uint16_t)(ctx.subvector_begin_pos - (int)out.size());  in emit_sopk_instruction()
173       ctx.subvector_begin_pos = -1;  in emit_sopk_instruction()
178    encoding |= !instr->definitions.empty() && !(instr->definitions[0].physReg() == scc)  in emit_sopk_instruction()
179                   ? reg(ctx, instr->definitions[0]) << 16  in emit_sopk_instruction()
180                : !instr->operands.empty() && instr->operands[0].physReg() <= 127  in emit_sopk_instruction()
181                   ? reg(ctx, instr->operands[0]) << 16  in emit_sopk_instruction()
190    uint32_t opcode = ctx.opcode[(int)instr->opcode];  in emit_sop1_instruction()
193    encoding |= !instr->definitions.empty() ? reg(ctx, instr->definitions[0]) << 16 : 0;  in emit_sop1_instruction()
195    encoding |= !instr->operands.empty() ? reg(ctx, instr->operands[0]) : 0;  in emit_sop1_instruction()
202    uint32_t opcode = ctx.opcode[(int)instr->opcode];  in emit_sopc_instruction()
206    encoding |= instr->operands.size() == 2 ? reg(ctx, instr->operands[1]) << 8 : 0;  in emit_sopc_instruction()
207    encoding |= !instr->operands.empty() ? reg(ctx, instr->operands[0]) : 0;  in emit_sopc_instruction()
215    uint32_t opcode = ctx.opcode[(int)instr->opcode];  in emit_sopp_instruction()
216    const SALU_instruction& sopp = instr->salu();  in emit_sopp_instruction()
221    if (!force_imm && instr_info.classes[(int)instr->opcode] == instr_class::branch) {  in emit_sopp_instruction()
233    uint32_t opcode = ctx.opcode[(int)instr->opcode];  in emit_smem_instruction()
234    const SMEM_instruction& smem = instr->smem();  in emit_smem_instruction()
235    bool glc = smem.cache.value & ac_glc;  in emit_smem_instruction()
236    bool dlc = smem.cache.value & ac_dlc;  in emit_smem_instruction()
238    bool soe = instr->operands.size() >= (!instr->definitions.empty() ? 3 : 4);  in emit_smem_instruction()
239    bool is_load = !instr->definitions.empty();  in emit_smem_instruction()
245       encoding |= instr->definitions.size() ? reg(ctx, instr->definitions[0]) << 15 : 0;  in emit_smem_instruction()
246       encoding |= instr->operands.size() ? (reg(ctx, instr->operands[0]) >> 1) << 9 : 0;  in emit_smem_instruction()
247       if (instr->operands.size() >= 2) {  in emit_smem_instruction()
248          if (!instr->operands[1].isConstant()) {  in emit_smem_instruction()
249             encoding |= reg(ctx, instr->operands[1]);  in emit_smem_instruction()
250          } else if (instr->operands[1].constantValue() >= 1024) {  in emit_smem_instruction()
253             encoding |= instr->operands[1].constantValue() >> 2;  in emit_smem_instruction()
259       if (instr->operands.size() >= 2 && instr->operands[1].isConstant() &&  in emit_smem_instruction()
260           instr->operands[1].constantValue() >= 1024)  in emit_smem_instruction()
261          out.push_back(instr->operands[1].constantValue() >> 2);  in emit_smem_instruction()
267       assert(!dlc); /* Device-level coherent is not supported on GFX9 and lower */  in emit_smem_instruction()
284       if (instr->operands.size() >= 2)  in emit_smem_instruction()
285          encoding |= instr->operands[1].isConstant() ? 1 << 17 : 0; /* IMM - immediate enable */  in emit_smem_instruction()
291    if (is_load || instr->operands.size() >= 3) { /* SDATA */  in emit_smem_instruction()
292       encoding |= (is_load ? reg(ctx, instr->definitions[0]) : reg(ctx, instr->operands[2])) << 6;  in emit_smem_instruction()
294    if (instr->operands.size() >= 1) { /* SBASE */  in emit_smem_instruction()
295       encoding |= reg(ctx, instr->operands[0]) >> 1;  in emit_smem_instruction()
307    if (instr->operands.size() >= 2) {  in emit_smem_instruction()
308       const Operand& op_off1 = instr->operands[1];  in emit_smem_instruction()
323          const Operand& op_off2 = instr->operands.back();  in emit_smem_instruction()
339    uint32_t opcode = ctx.opcode[(int)instr->opcode];  in emit_vop2_instruction()
340    const VALU_instruction& valu = instr->valu();  in emit_vop2_instruction()
344    encoding |= reg(ctx, instr->definitions[0], 8) << 17;  in emit_vop2_instruction()
346    encoding |= reg(ctx, instr->operands[1], 8) << 9;  in emit_vop2_instruction()
348    encoding |= reg(ctx, instr->operands[0]);  in emit_vop2_instruction()
356    uint32_t opcode = ctx.opcode[(int)instr->opcode];  in emit_vop1_instruction()
357    const VALU_instruction& valu = instr->valu();  in emit_vop1_instruction()
360    if (!instr->definitions.empty()) {  in emit_vop1_instruction()
361       encoding |= reg(ctx, instr->definitions[0], 8) << 17;  in emit_vop1_instruction()
365    if (!instr->operands.empty()) {  in emit_vop1_instruction()
366       encoding |= reg(ctx, instr->operands[0]);  in emit_vop1_instruction()
375    uint32_t opcode = ctx.opcode[(int)instr->opcode];  in emit_vopc_instruction()
376    const VALU_instruction& valu = instr->valu();  in emit_vopc_instruction()
380    encoding |= reg(ctx, instr->operands[1], 8) << 9;  in emit_vopc_instruction()
382    encoding |= reg(ctx, instr->operands[0]);  in emit_vopc_instruction()
390    uint32_t opcode = ctx.opcode[(int)instr->opcode];  in emit_vintrp_instruction()
391    const VINTRP_instruction& interp = instr->vintrp();  in emit_vintrp_instruction()
394    if (instr->opcode == aco_opcode::v_interp_p1ll_f16 ||  in emit_vintrp_instruction()
395        instr->opcode == aco_opcode::v_interp_p1lv_f16 ||  in emit_vintrp_instruction()
396        instr->opcode == aco_opcode::v_interp_p2_legacy_f16 ||  in emit_vintrp_instruction()
397        instr->opcode == aco_opcode::v_interp_p2_f16 ||  in emit_vintrp_instruction()
398        instr->opcode == aco_opcode::v_interp_p2_hi_f16) {  in emit_vintrp_instruction()
407       unsigned opsel = instr->opcode == aco_opcode::v_interp_p2_hi_f16 ? 0x8 : 0;  in emit_vintrp_instruction()
411       encoding |= reg(ctx, instr->definitions[0], 8);  in emit_vintrp_instruction()
418       encoding |= reg(ctx, instr->operands[0]) << 9;  in emit_vintrp_instruction()
419       if (instr->opcode == aco_opcode::v_interp_p2_f16 ||  in emit_vintrp_instruction()
420           instr->opcode == aco_opcode::v_interp_p2_hi_f16 ||  in emit_vintrp_instruction()
421           instr->opcode == aco_opcode::v_interp_p2_legacy_f16 ||  in emit_vintrp_instruction()
422           instr->opcode == aco_opcode::v_interp_p1lv_f16) {  in emit_vintrp_instruction()
423          encoding |= reg(ctx, instr->operands[2]) << 18;  in emit_vintrp_instruction()
434       encoding |= reg(ctx, instr->definitions[0], 8) << 18;  in emit_vintrp_instruction()
438       if (instr->opcode == aco_opcode::v_interp_mov_f32)  in emit_vintrp_instruction()
439          encoding |= (0x3 & instr->operands[0].constantValue());  in emit_vintrp_instruction()
441          encoding |= reg(ctx, instr->operands[0], 8);  in emit_vintrp_instruction()
450    uint32_t opcode = ctx.opcode[(int)instr->opcode];  in emit_vinterp_inreg_instruction()
451    const VINTERP_inreg_instruction& interp = instr->vinterp_inreg();  in emit_vinterp_inreg_instruction()
454    encoding |= reg(ctx, instr->definitions[0], 8);  in emit_vinterp_inreg_instruction()
462    for (unsigned i = 0; i < instr->operands.size(); i++)  in emit_vinterp_inreg_instruction()
463       encoding |= reg(ctx, instr->operands[i]) << (i * 9);  in emit_vinterp_inreg_instruction()
472    uint32_t opcode = ctx.opcode[(int)instr->opcode];  in emit_vopd_instruction()
473    const VOPD_instruction& vopd = instr->vopd();  in emit_vopd_instruction()
476    encoding |= reg(ctx, instr->operands[0]);  in emit_vopd_instruction()
477    if (instr->opcode != aco_opcode::v_dual_mov_b32)  in emit_vopd_instruction()
478       encoding |= reg(ctx, instr->operands[1], 8) << 9;  in emit_vopd_instruction()
485    encoding = reg(ctx, instr->operands[opy_start]);  in emit_vopd_instruction()
487       encoding |= reg(ctx, instr->operands[opy_start + 1], 8) << 9;  in emit_vopd_instruction()
488    encoding |= (reg(ctx, instr->definitions[1], 8) >> 1) << 17;  in emit_vopd_instruction()
489    encoding |= reg(ctx, instr->definitions[0], 8) << 24;  in emit_vopd_instruction()
496    uint32_t opcode = ctx.opcode[(int)instr->opcode];  in emit_ds_instruction()
497    const DS_instruction& ds = instr->ds();  in emit_ds_instruction()
511    if (!instr->definitions.empty())  in emit_ds_instruction()
512       encoding |= reg(ctx, instr->definitions[0], 8) << 24;  in emit_ds_instruction()
513    for (unsigned i = 0; i < MIN2(instr->operands.size(), 3); i++) {  in emit_ds_instruction()
514       const Operand& op = instr->operands[i];  in emit_ds_instruction()
524    uint32_t opcode = ctx.opcode[(int)instr->opcode];  in emit_ldsdir_instruction()
525    const LDSDIR_instruction& dir = instr->ldsdir();  in emit_ldsdir_instruction()
534    encoding |= reg(ctx, instr->definitions[0], 8);  in emit_ldsdir_instruction()
541    uint32_t opcode = ctx.opcode[(int)instr->opcode];  in emit_mubuf_instruction()
542    const MUBUF_instruction& mubuf = instr->mubuf();  in emit_mubuf_instruction()
543    bool glc = mubuf.cache.value & ac_glc;  in emit_mubuf_instruction()
544    bool slc = mubuf.cache.value & ac_slc;  in emit_mubuf_instruction()
545    bool dlc = mubuf.cache.value & ac_dlc;  in emit_mubuf_instruction()
562       assert(!dlc); /* Device-level coherent is not supported on GFX9 and lower */  in emit_mubuf_instruction()
576    encoding |= reg(ctx, instr->operands[2]) << 24;  in emit_mubuf_instruction()
584    encoding |= (reg(ctx, instr->operands[0]) >> 2) << 16;  in emit_mubuf_instruction()
585    if (instr->operands.size() > 3 && !mubuf.lds)  in emit_mubuf_instruction()
586       encoding |= reg(ctx, instr->operands[3], 8) << 8;  in emit_mubuf_instruction()
588       encoding |= reg(ctx, instr->definitions[0], 8) << 8;  in emit_mubuf_instruction()
589    encoding |= reg(ctx, instr->operands[1], 8);  in emit_mubuf_instruction()
596    uint32_t opcode = ctx.opcode[(int)instr->opcode];  in emit_mubuf_instruction_gfx12()
597    const MUBUF_instruction& mubuf = instr->mubuf();  in emit_mubuf_instruction_gfx12()
602    if (instr->operands[2].isConstant()) {  in emit_mubuf_instruction_gfx12()
603       assert(instr->operands[2].constantValue() == 0);  in emit_mubuf_instruction_gfx12()
606       encoding |= reg(ctx, instr->operands[2]);  in emit_mubuf_instruction_gfx12()
612    if (instr->operands.size() > 3)  in emit_mubuf_instruction_gfx12()
613       encoding |= reg(ctx, instr->operands[3], 8);  in emit_mubuf_instruction_gfx12()
615       encoding |= reg(ctx, instr->definitions[0], 8);  in emit_mubuf_instruction_gfx12()
616    encoding |= reg(ctx, instr->operands[0]) << 9;  in emit_mubuf_instruction_gfx12()
624    if (!instr->operands[1].isUndefined())  in emit_mubuf_instruction_gfx12()
625       encoding |= reg(ctx, instr->operands[1], 8);  in emit_mubuf_instruction_gfx12()
633    uint32_t opcode = ctx.opcode[(int)instr->opcode];  in emit_mtbuf_instruction()
634    const MTBUF_instruction& mtbuf = instr->mtbuf();  in emit_mtbuf_instruction()
635    bool glc = mtbuf.cache.value & ac_glc;  in emit_mtbuf_instruction()
636    bool slc = mtbuf.cache.value & ac_slc;  in emit_mtbuf_instruction()
637    bool dlc = mtbuf.cache.value & ac_dlc;  in emit_mtbuf_instruction()
649       encoding |= (opcode & 0x07) << 16; /* 3 LSBs of 4-bit OPCODE */  in emit_mtbuf_instruction()
666    encoding |= reg(ctx, instr->operands[2]) << 24;  in emit_mtbuf_instruction()
675          encoding |= (((opcode & 0x08) >> 3) << 21); /* MSB of 4-bit OPCODE */  in emit_mtbuf_instruction()
677    encoding |= (reg(ctx, instr->operands[0]) >> 2) << 16;  in emit_mtbuf_instruction()
678    if (instr->operands.size() > 3)  in emit_mtbuf_instruction()
679       encoding |= reg(ctx, instr->operands[3], 8) << 8;  in emit_mtbuf_instruction()
681       encoding |= reg(ctx, instr->definitions[0], 8) << 8;  in emit_mtbuf_instruction()
682    encoding |= reg(ctx, instr->operands[1], 8);  in emit_mtbuf_instruction()
689    uint32_t opcode = ctx.opcode[(int)instr->opcode];  in emit_mtbuf_instruction_gfx12()
690    const MTBUF_instruction& mtbuf = instr->mtbuf();  in emit_mtbuf_instruction_gfx12()
697    if (instr->operands[2].isConstant()) {  in emit_mtbuf_instruction_gfx12()
698       assert(instr->operands[2].constantValue() == 0);  in emit_mtbuf_instruction_gfx12()
701       encoding |= reg(ctx, instr->operands[2]);  in emit_mtbuf_instruction_gfx12()
707    if (instr->operands.size() > 3)  in emit_mtbuf_instruction_gfx12()
708       encoding |= reg(ctx, instr->operands[3], 8);  in emit_mtbuf_instruction_gfx12()
710       encoding |= reg(ctx, instr->definitions[0], 8);  in emit_mtbuf_instruction_gfx12()
711    encoding |= reg(ctx, instr->operands[0]) << 9;  in emit_mtbuf_instruction_gfx12()
719    encoding |= reg(ctx, instr->operands[1], 8);  in emit_mtbuf_instruction_gfx12()
727    uint32_t opcode = ctx.opcode[(int)instr->opcode];  in emit_mimg_instruction()
728    const MIMG_instruction& mimg = instr->mimg();  in emit_mimg_instruction()
729    bool glc = mimg.cache.value & ac_glc;  in emit_mimg_instruction()
730    bool slc = mimg.cache.value & ac_slc;  in emit_mimg_instruction()
731    bool dlc = mimg.cache.value & ac_dlc;  in emit_mimg_instruction()
759          assert(!dlc); /* Device-level coherent is not supported on GFX9 and lower */  in emit_mimg_instruction()
774    encoding = reg(ctx, instr->operands[3], 8); /* VADDR */  in emit_mimg_instruction()
775    if (!instr->definitions.empty()) {  in emit_mimg_instruction()
776       encoding |= reg(ctx, instr->definitions[0], 8) << 8; /* VDATA */  in emit_mimg_instruction()
777    } else if (!instr->operands[2].isUndefined()) {  in emit_mimg_instruction()
778       encoding |= reg(ctx, instr->operands[2], 8) << 8; /* VDATA */  in emit_mimg_instruction()
780    encoding |= (0x1F & (reg(ctx, instr->operands[0]) >> 2)) << 16; /* T# (resource) */  in emit_mimg_instruction()
784       if (!instr->operands[1].isUndefined())  in emit_mimg_instruction()
785          encoding |= (0x1F & (reg(ctx, instr->operands[1]) >> 2)) << 26; /* sampler */  in emit_mimg_instruction()
790       if (!instr->operands[1].isUndefined())  in emit_mimg_instruction()
791          encoding |= (0x1F & (reg(ctx, instr->operands[1]) >> 2)) << 21; /* sampler */  in emit_mimg_instruction()
805       for (unsigned i = 0; i < instr->operands.size() - 4u; i++)  in emit_mimg_instruction()
806          nsa[i / 4] |= reg(ctx, instr->operands[4 + i], 8) << (i % 4 * 8);  in emit_mimg_instruction()
813    uint32_t opcode = ctx.opcode[(int)instr->opcode];  in emit_mimg_instruction_gfx12()
814    const MIMG_instruction& mimg = instr->mimg();  in emit_mimg_instruction_gfx12()
816    bool vsample = !instr->operands[1].isUndefined() || instr->opcode == aco_opcode::image_msaa_load;  in emit_mimg_instruction_gfx12()
833    for (unsigned i = 3; i < instr->operands.size(); i++)  in emit_mimg_instruction_gfx12()
834       vaddr[i - 3] = reg(ctx, instr->operands[i], 8);  in emit_mimg_instruction_gfx12()
835    unsigned num_vaddr = instr->operands.size() - 3;  in emit_mimg_instruction_gfx12()
836    for (unsigned i = 0; i < MIN2(instr->operands.back().size() - 1, 5 - num_vaddr); i++)  in emit_mimg_instruction_gfx12()
837       vaddr[num_vaddr + i] = reg(ctx, instr->operands.back(), 8) + i + 1;  in emit_mimg_instruction_gfx12()
840    if (!instr->definitions.empty())  in emit_mimg_instruction_gfx12()
841       encoding |= reg(ctx, instr->definitions[0], 8); /* VDATA */  in emit_mimg_instruction_gfx12()
842    else if (!instr->operands[2].isUndefined())  in emit_mimg_instruction_gfx12()
843       encoding |= reg(ctx, instr->operands[2], 8); /* VDATA */  in emit_mimg_instruction_gfx12()
844    encoding |= reg(ctx, instr->operands[0]) << 9;  /* T# (resource) */  in emit_mimg_instruction_gfx12()
847       if (instr->opcode != aco_opcode::image_msaa_load)  in emit_mimg_instruction_gfx12()
848          encoding |= reg(ctx, instr->operands[1]) << 23; /* sampler */  in emit_mimg_instruction_gfx12()
865    uint32_t opcode = ctx.opcode[(int)instr->opcode];  in emit_flatlike_instruction()
866    const FLAT_instruction& flat = instr->flatlike();  in emit_flatlike_instruction()  local
867    bool glc = flat.cache.value & ac_glc;  in emit_flatlike_instruction()
868    bool slc = flat.cache.value & ac_slc;  in emit_flatlike_instruction()
869    bool dlc = flat.cache.value & ac_dlc;  in emit_flatlike_instruction()
874       if (instr->isFlat())  in emit_flatlike_instruction()
875          assert(flat.offset <= 0xfff);  in emit_flatlike_instruction()
877          assert(flat.offset >= -4096 && flat.offset < 4096);  in emit_flatlike_instruction()
878       encoding |= flat.offset & 0x1fff;  in emit_flatlike_instruction()
879    } else if (ctx.gfx_level <= GFX8 || instr->isFlat()) {  in emit_flatlike_instruction()
880       /* GFX10 has a 12-bit immediate OFFSET field,  in emit_flatlike_instruction()
883       assert(flat.offset == 0);  in emit_flatlike_instruction()
885       assert(flat.offset >= -2048 && flat.offset <= 2047);  in emit_flatlike_instruction()
886       encoding |= flat.offset & 0xfff;  in emit_flatlike_instruction()
888    if (instr->isScratch())  in emit_flatlike_instruction()
890    else if (instr->isGlobal())  in emit_flatlike_instruction()
892    encoding |= flat.lds ? 1 << 13 : 0;  in emit_flatlike_instruction()
896       assert(!flat.nv);  in emit_flatlike_instruction()
902    encoding = reg(ctx, instr->operands[0], 8);  in emit_flatlike_instruction()
903    if (!instr->definitions.empty())  in emit_flatlike_instruction()
904       encoding |= reg(ctx, instr->definitions[0], 8) << 24;  in emit_flatlike_instruction()
905    if (instr->operands.size() >= 3)  in emit_flatlike_instruction()
906       encoding |= reg(ctx, instr->operands[2], 8) << 8;  in emit_flatlike_instruction()
907    if (!instr->operands[1].isUndefined()) {  in emit_flatlike_instruction()
908       assert(ctx.gfx_level >= GFX10 || instr->operands[1].physReg() != 0x7F);  in emit_flatlike_instruction()
909       assert(instr->format != Format::FLAT);  in emit_flatlike_instruction()
910       encoding |= reg(ctx, instr->operands[1], 8) << 16;  in emit_flatlike_instruction()
911    } else if (instr->format != Format::FLAT ||  in emit_flatlike_instruction()
912               ctx.gfx_level >= GFX10) { /* SADDR is actually used with FLAT on GFX10 */  in emit_flatlike_instruction()
917           (instr->isScratch() && instr->operands[0].isUndefined() && ctx.gfx_level < GFX11))  in emit_flatlike_instruction()
922    if (ctx.gfx_level >= GFX11 && instr->isScratch())  in emit_flatlike_instruction()
923       encoding |= !instr->operands[0].isUndefined() ? 1 << 23 : 0;  in emit_flatlike_instruction()
925       encoding |= flat.nv ? 1 << 23 : 0;  in emit_flatlike_instruction()
933    uint32_t opcode = ctx.opcode[(int)instr->opcode];  in emit_flatlike_instruction_gfx12()
934    const FLAT_instruction& flat = instr->flatlike();  in emit_flatlike_instruction_gfx12()  local
935    assert(!flat.lds);  in emit_flatlike_instruction_gfx12()
939    if (!instr->operands[1].isUndefined()) {  in emit_flatlike_instruction_gfx12()
940       assert(!instr->isFlat());  in emit_flatlike_instruction_gfx12()
941       encoding |= reg(ctx, instr->operands[1]);  in emit_flatlike_instruction_gfx12()
945    if (instr->isScratch())  in emit_flatlike_instruction_gfx12()
947    else if (instr->isGlobal())  in emit_flatlike_instruction_gfx12()
952    if (!instr->definitions.empty())  in emit_flatlike_instruction_gfx12()
953       encoding |= reg(ctx, instr->definitions[0], 8);  in emit_flatlike_instruction_gfx12()
954    if (instr->isScratch())  in emit_flatlike_instruction_gfx12()
955       encoding |= !instr->operands[0].isUndefined() ? 1 << 17 : 0;  in emit_flatlike_instruction_gfx12()
956    encoding |= get_gfx12_cpol(flat) << 18;  in emit_flatlike_instruction_gfx12()
957    if (instr->operands.size() >= 3)  in emit_flatlike_instruction_gfx12()
958       encoding |= reg(ctx, instr->operands[2], 8) << 23;  in emit_flatlike_instruction_gfx12()
962    if (!instr->operands[0].isUndefined())  in emit_flatlike_instruction_gfx12()
963       encoding |= reg(ctx, instr->operands[0], 8);  in emit_flatlike_instruction_gfx12()
964    encoding |= (flat.offset & 0x00ffffff) << 8;  in emit_flatlike_instruction_gfx12()
971    const Export_instruction& exp = instr->exp();  in emit_exp_instruction()
1002    DPP16_instruction& dpp = instr->dpp16();  in emit_dpp16_instruction()
1005    Operand dpp_op = instr->operands[0];  in emit_dpp16_instruction()
1006    instr->operands[0] = Operand(PhysReg{250}, v1);  in emit_dpp16_instruction()
1007    instr->format = (Format)((uint16_t)instr->format & ~(uint16_t)Format::DPP16);  in emit_dpp16_instruction()
1009    instr->format = (Format)((uint16_t)instr->format | (uint16_t)Format::DPP16);  in emit_dpp16_instruction()
1010    instr->operands[0] = dpp_op;  in emit_dpp16_instruction()
1022    encoding |= dpp.opsel[0] && !instr->isVOP3() ? 128 : 0;  in emit_dpp16_instruction()
1030    DPP8_instruction& dpp = instr->dpp8();  in emit_dpp8_instruction()
1033    Operand dpp_op = instr->operands[0];  in emit_dpp8_instruction()
1034    instr->operands[0] = Operand(PhysReg{233u + dpp.fetch_inactive}, v1);  in emit_dpp8_instruction()
1035    instr->format = (Format)((uint16_t)instr->format & ~(uint16_t)Format::DPP8);  in emit_dpp8_instruction()
1037    instr->format = (Format)((uint16_t)instr->format | (uint16_t)Format::DPP8);  in emit_dpp8_instruction()
1038    instr->operands[0] = dpp_op;  in emit_dpp8_instruction()
1041    encoding |= dpp.opsel[0] && !instr->isVOP3() ? 128 : 0;  in emit_dpp8_instruction()
1049    uint32_t opcode = ctx.opcode[(int)instr->opcode];  in emit_vop3_instruction()
1050    const VALU_instruction& vop3 = instr->valu();  in emit_vop3_instruction()
1052    if (instr->isVOP2()) {  in emit_vop3_instruction()
1054    } else if (instr->isVOP1()) {  in emit_vop3_instruction()
1059    } else if (instr->isVOPC()) {  in emit_vop3_instruction()
1061    } else if (instr->isVINTRP()) {  in emit_vop3_instruction()
1087    if (instr->definitions.size() == 2 && instr->isVOPC())  in emit_vop3_instruction()
1088       assert(ctx.gfx_level <= GFX9 && instr->definitions[1].physReg() == exec);  in emit_vop3_instruction()
1089    else if (instr->definitions.size() == 2 && instr->opcode != aco_opcode::v_swap_b16)  in emit_vop3_instruction()
1090       encoding |= reg(ctx, instr->definitions[1]) << 8;  in emit_vop3_instruction()
1091    encoding |= reg(ctx, instr->definitions[0], 8);  in emit_vop3_instruction()
1095    unsigned num_ops = instr->operands.size();  in emit_vop3_instruction()
1097    if (instr->opcode == aco_opcode::v_writelane_b32_e64)  in emit_vop3_instruction()
1099    else if (instr->opcode == aco_opcode::v_swap_b16)  in emit_vop3_instruction()
1103       encoding |= reg(ctx, instr->operands[i]) << (i * 9);  in emit_vop3_instruction()
1113    uint32_t opcode = ctx.opcode[(int)instr->opcode];  in emit_vop3p_instruction()
1114    const VALU_instruction& vop3 = instr->valu();  in emit_vop3p_instruction()
1131    encoding |= reg(ctx, instr->definitions[0], 8);  in emit_vop3p_instruction()
1134    for (unsigned i = 0; i < instr->operands.size(); i++)  in emit_vop3p_instruction()
1135       encoding |= reg(ctx, instr->operands[i]) << (i * 9);  in emit_vop3p_instruction()
1146    SDWA_instruction& sdwa = instr->sdwa();  in emit_sdwa_instruction()
1149    Operand sdwa_op = instr->operands[0];  in emit_sdwa_instruction()
1150    instr->operands[0] = Operand(PhysReg{249}, v1);  in emit_sdwa_instruction()
1151    instr->format = (Format)((uint16_t)instr->format & ~(uint16_t)Format::SDWA);  in emit_sdwa_instruction()
1153    instr->format = (Format)((uint16_t)instr->format | (uint16_t)Format::SDWA);  in emit_sdwa_instruction()
1154    instr->operands[0] = sdwa_op;  in emit_sdwa_instruction()
1158    if (instr->isVOPC()) {  in emit_sdwa_instruction()
1159       if (instr->definitions[0].physReg() !=  in emit_sdwa_instruction()
1160           (ctx.gfx_level >= GFX10 && is_cmpx(instr->opcode) ? exec : vcc)) {  in emit_sdwa_instruction()
1161          encoding |= reg(ctx, instr->definitions[0]) << 8;  in emit_sdwa_instruction()
1166       encoding |= sdwa.dst_sel.to_sdwa_sel(instr->definitions[0].physReg().byte()) << 8;  in emit_sdwa_instruction()
1168       if (instr->definitions[0].bytes() < 4) /* dst_preserve */  in emit_sdwa_instruction()
1180    if (instr->operands.size() >= 2) {  in emit_sdwa_instruction()
1181       encoding |= sdwa.sel[1].to_sdwa_sel(instr->operands[1].physReg().byte()) << 24;  in emit_sdwa_instruction()
1189    if (instr->operands.size() >= 2)  in emit_sdwa_instruction()
1190       encoding |= (instr->operands[1].physReg() < 256) << 31;  in emit_sdwa_instruction()
1197    /* lower remaining pseudo-instructions */  in emit_instruction()
1198    if (instr->opcode == aco_opcode::p_constaddr_getpc) {  in emit_instruction()
1199       ctx.constaddrs[instr->operands[0].constantValue()].getpc_end = out.size() + 1;  in emit_instruction()
1201       instr->opcode = aco_opcode::s_getpc_b64;  in emit_instruction()
1202       instr->operands.pop_back();  in emit_instruction()
1203    } else if (instr->opcode == aco_opcode::p_constaddr_addlo) {  in emit_instruction()
1204       ctx.constaddrs[instr->operands[2].constantValue()].add_literal = out.size() + 1;  in emit_instruction()
1206       instr->opcode = aco_opcode::s_add_u32;  in emit_instruction()
1207       instr->operands.pop_back();  in emit_instruction()
1208       assert(instr->operands[1].isConstant());  in emit_instruction()
1210       instr->operands[1] = Operand::literal32(instr->operands[1].constantValue());  in emit_instruction()
1211    } else if (instr->opcode == aco_opcode::p_resumeaddr_getpc) {  in emit_instruction()
1212       ctx.resumeaddrs[instr->operands[0].constantValue()].getpc_end = out.size() + 1;  in emit_instruction()
1214       instr->opcode = aco_opcode::s_getpc_b64;  in emit_instruction()
1215       instr->operands.pop_back();  in emit_instruction()
1216    } else if (instr->opcode == aco_opcode::p_resumeaddr_addlo) {  in emit_instruction()
1217       ctx.resumeaddrs[instr->operands[2].constantValue()].add_literal = out.size() + 1;  in emit_instruction()
1219       instr->opcode = aco_opcode::s_add_u32;  in emit_instruction()
1220       instr->operands.pop_back();  in emit_instruction()
1221       assert(instr->operands[1].isConstant());  in emit_instruction()
1223       instr->operands[1] = Operand::literal32(instr->operands[1].constantValue());  in emit_instruction()
1224    } else if (instr->opcode == aco_opcode::p_load_symbol) {  in emit_instruction()
1225       assert(instr->operands[0].isConstant());  in emit_instruction()
1229       info.id = (enum aco_symbol_id)instr->operands[0].constantValue();  in emit_instruction()
1231       ctx.symbols->push_back(info);  in emit_instruction()
1233       instr->opcode = aco_opcode::s_mov_b32;  in emit_instruction()
1235       instr->operands[0] = Operand::literal32(0);  in emit_instruction()
1236    } else if (instr->opcode == aco_opcode::p_debug_info) {  in emit_instruction()
1237       assert(instr->operands[0].isConstant());  in emit_instruction()
1238       uint32_t index = instr->operands[0].constantValue();  in emit_instruction()
1239       ctx.program->debug_info[index].offset = (out.size() - 1) * 4;  in emit_instruction()
1244    if ((instr->isVOP1() || instr->isVOP2() || instr->isVOPC()) && !instr->isVOP3() &&  in emit_instruction()
1246       instr->format = asVOP3(instr->format);  in emit_instruction()
1247       if (instr->opcode == aco_opcode::v_fmaak_f16) {  in emit_instruction()
1248          instr->opcode = aco_opcode::v_fma_f16;  in emit_instruction()
1249          instr->format = (Format)((uint32_t)instr->format & ~(uint32_t)Format::VOP2);  in emit_instruction()
1250       } else if (instr->opcode == aco_opcode::v_fmamk_f16) {  in emit_instruction()
1251          instr->valu().swapOperands(1, 2);  in emit_instruction()
1252          instr->opcode = aco_opcode::v_fma_f16;  in emit_instruction()
1253          instr->format = (Format)((uint32_t)instr->format & ~(uint32_t)Format::VOP2);  in emit_instruction()
1257    uint32_t opcode = ctx.opcode[(int)instr->opcode];  in emit_instruction()
1258    if (opcode == (uint32_t)-1) {  in emit_instruction()
1275    switch (instr->format) {  in emit_instruction()
1353    case Format::FLAT:  in emit_instruction()
1368       if (instr->opcode != aco_opcode::p_unit_test)  in emit_instruction()
1372       if (instr->isDPP16()) {  in emit_instruction()
1375       } else if (instr->isDPP8()) {  in emit_instruction()
1378       } else if (instr->isVOP3()) {  in emit_instruction()
1380       } else if (instr->isVOP3P()) {  in emit_instruction()
1382       } else if (instr->isSDWA()) {  in emit_instruction()
1391    for (const Operand& op : instr->operands) {  in emit_instruction()
1421    for (Block& block : program->blocks) {  in fix_exports()
1426          if ((*it)->isEXP()) {  in fix_exports()
1427             Export_instruction& exp = (*it)->exp();  in fix_exports()
1428             if (program->stage.hw == AC_HW_VERTEX_SHADER ||  in fix_exports()
1429                 program->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER) {  in fix_exports()
1441          } else if ((*it)->definitions.size() && (*it)->definitions[0].physReg() == exec) {  in fix_exports()
1449    bool may_skip_export = program->stage.hw == AC_HW_PIXEL_SHADER && program->gfx_level >= GFX10;  in fix_exports()
1453       bool is_vertex_or_ngg = (program->stage.hw == AC_HW_VERTEX_SHADER ||  in fix_exports()
1454                                program->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER);  in fix_exports()
1469    for (Block& block : ctx.program->blocks) {  in insert_code()
1514          ctx.branches.begin(), ctx.branches.end(), [&](const branch_info& branch) -> bool  in fix_branches_gfx10()
1515          { return ((int)ctx.program->blocks[branch.target].offset - branch.pos - 1) == 0x3f; });  in fix_branches_gfx10()
1521          insert_code(ctx, out, buggy_branch_it->pos + 1, 1, &s_nop_0);  in fix_branches_gfx10()
1532    Block* new_block = ctx.program->create_and_insert_block();  in chain_branches()
1537    /* Re-direct original branch to new block (offset). */  in chain_branches()
1539    branch.target = new_block->index;  in chain_branches()
1547    const int half_dist = (INT16_MAX - 31) / 2;  in chain_branches()
1548    const unsigned upper_start = MIN2(ctx.program->blocks[target].offset, branch.pos) + half_dist;  in chain_branches()
1550    const unsigned lower_end = MAX2(ctx.program->blocks[target].offset, branch.pos) - half_dist;  in chain_branches()
1551    const unsigned lower_start = lower_end - half_dist;  in chain_branches()
1553    for (unsigned i = 0; i < ctx.program->blocks.size() - 1; i++) {  in chain_branches()
1554       Block& block = ctx.program->blocks[i];  in chain_branches()
1555       Block& next = ctx.program->blocks[i + 1];  in chain_branches()
1566           block.instructions.back()->opcode == aco_opcode::s_branch) {  in chain_branches()
1578       while (ctx.program->blocks[insertion_block_idx + 1].offset < upper_end)  in chain_branches()
1581       insert_at = ctx.program->blocks[insertion_block_idx].offset;  in chain_branches()
1582       auto it = ctx.program->blocks[insertion_block_idx].instructions.begin();  in chain_branches()
1586          while (skip-- > 0 || insert_at < upper_start) {  in chain_branches()
1587             Instruction* instr = (it++)->get();  in chain_branches()
1588             if (instr->isSOPP()) {  in chain_branches()
1589                if (instr->opcode == aco_opcode::s_clause)  in chain_branches()
1590                   skip = instr->salu().imm + 1;  in chain_branches()
1591                else if (instr->opcode == aco_opcode::s_delay_alu)  in chain_branches()
1592                   skip = ((instr->salu().imm >> 4) & 0x7) + 1;  in chain_branches()
1593                else if (instr->opcode == aco_opcode::s_branch)  in chain_branches()
1606          bld.reset(&ctx.program->blocks[insertion_block_idx].instructions, it);  in chain_branches()
1608          bld.reset(&ctx.program->blocks[insertion_block_idx - 1].instructions);  in chain_branches()
1613       if (ctx.program->gfx_level == GFX10) {  in chain_branches()
1628    new_block->offset = block_offset;  in chain_branches()
1632       ctx.branches.push_back({block_offset - 1, skip_branch_target});  in chain_branches()
1649          int offset = (int)ctx.program->blocks[branch.target].offset - branch.pos - 1;  in fix_branches()
1667       out[info.add_literal] += (out.size() - info.getpc_end) * 4u;  in fix_constaddrs()
1673          ctx.symbols->push_back(sym);  in fix_constaddrs()
1678       const Block& block = ctx.program->blocks[out[info.add_literal]];  in fix_constaddrs()
1680       out[info.add_literal] = (block.offset - info.getpc_end) * 4u;  in fix_constaddrs()
1688    if (ctx.loop_header != -1u &&  in align_block()
1689        block.loop_nest_depth < ctx.program->blocks[ctx.loop_header].loop_nest_depth) {  in align_block()
1690       assert(ctx.loop_exit != -1u);  in align_block()
1691       Block& loop_header = ctx.program->blocks[ctx.loop_header];  in align_block()
1692       Block& loop_exit = ctx.program->blocks[ctx.loop_exit];  in align_block()
1693       ctx.loop_header = -1u;  in align_block()
1694       ctx.loop_exit = -1u;  in align_block()
1697       const unsigned loop_num_cl = DIV_ROUND_UP(block.offset - loop_header.offset, 16);  in align_block()
1699       /* On GFX10.3+, change the prefetch mode if the loop fits into 2 or 3 cache lines.  in align_block()
1702       const bool change_prefetch = ctx.program->gfx_level >= GFX10_3 &&  in align_block()
1703                                    ctx.program->gfx_level <= GFX11 && loop_num_cl > 1 &&  in align_block()
1707          Builder bld(ctx.program, &ctx.program->blocks[loop_header.linear_preds[0]]);  in align_block()
1724       const unsigned loop_end_cl = (block.offset - 1) >> 4;  in align_block()
1726       /* Align the loop if it fits into the fetched cache lines or if we can  in align_block()
1727        * reduce the number of cache lines with less than 8 NOPs.  in align_block()
1729       const bool align_loop = loop_end_cl - loop_start_cl >= loop_num_cl &&  in align_block()
1734          nops.resize(16 - (loop_header.offset % 16), 0xbf800000u);  in align_block()
1740       /* In case of nested loops, only handle the inner-most loops in order  in align_block()
1742        * Also ignore loops without back-edge.  in align_block()
1746          ctx.loop_exit = -1u;  in align_block()
1752     * This works, because control flow always re-converges after loops.  in align_block()
1754    if (ctx.loop_header != -1u && ctx.loop_exit == -1u) {  in align_block()
1756          Block& succ = ctx.program->blocks[succ_idx];  in align_block()
1757          if (succ.loop_nest_depth < ctx.program->blocks[ctx.loop_header].loop_nest_depth)  in align_block()
1762    /* align resume shaders with cache line */  in align_block()
1777       (program->stage.sw == SWStage::VS || program->stage.sw == SWStage::TES) &&  in emit_program()
1778       program->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER &&  in emit_program()
1779       program->info.merged_shader_compiled_separately;  in emit_program()
1782    if (!program->is_prolog && !program->info.ps.has_epilog &&  in emit_program()
1784        (program->stage.hw == AC_HW_VERTEX_SHADER || program->stage.hw == AC_HW_PIXEL_SHADER ||  in emit_program()
1785         program->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER))  in emit_program()
1788    for (Block& block : program->blocks) {  in emit_program()
1798    /* Add end-of-code markers for the UMR disassembler. */  in emit_program()
1804    while (program->constant_data.size() % 4u)  in emit_program()
1805       program->constant_data.push_back(0);  in emit_program()
1807    code.insert(code.end(), (uint32_t*)program->constant_data.data(),  in emit_program()
1808                (uint32_t*)(program->constant_data.data() + program->constant_data.size()));  in emit_program()
1810    program->config->scratch_bytes_per_wave =  in emit_program()
1811       align(program->config->scratch_bytes_per_wave, program->dev.scratch_alloc_granule);  in emit_program()