Lines Matching refs:instr
39 perfwarn(Program* program, bool cond, const char* msg, Instruction* instr) in perfwarn() argument
49 aco_print_instr(instr, memf); in perfwarn()
81 mad_info(aco_ptr<Instruction> instr, uint32_t id) in mad_info()
82 : add_instr(std::move(instr)), mul_temp_id(id), literal_idx(0), check_literal(false) in mad_info()
148 Instruction* instr; member
187 instr = vec; in set_vec()
272 instr = mul; in set_mul()
289 instr = mad; in set_mad()
297 instr = mul; in set_omod2()
305 instr = mul; in set_omod4()
313 instr = mul; in set_omod5()
321 instr = med3; in set_clamp()
349 instr = add_sub_instr; in set_add_sub()
357 instr = bitwise_instr; in set_bitwise()
369 instr = minmax_instr; in set_minmax()
377 instr = vopc_instr; in set_vopc()
417 instr = label_instr; in set_usedef()
425 instr = vop3p_instr; in set_vop3p()
445 instr = extract; in set_extract()
453 instr = insert; in set_insert()
461 instr = mov; in set_dpp()
478 can_use_VOP3(opt_ctx& ctx, const aco_ptr<Instruction>& instr) in can_use_VOP3() argument
480 if (instr->isVOP3()) in can_use_VOP3()
483 if (instr->isVOP3P()) in can_use_VOP3()
486 if (instr->operands.size() && instr->operands[0].isLiteral() && ctx.program->chip_class < GFX10) in can_use_VOP3()
489 if (instr->isDPP() || instr->isSDWA()) in can_use_VOP3()
492 return instr->opcode != aco_opcode::v_madmk_f32 && instr->opcode != aco_opcode::v_madak_f32 && in can_use_VOP3()
493 instr->opcode != aco_opcode::v_madmk_f16 && instr->opcode != aco_opcode::v_madak_f16 && in can_use_VOP3()
494 instr->opcode != aco_opcode::v_fmamk_f32 && instr->opcode != aco_opcode::v_fmaak_f32 && in can_use_VOP3()
495 instr->opcode != aco_opcode::v_fmamk_f16 && instr->opcode != aco_opcode::v_fmaak_f16 && in can_use_VOP3()
496 instr->opcode != aco_opcode::v_readlane_b32 && in can_use_VOP3()
497 instr->opcode != aco_opcode::v_writelane_b32 && in can_use_VOP3()
498 instr->opcode != aco_opcode::v_readfirstlane_b32; in can_use_VOP3()
502 pseudo_propagate_temp(opt_ctx& ctx, aco_ptr<Instruction>& instr, Temp temp, unsigned index) in pseudo_propagate_temp() argument
504 if (instr->definitions.empty()) in pseudo_propagate_temp()
508 instr->opcode == aco_opcode::p_as_uniform || in pseudo_propagate_temp()
509 std::all_of(instr->definitions.begin(), instr->definitions.end(), in pseudo_propagate_temp()
518 std::none_of(instr->definitions.begin(), instr->definitions.end(), in pseudo_propagate_temp()
521 switch (instr->opcode) { in pseudo_propagate_temp()
526 if (temp.bytes() != instr->operands[index].bytes()) in pseudo_propagate_temp()
537 if (temp.bytes() > instr->operands[index].bytes()) in pseudo_propagate_temp()
544 int decrease = instr->operands[index].bytes() - temp.bytes(); in pseudo_propagate_temp()
546 decrease -= instr->definitions.back().bytes(); in pseudo_propagate_temp()
547 instr->definitions.pop_back(); in pseudo_propagate_temp()
553 if (temp.regClass() == instr->definitions[0].regClass()) in pseudo_propagate_temp()
554 instr->opcode = aco_opcode::p_parallelcopy; in pseudo_propagate_temp()
559 instr->operands[index].setTemp(temp); in pseudo_propagate_temp()
565 can_apply_sgprs(opt_ctx& ctx, aco_ptr<Instruction>& instr) in can_apply_sgprs() argument
567 if (instr->isSDWA() && ctx.program->chip_class < GFX9) in can_apply_sgprs()
569 return instr->opcode != aco_opcode::v_readfirstlane_b32 && in can_apply_sgprs()
570 instr->opcode != aco_opcode::v_readlane_b32 && in can_apply_sgprs()
571 instr->opcode != aco_opcode::v_readlane_b32_e64 && in can_apply_sgprs()
572 instr->opcode != aco_opcode::v_writelane_b32 && in can_apply_sgprs()
573 instr->opcode != aco_opcode::v_writelane_b32_e64 && in can_apply_sgprs()
574 instr->opcode != aco_opcode::v_permlane16_b32 && in can_apply_sgprs()
575 instr->opcode != aco_opcode::v_permlanex16_b32; in can_apply_sgprs()
579 to_VOP3(opt_ctx& ctx, aco_ptr<Instruction>& instr) in to_VOP3() argument
581 if (instr->isVOP3()) in to_VOP3()
584 aco_ptr<Instruction> tmp = std::move(instr); in to_VOP3()
586 instr.reset(create_instruction<VOP3_instruction>(tmp->opcode, format, tmp->operands.size(), in to_VOP3()
588 std::copy(tmp->operands.cbegin(), tmp->operands.cend(), instr->operands.begin()); in to_VOP3()
589 for (unsigned i = 0; i < instr->definitions.size(); i++) { in to_VOP3()
590 instr->definitions[i] = tmp->definitions[i]; in to_VOP3()
591 if (instr->definitions[i].isTemp()) { in to_VOP3()
592 ssa_info& info = ctx.info[instr->definitions[i].tempId()]; in to_VOP3()
593 if (info.label & instr_usedef_labels && info.instr == tmp.get()) in to_VOP3()
594 info.instr = instr.get(); in to_VOP3()
608 to_SDWA(opt_ctx& ctx, aco_ptr<Instruction>& instr) in to_SDWA() argument
610 aco_ptr<Instruction> tmp = convert_to_SDWA(ctx.program->chip_class, instr); in to_SDWA()
614 for (unsigned i = 0; i < instr->definitions.size(); i++) { in to_SDWA()
615 ssa_info& info = ctx.info[instr->definitions[i].tempId()]; in to_SDWA()
616 if (info.label & instr_labels && info.instr == tmp.get()) in to_SDWA()
617 info.instr = instr.get(); in to_SDWA()
646 valu_can_accept_vgpr(aco_ptr<Instruction>& instr, unsigned operand) in valu_can_accept_vgpr() argument
648 if (instr->opcode == aco_opcode::v_readlane_b32 || in valu_can_accept_vgpr()
649 instr->opcode == aco_opcode::v_readlane_b32_e64 || in valu_can_accept_vgpr()
650 instr->opcode == aco_opcode::v_writelane_b32 || in valu_can_accept_vgpr()
651 instr->opcode == aco_opcode::v_writelane_b32_e64) in valu_can_accept_vgpr()
653 if (instr->opcode == aco_opcode::v_permlane16_b32 || in valu_can_accept_vgpr()
654 instr->opcode == aco_opcode::v_permlanex16_b32) in valu_can_accept_vgpr()
709 parse_base_offset(opt_ctx& ctx, Instruction* instr, unsigned op_index, Temp* base, uint32_t* offset, in parse_base_offset() argument
712 Operand op = instr->operands[op_index]; in parse_base_offset()
720 Instruction* add_instr = ctx.info[tmp.id()].instr; in parse_base_offset()
761 get_operand_size(aco_ptr<Instruction>& instr, unsigned index) in get_operand_size() argument
763 if (instr->isPseudo()) in get_operand_size()
764 return instr->operands[index].bytes() * 8u; in get_operand_size()
765 else if (instr->opcode == aco_opcode::v_mad_u64_u32 || in get_operand_size()
766 instr->opcode == aco_opcode::v_mad_i64_i32) in get_operand_size()
768 else if (instr->isVALU() || instr->isSALU()) in get_operand_size()
769 return instr_info.operand_size[(int)instr->opcode]; in get_operand_size()
789 parse_extract(Instruction* instr) in parse_extract() argument
791 if (instr->opcode == aco_opcode::p_extract) { in parse_extract()
792 unsigned size = instr->operands[2].constantValue() / 8; in parse_extract()
793 unsigned offset = instr->operands[1].constantValue() * size; in parse_extract()
794 bool sext = instr->operands[3].constantEquals(1); in parse_extract()
796 } else if (instr->opcode == aco_opcode::p_insert && instr->operands[1].constantEquals(0)) { in parse_extract()
797 return instr->operands[2].constantEquals(8) ? SubdwordSel::ubyte : SubdwordSel::uword; in parse_extract()
804 parse_insert(Instruction* instr) in parse_insert() argument
806 if (instr->opcode == aco_opcode::p_extract && instr->operands[3].constantEquals(0) && in parse_insert()
807 instr->operands[1].constantEquals(0)) { in parse_insert()
808 return instr->operands[2].constantEquals(8) ? SubdwordSel::ubyte : SubdwordSel::uword; in parse_insert()
809 } else if (instr->opcode == aco_opcode::p_insert) { in parse_insert()
810 unsigned size = instr->operands[2].constantValue() / 8; in parse_insert()
811 unsigned offset = instr->operands[1].constantValue() * size; in parse_insert()
819 can_apply_extract(opt_ctx& ctx, aco_ptr<Instruction>& instr, unsigned idx, ssa_info& info) in can_apply_extract() argument
824 Temp tmp = info.instr->operands[0].getTemp(); in can_apply_extract()
825 SubdwordSel sel = parse_extract(info.instr); in can_apply_extract()
831 } else if (instr->opcode == aco_opcode::v_cvt_f32_u32 && sel.size() == 1 && !sel.sign_extend()) { in can_apply_extract()
833 } else if (can_use_SDWA(ctx.program->chip_class, instr, true) && in can_apply_extract()
835 if (instr->isSDWA() && instr->sdwa().sel[idx] != SubdwordSel::dword) in can_apply_extract()
838 } else if (instr->isVOP3() && sel.size() == 2 && in can_apply_extract()
839 can_use_opsel(ctx.program->chip_class, instr->opcode, idx, sel.offset()) && in can_apply_extract()
840 !(instr->vop3().opsel & (1 << idx))) { in can_apply_extract()
851 apply_extract(opt_ctx& ctx, aco_ptr<Instruction>& instr, unsigned idx, ssa_info& info) in apply_extract() argument
853 Temp tmp = info.instr->operands[0].getTemp(); in apply_extract()
854 SubdwordSel sel = parse_extract(info.instr); in apply_extract()
857 instr->operands[idx].set16bit(false); in apply_extract()
858 instr->operands[idx].set24bit(false); in apply_extract()
864 } else if (instr->opcode == aco_opcode::v_cvt_f32_u32 && sel.size() == 1 && !sel.sign_extend()) { in apply_extract()
866 case 0: instr->opcode = aco_opcode::v_cvt_f32_ubyte0; break; in apply_extract()
867 case 1: instr->opcode = aco_opcode::v_cvt_f32_ubyte1; break; in apply_extract()
868 case 2: instr->opcode = aco_opcode::v_cvt_f32_ubyte2; break; in apply_extract()
869 case 3: instr->opcode = aco_opcode::v_cvt_f32_ubyte3; break; in apply_extract()
871 } else if (instr->opcode == aco_opcode::v_lshlrev_b32 && instr->operands[0].isConstant() && in apply_extract()
873 ((sel.size() == 2 && instr->operands[0].constantValue() >= 16u) || in apply_extract()
874 (sel.size() == 1 && instr->operands[0].constantValue() >= 24u))) { in apply_extract()
877 } else if (can_use_SDWA(ctx.program->chip_class, instr, true) && in apply_extract()
879 to_SDWA(ctx, instr); in apply_extract()
880 static_cast<SDWA_instruction*>(instr.get())->sel[idx] = sel; in apply_extract()
881 } else if (instr->isVOP3()) { in apply_extract()
883 instr->vop3().opsel |= 1 << idx; in apply_extract()
887 for (Definition& def : instr->definitions) in apply_extract()
892 check_sdwa_extract(opt_ctx& ctx, aco_ptr<Instruction>& instr) in check_sdwa_extract() argument
894 for (unsigned i = 0; i < instr->operands.size(); i++) { in check_sdwa_extract()
895 Operand op = instr->operands[i]; in check_sdwa_extract()
899 if (info.is_extract() && (info.instr->operands[0].getTemp().type() == RegType::vgpr || in check_sdwa_extract()
901 if (!can_apply_extract(ctx, instr, i, info)) in check_sdwa_extract()
926 can_eliminate_fcanonicalize(opt_ctx& ctx, aco_ptr<Instruction>& instr, Temp tmp) in can_eliminate_fcanonicalize() argument
933 aco_opcode op = instr->opcode; in can_eliminate_fcanonicalize()
938 is_copy_label(opt_ctx& ctx, aco_ptr<Instruction>& instr, ssa_info& info) in is_copy_label() argument
941 (info.is_fcanonicalize() && can_eliminate_fcanonicalize(ctx, instr, info.temp)); in is_copy_label()
963 label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr) in label_instruction() argument
965 if (instr->isSALU() || instr->isVALU() || instr->isPseudo()) { in label_instruction()
967 for (Operand& op : instr->operands) in label_instruction()
970 perfwarn(ctx.program, all_const, "All instruction operands are constant", instr.get()); in label_instruction()
972 ASSERTED bool is_copy = instr->opcode == aco_opcode::s_mov_b32 || in label_instruction()
973 instr->opcode == aco_opcode::s_mov_b64 || in label_instruction()
974 instr->opcode == aco_opcode::v_mov_b32; in label_instruction()
975 perfwarn(ctx.program, is_copy && !instr->usesModifiers(), "Use p_parallelcopy instead", in label_instruction()
976 instr.get()); in label_instruction()
979 for (unsigned i = 0; i < instr->operands.size(); i++) { in label_instruction()
980 if (!instr->operands[i].isTemp()) in label_instruction()
983 ssa_info info = ctx.info[instr->operands[i].tempId()]; in label_instruction()
985 if (info.is_undefined() && is_phi(instr)) in label_instruction()
986 instr->operands[i] = Operand(instr->operands[i].regClass()); in label_instruction()
988 while (info.is_temp() && info.temp.regClass() == instr->operands[i].getTemp().regClass()) { in label_instruction()
989 instr->operands[i].setTemp(ctx.info[instr->operands[i].tempId()].temp); in label_instruction()
994 if (instr->isPseudo()) { in label_instruction()
996 pseudo_propagate_temp(ctx, instr, info.temp, i); in label_instruction()
1002 if (instr->isSALU() || instr->isPseudo()) { in label_instruction()
1003 unsigned bits = get_operand_size(instr, i); in label_instruction()
1004 if ((info.is_constant(bits) || (info.is_literal(bits) && instr->isPseudo())) && in label_instruction()
1005 !instr->operands[i].isFixed() && alu_can_accept_constant(instr->opcode, i)) { in label_instruction()
1006 instr->operands[i] = get_constant_op(ctx, info, bits); in label_instruction()
1012 else if (instr->isVALU()) { in label_instruction()
1013 if (is_copy_label(ctx, instr, info) && info.temp.type() == RegType::vgpr && in label_instruction()
1014 valu_can_accept_vgpr(instr, i)) { in label_instruction()
1015 instr->operands[i].setTemp(info.temp); in label_instruction()
1019 if (info.is_temp() && info.temp.type() == RegType::sgpr && can_apply_sgprs(ctx, instr) && in label_instruction()
1020 instr->operands.size() == 1) { in label_instruction()
1021 instr->format = withoutDPP(instr->format); in label_instruction()
1022 instr->operands[i].setTemp(info.temp); in label_instruction()
1029 instr->opcode != aco_opcode::v_cndmask_b32 || instr->operands[i].getTemp().bytes() == 4; in label_instruction()
1030 can_use_mod = can_use_mod && instr_info.can_use_input_modifiers[(int)instr->opcode]; in label_instruction()
1032 if (instr->isSDWA()) in label_instruction()
1033 can_use_mod = can_use_mod && instr->sdwa().sel[i].size() == 4; in label_instruction()
1035 can_use_mod = can_use_mod && (instr->isDPP() || can_use_VOP3(ctx, instr)); in label_instruction()
1037 if (info.is_neg() && instr->opcode == aco_opcode::v_add_f32) { in label_instruction()
1038 instr->opcode = i ? aco_opcode::v_sub_f32 : aco_opcode::v_subrev_f32; in label_instruction()
1039 instr->operands[i].setTemp(info.temp); in label_instruction()
1040 } else if (info.is_neg() && instr->opcode == aco_opcode::v_add_f16) { in label_instruction()
1041 instr->opcode = i ? aco_opcode::v_sub_f16 : aco_opcode::v_subrev_f16; in label_instruction()
1042 instr->operands[i].setTemp(info.temp); in label_instruction()
1044 can_eliminate_fcanonicalize(ctx, instr, info.temp)) { in label_instruction()
1045 if (!instr->isDPP() && !instr->isSDWA()) in label_instruction()
1046 to_VOP3(ctx, instr); in label_instruction()
1047 instr->operands[i].setTemp(info.temp); in label_instruction()
1048 if (instr->isDPP() && !instr->dpp().abs[i]) in label_instruction()
1049 instr->dpp().neg[i] = true; in label_instruction()
1050 else if (instr->isSDWA() && !instr->sdwa().abs[i]) in label_instruction()
1051 instr->sdwa().neg[i] = true; in label_instruction()
1052 else if (instr->isVOP3() && !instr->vop3().abs[i]) in label_instruction()
1053 instr->vop3().neg[i] = true; in label_instruction()
1055 if (info.is_abs() && can_use_mod && can_eliminate_fcanonicalize(ctx, instr, info.temp)) { in label_instruction()
1056 if (!instr->isDPP() && !instr->isSDWA()) in label_instruction()
1057 to_VOP3(ctx, instr); in label_instruction()
1058 instr->operands[i] = Operand(info.temp); in label_instruction()
1059 if (instr->isDPP()) in label_instruction()
1060 instr->dpp().abs[i] = true; in label_instruction()
1061 else if (instr->isSDWA()) in label_instruction()
1062 instr->sdwa().abs[i] = true; in label_instruction()
1064 instr->vop3().abs[i] = true; in label_instruction()
1068 unsigned bits = get_operand_size(instr, i); in label_instruction()
1069 if (info.is_constant(bits) && alu_can_accept_constant(instr->opcode, i) && in label_instruction()
1070 (!instr->isSDWA() || ctx.program->chip_class >= GFX9)) { in label_instruction()
1072 perfwarn(ctx.program, instr->opcode == aco_opcode::v_cndmask_b32 && i == 2, in label_instruction()
1073 "v_cndmask_b32 with a constant selector", instr.get()); in label_instruction()
1074 if (i == 0 || instr->isSDWA() || instr->isVOP3P() || in label_instruction()
1075 instr->opcode == aco_opcode::v_readlane_b32 || in label_instruction()
1076 instr->opcode == aco_opcode::v_writelane_b32) { in label_instruction()
1077 instr->format = withoutDPP(instr->format); in label_instruction()
1078 instr->operands[i] = op; in label_instruction()
1080 } else if (!instr->isVOP3() && can_swap_operands(instr, &instr->opcode)) { in label_instruction()
1081 instr->operands[i] = instr->operands[0]; in label_instruction()
1082 instr->operands[0] = op; in label_instruction()
1084 } else if (can_use_VOP3(ctx, instr)) { in label_instruction()
1085 to_VOP3(ctx, instr); in label_instruction()
1086 instr->operands[i] = op; in label_instruction()
1093 else if (instr->isMUBUF()) { in label_instruction()
1094 MUBUF_instruction& mubuf = instr->mubuf(); in label_instruction()
1112 instr->operands[1] = Operand(v1); in label_instruction()
1117 instr->operands[2] = Operand::c32(0); in label_instruction()
1121 parse_base_offset(ctx, instr.get(), i, &base, &offset, in label_instruction()
1125 instr->operands[1].setTemp(base); in label_instruction()
1129 parse_base_offset(ctx, instr.get(), i, &base, &offset, in label_instruction()
1132 instr->operands[i].setTemp(base); in label_instruction()
1139 else if (instr->isDS()) { in label_instruction()
1141 DS_instruction& ds = instr->ds(); in label_instruction()
1146 parse_base_offset(ctx, instr.get(), i, &base, &offset, false) && in label_instruction()
1147 base.regClass() == instr->operands[i].regClass() && in label_instruction()
1148 instr->opcode != aco_opcode::ds_swizzle_b32) { in label_instruction()
1149 if (instr->opcode == aco_opcode::ds_write2_b32 || in label_instruction()
1150 instr->opcode == aco_opcode::ds_read2_b32 || in label_instruction()
1151 instr->opcode == aco_opcode::ds_write2_b64 || in label_instruction()
1152 instr->opcode == aco_opcode::ds_read2_b64) { in label_instruction()
1153 unsigned mask = (instr->opcode == aco_opcode::ds_write2_b64 || in label_instruction()
1154 instr->opcode == aco_opcode::ds_read2_b64) in label_instruction()
1157 unsigned shifts = (instr->opcode == aco_opcode::ds_write2_b64 || in label_instruction()
1158 instr->opcode == aco_opcode::ds_read2_b64) in label_instruction()
1164 instr->operands[i].setTemp(base); in label_instruction()
1170 instr->operands[i].setTemp(base); in label_instruction()
1178 else if (instr->isSMEM()) { in label_instruction()
1180 SMEM_instruction& smem = instr->smem(); in label_instruction()
1188 instr->operands[i] = Operand::c32(info.val); in label_instruction()
1191 parse_base_offset(ctx, instr.get(), i, &base, &offset, prevent_overflow) && in label_instruction()
1216 instr.reset(new_instr); in label_instruction()
1222 else if (instr->isBranch()) { in label_instruction()
1223 if (ctx.info[instr->operands[0].tempId()].is_scc_invert()) { in label_instruction()
1225 instr->opcode = instr->opcode == aco_opcode::p_cbranch_z ? aco_opcode::p_cbranch_nz in label_instruction()
1227 instr->operands[0].setTemp(ctx.info[instr->operands[0].tempId()].temp); in label_instruction()
1233 if (instr->definitions.empty()) { in label_instruction()
1234 check_sdwa_extract(ctx, instr); in label_instruction()
1238 if (instr->isVALU() || instr->isVINTRP()) { in label_instruction()
1239 if (instr_info.can_use_output_modifiers[(int)instr->opcode] || instr->isVINTRP() || in label_instruction()
1240 instr->opcode == aco_opcode::v_cndmask_b32) { in label_instruction()
1242 if (!does_fp_op_flush_denorms(ctx, instr->opcode)) { in label_instruction()
1243 unsigned ops = instr->opcode == aco_opcode::v_cndmask_b32 ? 2 : instr->operands.size(); in label_instruction()
1245 canonicalized = is_op_canonicalized(ctx, instr->operands[i]); in label_instruction()
1248 ctx.info[instr->definitions[0].tempId()].set_canonicalized(); in label_instruction()
1251 if (instr->isVOPC()) { in label_instruction()
1252 ctx.info[instr->definitions[0].tempId()].set_vopc(instr.get()); in label_instruction()
1253 check_sdwa_extract(ctx, instr); in label_instruction()
1256 if (instr->isVOP3P()) { in label_instruction()
1257 ctx.info[instr->definitions[0].tempId()].set_vop3p(instr.get()); in label_instruction()
1262 switch (instr->opcode) { in label_instruction()
1264 bool copy_prop = instr->operands.size() == 1 && instr->operands[0].isTemp() && in label_instruction()
1265 instr->operands[0].regClass() == instr->definitions[0].regClass(); in label_instruction()
1267 ctx.info[instr->definitions[0].tempId()].set_temp(instr->operands[0].getTemp()); in label_instruction()
1274 for (const Operand& op : instr->operands) { in label_instruction()
1279 Instruction* vec = ctx.info[op.tempId()].instr; in label_instruction()
1288 if (ops.size() != instr->operands.size()) { in label_instruction()
1289 assert(ops.size() > instr->operands.size()); in label_instruction()
1290 Definition def = instr->definitions[0]; in label_instruction()
1291 instr.reset(create_instruction<Pseudo_instruction>(aco_opcode::p_create_vector, in label_instruction()
1297 instr->operands[i] = ops[i]; in label_instruction()
1299 instr->definitions[0] = def; in label_instruction()
1302 assert(instr->operands[i] == ops[i]); in label_instruction()
1305 ctx.info[instr->definitions[0].tempId()].set_vec(instr.get()); in label_instruction()
1309 ssa_info& info = ctx.info[instr->operands[0].tempId()]; in label_instruction()
1313 for (Definition def : instr->definitions) { in label_instruction()
1323 Instruction* vec = ctx.info[instr->operands[0].tempId()].instr; in label_instruction()
1327 for (unsigned i = 0; i < instr->definitions.size(); in label_instruction()
1328 split_offset += instr->definitions[i++].bytes()) { in label_instruction()
1333 vec->operands[vec_index].bytes() != instr->definitions[i].bytes()) in label_instruction()
1338 ctx.info[instr->definitions[i].tempId()].set_constant(ctx.program->chip_class, in label_instruction()
1341 ctx.info[instr->definitions[i].tempId()].set_undefined(); in label_instruction()
1344 ctx.info[instr->definitions[i].tempId()].set_temp(vec_op.getTemp()); in label_instruction()
1350 ssa_info& info = ctx.info[instr->operands[0].tempId()]; in label_instruction()
1351 const unsigned index = instr->operands[1].constantValue(); in label_instruction()
1352 const unsigned dst_offset = index * instr->definitions[0].bytes(); in label_instruction()
1356 Instruction* vec = info.instr; in label_instruction()
1363 } else if (offset != dst_offset || op.bytes() != instr->definitions[0].bytes()) { in label_instruction()
1366 instr->operands[0] = op; in label_instruction()
1371 uint32_t mask = u_bit_consecutive(0, instr->definitions[0].bytes() * 8u); in label_instruction()
1373 instr->operands[0] = in label_instruction()
1374 Operand::get_const(ctx.program->chip_class, val, instr->definitions[0].bytes()); in label_instruction()
1376 } else if (index == 0 && instr->operands[0].size() == instr->definitions[0].size()) { in label_instruction()
1377 ctx.info[instr->definitions[0].tempId()].set_temp(instr->operands[0].getTemp()); in label_instruction()
1380 if (instr->operands[0].bytes() != instr->definitions[0].bytes()) in label_instruction()
1384 instr->opcode = aco_opcode::p_parallelcopy; in label_instruction()
1385 instr->operands.pop_back(); in label_instruction()
1389 if (instr->operands[0].isTemp() && ctx.info[instr->operands[0].tempId()].is_vec() && in label_instruction()
1390 instr->operands[0].regClass() != instr->definitions[0].regClass()) { in label_instruction()
1394 Instruction* vec = ctx.info[instr->operands[0].tempId()].instr; in label_instruction()
1395 aco_ptr<Instruction> old_copy = std::move(instr); in label_instruction()
1397 instr.reset(create_instruction<Pseudo_instruction>( in label_instruction()
1399 instr->definitions[0] = old_copy->definitions[0]; in label_instruction()
1400 std::copy(vec->operands.begin(), vec->operands.end(), instr->operands.begin()); in label_instruction()
1402 Operand& op = instr->operands[i]; in label_instruction()
1404 ctx.info[op.tempId()].temp.type() == instr->definitions[0].regClass().type()) in label_instruction()
1407 ctx.info[instr->definitions[0].tempId()].set_vec(instr.get()); in label_instruction()
1412 if (instr->definitions[0].isFixed()) { in label_instruction()
1414 } else if (instr->usesModifiers()) { in label_instruction()
1416 } else if (instr->operands[0].isConstant()) { in label_instruction()
1417 ctx.info[instr->definitions[0].tempId()].set_constant( in label_instruction()
1418 ctx.program->chip_class, instr->operands[0].constantValue64()); in label_instruction()
1419 } else if (instr->operands[0].isTemp()) { in label_instruction()
1420 ctx.info[instr->definitions[0].tempId()].set_temp(instr->operands[0].getTemp()); in label_instruction()
1421 if (ctx.info[instr->operands[0].tempId()].is_canonicalized()) in label_instruction()
1422 ctx.info[instr->definitions[0].tempId()].set_canonicalized(); in label_instruction()
1424 assert(instr->operands[0].isFixed()); in label_instruction()
1428 if (instr->isDPP()) { in label_instruction()
1430 assert(instr->dpp().row_mask == 0xf && instr->dpp().bank_mask == 0xf); in label_instruction()
1431 ctx.info[instr->definitions[0].tempId()].set_dpp(instr.get()); in label_instruction()
1436 ctx.info[instr->definitions[0].tempId()].set_constant(ctx.program->chip_class, 0u); in label_instruction()
1438 case aco_opcode::v_mul_f64: ctx.info[instr->definitions[0].tempId()].set_mul(instr.get()); break; in label_instruction()
1441 ctx.info[instr->definitions[0].tempId()].set_mul(instr.get()); in label_instruction()
1444 bool uses_mods = instr->usesModifiers(); in label_instruction()
1445 bool fp16 = instr->opcode == aco_opcode::v_mul_f16; in label_instruction()
1448 if (instr->operands[!i].isConstant() && instr->operands[i].isTemp()) { in label_instruction()
1449 if (!instr->isDPP() && !instr->isSDWA() && in label_instruction()
1450 (instr->operands[!i].constantEquals(fp16 ? 0x3c00 : 0x3f800000) || /* 1.0 */ in label_instruction()
1451 instr->operands[!i].constantEquals(fp16 ? 0xbc00 : 0xbf800000u))) { /* -1.0 */ in label_instruction()
1452 bool neg1 = instr->operands[!i].constantEquals(fp16 ? 0xbc00 : 0xbf800000u); in label_instruction()
1454 VOP3_instruction* vop3 = instr->isVOP3() ? &instr->vop3() : NULL; in label_instruction()
1461 Temp other = instr->operands[i].getTemp(); in label_instruction()
1463 ctx.info[instr->definitions[0].tempId()].set_neg_abs(other); in label_instruction()
1465 ctx.info[instr->definitions[0].tempId()].set_abs(other); in label_instruction()
1467 ctx.info[instr->definitions[0].tempId()].set_neg(other); in label_instruction()
1469 ctx.info[instr->definitions[0].tempId()].set_fcanonicalize(other); in label_instruction()
1472 } else if (instr->operands[!i].constantValue() == in label_instruction()
1474 ctx.info[instr->operands[i].tempId()].set_omod2(instr.get()); in label_instruction()
1475 } else if (instr->operands[!i].constantValue() == in label_instruction()
1477 ctx.info[instr->operands[i].tempId()].set_omod4(instr.get()); in label_instruction()
1478 } else if (instr->operands[!i].constantValue() == in label_instruction()
1480 ctx.info[instr->operands[i].tempId()].set_omod5(instr.get()); in label_instruction()
1481 } else if (instr->operands[!i].constantValue() == 0u && in label_instruction()
1484 ctx.info[instr->definitions[0].tempId()].set_constant(ctx.program->chip_class, 0u); in label_instruction()
1496 ctx.info[instr->definitions[0].tempId()].set_usedef(instr.get()); in label_instruction()
1500 VOP3_instruction& vop3 = instr->vop3(); in label_instruction()
1507 bool is_fp16 = instr->opcode == aco_opcode::v_med3_f16; in label_instruction()
1509 if (instr->operands[i].constantEquals(0)) in label_instruction()
1511 else if (instr->operands[i].constantEquals(is_fp16 ? 0x3c00 : 0x3f800000)) /* 1.0 */ in label_instruction()
1516 if (found_zero && found_one && instr->operands[idx].isTemp()) in label_instruction()
1517 ctx.info[instr->operands[idx].tempId()].set_clamp(instr.get()); in label_instruction()
1521 if (instr->operands[0].constantEquals(0) && instr->operands[1].constantEquals(0xFFFFFFFF)) in label_instruction()
1522 ctx.info[instr->definitions[0].tempId()].set_vcc(instr->operands[2].getTemp()); in label_instruction()
1523 else if (instr->operands[0].constantEquals(0) && in label_instruction()
1524 instr->operands[1].constantEquals(0x3f800000u)) in label_instruction()
1525 ctx.info[instr->definitions[0].tempId()].set_b2f(instr->operands[2].getTemp()); in label_instruction()
1526 else if (instr->operands[0].constantEquals(0) && instr->operands[1].constantEquals(1)) in label_instruction()
1527 ctx.info[instr->definitions[0].tempId()].set_b2i(instr->operands[2].getTemp()); in label_instruction()
1529 ctx.info[instr->operands[2].tempId()].set_vcc_hint(); in label_instruction()
1532 if (instr->format == Format::VOPC && /* don't optimize VOP3 / SDWA / DPP */ in label_instruction()
1533 instr->operands[0].constantEquals(0) && instr->operands[1].isTemp() && in label_instruction()
1534 ctx.info[instr->operands[1].tempId()].is_vcc()) in label_instruction()
1535 ctx.info[instr->definitions[0].tempId()].set_temp( in label_instruction()
1536 ctx.info[instr->operands[1].tempId()].temp); in label_instruction()
1540 bool all_same_temp = instr->operands[0].isTemp(); in label_instruction()
1543 all_same_temp = instr->definitions[0].regClass() == instr->operands[0].regClass(); in label_instruction()
1544 for (unsigned i = 1; all_same_temp && (i < instr->operands.size()); i++) { in label_instruction()
1545 if (!instr->operands[i].isTemp() || in label_instruction()
1546 instr->operands[i].tempId() != instr->operands[0].tempId()) in label_instruction()
1550 ctx.info[instr->definitions[0].tempId()].set_temp(instr->operands[0].getTemp()); in label_instruction()
1552 bool all_undef = instr->operands[0].isUndefined(); in label_instruction()
1553 for (unsigned i = 1; all_undef && (i < instr->operands.size()); i++) { in label_instruction()
1554 if (!instr->operands[i].isUndefined()) in label_instruction()
1558 ctx.info[instr->definitions[0].tempId()].set_undefined(); in label_instruction()
1568 ctx.info[instr->definitions[0].tempId()].set_add_sub(instr.get()); in label_instruction()
1572 if (ctx.info[instr->operands[0].tempId()].is_uniform_bool()) { in label_instruction()
1573 ctx.info[instr->definitions[0].tempId()].set_uniform_bitwise(); in label_instruction()
1574 ctx.info[instr->definitions[1].tempId()].set_scc_invert( in label_instruction()
1575 ctx.info[instr->operands[0].tempId()].temp); in label_instruction()
1576 } else if (ctx.info[instr->operands[0].tempId()].is_uniform_bitwise()) { in label_instruction()
1577 ctx.info[instr->definitions[0].tempId()].set_uniform_bitwise(); in label_instruction()
1578 ctx.info[instr->definitions[1].tempId()].set_scc_invert( in label_instruction()
1579 ctx.info[instr->operands[0].tempId()].instr->definitions[1].getTemp()); in label_instruction()
1581 ctx.info[instr->definitions[0].tempId()].set_bitwise(instr.get()); in label_instruction()
1585 if (fixed_to_exec(instr->operands[1]) && instr->operands[0].isTemp()) { in label_instruction()
1586 if (ctx.info[instr->operands[0].tempId()].is_uniform_bool()) { in label_instruction()
1589 ctx.info[instr->definitions[1].tempId()].set_temp( in label_instruction()
1590 ctx.info[instr->operands[0].tempId()].temp); in label_instruction()
1591 ctx.info[instr->definitions[0].tempId()].set_uniform_bool( in label_instruction()
1592 ctx.info[instr->operands[0].tempId()].temp); in label_instruction()
1594 } else if (ctx.info[instr->operands[0].tempId()].is_uniform_bitwise()) { in label_instruction()
1597 ctx.info[instr->definitions[1].tempId()].set_temp( in label_instruction()
1598 ctx.info[instr->operands[0].tempId()].instr->definitions[1].getTemp()); in label_instruction()
1599 ctx.info[instr->definitions[0].tempId()].set_uniform_bool( in label_instruction()
1600 ctx.info[instr->operands[0].tempId()].instr->definitions[1].getTemp()); in label_instruction()
1604 instr->pass_flags == 1) { in label_instruction()
1607 ctx.info[instr->definitions[0].tempId()].set_temp(instr->operands[0].getTemp()); in label_instruction()
1609 } else if (ctx.info[instr->operands[0].tempId()].is_vopc()) { in label_instruction()
1610 Instruction* vopc_instr = ctx.info[instr->operands[0].tempId()].instr; in label_instruction()
1613 if (vopc_instr->pass_flags == instr->pass_flags) { in label_instruction()
1614 assert(instr->pass_flags > 0); in label_instruction()
1615 ctx.info[instr->definitions[0].tempId()].set_temp( in label_instruction()
1626 if (std::all_of(instr->operands.begin(), instr->operands.end(), in label_instruction()
1632 ctx.info[instr->definitions[0].tempId()].set_uniform_bitwise(); in label_instruction()
1641 ctx.info[instr->definitions[0].tempId()].set_bitwise(instr.get()); in label_instruction()
1655 ctx.info[instr->definitions[0].tempId()].set_minmax(instr.get()); in label_instruction()
1659 if (instr->operands[0].constantEquals((unsigned)-1) && instr->operands[1].constantEquals(0)) { in label_instruction()
1661 ctx.info[instr->definitions[0].tempId()].set_uniform_bool(instr->operands[2].getTemp()); in label_instruction()
1663 if (instr->operands[2].isTemp() && ctx.info[instr->operands[2].tempId()].is_scc_invert()) { in label_instruction()
1665 std::swap(instr->operands[0], instr->operands[1]); in label_instruction()
1666 instr->operands[2].setTemp(ctx.info[instr->operands[2].tempId()].temp); in label_instruction()
1670 if (instr->operands[0].isTemp() && ctx.info[instr->operands[0].tempId()].is_scc_invert()) { in label_instruction()
1671 ctx.info[instr->definitions[0].tempId()].set_temp(instr->operands[0].getTemp()); in label_instruction()
1677 if (instr->operands[0].constantEquals(0x3f800000u)) in label_instruction()
1678 ctx.info[instr->definitions[0].tempId()].set_canonicalized(); in label_instruction()
1681 if (instr->definitions[0].bytes() == 4) { in label_instruction()
1682 ctx.info[instr->definitions[0].tempId()].set_extract(instr.get()); in label_instruction()
1683 if (instr->operands[0].regClass() == v1 && parse_insert(instr.get())) in label_instruction()
1684 ctx.info[instr->operands[0].tempId()].set_insert(instr.get()); in label_instruction()
1689 if (instr->operands[0].bytes() == 4) { in label_instruction()
1690 if (instr->operands[0].regClass() == v1) in label_instruction()
1691 ctx.info[instr->operands[0].tempId()].set_insert(instr.get()); in label_instruction()
1692 if (parse_extract(instr.get())) in label_instruction()
1693 ctx.info[instr->definitions[0].tempId()].set_extract(instr.get()); in label_instruction()
1694 ctx.info[instr->definitions[0].tempId()].set_bitwise(instr.get()); in label_instruction()
1702 ctx.info[instr->definitions[0].tempId()].set_usedef(instr.get()); in label_instruction()
1710 if (!(ctx.info[instr->definitions[0].tempId()].label & (label_neg | label_abs))) in label_instruction()
1711 check_sdwa_extract(ctx, instr); in label_instruction()
1724 decrease_uses(opt_ctx& ctx, Instruction* instr) in decrease_uses() argument
1726 if (!--ctx.uses[instr->definitions[0].tempId()]) { in decrease_uses()
1727 for (const Operand& op : instr->operands) { in decrease_uses()
1742 Instruction* instr = ctx.info[op.tempId()].instr; in follow_operand() local
1744 if (instr->definitions.size() == 2) { in follow_operand()
1745 assert(instr->definitions[0].isTemp() && instr->definitions[0].tempId() == op.tempId()); in follow_operand()
1746 if (instr->definitions[1].isTemp() && ctx.uses[instr->definitions[1].tempId()]) in follow_operand()
1750 return instr; in follow_operand()
1756 combine_ordering_test(opt_ctx& ctx, aco_ptr<Instruction>& instr) in combine_ordering_test() argument
1758 if (instr->definitions[0].regClass() != ctx.program->lane_mask) in combine_ordering_test()
1760 if (instr->definitions[1].isTemp() && ctx.uses[instr->definitions[1].tempId()]) in combine_ordering_test()
1763 bool is_or = instr->opcode == aco_opcode::s_or_b64 || instr->opcode == aco_opcode::s_or_b32; in combine_ordering_test()
1773 op_instr[i] = follow_operand(ctx, instr->operands[i], true); in combine_ordering_test()
1837 instr->definitions[0].setHint(vcc); in combine_ordering_test()
1841 new_instr->definitions[0] = instr->definitions[0]; in combine_ordering_test()
1843 ctx.info[instr->definitions[0].tempId()].label = 0; in combine_ordering_test()
1844 ctx.info[instr->definitions[0].tempId()].set_vopc(new_instr); in combine_ordering_test()
1846 instr.reset(new_instr); in combine_ordering_test()
1854 combine_comparison_ordering(opt_ctx& ctx, aco_ptr<Instruction>& instr) in combine_comparison_ordering() argument
1856 if (instr->definitions[0].regClass() != ctx.program->lane_mask) in combine_comparison_ordering()
1858 if (instr->definitions[1].isTemp() && ctx.uses[instr->definitions[1].tempId()]) in combine_comparison_ordering()
1861 bool is_or = instr->opcode == aco_opcode::s_or_b64 || instr->opcode == aco_opcode::s_or_b32; in combine_comparison_ordering()
1864 Instruction* nan_test = follow_operand(ctx, instr->operands[0], true); in combine_comparison_ordering()
1865 Instruction* cmp = follow_operand(ctx, instr->operands[1], true); in combine_comparison_ordering()
1912 instr->definitions[0].setHint(vcc); in combine_comparison_ordering()
1916 new_instr->definitions[0] = instr->definitions[0]; in combine_comparison_ordering()
1918 ctx.info[instr->definitions[0].tempId()].label = 0; in combine_comparison_ordering()
1919 ctx.info[instr->definitions[0].tempId()].set_vopc(new_instr); in combine_comparison_ordering()
1921 instr.reset(new_instr); in combine_comparison_ordering()
1956 combine_constant_comparison_ordering(opt_ctx& ctx, aco_ptr<Instruction>& instr) in combine_constant_comparison_ordering() argument
1958 if (instr->definitions[0].regClass() != ctx.program->lane_mask) in combine_constant_comparison_ordering()
1960 if (instr->definitions[1].isTemp() && ctx.uses[instr->definitions[1].tempId()]) in combine_constant_comparison_ordering()
1963 bool is_or = instr->opcode == aco_opcode::s_or_b64 || instr->opcode == aco_opcode::s_or_b32; in combine_constant_comparison_ordering()
1965 Instruction* nan_test = follow_operand(ctx, instr->operands[0], true); in combine_constant_comparison_ordering()
1966 Instruction* cmp = follow_operand(ctx, instr->operands[1], true); in combine_constant_comparison_ordering()
2038 instr->definitions[0].setHint(vcc); in combine_constant_comparison_ordering()
2042 new_instr->definitions[0] = instr->definitions[0]; in combine_constant_comparison_ordering()
2044 ctx.info[instr->definitions[0].tempId()].label = 0; in combine_constant_comparison_ordering()
2045 ctx.info[instr->definitions[0].tempId()].set_vopc(new_instr); in combine_constant_comparison_ordering()
2047 instr.reset(new_instr); in combine_constant_comparison_ordering()
2054 combine_inverse_comparison(opt_ctx& ctx, aco_ptr<Instruction>& instr) in combine_inverse_comparison() argument
2056 if (!instr->operands[0].isFixed() || instr->operands[0].physReg() != exec) in combine_inverse_comparison()
2058 if (ctx.uses[instr->definitions[1].tempId()]) in combine_inverse_comparison()
2061 Instruction* cmp = follow_operand(ctx, instr->operands[1]); in combine_inverse_comparison()
2112 instr->definitions[0].setHint(vcc); in combine_inverse_comparison()
2116 new_instr->definitions[0] = instr->definitions[0]; in combine_inverse_comparison()
2118 ctx.info[instr->definitions[0].tempId()].label = 0; in combine_inverse_comparison()
2119 ctx.info[instr->definitions[0].tempId()].set_vopc(new_instr); in combine_inverse_comparison()
2121 instr.reset(new_instr); in combine_inverse_comparison()
2204 create_vop3_for_op3(opt_ctx& ctx, aco_opcode opcode, aco_ptr<Instruction>& instr, in create_vop3_for_op3() argument
2217 new_instr->definitions[0] = instr->definitions[0]; in create_vop3_for_op3()
2218 ctx.info[instr->definitions[0].tempId()].label = 0; in create_vop3_for_op3()
2220 instr.reset(new_instr); in create_vop3_for_op3()
2224 combine_three_valu_op(opt_ctx& ctx, aco_ptr<Instruction>& instr, aco_opcode op2, aco_opcode new_op, in combine_three_valu_op() argument
2234 if (match_op3_for_vop3(ctx, instr->opcode, op2, instr.get(), swap, shuffle, operands, neg, in combine_three_valu_op()
2236 ctx.uses[instr->operands[swap].tempId()]--; in combine_three_valu_op()
2237 create_vop3_for_op3(ctx, new_op, instr, operands, neg, abs, opsel, clamp, omod); in combine_three_valu_op()
2246 combine_add_or_then_and_lshl(opt_ctx& ctx, aco_ptr<Instruction>& instr) in combine_add_or_then_and_lshl() argument
2248 bool is_or = instr->opcode == aco_opcode::v_or_b32; in combine_add_or_then_and_lshl()
2251 if (is_or && combine_three_valu_op(ctx, instr, aco_opcode::s_and_b32, aco_opcode::v_and_or_b32, in combine_add_or_then_and_lshl()
2254 if (is_or && combine_three_valu_op(ctx, instr, aco_opcode::v_and_b32, aco_opcode::v_and_or_b32, in combine_add_or_then_and_lshl()
2257 if (combine_three_valu_op(ctx, instr, aco_opcode::s_lshl_b32, new_op_lshl, "120", 1 | 2)) in combine_add_or_then_and_lshl()
2259 if (combine_three_valu_op(ctx, instr, aco_opcode::v_lshlrev_b32, new_op_lshl, "210", 1 | 2)) in combine_add_or_then_and_lshl()
2262 if (instr->isSDWA() || instr->isDPP()) in combine_add_or_then_and_lshl()
2271 Instruction* extins = follow_operand(ctx, instr->operands[i]); in combine_add_or_then_and_lshl()
2295 operands[2] = instr->operands[!i]; in combine_add_or_then_and_lshl()
2303 if (instr->isVOP3()) in combine_add_or_then_and_lshl()
2304 clamp = instr->vop3().clamp; in combine_add_or_then_and_lshl()
2306 ctx.uses[instr->operands[i].tempId()]--; in combine_add_or_then_and_lshl()
2307 create_vop3_for_op3(ctx, op, instr, operands, neg, abs, opsel, clamp, omod); in combine_add_or_then_and_lshl()
2315 combine_minmax(opt_ctx& ctx, aco_ptr<Instruction>& instr, aco_opcode opposite, aco_opcode minmax3) in combine_minmax() argument
2318 if (combine_three_valu_op(ctx, instr, instr->opcode, minmax3, "012", 1 | 2)) in combine_minmax()
2328 if (match_op3_for_vop3(ctx, instr->opcode, opposite, instr.get(), swap, "012", operands, neg, in combine_minmax()
2331 ctx.uses[instr->operands[swap].tempId()]--; in combine_minmax()
2334 create_vop3_for_op3(ctx, minmax3, instr, operands, neg, abs, opsel, clamp, omod); in combine_minmax()
2348 combine_salu_not_bitwise(opt_ctx& ctx, aco_ptr<Instruction>& instr) in combine_salu_not_bitwise() argument
2351 if (!instr->operands[0].isTemp()) in combine_salu_not_bitwise()
2353 if (instr->definitions[1].isTemp() && ctx.uses[instr->definitions[1].tempId()]) in combine_salu_not_bitwise()
2356 Instruction* op2_instr = follow_operand(ctx, instr->operands[0]); in combine_salu_not_bitwise()
2370 std::swap(instr->definitions[0], op2_instr->definitions[0]); in combine_salu_not_bitwise()
2371 std::swap(instr->definitions[1], op2_instr->definitions[1]); in combine_salu_not_bitwise()
2372 ctx.uses[instr->operands[0].tempId()]--; in combine_salu_not_bitwise()
2393 combine_salu_n2(opt_ctx& ctx, aco_ptr<Instruction>& instr) in combine_salu_n2() argument
2395 if (instr->definitions[0].isTemp() && ctx.info[instr->definitions[0].tempId()].is_uniform_bool()) in combine_salu_n2()
2399 Instruction* op2_instr = follow_operand(ctx, instr->operands[i]); in combine_salu_n2()
2406 if (instr->operands[!i].isLiteral() && op2_instr->operands[0].isLiteral() && in combine_salu_n2()
2407 instr->operands[!i].constantValue() != op2_instr->operands[0].constantValue()) in combine_salu_n2()
2410 ctx.uses[instr->operands[i].tempId()]--; in combine_salu_n2()
2411 instr->operands[0] = instr->operands[!i]; in combine_salu_n2()
2412 instr->operands[1] = op2_instr->operands[0]; in combine_salu_n2()
2413 ctx.info[instr->definitions[0].tempId()].label = 0; in combine_salu_n2()
2415 switch (instr->opcode) { in combine_salu_n2()
2416 case aco_opcode::s_and_b32: instr->opcode = aco_opcode::s_andn2_b32; break; in combine_salu_n2()
2417 case aco_opcode::s_or_b32: instr->opcode = aco_opcode::s_orn2_b32; break; in combine_salu_n2()
2418 case aco_opcode::s_and_b64: instr->opcode = aco_opcode::s_andn2_b64; break; in combine_salu_n2()
2419 case aco_opcode::s_or_b64: instr->opcode = aco_opcode::s_orn2_b64; break; in combine_salu_n2()
2430 combine_salu_lshl_add(opt_ctx& ctx, aco_ptr<Instruction>& instr) in combine_salu_lshl_add() argument
2432 if (instr->opcode == aco_opcode::s_add_i32 && ctx.uses[instr->definitions[1].tempId()]) in combine_salu_lshl_add()
2436 Instruction* op2_instr = follow_operand(ctx, instr->operands[i], true); in combine_salu_lshl_add()
2447 if (instr->operands[!i].isLiteral() && op2_instr->operands[0].isLiteral() && in combine_salu_lshl_add()
2448 instr->operands[!i].constantValue() != op2_instr->operands[0].constantValue()) in combine_salu_lshl_add()
2451 ctx.uses[instr->operands[i].tempId()]--; in combine_salu_lshl_add()
2452 instr->operands[1] = instr->operands[!i]; in combine_salu_lshl_add()
2453 instr->operands[0] = op2_instr->operands[0]; in combine_salu_lshl_add()
2454 ctx.info[instr->definitions[0].tempId()].label = 0; in combine_salu_lshl_add()
2456 instr->opcode = std::array<aco_opcode, 4>{ in combine_salu_lshl_add()
2466 combine_add_sub_b2i(opt_ctx& ctx, aco_ptr<Instruction>& instr, aco_opcode new_op, uint8_t ops) in combine_add_sub_b2i() argument
2468 if (instr->usesModifiers()) in combine_add_sub_b2i()
2474 if (instr->operands[i].isTemp() && ctx.info[instr->operands[i].tempId()].is_b2i() && in combine_add_sub_b2i()
2475 ctx.uses[instr->operands[i].tempId()] == 1) { in combine_add_sub_b2i()
2478 if (instr->operands[!i].isTemp() && in combine_add_sub_b2i()
2479 instr->operands[!i].getTemp().type() == RegType::vgpr) { in combine_add_sub_b2i()
2482 (instr->operands[!i].isConstant() && !instr->operands[!i].isLiteral())) { in combine_add_sub_b2i()
2488 ctx.uses[instr->operands[i].tempId()]--; in combine_add_sub_b2i()
2489 new_instr->definitions[0] = instr->definitions[0]; in combine_add_sub_b2i()
2490 if (instr->definitions.size() == 2) { in combine_add_sub_b2i()
2491 new_instr->definitions[1] = instr->definitions[1]; in combine_add_sub_b2i()
2502 new_instr->operands[1] = instr->operands[!i]; in combine_add_sub_b2i()
2503 new_instr->operands[2] = Operand(ctx.info[instr->operands[i].tempId()].temp); in combine_add_sub_b2i()
2504 instr = std::move(new_instr); in combine_add_sub_b2i()
2505 ctx.info[instr->definitions[0].tempId()].set_add_sub(instr.get()); in combine_add_sub_b2i()
2514 combine_add_bcnt(opt_ctx& ctx, aco_ptr<Instruction>& instr) in combine_add_bcnt() argument
2516 if (instr->usesModifiers()) in combine_add_bcnt()
2520 Instruction* op_instr = follow_operand(ctx, instr->operands[i]); in combine_add_bcnt()
2527 ctx.uses[instr->operands[i].tempId()]--; in combine_add_bcnt()
2529 new_instr->operands[1] = instr->operands[!i]; in combine_add_bcnt()
2530 new_instr->definitions[0] = instr->definitions[0]; in combine_add_bcnt()
2531 instr = std::move(new_instr); in combine_add_bcnt()
2532 ctx.info[instr->definitions[0].tempId()].label = 0; in combine_add_bcnt()
2573 combine_clamp(opt_ctx& ctx, aco_ptr<Instruction>& instr, aco_opcode min, aco_opcode max, in combine_clamp() argument
2580 if (instr->opcode == min) in combine_clamp()
2582 else if (instr->opcode == max) in combine_clamp()
2591 if (match_op3_for_vop3(ctx, instr->opcode, other_op, instr.get(), swap, "012", operands, neg, in combine_clamp()
2596 if (precise && instr->opcode != min) in combine_clamp()
2676 if (instr->opcode == min) { in combine_clamp()
2684 ctx.uses[instr->operands[swap].tempId()]--; in combine_clamp()
2685 create_vop3_for_op3(ctx, med, instr, operands, neg, abs, opsel, clamp, omod); in combine_clamp()
2695 apply_sgprs(opt_ctx& ctx, aco_ptr<Instruction>& instr) in apply_sgprs() argument
2697 bool is_shift64 = instr->opcode == aco_opcode::v_lshlrev_b64 || in apply_sgprs()
2698 instr->opcode == aco_opcode::v_lshrrev_b64 || in apply_sgprs()
2699 instr->opcode == aco_opcode::v_ashrrev_i64; in apply_sgprs()
2705 for (unsigned i = 0; i < instr->operands.size(); i++) { in apply_sgprs()
2706 if (instr->operands[i].isLiteral()) in apply_sgprs()
2708 if (!instr->operands[i].isTemp()) in apply_sgprs()
2710 if (instr->operands[i].getTemp().type() == RegType::sgpr) { in apply_sgprs()
2711 if (instr->operands[i].tempId() != sgpr_ids[0]) in apply_sgprs()
2712 sgpr_ids[!!sgpr_ids[0]] = instr->operands[i].tempId(); in apply_sgprs()
2714 ssa_info& info = ctx.info[instr->operands[i].tempId()]; in apply_sgprs()
2715 if (is_copy_label(ctx, instr, info) && info.temp.type() == RegType::sgpr) in apply_sgprs()
2717 if (info.is_extract() && info.instr->operands[0].getTemp().type() == RegType::sgpr) in apply_sgprs()
2736 uint16_t uses = ctx.uses[instr->operands[i].tempId()]; in apply_sgprs()
2739 sgpr_info_id = instr->operands[i].tempId(); in apply_sgprs()
2749 if (!info.is_extract() && num_sgprs && ctx.uses[sgpr_info_id] > 1 && !instr->isVOP3() && in apply_sgprs()
2750 !instr->isSDWA() && instr->format != Format::VOP3P) in apply_sgprs()
2753 Temp sgpr = info.is_extract() ? info.instr->operands[0].getTemp() : info.temp; in apply_sgprs()
2759 instr->format = withoutDPP(instr->format); in apply_sgprs()
2761 if (sgpr_idx == 0 || instr->isVOP3() || instr->isSDWA() || instr->isVOP3P() || in apply_sgprs()
2764 if (info.is_extract() && can_apply_extract(ctx, instr, sgpr_idx, info)) in apply_sgprs()
2765 apply_extract(ctx, instr, sgpr_idx, info); in apply_sgprs()
2768 instr->operands[sgpr_idx] = Operand(sgpr); in apply_sgprs()
2769 } else if (can_swap_operands(instr, &instr->opcode)) { in apply_sgprs()
2770 instr->operands[sgpr_idx] = instr->operands[0]; in apply_sgprs()
2771 instr->operands[0] = Operand(sgpr); in apply_sgprs()
2775 } else if (can_use_VOP3(ctx, instr) && !info.is_extract()) { in apply_sgprs()
2776 to_VOP3(ctx, instr); in apply_sgprs()
2777 instr->operands[sgpr_idx] = Operand(sgpr); in apply_sgprs()
2796 apply_omod_clamp_helper(opt_ctx& ctx, T* instr, ssa_info& def_info) in apply_omod_clamp_helper() argument
2798 if (!def_info.is_clamp() && (instr->clamp || instr->omod)) in apply_omod_clamp_helper()
2802 instr->omod = 1; in apply_omod_clamp_helper()
2804 instr->omod = 2; in apply_omod_clamp_helper()
2806 instr->omod = 3; in apply_omod_clamp_helper()
2808 instr->clamp = true; in apply_omod_clamp_helper()
2815 apply_omod_clamp(opt_ctx& ctx, aco_ptr<Instruction>& instr) in apply_omod_clamp() argument
2817 if (instr->definitions.empty() || ctx.uses[instr->definitions[0].tempId()] != 1 || in apply_omod_clamp()
2818 !instr_info.can_use_output_modifiers[(int)instr->opcode]) in apply_omod_clamp()
2821 bool can_vop3 = can_use_VOP3(ctx, instr); in apply_omod_clamp()
2822 if (!instr->isSDWA() && !can_vop3) in apply_omod_clamp()
2827 if (instr->definitions[0].bytes() == 4) in apply_omod_clamp()
2834 ssa_info& def_info = ctx.info[instr->definitions[0].tempId()]; in apply_omod_clamp()
2841 if (!ctx.uses[def_info.instr->definitions[0].tempId()]) in apply_omod_clamp()
2845 assert(!ctx.info[instr->definitions[0].tempId()].is_mad()); in apply_omod_clamp()
2847 if (instr->isSDWA()) { in apply_omod_clamp()
2848 if (!apply_omod_clamp_helper(ctx, &instr->sdwa(), def_info)) in apply_omod_clamp()
2851 to_VOP3(ctx, instr); in apply_omod_clamp()
2852 if (!apply_omod_clamp_helper(ctx, &instr->vop3(), def_info)) in apply_omod_clamp()
2856 instr->definitions[0].swapTemp(def_info.instr->definitions[0]); in apply_omod_clamp()
2857 ctx.info[instr->definitions[0].tempId()].label &= label_clamp | label_insert; in apply_omod_clamp()
2858 ctx.uses[def_info.instr->definitions[0].tempId()]--; in apply_omod_clamp()
2867 apply_insert(opt_ctx& ctx, aco_ptr<Instruction>& instr) in apply_insert() argument
2869 if (instr->definitions.empty() || ctx.uses[instr->definitions[0].tempId()] != 1) in apply_insert()
2872 ssa_info& def_info = ctx.info[instr->definitions[0].tempId()]; in apply_insert()
2877 if (!ctx.uses[def_info.instr->definitions[0].tempId()]) in apply_insert()
2881 assert(!ctx.info[instr->definitions[0].tempId()].is_mad()); in apply_insert()
2883 SubdwordSel sel = parse_insert(def_info.instr); in apply_insert()
2886 if (instr->isVOP3() && sel.size() == 2 && !sel.sign_extend() && in apply_insert()
2887 can_use_opsel(ctx.program->chip_class, instr->opcode, 3, sel.offset())) { in apply_insert()
2888 if (instr->vop3().opsel & (1 << 3)) in apply_insert()
2891 instr->vop3().opsel |= 1 << 3; in apply_insert()
2893 if (!can_use_SDWA(ctx.program->chip_class, instr, true)) in apply_insert()
2896 to_SDWA(ctx, instr); in apply_insert()
2897 if (instr->sdwa().dst_sel.size() != 4) in apply_insert()
2899 static_cast<SDWA_instruction*>(instr.get())->dst_sel = sel; in apply_insert()
2902 instr->definitions[0].swapTemp(def_info.instr->definitions[0]); in apply_insert()
2903 ctx.info[instr->definitions[0].tempId()].label = 0; in apply_insert()
2904 ctx.uses[def_info.instr->definitions[0].tempId()]--; in apply_insert()
2921 Instruction* ds = ctx.info[extract->operands[0].tempId()].instr; in apply_ds_extract()
2962 combine_and_subbrev(opt_ctx& ctx, aco_ptr<Instruction>& instr) in combine_and_subbrev() argument
2964 if (instr->usesModifiers()) in combine_and_subbrev()
2968 Instruction* op_instr = follow_operand(ctx, instr->operands[i], true); in combine_and_subbrev()
2974 if (instr->operands[!i].isTemp() && in combine_and_subbrev()
2975 instr->operands[!i].getTemp().type() == RegType::vgpr) { in combine_and_subbrev()
2979 (instr->operands[!i].isConstant() && !instr->operands[!i].isLiteral())) { in combine_and_subbrev()
2986 ctx.uses[instr->operands[i].tempId()]--; in combine_and_subbrev()
2987 if (ctx.uses[instr->operands[i].tempId()]) in combine_and_subbrev()
2991 new_instr->operands[1] = instr->operands[!i]; in combine_and_subbrev()
2993 new_instr->definitions[0] = instr->definitions[0]; in combine_and_subbrev()
2994 instr = std::move(new_instr); in combine_and_subbrev()
2995 ctx.info[instr->definitions[0].tempId()].label = 0; in combine_and_subbrev()
3009 combine_add_lshl(opt_ctx& ctx, aco_ptr<Instruction>& instr, bool is_sub) in combine_add_lshl() argument
3011 if (instr->usesModifiers()) in combine_add_lshl()
3025 Instruction* op_instr = follow_operand(ctx, instr->operands[i]); in combine_add_lshl()
3047 instr->operands[!i], in combine_add_lshl()
3052 ctx.uses[instr->operands[i].tempId()]--; in combine_add_lshl()
3059 new_instr->definitions[0] = instr->definitions[0]; in combine_add_lshl()
3060 instr = std::move(new_instr); in combine_add_lshl()
3061 ctx.info[instr->definitions[0].tempId()].label = 0; in combine_add_lshl()
3070 propagate_swizzles(VOP3P_instruction* instr, uint8_t opsel_lo, uint8_t opsel_hi) in propagate_swizzles() argument
3076 uint8_t tmp_lo = instr->opsel_lo; in propagate_swizzles()
3077 uint8_t tmp_hi = instr->opsel_hi; in propagate_swizzles()
3078 bool neg_lo[3] = {instr->neg_lo[0], instr->neg_lo[1], instr->neg_lo[2]}; in propagate_swizzles()
3079 bool neg_hi[3] = {instr->neg_hi[0], instr->neg_hi[1], instr->neg_hi[2]}; in propagate_swizzles()
3081 instr->opsel_lo = tmp_hi; in propagate_swizzles()
3083 instr->neg_lo[i] = neg_hi[i]; in propagate_swizzles()
3086 instr->opsel_hi = tmp_lo; in propagate_swizzles()
3088 instr->neg_hi[i] = neg_lo[i]; in propagate_swizzles()
3093 combine_vop3p(opt_ctx& ctx, aco_ptr<Instruction>& instr) in combine_vop3p() argument
3095 VOP3P_instruction* vop3p = &instr->vop3p(); in combine_vop3p()
3098 if (instr->opcode == aco_opcode::v_pk_mul_f16 && instr->operands[1].constantEquals(0x3C00) && in combine_vop3p()
3099 vop3p->clamp && instr->operands[0].isTemp() && ctx.uses[instr->operands[0].tempId()] == 1) { in combine_vop3p()
3101 ssa_info& info = ctx.info[instr->operands[0].tempId()]; in combine_vop3p()
3102 if (info.is_vop3p() && instr_info.can_use_output_modifiers[(int)info.instr->opcode]) { in combine_vop3p()
3103 VOP3P_instruction* candidate = &ctx.info[instr->operands[0].tempId()].instr->vop3p(); in combine_vop3p()
3106 instr->definitions[0].swapTemp(candidate->definitions[0]); in combine_vop3p()
3107 ctx.info[candidate->definitions[0].tempId()].instr = candidate; in combine_vop3p()
3108 ctx.uses[instr->definitions[0].tempId()]--; in combine_vop3p()
3114 if (instr_info.can_use_input_modifiers[(int)instr->opcode]) { in combine_vop3p()
3116 assert(instr->operands.size() == 2); in combine_vop3p()
3118 Operand& op = instr->operands[i]; in combine_vop3p()
3123 if (info.is_vop3p() && info.instr->opcode == aco_opcode::v_pk_mul_f16 && in combine_vop3p()
3124 info.instr->operands[1].constantEquals(0xBC00)) { in combine_vop3p()
3125 Operand ops[2] = {instr->operands[!i], info.instr->operands[0]}; in combine_vop3p()
3129 VOP3P_instruction* fneg = &info.instr->vop3p(); in combine_vop3p()
3132 instr->operands[i] = fneg->operands[0]; in combine_vop3p()
3153 if (instr->opcode == aco_opcode::v_pk_add_f16 || instr->opcode == aco_opcode::v_pk_add_u16) { in combine_vop3p()
3154 bool fadd = instr->opcode == aco_opcode::v_pk_add_f16; in combine_vop3p()
3155 if (fadd && instr->definitions[0].isPrecise()) in combine_vop3p()
3165 if (!instr->operands[i].isTemp() || !ctx.info[instr->operands[i].tempId()].is_vop3p()) in combine_vop3p()
3167 ssa_info& info = ctx.info[instr->operands[i].tempId()]; in combine_vop3p()
3169 if (info.instr->opcode != aco_opcode::v_pk_mul_f16 || in combine_vop3p()
3170 info.instr->definitions[0].isPrecise()) in combine_vop3p()
3173 if (info.instr->opcode != aco_opcode::v_pk_mul_lo_u16) in combine_vop3p()
3177 Operand op[3] = {info.instr->operands[0], info.instr->operands[1], instr->operands[1 - i]}; in combine_vop3p()
3178 if (ctx.uses[instr->operands[i].tempId()] >= uses || !check_vop3_operands(ctx, 3, op)) in combine_vop3p()
3182 if (info.instr->vop3p().clamp) in combine_vop3p()
3185 mul_instr = info.instr; in combine_vop3p()
3189 uses = ctx.uses[instr->operands[i].tempId()]; in combine_vop3p()
3196 Operand op[3] = {mul_instr->operands[0], mul_instr->operands[1], instr->operands[add_op_idx]}; in combine_vop3p()
3227 fma->definitions[0] = instr->definitions[0]; in combine_vop3p()
3228 instr = std::move(fma); in combine_vop3p()
3229 ctx.info[instr->definitions[0].tempId()].set_vop3p(instr.get()); in combine_vop3p()
3238 combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr) in combine_instruction() argument
3240 if (instr->definitions.empty() || is_dead(ctx.uses, instr.get())) in combine_instruction()
3243 if (instr->isVALU()) { in combine_instruction()
3246 for (unsigned i = 0; i < instr->operands.size(); i++) { in combine_instruction()
3247 Operand& op = instr->operands[i]; in combine_instruction()
3260 (info.instr->operands[0].getTemp().type() == RegType::vgpr || in combine_instruction()
3261 instr->operands[i].getTemp().type() == RegType::sgpr) && in combine_instruction()
3262 can_apply_extract(ctx, instr, i, info)) { in combine_instruction()
3263 apply_extract(ctx, instr, i, info); in combine_instruction()
3264 ctx.uses[instr->operands[i].tempId()]--; in combine_instruction()
3265 instr->operands[i].setTemp(info.instr->operands[0].getTemp()); in combine_instruction()
3269 if (can_apply_sgprs(ctx, instr)) in combine_instruction()
3270 apply_sgprs(ctx, instr); in combine_instruction()
3271 while (apply_omod_clamp(ctx, instr)) in combine_instruction()
3273 apply_insert(ctx, instr); in combine_instruction()
3276 if (instr->isVOP3P()) in combine_instruction()
3277 return combine_vop3p(ctx, instr); in combine_instruction()
3279 if (ctx.info[instr->definitions[0].tempId()].is_vcc_hint()) { in combine_instruction()
3280 instr->definitions[0].setHint(vcc); in combine_instruction()
3283 if (instr->isSDWA() || instr->isDPP()) in combine_instruction()
3286 if (instr->opcode == aco_opcode::p_extract) in combine_instruction()
3287 apply_ds_extract(ctx, instr); in combine_instruction()
3300 if (ctx.info[instr->definitions[0].tempId()].is_neg() && in combine_instruction()
3301 ctx.uses[instr->operands[1].tempId()] == 1) { in combine_instruction()
3302 Temp val = ctx.info[instr->definitions[0].tempId()].temp; in combine_instruction()
3307 Instruction* mul_instr = ctx.info[val.id()].instr; in combine_instruction()
3318 Definition def = instr->definitions[0]; in combine_instruction()
3320 bool is_abs = ctx.info[instr->definitions[0].tempId()].is_abs(); in combine_instruction()
3321 instr.reset( in combine_instruction()
3323 instr->operands[0] = mul_instr->operands[0]; in combine_instruction()
3324 instr->operands[1] = mul_instr->operands[1]; in combine_instruction()
3325 instr->definitions[0] = def; in combine_instruction()
3326 VOP3_instruction& new_mul = instr->vop3(); in combine_instruction()
3338 ctx.info[instr->definitions[0].tempId()].set_mul(instr.get()); in combine_instruction()
3343 bool mad32 = instr->opcode == aco_opcode::v_add_f32 || instr->opcode == aco_opcode::v_sub_f32 || in combine_instruction()
3344 instr->opcode == aco_opcode::v_subrev_f32; in combine_instruction()
3345 bool mad16 = instr->opcode == aco_opcode::v_add_f16 || instr->opcode == aco_opcode::v_sub_f16 || in combine_instruction()
3346 instr->opcode == aco_opcode::v_subrev_f16; in combine_instruction()
3347 bool mad64 = instr->opcode == aco_opcode::v_add_f64; in combine_instruction()
3352 if (need_fma && instr->definitions[0].isPrecise()) in combine_instruction()
3362 if (!instr->operands[i].isTemp() || !ctx.info[instr->operands[i].tempId()].is_mul()) in combine_instruction()
3365 ssa_info& info = ctx.info[instr->operands[i].tempId()]; in combine_instruction()
3366 if (need_fma && info.instr->definitions[0].isPrecise()) in combine_instruction()
3370 if (info.instr->isVOP3() && (info.instr->vop3().clamp || info.instr->vop3().omod)) in combine_instruction()
3373 Operand op[3] = {info.instr->operands[0], info.instr->operands[1], instr->operands[1 - i]}; in combine_instruction()
3374 if (info.instr->isSDWA() || info.instr->isDPP() || !check_vop3_operands(ctx, 3, op) || in combine_instruction()
3375 ctx.uses[instr->operands[i].tempId()] >= uses) in combine_instruction()
3378 mul_instr = info.instr; in combine_instruction()
3380 uses = ctx.uses[instr->operands[i].tempId()]; in combine_instruction()
3386 instr->operands[add_op_idx]}; in combine_instruction()
3408 if (instr->isVOP3()) { in combine_instruction()
3409 VOP3_instruction& vop3 = instr->vop3(); in combine_instruction()
3424 if (instr->opcode == aco_opcode::v_sub_f32 || instr->opcode == aco_opcode::v_sub_f16) in combine_instruction()
3426 else if (instr->opcode == aco_opcode::v_subrev_f32 || in combine_instruction()
3427 instr->opcode == aco_opcode::v_subrev_f16) in combine_instruction()
3448 mad->definitions[0] = instr->definitions[0]; in combine_instruction()
3451 ctx.mad_infos.emplace_back(std::move(instr), mul_instr->definitions[0].tempId()); in combine_instruction()
3453 instr = std::move(mad); in combine_instruction()
3458 else if (instr->opcode == aco_opcode::v_mul_f32 && !instr->isVOP3()) { in combine_instruction()
3460 if (instr->operands[i].isTemp() && ctx.info[instr->operands[i].tempId()].is_b2f() && in combine_instruction()
3461 ctx.uses[instr->operands[i].tempId()] == 1 && instr->operands[!i].isTemp() && in combine_instruction()
3462 instr->operands[!i].getTemp().type() == RegType::vgpr) { in combine_instruction()
3463 ctx.uses[instr->operands[i].tempId()]--; in combine_instruction()
3464 ctx.uses[ctx.info[instr->operands[i].tempId()].temp.id()]++; in combine_instruction()
3469 new_instr->operands[1] = instr->operands[!i]; in combine_instruction()
3470 new_instr->operands[2] = Operand(ctx.info[instr->operands[i].tempId()].temp); in combine_instruction()
3471 new_instr->definitions[0] = instr->definitions[0]; in combine_instruction()
3472 instr = std::move(new_instr); in combine_instruction()
3473 ctx.info[instr->definitions[0].tempId()].label = 0; in combine_instruction()
3477 } else if (instr->opcode == aco_opcode::v_or_b32 && ctx.program->chip_class >= GFX9) { in combine_instruction()
3478 if (combine_three_valu_op(ctx, instr, aco_opcode::s_or_b32, aco_opcode::v_or3_b32, "012", in combine_instruction()
3480 } else if (combine_three_valu_op(ctx, instr, aco_opcode::v_or_b32, aco_opcode::v_or3_b32, in combine_instruction()
3482 } else if (combine_add_or_then_and_lshl(ctx, instr)) { in combine_instruction()
3484 } else if (instr->opcode == aco_opcode::v_xor_b32 && ctx.program->chip_class >= GFX10) { in combine_instruction()
3485 if (combine_three_valu_op(ctx, instr, aco_opcode::v_xor_b32, aco_opcode::v_xor3_b32, "012", in combine_instruction()
3487 } else if (combine_three_valu_op(ctx, instr, aco_opcode::s_xor_b32, aco_opcode::v_xor3_b32, in combine_instruction()
3490 } else if (instr->opcode == aco_opcode::v_add_u16) { in combine_instruction()
3492 ctx, instr, aco_opcode::v_mul_lo_u16, in combine_instruction()
3495 } else if (instr->opcode == aco_opcode::v_add_u16_e64) { in combine_instruction()
3496 combine_three_valu_op(ctx, instr, aco_opcode::v_mul_lo_u16_e64, aco_opcode::v_mad_u16, "120", in combine_instruction()
3498 } else if (instr->opcode == aco_opcode::v_add_u32) { in combine_instruction()
3499 if (combine_add_sub_b2i(ctx, instr, aco_opcode::v_addc_co_u32, 1 | 2)) { in combine_instruction()
3500 } else if (combine_add_bcnt(ctx, instr)) { in combine_instruction()
3501 } else if (combine_three_valu_op(ctx, instr, aco_opcode::v_mul_u32_u24, in combine_instruction()
3503 } else if (ctx.program->chip_class >= GFX9 && !instr->usesModifiers()) { in combine_instruction()
3504 if (combine_three_valu_op(ctx, instr, aco_opcode::s_xor_b32, aco_opcode::v_xad_u32, "120", in combine_instruction()
3506 } else if (combine_three_valu_op(ctx, instr, aco_opcode::v_xor_b32, aco_opcode::v_xad_u32, in combine_instruction()
3508 } else if (combine_three_valu_op(ctx, instr, aco_opcode::s_add_i32, aco_opcode::v_add3_u32, in combine_instruction()
3510 } else if (combine_three_valu_op(ctx, instr, aco_opcode::s_add_u32, aco_opcode::v_add3_u32, in combine_instruction()
3512 } else if (combine_three_valu_op(ctx, instr, aco_opcode::v_add_u32, aco_opcode::v_add3_u32, in combine_instruction()
3514 } else if (combine_add_or_then_and_lshl(ctx, instr)) { in combine_instruction()
3517 } else if (instr->opcode == aco_opcode::v_add_co_u32 || in combine_instruction()
3518 instr->opcode == aco_opcode::v_add_co_u32_e64) { in combine_instruction()
3519 bool carry_out = ctx.uses[instr->definitions[1].tempId()] > 0; in combine_instruction()
3520 if (combine_add_sub_b2i(ctx, instr, aco_opcode::v_addc_co_u32, 1 | 2)) { in combine_instruction()
3521 } else if (!carry_out && combine_add_bcnt(ctx, instr)) { in combine_instruction()
3522 } else if (!carry_out && combine_three_valu_op(ctx, instr, aco_opcode::v_mul_u32_u24, in combine_instruction()
3524 } else if (!carry_out && combine_add_lshl(ctx, instr, false)) { in combine_instruction()
3526 } else if (instr->opcode == aco_opcode::v_sub_u32 || instr->opcode == aco_opcode::v_sub_co_u32 || in combine_instruction()
3527 instr->opcode == aco_opcode::v_sub_co_u32_e64) { in combine_instruction()
3529 instr->opcode != aco_opcode::v_sub_u32 && ctx.uses[instr->definitions[1].tempId()] > 0; in combine_instruction()
3530 if (combine_add_sub_b2i(ctx, instr, aco_opcode::v_subbrev_co_u32, 2)) { in combine_instruction()
3531 } else if (!carry_out && combine_add_lshl(ctx, instr, true)) { in combine_instruction()
3533 } else if (instr->opcode == aco_opcode::v_subrev_u32 || in combine_instruction()
3534 instr->opcode == aco_opcode::v_subrev_co_u32 || in combine_instruction()
3535 instr->opcode == aco_opcode::v_subrev_co_u32_e64) { in combine_instruction()
3536 combine_add_sub_b2i(ctx, instr, aco_opcode::v_subbrev_co_u32, 1); in combine_instruction()
3537 } else if (instr->opcode == aco_opcode::v_lshlrev_b32 && ctx.program->chip_class >= GFX9) { in combine_instruction()
3538 combine_three_valu_op(ctx, instr, aco_opcode::v_add_u32, aco_opcode::v_add_lshl_u32, "120", in combine_instruction()
3540 } else if ((instr->opcode == aco_opcode::s_add_u32 || instr->opcode == aco_opcode::s_add_i32) && in combine_instruction()
3542 combine_salu_lshl_add(ctx, instr); in combine_instruction()
3543 } else if (instr->opcode == aco_opcode::s_not_b32 || instr->opcode == aco_opcode::s_not_b64) { in combine_instruction()
3544 combine_salu_not_bitwise(ctx, instr); in combine_instruction()
3545 } else if (instr->opcode == aco_opcode::s_and_b32 || instr->opcode == aco_opcode::s_or_b32 || in combine_instruction()
3546 instr->opcode == aco_opcode::s_and_b64 || instr->opcode == aco_opcode::s_or_b64) { in combine_instruction()
3547 if (combine_ordering_test(ctx, instr)) { in combine_instruction()
3548 } else if (combine_comparison_ordering(ctx, instr)) { in combine_instruction()
3549 } else if (combine_constant_comparison_ordering(ctx, instr)) { in combine_instruction()
3550 } else if (combine_salu_n2(ctx, instr)) { in combine_instruction()
3552 } else if (instr->opcode == aco_opcode::v_and_b32) { in combine_instruction()
3553 combine_and_subbrev(ctx, instr); in combine_instruction()
3557 if (get_minmax_info(instr->opcode, &min, &max, &min3, &max3, &med3, &some_gfx9_only) && in combine_instruction()
3559 if (combine_minmax(ctx, instr, instr->opcode == min ? max : min, in combine_instruction()
3560 instr->opcode == min ? min3 : max3)) { in combine_instruction()
3562 combine_clamp(ctx, instr, min, max, med3); in combine_instruction()
3568 if (instr->opcode == aco_opcode::s_andn2_b32 || instr->opcode == aco_opcode::s_andn2_b64) in combine_instruction()
3569 combine_inverse_comparison(ctx, instr); in combine_instruction()
3573 to_uniform_bool_instr(opt_ctx& ctx, aco_ptr<Instruction>& instr) in to_uniform_bool_instr() argument
3576 for (Operand& op : instr->operands) { in to_uniform_bool_instr()
3583 switch (instr->opcode) { in to_uniform_bool_instr()
3585 case aco_opcode::s_and_b64: instr->opcode = aco_opcode::s_and_b32; break; in to_uniform_bool_instr()
3587 case aco_opcode::s_or_b64: instr->opcode = aco_opcode::s_or_b32; break; in to_uniform_bool_instr()
3589 case aco_opcode::s_xor_b64: instr->opcode = aco_opcode::s_absdiff_i32; break; in to_uniform_bool_instr()
3595 for (Operand& op : instr->operands) { in to_uniform_bool_instr()
3607 Instruction* pred_instr = ctx.info[op.tempId()].instr; in to_uniform_bool_instr()
3619 instr->definitions[0].setTemp(Temp(instr->definitions[0].tempId(), s1)); in to_uniform_bool_instr()
3620 assert(instr->operands[0].regClass() == s1); in to_uniform_bool_instr()
3621 assert(instr->operands[1].regClass() == s1); in to_uniform_bool_instr()
3626 select_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr) in select_instruction() argument
3630 if (is_dead(ctx.uses, instr.get())) { in select_instruction()
3631 instr.reset(); in select_instruction()
3636 if (instr->opcode == aco_opcode::p_split_vector) { in select_instruction()
3640 for (unsigned i = 0, offset = 0; i < instr->definitions.size(); in select_instruction()
3641 offset += instr->definitions[i++].bytes()) { in select_instruction()
3642 if (ctx.uses[instr->definitions[i].tempId()]) { in select_instruction()
3649 if (num_used == 1 && ctx.info[instr->operands[0].tempId()].is_vec() && in select_instruction()
3650 ctx.uses[instr->operands[0].tempId()] == 1) { in select_instruction()
3651 Instruction* vec = ctx.info[instr->operands[0].tempId()].instr; in select_instruction()
3662 if (off != instr->operands[0].bytes() && op.bytes() == instr->definitions[idx].bytes()) { in select_instruction()
3663 ctx.uses[instr->operands[0].tempId()]--; in select_instruction()
3674 extract->definitions[0] = instr->definitions[idx]; in select_instruction()
3675 instr = std::move(extract); in select_instruction()
3682 instr->operands[0].bytes() % instr->definitions[idx].bytes() == 0 && in select_instruction()
3683 split_offset % instr->definitions[idx].bytes() == 0) { in select_instruction()
3686 extract->operands[0] = instr->operands[0]; in select_instruction()
3688 Operand::c32((uint32_t)split_offset / instr->definitions[idx].bytes()); in select_instruction()
3689 extract->definitions[0] = instr->definitions[idx]; in select_instruction()
3690 instr = std::move(extract); in select_instruction()
3695 if (!instr->definitions.empty() && ctx.info[instr->definitions[0].tempId()].is_mad()) { in select_instruction()
3696 mad_info = &ctx.mad_infos[ctx.info[instr->definitions[0].tempId()].instr->pass_flags]; in select_instruction()
3700 if (instr->operands[0].isTemp()) in select_instruction()
3701 ctx.uses[instr->operands[0].tempId()]--; in select_instruction()
3702 if (instr->operands[1].isTemp()) in select_instruction()
3703 ctx.uses[instr->operands[1].tempId()]--; in select_instruction()
3704 instr.swap(mad_info->add_instr); in select_instruction()
3708 else if (!instr->usesModifiers() && instr->opcode != aco_opcode::v_fma_f64) { in select_instruction()
3710 if ((instr->opcode == aco_opcode::v_fma_f32 || instr->opcode == aco_opcode::v_fma_f16) && in select_instruction()
3716 if (instr->opcode == aco_opcode::v_fma_legacy_f16) in select_instruction()
3722 for (unsigned i = 0; i < instr->operands.size(); i++) { in select_instruction()
3723 if (instr->operands[i].isConstant() && i > 0) { in select_instruction()
3727 if (!instr->operands[i].isTemp()) in select_instruction()
3729 unsigned bits = get_operand_size(instr, i); in select_instruction()
3732 if (instr->operands[i].getTemp().type() == RegType::sgpr && in select_instruction()
3734 if (!sgpr_used && ctx.info[instr->operands[i].tempId()].is_literal(bits)) { in select_instruction()
3735 literal_uses = ctx.uses[instr->operands[i].tempId()]; in select_instruction()
3742 } else if (!sgpr_used && ctx.info[instr->operands[i].tempId()].is_literal(bits) && in select_instruction()
3743 ctx.uses[instr->operands[i].tempId()] < literal_uses) { in select_instruction()
3744 literal_uses = ctx.uses[instr->operands[i].tempId()]; in select_instruction()
3757 ctx.uses[instr->operands[literal_idx].tempId()]--; in select_instruction()
3767 if (instr->isBranch() && instr->operands.size() && instr->operands[0].isTemp() && in select_instruction()
3768 instr->operands[0].isFixed() && instr->operands[0].physReg() == scc) { in select_instruction()
3769 ctx.info[instr->operands[0].tempId()].set_scc_needed(); in select_instruction()
3771 } else if ((instr->opcode == aco_opcode::s_cselect_b64 || in select_instruction()
3772 instr->opcode == aco_opcode::s_cselect_b32) && in select_instruction()
3773 instr->operands[2].isTemp()) { in select_instruction()
3774 ctx.info[instr->operands[2].tempId()].set_scc_needed(); in select_instruction()
3775 } else if (instr->opcode == aco_opcode::p_wqm && instr->operands[0].isTemp() && in select_instruction()
3776 ctx.info[instr->definitions[0].tempId()].is_scc_needed()) { in select_instruction()
3778 ctx.info[instr->operands[0].tempId()].set_scc_needed(); in select_instruction()
3781 instr->definitions[0].setFixed(scc); in select_instruction()
3785 if (!instr->isSALU() && !instr->isVALU()) in select_instruction()
3789 if (instr->definitions.size() && ctx.uses[instr->definitions[0].tempId()] == 0 && in select_instruction()
3790 ctx.info[instr->definitions[0].tempId()].is_uniform_bitwise()) { in select_instruction()
3791 bool transform_done = to_uniform_bool_instr(ctx, instr); in select_instruction()
3793 if (transform_done && !ctx.info[instr->definitions[1].tempId()].is_scc_needed()) { in select_instruction()
3796 uint32_t def0_id = instr->definitions[0].getTemp().id(); in select_instruction()
3797 uint32_t def1_id = instr->definitions[1].getTemp().id(); in select_instruction()
3798 instr->definitions[0].setTemp(Temp(def1_id, s1)); in select_instruction()
3799 instr->definitions[1].setTemp(Temp(def0_id, s1)); in select_instruction()
3806 if (instr->isVALU()) { in select_instruction()
3807 for (unsigned i = 0; i < instr->operands.size(); i++) { in select_instruction()
3808 if (!instr->operands[i].isTemp()) in select_instruction()
3810 ssa_info info = ctx.info[instr->operands[i].tempId()]; in select_instruction()
3813 if (info.is_dpp() && info.instr->pass_flags == instr->pass_flags && in select_instruction()
3814 (i == 0 || can_swap_operands(instr, &swapped_op)) && can_use_DPP(instr, true) && in select_instruction()
3815 !instr->isDPP()) { in select_instruction()
3816 convert_to_DPP(instr); in select_instruction()
3817 DPP_instruction* dpp = static_cast<DPP_instruction*>(instr.get()); in select_instruction()
3819 instr->opcode = swapped_op; in select_instruction()
3820 std::swap(instr->operands[0], instr->operands[1]); in select_instruction()
3824 if (--ctx.uses[info.instr->definitions[0].tempId()]) in select_instruction()
3825 ctx.uses[info.instr->operands[0].tempId()]++; in select_instruction()
3826 instr->operands[0].setTemp(info.instr->operands[0].getTemp()); in select_instruction()
3827 dpp->dpp_ctrl = info.instr->dpp().dpp_ctrl; in select_instruction()
3828 dpp->bound_ctrl = info.instr->dpp().bound_ctrl; in select_instruction()
3829 dpp->neg[0] ^= info.instr->dpp().neg[0] && !dpp->abs[0]; in select_instruction()
3830 dpp->abs[0] |= info.instr->dpp().abs[0]; in select_instruction()
3836 if (instr->isSDWA() || (instr->isVOP3() && ctx.program->chip_class < GFX10) || in select_instruction()
3837 (instr->isVOP3P() && ctx.program->chip_class < GFX10)) in select_instruction()
3847 if (instr->isSALU() || in select_instruction()
3848 (ctx.program->chip_class >= GFX10 && (can_use_VOP3(ctx, instr) || instr->isVOP3P()))) in select_instruction()
3849 num_operands = instr->operands.size(); in select_instruction()
3851 else if (instr->isVALU() && instr->operands.size() >= 3) in select_instruction()
3860 Operand op = instr->operands[i]; in select_instruction()
3861 unsigned bits = get_operand_size(instr, i); in select_instruction()
3863 if (instr->isVALU() && op.isTemp() && op.getTemp().type() == RegType::sgpr && in select_instruction()
3874 if (!alu_can_accept_constant(instr->opcode, i)) in select_instruction()
3889 bool is_shift64 = instr->opcode == aco_opcode::v_lshlrev_b64 || in select_instruction()
3890 instr->opcode == aco_opcode::v_lshrrev_b64 || in select_instruction()
3891 instr->opcode == aco_opcode::v_ashrrev_i64; in select_instruction()
3892 unsigned const_bus_limit = instr->isVALU() ? 1 : UINT32_MAX; in select_instruction()
3907 if (instr->operands[i].isTemp() && instr->operands[i].tempId() == literal_id) in select_instruction()
3908 ctx.uses[instr->operands[i].tempId()]--; in select_instruction()
3914 apply_literals(opt_ctx& ctx, aco_ptr<Instruction>& instr) in apply_literals() argument
3917 if (!instr) in apply_literals()
3921 if (!instr->definitions.empty() && ctx.info[instr->definitions[0].tempId()].is_mad()) { in apply_literals()
3922 mad_info* info = &ctx.mad_infos[ctx.info[instr->definitions[0].tempId()].instr->pass_flags]; in apply_literals()
3924 (ctx.uses[instr->operands[info->literal_idx].tempId()] == 0 || info->literal_idx == 2)) { in apply_literals()
3929 if (instr->opcode == aco_opcode::v_fma_f32) in apply_literals()
3931 else if (instr->opcode == aco_opcode::v_mad_f16 || in apply_literals()
3932 instr->opcode == aco_opcode::v_mad_legacy_f16) in apply_literals()
3934 else if (instr->opcode == aco_opcode::v_fma_f16) in apply_literals()
3939 new_mad->operands[0] = instr->operands[0]; in apply_literals()
3940 new_mad->operands[1] = instr->operands[1]; in apply_literals()
3942 new_mad->operands[0] = instr->operands[1 - info->literal_idx]; in apply_literals()
3943 new_mad->operands[1] = instr->operands[2]; in apply_literals()
3946 Operand::c32(ctx.info[instr->operands[info->literal_idx].tempId()].val); in apply_literals()
3947 new_mad->definitions[0] = instr->definitions[0]; in apply_literals()
3954 if (instr->isSALU() || instr->isVALU()) { in apply_literals()
3955 for (unsigned i = 0; i < instr->operands.size(); i++) { in apply_literals()
3956 Operand op = instr->operands[i]; in apply_literals()
3957 unsigned bits = get_operand_size(instr, i); in apply_literals()
3960 instr->format = withoutDPP(instr->format); in apply_literals()
3961 if (instr->isVALU() && i > 0 && instr->format != Format::VOP3P) in apply_literals()
3962 to_VOP3(ctx, instr); in apply_literals()
3963 instr->operands[i] = literal; in apply_literals()
3968 ctx.instructions.emplace_back(std::move(instr)); in apply_literals()
3982 for (aco_ptr<Instruction>& instr : block.instructions) in optimize()
3983 label_instruction(ctx, instr); in optimize()
3991 for (aco_ptr<Instruction>& instr : block.instructions) in optimize()
3992 combine_instruction(ctx, instr); in optimize()
4009 for (aco_ptr<Instruction>& instr : block.instructions) in optimize()
4010 apply_literals(ctx, instr); in optimize()