Lines Matching full:bld
106 Builder bld(ctx->program, ctx->block); in create_alu_builder() local
107 bld.is_precise = instr->exact; in create_alu_builder()
108 bld.is_sz_preserve = nir_alu_instr_is_signed_zero_preserve(instr); in create_alu_builder()
109 bld.is_inf_preserve = nir_alu_instr_is_inf_preserve(instr); in create_alu_builder()
110 bld.is_nan_preserve = nir_alu_instr_is_nan_preserve(instr); in create_alu_builder()
111 return bld; in create_alu_builder()
117 Builder bld(ctx->program, ctx->block); in emit_mbcnt() local
119 assert(mask.isUndefined() || mask.bytes() == bld.lm.bytes()); in emit_mbcnt()
123 return bld.vop3(aco_opcode::v_mbcnt_lo_u32_b32, Definition(dst), mask_lo, base); in emit_mbcnt()
132 bld.pseudo(aco_opcode::p_split_vector, bld.def(rc), bld.def(rc), mask); in emit_mbcnt()
140 Temp mbcnt_lo = bld.vop3(aco_opcode::v_mbcnt_lo_u32_b32, bld.def(v1), mask_lo, base); in emit_mbcnt()
143 return bld.vop2(aco_opcode::v_mbcnt_hi_u32_b32, Definition(dst), mask_hi, mbcnt_lo); in emit_mbcnt()
145 return bld.vop3(aco_opcode::v_mbcnt_hi_u32_b32_e64, Definition(dst), mask_hi, mbcnt_lo); in emit_mbcnt()
161 emit_bpermute(isel_context* ctx, Builder& bld, Temp index, Temp data) in emit_bpermute() argument
164 return bld.readlane(bld.def(s1), data, index); in emit_bpermute()
178 return bld.pseudo(aco_opcode::p_bpermute_readlane, bld.def(v1), bld.def(bld.lm), in emit_bpermute()
179 bld.def(bld.lm, vcc), index, data); in emit_bpermute()
185 bld.vopc(aco_opcode::v_cmp_ge_u32, bld.def(bld.lm), Operand::c32(31u), index); in emit_bpermute()
187 bld.pseudo(aco_opcode::p_split_vector, bld.def(s1), bld.def(s1), index_is_lo); in emit_bpermute()
188 Temp index_is_lo_n1 = bld.sop1(aco_opcode::s_not_b32, bld.def(s1), bld.def(s1, scc), in emit_bpermute()
190 Operand same_half = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), in emit_bpermute()
192 Operand index_x4 = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(2u), index); in emit_bpermute()
200 return bld.pseudo(aco_opcode::p_bpermute_shared_vgpr, bld.def(v1), bld.def(s2), in emit_bpermute()
201 bld.def(s1, scc), index_x4, data, same_half); in emit_bpermute()
203 return bld.pseudo(aco_opcode::p_bpermute_permlane, bld.def(v1), bld.def(s2), in emit_bpermute()
204 bld.def(s1, scc), Operand(v1.as_linear()), index_x4, data, same_half); in emit_bpermute()
208 Temp index_x4 = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(2u), index); in emit_bpermute()
209 return bld.ds(aco_opcode::ds_bpermute_b32, bld.def(v1), index_x4, data); in emit_bpermute()
214 emit_masked_swizzle(isel_context* ctx, Builder& bld, Temp src, unsigned mask, bool allow_fi) in emit_masked_swizzle() argument
250 return bld.vop1_dpp8(aco_opcode::v_mov_b32, bld.def(v1), src, lane_sel, allow_fi); in emit_masked_swizzle()
257 Temp op1 = bld.copy(bld.def(s1), Operand::c32(lane_mask & 0xffffffff)); in emit_masked_swizzle()
258 Temp op2 = bld.copy(bld.def(s1), Operand::c32(lane_mask >> 32)); in emit_masked_swizzle()
259 Builder::Result ret = bld.vop3(opcode, bld.def(v1), src, op1, op2); in emit_masked_swizzle()
266 return bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), src, dpp_ctrl, 0xf, 0xf, true, in emit_masked_swizzle()
270 return bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), src, mask, 0, false); in emit_masked_swizzle()
274 as_vgpr(Builder& bld, Temp val) in as_vgpr() argument
277 return bld.copy(bld.def(RegType::vgpr, val.size()), val); in as_vgpr()
285 Builder bld(ctx->program, ctx->block); in as_vgpr() local
286 return as_vgpr(bld, val); in as_vgpr()
292 Builder bld(ctx->program, ctx->block); in emit_extract_vector() local
293 bld.pseudo(aco_opcode::p_extract_vector, Definition(dst), src, Operand::c32(idx)); in emit_extract_vector()
306 Builder bld(ctx->program, ctx->block); in emit_extract_vector() local
314 return bld.copy(bld.def(dst_rc), it->second[idx]); in emit_extract_vector()
323 return bld.copy(bld.def(dst_rc), src); in emit_extract_vector()
325 Temp dst = bld.tmp(dst_rc); in emit_extract_vector()
369 Builder bld(ctx->program, ctx->block); in expand_vector() local
372 Temp tmp_dst = bld.tmp(RegClass::get(RegType::vgpr, 2 * num_components)); in expand_vector()
374 bld.pseudo(aco_opcode::p_as_uniform, Definition(dst), tmp_dst); in expand_vector()
386 bld.pseudo(aco_opcode::p_as_uniform, Definition(dst), vec_src); in expand_vector()
388 bld.copy(Definition(dst), vec_src); in expand_vector()
400 padding = bld.copy(bld.def(dst_rc), Operand::zero(component_bytes)); in expand_vector()
410 src = bld.as_uniform(src); in expand_vector()
436 Builder bld(ctx->program, ctx->block); in bool_to_vector_condition() local
438 dst = bld.tmp(bld.lm); in bool_to_vector_condition()
441 assert(dst.regClass() == bld.lm); in bool_to_vector_condition()
443 return bld.sop2(Builder::s_cselect, Definition(dst), Operand::c32(-1), Operand::zero(), in bool_to_vector_condition()
444 bld.scc(val)); in bool_to_vector_condition()
450 Builder bld(ctx->program, ctx->block); in bool_to_scalar_condition() local
452 dst = bld.tmp(s1); in bool_to_scalar_condition()
454 assert(val.regClass() == bld.lm); in bool_to_scalar_condition()
458 bld.sop2(Builder::s_and, bld.def(bld.lm), bld.scc(Definition(dst)), val, Operand(exec, bld.lm)); in bool_to_scalar_condition()
472 convert_int(isel_context* ctx, Builder& bld, Temp src, unsigned src_bits, unsigned dst_bits, in convert_int() argument
480 dst = bld.tmp(src.type(), DIV_ROUND_UP(dst_bits, 32u)); in convert_int()
482 dst = bld.tmp(RegClass(RegType::vgpr, dst_bits / 8u).as_subdword()); in convert_int()
491 return bld.copy(Definition(dst), src); in convert_int()
493 return bld.pseudo(aco_opcode::p_extract_vector, Definition(dst), src, Operand::zero()); in convert_int()
498 tmp = src_bits == 32 ? src : bld.tmp(src.type(), 1); in convert_int()
503 bld.pseudo(aco_opcode::p_extract, Definition(tmp), bld.def(s1, scc), src, Operand::zero(), in convert_int()
507 bld.pseudo(aco_opcode::p_extract, Definition(tmp), src, Operand::zero(), in convert_int()
514 bld.sop2(aco_opcode::s_ashr_i32, bld.def(s1), bld.def(s1, scc), tmp, Operand::c32(31u)); in convert_int()
515 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), tmp, high); in convert_int()
517 Temp high = bld.vop2(aco_opcode::v_ashrrev_i32, bld.def(v1), Operand::c32(31u), tmp); in convert_int()
518 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), tmp, high); in convert_int()
520 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), tmp, Operand::zero()); in convert_int()
546 Builder bld(ctx->program, ctx->block); in extract_8_16_bit_sgpr_element() local
547 Temp tmp = dst.regClass() == s2 ? bld.tmp(s1) : dst; in extract_8_16_bit_sgpr_element()
550 bld.copy(Definition(tmp), vec); in extract_8_16_bit_sgpr_element()
552 bld.pseudo(aco_opcode::p_extract, Definition(tmp), bld.def(s1, scc), Operand(vec), in extract_8_16_bit_sgpr_element()
557 convert_int(ctx, bld, tmp, 32, 64, mode == sgpr_extract_sext, dst); in extract_8_16_bit_sgpr_element()
636 Builder bld(ctx->program, ctx->block); in get_alu_src_vop3p() local
638 return bld.pseudo(aco_opcode::p_create_vector, bld.def(v1), it->second[index], in get_alu_src_vop3p()
662 Builder bld(ctx->program, ctx->block); in convert_pointer_to_64_bit() local
664 ptr = bld.as_uniform(ptr); in convert_pointer_to_64_bit()
665 return bld.pseudo(aco_opcode::p_create_vector, bld.def(RegClass(ptr.type(), 2)), ptr, in convert_pointer_to_64_bit()
673 Builder bld = create_alu_builder(ctx, instr); in emit_sop2_instruction() local
674 bld.is_nuw = instr->no_unsigned_wrap; in emit_sop2_instruction()
687 bld.sop2(op, Definition(dst), bld.def(s1, scc), operands[0], operands[1]); in emit_sop2_instruction()
689 bld.sop2(op, Definition(dst), operands[0], operands[1]); in emit_sop2_instruction()
697 Builder bld = create_alu_builder(ctx, instr); in emit_vop2_instruction() local
698 bld.is_nuw = nuw; in emit_vop2_instruction()
717 operands[1] = bld.copy(bld.def(RegType::vgpr, operands[1].size()), operands[1]); in emit_vop2_instruction()
723 Temp tmp = bld.vop2(opc, bld.def(dst.regClass()), operands[0], operands[1]); in emit_vop2_instruction()
725 bld.vop2(aco_opcode::v_mul_f16, Definition(dst), Operand::c16(0x3c00), tmp); in emit_vop2_instruction()
727 bld.vop2(aco_opcode::v_mul_f32, Definition(dst), Operand::c32(0x3f800000u), tmp); in emit_vop2_instruction()
729 bld.vop2(opc, Definition(dst), operands[0], operands[1]); in emit_vop2_instruction()
736 Builder bld = create_alu_builder(ctx, instr); in emit_vop2_instruction_logic64() local
746 Temp src00 = bld.tmp(src0.type(), 1); in emit_vop2_instruction_logic64()
747 Temp src01 = bld.tmp(src0.type(), 1); in emit_vop2_instruction_logic64()
748 bld.pseudo(aco_opcode::p_split_vector, Definition(src00), Definition(src01), src0); in emit_vop2_instruction_logic64()
749 Temp src10 = bld.tmp(v1); in emit_vop2_instruction_logic64()
750 Temp src11 = bld.tmp(v1); in emit_vop2_instruction_logic64()
751 bld.pseudo(aco_opcode::p_split_vector, Definition(src10), Definition(src11), src1); in emit_vop2_instruction_logic64()
752 Temp lo = bld.vop2(op, bld.def(v1), src00, src10); in emit_vop2_instruction_logic64()
753 Temp hi = bld.vop2(op, bld.def(v1), src01, src11); in emit_vop2_instruction_logic64()
754 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lo, hi); in emit_vop2_instruction_logic64()
772 Builder bld = create_alu_builder(ctx, instr); in emit_vop3a_instruction() local
776 tmp = bld.vop3(op, bld.def(dst.regClass()), src[0], src[1], src[2]); in emit_vop3a_instruction()
778 tmp = bld.vop3(op, bld.def(dst.regClass()), src[0], src[1]); in emit_vop3a_instruction()
780 bld.vop2(aco_opcode::v_mul_f32, Definition(dst), Operand::c32(0x3f800000u), tmp); in emit_vop3a_instruction()
782 bld.vop3(aco_opcode::v_mul_f64_e64, Definition(dst), Operand::c64(0x3FF0000000000000), in emit_vop3a_instruction()
785 bld.vop3(op, Definition(dst), src[0], src[1], src[2]); in emit_vop3a_instruction()
787 bld.vop3(op, Definition(dst), src[0], src[1]); in emit_vop3a_instruction()
807 Builder bld = create_alu_builder(ctx, instr); in emit_vop3p_instruction() local
808 Builder::Result res = bld.vop3p(op, Definition(dst), src0, src1, opsel_lo, opsel_hi); in emit_vop3p_instruction()
827 Builder bld = create_alu_builder(ctx, instr); in emit_idot_instruction() local
829 bld.vop3p(op, Definition(dst), src[0], src[1], src[2], 0x0, 0x7)->valu(); in emit_idot_instruction()
837 Builder bld = create_alu_builder(ctx, instr); in emit_vop1_instruction() local
839 bld.pseudo(aco_opcode::p_as_uniform, Definition(dst), in emit_vop1_instruction()
840 bld.vop1(op, bld.def(RegType::vgpr, dst.size()), get_alu_src(ctx, instr->src[0]))); in emit_vop1_instruction()
842 bld.vop1(op, Definition(dst), get_alu_src(ctx, instr->src[0])); in emit_vop1_instruction()
865 Builder bld = create_alu_builder(ctx, instr); in emit_vopc_instruction() local
866 bld.vopc(op, Definition(dst), src0, src1); in emit_vopc_instruction()
874 Builder bld = create_alu_builder(ctx, instr); in emit_sopc_instruction() local
876 assert(dst.regClass() == bld.lm); in emit_sopc_instruction()
881 Temp cmp = bld.sopc(op, bld.scc(bld.def(s1)), src0, src1); in emit_sopc_instruction()
915 Builder bld(ctx->program, ctx->block); in emit_boolean_logic() local
919 assert(dst.regClass() == bld.lm); in emit_boolean_logic()
920 assert(src0.regClass() == bld.lm); in emit_boolean_logic()
921 assert(src1.regClass() == bld.lm); in emit_boolean_logic()
923 bld.sop2(op, Definition(dst), bld.def(s1, scc), src0, src1); in emit_boolean_logic()
929 Builder bld(ctx->program, ctx->block); in select_vec2() local
931 Temp then_lo = bld.tmp(v1), then_hi = bld.tmp(v1); in select_vec2()
932 bld.pseudo(aco_opcode::p_split_vector, Definition(then_lo), Definition(then_hi), then); in select_vec2()
933 Temp else_lo = bld.tmp(v1), else_hi = bld.tmp(v1); in select_vec2()
934 bld.pseudo(aco_opcode::p_split_vector, Definition(else_lo), Definition(else_hi), els); in select_vec2()
936 Temp dst0 = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), else_lo, then_lo, cond); in select_vec2()
937 Temp dst1 = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), else_hi, then_hi, cond); in select_vec2()
939 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), dst0, dst1); in select_vec2()
945 Builder bld(ctx->program, ctx->block); in emit_bcsel() local
950 assert(cond.regClass() == bld.lm); in emit_bcsel()
958 bld.vop2(aco_opcode::v_cndmask_b32, Definition(dst), els, then, cond); in emit_bcsel()
968 assert(dst.regClass() == bld.lm); in emit_bcsel()
969 assert(then.regClass() == bld.lm); in emit_bcsel()
970 assert(els.regClass() == bld.lm); in emit_bcsel()
980 bld.sop2(op, Definition(dst), then, els, bld.scc(bool_to_scalar_condition(ctx, cond))); in emit_bcsel()
993 then = bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), cond, then); in emit_bcsel()
996 bld.copy(Definition(dst), then); in emit_bcsel()
998 bld.sop2(Builder::s_or, Definition(dst), bld.def(s1, scc), then, in emit_bcsel()
999 bld.sop2(Builder::s_andn2, bld.def(bld.lm), bld.def(s1, scc), els, cond)); in emit_bcsel()
1003 emit_scaled_op(isel_context* ctx, Builder& bld, Definition dst, Temp val, aco_opcode vop, in emit_scaled_op() argument
1008 bld.vop1(vop, dst, val); in emit_scaled_op()
1010 bld.vop3(sop, dst, val); in emit_scaled_op()
1012 bld.pseudo(aco_opcode::p_as_uniform, dst, bld.vop1(vop, bld.def(v1), val)); in emit_scaled_op()
1019 val = as_vgpr(bld, val); in emit_scaled_op()
1020 Temp is_denormal = bld.tmp(bld.lm); in emit_scaled_op()
1021 VALU_instruction& valu = bld.vopc_e64(aco_opcode::v_cmp_class_f32, Definition(is_denormal), in emit_scaled_op()
1026 scale = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::c32(0x3f800000), in emit_scaled_op()
1027 bld.copy(bld.def(s1), Operand::c32(0x4b800000u)), is_denormal); in emit_scaled_op()
1028 unscale = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::c32(0x3f800000), in emit_scaled_op()
1029 bld.copy(bld.def(s1), Operand::c32(undo)), is_denormal); in emit_scaled_op()
1031 Temp abs = bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), val, in emit_scaled_op()
1032 bld.copy(bld.def(s1), Operand::c32(0x7fffffff))); in emit_scaled_op()
1033 Temp denorm_cmp = bld.copy(bld.def(s1), Operand::c32(0x00800000)); in emit_scaled_op()
1034 Temp is_denormal = bld.sopc(aco_opcode::s_cmp_lt_u32, bld.def(s1, scc), abs, denorm_cmp); in emit_scaled_op()
1035 scale = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1), in emit_scaled_op()
1036 bld.copy(bld.def(s1), Operand::c32(0x4b800000u)), Operand::c32(0x3f800000), in emit_scaled_op()
1037 bld.scc(is_denormal)); in emit_scaled_op()
1039 bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1), bld.copy(bld.def(s1), Operand::c32(undo)), in emit_scaled_op()
1040 Operand::c32(0x3f800000), bld.scc(is_denormal)); in emit_scaled_op()
1044 Temp scaled = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), scale, as_vgpr(bld, val)); in emit_scaled_op()
1045 scaled = bld.vop1(vop, bld.def(v1), scaled); in emit_scaled_op()
1046 bld.vop2(aco_opcode::v_mul_f32, dst, unscale, scaled); in emit_scaled_op()
1049 Temp scaled = bld.sop2(aco_opcode::s_mul_f32, bld.def(s1), scale, val); in emit_scaled_op()
1051 scaled = bld.vop3(sop, bld.def(s1), scaled); in emit_scaled_op()
1053 scaled = bld.as_uniform(bld.vop1(vop, bld.def(v1), scaled)); in emit_scaled_op()
1054 bld.sop2(aco_opcode::s_mul_f32, dst, unscale, scaled); in emit_scaled_op()
1059 emit_rcp(isel_context* ctx, Builder& bld, Definition dst, Temp val) in emit_rcp() argument
1061 emit_scaled_op(ctx, bld, dst, val, aco_opcode::v_rcp_f32, aco_opcode::v_s_rcp_f32, 0x4b800000u); in emit_rcp()
1065 emit_rsq(isel_context* ctx, Builder& bld, Definition dst, Temp val) in emit_rsq() argument
1067 emit_scaled_op(ctx, bld, dst, val, aco_opcode::v_rsq_f32, aco_opcode::v_s_rsq_f32, 0x45800000u); in emit_rsq()
1071 emit_sqrt(isel_context* ctx, Builder& bld, Definition dst, Temp val) in emit_sqrt() argument
1073 emit_scaled_op(ctx, bld, dst, val, aco_opcode::v_sqrt_f32, aco_opcode::v_s_sqrt_f32, in emit_sqrt()
1078 emit_log2(isel_context* ctx, Builder& bld, Definition dst, Temp val) in emit_log2() argument
1080 emit_scaled_op(ctx, bld, dst, val, aco_opcode::v_log_f32, aco_opcode::v_s_log_f32, 0xc1c00000u); in emit_log2()
1084 emit_trunc_f64(isel_context* ctx, Builder& bld, Definition dst, Temp val) in emit_trunc_f64() argument
1087 return bld.vop1(aco_opcode::v_trunc_f64, Definition(dst), val); in emit_trunc_f64()
1095 Temp val_lo = bld.tmp(v1), val_hi = bld.tmp(v1); in emit_trunc_f64()
1096 bld.pseudo(aco_opcode::p_split_vector, Definition(val_lo), Definition(val_hi), val); in emit_trunc_f64()
1100 bld.vop3(aco_opcode::v_bfe_u32, bld.def(v1), val_hi, Operand::c32(20u), Operand::c32(11u)); in emit_trunc_f64()
1101 exponent = bld.vsub32(bld.def(v1), exponent, Operand::c32(1023u)); in emit_trunc_f64()
1104 Temp fract_mask = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand::c32(-1u), in emit_trunc_f64()
1106 fract_mask = bld.vop3(aco_opcode::v_lshr_b64, bld.def(v2), fract_mask, exponent); in emit_trunc_f64()
1108 Temp fract_mask_lo = bld.tmp(v1), fract_mask_hi = bld.tmp(v1); in emit_trunc_f64()
1109 bld.pseudo(aco_opcode::p_split_vector, Definition(fract_mask_lo), Definition(fract_mask_hi), in emit_trunc_f64()
1112 Temp fract_lo = bld.tmp(v1), fract_hi = bld.tmp(v1); in emit_trunc_f64()
1113 Temp tmp = bld.vop1(aco_opcode::v_not_b32, bld.def(v1), fract_mask_lo); in emit_trunc_f64()
1114 fract_lo = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), val_lo, tmp); in emit_trunc_f64()
1115 tmp = bld.vop1(aco_opcode::v_not_b32, bld.def(v1), fract_mask_hi); in emit_trunc_f64()
1116 fract_hi = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), val_hi, tmp); in emit_trunc_f64()
1119 Temp sign = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(0x80000000u), val_hi); in emit_trunc_f64()
1123 bld.vopc_e64(aco_opcode::v_cmp_lt_i32, bld.def(bld.lm), exponent, Operand::zero()); in emit_trunc_f64()
1124 Temp dst_lo = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), fract_lo, in emit_trunc_f64()
1125 bld.copy(bld.def(v1), Operand::zero()), exp_lt0); in emit_trunc_f64()
1126 Temp dst_hi = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), fract_hi, sign, exp_lt0); in emit_trunc_f64()
1127 Temp exp_gt51 = bld.vopc_e64(aco_opcode::v_cmp_gt_i32, bld.def(s2), exponent, Operand::c32(51u)); in emit_trunc_f64()
1128 dst_lo = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), dst_lo, val_lo, exp_gt51); in emit_trunc_f64()
1129 dst_hi = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), dst_hi, val_hi, exp_gt51); in emit_trunc_f64()
1131 return bld.pseudo(aco_opcode::p_create_vector, Definition(dst), dst_lo, dst_hi); in emit_trunc_f64()
1135 emit_floor_f64(isel_context* ctx, Builder& bld, Definition dst, Temp val) in emit_floor_f64() argument
1138 return bld.vop1(aco_opcode::v_floor_f64, Definition(dst), val); in emit_floor_f64()
1144 Temp min_val = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), Operand::c32(-1u), in emit_floor_f64()
1147 Temp isnan = bld.vopc(aco_opcode::v_cmp_neq_f64, bld.def(bld.lm), src0, src0); in emit_floor_f64()
1148 Temp fract = bld.vop1(aco_opcode::v_fract_f64, bld.def(v2), src0); in emit_floor_f64()
1149 Temp min = bld.vop3(aco_opcode::v_min_f64_e64, bld.def(v2), fract, min_val); in emit_floor_f64()
1151 Temp then_lo = bld.tmp(v1), then_hi = bld.tmp(v1); in emit_floor_f64()
1152 bld.pseudo(aco_opcode::p_split_vector, Definition(then_lo), Definition(then_hi), src0); in emit_floor_f64()
1153 Temp else_lo = bld.tmp(v1), else_hi = bld.tmp(v1); in emit_floor_f64()
1154 bld.pseudo(aco_opcode::p_split_vector, Definition(else_lo), Definition(else_hi), min); in emit_floor_f64()
1156 Temp dst0 = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), else_lo, then_lo, isnan); in emit_floor_f64()
1157 Temp dst1 = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), else_hi, then_hi, isnan); in emit_floor_f64()
1159 Temp v = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), dst0, dst1); in emit_floor_f64()
1161 Instruction* add = bld.vop3(aco_opcode::v_add_f64_e64, Definition(dst), src0, v); in emit_floor_f64()
1168 uadd32_sat(Builder& bld, Definition dst, Temp src0, Temp src1) in uadd32_sat() argument
1170 if (bld.program->gfx_level < GFX8) { in uadd32_sat()
1171 Builder::Result add = bld.vadd32(bld.def(v1), src0, src1, true); in uadd32_sat()
1172 return bld.vop2_e64(aco_opcode::v_cndmask_b32, dst, add.def(0).getTemp(), Operand::c32(-1), in uadd32_sat()
1177 if (bld.program->gfx_level >= GFX9) { in uadd32_sat()
1178 add = bld.vop2_e64(aco_opcode::v_add_u32, dst, src0, src1); in uadd32_sat()
1180 add = bld.vop2_e64(aco_opcode::v_add_co_u32, dst, bld.def(bld.lm), src0, src1); in uadd32_sat()
1187 usub32_sat(Builder& bld, Definition dst, Temp src0, Temp src1) in usub32_sat() argument
1189 if (bld.program->gfx_level < GFX8) { in usub32_sat()
1190 Builder::Result sub = bld.vsub32(bld.def(v1), src0, src1, true); in usub32_sat()
1191 return bld.vop2_e64(aco_opcode::v_cndmask_b32, dst, sub.def(0).getTemp(), Operand::c32(0u), in usub32_sat()
1196 if (bld.program->gfx_level >= GFX9) { in usub32_sat()
1197 sub = bld.vop2_e64(aco_opcode::v_sub_u32, dst, src0, src1); in usub32_sat()
1199 sub = bld.vop2_e64(aco_opcode::v_sub_co_u32, dst, bld.def(bld.lm), src0, src1); in usub32_sat()
1208 Builder bld = create_alu_builder(ctx, instr); in emit_vec2_f2f16() local
1215 bld.sop2(aco_opcode::s_cvt_pk_rtz_f16_f32, Definition(dst), src0, src1); in emit_vec2_f2f16()
1219 bld.vop3(aco_opcode::v_cvt_pkrtz_f16_f32_e64, Definition(dst), src0, src1); in emit_vec2_f2f16()
1221 bld.vop2(aco_opcode::v_cvt_pkrtz_f16_f32, Definition(dst), src0, src1); in emit_vec2_f2f16()
1229 Builder bld = create_alu_builder(ctx, instr); in visit_alu_instr() local
1261 Temp mask = bld.copy(bld.def(s1), Operand::c32((1u << instr->def.bit_size) - 1)); in visit_alu_instr()
1282 bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), elems[i], mask); in visit_alu_instr()
1285 elems[i] = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), elems[i], in visit_alu_instr()
1289 packed[idx] = bld.sop2(aco_opcode::s_or_b32, bld.def(s1), bld.def(s1, scc), elems[i], in visit_alu_instr()
1300 packed[i] = bld.sop2(aco_opcode::s_pack_ll_b32_b16, bld.def(s1), packed[i * 2], in visit_alu_instr()
1303 packed[i] = bld.sop2(aco_opcode::s_pack_ll_b32_b16, bld.def(s1), in visit_alu_instr()
1306 packed[i] = bld.sop2(aco_opcode::s_pack_ll_b32_b16, bld.def(s1), packed[i * 2], in visit_alu_instr()
1322 packed[i] = bld.sop2(aco_opcode::s_or_b32, bld.def(s1), bld.def(s1, scc), in visit_alu_instr()
1325 packed[i] = bld.copy(bld.def(s1), Operand::c32(const_vals[i])); in visit_alu_instr()
1329 bld.copy(Definition(dst), packed[0]); in visit_alu_instr()
1336 bld.insert(std::move(vec)); in visit_alu_instr()
1346 bld.pseudo(aco_opcode::p_as_uniform, Definition(dst), src); in visit_alu_instr()
1349 bld.copy(Definition(dst), src); in visit_alu_instr()
1358 Temp lo = bld.tmp(v1), hi = bld.tmp(v1); in visit_alu_instr()
1359 bld.pseudo(aco_opcode::p_split_vector, Definition(lo), Definition(hi), src); in visit_alu_instr()
1360 lo = bld.vop1(aco_opcode::v_not_b32, bld.def(v1), lo); in visit_alu_instr()
1361 hi = bld.vop1(aco_opcode::v_not_b32, bld.def(v1), hi); in visit_alu_instr()
1362 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lo, hi); in visit_alu_instr()
1365 bld.sop1(opcode, Definition(dst), bld.def(s1, scc), src); in visit_alu_instr()
1378 Temp sub = bld.vop3p(aco_opcode::v_pk_sub_u16, Definition(bld.tmp(v1)), Operand::zero(), in visit_alu_instr()
1380 bld.vop3p(aco_opcode::v_pk_max_i16, Definition(dst), sub, src, opsel_lo, opsel_hi); in visit_alu_instr()
1386 bld.sop1(aco_opcode::s_abs_i32, Definition(dst), bld.def(s1, scc), src); in visit_alu_instr()
1388 bld.vop2(aco_opcode::v_max_i32, Definition(dst), src, in visit_alu_instr()
1389 bld.vsub32(bld.def(v1), Operand::zero(), src)); in visit_alu_instr()
1391 bld.vop3( in visit_alu_instr()
1393 bld.vop3(aco_opcode::v_sub_u16_e64, Definition(bld.tmp(v2b)), Operand::zero(2), src)); in visit_alu_instr()
1396 bld.vop2(aco_opcode::v_max_i16, Definition(dst), src, in visit_alu_instr()
1397 bld.vop2(aco_opcode::v_sub_u16, Definition(bld.tmp(v2b)), Operand::zero(2), src)); in visit_alu_instr()
1407 bld.sop2(aco_opcode::s_max_i32, bld.def(s1), bld.def(s1, scc), src, Operand::c32(-1)); in visit_alu_instr()
1408 bld.sop2(aco_opcode::s_min_i32, Definition(dst), bld.def(s1, scc), tmp, Operand::c32(1u)); in visit_alu_instr()
1411 bld.sop2(aco_opcode::s_ashr_i64, bld.def(s2), bld.def(s1, scc), src, Operand::c32(63u)); in visit_alu_instr()
1414 neqz = bld.sopc(aco_opcode::s_cmp_lg_u64, bld.def(s1, scc), src, Operand::zero()); in visit_alu_instr()
1417 bld.sop2(aco_opcode::s_or_b64, bld.def(s2), bld.def(s1, scc), src, Operand::zero()) in visit_alu_instr()
1421 bld.sop2(aco_opcode::s_or_b64, Definition(dst), bld.def(s1, scc), neg, bld.scc(neqz)); in visit_alu_instr()
1423 bld.vop3(aco_opcode::v_med3_i32, Definition(dst), Operand::c32(-1), src, Operand::c32(1u)); in visit_alu_instr()
1425 bld.vop3(aco_opcode::v_med3_i16, Definition(dst), Operand::c16(-1), src, Operand::c16(1u)); in visit_alu_instr()
1428 bld.vop2(aco_opcode::v_max_i16, Definition(dst), Operand::c16(-1), in visit_alu_instr()
1429 bld.vop2(aco_opcode::v_min_i16, Definition(bld.tmp(v1)), Operand::c16(1u), src)); in visit_alu_instr()
1432 Temp neg = bld.vop2(aco_opcode::v_ashrrev_i32, bld.def(v1), Operand::c32(31u), upper); in visit_alu_instr()
1433 Temp gtz = bld.vopc(aco_opcode::v_cmp_ge_i64, bld.def(bld.lm), Operand::zero(), src); in visit_alu_instr()
1434 Temp lower = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::c32(1u), neg, gtz); in visit_alu_instr()
1435 upper = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::zero(), neg, gtz); in visit_alu_instr()
1436 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lower, upper); in visit_alu_instr()
1564 bld.vop3(aco_opcode::v_lshrrev_b64, Definition(dst), get_alu_src(ctx, instr->src[1]), in visit_alu_instr()
1588 bld.vop3(aco_opcode::v_lshlrev_b64_e64, Definition(dst), get_alu_src(ctx, instr->src[1]), in visit_alu_instr()
1611 bld.vop3(aco_opcode::v_ashrrev_i64, Definition(dst), get_alu_src(ctx, instr->src[1]), in visit_alu_instr()
1627 bld.sop1(aco_opcode::s_ff1_i32_b32, Definition(dst), src); in visit_alu_instr()
1631 bld.sop1(aco_opcode::s_ff1_i32_b64, Definition(dst), src); in visit_alu_instr()
1633 Temp lo = bld.tmp(v1), hi = bld.tmp(v1); in visit_alu_instr()
1634 bld.pseudo(aco_opcode::p_split_vector, Definition(lo), Definition(hi), src); in visit_alu_instr()
1635 lo = bld.vop1(aco_opcode::v_ffbl_b32, bld.def(v1), lo); in visit_alu_instr()
1636 hi = bld.vop1(aco_opcode::v_ffbl_b32, bld.def(v1), hi); in visit_alu_instr()
1637 hi = bld.vop2(aco_opcode::v_or_b32, bld.def(v1), Operand::c32(32u), hi); in visit_alu_instr()
1638 bld.vop2(aco_opcode::v_min_u32, Definition(dst), lo, hi); in visit_alu_instr()
1653 Temp msb_rev = bld.sop1(op, bld.def(s1), src); in visit_alu_instr()
1655 Builder::Result sub = bld.sop2(aco_opcode::s_sub_u32, bld.def(s1), bld.def(s1, scc), in visit_alu_instr()
1660 bld.sop2(aco_opcode::s_cselect_b32, Definition(dst), Operand::c32(-1), msb, in visit_alu_instr()
1661 bld.scc(carry)); in visit_alu_instr()
1665 Temp msb_rev = bld.tmp(v1); in visit_alu_instr()
1667 Temp msb = bld.tmp(v1); in visit_alu_instr()
1669 bld.vsub32(Definition(msb), Operand::c32(31u), Operand(msb_rev), true).def(1).getTemp(); in visit_alu_instr()
1670 bld.vop2(aco_opcode::v_cndmask_b32, Definition(dst), msb, msb_rev, carry); in visit_alu_instr()
1675 Temp lo = bld.tmp(v1), hi = bld.tmp(v1); in visit_alu_instr()
1676 bld.pseudo(aco_opcode::p_split_vector, Definition(lo), Definition(hi), src); in visit_alu_instr()
1678 lo = bld.vop1(op, bld.def(v1), lo); in visit_alu_instr()
1679 lo = bld.vop2(aco_opcode::v_or_b32, bld.def(v1), Operand::c32(32), lo); in visit_alu_instr()
1680 hi = bld.vop1(op, bld.def(v1), hi); in visit_alu_instr()
1681 Temp msb_rev = bld.vop2(aco_opcode::v_min_u32, bld.def(v1), lo, hi); in visit_alu_instr()
1683 Temp msb = bld.tmp(v1); in visit_alu_instr()
1685 bld.vsub32(Definition(msb), Operand::c32(63u), Operand(msb_rev), true).def(1).getTemp(); in visit_alu_instr()
1686 bld.vop2(aco_opcode::v_cndmask_b32, Definition(dst), msb, msb_rev, carry); in visit_alu_instr()
1698 bld.sop1(op, Definition(dst), src); in visit_alu_instr()
1710 bld.sop1(aco_opcode::s_brev_b32, Definition(dst), get_alu_src(ctx, instr->src[0])); in visit_alu_instr()
1712 bld.vop1(aco_opcode::v_bfrev_b32, Definition(dst), get_alu_src(ctx, instr->src[0])); in visit_alu_instr()
1737 bld.nuw().vadd32(Definition(dst), Operand(src0), Operand(src1)); in visit_alu_instr()
1739 bld.vadd32(Definition(dst), Operand(src0), Operand(src1)); in visit_alu_instr()
1744 Temp src00 = bld.tmp(src0.type(), 1); in visit_alu_instr()
1745 Temp src01 = bld.tmp(dst.type(), 1); in visit_alu_instr()
1746 bld.pseudo(aco_opcode::p_split_vector, Definition(src00), Definition(src01), src0); in visit_alu_instr()
1747 Temp src10 = bld.tmp(src1.type(), 1); in visit_alu_instr()
1748 Temp src11 = bld.tmp(dst.type(), 1); in visit_alu_instr()
1749 bld.pseudo(aco_opcode::p_split_vector, Definition(src10), Definition(src11), src1); in visit_alu_instr()
1752 Temp carry = bld.tmp(s1); in visit_alu_instr()
1754 bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.scc(Definition(carry)), src00, src10); in visit_alu_instr()
1755 Temp dst1 = bld.sop2(aco_opcode::s_addc_u32, bld.def(s1), bld.def(s1, scc), src01, src11, in visit_alu_instr()
1756 bld.scc(carry)); in visit_alu_instr()
1757 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), dst0, dst1); in visit_alu_instr()
1759 Temp dst0 = bld.tmp(v1); in visit_alu_instr()
1760 Temp carry = bld.vadd32(Definition(dst0), src00, src10, true).def(1).getTemp(); in visit_alu_instr()
1761 Temp dst1 = bld.vadd32(bld.def(v1), src01, src11, false, carry); in visit_alu_instr()
1762 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), dst0, dst1); in visit_alu_instr()
1777 Temp tmp = bld.tmp(s1), carry = bld.tmp(s1); in visit_alu_instr()
1778 bld.sop2(aco_opcode::s_add_u32, Definition(tmp), bld.scc(Definition(carry)), src0, src1); in visit_alu_instr()
1779 bld.sop2(aco_opcode::s_cselect_b32, Definition(dst), Operand::c32(-1), tmp, in visit_alu_instr()
1780 bld.scc(carry)); in visit_alu_instr()
1785 add_instr = bld.vop3(aco_opcode::v_add_u16_e64, Definition(dst), src0, src1).instr; in visit_alu_instr()
1790 bld.vop2_e64(aco_opcode::v_add_u16, Definition(dst), src0, as_vgpr(ctx, src1)).instr; in visit_alu_instr()
1795 uadd32_sat(bld, Definition(dst), src0, src1); in visit_alu_instr()
1801 Temp src00 = bld.tmp(src0.type(), 1); in visit_alu_instr()
1802 Temp src01 = bld.tmp(src0.type(), 1); in visit_alu_instr()
1803 bld.pseudo(aco_opcode::p_split_vector, Definition(src00), Definition(src01), src0); in visit_alu_instr()
1804 Temp src10 = bld.tmp(src1.type(), 1); in visit_alu_instr()
1805 Temp src11 = bld.tmp(src1.type(), 1); in visit_alu_instr()
1806 bld.pseudo(aco_opcode::p_split_vector, Definition(src10), Definition(src11), src1); in visit_alu_instr()
1809 Temp carry0 = bld.tmp(s1); in visit_alu_instr()
1810 Temp carry1 = bld.tmp(s1); in visit_alu_instr()
1813 bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.scc(Definition(carry0)), src00, src10); in visit_alu_instr()
1814 Temp no_sat1 = bld.sop2(aco_opcode::s_addc_u32, bld.def(s1), bld.scc(Definition(carry1)), in visit_alu_instr()
1815 src01, src11, bld.scc(carry0)); in visit_alu_instr()
1817 Temp no_sat = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), no_sat0, no_sat1); in visit_alu_instr()
1819 bld.sop2(aco_opcode::s_cselect_b64, Definition(dst), Operand::c64(-1), no_sat, in visit_alu_instr()
1820 bld.scc(carry1)); in visit_alu_instr()
1822 Temp no_sat0 = bld.tmp(v1); in visit_alu_instr()
1823 Temp dst0 = bld.tmp(v1); in visit_alu_instr()
1824 Temp dst1 = bld.tmp(v1); in visit_alu_instr()
1826 Temp carry0 = bld.vadd32(Definition(no_sat0), src00, src10, true).def(1).getTemp(); in visit_alu_instr()
1830 carry1 = bld.tmp(bld.lm); in visit_alu_instr()
1831 bld.vop2_e64(aco_opcode::v_addc_co_u32, Definition(dst1), Definition(carry1), in visit_alu_instr()
1836 Temp no_sat1 = bld.tmp(v1); in visit_alu_instr()
1837 carry1 = bld.vadd32(Definition(no_sat1), src01, src11, true, carry0).def(1).getTemp(); in visit_alu_instr()
1838 bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(dst1), no_sat1, Operand::c32(-1), in visit_alu_instr()
1842 bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(dst0), no_sat0, Operand::c32(-1), in visit_alu_instr()
1844 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), dst0, dst1); in visit_alu_instr()
1859 Temp cond = bld.sopc(aco_opcode::s_cmp_lt_i32, bld.def(s1, scc), src1, Operand::zero()); in visit_alu_instr()
1860 Temp bound = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.scc(bld.def(s1, scc)), in visit_alu_instr()
1862 Temp overflow = bld.tmp(s1); in visit_alu_instr()
1864 bld.sop2(aco_opcode::s_add_i32, bld.def(s1), bld.scc(Definition(overflow)), src0, src1); in visit_alu_instr()
1865 bld.sop2(aco_opcode::s_cselect_b32, Definition(dst), bound, add, bld.scc(overflow)); in visit_alu_instr()
1873 bld.vop3(aco_opcode::v_add_i16, Definition(dst), src0, src1).instr; in visit_alu_instr()
1877 bld.vop3(aco_opcode::v_add_i32, Definition(dst), src0, src1).instr; in visit_alu_instr()
1888 bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.scc(Definition(dst)), src0, src1); in visit_alu_instr()
1892 Temp carry = bld.vadd32(bld.def(v1), src0, src1, true).def(1).getTemp(); in visit_alu_instr()
1893 bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(dst), Operand::zero(), Operand::c32(1u), in visit_alu_instr()
1898 Temp src00 = bld.tmp(src0.type(), 1); in visit_alu_instr()
1899 Temp src01 = bld.tmp(dst.type(), 1); in visit_alu_instr()
1900 bld.pseudo(aco_opcode::p_split_vector, Definition(src00), Definition(src01), src0); in visit_alu_instr()
1901 Temp src10 = bld.tmp(src1.type(), 1); in visit_alu_instr()
1902 Temp src11 = bld.tmp(dst.type(), 1); in visit_alu_instr()
1903 bld.pseudo(aco_opcode::p_split_vector, Definition(src10), Definition(src11), src1); in visit_alu_instr()
1905 Temp carry = bld.tmp(s1); in visit_alu_instr()
1906 bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.scc(Definition(carry)), src00, src10); in visit_alu_instr()
1907 carry = bld.sop2(aco_opcode::s_addc_u32, bld.def(s1), bld.scc(bld.def(s1)), src01, src11, in visit_alu_instr()
1908 bld.scc(carry)) in visit_alu_instr()
1911 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), carry, Operand::zero()); in visit_alu_instr()
1913 Temp carry = bld.vadd32(bld.def(v1), src00, src10, true).def(1).getTemp(); in visit_alu_instr()
1914 carry = bld.vadd32(bld.def(v1), src01, src11, true, carry).def(1).getTemp(); in visit_alu_instr()
1915 carry = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::zero(), in visit_alu_instr()
1917 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), carry, Operand::zero()); in visit_alu_instr()
1935 bld.vsub32(Definition(dst), src0, src1); in visit_alu_instr()
1939 bld.vop3(aco_opcode::v_sub_u16_e64, Definition(dst), src0, src1); in visit_alu_instr()
1941 bld.vop2(aco_opcode::v_subrev_u16, Definition(dst), src1, as_vgpr(ctx, src0)); in visit_alu_instr()
1943 bld.vop2(aco_opcode::v_sub_u16, Definition(dst), src0, as_vgpr(ctx, src1)); in visit_alu_instr()
1945 bld.vsub32(Definition(dst), src0, src1); in visit_alu_instr()
1949 Temp src00 = bld.tmp(src0.type(), 1); in visit_alu_instr()
1950 Temp src01 = bld.tmp(dst.type(), 1); in visit_alu_instr()
1951 bld.pseudo(aco_opcode::p_split_vector, Definition(src00), Definition(src01), src0); in visit_alu_instr()
1952 Temp src10 = bld.tmp(src1.type(), 1); in visit_alu_instr()
1953 Temp src11 = bld.tmp(dst.type(), 1); in visit_alu_instr()
1954 bld.pseudo(aco_opcode::p_split_vector, Definition(src10), Definition(src11), src1); in visit_alu_instr()
1956 Temp borrow = bld.tmp(s1); in visit_alu_instr()
1958 bld.sop2(aco_opcode::s_sub_u32, bld.def(s1), bld.scc(Definition(borrow)), src00, src10); in visit_alu_instr()
1959 Temp dst1 = bld.sop2(aco_opcode::s_subb_u32, bld.def(s1), bld.def(s1, scc), src01, src11, in visit_alu_instr()
1960 bld.scc(borrow)); in visit_alu_instr()
1961 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), dst0, dst1); in visit_alu_instr()
1963 Temp lower = bld.tmp(v1); in visit_alu_instr()
1964 Temp borrow = bld.vsub32(Definition(lower), src00, src10, true).def(1).getTemp(); in visit_alu_instr()
1965 Temp upper = bld.vsub32(bld.def(v1), src01, src11, false, borrow); in visit_alu_instr()
1966 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lower, upper); in visit_alu_instr()
1976 bld.sop2(aco_opcode::s_sub_u32, bld.def(s1), bld.scc(Definition(dst)), src0, src1); in visit_alu_instr()
1979 Temp borrow = bld.vsub32(bld.def(v1), src0, src1, true).def(1).getTemp(); in visit_alu_instr()
1980 bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(dst), Operand::zero(), Operand::c32(1u), in visit_alu_instr()
1985 Temp src00 = bld.tmp(src0.type(), 1); in visit_alu_instr()
1986 Temp src01 = bld.tmp(dst.type(), 1); in visit_alu_instr()
1987 bld.pseudo(aco_opcode::p_split_vector, Definition(src00), Definition(src01), src0); in visit_alu_instr()
1988 Temp src10 = bld.tmp(src1.type(), 1); in visit_alu_instr()
1989 Temp src11 = bld.tmp(dst.type(), 1); in visit_alu_instr()
1990 bld.pseudo(aco_opcode::p_split_vector, Definition(src10), Definition(src11), src1); in visit_alu_instr()
1992 Temp borrow = bld.tmp(s1); in visit_alu_instr()
1993 bld.sop2(aco_opcode::s_sub_u32, bld.def(s1), bld.scc(Definition(borrow)), src00, src10); in visit_alu_instr()
1994 borrow = bld.sop2(aco_opcode::s_subb_u32, bld.def(s1), bld.scc(bld.def(s1)), src01, src11, in visit_alu_instr()
1995 bld.scc(borrow)) in visit_alu_instr()
1998 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), borrow, Operand::zero()); in visit_alu_instr()
2000 Temp borrow = bld.vsub32(bld.def(v1), src00, src10, true).def(1).getTemp(); in visit_alu_instr()
2001 borrow = bld.vsub32(bld.def(v1), src01, src11, true, Operand(borrow)).def(1).getTemp(); in visit_alu_instr()
2002 borrow = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::zero(), in visit_alu_instr()
2004 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), borrow, Operand::zero()); in visit_alu_instr()
2019 Temp tmp = bld.tmp(s1), carry = bld.tmp(s1); in visit_alu_instr()
2020 bld.sop2(aco_opcode::s_sub_u32, Definition(tmp), bld.scc(Definition(carry)), src0, src1); in visit_alu_instr()
2021 bld.sop2(aco_opcode::s_cselect_b32, Definition(dst), Operand::c32(0), tmp, bld.scc(carry)); in visit_alu_instr()
2026 sub_instr = bld.vop3(aco_opcode::v_sub_u16_e64, Definition(dst), src0, src1).instr; in visit_alu_instr()
2033 sub_instr = bld.vop2_e64(op, Definition(dst), src0, as_vgpr(ctx, src1)).instr; in visit_alu_instr()
2038 usub32_sat(bld, Definition(dst), src0, as_vgpr(ctx, src1)); in visit_alu_instr()
2043 Temp src00 = bld.tmp(src0.type(), 1); in visit_alu_instr()
2044 Temp src01 = bld.tmp(src0.type(), 1); in visit_alu_instr()
2045 bld.pseudo(aco_opcode::p_split_vector, Definition(src00), Definition(src01), src0); in visit_alu_instr()
2046 Temp src10 = bld.tmp(src1.type(), 1); in visit_alu_instr()
2047 Temp src11 = bld.tmp(src1.type(), 1); in visit_alu_instr()
2048 bld.pseudo(aco_opcode::p_split_vector, Definition(src10), Definition(src11), src1); in visit_alu_instr()
2051 Temp carry0 = bld.tmp(s1); in visit_alu_instr()
2052 Temp carry1 = bld.tmp(s1); in visit_alu_instr()
2055 bld.sop2(aco_opcode::s_sub_u32, bld.def(s1), bld.scc(Definition(carry0)), src00, src10); in visit_alu_instr()
2056 Temp no_sat1 = bld.sop2(aco_opcode::s_subb_u32, bld.def(s1), bld.scc(Definition(carry1)), in visit_alu_instr()
2057 src01, src11, bld.scc(carry0)); in visit_alu_instr()
2059 Temp no_sat = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), no_sat0, no_sat1); in visit_alu_instr()
2061 bld.sop2(aco_opcode::s_cselect_b64, Definition(dst), Operand::c64(0ull), no_sat, in visit_alu_instr()
2062 bld.scc(carry1)); in visit_alu_instr()
2064 Temp no_sat0 = bld.tmp(v1); in visit_alu_instr()
2065 Temp dst0 = bld.tmp(v1); in visit_alu_instr()
2066 Temp dst1 = bld.tmp(v1); in visit_alu_instr()
2068 Temp carry0 = bld.vsub32(Definition(no_sat0), src00, src10, true).def(1).getTemp(); in visit_alu_instr()
2072 carry1 = bld.tmp(bld.lm); in visit_alu_instr()
2073 bld.vop2_e64(aco_opcode::v_subb_co_u32, Definition(dst1), Definition(carry1), in visit_alu_instr()
2078 Temp no_sat1 = bld.tmp(v1); in visit_alu_instr()
2079 carry1 = bld.vsub32(Definition(no_sat1), src01, src11, true, carry0).def(1).getTemp(); in visit_alu_instr()
2080 bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(dst1), no_sat1, Operand::c32(0u), in visit_alu_instr()
2084 bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(dst0), no_sat0, Operand::c32(0u), in visit_alu_instr()
2086 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), dst0, dst1); in visit_alu_instr()
2101 Temp cond = bld.sopc(aco_opcode::s_cmp_gt_i32, bld.def(s1, scc), src1, Operand::zero()); in visit_alu_instr()
2102 Temp bound = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.scc(bld.def(s1, scc)), in visit_alu_instr()
2104 Temp overflow = bld.tmp(s1); in visit_alu_instr()
2106 bld.sop2(aco_opcode::s_sub_i32, bld.def(s1), bld.scc(Definition(overflow)), src0, src1); in visit_alu_instr()
2107 bld.sop2(aco_opcode::s_cselect_b32, Definition(dst), bound, sub, bld.scc(overflow)); in visit_alu_instr()
2115 bld.vop3(aco_opcode::v_sub_i16, Definition(dst), src0, src1).instr; in visit_alu_instr()
2119 bld.vop3(aco_opcode::v_sub_i32, Definition(dst), src0, src1).instr; in visit_alu_instr()
2142 bld.v_mul_imm(Definition(dst), get_alu_src(ctx, instr->src[1]), in visit_alu_instr()
2145 bld.v_mul_imm(Definition(dst), get_alu_src(ctx, instr->src[0]), in visit_alu_instr()
2164 Temp tmp = dst.regClass() == s1 ? bld.tmp(v1) : dst; in visit_alu_instr()
2172 bld.pseudo(aco_opcode::p_as_uniform, Definition(dst), tmp); in visit_alu_instr()
2184 Temp tmp = bld.vop3(aco_opcode::v_mul_hi_i32, bld.def(v1), get_alu_src(ctx, instr->src[0]), in visit_alu_instr()
2186 bld.pseudo(aco_opcode::p_as_uniform, Definition(dst), tmp); in visit_alu_instr()
2258 Instruction* add = bld.vop3(aco_opcode::v_add_f64_e64, Definition(dst), as_vgpr(ctx, src0), in visit_alu_instr()
2287 bld.vop3p(aco_opcode::v_pk_fma_f16, Definition(dst), src0, src1, src2, opsel_lo, opsel_hi); in visit_alu_instr()
2300 bld.sop2(op, Definition(dst), src0, src1, src2); in visit_alu_instr()
2407 Temp ma = bld.vop3(aco_opcode::v_cubema_f32, bld.def(v1), src[0], src[1], src[2]); in visit_alu_instr()
2408 Temp sc = bld.vop3(aco_opcode::v_cubesc_f32, bld.def(v1), src[0], src[1], src[2]); in visit_alu_instr()
2409 Temp tc = bld.vop3(aco_opcode::v_cubetc_f32, bld.def(v1), src[0], src[1], src[2]); in visit_alu_instr()
2410 Temp id = bld.vop3(aco_opcode::v_cubeid_f32, bld.def(v1), src[0], src[1], src[2]); in visit_alu_instr()
2411 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), tc, sc, ma, id); in visit_alu_instr()
2421 bld.vop3(aco_opcode::v_s_rsq_f16, Definition(dst), get_alu_src(ctx, instr->src[0])); in visit_alu_instr()
2425 emit_rsq(ctx, bld, Definition(dst), get_alu_src(ctx, instr->src[0])); in visit_alu_instr()
2438 bld.vop3p(aco_opcode::v_pk_mul_f16, Definition(dst), src, Operand::c16(0x3C00), in visit_alu_instr()
2447 bld.vop2(aco_opcode::v_mul_f16, Definition(dst), Operand::c16(0xbc00u), as_vgpr(ctx, src)); in visit_alu_instr()
2449 bld.vop2(aco_opcode::v_mul_f32, Definition(dst), Operand::c32(0xbf800000u), in visit_alu_instr()
2453 src = bld.vop3(aco_opcode::v_mul_f64_e64, bld.def(v2), Operand::c64(0x3FF0000000000000), in visit_alu_instr()
2455 Temp upper = bld.tmp(v1), lower = bld.tmp(v1); in visit_alu_instr()
2456 bld.pseudo(aco_opcode::p_split_vector, Definition(lower), Definition(upper), src); in visit_alu_instr()
2457 upper = bld.vop2(aco_opcode::v_xor_b32, bld.def(v1), Operand::c32(0x80000000u), upper); in visit_alu_instr()
2458 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lower, upper); in visit_alu_instr()
2460 bld.sop2(aco_opcode::s_mul_f16, Definition(dst), Operand::c16(0xbc00u), src); in visit_alu_instr()
2462 bld.sop2(aco_opcode::s_mul_f32, Definition(dst), Operand::c32(0xbf800000u), src); in visit_alu_instr()
2472 bld.vop3p(aco_opcode::v_pk_max_f16, Definition(dst), src, src, in visit_alu_instr()
2482 Instruction* mul = bld.vop2_e64(aco_opcode::v_mul_f16, Definition(dst), in visit_alu_instr()
2487 Instruction* mul = bld.vop2_e64(aco_opcode::v_mul_f32, Definition(dst), in visit_alu_instr()
2493 src = bld.vop3(aco_opcode::v_mul_f64_e64, bld.def(v2), Operand::c64(0x3FF0000000000000), in visit_alu_instr()
2495 Temp upper = bld.tmp(v1), lower = bld.tmp(v1); in visit_alu_instr()
2496 bld.pseudo(aco_opcode::p_split_vector, Definition(lower), Definition(upper), src); in visit_alu_instr()
2497 upper = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(0x7FFFFFFFu), upper); in visit_alu_instr()
2498 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lower, upper); in visit_alu_instr()
2500 Temp mask = bld.copy(bld.def(s1), Operand::c32(0x7fff)); in visit_alu_instr()
2502 bld.sop2(aco_opcode::s_and_b32, Definition(dst), bld.def(s1, scc), mask, src); in visit_alu_instr()
2504 Temp tmp = bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), mask, src); in visit_alu_instr()
2505 bld.sop2(aco_opcode::s_mul_f16, Definition(dst), Operand::c16(0x3c00), tmp); in visit_alu_instr()
2508 Temp mask = bld.copy(bld.def(s1), Operand::c32(0x7fffffff)); in visit_alu_instr()
2510 bld.sop2(aco_opcode::s_and_b32, Definition(dst), bld.def(s1, scc), mask, src); in visit_alu_instr()
2512 Temp tmp = bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), mask, src); in visit_alu_instr()
2513 bld.sop2(aco_opcode::s_mul_f32, Definition(dst), Operand::c32(0x3f800000), tmp); in visit_alu_instr()
2524 bld.vop3p(aco_opcode::v_pk_mul_f16, Definition(dst), src, Operand::c16(0x3C00), in visit_alu_instr()
2532 bld.vop3(aco_opcode::v_med3_f16, Definition(dst), Operand::c16(0u), Operand::c16(0x3c00), in visit_alu_instr()
2535 bld.vop2_e64(aco_opcode::v_mul_f16, Definition(dst), Operand::c16(0x3c00), src) in visit_alu_instr()
2539 bld.vop3(aco_opcode::v_med3_f32, Definition(dst), Operand::zero(), in visit_alu_instr()
2546 bld.vop3(aco_opcode::v_add_f64_e64, Definition(dst), src, Operand::zero()); in visit_alu_instr()
2549 Temp low = bld.sop2(aco_opcode::s_max_f16, bld.def(s1), src, Operand::c16(0)); in visit_alu_instr()
2550 bld.sop2(aco_opcode::s_min_f16, Definition(dst), low, Operand::c16(0x3C00)); in visit_alu_instr()
2552 Temp low = bld.sop2(aco_opcode::s_max_f32, bld.def(s1), src, Operand::c32(0)); in visit_alu_instr()
2553 bld.sop2(aco_opcode::s_min_f32, Definition(dst), low, Operand::c32(0x3f800000)); in visit_alu_instr()
2562 bld.vop3(aco_opcode::v_s_log_f16, Definition(dst), get_alu_src(ctx, instr->src[0])); in visit_alu_instr()
2566 emit_log2(ctx, bld, Definition(dst), get_alu_src(ctx, instr->src[0])); in visit_alu_instr()
2575 bld.vop3(aco_opcode::v_s_rcp_f16, Definition(dst), get_alu_src(ctx, instr->src[0])); in visit_alu_instr()
2579 emit_rcp(ctx, bld, Definition(dst), get_alu_src(ctx, instr->src[0])); in visit_alu_instr()
2592 bld.vop3(opcode, Definition(dst), get_alu_src(ctx, instr->src[0])); in visit_alu_instr()
2605 bld.vop3(aco_opcode::v_s_sqrt_f16, Definition(dst), get_alu_src(ctx, instr->src[0])); in visit_alu_instr()
2609 emit_sqrt(ctx, bld, Definition(dst), get_alu_src(ctx, instr->src[0])); in visit_alu_instr()
2629 Temp floor = bld.sop1(op, bld.def(s1), src); in visit_alu_instr()
2631 bld.sop2(op, Definition(dst), src, floor); in visit_alu_instr()
2644 emit_floor_f64(ctx, bld, Definition(dst), src); in visit_alu_instr()
2649 bld.sop1(op, Definition(dst), src); in visit_alu_instr()
2670 Temp trunc = emit_trunc_f64(ctx, bld, bld.def(v2), src0); in visit_alu_instr()
2672 bld.vopc_e64(aco_opcode::v_cmp_gt_f64, bld.def(bld.lm), src0, Operand::zero()); in visit_alu_instr()
2673 Temp tmp1 = bld.vopc(aco_opcode::v_cmp_lg_f64, bld.def(bld.lm), src0, trunc); in visit_alu_instr()
2674 Temp cond = bld.sop2(aco_opcode::s_and_b64, bld.def(s2), bld.def(s1, scc), tmp0, tmp1); in visit_alu_instr()
2675 Temp add = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), in visit_alu_instr()
2676 bld.copy(bld.def(v1), Operand::zero()), in visit_alu_instr()
2677 bld.copy(bld.def(v1), Operand::c32(0x3ff00000u)), cond); in visit_alu_instr()
2678 add = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), in visit_alu_instr()
2679 bld.copy(bld.def(v1), Operand::zero()), add); in visit_alu_instr()
2680 bld.vop3(aco_opcode::v_add_f64_e64, Definition(dst), trunc, add); in visit_alu_instr()
2686 bld.sop1(op, Definition(dst), src); in visit_alu_instr()
2699 emit_trunc_f64(ctx, bld, Definition(dst), src); in visit_alu_instr()
2704 bld.sop1(op, Definition(dst), src); in visit_alu_instr()
2720 Temp src0_lo = bld.tmp(v1), src0_hi = bld.tmp(v1); in visit_alu_instr()
2722 bld.pseudo(aco_opcode::p_split_vector, Definition(src0_lo), Definition(src0_hi), src0); in visit_alu_instr()
2724 Temp bitmask = bld.sop1(aco_opcode::s_brev_b32, bld.def(s1), in visit_alu_instr()
2725 bld.copy(bld.def(s1), Operand::c32(-2u))); in visit_alu_instr()
2727 bld.vop3(aco_opcode::v_bfi_b32, bld.def(v1), bitmask, in visit_alu_instr()
2728 bld.copy(bld.def(v1), Operand::c32(0x43300000u)), as_vgpr(ctx, src0_hi)); in visit_alu_instr()
2730 bld.vop3(aco_opcode::v_add_f64_e64, bld.def(v2), src0, in visit_alu_instr()
2731 bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand::zero(), bfi)); in visit_alu_instr()
2733 bld.vop3(aco_opcode::v_add_f64_e64, bld.def(v2), tmp, in visit_alu_instr()
2734 bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand::zero(), bfi)); in visit_alu_instr()
2738 Temp v = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand::c32(-1u), in visit_alu_instr()
2740 Instruction* vop3 = bld.vopc_e64(aco_opcode::v_cmp_gt_f64, bld.def(bld.lm), src0, v); in visit_alu_instr()
2744 Temp tmp_lo = bld.tmp(v1), tmp_hi = bld.tmp(v1); in visit_alu_instr()
2745 bld.pseudo(aco_opcode::p_split_vector, Definition(tmp_lo), Definition(tmp_hi), tmp); in visit_alu_instr()
2746 Temp dst0 = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), tmp_lo, in visit_alu_instr()
2748 Temp dst1 = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), tmp_hi, in visit_alu_instr()
2751 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), dst0, dst1); in visit_alu_instr()
2757 bld.sop1(op, Definition(dst), src); in visit_alu_instr()
2782 src = bld.vop1(fract, bld.def(rc), src); in visit_alu_instr()
2785 bld.vop1(opcode, Definition(dst), src); in visit_alu_instr()
2787 Temp tmp = bld.vop1(opcode, bld.def(rc), src); in visit_alu_instr()
2788 bld.pseudo(aco_opcode::p_as_uniform, Definition(dst), tmp); in visit_alu_instr()
2822 Temp tmp = bld.vop1(aco_opcode::v_frexp_exp_i16_f16, bld.def(v1), src); in visit_alu_instr()
2823 tmp = bld.pseudo(aco_opcode::p_extract_vector, bld.def(v1b), tmp, Operand::zero()); in visit_alu_instr()
2824 convert_int(ctx, bld, tmp, 8, 32, true, dst); in visit_alu_instr()
2838 src = bld.vop2(aco_opcode::v_add_f16, bld.def(v2b), Operand::zero(), as_vgpr(ctx, src)); in visit_alu_instr()
2840 src = bld.vop3(aco_opcode::v_med3_i16, bld.def(v2b), Operand::c16(-1), src, in visit_alu_instr()
2842 bld.vop1(aco_opcode::v_cvt_f16_i16, Definition(dst), src); in visit_alu_instr()
2844 src = convert_int(ctx, bld, src, 16, 32, true); in visit_alu_instr()
2845 src = bld.vop3(aco_opcode::v_med3_i32, bld.def(v1), Operand::c32(-1), src, in visit_alu_instr()
2847 bld.vop1(aco_opcode::v_cvt_f16_i16, Definition(dst), src); in visit_alu_instr()
2853 Temp inf = bld.copy(bld.def(s1), Operand::c32(0x7f800000)); in visit_alu_instr()
2854 src = bld.vop2(aco_opcode::v_mul_legacy_f32, bld.def(v1), inf, as_vgpr(ctx, src)); in visit_alu_instr()
2855 bld.vop3(aco_opcode::v_med3_f32, Definition(dst), Operand::c32(0x3f800000), src, in visit_alu_instr()
2859 Temp cond = bld.vopc(aco_opcode::v_cmp_nlt_f64, bld.def(bld.lm), Operand::zero(), src); in visit_alu_instr()
2860 Temp tmp = bld.copy(bld.def(v1), Operand::c32(0x3FF00000u)); in visit_alu_instr()
2861 Temp upper = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), tmp, in visit_alu_instr()
2864 cond = bld.vopc(aco_opcode::v_cmp_le_f64, bld.def(bld.lm), Operand::zero(), src); in visit_alu_instr()
2865 tmp = bld.copy(bld.def(v1), Operand::c32(0xBFF00000u)); in visit_alu_instr()
2866 upper = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), tmp, upper, cond); in visit_alu_instr()
2868 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), Operand::zero(), upper); in visit_alu_instr()
2870 Temp cond = bld.sopc(aco_opcode::s_cmp_lt_f16, bld.def(s1, scc), Operand::c16(0), src); in visit_alu_instr()
2871 src = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1), Operand::c32(0x3c00), src, in visit_alu_instr()
2872 bld.scc(cond)); in visit_alu_instr()
2873 cond = bld.sopc(aco_opcode::s_cmp_ge_f16, bld.def(s1, scc), src, Operand::c16(0)); in visit_alu_instr()
2874 bld.sop2(aco_opcode::s_cselect_b32, Definition(dst), src, Operand::c32(0xbc00), in visit_alu_instr()
2875 bld.scc(cond)); in visit_alu_instr()
2877 Temp cond = bld.sopc(aco_opcode::s_cmp_lt_f32, bld.def(s1, scc), Operand::c32(0), src); in visit_alu_instr()
2878 src = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1), Operand::c32(0x3f800000), src, in visit_alu_instr()
2879 bld.scc(cond)); in visit_alu_instr()
2880 cond = bld.sopc(aco_opcode::s_cmp_ge_f32, bld.def(s1, scc), src, Operand::c32(0)); in visit_alu_instr()
2881 bld.sop2(aco_opcode::s_cselect_b32, Definition(dst), src, Operand::c32(0xbf800000), in visit_alu_instr()
2882 bld.scc(cond)); in visit_alu_instr()
2905 bld.vop1(aco_opcode::p_v_cvt_f16_f32_rtne, Definition(dst), src); in visit_alu_instr()
2907 bld.sop1(aco_opcode::p_s_cvt_f16_f32_rtne, Definition(dst), src); in visit_alu_instr()
2910 bld.vop1(aco_opcode::v_cvt_f16_f32, Definition(dst), src); in visit_alu_instr()
2912 bld.sop1(aco_opcode::s_cvt_f16_f32, Definition(dst), src); in visit_alu_instr()
2925 bld.vop1(aco_opcode::v_cvt_f16_f32, Definition(dst), src); in visit_alu_instr()
2927 bld.sop1(aco_opcode::s_cvt_f16_f32, Definition(dst), src); in visit_alu_instr()
2929 bld.sop2(aco_opcode::s_cvt_pk_rtz_f16_f32, Definition(dst), src, Operand::zero()); in visit_alu_instr()
2931 bld.vop3(aco_opcode::v_cvt_pkrtz_f16_f32_e64, Definition(dst), src, Operand::zero()); in visit_alu_instr()
2933 bld.vop2(aco_opcode::v_cvt_pkrtz_f16_f32, Definition(dst), src, as_vgpr(ctx, src)); in visit_alu_instr()
2941 bld.sop1(aco_opcode::s_cvt_f32_f16, Definition(dst), src); in visit_alu_instr()
2954 bld.vop1(aco_opcode::v_cvt_f64_f32, Definition(dst), src); in visit_alu_instr()
2965 src = convert_int(ctx, bld, src, input_size, target_size, true); in visit_alu_instr()
2970 bld.vop1(aco_opcode::v_cvt_f16_i16, Definition(dst), src); in visit_alu_instr()
2977 src = bld.vop1(aco_opcode::v_cvt_f32_i32, bld.def(v1), src); in visit_alu_instr()
2978 bld.vop1(aco_opcode::v_cvt_f16_f32, Definition(dst), src); in visit_alu_instr()
2982 src = convert_int(ctx, bld, src, input_size, 32, true); in visit_alu_instr()
2984 src = bld.sop1(aco_opcode::s_cvt_f32_i32, bld.def(s1), src); in visit_alu_instr()
2985 bld.sop1(aco_opcode::s_cvt_f16_f32, Definition(dst), src); in visit_alu_instr()
2998 src = convert_int(ctx, bld, src, input_size, 32, true); in visit_alu_instr()
3001 bld.vop1(aco_opcode::v_cvt_f32_i32, Definition(dst), src); in visit_alu_instr()
3003 bld.sop1(aco_opcode::s_cvt_f32_i32, Definition(dst), src); in visit_alu_instr()
3013 src = convert_int(ctx, bld, src, instr->src[0].src.ssa->bit_size, 32, true); in visit_alu_instr()
3014 bld.vop1(aco_opcode::v_cvt_f64_i32, Definition(dst), src); in visit_alu_instr()
3028 src = convert_int(ctx, bld, src, input_size, target_size, false); in visit_alu_instr()
3033 bld.vop1(aco_opcode::v_cvt_f16_u16, Definition(dst), src); in visit_alu_instr()
3040 src = bld.vop1(aco_opcode::v_cvt_f32_u32, bld.def(v1), src); in visit_alu_instr()
3041 bld.vop1(aco_opcode::v_cvt_f16_f32, Definition(dst), src); in visit_alu_instr()
3045 src = convert_int(ctx, bld, src, input_size, 32, false); in visit_alu_instr()
3047 src = bld.sop1(aco_opcode::s_cvt_f32_u32, bld.def(s1), src); in visit_alu_instr()
3048 bld.sop1(aco_opcode::s_cvt_f16_f32, Definition(dst), src); in visit_alu_instr()
3059 bld.vop1(aco_opcode::v_cvt_f32_ubyte0, Definition(dst), src); in visit_alu_instr()
3062 src = convert_int(ctx, bld, src, instr->src[0].src.ssa->bit_size, 32, false); in visit_alu_instr()
3064 bld.vop1(aco_opcode::v_cvt_f32_u32, Definition(dst), src); in visit_alu_instr()
3066 bld.sop1(aco_opcode::s_cvt_f32_u32, Definition(dst), src); in visit_alu_instr()
3076 src = convert_int(ctx, bld, src, instr->src[0].src.ssa->bit_size, 32, false); in visit_alu_instr()
3077 bld.vop1(aco_opcode::v_cvt_f64_u32, Definition(dst), src); in visit_alu_instr()
3088 Temp tmp = bld.as_uniform(src); in visit_alu_instr()
3090 tmp = bld.sop1(aco_opcode::s_cvt_f32_f16, bld.def(s1), tmp); in visit_alu_instr()
3091 bld.sop1(aco_opcode::s_cvt_i32_f32, Definition(dst), tmp); in visit_alu_instr()
3097 Temp tmp = bld.tmp(v1); in visit_alu_instr()
3099 tmp = bld.vop1(aco_opcode::v_cvt_i32_f32, bld.def(v1), tmp); in visit_alu_instr()
3100 tmp = convert_int(ctx, bld, tmp, 32, instr->def.bit_size, false, in visit_alu_instr()
3103 bld.pseudo(aco_opcode::p_as_uniform, Definition(dst), tmp); in visit_alu_instr()
3118 Temp tmp = bld.as_uniform(src); in visit_alu_instr()
3120 tmp = bld.sop1(aco_opcode::s_cvt_f32_f16, bld.def(s1), tmp); in visit_alu_instr()
3121 bld.sop1(aco_opcode::s_cvt_u32_f32, Definition(dst), tmp); in visit_alu_instr()
3127 Temp tmp = bld.tmp(v1); in visit_alu_instr()
3129 tmp = bld.vop1(aco_opcode::v_cvt_u32_f32, bld.def(v1), tmp); in visit_alu_instr()
3130 tmp = convert_int(ctx, bld, tmp, 32, instr->def.bit_size, false, in visit_alu_instr()
3133 bld.pseudo(aco_opcode::p_as_uniform, Definition(dst), tmp); in visit_alu_instr()
3138 bld.vop3(aco_opcode::p_v_cvt_pk_u8_f32, Definition(dst), in visit_alu_instr()
3151 Temp tmp = bld.as_uniform(src); in visit_alu_instr()
3153 tmp = bld.sop1(aco_opcode::s_cvt_f32_f16, bld.def(s1), tmp); in visit_alu_instr()
3154 bld.sop1(aco_opcode::s_cvt_i32_f32, Definition(dst), tmp); in visit_alu_instr()
3156 Temp tmp = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), src); in visit_alu_instr()
3158 bld.vop1(aco_opcode::v_cvt_i32_f32, Definition(dst), tmp); in visit_alu_instr()
3160 bld.pseudo(aco_opcode::p_as_uniform, Definition(dst), in visit_alu_instr()
3161 bld.vop1(aco_opcode::v_cvt_i32_f32, bld.def(v1), tmp)); in visit_alu_instr()
3176 Temp tmp = bld.as_uniform(src); in visit_alu_instr()
3178 tmp = bld.sop1(aco_opcode::s_cvt_f32_f16, bld.def(s1), tmp); in visit_alu_instr()
3179 bld.sop1(aco_opcode::s_cvt_u32_f32, Definition(dst), tmp); in visit_alu_instr()
3181 Temp tmp = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), src); in visit_alu_instr()
3183 bld.vop1(aco_opcode::v_cvt_u32_f32, Definition(dst), tmp); in visit_alu_instr()
3185 bld.pseudo(aco_opcode::p_as_uniform, Definition(dst), in visit_alu_instr()
3186 bld.vop1(aco_opcode::v_cvt_u32_f32, bld.def(v1), tmp)); in visit_alu_instr()
3199 assert(src.regClass() == bld.lm); in visit_alu_instr()
3203 bld.sop2(aco_opcode::s_mul_i32, Definition(dst), Operand::c32(0x3c00u), src); in visit_alu_instr()
3205 Temp one = bld.copy(bld.def(v1), Operand::c32(0x3c00u)); in visit_alu_instr()
3206 bld.vop2(aco_opcode::v_cndmask_b32, Definition(dst), Operand::zero(), one, src); in visit_alu_instr()
3214 assert(src.regClass() == bld.lm); in visit_alu_instr()
3218 bld.sop2(aco_opcode::s_mul_i32, Definition(dst), Operand::c32(0x3f800000u), src); in visit_alu_instr()
3220 bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(dst), Operand::zero(), in visit_alu_instr()
3229 assert(src.regClass() == bld.lm); in visit_alu_instr()
3233 bld.sop2(aco_opcode::s_cselect_b64, Definition(dst), Operand::c32(0x3f800000u), in visit_alu_instr()
3234 Operand::zero(), bld.scc(src)); in visit_alu_instr()
3236 Temp one = bld.copy(bld.def(v1), Operand::c32(0x3FF00000u)); in visit_alu_instr()
3238 bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::zero(), one, src); in visit_alu_instr()
3239 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), Operand::zero(), upper); in visit_alu_instr()
3257 convert_int(ctx, bld, get_alu_src(ctx, instr->src[0]), input_bitsize, output_bitsize, in visit_alu_instr()
3272 convert_int(ctx, bld, get_alu_src(ctx, instr->src[0]), instr->src[0].src.ssa->bit_size, in visit_alu_instr()
3282 assert(src.regClass() == bld.lm); in visit_alu_instr()
3287 bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(dst), Operand::zero(), Operand::c32(1u), in visit_alu_instr()
3296 assert(dst.regClass() == bld.lm); in visit_alu_instr()
3300 assert(dst.regClass() == bld.lm); in visit_alu_instr()
3301 bld.vopc(src.size() == 2 ? aco_opcode::v_cmp_lg_u64 : aco_opcode::v_cmp_lg_u32, in visit_alu_instr()
3308 bld.sop2(aco_opcode::s_or_b64, bld.def(s2), bld.def(s1, scc), Operand::zero(), src) in visit_alu_instr()
3312 tmp = bld.sopc(src.size() == 2 ? aco_opcode::s_cmp_lg_u64 : aco_opcode::s_cmp_lg_u32, in visit_alu_instr()
3313 bld.scc(bld.def(s1)), Operand::zero(), src); in visit_alu_instr()
3323 bld.copy(Definition(dst), get_alu_src(ctx, instr->src[0])); in visit_alu_instr()
3337 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), src[0], src[1]); in visit_alu_instr()
3341 bld.pseudo(aco_opcode::p_split_vector, Definition(dst), bld.def(dst.regClass()), in visit_alu_instr()
3345 bld.pseudo(aco_opcode::p_split_vector, bld.def(dst.regClass()), Definition(dst), in visit_alu_instr()
3350 bld.pseudo(aco_opcode::p_split_vector, Definition(dst), bld.def(dst.regClass()), in visit_alu_instr()
3353 bld.copy(Definition(dst), get_alu_src(ctx, instr->src[0])); in visit_alu_instr()
3358 bld.pseudo(aco_opcode::p_split_vector, bld.def(dst.regClass()), Definition(dst), in visit_alu_instr()
3361 bld.pseudo(aco_opcode::p_extract, Definition(dst), bld.def(s1, scc), in visit_alu_instr()
3380 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), src0, src1); in visit_alu_instr()
3382 bld.copy(Definition(dst), src0); in visit_alu_instr()
3384 bld.pseudo(aco_opcode::p_insert, Definition(dst), bld.def(s1, scc), src1, Operand::c32(1), in visit_alu_instr()
3387 bld.sop2(aco_opcode::s_pack_ll_b32_b16, Definition(dst), src0, src1); in visit_alu_instr()
3389 src0 = bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), src0, in visit_alu_instr()
3391 src1 = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), src1, in visit_alu_instr()
3393 bld.sop2(aco_opcode::s_or_b32, Definition(dst), bld.def(s1, scc), src0, src1); in visit_alu_instr()
3397 case nir_op_pack_32_4x8: bld.copy(Definition(dst), get_alu_src(ctx, instr->src[0], 4)); break; in visit_alu_instr()
3425 src0 = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), src0); in visit_alu_instr()
3426 src1 = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), src1); in visit_alu_instr()
3438 bld.vop3(opcode, Definition(dst), src0, src1); in visit_alu_instr()
3448 bld.vop3(opcode, Definition(dst), src0, src1); in visit_alu_instr()
3454 bld.sop1(aco_opcode::s_cvt_f32_f16, Definition(dst), src); in visit_alu_instr()
3458 src = bld.pseudo(aco_opcode::p_split_vector, bld.def(v2b), bld.def(v2b), src); in visit_alu_instr()
3460 bld.vop1(aco_opcode::v_cvt_f32_f16, Definition(dst), src); in visit_alu_instr()
3469 bld.sop1(aco_opcode::s_cvt_hi_f32_f16, Definition(dst), src); in visit_alu_instr()
3473 src = bld.pseudo(aco_opcode::p_extract, bld.def(s1), bld.def(s1, scc), src, in visit_alu_instr()
3477 … bld.pseudo(aco_opcode::p_split_vector, bld.def(v2b), bld.def(v2b), src).def(1).getTemp(); in visit_alu_instr()
3479 bld.vop1(aco_opcode::v_cvt_f32_f16, Definition(dst), src); in visit_alu_instr()
3495 bld.vop3(aco_opcode::v_mqsad_u32_u8, Definition(dst), as_vgpr(ctx, src), as_vgpr(ctx, ref), in visit_alu_instr()
3509 bld.sop2(aco_opcode::s_pack_hl_b32_b16, Definition(dst), src1, src0); in visit_alu_instr()
3512 amount = bld.copy(bld.def(s1), Operand::c32(camount)); in visit_alu_instr()
3514 amount = bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), in visit_alu_instr()
3520 Temp src = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), src1, src0); in visit_alu_instr()
3522 Temp res = bld.sop2(aco_opcode::s_lshr_b64, bld.def(s2), bld.def(s1, scc), src, amount); in visit_alu_instr()
3523 bld.pseudo(aco_opcode::p_extract_vector, Definition(dst), res, Operand::zero()); in visit_alu_instr()
3544 f16 = bld.vop1(aco_opcode::p_v_cvt_f16_f32_rtne, bld.def(v2b), src); in visit_alu_instr()
3546 f16 = bld.vop1(aco_opcode::v_cvt_f16_f32, bld.def(v2b), src); in visit_alu_instr()
3549 bld.vop1(aco_opcode::v_cvt_f32_f16, Definition(dst), f16); in visit_alu_instr()
3554 Temp f32 = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), f16); in visit_alu_instr()
3558 bld.vopc_e64(aco_opcode::v_cmp_class_f16, bld.def(bld.lm), f16, Operand::c32(0x30)); in visit_alu_instr()
3566 Temp smallest = bld.copy(bld.def(s1), Operand::c32(0x38800000u)); in visit_alu_instr()
3568 bld.vopc_e64(aco_opcode::v_cmp_lt_f32, bld.def(bld.lm), f32, smallest); in visit_alu_instr()
3574 bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand::zero(), as_vgpr(ctx, src)); in visit_alu_instr()
3575 bld.vop2(aco_opcode::v_cndmask_b32, Definition(dst), f32, copysign_0, denorm_zero); in visit_alu_instr()
3577 bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(dst), f32, Operand::zero(), in visit_alu_instr()
3583 f16 = bld.sop1(aco_opcode::p_s_cvt_f16_f32_rtne, bld.def(s1), src); in visit_alu_instr()
3585 f16 = bld.sop1(aco_opcode::s_cvt_f16_f32, bld.def(s1), src); in visit_alu_instr()
3588 bld.sop1(aco_opcode::s_cvt_f32_f16, Definition(dst), f16); in visit_alu_instr()
3590 Temp f32 = bld.sop1(aco_opcode::s_cvt_f32_f16, bld.def(s1), f16); in visit_alu_instr()
3591 Temp abs_mask = bld.copy(bld.def(s1), Operand::c32(0x7fffffff)); in visit_alu_instr()
3593 bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), f32, abs_mask); in visit_alu_instr()
3597 bld.sop2(aco_opcode::s_andn2_b32, bld.def(s1), bld.def(s1, scc), f32, abs_mask); in visit_alu_instr()
3601 Temp smallest = bld.copy(bld.def(s1), Operand::c32(0x38800000u)); in visit_alu_instr()
3602 Temp denorm_zero = bld.sopc(aco_opcode::s_cmp_lt_u32, bld.def(s1, scc), abs, smallest); in visit_alu_instr()
3603 bld.sop2(aco_opcode::s_cselect_b32, Definition(dst), sign, f32, bld.scc(denorm_zero)); in visit_alu_instr()
3615 bld.sop2(aco_opcode::s_bfm_b32, Definition(dst), bits, offset); in visit_alu_instr()
3617 bld.vop3(aco_opcode::v_bfm_b32, Definition(dst), bits, offset); in visit_alu_instr()
3637 bld.sop2(aco_opcode::s_pack_lh_b32_b16, Definition(dst), insert, base); in visit_alu_instr()
3639 bld.sop2(aco_opcode::s_pack_lh_b32_b16, Definition(dst), base, insert); in visit_alu_instr()
3649 bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), insert, bitmask); in visit_alu_instr()
3658 base = bld.sop2(aco_opcode::s_andn2_b32, bld.def(s1), bld.def(s1, scc), base, bitmask); in visit_alu_instr()
3662 bld.sop2(aco_opcode::s_or_b32, Definition(dst), bld.def(s1, scc), rhs, lhs); in visit_alu_instr()
3685 bld.sop2(opcode, Definition(dst), bld.def(s1, scc), base, Operand::c32(extract)); in visit_alu_instr()
3694 : bld.sop2(aco_opcode::s_and_b32, bld.def(s1), in visit_alu_instr()
3695 bld.def(s1, scc), bits, Operand::c32(0x1fu)); in visit_alu_instr()
3696 Temp extract = bld.sop2(aco_opcode::s_pack_ll_b32_b16, bld.def(s1), offset, bits_op); in visit_alu_instr()
3697 bld.sop2(opcode, Definition(dst), bld.def(s1, scc), base, extract); in visit_alu_instr()
3699 Temp mask = bld.sop2(aco_opcode::s_bfm_b32, bld.def(s1), bits, offset); in visit_alu_instr()
3701 bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), base, mask); in visit_alu_instr()
3702 bld.sop2(aco_opcode::s_lshr_b32, Definition(dst), bld.def(s1, scc), masked, offset); in visit_alu_instr()
3706 : bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), in visit_alu_instr()
3707 bld.sop2(aco_opcode::s_and_b32, bld.def(s1), in visit_alu_instr()
3708 bld.def(s1, scc), bits, Operand::c32(0x1fu)), in visit_alu_instr()
3712 : bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), in visit_alu_instr()
3716 bld.sop2(aco_opcode::s_or_b32, bld.def(s1), bld.def(s1, scc), bits_op, offset_op); in visit_alu_instr()
3717 bld.sop2(aco_opcode::s_bfe_i32, Definition(dst), bld.def(s1, scc), base, extract); in visit_alu_instr()
3737 bld.copy(Definition(dst), get_alu_src(ctx, instr->src[0])); in visit_alu_instr()
3746 bld.pseudo(aco_opcode::p_extract, Definition(dst), bld.def(s1, scc), Operand(vec), in visit_alu_instr()
3750 bld.pseudo(aco_opcode::p_extract, Definition(dst), bld.def(s1, scc), Operand(src), in visit_alu_instr()
3755 Temp extract = bld.copy(bld.def(s1), Operand::c32((bits << 16) | (index * bits))); in visit_alu_instr()
3756 bld.sop2(op, Definition(dst), bld.def(s1, scc), src, extract); in visit_alu_instr()
3765 def = bld.def(v1); in visit_alu_instr()
3770 bld.pseudo(aco_opcode::p_extract, def, Operand(src), Operand::c32(index), in visit_alu_instr()
3777 hi = bld.vop2(aco_opcode::v_ashrrev_i32, bld.def(v1), Operand::c32(31), lo); in visit_alu_instr()
3778 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lo, hi); in visit_alu_instr()
3790 bld.copy(Definition(dst), get_alu_src(ctx, instr->src[0])); in visit_alu_instr()
3799 def = bld.def(src.type(), 1); in visit_alu_instr()
3802 bld.pseudo(aco_opcode::p_insert, def, bld.def(s1, scc), Operand(src), in visit_alu_instr()
3806 bld.pseudo(aco_opcode::p_insert, def, Operand(src), Operand::c32(index), in visit_alu_instr()
3810 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), Operand::zero(), in visit_alu_instr()
3813 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), def.getTemp(), in visit_alu_instr()
3821 bld.sop1(aco_opcode::s_bcnt1_i32_b32, Definition(dst), bld.def(s1, scc), src); in visit_alu_instr()
3823 bld.vop3(aco_opcode::v_bcnt_u32_b32, Definition(dst), src, Operand::zero()); in visit_alu_instr()
3825 bld.vop3(aco_opcode::v_bcnt_u32_b32, Definition(dst), emit_extract_vector(ctx, src, 1, v1), in visit_alu_instr()
3826 bld.vop3(aco_opcode::v_bcnt_u32_b32, bld.def(v1), in visit_alu_instr()
3829 bld.sop1(aco_opcode::s_bcnt1_i32_b64, Definition(dst), bld.def(s1, scc), src); in visit_alu_instr()
3988 bld.vopc(test0 ? aco_opcode::v_cmp_le_i32 : aco_opcode::v_cmp_gt_i32, Definition(dst), in visit_alu_instr()
3994 bld.vopc(test0 ? aco_opcode::v_cmp_le_i16 : aco_opcode::v_cmp_gt_i16, Definition(dst), in visit_alu_instr()
4004 src0 = bld.pseudo(aco_opcode::p_extract, bld.def(v1), src0, Operand::c32(1), in visit_alu_instr()
4011 src0 = bld.pseudo(aco_opcode::p_extract, bld.def(v1), src0, Operand::c32(bit / 8), in visit_alu_instr()
4013 bld.vopc(test0 ? aco_opcode::v_cmp_le_i32 : aco_opcode::v_cmp_gt_i32, Definition(dst), in visit_alu_instr()
4019 src0 = bld.pseudo(aco_opcode::p_extract, bld.def(v1), src0, Operand::c32(bit / 8), in visit_alu_instr()
4042 Temp tmp = test0 ? bld.tmp(bld.lm) : dst; in visit_alu_instr()
4050 bld.vopc(op, Definition(tmp), bld.copy(bld.def(s1), Operand::c32(c)), src0)->valu(); in visit_alu_instr()
4057 bld.sop1(Builder::s_not, Definition(dst), bld.def(s1, scc), tmp); in visit_alu_instr()
4067 res = bld.vop2_e64(aco_opcode::v_lshlrev_b16, bld.def(v2b), src1, Operand::c32(1)); in visit_alu_instr()
4069 res = bld.vop3(aco_opcode::v_lshlrev_b16_e64, bld.def(v2b), src1, Operand::c32(1)); in visit_alu_instr()
4071 res = bld.vop2(aco_opcode::v_and_b32, bld.def(v2b), src0, res); in visit_alu_instr()
4073 res = bld.vop3(aco_opcode::v_bfe_u32, bld.def(v1), src0, src1, Operand::c32(1)); in visit_alu_instr()
4076 res = bld.vop3(aco_opcode::v_lshr_b64, bld.def(v2), src0, src1); in visit_alu_instr()
4078 res = bld.vop3(aco_opcode::v_lshrrev_b64, bld.def(v2), src1, src0); in visit_alu_instr()
4081 res = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(0x1), res); in visit_alu_instr()
4085 bld.vopc(op, Definition(dst), Operand::c32(0), res); in visit_alu_instr()
4103 Builder bld(ctx->program, ctx->block); in visit_load_const() local
4106 assert(dst.regClass() == bld.lm); in visit_load_const()
4108 Operand op = bld.lm.size() == 1 ? Operand::c32(val) : Operand::c64(val); in visit_load_const()
4109 bld.copy(Definition(dst), op); in visit_load_const()
4111 bld.copy(Definition(dst), Operand::c32(instr->value[0].u8)); in visit_load_const()
4114 bld.copy(Definition(dst), Operand::c32(instr->value[0].i16)); in visit_load_const()
4116 bld.copy(Definition(dst), Operand::c32(instr->value[0].u32)); in visit_load_const()
4136 Builder bld(ctx->program, ctx->block); in emit_readfirstlane() local
4139 bld.copy(Definition(dst), src); in emit_readfirstlane()
4141 bld.vop1(aco_opcode::v_readfirstlane_b32, Definition(dst), src); in emit_readfirstlane()
4149 bld.def(RegClass::get(RegType::vgpr, MIN2(src.bytes() - i * 4, 4))); in emit_readfirstlane()
4159 vec->operands[i] = bld.vop1(aco_opcode::v_readfirstlane_b32, bld.def(s1), in emit_readfirstlane()
4193 using Callback = Temp (*)(Builder& bld, const LoadEmitInfo& info, Temp offset,
4202 emit_load(isel_context* ctx, Builder& bld, const LoadEmitInfo& info, in emit_load() argument
4240 offset = bld.sop2(aco_opcode::s_add_i32, bld.def(s1), bld.def(s1, scc), offset_tmp, in emit_load()
4243 offset = bld.vadd32(bld.def(v1), offset_tmp, Operand::c32(to_add)); in emit_load()
4245 Temp lo = bld.tmp(offset_tmp.type(), 1); in emit_load()
4246 Temp hi = bld.tmp(offset_tmp.type(), 1); in emit_load()
4247 bld.pseudo(aco_opcode::p_split_vector, Definition(lo), Definition(hi), offset_tmp); in emit_load()
4250 Temp carry = bld.tmp(s1); in emit_load()
4251 lo = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.scc(Definition(carry)), lo, in emit_load()
4253 hi = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.def(s1, scc), hi, carry); in emit_load()
4254 offset = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), lo, hi); in emit_load()
4256 Temp new_lo = bld.tmp(v1); in emit_load()
4258 bld.vadd32(Definition(new_lo), lo, Operand::c32(to_add), true).def(1).getTemp(); in emit_load()
4259 hi = bld.vadd32(bld.def(v1), hi, Operand::zero(), false, carry); in emit_load()
4260 offset = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), new_lo, hi); in emit_load()
4267 : offset.isConstant() ? bld.copy(bld.def(s1), offset) in emit_load()
4270 Temp val = params.callback(bld, info, offset_tmp, bytes_needed, align, reduced_const_offset, in emit_load()
4315 tmp[0] = bld.tmp(RegClass::get(reg_type, tmp_size)); in emit_load()
4317 bld.insert(std::move(vec)); in emit_load()
4326 bld.pseudo(aco_opcode::p_extract_vector, bld.def(new_rc), tmp[0], Operand::zero()); in emit_load()
4340 Temp component = bld.tmp(elem_rc); in emit_load()
4345 bld.insert(std::move(split)); in emit_load()
4354 ctx, allocated_vec[j], bld.tmp(RegClass(RegType::sgpr, allocated_vec[j].size()))); in emit_load()
4356 allocated_vec[j] = bld.as_uniform(allocated_vec[j]); in emit_load()
4377 Temp tmp = bld.tmp(RegType::vgpr, info.dst.size()); in emit_load()
4379 bld.insert(std::move(vec)); in emit_load()
4383 bld.pseudo(aco_opcode::p_as_uniform, Definition(info.dst), tmp); in emit_load()
4386 bld.insert(std::move(vec)); in emit_load()
4391 load_lds_size_m0(Builder& bld) in load_lds_size_m0() argument
4394 if (bld.program->gfx_level >= GFX9) in load_lds_size_m0()
4397 return bld.m0((Temp)bld.copy(bld.def(s1, m0), Operand::c32(0xffffffffu))); in load_lds_size_m0()
4401 lds_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsigned bytes_needed, in lds_load_callback() argument
4404 offset = offset.regClass() == s1 ? bld.copy(bld.def(v1), offset) : offset; in lds_load_callback()
4406 Operand m = load_lds_size_m0(bld); in lds_load_callback()
4408 bool large_ds_read = bld.program->gfx_level >= GFX7; in lds_load_callback()
4409 bool usable_read2 = bld.program->gfx_level >= GFX7; in lds_load_callback()
4436 op = bld.program->gfx_level >= GFX9 ? aco_opcode::ds_read_u16_d16 : aco_opcode::ds_read_u16; in lds_load_callback()
4439 op = bld.program->gfx_level >= GFX9 ? aco_opcode::ds_read_u8_d16 : aco_opcode::ds_read_u8; in lds_load_callback()
4447 offset = bld.vadd32(bld.def(v1), offset, Operand::c32(excess)); in lds_load_callback()
4454 Temp val = rc == info.dst.regClass() && dst_hint.id() ? dst_hint : bld.tmp(rc); in lds_load_callback()
4457 instr = bld.ds(op, Definition(val), offset, m, const_offset, const_offset + 1); in lds_load_callback()
4459 instr = bld.ds(op, Definition(val), offset, m, const_offset); in lds_load_callback()
4471 smem_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsigned bytes_needed, in smem_load_callback() argument
4476 bld.program->has_smem_buffer_or_global_loads = true; in smem_load_callback()
4508 offset = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.def(s1, scc), offset, in smem_load_callback()
4515 load->operands[1] = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.def(s1, scc), offset, in smem_load_callback()
4523 Temp val = dst_hint.id() && dst_hint.regClass() == rc ? dst_hint : bld.tmp(rc); in smem_load_callback()
4527 bld.insert(std::move(load)); in smem_load_callback()
4534 mubuf_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsigned bytes_needed, in mubuf_load_callback() argument
4542 vaddr = bld.copy(bld.def(v1), soffset); in mubuf_load_callback()
4553 vaddr = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), info.idx, vaddr); in mubuf_load_callback()
4571 } else if (bytes_needed <= 12 && bld.program->gfx_level > GFX6) { in mubuf_load_callback()
4588 Temp val = dst_hint.id() && rc == dst_hint.regClass() ? dst_hint : bld.tmp(rc); in mubuf_load_callback()
4590 bld.insert(std::move(mubuf)); in mubuf_load_callback()
4598 mubuf_load_format_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, in mubuf_load_format_callback() argument
4607 vaddr = bld.copy(bld.def(v1), soffset); in mubuf_load_format_callback()
4618 vaddr = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), info.idx, vaddr); in mubuf_load_format_callback()
4652 Temp val = dst_hint.id() && rc == dst_hint.regClass() ? dst_hint : bld.tmp(rc); in mubuf_load_format_callback()
4654 bld.insert(std::move(mubuf)); in mubuf_load_format_callback()
4662 scratch_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsigned bytes_needed, in scratch_load_callback() argument
4687 Temp val = dst_hint.id() && rc == dst_hint.regClass() ? dst_hint : bld.tmp(rc); in scratch_load_callback()
4694 bld.insert(std::move(flat)); in scratch_load_callback()
4703 get_gfx6_global_rsrc(Builder& bld, Temp addr) in get_gfx6_global_rsrc() argument
4706 ac_build_raw_buffer_descriptor(bld.program->gfx_level, 0, 0xffffffff, desc); in get_gfx6_global_rsrc()
4709 return bld.pseudo(aco_opcode::p_create_vector, bld.def(s4), Operand::zero(), Operand::zero(), in get_gfx6_global_rsrc()
4711 return bld.pseudo(aco_opcode::p_create_vector, bld.def(s4), addr, Operand::c32(desc[2]), in get_gfx6_global_rsrc()
4716 add64_32(Builder& bld, Temp src0, Temp src1) in add64_32() argument
4718 Temp src00 = bld.tmp(src0.type(), 1); in add64_32()
4719 Temp src01 = bld.tmp(src0.type(), 1); in add64_32()
4720 bld.pseudo(aco_opcode::p_split_vector, Definition(src00), Definition(src01), src0); in add64_32()
4723 Temp dst0 = bld.tmp(v1); in add64_32()
4724 Temp carry = bld.vadd32(Definition(dst0), src00, src1, true).def(1).getTemp(); in add64_32()
4725 Temp dst1 = bld.vadd32(bld.def(v1), src01, Operand::zero(), false, carry); in add64_32()
4726 return bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), dst0, dst1); in add64_32()
4728 Temp carry = bld.tmp(s1); in add64_32()
4730 bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.scc(Definition(carry)), src00, src1); in add64_32()
4731 Temp dst1 = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.def(s1, scc), src01, carry); in add64_32()
4732 return bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), dst0, dst1); in add64_32()
4737 lower_global_address(Builder& bld, uint32_t offset_in, Temp* address_inout, in lower_global_address() argument
4746 if (bld.program->gfx_level >= GFX9) in lower_global_address()
4747 max_const_offset_plus_one = bld.program->dev.scratch_global_offset_max; in lower_global_address()
4748 else if (bld.program->gfx_level == GFX6) in lower_global_address()
4755 address = add64_32(bld, address, bld.copy(bld.def(s1), Operand::c32(UINT32_MAX))); in lower_global_address()
4759 offset = bld.copy(bld.def(s1), Operand::c32(excess_offset)); in lower_global_address()
4769 address = add64_32(bld, address, bld.copy(bld.def(s1), Operand::c32(src2))); in lower_global_address()
4774 if (bld.program->gfx_level == GFX6) { in lower_global_address()
4777 address = add64_32(bld, address, offset); in lower_global_address()
4780 offset = offset.id() ? offset : bld.copy(bld.def(s1), Operand::zero()); in lower_global_address()
4781 } else if (bld.program->gfx_level <= GFX8) { in lower_global_address()
4784 address = add64_32(bld, address, offset); in lower_global_address()
4787 address = as_vgpr(bld, address); in lower_global_address()
4791 address = add64_32(bld, address, offset); in lower_global_address()
4794 offset = as_vgpr(bld, offset); in lower_global_address()
4797 offset = bld.copy(bld.def(v1), bld.copy(bld.def(s1), Operand::zero())); in lower_global_address()
4806 global_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsigned bytes_needed, in global_load_callback() argument
4814 lower_global_address(bld, 0, &addr, &const_offset, &offset); in global_load_callback()
4817 bool use_mubuf = bld.program->gfx_level == GFX6; in global_load_callback()
4818 bool global = bld.program->gfx_level >= GFX9; in global_load_callback()
4850 Temp val = dst_hint.id() && rc == dst_hint.regClass() ? dst_hint : bld.tmp(rc); in global_load_callback()
4853 mubuf->operands[0] = Operand(get_gfx6_global_rsrc(bld, addr)); in global_load_callback()
4862 bld.insert(std::move(mubuf)); in global_load_callback()
4880 bld.insert(std::move(flat)); in global_load_callback()
4894 Builder bld(ctx->program, ctx->block); in load_lds() local
4908 emit_load(ctx, bld, info, lds_load_params); in load_lds()
4920 Builder bld(ctx->program, ctx->block); in split_store_data() local
4925 dst[0] = bld.as_uniform(src); in split_store_data()
4939 dst[i] = bld.tmp(RegClass::get(dst_type, bytes[i])); in split_store_data()
4967 src = bld.as_uniform(src); in split_store_data()
4974 temps.emplace_back(bld.tmp(RegClass::get(dst_type, elem_size_bytes))); in split_store_data()
4977 bld.insert(std::move(split)); in split_store_data()
4985 dst[i] = bld.as_uniform(temps[idx++]); in split_store_data()
4996 tmp = bld.as_uniform(tmp); in split_store_data()
5000 bld.insert(std::move(vec)); in split_store_data()
5033 Builder bld(ctx->program, ctx->block); in store_lds() local
5098 Operand m = load_lds_size_m0(bld); in store_lds()
5127 address_offset = bld.vadd32(bld.def(v1), Operand::c32(base_offset), address_offset); in store_lds()
5138 instr = bld.ds(op, address_offset, split_data, second_data, m, inline_offset, in store_lds()
5141 instr = bld.ds(op, address_offset, split_data, m, inline_offset); in store_lds()
5226 Builder bld(ctx->program, ctx->block); in create_vec_from_array() local
5230 dst = bld.tmp(RegClass(reg_type, cnt * dword_size)); in create_vec_from_array()
5243 Temp zero = bld.copy(bld.def(RegClass(reg_type, dword_size)), in create_vec_from_array()
5250 bld.insert(std::move(instr)); in create_vec_from_array()
5261 resolve_excess_vmem_const_offset(Builder& bld, Temp& voffset, unsigned const_offset) in resolve_excess_vmem_const_offset() argument
5268 voffset = bld.copy(bld.def(v1), Operand::c32(excess_const_offset)); in resolve_excess_vmem_const_offset()
5270 voffset = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.def(s1, scc), in resolve_excess_vmem_const_offset()
5273 voffset = bld.vadd32(bld.def(v1), Operand(voffset), Operand::c32(excess_const_offset)); in resolve_excess_vmem_const_offset()
5403 Builder bld(ctx->program, ctx->block); in emit_interp_instr_gfx11() local
5406 bld.pseudo(aco_opcode::p_interp_gfx11, Definition(dst), Operand(v1.as_linear()), in emit_interp_instr_gfx11()
5408 coord2, bld.m0(prim_mask)); in emit_interp_instr_gfx11()
5412 Temp p = bld.ldsdir(aco_opcode::lds_param_load, bld.def(v1), bld.m0(prim_mask), idx, component); in emit_interp_instr_gfx11()
5416 Temp p10 = bld.vinterp_inreg(aco_opcode::v_interp_p10_f16_f32_inreg, bld.def(v1), p, coord1, in emit_interp_instr_gfx11()
5418 bld.vinterp_inreg(aco_opcode::v_interp_p2_f16_f32_inreg, Definition(dst), p, coord2, p10, in emit_interp_instr_gfx11()
5421 Temp p10 = bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, bld.def(v1), p, coord1, p); in emit_interp_instr_gfx11()
5422 bld.vinterp_inreg(aco_opcode::v_interp_p2_f32_inreg, Definition(dst), p, coord2, p10); in emit_interp_instr_gfx11()
5440 Builder bld(ctx->program, ctx->block); in emit_interp_instr() local
5446 bld.vintrp(aco_opcode::v_interp_mov_f32, bld.def(v1), Operand::c32(2u) /* P0 */, in emit_interp_instr()
5447 bld.m0(prim_mask), idx, component); in emit_interp_instr()
5448 interp_p1 = bld.vintrp(aco_opcode::v_interp_p1lv_f16, bld.def(v1), coord1, in emit_interp_instr()
5449 bld.m0(prim_mask), interp_p1, idx, component, high_16bits); in emit_interp_instr()
5450 bld.vintrp(aco_opcode::v_interp_p2_legacy_f16, Definition(dst), coord2, bld.m0(prim_mask), in emit_interp_instr()
5458 Builder::Result interp_p1 = bld.vintrp(aco_opcode::v_interp_p1ll_f16, bld.def(v1), coord1, in emit_interp_instr()
5459 bld.m0(prim_mask), idx, component, high_16bits); in emit_interp_instr()
5460 bld.vintrp(interp_p2_op, Definition(dst), coord2, bld.m0(prim_mask), interp_p1, idx, in emit_interp_instr()
5465 Temp interp_p1 = bld.vintrp(aco_opcode::v_interp_p1_f32, bld.def(v1), coord1, in emit_interp_instr()
5466 bld.m0(prim_mask), idx, component); in emit_interp_instr()
5468 bld.vintrp(aco_opcode::v_interp_p2_f32, Definition(dst), coord2, bld.m0(prim_mask), interp_p1, in emit_interp_instr()
5477 Builder bld(ctx->program, ctx->block); in emit_interp_mov_instr() local
5478 Temp tmp = dst.bytes() == 2 ? bld.tmp(v1) : dst; in emit_interp_mov_instr()
5482 bld.pseudo(aco_opcode::p_interp_gfx11, Definition(tmp), Operand(v1.as_linear()), in emit_interp_mov_instr()
5484 bld.m0(prim_mask)); in emit_interp_mov_instr()
5487 bld.ldsdir(aco_opcode::lds_param_load, bld.def(v1), bld.m0(prim_mask), idx, component); in emit_interp_mov_instr()
5488 bld.vop1_dpp(aco_opcode::v_mov_b32, Definition(tmp), p, dpp_ctrl); in emit_interp_mov_instr()
5493 bld.vintrp(aco_opcode::v_interp_mov_f32, Definition(tmp), Operand::c32((vertex_id + 2) % 3), in emit_interp_mov_instr()
5494 bld.m0(prim_mask), idx, component); in emit_interp_mov_instr()
5529 mtbuf_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsigned bytes_needed, in mtbuf_load_callback() argument
5537 vaddr = bld.copy(bld.def(v1), soffset); in mtbuf_load_callback()
5548 vaddr = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), info.idx, vaddr); in mtbuf_load_callback()
5562 ac_get_safe_fetch_size(bld.program->gfx_level, vtx_info, const_offset, max_components, in mtbuf_load_callback()
5604 aco_err(bld.program, "unsupported bit size for typed buffer load"); in mtbuf_load_callback()
5620 Temp val = dst_hint.id() && rc == dst_hint.regClass() ? dst_hint : bld.tmp(rc); in mtbuf_load_callback()
5622 bld.insert(std::move(mtbuf)); in mtbuf_load_callback()
5632 Builder bld(ctx->program, ctx->block); in visit_load_fs_input() local
5660 vec->operands[i] = Operand(bld.tmp(instr->def.bit_size == 16 ? v2b : v1)); in visit_load_fs_input()
5665 bld.insert(std::move(vec)); in visit_load_fs_input()
5674 Builder bld(ctx->program, ctx->block); in visit_load_tcs_per_vertex_input() local
5716 Builder bld(ctx->program, ctx->block); in load_buffer() local
5720 offset = bld.as_uniform(offset); in load_buffer()
5735 emit_load(ctx, bld, info, smem_load_params); in load_buffer()
5737 emit_load(ctx, bld, info, mubuf_load_params); in load_buffer()
5744 Builder bld(ctx->program, ctx->block); in visit_load_ubo() local
5745 Temp rsrc = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa)); in visit_load_ubo()
5758 Builder bld(ctx->program, ctx->block); in visit_load_constant() local
5768 offset = bld.nuw().sop2(aco_opcode::s_add_u32, bld.def(s1), bld.def(s1, scc), offset, in visit_load_constant()
5771 offset = bld.vadd32(bld.def(v1), Operand::c32(base), offset); in visit_load_constant()
5773 Temp rsrc = bld.pseudo(aco_opcode::p_create_vector, bld.def(s4), in visit_load_constant()
5774 bld.pseudo(aco_opcode::p_constaddr, bld.def(s2), bld.def(s1, scc), in visit_load_constant()
5789 Builder bld(ctx->program, ctx->block); in emit_pack_v1() local
5798 Temp dword = bld.pseudo(aco_opcode::p_create_vector, bld.def(v1), low, high); in emit_pack_v1()
5812 Temp dword = bld.pseudo(aco_opcode::p_create_vector, bld.def(v1), low, Operand(v2b)); in emit_pack_v1()
5844 emit_mimg(Builder& bld, aco_opcode op, Temp dst, Temp rsrc, Operand samp, std::vector<Temp> coords, in emit_mimg() argument
5849 size_t nsa_size = bld.program->dev.max_nsa_vgprs; in emit_mimg()
5850 if (!is_vsample && bld.program->gfx_level >= GFX12) in emit_mimg()
5852 nsa_size = bld.program->gfx_level >= GFX11 || coords.size() <= nsa_size ? nsa_size : 0; in emit_mimg()
5862 coords[i] = as_vgpr(bld, coords[i]); in emit_mimg()
5877 coord = bld.tmp(RegType::vgpr, coord_size); in emit_mimg()
5879 bld.insert(std::move(vec)); in emit_mimg()
5881 coord = as_vgpr(bld, coord); in emit_mimg()
5900 return &bld.insert(std::move(mimg))->mimg(); in emit_mimg()
5906 Builder bld(ctx->program, ctx->block); in visit_bvh64_intersect_ray_amd() local
5924 if (bld.program->gfx_level == GFX10_3) { in visit_bvh64_intersect_ray_amd()
5934 emit_mimg(bld, aco_opcode::image_bvh64_intersect_ray, dst, resource, Operand(s4), args); in visit_bvh64_intersect_ray_amd()
5959 Builder bld(ctx->program, ctx->block); in get_image_coords() local
5963 coords.emplace_back(bld.copy(bld.def(rc), Operand::zero(a16 ? 2 : 4))); in get_image_coords()
5992 Temp rsrc = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa)); in get_image_coords()
5995 Temp first_layer = bld.vop3(aco_opcode::v_bfe_u32, bld.def(v1), rsrc_word5, Operand::c32(0u), in get_image_coords()
6006 Temp type = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc), rsrc_word3, in get_image_coords()
6008 Temp is_3d = bld.vopc_e64(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm), type, in get_image_coords()
6011 bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), as_vgpr(ctx, lod), first_layer, is_3d); in get_image_coords()
6049 emit_tfe_init(Builder& bld, Temp dst) in emit_tfe_init() argument
6051 Temp tmp = bld.tmp(dst.regClass()); in emit_tfe_init()
6063 bld.insert(std::move(vec)); in emit_tfe_init()
6071 Builder bld(ctx->program, ctx->block); in visit_image_load() local
6102 tmp = bld.tmp(RegClass::get(RegType::vgpr, num_bytes)); in visit_image_load()
6104 Temp resource = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa)); in visit_image_load()
6137 load->operands[3] = emit_tfe_init(bld, tmp); in visit_image_load()
6150 Operand vdata = is_sparse ? emit_tfe_init(bld, tmp) : Operand(v1); in visit_image_load()
6151 MIMG_instruction* load = emit_mimg(bld, opcode, tmp, resource, Operand(s4), coords, vdata); in visit_image_load()
6175 tmp = bld.pseudo(aco_opcode::p_create_vector, bld.def(RegType::vgpr, tmp.size() + 1), tmp, in visit_image_load()
6185 Builder bld(ctx->program, ctx->block); in visit_image_store() local
6242 data = bld.tmp(RegClass::get(RegType::vgpr, dmask_count * rc.bytes())); in visit_image_store()
6244 bld.insert(std::move(vec)); in visit_image_store()
6250 Temp rsrc = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa)); in visit_image_store()
6286 Temp resource = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa)); in visit_image_store()
6292 emit_mimg(bld, opcode, Temp(0, v1), resource, Operand(s4), coords, Operand(data)); in visit_image_store()
6397 Builder bld(ctx->program, ctx->block); in visit_image_atomic() local
6410 data = bld.pseudo(aco_opcode::p_create_vector, bld.def(is_64bit ? v4 : v2), in visit_image_atomic()
6418 Temp resource = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa)); in visit_image_atomic()
6428 return_previous ? (cmpswap ? bld.def(data.regClass()) : Definition(dst)) : Definition(); in visit_image_atomic()
6439 bld.pseudo(aco_opcode::p_extract_vector, Definition(dst), def.getTemp(), Operand::zero()); in visit_image_atomic()
6444 Temp resource = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa)); in visit_image_atomic()
6445 Temp tmp = return_previous ? (cmpswap ? bld.tmp(data.regClass()) : dst) : Temp(0, v1); in visit_image_atomic()
6447 emit_mimg(bld, image_op, tmp, resource, Operand(s4), coords, Operand(data)); in visit_image_atomic()
6459 bld.pseudo(aco_opcode::p_extract_vector, Definition(dst), tmp, Operand::zero()); in visit_image_atomic()
6466 Builder bld(ctx->program, ctx->block); in visit_load_ssbo() local
6470 Temp rsrc = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa)); in visit_load_ssbo()
6483 Builder bld(ctx->program, ctx->block); in visit_store_ssbo() local
6489 Temp rsrc = bld.as_uniform(get_ssa_temp(ctx, instr->src[1].ssa)); in visit_store_ssbo()
6529 Builder bld(ctx->program, ctx->block); in visit_atomic_ssbo() local
6540 data = bld.pseudo(aco_opcode::p_create_vector, bld.def(RegType::vgpr, data.size() * 2), in visit_atomic_ssbo()
6544 Temp rsrc = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa)); in visit_atomic_ssbo()
6554 return_previous ? (cmpswap ? bld.def(data.regClass()) : Definition(dst)) : Definition(); in visit_atomic_ssbo()
6565 bld.pseudo(aco_opcode::p_extract_vector, Definition(dst), def.getTemp(), Operand::zero()); in visit_atomic_ssbo()
6588 Builder bld(ctx->program, ctx->block); in visit_load_global() local
6611 info.resource = bld.as_uniform(info.resource); in visit_load_global()
6612 info.offset = Operand(bld.as_uniform(info.offset)); in visit_load_global()
6614 emit_load(ctx, bld, info, smem_load_params); in visit_load_global()
6618 emit_load(ctx, bld, info, params); in visit_load_global()
6625 Builder bld(ctx->program, ctx->block); in visit_store_global() local
6646 lower_global_address(bld, offsets[i], &write_address, &write_const_offset, &write_offset); in visit_store_global()
6695 Temp rsrc = get_gfx6_global_rsrc(bld, write_address); in visit_store_global()
6717 Builder bld(ctx->program, ctx->block); in visit_global_atomic() local
6725 data = bld.pseudo(aco_opcode::p_create_vector, bld.def(RegType::vgpr, data.size() * 2), in visit_global_atomic()
6735 lower_global_address(bld, 0, &addr, &const_offset, &offset); in visit_global_atomic()
6828 Temp rsrc = get_gfx6_global_rsrc(bld, addr); in visit_global_atomic()
6838 return_previous ? (cmpswap ? bld.def(data.regClass()) : Definition(dst)) : Definition(); in visit_global_atomic()
6849 bld.pseudo(aco_opcode::p_extract_vector, Definition(dst), def.getTemp(), Operand::zero()); in visit_global_atomic()
6875 Builder bld(ctx->program, ctx->block); in visit_load_buffer() local
6885 Temp descriptor = bld.as_uniform(get_ssa_temp(ctx, intrin->src[0].ssa)); in visit_load_buffer()
6889 s_offset_zero ? Temp(0, s1) : bld.as_uniform(get_ssa_temp(ctx, intrin->src[2].ssa)); in visit_load_buffer()
6928 emit_load(ctx, bld, info, mtbuf_load_params); in visit_load_buffer()
6935 emit_load(ctx, bld, info, mubuf_load_format_params); in visit_load_buffer()
6945 emit_load(ctx, bld, info, mubuf_load_params); in visit_load_buffer()
6953 Builder bld(ctx->program, ctx->block); in visit_store_buffer() local
6962 Temp descriptor = bld.as_uniform(get_ssa_temp(ctx, intrin->src[1].ssa)); in visit_store_buffer()
6964 Temp s_offset = bld.as_uniform(get_ssa_temp(ctx, intrin->src[3].ssa)); in visit_store_buffer()
6992 bld, write_voffset, offsets[i] + nir_intrinsic_base(intrin)); in visit_store_buffer()
6999 vaddr_op = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), idx, write_voffset); in visit_store_buffer()
7010 Instruction* mubuf = bld.mubuf(op, Operand(descriptor), vaddr_op, s_offset, in visit_store_buffer()
7021 Builder bld(ctx->program, ctx->block); in visit_load_smem() local
7023 Temp base = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa)); in visit_load_smem()
7024 Temp offset = bld.as_uniform(get_ssa_temp(ctx, instr->src[1].ssa)); in visit_load_smem()
7028 base = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), base, in visit_load_smem()
7052 bld.pseudo(aco_opcode::p_extract_vector, Definition(dst), in visit_load_smem()
7053 bld.smem(opcode, bld.def(RegType::sgpr, size), base, offset), Operand::c32(0u)); in visit_load_smem()
7055 bld.smem(opcode, Definition(dst), base, offset); in visit_load_smem()
7078 Builder bld(ctx->program, ctx->block); in emit_barrier() local
7129 bld.barrier(aco_opcode::p_barrier, in emit_barrier()
7140 Builder bld(ctx->program, ctx->block); in visit_load_shared() local
7164 Builder bld(ctx->program, ctx->block); in visit_shared_atomic() local
7165 Operand m = load_lds_size_m0(bld); in visit_shared_atomic()
7266 address = bld.vadd32(bld.def(v1), Operand::c32(offset), address); in visit_shared_atomic()
7277 if (bld.program->gfx_level >= GFX11) in visit_shared_atomic()
7295 Builder bld(ctx->program, ctx->block); in visit_shared_append() local
7306 Temp tmp = bld.tmp(v1); in visit_shared_append()
7308 Operand m = load_lds_size_m0(bld); in visit_shared_append()
7310 ds = bld.ds(op, Definition(tmp), address); in visit_shared_append()
7312 ds = bld.ds(op, Definition(tmp), m, address); in visit_shared_append()
7321 Temp last_lane = bld.sop1(aco_opcode::s_flbit_i32_b64, bld.def(s1), Operand(exec, s2)); in visit_shared_append()
7322 last_lane = bld.sop2(aco_opcode::s_sub_u32, bld.def(s1), bld.def(s1, scc), Operand::c32(63), in visit_shared_append()
7324 bld.readlane(Definition(get_ssa_temp(ctx, &instr->def)), tmp, last_lane); in visit_shared_append()
7326 bld.pseudo(aco_opcode::p_as_uniform, Definition(get_ssa_temp(ctx, &instr->def)), tmp); in visit_shared_append()
7335 Builder bld(ctx->program, ctx->block); in visit_access_shared2_amd() local
7337 assert(bld.program->gfx_level >= GFX7); in visit_access_shared2_amd()
7344 Operand m = load_lds_size_m0(bld); in visit_access_shared2_amd()
7354 ds = bld.ds(op, address, data0, data1, m, offset0, offset1); in visit_access_shared2_amd()
7357 Definition tmp_dst(dst.type() == RegType::vgpr ? dst : bld.tmp(is64bit ? v4 : v2)); in visit_access_shared2_amd()
7360 ds = bld.ds(op, tmp_dst, address, m, offset0, offset1); in visit_access_shared2_amd()
7373 comp[i] = bld.as_uniform(emit_extract_vector(ctx, ds->definitions[0].getTemp(), i, v1)); in visit_access_shared2_amd()
7375 Temp comp0 = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), comp[0], comp[1]); in visit_access_shared2_amd()
7376 Temp comp1 = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), comp[2], comp[3]); in visit_access_shared2_amd()
7379 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), comp0, comp1); in visit_access_shared2_amd()
7382 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), comp[0], comp[1]); in visit_access_shared2_amd()
7393 Builder bld(ctx->program, ctx->block); in get_scratch_resource() local
7397 bld.sop1(aco_opcode::p_load_symbol, bld.def(s1), Operand::c32(aco_symbol_scratch_addr_lo)); in get_scratch_resource()
7399 bld.sop1(aco_opcode::p_load_symbol, bld.def(s1), Operand::c32(aco_symbol_scratch_addr_hi)); in get_scratch_resource()
7400 scratch_addr = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), addr_lo, addr_hi); in get_scratch_resource()
7403 bld.smem(aco_opcode::s_load_dwordx2, bld.def(s2), scratch_addr, Operand::zero()); in get_scratch_resource()
7421 return bld.pseudo(aco_opcode::p_create_vector, bld.def(s4), scratch_addr, Operand::c32(desc[2]), in get_scratch_resource()
7428 Builder bld(ctx->program, ctx->block); in visit_load_scratch() local
7441 bld.copy(bld.def(s1), Operand::c32(ROUND_DOWN_TO(nir_src_as_uint(instr->src[0]), max))); in visit_load_scratch()
7448 emit_load(ctx, bld, info, params); in visit_load_scratch()
7453 emit_load(ctx, bld, info, scratch_mubuf_load_params); in visit_load_scratch()
7460 Builder bld(ctx->program, ctx->block); in visit_store_scratch() local
7498 saddr = bld.copy(bld.def(s1), Operand::c32(ROUND_DOWN_TO(const_offset, max))); in visit_store_scratch()
7500 bld.scratch(op, addr, saddr, write_datas[i], const_offset % max, in visit_store_scratch()
7508 Instruction* mubuf = bld.mubuf(op, rsrc, offset, ctx->program->scratch_offset, in visit_store_scratch()
7552 Builder bld(ctx->program, ctx->block); in emit_uniform_subgroup() local
7556 bld.pseudo(aco_opcode::p_as_uniform, dst, src); in emit_uniform_subgroup()
7558 bld.copy(dst, src); in emit_uniform_subgroup()
7564 Builder bld(ctx->program, ctx->block); in emit_addition_uniform_reduce() local
7569 Temp tmp = dst.regClass() == s1 ? bld.tmp(RegClass::get(RegType::vgpr, src.ssa->bit_size / 8)) in emit_addition_uniform_reduce()
7573 count = bld.vop1(aco_opcode::v_cvt_f16_u16, bld.def(v2b), count); in emit_addition_uniform_reduce()
7574 bld.vop2(aco_opcode::v_mul_f16, Definition(tmp), count, src_tmp); in emit_addition_uniform_reduce()
7577 count = bld.vop1(aco_opcode::v_cvt_f32_u32, bld.def(v1), count); in emit_addition_uniform_reduce()
7578 bld.vop2(aco_opcode::v_mul_f32, Definition(tmp), count, src_tmp); in emit_addition_uniform_reduce()
7582 bld.pseudo(aco_opcode::p_as_uniform, dst, tmp); in emit_addition_uniform_reduce()
7588 src_tmp = bld.as_uniform(src_tmp); in emit_addition_uniform_reduce()
7592 bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), count, Operand::c32(1u)); in emit_addition_uniform_reduce()
7594 count = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(1u), count); in emit_addition_uniform_reduce()
7601 bld.pseudo(aco_opcode::p_extract_vector, dst, count, Operand::zero()); in emit_addition_uniform_reduce()
7603 bld.copy(dst, count); in emit_addition_uniform_reduce()
7605 bld.copy(dst, Operand::zero(dst.bytes())); in emit_addition_uniform_reduce()
7607 bld.v_mul_imm(dst, count, imm, true, true); in emit_addition_uniform_reduce()
7609 bld.sop2(aco_opcode::s_sub_i32, dst, bld.def(s1, scc), Operand::zero(), count); in emit_addition_uniform_reduce()
7611 bld.sop2(aco_opcode::s_lshl_b32, dst, bld.def(s1, scc), count, in emit_addition_uniform_reduce()
7614 bld.sop2(aco_opcode::s_mul_i32, dst, src_tmp, count); in emit_addition_uniform_reduce()
7616 bld.vop3(aco_opcode::v_mul_lo_u16_e64, dst, src_tmp, count); in emit_addition_uniform_reduce()
7618 bld.vop2(aco_opcode::v_mul_lo_u16, dst, src_tmp, count); in emit_addition_uniform_reduce()
7620 bld.vop3(aco_opcode::v_mul_lo_u32, dst, src_tmp, count); in emit_addition_uniform_reduce()
7622 bld.sop2(aco_opcode::s_mul_i32, dst, src_tmp, count); in emit_addition_uniform_reduce()
7634 Builder bld(ctx->program, ctx->block); in emit_uniform_reduce() local
7641 bld.sop1(Builder::s_bcnt1_i32, bld.def(s1), bld.def(s1, scc), Operand(exec, bld.lm)); in emit_uniform_reduce()
7655 Builder bld(ctx->program, ctx->block); in emit_uniform_scan() local
7669 packed_tid = emit_mbcnt(ctx, bld.tmp(v1), Operand(exec, bld.lm), Operand::c32(1u)); in emit_uniform_scan()
7671 packed_tid = emit_mbcnt(ctx, bld.tmp(v1), Operand(exec, bld.lm)); in emit_uniform_scan()
7687 Temp lane = bld.sop1(Builder::s_ff1_i32, bld.def(s1), Operand(exec, bld.lm)); in emit_uniform_scan()
7691 Temp lo = bld.tmp(v1), hi = bld.tmp(v1); in emit_uniform_scan()
7692 bld.pseudo(aco_opcode::p_split_vector, Definition(lo), Definition(hi), src); in emit_uniform_scan()
7697 bld.writelane(bld.def(v1), bld.copy(bld.def(s1, m0), Operand::c32(identity_lo)), lane, lo); in emit_uniform_scan()
7699 bld.writelane(bld.def(v1), bld.copy(bld.def(s1, m0), Operand::c32(identity_hi)), lane, hi); in emit_uniform_scan()
7700 bld.pseudo(aco_opcode::p_create_vector, dst, lo, hi); in emit_uniform_scan()
7703 bld.writelane(dst, bld.copy(bld.def(s1, m0), Operand::c32(identity)), lane, in emit_uniform_scan()
7718 Builder bld(ctx->program, ctx->block); in emit_reduction_instr() local
7723 defs[num_defs++] = bld.def(bld.lm); /* used internally to save/restore exec */ in emit_reduction_instr()
7735 defs[num_defs++] = bld.def(RegType::sgpr, dst.size()); in emit_reduction_instr()
7738 defs[num_defs++] = bld.def(s1, scc); in emit_reduction_instr()
7750 defs[num_defs++] = bld.def(bld.lm, vcc); in emit_reduction_instr()
7761 bld.insert(std::move(reduce)); in emit_reduction_instr()
7769 Builder bld(ctx->program, ctx->block); in inclusive_scan_to_exclusive() local
7772 bld.def(dst.regClass()), src); in inclusive_scan_to_exclusive()
7777 case iadd32: return bld.vsub32(dst, scan, src); in inclusive_scan_to_exclusive()
7780 Temp src00 = bld.tmp(v1); in inclusive_scan_to_exclusive()
7781 Temp src01 = bld.tmp(v1); in inclusive_scan_to_exclusive()
7782 bld.pseudo(aco_opcode::p_split_vector, Definition(src00), Definition(src01), scan); in inclusive_scan_to_exclusive()
7783 Temp src10 = bld.tmp(v1); in inclusive_scan_to_exclusive()
7784 Temp src11 = bld.tmp(v1); in inclusive_scan_to_exclusive()
7785 bld.pseudo(aco_opcode::p_split_vector, Definition(src10), Definition(src11), src); in inclusive_scan_to_exclusive()
7787 Temp lower = bld.tmp(v1); in inclusive_scan_to_exclusive()
7788 Temp upper = bld.tmp(v1); in inclusive_scan_to_exclusive()
7790 Temp borrow = bld.vsub32(Definition(lower), src00, src10, true).def(1).getTemp(); in inclusive_scan_to_exclusive()
7791 bld.vsub32(Definition(upper), src01, src11, false, borrow); in inclusive_scan_to_exclusive()
7793 bld.vop2(aco_opcode::v_xor_b32, Definition(lower), src00, src10); in inclusive_scan_to_exclusive()
7794 bld.vop2(aco_opcode::v_xor_b32, Definition(upper), src01, src11); in inclusive_scan_to_exclusive()
7796 return bld.pseudo(aco_opcode::p_create_vector, dst, lower, upper); in inclusive_scan_to_exclusive()
7800 case ixor32: return bld.vop2(aco_opcode::v_xor_b32, dst, scan, src); in inclusive_scan_to_exclusive()
7809 Builder bld(ctx->program, ctx->block); in emit_rotate_by_constant() local
7815 dst = bld.copy(bld.def(rc), src); in emit_rotate_by_constant()
7817 dst = emit_masked_swizzle(ctx, bld, src, ds_pattern_bitmode(0x1f, 0, delta), true); in emit_rotate_by_constant()
7824 dst = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(rc), src, dpp_ctrl); in emit_rotate_by_constant()
7826 dst = bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), src, (1 << 15) | dpp_ctrl); in emit_rotate_by_constant()
7831 dst = bld.vop1_dpp8(aco_opcode::v_mov_b32, bld.def(rc), src, lane_sel); in emit_rotate_by_constant()
7833 dst = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(rc), src, dpp_row_rr(16 - delta)); in emit_rotate_by_constant()
7836 dst = bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), src, ctrl); in emit_rotate_by_constant()
7840 dst = bld.vop1(aco_opcode::v_permlane64_b32, bld.def(rc), src); in emit_rotate_by_constant()
7842 dst = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(rc), src, dpp_wf_rl1); in emit_rotate_by_constant()
7844 dst = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(rc), src, dpp_wf_rr1); in emit_rotate_by_constant()
7883 Builder bld(ctx->program, ctx->block); in create_fs_dual_src_export_gfx11() local
7893 exp->definitions[0] = bld.def(type); /* mrt0 */ in create_fs_dual_src_export_gfx11()
7894 exp->definitions[1] = bld.def(type); /* mrt1 */ in create_fs_dual_src_export_gfx11()
7895 exp->definitions[2] = bld.def(bld.lm); in create_fs_dual_src_export_gfx11()
7896 exp->definitions[3] = bld.def(bld.lm); in create_fs_dual_src_export_gfx11()
7897 exp->definitions[4] = bld.def(bld.lm, vcc); in create_fs_dual_src_export_gfx11()
7898 exp->definitions[5] = bld.def(s1, scc); in create_fs_dual_src_export_gfx11()
7928 Builder bld(ctx->program, ctx->block); in visit_cmat_muladd() local
7935 VALU_instruction& vop3p = bld.vop3p(opcode, Definition(dst), A, B, C, 0, 0)->valu(); in visit_cmat_muladd()
7950 Builder bld(ctx->program, ctx->block); in visit_intrinsic() local
7997 bld.copy(Definition(dst), get_arg(ctx, ctx->args->num_work_groups)); in visit_intrinsic()
8001 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), in visit_intrinsic()
8002 bld.smem(aco_opcode::s_load_dwordx2, bld.def(s2), addr, Operand::zero()), in visit_intrinsic()
8003 bld.smem(aco_opcode::s_load_dword, bld.def(s1), addr, Operand::c32(8))); in visit_intrinsic()
8011 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), ctx->workgroup_id[0], in visit_intrinsic()
8021 bld.sop2(aco_opcode::s_bfe_u32, Definition(get_ssa_temp(ctx, &instr->def)), bld.def(s1, scc), in visit_intrinsic()
8069 tl = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), src, dpp_ctrl1); in visit_intrinsic()
8072 bld.vop3p(aco_opcode::v_pk_add_f16, bld.def(v1), src, tl, opsel_lo, opsel_hi); in visit_intrinsic()
8077 bld.vop1_dpp(aco_opcode::v_mov_b32, Definition(dst), sub, dpp_ctrl2); in visit_intrinsic()
8079 bld.copy(Definition(dst), sub); in visit_intrinsic()
8091 bld.vop2(subrev, Definition(dst), src, src); in visit_intrinsic()
8093 bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, Definition(dst), src, in visit_intrinsic()
8098 Builder::Result tmp = bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, bld.def(v1), in visit_intrinsic()
8101 bld.vinterp_inreg(aco_opcode::v_interp_p2_f32_inreg, Definition(dst), src, in visit_intrinsic()
8104 bld.vop2_dpp(subrev, Definition(dst), src, src, dpp_ctrl1); in visit_intrinsic()
8106 Temp tmp = bld.vop2_dpp(subrev, bld.def(v1), src, src, dpp_ctrl1); in visit_intrinsic()
8107 bld.vop1_dpp(aco_opcode::v_mov_b32, Definition(dst), tmp, dpp_ctrl2); in visit_intrinsic()
8109 Temp tl = bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), src, (1 << 15) | dpp_ctrl1); in visit_intrinsic()
8112 tr = bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), src, (1 << 15) | dpp_ctrl2); in visit_intrinsic()
8113 bld.vop2(subrev, Definition(dst), tl, tr); in visit_intrinsic()
8126 assert(src.regClass() == bld.lm); in visit_intrinsic()
8128 src = bld.vopc(aco_opcode::v_cmp_lg_u32, bld.def(bld.lm), Operand::zero(), src); in visit_intrinsic()
8130 src = bld.vopc(aco_opcode::v_cmp_lg_u64, bld.def(bld.lm), Operand::zero(), src); in visit_intrinsic()
8137 Definition def = dst.size() == bld.lm.size() ? Definition(dst) : bld.def(bld.lm); in visit_intrinsic()
8139 src = bld.copy(def, src); in visit_intrinsic()
8141 src = bld.sop2(Builder::s_and, def, bld.def(s1, scc), src, Operand(exec, bld.lm)); in visit_intrinsic()
8142 if (dst.size() != bld.lm.size()) { in visit_intrinsic()
8144 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), src, Operand::zero()); in visit_intrinsic()
8151 Temp src = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa)); in visit_intrinsic()
8154 assert(dst.size() == bld.lm.size()); in visit_intrinsic()
8158 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), src, Operand::zero()); in visit_intrinsic()
8160 bld.copy(Definition(dst), src); in visit_intrinsic()
8174 tid = bld.as_uniform(tid); in visit_intrinsic()
8180 Temp tmp = bld.tmp(v1); in visit_intrinsic()
8181 tmp = emit_bpermute(ctx, bld, tid, src); in visit_intrinsic()
8183 bld.pseudo(aco_opcode::p_split_vector, Definition(dst), in visit_intrinsic()
8184 bld.def(src.regClass() == v1b ? v3b : v2b), tmp); in visit_intrinsic()
8186 bld.pseudo(aco_opcode::p_as_uniform, Definition(dst), tmp); in visit_intrinsic()
8188 Temp tmp = emit_bpermute(ctx, bld, tid, src); in visit_intrinsic()
8189 bld.copy(Definition(dst), tmp); in visit_intrinsic()
8191 Temp lo = bld.tmp(v1), hi = bld.tmp(v1); in visit_intrinsic()
8192 bld.pseudo(aco_opcode::p_split_vector, Definition(lo), Definition(hi), src); in visit_intrinsic()
8193 lo = emit_bpermute(ctx, bld, tid, lo); in visit_intrinsic()
8194 hi = emit_bpermute(ctx, bld, tid, hi); in visit_intrinsic()
8195 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lo, hi); in visit_intrinsic()
8220 bld.copy(Definition(dst), src); in visit_intrinsic()
8224 delta = bld.as_uniform(delta); in visit_intrinsic()
8232 bld.sopc(aco_opcode::s_bitcmp0_b32, bld.def(s1, scc), delta, Operand::c32(0)); in visit_intrinsic()
8234 Temp swapped = emit_masked_swizzle(ctx, bld, src, ds_pattern_bitmode(0x1f, 0, 0x1), true); in visit_intrinsic()
8235 tmp = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(src.regClass()), swapped, src, noswap); in visit_intrinsic()
8238 delta = bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), delta, in visit_intrinsic()
8241 bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), delta, Operand::c32(2)); in visit_intrinsic()
8243 Temp lo = bld.copy(bld.def(s1), Operand::c32(cluster_size == 4 ? 0x32103210 : 0x76543210)); in visit_intrinsic()
8247 Temp shr = bld.sop2(aco_opcode::s_lshr_b32, bld.def(s1), bld.def(s1, scc), lo, delta); in visit_intrinsic()
8249 Temp lotolohi = bld.copy(bld.def(s1), Operand::c32(0x4444)); in visit_intrinsic()
8251 bld.sop2(aco_opcode::s_or_b32, bld.def(s1), bld.def(s1, scc), shr, lotolohi); in visit_intrinsic()
8252 lo = bld.sop2(aco_opcode::s_pack_ll_b32_b16, bld.def(s1), shr, lohi); in visit_intrinsic()
8254 delta = bld.sop2(aco_opcode::s_sub_u32, bld.def(s1), bld.def(s1, scc), in visit_intrinsic()
8257 bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), lo, delta); in visit_intrinsic()
8258 lo = bld.sop2(aco_opcode::s_or_b32, bld.def(s1), bld.def(s1, scc), shr, shl); in visit_intrinsic()
8260 Temp lotohi = bld.copy(bld.def(s1), Operand::c32(0x88888888)); in visit_intrinsic()
8261 hi = bld.sop2(aco_opcode::s_or_b32, bld.def(s1), bld.def(s1, scc), lo, lotohi); in visit_intrinsic()
8263 hi = bld.copy(bld.def(s1), Operand::c32(0xfedcba98)); in visit_intrinsic()
8265 Temp lohi = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), lo, hi); in visit_intrinsic()
8267 Temp shr = bld.sop2(aco_opcode::s_lshr_b64, bld.def(s2), bld.def(s1, scc), lohi, delta); in visit_intrinsic()
8268 delta = bld.sop2(aco_opcode::s_sub_u32, bld.def(s1), bld.def(s1, scc), Operand::c32(64), in visit_intrinsic()
8270 Temp shl = bld.sop2(aco_opcode::s_lshl_b64, bld.def(s2), bld.def(s1, scc), lohi, delta); in visit_intrinsic()
8272 lohi = bld.sop2(aco_opcode::s_or_b64, bld.def(s2), bld.def(s1, scc), shr, shl); in visit_intrinsic()
8273 lo = bld.tmp(s1); in visit_intrinsic()
8274 hi = bld.tmp(s1); in visit_intrinsic()
8275 bld.pseudo(aco_opcode::p_split_vector, Definition(lo), Definition(hi), lohi); in visit_intrinsic()
8279 bld.vop3(aco_opcode::v_permlane16_b32, bld.def(src.regClass()), src, lo, hi); in visit_intrinsic()
8285 Temp tid = emit_mbcnt(ctx, bld.tmp(v1)); in visit_intrinsic()
8286 Temp src_lane = bld.vadd32(bld.def(v1), tid, delta); in visit_intrinsic()
8292 bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(2u), src_lane); in visit_intrinsic()
8293 tmp = bld.ds(aco_opcode::ds_bpermute_b32, bld.def(v1), index_x4, src); in visit_intrinsic()
8296 src_lane = bld.vop3(aco_opcode::v_bfi_b32, bld.def(v1), Operand::c32(cluster_size - 1), in visit_intrinsic()
8298 tmp = emit_bpermute(ctx, bld, src_lane, src); in visit_intrinsic()
8303 bld.copy(Definition(dst), tmp); in visit_intrinsic()
8311 assert(src.regClass() == bld.lm); in visit_intrinsic()
8312 Temp tmp = bld.sopc(Builder::s_bitcmp1, bld.def(s1, scc), src, in visit_intrinsic()
8313 bld.sop1(Builder::s_ff1_i32, bld.def(s1), Operand(exec, bld.lm))); in visit_intrinsic()
8325 bld.pseudo(aco_opcode::p_as_uniform, Definition(dst), src); in visit_intrinsic()
8327 bld.copy(Definition(dst), src); in visit_intrinsic()
8333 assert(src.regClass() == bld.lm); in visit_intrinsic()
8334 assert(dst.regClass() == bld.lm); in visit_intrinsic()
8336 Temp tmp = bld.sop1(Builder::s_not, bld.def(bld.lm), bld.def(s1, scc), src); in visit_intrinsic()
8337 tmp = bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), tmp, Operand(exec, bld.lm)) in visit_intrinsic()
8341 bld.sop1(Builder::s_not, Definition(dst), bld.def(s1, scc), cond); in visit_intrinsic()
8348 assert(src.regClass() == bld.lm); in visit_intrinsic()
8349 assert(dst.regClass() == bld.lm); in visit_intrinsic()
8358 src = bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), src, Operand(exec, bld.lm)); in visit_intrinsic()
8359 bld.sop1(Builder::s_wqm, Definition(get_ssa_temp(ctx, &instr->def)), bld.def(s1, scc), src); in visit_intrinsic()
8365 src = bld.sop1(Builder::s_not, bld.def(bld.lm), bld.def(s1, scc), src); in visit_intrinsic()
8366 src = bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), src, Operand(exec, bld.lm)); in visit_intrinsic()
8367 src = bld.sop1(Builder::s_wqm, bld.def(bld.lm), bld.def(s1, scc), src); in visit_intrinsic()
8368 bld.sop1(Builder::s_not, Definition(get_ssa_temp(ctx, &instr->def)), bld.def(s1, scc), src); in visit_intrinsic()
8435 bld.vop1_dpp(aco_opcode::v_mov_b32, Definition(dst), src, dpp_ctrl); in visit_intrinsic()
8480 src = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::zero(), in visit_intrinsic()
8487 assert(src.regClass() == bld.lm && dst.regClass() == bld.lm); in visit_intrinsic()
8490 Operand mask_tmp = bld.lm.bytes() == 4 in visit_intrinsic()
8492 : bld.pseudo(aco_opcode::p_create_vector, bld.def(bld.lm), in visit_intrinsic()
8496 bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), src, Operand(exec, bld.lm)); in visit_intrinsic()
8497 src = bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), mask_tmp, src); in visit_intrinsic()
8498 bld.sop1(Builder::s_wqm, Definition(dst), bld.def(s1, scc), src); in visit_intrinsic()
8501 Definition def = (excess_bytes || bool_use_valu) ? bld.def(v1) : Definition(dst); in visit_intrinsic()
8504 bld.vop1_dpp(aco_opcode::v_mov_b32, def, src, dpp_ctrl, 0xf, 0xf, true, allow_fi); in visit_intrinsic()
8506 bld.ds(aco_opcode::ds_swizzle_b32, def, src, (1 << 15) | dpp_ctrl); in visit_intrinsic()
8509 bld.pseudo(aco_opcode::p_split_vector, Definition(dst), in visit_intrinsic()
8510 bld.def(RegClass::get(dst.type(), excess_bytes)), def.getTemp()); in visit_intrinsic()
8512 bld.vopc(aco_opcode::v_cmp_lg_u32, Definition(dst), Operand::zero(), def.getTemp()); in visit_intrinsic()
8514 Temp lo = bld.tmp(v1), hi = bld.tmp(v1); in visit_intrinsic()
8515 bld.pseudo(aco_opcode::p_split_vector, Definition(lo), Definition(hi), src); in visit_intrinsic()
8518 lo = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), lo, dpp_ctrl, 0xf, 0xf, true, in visit_intrinsic()
8520 hi = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), hi, dpp_ctrl, 0xf, 0xf, true, in visit_intrinsic()
8523 lo = bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), lo, (1 << 15) | dpp_ctrl); in visit_intrinsic()
8524 hi = bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), hi, (1 << 15) | dpp_ctrl); in visit_intrinsic()
8527 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lo, hi); in visit_intrinsic()
8550 assert(src.regClass() == bld.lm); in visit_intrinsic()
8551 src = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::zero(), in visit_intrinsic()
8553 src = emit_masked_swizzle(ctx, bld, src, mask, allow_fi); in visit_intrinsic()
8554 bld.vopc(aco_opcode::v_cmp_lg_u32, Definition(dst), Operand::zero(), src); in visit_intrinsic()
8556 Temp tmp = emit_masked_swizzle(ctx, bld, src, mask, allow_fi); in visit_intrinsic()
8559 Temp tmp = emit_masked_swizzle(ctx, bld, src, mask, allow_fi); in visit_intrinsic()
8562 bld.copy(Definition(dst), emit_masked_swizzle(ctx, bld, src, mask, allow_fi)); in visit_intrinsic()
8564 Temp lo = bld.tmp(v1), hi = bld.tmp(v1); in visit_intrinsic()
8565 bld.pseudo(aco_opcode::p_split_vector, Definition(lo), Definition(hi), src); in visit_intrinsic()
8566 lo = emit_masked_swizzle(ctx, bld, lo, mask, allow_fi); in visit_intrinsic()
8567 hi = emit_masked_swizzle(ctx, bld, hi, mask, allow_fi); in visit_intrinsic()
8568 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lo, hi); in visit_intrinsic()
8578 Temp val = bld.as_uniform(get_ssa_temp(ctx, instr->src[1].ssa)); in visit_intrinsic()
8579 Temp lane = bld.as_uniform(get_ssa_temp(ctx, instr->src[2].ssa)); in visit_intrinsic()
8583 bld.writelane(Definition(dst), val, lane, src); in visit_intrinsic()
8585 Temp src_lo = bld.tmp(v1), src_hi = bld.tmp(v1); in visit_intrinsic()
8586 Temp val_lo = bld.tmp(s1), val_hi = bld.tmp(s1); in visit_intrinsic()
8587 bld.pseudo(aco_opcode::p_split_vector, Definition(src_lo), Definition(src_hi), src); in visit_intrinsic()
8588 bld.pseudo(aco_opcode::p_split_vector, Definition(val_lo), Definition(val_hi), val); in visit_intrinsic()
8589 Temp lo = bld.writelane(bld.def(v1), val_lo, lane, src_hi); in visit_intrinsic()
8590 Temp hi = bld.writelane(bld.def(v1), val_hi, lane, src_hi); in visit_intrinsic()
8591 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lo, hi); in visit_intrinsic()
8603 src = emit_extract_vector(ctx, src, 0, RegClass(src.type(), bld.lm.size())); in visit_intrinsic()
8617 bld.copy(Definition(dst), src); in visit_intrinsic()
8619 bld.vop3(aco_opcode::v_permlane16_b32, Definition(dst), src, in visit_intrinsic()
8620 bld.as_uniform(get_ssa_temp(ctx, instr->src[1].ssa)), in visit_intrinsic()
8621 bld.as_uniform(get_ssa_temp(ctx, instr->src[2].ssa))); in visit_intrinsic()
8632 bld.pseudo(aco_opcode::p_is_helper, Definition(dst), Operand(exec, bld.lm)); in visit_intrinsic()
8641 assert(src.regClass() == bld.lm); in visit_intrinsic()
8643 cond = bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), src, in visit_intrinsic()
8644 Operand(exec, bld.lm)); in visit_intrinsic()
8650 bld.pseudo(aco_opcode::p_demote_to_helper, cond); in visit_intrinsic()
8672 assert(src.regClass() == bld.lm); in visit_intrinsic()
8674 cond = bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), src, in visit_intrinsic()
8675 Operand(exec, bld.lm)); in visit_intrinsic()
8683 bld.pseudo(aco_opcode::p_discard_if, cond); in visit_intrinsic()
8695 bld.sopp(aco_opcode::s_trap, 1u); in visit_intrinsic()
8699 bld.sop1(Builder::s_ff1_i32, Definition(get_ssa_temp(ctx, &instr->def)), in visit_intrinsic()
8700 Operand(exec, bld.lm)); in visit_intrinsic()
8705 Temp flbit = bld.sop1(Builder::s_flbit_i32, bld.def(s1), Operand(exec, bld.lm)); in visit_intrinsic()
8706 bld.sop2(aco_opcode::s_sub_i32, Definition(get_ssa_temp(ctx, &instr->def)), bld.def(s1, scc), in visit_intrinsic()
8716 bld.pseudo(aco_opcode::p_elect, Definition(get_ssa_temp(ctx, &instr->def)), in visit_intrinsic()
8717 Operand(exec, bld.lm)); in visit_intrinsic()
8725 Temp hi0 = bld.tmp(s1); in visit_intrinsic()
8726 Temp hi1 = bld.tmp(s1); in visit_intrinsic()
8727 Temp lo = bld.tmp(s1); in visit_intrinsic()
8728 …bld.pseudo(aco_opcode::p_shader_cycles_hi_lo_hi, Definition(hi0), Definition(lo), Definition(hi1)); in visit_intrinsic()
8729 Temp hi_eq = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), hi0, hi1); in visit_intrinsic()
8730 lo = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1), lo, Operand::zero(), bld.scc(hi_eq)); in visit_intrinsic()
8731 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lo, hi1); in visit_intrinsic()
8735 Temp clock = bld.sopk(aco_opcode::s_getreg_b32, bld.def(s1), ((20 - 1) << 11) | 29); in visit_intrinsic()
8736 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), clock, Operand::zero()); in visit_intrinsic()
8739 bld.sop1(aco_opcode::s_sendmsg_rtn_b64, Definition(dst), in visit_intrinsic()
8745 bld.smem(opcode, Definition(dst), memory_sync_info(0, semantic_volatile)); in visit_intrinsic()
8752 Temp m0_content = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa)); in visit_intrinsic()
8753 bld.sopp(aco_opcode::s_sendmsg, bld.m0(m0_content), imm); in visit_intrinsic()
8757 Temp src = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa)); in visit_intrinsic()
8759 bld.copy(Definition(get_ssa_temp(ctx, &instr->def)), lanecount_to_mask(ctx, src, offset)); in visit_intrinsic()
8766 Operand m = bld.m0((Temp)bld.copy(bld.def(s1, m0), bld.as_uniform(m0_val))); in visit_intrinsic()
8767 bld.ds(aco_opcode::ds_add_u32, as_vgpr(ctx, gds_addr), as_vgpr(ctx, store_val), m, 0u, 0u, in visit_intrinsic()
8775 bld.copy(Definition(dst), Operand(addr)); in visit_intrinsic()
8780 bld.pseudo(aco_opcode::p_resume_shader_address, Definition(get_ssa_temp(ctx, &instr->def)), in visit_intrinsic()
8781 bld.def(s1, scc), Operand::c32(nir_intrinsic_call_idx(instr))); in visit_intrinsic()
8792 bld.copy(Definition(dst), src); in visit_intrinsic()
8801 Temp gds_base = bld.copy(bld.def(v1), Operand::c32(0u)); in visit_intrinsic()
8808 m = bld.m0(bld.as_uniform(ordered_id)); in visit_intrinsic()
8810 bld.ds(aco_opcode::ds_ordered_count, bld.def(v1), gds_base, m, offset0, offset1, true); in visit_intrinsic()
8821 ds_instr = bld.ds(aco_opcode::ds_add_gs_reg_rtn, bld.def(v1), Operand(), chan_counter, in visit_intrinsic()
8836 m = bld.m0(bld.as_uniform(ordered_id)); in visit_intrinsic()
8838 bld.ds(aco_opcode::ds_ordered_count, bld.def(v1), gds_base, m, offset0, offset1, true); in visit_intrinsic()
8852 ds_instr = bld.ds(aco_opcode::ds_sub_gs_reg_rtn, bld.def(v1), Operand(), chan_counter, in visit_intrinsic()
8912 Temp row = bld.as_uniform(get_ssa_temp(ctx, instr->src[1].ssa)); in visit_intrinsic()
8914 row = bld.copy(bld.def(s1, m0), row); in visit_intrinsic()
8915 exp->operands[4] = bld.m0(row); in visit_intrinsic()
8967 bld.sop1(aco_opcode::p_load_symbol, Definition(dst), in visit_intrinsic()
8973 bld.sop1(aco_opcode::p_load_symbol, Definition(dst), in visit_intrinsic()
8980 bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa)); in visit_intrinsic()
8995 bld.pseudo(aco_opcode::p_pops_gfx9_ordered_section_done); in visit_intrinsic()
8999 case nir_intrinsic_nop_amd: bld.sopp(aco_opcode::s_nop, nir_intrinsic_base(instr)); break; in visit_intrinsic()
9000 case nir_intrinsic_sleep_amd: bld.sopp(aco_opcode::s_sleep, nir_intrinsic_base(instr)); break; in visit_intrinsic()
9002 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(nir_intrinsic_base(instr)), in visit_intrinsic()
9007 bld.pseudo(aco_opcode::p_unit_test, Definition(get_ssa_temp(ctx, &instr->def)), in visit_intrinsic()
9038 Builder bld(ctx->program, ctx->block); in visit_tex() local
9052 resource = bld.as_uniform(get_ssa_temp(ctx, instr->src[i].src.ssa)); in visit_tex()
9055 sampler = bld.as_uniform(get_ssa_temp(ctx, instr->src[i].src.ssa)); in visit_tex()
9174 acc = bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), acc, in visit_tex()
9178 acc = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), acc, in visit_tex()
9185 pack = bld.sop2(aco_opcode::s_or_b32, bld.def(s1), bld.def(s1, scc), pack, acc); in visit_tex()
9190 pack = bld.sop2(aco_opcode::s_or_b32, bld.def(s1), bld.def(s1, scc), in visit_tex()
9198 acc = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(0x3Fu), acc); in visit_tex()
9201 acc = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(8u * i), acc); in visit_tex()
9207 pack = bld.vop2(aco_opcode::v_or_b32, bld.def(v1), pack, acc); in visit_tex()
9212 pack = bld.vop2(aco_opcode::v_or_b32, bld.def(v1), Operand::c32(pack_const), pack); in visit_tex()
9215 offset = bld.copy(bld.def(v1), Operand::c32(pack_const)); in visit_tex()
9271 tmp_dst = bld.tmp(instr->is_sparse ? v5 : (d16 ? v2 : v4)); in visit_tex()
9273 tmp_dst = bld.tmp(v1); in visit_tex()
9276 tmp_dst = bld.tmp(RegClass::get(RegType::vgpr, bytes)); in visit_tex()
9284 half_texel[0] = half_texel[1] = bld.copy(bld.def(v1), Operand::c32(0xbf000000 /*-0.5*/)); in visit_tex()
9286 Temp tg4_lod = bld.copy(bld.def(v1), Operand::zero()); in visit_tex()
9287 Temp size = bld.tmp(v2); in visit_tex()
9288 MIMG_instruction* tex = emit_mimg(bld, aco_opcode::image_get_resinfo, size, resource, in visit_tex()
9297 half_texel[i] = bld.vop1(aco_opcode::v_cvt_f32_i32, bld.def(v1), half_texel[i]); in visit_tex()
9298 half_texel[i] = bld.vop1(aco_opcode::v_rcp_iflag_f32, bld.def(v1), half_texel[i]); in visit_tex()
9299 half_texel[i] = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), in visit_tex()
9314 bld.sopc(aco_opcode::s_bitcmp0_b32, bld.def(s1, scc), dword0, Operand::c32(bit_idx)); in visit_tex()
9317 half_texel[0] = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), in visit_tex()
9319 half_texel[1] = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), in visit_tex()
9324 Temp new_coords[2] = {bld.vop2(aco_opcode::v_add_f32, bld.def(v1), coords[0], half_texel[0]), in visit_tex()
9325 bld.vop2(aco_opcode::v_add_f32, bld.def(v1), coords[1], half_texel[1])}; in visit_tex()
9334 desc[i] = bld.tmp(s1); in visit_tex()
9339 Temp dfmt = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc), desc[1], in visit_tex()
9341 Temp compare_cube_wa = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), dfmt, in visit_tex()
9346 nfmt = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1), in visit_tex()
9348 Operand::c32(V_008F14_IMG_NUM_FORMAT_UINT), bld.scc(compare_cube_wa)); in visit_tex()
9350 nfmt = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1), in visit_tex()
9352 Operand::c32(V_008F14_IMG_NUM_FORMAT_SINT), bld.scc(compare_cube_wa)); in visit_tex()
9354 tg4_compare_cube_wa64 = bld.tmp(bld.lm); in visit_tex()
9357 nfmt = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), nfmt, in visit_tex()
9360 desc[1] = bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), desc[1], in visit_tex()
9362 desc[1] = bld.sop2(aco_opcode::s_or_b32, bld.def(s1), bld.def(s1, scc), desc[1], nfmt); in visit_tex()
9368 resource = bld.tmp(resource.regClass()); in visit_tex()
9372 new_coords[0] = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), new_coords[0], coords[0], in visit_tex()
9374 new_coords[1] = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), new_coords[1], coords[1], in visit_tex()
9413 mubuf->operands[3] = emit_tfe_init(bld, tmp_dst); in visit_tex()
9444 Operand vdata = instr->is_sparse ? emit_tfe_init(bld, tmp_dst) : Operand(v1); in visit_tex()
9445 MIMG_instruction* tex = emit_mimg(bld, op, tmp_dst, resource, Operand(s4), args, vdata); in visit_tex()
9463 Temp is_not_null = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand::zero(), in visit_tex()
9465 bld.sop2(aco_opcode::s_cselect_b32, Definition(dst), bld.as_uniform(tmp_dst), in visit_tex()
9466 Operand::c32(0x76543210), bld.scc(is_not_null)); in visit_tex()
9468 Temp is_not_null = bld.tmp(bld.lm); in visit_tex()
9469 bld.vopc_e64(aco_opcode::v_cmp_lg_u32, Definition(is_not_null), Operand::zero(), in visit_tex()
9471 bld.vop2(aco_opcode::v_cndmask_b32, Definition(dst), in visit_tex()
9472 bld.copy(bld.def(v1), Operand::c32(0x76543210)), tmp_dst, is_not_null); in visit_tex()
9619 bool implicit_derivs = bld.program->stage == fragment_fs && !has_derivs && !has_lod && in visit_tex()
9623 Operand vdata = instr->is_sparse ? emit_tfe_init(bld, tmp_dst) : Operand(v1); in visit_tex()
9624 MIMG_instruction* tex = emit_mimg(bld, opcode, tmp_dst, resource, Operand(sampler), args, vdata); in visit_tex()
9645 cvt_val = bld.vop1(aco_opcode::v_cvt_u32_f32, bld.def(v1), val[i]); in visit_tex()
9647 cvt_val = bld.vop1(aco_opcode::v_cvt_i32_f32, bld.def(v1), val[i]); in visit_tex()
9648 val[i] = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), val[i], cvt_val, in visit_tex()
9652 Temp tmp = dst.regClass() == tmp_dst.regClass() ? dst : bld.tmp(tmp_dst.regClass()); in visit_tex()
9654 tmp_dst = bld.pseudo(aco_opcode::p_create_vector, Definition(tmp), val[0], val[1], val[2], in visit_tex()
9657 tmp_dst = bld.pseudo(aco_opcode::p_create_vector, Definition(tmp), val[0], val[1], val[2], in visit_tex()
9722 Builder bld(ctx->program, ctx->block); in begin_loop() local
9723 bld.branch(aco_opcode::p_branch); in begin_loop()
9777 Builder bld(ctx->program, ctx->block); in end_loop() local
9798 bld.reset(break_block); in end_loop()
9799 bld.branch(aco_opcode::p_branch); in end_loop()
9805 bld.reset(continue_block); in end_loop()
9806 bld.branch(aco_opcode::p_branch); in end_loop()
9824 bld.reset(ctx->block); in end_loop()
9825 bld.branch(aco_opcode::p_branch); in end_loop()
9846 Builder bld(ctx->program, ctx->block); in emit_loop_jump() local
9869 bld.branch(aco_opcode::p_branch); in emit_loop_jump()
9888 bld.branch(aco_opcode::p_branch); in emit_loop_jump()
9908 bld.branch(aco_opcode::p_branch); in emit_loop_jump()
9916 bld.reset(break_block); in emit_loop_jump()
9917 bld.branch(aco_opcode::p_branch); in emit_loop_jump()
9967 Builder bld(ctx->program, ctx->block); in visit_debug_info() local
9968 bld.pseudo(aco_opcode::p_debug_info, Operand::c32(ctx->program->debug_info.size())); in visit_debug_info()
9977 Builder bld(ctx->program, ctx->block); in visit_block() local
9979 bld.pseudo(aco_opcode::p_end_linear_vgpr, tmp); in visit_block()
10350 Builder bld(ctx->program, ctx->block); in visit_if() local
10444 Builder bld(ctx->program, ctx->block); in export_mrt() local
10446 bld.exp(aco_opcode::exp, mrt->out[0], mrt->out[1], mrt->out[2], mrt->out[3], in export_mrt()
10461 Builder bld(ctx->program, ctx->block); in export_fs_mrt_color() local
10483 bld.vopc(aco_opcode::v_cmp_eq_f32, bld.def(bld.lm), values[i], values[i]); in export_fs_mrt_color()
10484 values[i] = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::zero(), values[i], in export_fs_mrt_color()
10508 values[i] = bld.pseudo(aco_opcode::p_create_vector, bld.def(v1), values[i * 2], in export_fs_mrt_color()
10511 values[i] = bld.vop3(aco_opcode::v_cvt_pkrtz_f16_f32_e64, bld.def(v1), values[i * 2], in export_fs_mrt_color()
10514 values[i] = bld.vop2(aco_opcode::v_cvt_pkrtz_f16_f32, bld.def(v1), values[i * 2], in export_fs_mrt_color()
10549 values[i] = bld.vop2(aco_opcode::v_min_u32, bld.def(v1), Operand::c32(max), values[i]); in export_fs_mrt_color()
10553 Temp tmp = convert_int(ctx, bld, values[i].getTemp(), 16, 32, false); in export_fs_mrt_color()
10570 values[i] = bld.vop2(aco_opcode::v_min_i32, bld.def(v1), Operand::c32(max), values[i]); in export_fs_mrt_color()
10571 values[i] = bld.vop2(aco_opcode::v_max_i32, bld.def(v1), Operand::c32(min), values[i]); in export_fs_mrt_color()
10575 Temp tmp = convert_int(ctx, bld, values[i].getTemp(), 16, 32, true); in export_fs_mrt_color()
10588 values[0] = bld.vop3(compr_op, bld.def(v1), values[0], values[1]); in export_fs_mrt_color()
10589 values[1] = bld.vop3(compr_op, bld.def(v1), values[2], values[3]); in export_fs_mrt_color()
10620 Builder bld(ctx->program, ctx->block); in export_fs_mrtz() local
10639 values[0] = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(16u), stencil); in export_fs_mrtz()
10682 bld.exp(aco_opcode::exp, values[0], values[1], values[2], values[3], enabled_channels, in export_fs_mrtz()
10693 Builder bld(ctx->program, ctx->block); in create_fs_null_export() local
10696 bld.exp(aco_opcode::exp, Operand(v1), Operand(v1), Operand(v1), Operand(v1), in create_fs_null_export()
10705 Builder bld(ctx->program, ctx->block); in create_fs_jump_to_epilog() local
10741 chan = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), chan); in create_fs_jump_to_epilog()
10744 Temp tmp = convert_int(ctx, bld, chan.getTemp(), 16, 32, sign_ext); in create_fs_jump_to_epilog()
10806 Builder bld(ctx->program, ctx->block); in create_fs_end_for_epilog() local
10836 Temp dst = bld.pseudo(aco_opcode::p_create_vector, bld.def(v1), lo, hi); in create_fs_end_for_epilog()
10919 Builder bld(ctx->program, ctx->block); in add_startpgm() local
10921 bld.pseudo(aco_opcode::p_extract, bld.def(s1), bld.def(s1, scc), idy, Operand::zero(), in add_startpgm()
10924 bld.pseudo(aco_opcode::p_extract, bld.def(s1), bld.def(s1, scc), idy, Operand::c32(1u), in add_startpgm()
10953 Builder bld(ctx->program, ctx->block); in add_startpgm() local
10954 bld.pseudo(aco_opcode::p_init_scratch, bld.def(s2), bld.def(s1, scc), scratch_addr, in add_startpgm()
11074 Builder bld(ctx->program); in finish_program() local
11075 bld.reset(instrs, it); in finish_program()
11076 bld.pseudo(aco_opcode::p_end_wqm); in finish_program()
11085 Builder bld(ctx->program, ctx->block); in lanecount_to_mask() local
11090 count = bld.sop2(aco_opcode::s_lshr_b32, bld.def(s1), bld.def(s1, scc), count, in lanecount_to_mask()
11098 Temp mask = bld.sop2(aco_opcode::s_bfm_b64, bld.def(s2), count, Operand::zero()); in lanecount_to_mask()
11099 return emit_extract_vector(ctx, mask, 0, bld.lm); in lanecount_to_mask()
11106 count = bld.sop2(aco_opcode::s_pack_ll_b32_b16, bld.def(s1), Operand::c32(0), count); in lanecount_to_mask()
11108 count = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), count, in lanecount_to_mask()
11113 return bld.sop2(aco_opcode::s_bfe_u32, bld.def(bld.lm), bld.def(s1, scc), Operand::c32(-1), in lanecount_to_mask()
11116 return bld.sop2(aco_opcode::s_bfe_u64, bld.def(bld.lm), bld.def(s1, scc), in lanecount_to_mask()
11158 Builder bld(ctx.program, ctx.block); in insert_rt_jump_next() local
11159 bld.sop1(aco_opcode::s_setpc_b64, get_arg(&ctx, ctx.args->rt.uniform_shader_addr)); in insert_rt_jump_next()
11201 Builder bld(ctx->program, ctx->block); in pops_await_overlapped_waves() local
11207 bld.sopp(aco_opcode::s_wait_event, in pops_await_overlapped_waves()
11218 bld.sopc(aco_opcode::s_bitcmp1_b32, bld.def(s1, scc), collision, Operand::c32(31)); in pops_await_overlapped_waves()
11221 bld.reset(ctx->block); in pops_await_overlapped_waves()
11226 const Temp packer_id = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc), in pops_await_overlapped_waves()
11229 const Temp packer_id_hwreg_bits = bld.sop2(aco_opcode::s_lshl1_add_u32, bld.def(s1), in pops_await_overlapped_waves()
11230 bld.def(s1, scc), packer_id, Operand::c32(1)); in pops_await_overlapped_waves()
11231 bld.sopk(aco_opcode::s_setreg_b32, packer_id_hwreg_bits, ((3 - 1) << 11) | 25); in pops_await_overlapped_waves()
11234 const Temp packer_id = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc), in pops_await_overlapped_waves()
11240 bld.sop2(aco_opcode::s_add_i32, bld.def(s1), bld.def(s1, scc), packer_id, Operand::c32(1)); in pops_await_overlapped_waves()
11241 bld.sopk(aco_opcode::s_setreg_b32, packer_id_hwreg_bits, ((2 - 1) << 11) | (24 << 6) | 1); in pops_await_overlapped_waves()
11244 Temp newest_overlapped_wave_id = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc), in pops_await_overlapped_waves()
11250 const Temp current_wave_id = bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), in pops_await_overlapped_waves()
11252 const Temp newest_overlapped_wave_id_wrapped = bld.sopc( in pops_await_overlapped_waves()
11253 aco_opcode::s_cmp_gt_u32, bld.def(s1, scc), newest_overlapped_wave_id, current_wave_id); in pops_await_overlapped_waves()
11255 bld.sop2(aco_opcode::s_add_i32, bld.def(s1), bld.def(s1, scc), newest_overlapped_wave_id, in pops_await_overlapped_waves()
11272 const Temp wave_id_offset = bld.sop2(aco_opcode::s_nand_b32, bld.def(s1), bld.def(s1, scc), in pops_await_overlapped_waves()
11274 newest_overlapped_wave_id = bld.sop2(aco_opcode::s_add_i32, bld.def(s1), bld.def(s1, scc), in pops_await_overlapped_waves()
11281 bld.reset(ctx->block); in pops_await_overlapped_waves()
11283 const Temp exiting_wave_id = bld.pseudo(aco_opcode::p_pops_gfx9_add_exiting_wave_id, bld.def(s1), in pops_await_overlapped_waves()
11284 bld.def(s1, scc), wave_id_offset); in pops_await_overlapped_waves()
11289 const Temp newest_overlapped_wave_exited = bld.sopc(aco_opcode::s_cmp_lt_u32, bld.def(s1, scc), in pops_await_overlapped_waves()
11297 bld.reset(ctx->block); in pops_await_overlapped_waves()
11300 bld.sopp(aco_opcode::s_sleep, ctx->program->gfx_level >= GFX10 ? UINT16_MAX : 3); in pops_await_overlapped_waves()
11303 bld.reset(ctx->block); in pops_await_overlapped_waves()
11306 bld.pseudo(aco_opcode::p_pops_gfx9_overlapped_wave_wait_done); in pops_await_overlapped_waves()
11310 bld.reset(ctx->block); in pops_await_overlapped_waves()
11316 Builder bld(ctx->program, ctx->block); in create_merged_jump_to_epilog() local
11518 Builder bld(ctx->program, ctx->block); in emit_polygon_stipple() local
11525 Temp addr0 = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(0x1f), pos_fixed_pt); in emit_polygon_stipple()
11526 Temp addr1 = bld.vop3(aco_opcode::v_bfe_u32, bld.def(v1), pos_fixed_pt, Operand::c32(16u), in emit_polygon_stipple()
11532 Temp desc = bld.smem(aco_opcode::s_load_dwordx4, bld.def(s4), list, in emit_polygon_stipple()
11536 Temp offset = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(2), addr1); in emit_polygon_stipple()
11537 Temp row = bld.mubuf(aco_opcode::buffer_load_dword, bld.def(v1), desc, offset, Operand::c32(0u), in emit_polygon_stipple()
11539 Temp bit = bld.vop3(aco_opcode::v_bfe_u32, bld.def(v1), row, addr0, Operand::c32(1u)); in emit_polygon_stipple()
11540 Temp cond = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm), Operand::zero(), bit); in emit_polygon_stipple()
11541 bld.pseudo(aco_opcode::p_demote_to_helper, cond); in emit_polygon_stipple()
11550 Builder bld(ctx->program, ctx->block); in overwrite_interp_args() local
11561 bld.sopc(aco_opcode::s_bitcmp1_b32, bld.def(s1, scc), bc_optimize, Operand::c32(31u)); in overwrite_interp_args()
11570 Temp dst = bld.tmp(v2); in overwrite_interp_args()
11579 Temp dst = bld.tmp(v2); in overwrite_interp_args()
11613 Builder bld(ctx->program, ctx->block); in overwrite_samplemask_arg() local
11633 Temp sampleid = bld.vop3(aco_opcode::v_bfe_u32, bld.def(v1), ancillary, Operand::c32(8u), in overwrite_samplemask_arg()
11638 Temp iter_mask = bld.copy(bld.def(v1), Operand::c32(ps_iter_mask)); in overwrite_samplemask_arg()
11640 Temp mask = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), sampleid, iter_mask); in overwrite_samplemask_arg()
11641 samplemask = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), samplemask, mask); in overwrite_samplemask_arg()
11650 Builder bld(ctx->program, ctx->block); in get_interp_color() local
11652 Temp dst = bld.tmp(v1); in get_interp_color()
11676 Builder bld(ctx->program, ctx->block); in interpolate_color_args() local
11683 bld.vopc(aco_opcode::v_cmp_lt_f32, bld.def(bld.lm), Operand::zero(), face); in interpolate_color_args()
11701 bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), back, front, is_face_positive); in interpolate_color_args()
11721 Builder bld(ctx->program, ctx->block); in emit_clamp_alpha_test() local
11726 colors[i] = bld.vop3(aco_opcode::v_med3_f16, bld.def(v2b), Operand::c16(0u), in emit_clamp_alpha_test()
11730 colors[i] = bld.vop3(aco_opcode::v_med3_f32, bld.def(v1), Operand::zero(), in emit_clamp_alpha_test()
11738 colors[3] = bld.copy(bld.def(v2b), Operand::c16(0x3c00)); in emit_clamp_alpha_test()
11740 colors[3] = bld.copy(bld.def(v1), Operand::c32(0x3f800000u)); in emit_clamp_alpha_test()
11761 ? bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), colors[3]) in emit_clamp_alpha_test()
11765 cond = bld.vopc(opcode, bld.def(bld.lm), ref, alpha); in emit_clamp_alpha_test()
11768 bld.pseudo(aco_opcode::p_discard_if, cond); in emit_clamp_alpha_test()
11815 Builder bld(ctx->program, ctx->block); in dump_sgpr_to_mem() local
11821 bld.copy(Definition(PhysReg{256}, v1) /* v0 */, data); in dump_sgpr_to_mem()
11823 bld.mubuf(aco_opcode::buffer_store_dword, Operand(rsrc), Operand(v1), Operand::c32(0u), in dump_sgpr_to_mem()
11827 bld.smem(aco_opcode::s_buffer_store_dword, Operand(rsrc), Operand::c32(offset), data, in dump_sgpr_to_mem()
11835 Builder bld(ctx->program, ctx->block); in enable_thread_indexing() local
11838 bld.sop2(aco_opcode::s_or_b32, Definition(rsrc_word3, s1), bld.def(s1, scc), in enable_thread_indexing()
11842 bld.sop2(aco_opcode::s_and_b32, Definition(rsrc_word3, s1), bld.def(s1, scc), in enable_thread_indexing()
11850 Builder bld(ctx->program, ctx->block); in disable_thread_indexing() local
11853 bld.sop2(aco_opcode::s_and_b32, Definition(rsrc_word3, s1), bld.def(s1, scc), in disable_thread_indexing()
11856 bld.sop2(aco_opcode::s_or_b32, Definition(rsrc_word3, s1), bld.def(s1, scc), in disable_thread_indexing()
11865 Builder bld(ctx->program, ctx->block); in save_or_restore_vgprs() local
11875 bld.mubuf(aco_opcode::buffer_store_dword, Operand(rsrc), Operand(v1), Operand::c32(0u), in save_or_restore_vgprs()
11880 bld.mubuf(aco_opcode::buffer_load_dword, Definition(PhysReg{256 + i}, v1), Operand(rsrc), in save_or_restore_vgprs()
11904 dump_vgprs_to_mem(isel_context* ctx, Builder& bld, Operand rsrc) in dump_vgprs_to_mem() argument
11921 bld.sopk(aco_opcode::s_getreg_b32, Definition(num_vgprs, s1), in dump_vgprs_to_mem()
11923 bld.sop2(aco_opcode::s_bfe_u32, Definition(num_vgprs, s1), bld.def(s1, scc), in dump_vgprs_to_mem()
11925 bld.sop2(aco_opcode::s_add_u32, Definition(num_vgprs, s1), bld.def(s1, scc), in dump_vgprs_to_mem()
11927 bld.sop2(aco_opcode::s_lshl_b32, Definition(num_vgprs, s1), bld.def(s1, scc), in dump_vgprs_to_mem()
11929 bld.sop2(aco_opcode::s_mul_i32, Definition(num_vgprs, s1), Operand::c32(256), in dump_vgprs_to_mem()
11933 bld.copy(Definition(m0, s1), Operand::c32(0u)); in dump_vgprs_to_mem()
11934 bld.copy(Definition(soffset, s1), Operand::c32(0u)); in dump_vgprs_to_mem()
11938 bld.sopc(aco_opcode::s_set_gpr_idx_on, Definition(m0, s1), Operand(m0, s1), in dump_vgprs_to_mem()
11945 bld.reset(ctx->block); in dump_vgprs_to_mem()
11949 bld.vop1(aco_opcode::v_movrels_b32, Definition(PhysReg{256}, v1), in dump_vgprs_to_mem()
11952 bld.vop1(aco_opcode::v_mov_b32, Definition(PhysReg{256}, v1), Operand(PhysReg{256}, v1)); in dump_vgprs_to_mem()
11955 bld.mubuf(aco_opcode::buffer_store_dword, Operand(rsrc), Operand(v1), in dump_vgprs_to_mem()
11961 bld.sop2(aco_opcode::s_add_u32, Definition(m0, s1), bld.def(s1, scc), Operand(m0, s1), in dump_vgprs_to_mem()
11963 bld.sop2(aco_opcode::s_add_u32, Definition(soffset, s1), bld.def(s1, scc), in dump_vgprs_to_mem()
11966 const Temp cond = bld.sopc(aco_opcode::s_cmp_ge_u32, bld.def(s1, scc), Operand(soffset, s1), in dump_vgprs_to_mem()
11978 bld.reset(ctx->block); in dump_vgprs_to_mem()
11982 bld.sopp(aco_opcode::s_set_gpr_idx_off); in dump_vgprs_to_mem()
11989 dump_lds_to_mem(isel_context* ctx, Builder& bld, Operand rsrc) in dump_lds_to_mem() argument
12006 bld.sopk(aco_opcode::s_getreg_b32, Definition(lds_size, s1), in dump_lds_to_mem()
12009 bld.sopc(aco_opcode::s_cmp_lg_i32, bld.def(s1, scc), Operand(lds_size, s1), Operand::c32(0)); in dump_lds_to_mem()
12014 bld.reset(ctx->block); in dump_lds_to_mem()
12017 bld.sopp(aco_opcode::s_barrier, 0u); in dump_lds_to_mem()
12020 bld.sop2(aco_opcode::s_lshl_b32, Definition(lds_size, s1), bld.def(s1, scc), in dump_lds_to_mem()
12024 bld.sop2(aco_opcode::s_add_u32, Definition(lds_size, s1), bld.def(s1, scc), in dump_lds_to_mem()
12028 bld.copy(Definition(soffset, s1), Operand::c32(base_offset)); in dump_lds_to_mem()
12031 bld.vop3(aco_opcode::v_mbcnt_lo_u32_b32, Definition(PhysReg{256}, v1), Operand::c32(-1u), in dump_lds_to_mem()
12033 bld.vop3(aco_opcode::v_mbcnt_hi_u32_b32_e64, Definition(PhysReg{256}, v1), Operand::c32(-1u), in dump_lds_to_mem()
12035 bld.vop2(aco_opcode::v_mul_u32_u24, Definition(PhysReg{256}, v1), Operand::c32(4u), in dump_lds_to_mem()
12038 Operand m = load_lds_size_m0(bld); in dump_lds_to_mem()
12043 bld.reset(ctx->block); in dump_lds_to_mem()
12046 bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg{257}, v1), Operand(PhysReg{256}, v1), in dump_lds_to_mem()
12049 bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg{257}, v1), Operand(PhysReg{256}, v1), in dump_lds_to_mem()
12053 bld.mubuf(aco_opcode::buffer_store_dword, Operand(rsrc), Operand(v1), in dump_lds_to_mem()
12059 bld.vop3(aco_opcode::v_mad_u32_u24, Definition(PhysReg{256}, v1), Operand::c32(4u), in dump_lds_to_mem()
12061 bld.sop2(aco_opcode::s_add_u32, Definition(soffset, s1), bld.def(s1, scc), in dump_lds_to_mem()
12064 const Temp cond = bld.sopc(aco_opcode::s_cmp_ge_u32, bld.def(s1, scc), in dump_lds_to_mem()
12076 bld.reset(ctx->block); in dump_lds_to_mem()
12080 bld.reset(ctx->block); in dump_lds_to_mem()
12111 Builder bld(ctx.program, ctx.block); in select_trap_handler_shader() local
12127 bld.sopk(aco_opcode::s_getreg_b32, Definition(save_wave_status, s1), ((32 - 1) << 11) | 2); in select_trap_handler_shader()
12130 bld.copy(Definition(save_m0, s1), Operand(m0, s1)); in select_trap_handler_shader()
12133 bld.sop1(Builder::s_or_saveexec, Definition(save_exec, bld.lm), Definition(scc, s1), in select_trap_handler_shader()
12134 Definition(exec, bld.lm), Operand::c32_or_c64(-1u, bld.lm == s2), in select_trap_handler_shader()
12135 Operand(exec, bld.lm)); in select_trap_handler_shader()
12141 bld.vop1(aco_opcode::v_clrexcp); in select_trap_handler_shader()
12149 bld.sop1(aco_opcode::s_sendmsg_rtn_b32, Definition(ttmp2_reg, s1), in select_trap_handler_shader()
12152 bld.sopk(aco_opcode::s_getreg_b32, Definition(ttmp2_reg, s1), ((32 - 1) << 11) | 18); in select_trap_handler_shader()
12155 bld.sop2(aco_opcode::s_lshl_b32, Definition(ttmp2_reg, s1), Definition(scc, s1), in select_trap_handler_shader()
12157 bld.copy(Definition(ttmp3_reg, s1), Operand::c32((unsigned)ctx.options->address32_hi)); in select_trap_handler_shader()
12160 bld.smem(aco_opcode::s_load_dwordx4, Definition(tma_rsrc, s4), Operand(ttmp2_reg, s2), in select_trap_handler_shader()
12167 dump_vgprs_to_mem(&ctx, bld, Operand(tma_rsrc, s4)); in select_trap_handler_shader()
12170 bld.copy(Definition(PhysReg{256}, v2) /* v[0-1] */, Operand(ttmp0_reg, s2)); in select_trap_handler_shader()
12172 bld.mubuf(aco_opcode::buffer_store_dwordx2, Operand(tma_rsrc, s4), Operand(v1), in select_trap_handler_shader()
12178 bld.smem(aco_opcode::s_load_dwordx4, Definition(tma_rsrc, s4), Operand(PhysReg{tma_lo}, s2), in select_trap_handler_shader()
12185 dump_vgprs_to_mem(&ctx, bld, Operand(tma_rsrc, s4)); in select_trap_handler_shader()
12188 bld.smem(aco_opcode::s_buffer_store_dwordx2, Operand(tma_rsrc, s4), Operand::c32(offset), in select_trap_handler_shader()
12210 bld.sopk(aco_opcode::s_getreg_b32, Definition(ttmp0_reg, s1), in select_trap_handler_shader()
12236 dump_lds_to_mem(&ctx, bld, Operand(tma_rsrc, s4)); in select_trap_handler_shader()
12242 bld.copy(Definition(m0, s1), Operand(save_m0, s1)); in select_trap_handler_shader()
12243 bld.copy(Definition(exec, bld.lm), Operand(save_exec, bld.lm)); in select_trap_handler_shader()
12246 bld.sopc(aco_opcode::s_bitcmp1_b32, bld.def(s1, scc), Operand(save_wave_status, s1), in select_trap_handler_shader()
12253 bld.sopp(aco_opcode::s_endpgm); in select_trap_handler_shader()
12268 load_vb_descs(Builder& bld, PhysReg dest, Operand base, unsigned start, unsigned max) in load_vb_descs() argument
12270 unsigned sgpr_limit = get_addr_sgpr_from_waves(bld.program, bld.program->min_waves); in load_vb_descs()
12276 bld.smem(aco_opcode::s_load_dwordx16, Definition(dest, s16), base, in load_vb_descs()
12279 bld.smem(aco_opcode::s_load_dwordx8, Definition(dest, s8), base, in load_vb_descs()
12282 bld.smem(aco_opcode::s_load_dwordx4, Definition(dest, s4), base, in load_vb_descs()
12293 wait_for_smem_loads(Builder& bld) in wait_for_smem_loads() argument
12295 if (bld.program->gfx_level >= GFX12) { in wait_for_smem_loads()
12296 bld.sopp(aco_opcode::s_wait_kmcnt, 0); in wait_for_smem_loads()
12300 bld.sopp(aco_opcode::s_waitcnt, lgkm_imm.pack(bld.program->gfx_level)); in wait_for_smem_loads()
12305 wait_for_vmem_loads(Builder& bld) in wait_for_vmem_loads() argument
12307 if (bld.program->gfx_level >= GFX12) { in wait_for_vmem_loads()
12308 bld.sopp(aco_opcode::s_wait_loadcnt, 0); in wait_for_vmem_loads()
12312 bld.sopp(aco_opcode::s_waitcnt, vm_imm.pack(bld.program->gfx_level)); in wait_for_vmem_loads()
12317 calc_nontrivial_instance_id(Builder& bld, const struct ac_shader_args* args, in calc_nontrivial_instance_id() argument
12322 bld.smem(aco_opcode::s_load_dwordx2, Definition(tmp_sgpr, s2), in calc_nontrivial_instance_id()
12325 wait_for_smem_loads(bld); in calc_nontrivial_instance_id()
12331 if (bld.program->gfx_level >= GFX8 && bld.program->gfx_level < GFX11) { in calc_nontrivial_instance_id()
12333 if (bld.program->gfx_level < GFX9) { in calc_nontrivial_instance_id()
12334 bld.vop1(aco_opcode::v_mov_b32, Definition(tmp_vgpr1, v1), div_info); in calc_nontrivial_instance_id()
12338 bld.vop2(aco_opcode::v_lshrrev_b32, fetch_index_def, div_info, instance_id); in calc_nontrivial_instance_id()
12341 if (bld.program->gfx_level >= GFX9) in calc_nontrivial_instance_id()
12342 instr = bld.vop2_sdwa(aco_opcode::v_add_u32, fetch_index_def, div_info, fetch_index).instr; in calc_nontrivial_instance_id()
12344 instr = bld.vop2_sdwa(aco_opcode::v_add_co_u32, fetch_index_def, Definition(vcc, bld.lm), in calc_nontrivial_instance_id()
12349 bld.vop3(aco_opcode::v_mul_hi_u32, fetch_index_def, Operand(tmp_sgpr.advance(4), s1), in calc_nontrivial_instance_id()
12353 bld.vop2_sdwa(aco_opcode::v_lshrrev_b32, fetch_index_def, div_info, fetch_index).instr; in calc_nontrivial_instance_id()
12359 bld.vop2(aco_opcode::v_lshrrev_b32, fetch_index_def, div_info, instance_id); in calc_nontrivial_instance_id()
12361 bld.vop3(aco_opcode::v_bfe_u32, tmp_def, div_info, Operand::c32(8u), Operand::c32(8u)); in calc_nontrivial_instance_id()
12362 bld.vadd32(fetch_index_def, tmp_op, fetch_index, false, Operand(s2), true); in calc_nontrivial_instance_id()
12364 bld.vop3(aco_opcode::v_mul_hi_u32, fetch_index_def, fetch_index, in calc_nontrivial_instance_id()
12367 bld.vop3(aco_opcode::v_bfe_u32, tmp_def, div_info, Operand::c32(16u), Operand::c32(8u)); in calc_nontrivial_instance_id()
12368 bld.vop2(aco_opcode::v_lshrrev_b32, fetch_index_def, tmp_op, fetch_index); in calc_nontrivial_instance_id()
12371 bld.vadd32(fetch_index_def, start_instance, fetch_index, false, Operand(s2), true); in calc_nontrivial_instance_id()
12388 Builder bld(program, block); in select_rt_prolog() local
12481 bld.smem(aco_opcode::s_load_dwordx2, Definition(tmp_raygen_sbt, s2), Operand(in_sbt_desc, s2), in select_rt_prolog()
12487 bld.sop1(aco_opcode::s_mov_b64, Definition(tmp_ring_offsets, s2), in select_rt_prolog()
12490 hw_init_scratch(bld, Definition(in_ring_offsets, s1), Operand(in_ring_offsets, s2), in select_rt_prolog()
12495 bld.vop1(aco_opcode::v_mov_b32, Definition(out_stack_ptr, v1), Operand(in_stack_base, s1)); in select_rt_prolog()
12498 bld.smem(aco_opcode::s_load_dwordx2, Definition(out_uniform_shader_addr, s2), in select_rt_prolog()
12502 bld.smem(aco_opcode::s_load_dword, Definition(out_launch_size_z, s1), in select_rt_prolog()
12504 bld.smem(aco_opcode::s_load_dwordx2, Definition(out_launch_size_x, s2), in select_rt_prolog()
12510 bld.vop3(aco_opcode::v_bfe_u32, Definition(in_local_ids[1], v1), Operand(in_local_ids[0], v1), in select_rt_prolog()
12512 bld.vop2(aco_opcode::v_and_b32, Definition(in_local_ids[0], v1), Operand::c32(0x7), in select_rt_prolog()
12517 bld.vop2_e64(aco_opcode::v_lshrrev_b32, Definition(out_launch_ids[2], v1), Operand::c32(16), in select_rt_prolog()
12519 bld.vop3(aco_opcode::v_mad_u32_u16, Definition(out_launch_ids[1], v1), in select_rt_prolog()
12523 bld.vop1(aco_opcode::v_mov_b32, Definition(out_launch_ids[2], v1), Operand(in_wg_id_z, s1)); in select_rt_prolog()
12524 bld.vop3(aco_opcode::v_mad_u32_u24, Definition(out_launch_ids[1], v1), in select_rt_prolog()
12528 bld.vop3(aco_opcode::v_mad_u32_u24, Definition(out_launch_ids[0], v1), Operand(in_wg_id_x, s1), in select_rt_prolog()
12533 bld.vop2_e64(aco_opcode::v_add_co_u32, Definition(out_record_ptr, v1), Definition(vcc, s2), in select_rt_prolog()
12536 bld.vop2_e64(aco_opcode::v_add_u32, Definition(out_record_ptr, v1), in select_rt_prolog()
12539 bld.vop1(aco_opcode::v_mov_b32, Definition(out_record_ptr.advance(4), v1), in select_rt_prolog()
12546 bld.sop2(aco_opcode::s_lshl_b32, Definition(tmp_wg_id_x_times_size, s1), Definition(scc, s1), in select_rt_prolog()
12550 bld.vop3(aco_opcode::v_mbcnt_lo_u32_b32, Definition(tmp_invocation_idx, v1), Operand::c32(-1u), in select_rt_prolog()
12554 bld.vop2(aco_opcode::v_mbcnt_hi_u32_b32, Definition(tmp_invocation_idx, v1), in select_rt_prolog()
12557 bld.vop3(aco_opcode::v_mbcnt_hi_u32_b32_e64, Definition(tmp_invocation_idx, v1), in select_rt_prolog()
12562 bld.sopc(aco_opcode::s_cmp_lg_u32, Definition(scc, s1), in select_rt_prolog()
12564 bld.sop2(Builder::s_cselect, Definition(vcc, bld.lm), in select_rt_prolog()
12567 bld.vop2(aco_opcode::v_cndmask_b32, Definition(out_launch_ids[0], v1), in select_rt_prolog()
12568 Operand(tmp_invocation_idx, v1), Operand(out_launch_ids[0], v1), Operand(vcc, bld.lm)); in select_rt_prolog()
12569 bld.vop2(aco_opcode::v_cndmask_b32, Definition(out_launch_ids[1], v1), Operand::zero(), in select_rt_prolog()
12570 Operand(out_launch_ids[1], v1), Operand(vcc, bld.lm)); in select_rt_prolog()
12574 bld.sop1(aco_opcode::s_mov_b32, in select_rt_prolog()
12577 bld.sop1(aco_opcode::s_mov_b64, Definition(get_arg_reg(out_args, out_args->ring_offsets), s2), in select_rt_prolog()
12582 bld.sop1(aco_opcode::s_setpc_b64, Operand(out_uniform_shader_addr, s2)); in select_rt_prolog()
12620 convert_unaligned_vs_attrib(Builder& bld, UnalignedVsAttribLoad load) in convert_unaligned_vs_attrib() argument
12630 bld.vop3(aco_opcode::v_lshl_or_b32, Definition(dst, v1), Operand(scratch, v1), in convert_unaligned_vs_attrib()
12635 if (bld.program->gfx_level >= GFX9) { in convert_unaligned_vs_attrib()
12636 bld.vop3(aco_opcode::v_lshl_or_b32, Definition(dst, v1), Operand(byte_reg, v1), in convert_unaligned_vs_attrib()
12639 bld.vop2(aco_opcode::v_lshlrev_b32, Definition(byte_reg, v1), Operand::c32(i * 8), in convert_unaligned_vs_attrib()
12641 bld.vop2(aco_opcode::v_or_b32, Definition(dst, v1), Operand(dst, v1), in convert_unaligned_vs_attrib()
12651 bld.vop3(aco_opcode::v_bfe_u32, Definition(chan[2], v1), Operand(dst, v1), Operand::c32(22), in convert_unaligned_vs_attrib()
12653 bld.vop3(aco_opcode::v_bfe_u32, Definition(chan[1], v1), Operand(dst, v1), Operand::c32(11), in convert_unaligned_vs_attrib()
12655 bld.vop3(aco_opcode::v_bfe_u32, Definition(chan[0], v1), Operand(dst, v1), Operand::c32(0), in convert_unaligned_vs_attrib()
12657 bld.vop2(aco_opcode::v_lshlrev_b32, Definition(chan[2], v1), Operand::c32(5), in convert_unaligned_vs_attrib()
12659 bld.vop2(aco_opcode::v_lshlrev_b32, Definition(chan[1], v1), Operand::c32(4), in convert_unaligned_vs_attrib()
12661 bld.vop2(aco_opcode::v_lshlrev_b32, Definition(chan[0], v1), Operand::c32(4), in convert_unaligned_vs_attrib()
12673 bld.vop3(bfe, Definition(chan[3], v1), Operand(dst, v1), Operand::c32(30), Operand::c32(2)); in convert_unaligned_vs_attrib()
12674 bld.vop3(bfe, Definition(chan[2], v1), Operand(dst, v1), Operand::c32(swapxz ? 0 : 20), in convert_unaligned_vs_attrib()
12676 bld.vop3(bfe, Definition(chan[1], v1), Operand(dst, v1), Operand::c32(10), Operand::c32(10)); in convert_unaligned_vs_attrib()
12677 bld.vop3(bfe, Definition(chan[0], v1), Operand(dst, v1), Operand::c32(swapxz ? 20 : 0), in convert_unaligned_vs_attrib()
12685 bld.vop3(aco_opcode::v_bfe_i32, Definition(dst, v1), Operand(dst, v1), Operand::c32(0), in convert_unaligned_vs_attrib()
12695 bld.vop1(aco_opcode::v_cvt_f32_f16, Definition(chan[i], v1), Operand(chan[i], v1)); in convert_unaligned_vs_attrib()
12698 bld.vop1(aco_opcode::v_cvt_f32_u32, Definition(chan[i], v1), Operand(chan[i], v1)); in convert_unaligned_vs_attrib()
12701 bld.vop1(aco_opcode::v_cvt_f32_i32, Definition(chan[i], v1), Operand(chan[i], v1)); in convert_unaligned_vs_attrib()
12713 bld.vop2(aco_opcode::v_mul_f32, Definition(chan[i], v1), in convert_unaligned_vs_attrib()
12717 bld.vop2(aco_opcode::v_mul_f32, Definition(chan[i], v1), in convert_unaligned_vs_attrib()
12719 bld.vop2(aco_opcode::v_max_f32, Definition(chan[i], v1), Operand::c32(0xbf800000), in convert_unaligned_vs_attrib()
12726 convert_current_unaligned_vs_attribs(Builder& bld, UnalignedVsAttribLoadState* state) in convert_current_unaligned_vs_attribs() argument
12731 wait_for_vmem_loads(bld); in convert_current_unaligned_vs_attribs()
12734 convert_unaligned_vs_attrib(bld, load); in convert_current_unaligned_vs_attribs()
12742 load_unaligned_vs_attrib(Builder& bld, PhysReg dst, Operand desc, Operand index, uint32_t offset, in load_unaligned_vs_attrib() argument
12751 load.d16 = bld.program->gfx_level >= GFX9 && !bld.program->dev.sram_ecc_enabled && size == 4; in load_unaligned_vs_attrib()
12762 convert_current_unaligned_vs_attribs(bld, state); in load_unaligned_vs_attrib()
12769 bld.mubuf(aco_opcode::buffer_load_ubyte_d16, Definition(dst, v1), desc, index, in load_unaligned_vs_attrib()
12771 bld.mubuf(aco_opcode::buffer_load_ubyte_d16_hi, Definition(dst, v1), desc, index, in load_unaligned_vs_attrib()
12773 bld.mubuf(aco_opcode::buffer_load_ubyte_d16, Definition(scratch, v1), desc, index, in load_unaligned_vs_attrib()
12775 bld.mubuf(aco_opcode::buffer_load_ubyte_d16_hi, Definition(scratch, v1), desc, index, in load_unaligned_vs_attrib()
12782 if (bld.program->gfx_level >= GFX12) { in load_unaligned_vs_attrib()
12788 bld.mubuf(aco_opcode::buffer_load_ubyte, def, desc, index, Operand::c32(soffset), in load_unaligned_vs_attrib()
12819 Builder bld(program, block); in select_vs_prolog() local
12825 bld.sopp(aco_opcode::s_setprio, 3); in select_vs_prolog()
12864 bld.sop1(aco_opcode::s_mov_b32, Definition(vertex_buffers, s1), in select_vs_prolog()
12867 bld.sopk(aco_opcode::s_movk_i32, Definition(vertex_buffers.advance(4), s1), in select_vs_prolog()
12870 bld.sop1(aco_opcode::s_mov_b32, Definition(vertex_buffers.advance(4), s1), in select_vs_prolog()
12885 load_vb_descs(bld, desc, Operand(vertex_buffers, s2), loc, pinfo->num_attributes - loc); in select_vs_prolog()
12892 bld.sop2(aco_opcode::s_bfm_b64, Definition(exec, s2), count, Operand::c32(0u)); in select_vs_prolog()
12894 bld.sopc(aco_opcode::s_bitcmp1_b32, Definition(scc, s1), count, in select_vs_prolog()
12896 bld.sop2(aco_opcode::s_cselect_b64, Definition(exec, s2), Operand::c64(UINT64_MAX), in select_vs_prolog()
12906 bld.sop2(aco_opcode::s_bfe_u32, Definition(vcc, s1), Definition(scc, s1), in select_vs_prolog()
12908 bld.sop2(Builder::s_cselect, Definition(vcc, bld.lm), Operand::c32(-1), Operand::zero(), in select_vs_prolog()
12916 bld.vop2(aco_opcode::v_cndmask_b32, Definition(get_arg_reg(args, dst_args[i]), v1), in select_vs_prolog()
12918 Operand(vcc, bld.lm)); in select_vs_prolog()
12923 bld.vadd32(Definition(vertex_index, v1), get_arg_fixed(args, args->base_vertex), in select_vs_prolog()
12926 bld.vadd32(Definition(instance_index, v1), start_instance, instance_id, false, in select_vs_prolog()
12929 bld.vop1(aco_opcode::v_mov_b32, Definition(start_instance_vgpr, v1), start_instance); in select_vs_prolog()
12932 wait_for_smem_loads(bld); in select_vs_prolog()
12945 bld, args, pinfo, index, instance_id, start_instance, prolog_input, in select_vs_prolog()
12980 load_unaligned_vs_attrib(bld, dest.advance(j * 4u), Operand(cur_desc, s4), in select_vs_prolog()
12983 bld.mtbuf(aco_opcode::tbuffer_load_format_xy, in select_vs_prolog()
12988 bld.mtbuf(aco_opcode::tbuffer_load_format_x, Definition(dest.advance(j * 4u), v1), in select_vs_prolog()
12997 bld.mubuf(aco_opcode::buffer_load_format_xyzw, Definition(dest, v4), in select_vs_prolog()
13028 bld.vop1(aco_opcode::v_mov_b32, Definition(dest.advance(j * 4u), v1), in select_vs_prolog()
13033 convert_current_unaligned_vs_attribs(bld, &unaligned_state); in select_vs_prolog()
13036 wait_for_vmem_loads(bld); in select_vs_prolog()
13047 bld.vop1(aco_opcode::v_cvt_u32_f32, Definition(alpha, v1), Operand(alpha, v1)); in select_vs_prolog()
13056 bld.vop3(aco_opcode::v_bfe_i32, Definition(alpha, v1), Operand(alpha, v1), in select_vs_prolog()
13061 bld.vop1(aco_opcode::v_cvt_f32_i32, Definition(alpha, v1), Operand(alpha, v1)); in select_vs_prolog()
13062 bld.vop2(aco_opcode::v_max_f32, Definition(alpha, v1), Operand::c32(0xbf800000u), in select_vs_prolog()
13065 bld.vop1(aco_opcode::v_cvt_f32_i32, Definition(alpha, v1), Operand(alpha, v1)); in select_vs_prolog()
13074 bld.smem(aco_opcode::s_load_dwordx2, Definition(prolog_input, s2), in select_vs_prolog()
13076 wait_for_smem_loads(bld); in select_vs_prolog()
13080 bld.sop1(aco_opcode::s_setpc_b64, continue_pc); in select_vs_prolog()
13101 Builder bld(ctx.program, ctx.block); in select_ps_epilog() local
13179 bld.reset(ctx.block); in select_ps_epilog()
13180 bld.sopp(aco_opcode::s_endpgm); in select_ps_epilog()