Lines Matching full:v1
30 //>> v1: %a, v1: %b, s1: %c, s1: %d = p_startpgm
31 if (!setup_cs("v1 v1 s1 s1", (chip_class)i))
34 //! v1: %res0 = v_mul_f32 %a, -%b
37 writeout(0, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], neg_b));
39 //~gfx9! v1: %neg_a = v_mul_f32 -1.0, %a
40 //~gfx9! v1: %res1 = v_mul_f32 0x123456, %neg_a
41 //~gfx10! v1: %res1 = v_mul_f32 0x123456, -%a
44 writeout(1, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0x123456u), neg_a));
46 //! v1: %res2 = v_mul_f32 %a, %b
49 writeout(2, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), neg_neg_a, inputs[1]));
51 //! v1: %res3 = v_mul_f32 |%a|, %b
54 writeout(3, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), abs_neg_a, inputs[1]));
56 //! v1: %res4 = v_mul_f32 -|%a|, %b
60 writeout(4, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), neg_abs_a, inputs[1]));
62 //! v1: %res5 = v_mul_f32 -%a, %b row_shl:1 bound_ctrl:1
64 … writeout(5, bld.vop2_dpp(aco_opcode::v_mul_f32, bld.def(v1), neg_a, inputs[1], dpp_row_sl(1)));
66 //! v1: %res6 = v_subrev_f32 %a, %b
68 writeout(6, bld.vop2(aco_opcode::v_add_f32, bld.def(v1), neg_a, inputs[1]));
70 //! v1: %res7 = v_sub_f32 %b, %a
72 writeout(7, bld.vop2(aco_opcode::v_add_f32, bld.def(v1), inputs[1], neg_a));
74 //! v1: %res8 = v_mul_f32 %a, -%c
76 Temp neg_c = fneg(bld.copy(bld.def(v1), inputs[2]));
77 writeout(8, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], neg_c));
79 // //! v1: %res9 = v_mul_f32 |%neg_a|, %b
82 writeout(9, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), abs_neg_abs_a, inputs[1]));
89 //>> v1: %a, v1: %b = p_startpgm
90 if (!setup_cs("v1 v1", GFX9))
97 //! v1: %res0 = v_add_f32 %a, %b *0.5
99 Temp tmp = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), inputs[0], inputs[1]);
100 writeout(0, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0x3f000000u), tmp));
102 //! v1: %res1 = v_add_f32 %a, %b *2
104 tmp = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), inputs[0], inputs[1]);
105 writeout(1, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0x40000000u), tmp));
107 //! v1: %res2 = v_add_f32 %a, %b *4
109 tmp = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), inputs[0], inputs[1]);
110 writeout(2, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0x40800000u), tmp));
112 //! v1: %res3 = v_add_f32 %a, %b clamp
114 tmp = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), inputs[0], inputs[1]);
115 writeout(3, bld.vop3(aco_opcode::v_med3_f32, bld.def(v1), Operand::zero(),
118 //! v1: %res4 = v_add_f32 %a, %b *2 clamp
120 tmp = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), inputs[0], inputs[1]);
121 tmp = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0x40000000u), tmp);
122 writeout(4, bld.vop3(aco_opcode::v_med3_f32, bld.def(v1), Operand::zero(),
157 //! v1: %res10_tmp = v_add_f32 %a, %b clamp
158 //! v1: %res10 = v_mul_f32 2.0, %res10_tmp
160 tmp = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), inputs[0], inputs[1]);
161 tmp = bld.vop3(aco_opcode::v_med3_f32, bld.def(v1), Operand::zero(), Operand::c32(0x3f800000u),
163 writeout(10, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0x40000000u), tmp));
167 //! v1: %res11_tmp = v_xor_b32 %a, %b
168 //! v1: %res11 = v_mul_f32 2.0, %res11_tmp
170 tmp = bld.vop2(aco_opcode::v_xor_b32, bld.def(v1), inputs[0], inputs[1]);
171 writeout(11, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0x40000000u), tmp));
175 //! v1: %res12_tmp = v_add_f32 %a, %b
177 //! v1: %res12 = v_mul_f32 2.0, %res12_tmp
179 tmp = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), inputs[0], inputs[1]);
181 writeout(12, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0x40000000u), tmp));
183 //! v1: %res13 = v_add_f32 %a, %b
185 tmp = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), inputs[0], inputs[1]);
186 bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0x40000000u), tmp);
197 //! v1: %res14_tmp = v_add_f32 %a, %b
198 //! v1: %res14 = v_mul_f32 2.0, %res14_tmp
200 tmp = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), inputs[0], inputs[1]);
201 writeout(14, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0x40000000u), tmp));
203 //! v1: %res15 = v_add_f32 %a, %b clamp
205 tmp = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), inputs[0], inputs[1]);
206 writeout(15, bld.vop3(aco_opcode::v_med3_f32, bld.def(v1), Operand::zero(),
237 //! v1: %res18_tmp = v_add_f32 %a, %b
238 //! v1: %res18 = v_mul_f32 2.0, %res18_tmp
240 tmp = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), inputs[0], inputs[1]);
241 writeout(18, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0x40000000u), tmp));
242 //! v1: %res19 = v_add_f32 %a, %b clamp
244 tmp = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), inputs[0], inputs[1]);
245 writeout(19, bld.vop3(aco_opcode::v_med3_f32, bld.def(v1), Operand::zero(),
269 …return bld.vop2_e64(aco_opcode::v_subbrev_co_u32, bld.def(v1), bld.hint_vcc(bld.def(bld.lm)), op0,… in create_subbrev_co()
274 //>> v1: %a, s1: %b, s2: %c = p_startpgm
275 if (!setup_cs("v1 s1 s2", (chip_class)i))
280 //! v1: %res0 = v_cndmask_b32 0, %a, %c
283 writeout(0, bld.vop2(aco_opcode::v_and_b32, bld.def(v1), inputs[0], subbrev));
285 //! v1: %res1 = v_cndmask_b32 0, 42, %c
288 writeout(1, bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(42u), subbrev));
290 //~gfx9! v1: %subbrev, s2: %_ = v_subbrev_co_u32 0, 0, %c
291 //~gfx9! v1: %res2 = v_and_b32 %b, %subbrev
292 //~gfx10! v1: %res2 = v_cndmask_b32 0, %b, %c
295 writeout(2, bld.vop2(aco_opcode::v_and_b32, bld.def(v1), inputs[1], subbrev));
297 //! v1: %subbrev1, s2: %_ = v_subbrev_co_u32 0, 0, %c
298 //! v1: %xor = v_xor_b32 %a, %subbrev1
299 //! v1: %res3 = v_cndmask_b32 0, %xor, %c
302 Temp xor_a = bld.vop2(aco_opcode::v_xor_b32, bld.def(v1), inputs[0], subbrev);
303 writeout(3, bld.vop2(aco_opcode::v_and_b32, bld.def(v1), xor_a, subbrev));
305 //! v1: %res4 = v_cndmask_b32 0, %a, %c
307 Temp cndmask = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::zero(),
309 Temp sub = bld.vsub32(bld.def(v1), Operand::zero(), cndmask);
310 writeout(4, bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand(inputs[0]), sub));
318 //>> s1: %a, v1: %b = p_startpgm
319 if (!setup_cs("s1 v1", (chip_class)i))
335 //~gfx8! v1: %add_co1, s2: %_ = v_add_co_u32 %lshl1, %b
336 //~gfx8! v1: %res1, s2: %_ = v_add_co_u32 %add1, %add_co1
338 //~gfx(9|10)! v1: %lshl_add = v_lshl_add_u32 %a, 3, %b
339 //~gfx(9|10)! v1: %res1 = v_add_u32 %lshl1, %lshl_add
345 Temp vadd = bld.vadd32(bld.def(v1), shift, Operand(inputs[1]));
346 writeout(1, bld.vadd32(bld.def(v1), sadd, vadd));
349 //~gfx8! v1: %res2, s2: %_ = v_add_co_u32 %lshl2, %b
350 //~gfx(9|10)! v1: %res2 = v_lshl_add_u32 %a, 3, %b
354 writeout(2, bld.vadd32(bld.def(v1), lshl, Operand(inputs[1])));
357 //~gfx8! v1: %res3, s2: %_ = v_add_co_u32 %lshl3, %b
358 //~gfx(9|10)! v1: %res3 = v_lshl_add_u32 (is24bit)%a, 7, %b
363 writeout(3, bld.vadd32(bld.def(v1), lshl, Operand(inputs[1])));
366 //~gfx(8|9)! v1: %res4, s2: %carry = v_add_co_u32 %lshl4, %b
367 //~gfx10! v1: %res4, s2: %carry = v_add_co_u32_e64 %lshl4, %b
370 Temp carry = bld.vadd32(bld.def(v1), lshl, Operand(inputs[1]), true).def(1).getTemp();
374 //~gfx8! v1: %res5, s2: %_ = v_add_co_u32 %lshl5, %b
375 //~gfx(9|10)! v1: %res5 = v_lshl_add_u32 (is24bit)%a, (is24bit)%a, %b
378 writeout(5, bld.vadd32(bld.def(v1), lshl, Operand(inputs[1])));
380 //~gfx8! v1: %res6 = v_mad_u32_u24 (is24bit)%a, 8, %b
381 //~gfx(9|10)! v1: %res6 = v_lshl_add_u32 (is24bit)%a, 3, %b
384 writeout(6, bld.vadd32(bld.def(v1), lshl, Operand(inputs[1])));
386 //~gfx8! v1: %res7 = v_mad_u32_u24 (is16bit)%a, 16, %b
387 //~gfx(9|10)! v1: %res7 = v_lshl_add_u32 (is16bit)%a, 4, %b
392 writeout(7, bld.vadd32(bld.def(v1), lshl, Operand(inputs[1])));
400 //>> v1: %a, s1: %b = p_startpgm
401 if (!setup_cs("v1 s1", (chip_class)i))
406 //! v1: %res0 = v_bcnt_u32_b32 %a, %a
408 bcnt = bld.vop3(aco_opcode::v_bcnt_u32_b32, bld.def(v1), Operand(inputs[0]), Operand::zero());
409 writeout(0, bld.vadd32(bld.def(v1), bcnt, Operand(inputs[0])));
411 //! v1: %res1 = v_bcnt_u32_b32 %a, %b
413 bcnt = bld.vop3(aco_opcode::v_bcnt_u32_b32, bld.def(v1), Operand(inputs[0]), Operand::zero());
414 writeout(1, bld.vadd32(bld.def(v1), bcnt, Operand(inputs[1])));
416 //! v1: %res2 = v_bcnt_u32_b32 %a, 42
418 bcnt = bld.vop3(aco_opcode::v_bcnt_u32_b32, bld.def(v1), Operand(inputs[0]), Operand::zero());
419 writeout(2, bld.vadd32(bld.def(v1), bcnt, Operand::c32(42u)));
421 //! v1: %bcnt3 = v_bcnt_u32_b32 %b, 0
422 //~gfx8! v1: %res3, s2: %_ = v_add_co_u32 %bcnt3, %a
423 //~gfx(9|10)! v1: %res3 = v_add_u32 %bcnt3, %a
425 bcnt = bld.vop3(aco_opcode::v_bcnt_u32_b32, bld.def(v1), Operand(inputs[1]), Operand::zero());
426 writeout(3, bld.vadd32(bld.def(v1), bcnt, Operand(inputs[0])));
428 //! v1: %bcnt4 = v_bcnt_u32_b32 %a, 0
429 //~gfx(8|9)! v1: %add4, s2: %carry = v_add_co_u32 %bcnt4, %a
430 //~gfx10! v1: %add4, s2: %carry = v_add_co_u32_e64 %bcnt4, %a
432 bcnt = bld.vop3(aco_opcode::v_bcnt_u32_b32, bld.def(v1), Operand(inputs[0]), Operand::zero());
433 Temp carry = bld.vadd32(bld.def(v1), bcnt, Operand(inputs[0]), true).def(1).getTemp();
475 if (!setup_cs("v1 v1 v1", GFX9, CHIP_UNKNOWN, cfg.name))
487 //>> v1: %a, v1: %b, v1: %c = p_startpgm
489 //! v1: %res0 = @med3 @ub, @lb, %a
491 writeout(0, bld.vop2(cfg.min, bld.def(v1), cfg.ub,
492 bld.vop2(cfg.max, bld.def(v1), cfg.lb, inputs[0])));
494 //! v1: %res1 = @med3 @lb, @ub, %a
496 writeout(1, bld.vop2(cfg.max, bld.def(v1), cfg.lb,
497 bld.vop2(cfg.min, bld.def(v1), cfg.ub, inputs[0])));
500 //! v1: %res2_tmp = @min @lb, %a
501 //! v1: %res2 = @max @ub, %res2_tmp
503 writeout(2, bld.vop2(cfg.max, bld.def(v1), cfg.ub,
504 bld.vop2(cfg.min, bld.def(v1), cfg.lb, inputs[0])));
506 //! v1: %res3_tmp = @max @ub, %a
507 //! v1: %res3 = @min @lb, %res3_tmp
509 writeout(3, bld.vop2(cfg.min, bld.def(v1), cfg.lb,
510 bld.vop2(cfg.max, bld.def(v1), cfg.ub, inputs[0])));
514 //! v1: %res4_tmp = @max @lb, %a
515 //! v1: %res4 = @min %b, %res4_tmp
517 writeout(4, bld.vop2(cfg.min, bld.def(v1), inputs[1],
518 bld.vop2(cfg.max, bld.def(v1), cfg.lb, inputs[0])));
520 //! v1: %res5_tmp = @max %b, %a
521 //! v1: %res5 = @min @ub, %res5_tmp
523 writeout(5, bld.vop2(cfg.min, bld.def(v1), cfg.ub,
524 bld.vop2(cfg.max, bld.def(v1), inputs[1], inputs[0])));
526 //! v1: %res6_tmp = @max %c, %a
527 //! v1: %res6 = @min %b, %res6_tmp
529 writeout(6, bld.vop2(cfg.min, bld.def(v1), inputs[1],
530 bld.vop2(cfg.max, bld.def(v1), inputs[2], inputs[0])));
534 //! v1: %res7 = @med3 @ub, @lb, %a
536 Builder::Result max = bld.vop2(cfg.max, bld.def(v1), cfg.lb, inputs[0]);
538 Builder::Result min = bld.vop2(cfg.min, bld.def(v1), cfg.ub, max);
542 //! v1: (precise)%res8_tmp = @min @ub, %a
543 //! v1: %res8 = @max @lb, %res8_tmp
545 min = bld.vop2(cfg.min, bld.def(v1), cfg.ub, inputs[0]);
547 writeout(8, bld.vop2(cfg.max, bld.def(v1), cfg.lb, min));
554 //>> v1: %a, v1: %b, v2: %c, v1: %d = p_startpgm
555 if (!setup_cs("v1 v1 v2 v1", GFX9))
578 bld.copy(bld.def(v1), Operand::c32(0x40a00000u)), inputs[0])));
600 bld.copy(bld.def(v1), Operand::c32(0x40a00000u)), inputs[0])));
687 //>> v1: %a, v1: %b, v1: %c = p_startpgm
688 if (!setup_cs("v1 v1 v1", GFX9))
691 //! v1: %res0 = v_add3_u32 %a, %b, %c
693 Builder::Result tmp = bld.vop2(aco_opcode::v_add_u32, bld.def(v1), inputs[1], inputs[2]);
694 writeout(0, bld.vop2(aco_opcode::v_add_u32, bld.def(v1), inputs[0], tmp));
696 //! v1: %tmp1 = v_add_u32 %b, %c clamp
697 //! v1: %res1 = v_add_u32 %a, %tmp1
699 tmp = bld.vop2_e64(aco_opcode::v_add_u32, bld.def(v1), inputs[1], inputs[2]);
701 writeout(1, bld.vop2(aco_opcode::v_add_u32, bld.def(v1), inputs[0], tmp));
703 //! v1: %tmp2 = v_add_u32 %b, %c
704 //! v1: %res2 = v_add_u32 %a, %tmp2 clamp
706 tmp = bld.vop2(aco_opcode::v_add_u32, bld.def(v1), inputs[1], inputs[2]);
707 tmp = bld.vop2_e64(aco_opcode::v_add_u32, bld.def(v1), inputs[0], tmp);
716 //>> v1: %a = p_startpgm
717 if (!setup_cs("v1", (chip_class)i))
720 //! v1: %res0 = v_max3_f32 0, -0, %a
723 Temp min = bld.vop2(aco_opcode::v_min_f32, bld.def(v1), Operand::zero(), xor0);
725 writeout(0, bld.vop2(aco_opcode::v_max_f32, bld.def(v1), Operand::zero(), xor1));
727 //! v1: %res1 = v_max3_f32 0, -0, -%a
729 min = bld.vop2(aco_opcode::v_min_f32, bld.def(v1), Operand::zero(), Operand(inputs[0]));
731 writeout(1, bld.vop2(aco_opcode::v_max_f32, bld.def(v1), Operand::zero(), xor1));
739 //>> v1: %a, v1: %b, v1: %c = p_startpgm
740 if (!setup_cs("v1 v1 v1", (chip_class)i))
743 //! v1: %res0 = v_mad_u32_u24 %b, %c, %a
745 Temp mul = bld.vop2(aco_opcode::v_mul_u32_u24, bld.def(v1), inputs[1], inputs[2]);
746 writeout(0, bld.vadd32(bld.def(v1), inputs[0], mul));
748 //! v1: %res1_tmp = v_mul_u32_u24 %b, %c
749 //! v1: %_, s2: %res1 = v_add_co_u32 %a, %res1_tmp
751 mul = bld.vop2(aco_opcode::v_mul_u32_u24, bld.def(v1), inputs[1], inputs[2]);
752 writeout(1, bld.vadd32(bld.def(v1), inputs[0], mul, true).def(1).getTemp());
760 //>> v1: %a, v1: %b, s1: %c = p_startpgm
761 if (!setup_cs("v1 v1 s1", (chip_class)i))
766 //~gfx8! v1: %lshl0 = v_lshlrev_b32 3, %a
767 //~gfx8! v1: %res0, s2: %_ = v_add_co_u32 %lshl0, %b
768 //~gfx(9|10)! v1: %res0 = v_lshl_add_u32 %a, 3, %b
770 lshl = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(3u), Operand(inputs[0]));
771 writeout(0, bld.vadd32(bld.def(v1), lshl, Operand(inputs[1])));
773 //~gfx8! v1: %lshl1 = v_lshlrev_b32 7, (is24bit)%a
774 //~gfx8! v1: %res1, s2: %_ = v_add_co_u32 %lshl1, %b
775 //~gfx(9|10)! v1: %res1 = v_lshl_add_u32 (is24bit)%a, 7, %b
779 lshl = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(7u), a_24bit);
780 writeout(1, bld.vadd32(bld.def(v1), lshl, Operand(inputs[1])));
782 //~gfx8! v1: %lshl2 = v_lshlrev_b32 (is24bit)%a, (is24bit)%b
783 //~gfx8! v1: %res2, s2: %_ = v_add_co_u32 %lshl2, %b
784 //~gfx(9|10)! v1: %res2 = v_lshl_add_u32 (is24bit)%b, (is24bit)%a, %b
788 lshl = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), a_24bit, b_24bit);
789 writeout(2, bld.vadd32(bld.def(v1), lshl, Operand(inputs[1])));
791 //~gfx8! v1: %res3 = v_mad_u32_u24 (is24bit)%a, 8, %b
792 //~gfx(9|10)! v1: %res3 = v_lshl_add_u32 (is24bit)%a, 3, %b
794 lshl = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(3u), a_24bit);
795 writeout(3, bld.vadd32(bld.def(v1), lshl, Operand(inputs[1])));
797 //~gfx8! v1: %res4 = v_mad_u32_u24 (is16bit)%a, 16, %b
798 //~gfx(9|10)! v1: %res4 = v_lshl_add_u32 (is16bit)%a, 4, %b
802 lshl = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(4u), a_16bit);
803 writeout(4, bld.vadd32(bld.def(v1), lshl, Operand(inputs[1])));
805 //~gfx8! v1: %res5 = v_mad_u32_u24 (is24bit)%c, 16, %c
806 //~gfx(9|10)! v1: %res5 = v_lshl_add_u32 (is24bit)%c, 4, %c
810 lshl = bld.vop2_e64(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(4u), c_24bit);
811 writeout(5, bld.vadd32(bld.def(v1), lshl, Operand(inputs[2])));
856 return bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::zero(), val, inputs[1]); in emit_denorm_srcdest()
858 return bld.vop2(aco_opcode::v_min_f32, bld.def(v1), Operand::zero(), val); in emit_denorm_srcdest()
860 return bld.vop1(aco_opcode::v_rcp_f32, bld.def(v1), val); in emit_denorm_srcdest()
889 if (!setup_cs("v1 s2", (chip_class)i, CHIP_UNKNOWN, subvariant))
901 //>> v1: %a, s2: %b = p_startpgm
903 //; patterns = {'cndmask': 'v1: %{} = v_cndmask_b32 0, {}, %b',
904 //; 'min': 'v1: %{} = v_min_f32 0, {}',
905 //; 'rcp': 'v1: %{} = v_rcp_f32 {}'}
906 //; ops = {'mul1': 'v1: %{} = v_mul_f32 1.0, %{}',
907 //; 'fneg': 'v1: %{} = v_mul_f32 -1.0, %{}',
908 //; 'fabs': 'v1: %{} = v_mul_f32 1.0, |%{}|',
909 //; 'fnegabs': 'v1: %{} = v_mul_f32 -1.0, |%{}|'}
927 //; insert_pattern('v1: %res = v_cndmask_b32 0, {}, %b'.format(name))
935 val = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0x3f800000u), val);
949 0, bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::zero(), val, inputs[1]));
957 //>> v1: %a, v1: %b, s2: %c, s1: %d = p_startpgm
958 if (!setup_cs("v1 v1 s2 s1", GFX10_3))
967 //! v1: %res0 = v_add_f32 %a, %b row_mirror bound_ctrl:1
969 Temp tmp0 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), a, dpp_row_mirror);
970 Temp res0 = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), tmp0, b);
974 //! v1: %res1 = v_subrev_f32 %a, %b row_mirror bound_ctrl:1
976 Temp tmp1 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), a, dpp_row_mirror);
977 Temp res1 = bld.vop2(aco_opcode::v_sub_f32, bld.def(v1), b, tmp1);
980 //! v1: %tmp2 = v_mov_b32 %a row_mirror bound_ctrl:1
981 //! v1: %res2 = v_sub_f32 %b, %tmp2 row_half_mirror bound_ctrl:1
983 Temp tmp2 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), a, dpp_row_mirror);
984 Temp res2 = bld.vop2_dpp(aco_opcode::v_sub_f32, bld.def(v1), b, tmp2, dpp_row_half_mirror);
988 //! v1: %res3 = v_add_f32 -%a, %b row_mirror bound_ctrl:1
990 auto tmp3 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), a, dpp_row_mirror);
992 Temp res3 = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), tmp3, b);
995 //! v1: %res4 = v_add_f32 -%a, %b row_mirror bound_ctrl:1
997 Temp tmp4 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), a, dpp_row_mirror);
998 auto res4 = bld.vop2_e64(aco_opcode::v_add_f32, bld.def(v1), tmp4, b);
1002 //! v1: %tmp5 = v_mov_b32 %a row_mirror bound_ctrl:1
1003 //! v1: %res5 = v_add_f32 %tmp5, %b clamp
1005 Temp tmp5 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), a, dpp_row_mirror);
1006 auto res5 = bld.vop2_e64(aco_opcode::v_add_f32, bld.def(v1), tmp5, b);
1010 //! v1: %res6 = v_add_f32 |%a|, %b row_mirror bound_ctrl:1
1012 auto tmp6 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), a, dpp_row_mirror);
1014 auto res6 = bld.vop2_e64(aco_opcode::v_add_f32, bld.def(v1), tmp6, b);
1018 //! v1: %res7 = v_subrev_f32 %a, |%b| row_mirror bound_ctrl:1
1020 Temp tmp7 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), a, dpp_row_mirror);
1021 auto res7 = bld.vop2_e64(aco_opcode::v_sub_f32, bld.def(v1), b, tmp7);
1026 //! v1: %res8 = v_cndmask_b32 %a, %b, %c:vcc row_mirror bound_ctrl:1
1028 Temp tmp8 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), a, dpp_row_mirror);
1029 Temp res8 = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), tmp8, b, c);
1033 //! v1: %tmp9 = v_mov_b32 %a row_mirror bound_ctrl:1
1034 //! v1: %res9 = v_add_f32 %tmp9, %d
1036 Temp tmp9 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), a, dpp_row_mirror);
1037 Temp res9 = bld.vop2_e64(aco_opcode::v_add_f32, bld.def(v1), tmp9, d);
1040 //! v1: %tmp10 = v_mov_b32 %a row_mirror bound_ctrl:1
1041 //! v1: %res10 = v_add_f32 %d, %tmp10
1043 Temp tmp10 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), a, dpp_row_mirror);
1044 Temp res10 = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), d, tmp10);
1051 //>> v1: %a, s1: %b = p_startpgm
1052 if (!setup_cs("v1 s1", GFX10))
1055 //! v1: %one = p_parallelcopy 1
1056 //! v1: %res0 = v_mul_f32 1, %a
1058 Temp one = bld.copy(bld.def(v1), Operand::c32(1));
1059 writeout(0, bld.vop2_dpp(aco_opcode::v_mul_f32, bld.def(v1), one, inputs[0], dpp_row_sl(1)));
1061 //! v1: %res1 = v_mul_f32 %a, %one row_shl:1 bound_ctrl:1
1063 writeout(1, bld.vop2_dpp(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], one, dpp_row_sl(1)));
1065 //! v1: %res2 = v_mul_f32 0x12345678, %a
1067 Temp literal1 = bld.copy(bld.def(v1), Operand::c32(0x12345678u));
1068 … writeout(2, bld.vop2_dpp(aco_opcode::v_mul_f32, bld.def(v1), literal1, inputs[0], dpp_row_sl(1)));
1070 //! v1: %literal2 = p_parallelcopy 0x12345679
1071 //! v1: %res3 = v_mul_f32 %a, %literal2 row_shl:1 bound_ctrl:1
1073 Temp literal2 = bld.copy(bld.def(v1), Operand::c32(0x12345679u));
1074 … writeout(3, bld.vop2_dpp(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], literal2, dpp_row_sl(1)));
1076 //! v1: %b_v = p_parallelcopy %b
1077 //! v1: %res4 = v_mul_f32 %b, %a
1079 Temp b_v = bld.copy(bld.def(v1), inputs[1]);
1080 writeout(4, bld.vop2_dpp(aco_opcode::v_mul_f32, bld.def(v1), b_v, inputs[0], dpp_row_sl(1)));
1082 //! v1: %res5 = v_mul_f32 %a, %b_v row_shl:1 bound_ctrl:1
1084 writeout(5, bld.vop2_dpp(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], b_v, dpp_row_sl(1)));
1086 //! v1: %res6 = v_rcp_f32 %b
1088 writeout(6, bld.vop1_dpp(aco_opcode::v_rcp_f32, bld.def(v1), b_v, dpp_row_sl(1)));