1 /* 2 * Copyright © 2021 Valve Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 */ 24 25 #include "helpers.h" 26 27 using namespace aco; 28 29 BEGIN_TEST(optimizer_postRA.vcmp) 30 PhysReg reg_v0(256); 31 PhysReg reg_s0(0); 32 PhysReg reg_s2(2); 33 PhysReg reg_s4(4); 34 35 //>> v1: %a:v[0] = p_startpgm 36 ASSERTED bool setup_ok = setup_cs("v1", GFX8); 37 assert(setup_ok); 38 39 auto &startpgm = bld.instructions->at(0); 40 assert(startpgm->opcode == aco_opcode::p_startpgm); 41 startpgm->definitions[0].setFixed(reg_v0); 42 43 Temp v_in = inputs[0]; 44 45 { 46 /* Recognize when the result of VOPC goes to VCC, and use that for the branching then. */ 47 48 //! s2: %b:vcc = v_cmp_eq_u32 0, %a:v[0] 49 //! s2: %e:s[2-3] = p_cbranch_z %b:vcc 50 //! p_unit_test 0, %e:s[2-3] 51 auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(), 52 Operand(v_in, reg_v0)); 53 auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp), Operand(exec, bld.lm)); 54 auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp())); 55 writeout(0, Operand(br, reg_s2)); 56 } 57 58 //; del b, e 59 60 { 61 /* When VCC is overwritten inbetween, don't optimize. */ 62 63 //! s2: %b:vcc = v_cmp_eq_u32 0, %a:v[0] 64 //! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:vcc, %x:exec 65 //! s2: %f:vcc = s_mov_b64 0 66 //! s2: %e:s[2-3] = p_cbranch_z %d:scc 67 //! p_unit_test 1, %e:s[2-3], %f:vcc 68 auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(), 69 Operand(v_in, reg_v0)); 70 auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp), Operand(exec, bld.lm)); 71 auto ovrwr = bld.sop1(Builder::s_mov, bld.def(bld.lm, vcc), Operand::zero()); 72 auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp())); 73 writeout(1, Operand(br, reg_s2), Operand(ovrwr, vcc)); 74 } 75 76 //; del b, c, d, e, f 77 78 { 79 /* When the result of VOPC goes to an SGPR pair other than VCC, don't optimize */ 80 81 //! s2: %b:s[4-5] = v_cmp_eq_u32 0, %a:v[0] 82 //! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:s[4-5], %x:exec 83 //! s2: %e:s[2-3] = p_cbranch_z %d:scc 84 //! p_unit_test 2, %e:s[2-3] 85 auto vcmp = bld.vopc_e64(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, reg_s4), Operand::zero(), 86 Operand(v_in, reg_v0)); 87 auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), Operand(vcmp, reg_s4), Operand(exec, bld.lm)); 88 auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp())); 89 writeout(2, Operand(br, reg_s2)); 90 } 91 92 //; del b, c, d, e 93 94 { 95 /* When the VCC isn't written by VOPC, don't optimize */ 96 97 //! s2: %b:vcc, s1: %f:scc = s_or_b64 1, %0:s[4-5] 98 //! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:vcc, %x:exec 99 //! s2: %e:s[2-3] = p_cbranch_z %d:scc 100 //! p_unit_test 2, %e:s[2-3] 101 auto salu = bld.sop2(Builder::s_or, bld.def(bld.lm, vcc), bld.def(s1, scc), 102 Operand::c32(1u), Operand(reg_s4, bld.lm)); 103 auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), Operand(salu, vcc), Operand(exec, bld.lm)); 104 auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp())); 105 writeout(2, Operand(br, reg_s2)); 106 } 107 108 //; del b, c, d, e, f, x 109 110 { 111 /* When EXEC is overwritten inbetween, don't optimize. */ 112 113 //! s2: %b:vcc = v_cmp_eq_u32 0, %a:v[0] 114 //! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:vcc, %x:exec 115 //! s2: %f:exec = s_mov_b64 42 116 //! s2: %e:s[2-3] = p_cbranch_z %d:scc 117 //! p_unit_test 4, %e:s[2-3], %f:exec 118 auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(), 119 Operand(v_in, reg_v0)); 120 auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp), Operand(exec, bld.lm)); 121 auto ovrwr = bld.sop1(Builder::s_mov, bld.def(bld.lm, exec), Operand::c32(42u)); 122 auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp())); 123 writeout(4, Operand(br, reg_s2), Operand(ovrwr, exec)); 124 } 125 126 //; del b, c, d, e, f, x 127 128 finish_optimizer_postRA_test(); 129 END_TEST 130 131 BEGIN_TEST(optimizer_postRA.scc_nocmp_opt) 132 //>> s1: %a, s2: %y, s1: %z = p_startpgm 133 ASSERTED bool setup_ok = setup_cs("s1 s2 s1", GFX6); 134 assert(setup_ok); 135 136 PhysReg reg_s0{0}; 137 PhysReg reg_s1{1}; 138 PhysReg reg_s2{2}; 139 PhysReg reg_s3{3}; 140 PhysReg reg_s4{4}; 141 PhysReg reg_s6{6}; 142 143 Temp in_0 = inputs[0]; 144 Temp in_1 = inputs[1]; 145 Temp in_2 = inputs[2]; 146 Operand op_in_0(in_0); 147 op_in_0.setFixed(reg_s0); 148 Operand op_in_1(in_1); 149 op_in_1.setFixed(reg_s4); 150 Operand op_in_2(in_2); 151 op_in_2.setFixed(reg_s6); 152 153 { 154 //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018 155 //! s2: %f:vcc = p_cbranch_nz %e:scc 156 //! p_unit_test 0, %f:vcc 157 auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0, 158 Operand::c32(0x40018u)); 159 auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2), 160 Operand::zero()); 161 auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp)); 162 writeout(0, Operand(br, vcc)); 163 } 164 165 //; del d, e, f 166 167 { 168 //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018 169 //! s2: %f:vcc = p_cbranch_z %e:scc 170 //! p_unit_test 1, %f:vcc 171 auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0, 172 Operand::c32(0x40018u)); 173 auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand(salu, reg_s2), 174 Operand::zero()); 175 auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp)); 176 writeout(1, Operand(br, vcc)); 177 } 178 179 //; del d, e, f 180 181 { 182 //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018 183 //! s2: %f:vcc = p_cbranch_z %e:scc 184 //! p_unit_test 2, %f:vcc 185 auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0, 186 Operand::c32(0x40018u)); 187 auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2), 188 Operand::zero()); 189 auto br = bld.branch(aco_opcode::p_cbranch_nz, bld.def(s2, vcc), bld.scc(scmp)); 190 writeout(2, Operand(br, vcc)); 191 } 192 193 //; del d, e, f 194 195 { 196 //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018 197 //! s2: %f:vcc = p_cbranch_nz %e:scc 198 //! p_unit_test 3, %f:vcc 199 auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0, 200 Operand::c32(0x40018u)); 201 auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand(salu, reg_s2), 202 Operand::zero()); 203 auto br = bld.branch(aco_opcode::p_cbranch_nz, bld.def(s2, vcc), bld.scc(scmp)); 204 writeout(3, Operand(br, vcc)); 205 } 206 207 //; del d, e, f 208 209 { 210 //! s2: %d:s[2-3], s1: %e:scc = s_and_b64 %y:s[4-5], 0x12345 211 //! s2: %f:vcc = p_cbranch_z %e:scc 212 //! p_unit_test 4, %f:vcc 213 auto salu = bld.sop2(aco_opcode::s_and_b64, bld.def(s2, reg_s2), bld.def(s1, scc), op_in_1, 214 Operand::c32(0x12345u)); 215 auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u64, bld.def(s1, scc), Operand(salu, reg_s2), 216 Operand::zero(8)); 217 auto br = bld.branch(aco_opcode::p_cbranch_nz, bld.def(s2, vcc), bld.scc(scmp)); 218 writeout(4, Operand(br, vcc)); 219 } 220 221 //; del d, e, f 222 223 { 224 /* SCC is overwritten in between, don't optimize */ 225 226 //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018 227 //! s1: %h:s[3], s1: %x:scc = s_add_u32 %a:s[0], 1 228 //! s1: %g:scc = s_cmp_eq_u32 %d:s[2], 0 229 //! s2: %f:vcc = p_cbranch_z %g:scc 230 //! p_unit_test 5, %f:vcc, %h:s[3] 231 auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0, 232 Operand::c32(0x40018u)); 233 auto ovrw = bld.sop2(aco_opcode::s_add_u32, bld.def(s1, reg_s3), bld.def(s1, scc), op_in_0, 234 Operand::c32(1u)); 235 auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2), 236 Operand::zero()); 237 auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp)); 238 writeout(5, Operand(br, vcc), Operand(ovrw, reg_s3)); 239 } 240 241 //; del d, e, f, g, h, x 242 243 { 244 //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018 245 //! s1: %f:s[4] = s_cselect_b32 %z:s[6], %a:s[0], %e:scc 246 //! p_unit_test 6, %f:s[4] 247 auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0, 248 Operand::c32(0x40018u)); 249 auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2), 250 Operand::zero()); 251 auto br = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1, reg_s4), Operand(op_in_0), Operand(op_in_2), bld.scc(scmp)); 252 writeout(6, Operand(br, reg_s4)); 253 } 254 255 //; del d, e, f 256 257 { 258 /* SCC is overwritten in between, don't optimize */ 259 260 //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018 261 //! s1: %h:s[3], s1: %x:scc = s_add_u32 %a:s[0], 1 262 //! s1: %g:scc = s_cmp_eq_u32 %d:s[2], 0 263 //! s1: %f:s[4] = s_cselect_b32 %a:s[0], %z:s[6], %g:scc 264 //! p_unit_test 7, %f:s[4], %h:s[3] 265 auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0, 266 Operand::c32(0x40018u)); 267 auto ovrw = bld.sop2(aco_opcode::s_add_u32, bld.def(s1, reg_s3), bld.def(s1, scc), op_in_0, 268 Operand::c32(1u)); 269 auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2), 270 Operand::zero()); 271 auto br = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1, reg_s4), Operand(op_in_0), Operand(op_in_2), bld.scc(scmp)); 272 writeout(7, Operand(br, reg_s4), Operand(ovrw, reg_s3)); 273 } 274 275 //; del d, e, f, g, h, x 276 277 finish_optimizer_postRA_test(); 278 END_TEST 279 280 BEGIN_TEST(optimizer_postRA.dpp) 281 //>> v1: %a:v[0], v1: %b:v[1], s2: %c:vcc, s2: %d:s[0-1] = p_startpgm 282 if (!setup_cs("v1 v1 s2 s2", GFX10_3)) 283 return; 284 285 bld.instructions->at(0)->definitions[0].setFixed(PhysReg(256)); 286 bld.instructions->at(0)->definitions[1].setFixed(PhysReg(257)); 287 bld.instructions->at(0)->definitions[2].setFixed(vcc); 288 bld.instructions->at(0)->definitions[3].setFixed(PhysReg(0)); 289 290 PhysReg reg_v0(256); 291 PhysReg reg_v2(258); 292 Operand a(inputs[0], PhysReg(256)); 293 Operand b(inputs[1], PhysReg(257)); 294 Operand c(inputs[2], vcc); 295 Operand d(inputs[3], PhysReg(0)); 296 297 /* basic optimization */ 298 //! v1: %res0:v[2] = v_add_f32 %a:v[0], %b:v[1] row_mirror bound_ctrl:1 299 //! p_unit_test 0, %res0:v[2] 300 Temp tmp0 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror); 301 Temp res0 = bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp0, reg_v2), b); 302 writeout(0, Operand(res0, reg_v2)); 303 304 /* operand swapping */ 305 //! v1: %res1:v[2] = v_subrev_f32 %a:v[0], %b:v[1] row_mirror bound_ctrl:1 306 //! p_unit_test 1, %res1:v[2] 307 Temp tmp1 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror); 308 Temp res1 = bld.vop2(aco_opcode::v_sub_f32, bld.def(v1, reg_v2), b, Operand(tmp1, reg_v2)); 309 writeout(1, Operand(res1, reg_v2)); 310 311 //! v1: %tmp2:v[2] = v_mov_b32 %a:v[0] row_mirror bound_ctrl:1 312 //! v1: %res2:v[2] = v_sub_f32 %b:v[1], %tmp2:v[2] row_half_mirror bound_ctrl:1 313 //! p_unit_test 2, %res2:v[2] 314 Temp tmp2 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror); 315 Temp res2 = bld.vop2_dpp(aco_opcode::v_sub_f32, bld.def(v1, reg_v2), b, Operand(tmp2, reg_v2), dpp_row_half_mirror); 316 writeout(2, Operand(res2, reg_v2)); 317 318 /* modifiers */ 319 //! v1: %res3:v[2] = v_add_f32 -%a:v[0], %b:v[1] row_mirror bound_ctrl:1 320 //! p_unit_test 3, %res3:v[2] 321 auto tmp3 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror); 322 tmp3.instr->dpp16().neg[0] = true; 323 Temp res3 = bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp3, reg_v2), b); 324 writeout(3, Operand(res3, reg_v2)); 325 326 //! v1: %res4:v[2] = v_add_f32 -%a:v[0], %b:v[1] row_mirror bound_ctrl:1 327 //! p_unit_test 4, %res4:v[2] 328 Temp tmp4 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror); 329 auto res4 = bld.vop2_e64(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp4, reg_v2), b); 330 res4.instr->vop3().neg[0] = true; 331 writeout(4, Operand(res4, reg_v2)); 332 333 //! v1: %tmp5:v[2] = v_mov_b32 %a:v[0] row_mirror bound_ctrl:1 334 //! v1: %res5:v[2] = v_add_f32 %tmp5:v[2], %b:v[1] clamp 335 //! p_unit_test 5, %res5:v[2] 336 Temp tmp5 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror); 337 auto res5 = bld.vop2_e64(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp5, reg_v2), b); 338 res5.instr->vop3().clamp = true; 339 writeout(5, Operand(res5, reg_v2)); 340 341 //! v1: %res6:v[2] = v_add_f32 |%a:v[0]|, %b:v[1] row_mirror bound_ctrl:1 342 //! p_unit_test 6, %res6:v[2] 343 auto tmp6 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror); 344 tmp6.instr->dpp16().neg[0] = true; 345 auto res6 = bld.vop2_e64(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp6, reg_v2), b); 346 res6.instr->vop3().abs[0] = true; 347 writeout(6, Operand(res6, reg_v2)); 348 349 //! v1: %res7:v[2] = v_subrev_f32 %a:v[0], |%b:v[1]| row_mirror bound_ctrl:1 350 //! p_unit_test 7, %res7:v[2] 351 Temp tmp7 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror); 352 auto res7 = bld.vop2_e64(aco_opcode::v_sub_f32, bld.def(v1, reg_v2), b, Operand(tmp7, reg_v2)); 353 res7.instr->vop3().abs[0] = true; 354 writeout(7, Operand(res7, reg_v2)); 355 356 /* vcc */ 357 //! v1: %res8:v[2] = v_cndmask_b32 %a:v[0], %b:v[1], %c:vcc row_mirror bound_ctrl:1 358 //! p_unit_test 8, %res8:v[2] 359 Temp tmp8 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror); 360 Temp res8 = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1, reg_v2), Operand(tmp8, reg_v2), b, c); 361 writeout(8, Operand(res8, reg_v2)); 362 363 //! v1: %tmp9:v[2] = v_mov_b32 %a:v[0] row_mirror bound_ctrl:1 364 //! v1: %res9:v[2] = v_cndmask_b32 %tmp9:v[2], %b:v[1], %d:s[0-1] 365 //! p_unit_test 9, %res9:v[2] 366 Temp tmp9 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror); 367 Temp res9 = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1, reg_v2), Operand(tmp9, reg_v2), b, d); 368 writeout(9, Operand(res9, reg_v2)); 369 370 /* control flow */ 371 //! BB1 372 //! /* logical preds: BB0, / linear preds: BB0, / kind: uniform, */ 373 //! v1: %res10:v[2] = v_add_f32 %a:v[0], %b:v[1] row_mirror bound_ctrl:1 374 //! p_unit_test 10, %res10:v[2] 375 Temp tmp10 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror); 376 377 bld.reset(program->create_and_insert_block()); 378 program->blocks[0].linear_succs.push_back(1); 379 program->blocks[0].logical_succs.push_back(1); 380 program->blocks[1].linear_preds.push_back(0); 381 program->blocks[1].logical_preds.push_back(0); 382 383 Temp res10 = bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp10, reg_v2), b); 384 writeout(10, Operand(res10, reg_v2)); 385 386 /* can't combine if the v_mov_b32's operand is modified */ 387 //! v1: %tmp11_1:v[2] = v_mov_b32 %a:v[0] row_mirror bound_ctrl:1 388 //! v1: %tmp11_2:v[0] = v_mov_b32 0 389 //! v1: %res11:v[2] = v_add_f32 %tmp11_1:v[2], %b:v[1] 390 //! p_unit_test 11, %res11_1:v[2], %tmp11_2:v[0] 391 Temp tmp11_1 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror); 392 Temp tmp11_2 = bld.vop1(aco_opcode::v_mov_b32, bld.def(v1, reg_v0), Operand::c32(0)); 393 Temp res11 = bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp11_1, reg_v2), b); 394 writeout(11, Operand(res11, reg_v2), Operand(tmp11_2, reg_v0)); 395 396 finish_optimizer_postRA_test(); 397 END_TEST 398 399