1 /* 2 * Copyright © 2021 Valve Corporation 3 * 4 * SPDX-License-Identifier: MIT 5 */ 6 7 #include "helpers.h" 8 9 using namespace aco; 10 11 BEGIN_TEST(optimizer_postRA.vcmp) 12 PhysReg reg_v0(256); 13 PhysReg reg_s0(0); 14 PhysReg reg_s4(4); 15 16 //>> v1: %a:v[0] = p_startpgm 17 ASSERTED bool setup_ok = setup_cs("v1", GFX8); 18 assert(setup_ok); 19 20 auto& startpgm = bld.instructions->at(0); 21 assert(startpgm->opcode == aco_opcode::p_startpgm); 22 startpgm->definitions[0].setFixed(reg_v0); 23 24 Temp v_in = inputs[0]; 25 26 { 27 /* Recognize when the result of VOPC goes to VCC, and use that for the branching then. */ 28 29 //! s2: %b:vcc = v_cmp_eq_u32 0, %a:v[0] 30 //! p_cbranch_z %b:vcc 31 //! p_unit_test 0 32 auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(), 33 Operand(v_in, reg_v0)); 34 auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp), 35 Operand(exec, bld.lm)); 36 bld.branch(aco_opcode::p_cbranch_z, bld.scc(sand.def(1).getTemp())); 37 writeout(0); 38 } 39 40 //; del b 41 42 { 43 /* When VCC is overwritten inbetween, don't optimize. */ 44 45 //! s2: %b:vcc = v_cmp_eq_u32 0, %a:v[0] 46 //! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:vcc, %x:exec 47 //! s2: %f:vcc = s_mov_b64 0 48 //! p_cbranch_z %d:scc 49 //! p_unit_test 1, %f:vcc 50 auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(), 51 Operand(v_in, reg_v0)); 52 auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp), 53 Operand(exec, bld.lm)); 54 auto ovrwr = bld.sop1(Builder::s_mov, bld.def(bld.lm, vcc), Operand::zero()); 55 bld.branch(aco_opcode::p_cbranch_z, bld.scc(sand.def(1).getTemp())); 56 writeout(1, Operand(ovrwr, vcc)); 57 } 58 59 //; del b, c, d, f 60 61 { 62 /* When part of VCC is overwritten inbetween, don't optimize. */ 63 64 //! s2: %b:vcc = v_cmp_eq_u32 0, %a:v[0] 65 //! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:vcc, %x:exec 66 //! s1: %f:vcc_hi = s_mov_b32 0 67 //! p_cbranch_z %d:scc 68 //! p_unit_test 1, %f:vcc_hi 69 auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(), 70 Operand(v_in, reg_v0)); 71 auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp), 72 Operand(exec, bld.lm)); 73 auto ovrwr = bld.sop1(aco_opcode::s_mov_b32, bld.def(s1, vcc_hi), Operand::zero()); 74 bld.branch(aco_opcode::p_cbranch_z, bld.scc(sand.def(1).getTemp())); 75 writeout(1, Operand(ovrwr, vcc_hi)); 76 } 77 78 //; del b, c, d, f 79 80 { 81 /* When the result of VOPC goes to an SGPR pair other than VCC, don't optimize */ 82 83 //! s2: %b:s[4-5] = v_cmp_eq_u32 0, %a:v[0] 84 //! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:s[4-5], %x:exec 85 //! p_cbranch_z %d:scc 86 //! p_unit_test 2 87 auto vcmp = bld.vopc_e64(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, reg_s4), Operand::zero(), 88 Operand(v_in, reg_v0)); 89 auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), 90 Operand(vcmp, reg_s4), Operand(exec, bld.lm)); 91 bld.branch(aco_opcode::p_cbranch_z, bld.scc(sand.def(1).getTemp())); 92 writeout(2); 93 } 94 95 //; del b, c, d 96 97 { 98 /* When the VCC isn't written by VOPC, don't optimize */ 99 100 //! s2: %b:vcc, s1: %f:scc = s_or_b64 1, %0:s[4-5] 101 //! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:vcc, %x:exec 102 //! p_cbranch_z %d:scc 103 //! p_unit_test 2 104 auto salu = bld.sop2(Builder::s_or, bld.def(bld.lm, vcc), bld.def(s1, scc), Operand::c32(1u), 105 Operand(reg_s4, bld.lm)); 106 auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), 107 Operand(salu, vcc), Operand(exec, bld.lm)); 108 bld.branch(aco_opcode::p_cbranch_z, bld.scc(sand.def(1).getTemp())); 109 writeout(2); 110 } 111 112 //; del b, c, d, f, x 113 114 { 115 /* When EXEC is overwritten inbetween, don't optimize. */ 116 117 //! s2: %b:vcc = v_cmp_eq_u32 0, %a:v[0] 118 //! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:vcc, %x:exec 119 //! s2: %f:exec = s_mov_b64 42 120 //! p_cbranch_z %d:scc 121 //! p_unit_test 4, %f:exec 122 auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(), 123 Operand(v_in, reg_v0)); 124 auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp), 125 Operand(exec, bld.lm)); 126 auto ovrwr = bld.sop1(Builder::s_mov, bld.def(bld.lm, exec), Operand::c32(42u)); 127 bld.branch(aco_opcode::p_cbranch_z, bld.scc(sand.def(1).getTemp())); 128 writeout(4, Operand(ovrwr, exec)); 129 } 130 131 //; del b, c, d, f, x 132 133 finish_optimizer_postRA_test(); 134 END_TEST 135 136 BEGIN_TEST(optimizer_postRA.scc_nocmp_opt) 137 //>> s1: %a, s2: %y, s1: %z = p_startpgm 138 ASSERTED bool setup_ok = setup_cs("s1 s2 s1", GFX6); 139 assert(setup_ok); 140 141 PhysReg reg_s0{0}; 142 PhysReg reg_s2{2}; 143 PhysReg reg_s3{3}; 144 PhysReg reg_s4{4}; 145 PhysReg reg_s6{6}; 146 PhysReg reg_s8{8}; 147 148 Temp in_0 = inputs[0]; 149 Temp in_1 = inputs[1]; 150 Temp in_2 = inputs[2]; 151 Operand op_in_0(in_0); 152 op_in_0.setFixed(reg_s0); 153 Operand op_in_1(in_1); 154 op_in_1.setFixed(reg_s4); 155 Operand op_in_2(in_2); 156 op_in_2.setFixed(reg_s6); 157 158 { 159 //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018 160 //! p_cbranch_nz %e:scc 161 //! p_unit_test 0 162 auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0, 163 Operand::c32(0x40018u)); 164 auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2), 165 Operand::zero()); 166 bld.branch(aco_opcode::p_cbranch_z, bld.scc(scmp)); 167 writeout(0); 168 } 169 170 //; del d, e 171 172 { 173 //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018 174 //! p_cbranch_z %e:scc 175 //! p_unit_test 1 176 auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0, 177 Operand::c32(0x40018u)); 178 auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand(salu, reg_s2), 179 Operand::zero()); 180 bld.branch(aco_opcode::p_cbranch_z, bld.scc(scmp)); 181 writeout(1); 182 } 183 184 //; del d, e 185 186 { 187 //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018 188 //! p_cbranch_z %e:scc 189 //! p_unit_test 2 190 auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0, 191 Operand::c32(0x40018u)); 192 auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2), 193 Operand::zero()); 194 bld.branch(aco_opcode::p_cbranch_nz, bld.scc(scmp)); 195 writeout(2); 196 } 197 198 //; del d, e 199 200 { 201 //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018 202 //! p_cbranch_nz %e:scc 203 //! p_unit_test 3 204 auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0, 205 Operand::c32(0x40018u)); 206 auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand(salu, reg_s2), 207 Operand::zero()); 208 bld.branch(aco_opcode::p_cbranch_nz, bld.scc(scmp)); 209 writeout(3); 210 } 211 212 //; del d, e 213 214 { 215 //! s2: %d:s[2-3], s1: %e:scc = s_and_b64 %y:s[4-5], 0x12345 216 //! p_cbranch_z %e:scc 217 //! p_unit_test 4 218 auto salu = bld.sop2(aco_opcode::s_and_b64, bld.def(s2, reg_s2), bld.def(s1, scc), op_in_1, 219 Operand::c32(0x12345u)); 220 auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u64, bld.def(s1, scc), Operand(salu, reg_s2), 221 Operand::zero(8)); 222 bld.branch(aco_opcode::p_cbranch_nz, bld.scc(scmp)); 223 writeout(4); 224 } 225 226 //; del d, e 227 228 { 229 /* SCC is overwritten in between, don't optimize */ 230 231 //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018 232 //! s1: %h:s[3], s1: %x:scc = s_add_u32 %a:s[0], 1 233 //! s1: %g:scc = s_cmp_eq_u32 %d:s[2], 0 234 //! p_cbranch_z %g:scc 235 //! p_unit_test 5, %h:s[3] 236 auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0, 237 Operand::c32(0x40018u)); 238 auto ovrw = bld.sop2(aco_opcode::s_add_u32, bld.def(s1, reg_s3), bld.def(s1, scc), op_in_0, 239 Operand::c32(1u)); 240 auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2), 241 Operand::zero()); 242 bld.branch(aco_opcode::p_cbranch_z, bld.scc(scmp)); 243 writeout(5, Operand(ovrw, reg_s3)); 244 } 245 246 //; del d, e, g, h, x 247 248 { 249 /* SCC is overwritten in between, optimize by pulling down */ 250 251 //! s1: %h:s[3], s1: %x:scc = s_add_u32 %a:s[0], 1 252 //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018 253 //! p_cbranch_z %g:scc 254 //! p_unit_test 5, %h:s[3] 255 auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0, 256 Operand::c32(0x40018u)); 257 auto ovrw = bld.sop2(aco_opcode::s_add_u32, bld.def(s1, reg_s3), bld.def(s1, scc), op_in_0, 258 Operand::c32(1u)); 259 auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand(salu, reg_s2), 260 Operand::zero()); 261 bld.branch(aco_opcode::p_cbranch_z, bld.scc(scmp)); 262 writeout(5, Operand(ovrw, reg_s3)); 263 } 264 265 //; del d, e, g, h, x 266 267 { 268 /* SCC is overwritten in between, optimize by pulling down */ 269 270 //! s1: %h:s[3], s1: %x:scc = s_add_u32 %a:s[0], 1 271 //! s2: %d:s[8-9], s1: %e:scc = s_and_b64 %b:s[4-5], 0x40018 272 //! p_cbranch_z %g:scc 273 //! p_unit_test 5, %h:s[3] 274 auto salu = bld.sop2(aco_opcode::s_and_b64, bld.def(s2, reg_s8), bld.def(s1, scc), op_in_1, 275 Operand::c32(0x40018u)); 276 auto ovrw = bld.sop2(aco_opcode::s_add_u32, bld.def(s1, reg_s3), bld.def(s1, scc), op_in_0, 277 Operand::c32(1u)); 278 auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u64, bld.def(s1, scc), Operand(salu, reg_s8), 279 Operand::zero()); 280 bld.branch(aco_opcode::p_cbranch_z, bld.scc(scmp)); 281 writeout(5, Operand(ovrw, reg_s3)); 282 } 283 284 //; del d, e, g, h, x 285 286 { 287 //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018 288 //! s1: %f:s[4] = s_cselect_b32 %z:s[6], %a:s[0], %e:scc 289 //! p_unit_test 6, %f:s[4] 290 auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0, 291 Operand::c32(0x40018u)); 292 auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2), 293 Operand::zero()); 294 auto br = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1, reg_s4), Operand(op_in_0), 295 Operand(op_in_2), bld.scc(scmp)); 296 writeout(6, Operand(br, reg_s4)); 297 } 298 299 //; del d, e, f 300 301 { 302 /* SCC is overwritten in between, don't optimize */ 303 304 //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018 305 //! s1: %h:s[3], s1: %x:scc = s_add_u32 %a:s[0], 1 306 //! s1: %g:scc = s_cmp_eq_u32 %d:s[2], 0 307 //! s1: %f:s[4] = s_cselect_b32 %a:s[0], %z:s[6], %g:scc 308 //! p_unit_test 7, %f:s[4], %h:s[3] 309 auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0, 310 Operand::c32(0x40018u)); 311 auto ovrw = bld.sop2(aco_opcode::s_add_u32, bld.def(s1, reg_s3), bld.def(s1, scc), op_in_0, 312 Operand::c32(1u)); 313 auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2), 314 Operand::zero()); 315 auto br = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1, reg_s4), Operand(op_in_0), 316 Operand(op_in_2), bld.scc(scmp)); 317 writeout(7, Operand(br, reg_s4), Operand(ovrw, reg_s3)); 318 } 319 320 //; del d, e, f, g, h, x 321 322 finish_optimizer_postRA_test(); 323 END_TEST 324 325 BEGIN_TEST(optimizer_postRA.dpp) 326 //>> v1: %a:v[0], v1: %b:v[1], s2: %c:vcc, s2: %d:s[0-1] = p_startpgm 327 if (!setup_cs("v1 v1 s2 s2", GFX10_3)) 328 return; 329 330 bld.instructions->at(0)->definitions[0].setFixed(PhysReg(256)); 331 bld.instructions->at(0)->definitions[1].setFixed(PhysReg(257)); 332 bld.instructions->at(0)->definitions[2].setFixed(vcc); 333 bld.instructions->at(0)->definitions[3].setFixed(PhysReg(0)); 334 335 PhysReg reg_v0(256); 336 PhysReg reg_v2(258); 337 Operand a(inputs[0], PhysReg(256)); 338 Operand b(inputs[1], PhysReg(257)); 339 Operand c(inputs[2], vcc); 340 Operand d(inputs[3], PhysReg(0)); 341 342 /* basic optimization */ 343 //! v1: %res0:v[2] = v_add_f32 %a:v[0], %b:v[1] row_mirror bound_ctrl:1 fi 344 //! p_unit_test 0, %res0:v[2] 345 Temp tmp0 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror); 346 Temp res0 = bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp0, reg_v2), b); 347 writeout(0, Operand(res0, reg_v2)); 348 349 /* operand swapping */ 350 //! v1: %res1:v[2] = v_subrev_f32 %a:v[0], %b:v[1] row_mirror bound_ctrl:1 fi 351 //! p_unit_test 1, %res1:v[2] 352 Temp tmp1 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror); 353 Temp res1 = bld.vop2(aco_opcode::v_sub_f32, bld.def(v1, reg_v2), b, Operand(tmp1, reg_v2)); 354 writeout(1, Operand(res1, reg_v2)); 355 356 //! v1: %tmp2:v[2] = v_mov_b32 %a:v[0] row_mirror bound_ctrl:1 fi 357 //! v1: %res2:v[2] = v_sub_f32 %b:v[1], %tmp2:v[2] row_half_mirror bound_ctrl:1 fi 358 //! p_unit_test 2, %res2:v[2] 359 Temp tmp2 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror); 360 Temp res2 = bld.vop2_dpp(aco_opcode::v_sub_f32, bld.def(v1, reg_v2), b, Operand(tmp2, reg_v2), 361 dpp_row_half_mirror); 362 writeout(2, Operand(res2, reg_v2)); 363 364 /* modifiers */ 365 //! v1: %res3:v[2] = v_add_f32 -%a:v[0], %b:v[1] row_mirror bound_ctrl:1 fi 366 //! p_unit_test 3, %res3:v[2] 367 auto tmp3 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror); 368 tmp3->dpp16().neg[0] = true; 369 Temp res3 = bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp3, reg_v2), b); 370 writeout(3, Operand(res3, reg_v2)); 371 372 //! v1: %res4:v[2] = v_add_f32 -%a:v[0], %b:v[1] row_mirror bound_ctrl:1 fi 373 //! p_unit_test 4, %res4:v[2] 374 Temp tmp4 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror); 375 auto res4 = bld.vop2_e64(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp4, reg_v2), b); 376 res4->valu().neg[0] = true; 377 writeout(4, Operand(res4, reg_v2)); 378 379 //! v1: %tmp5:v[2] = v_mov_b32 %a:v[0] row_mirror bound_ctrl:1 fi 380 //! v1: %res5:v[2] = v_add_f32 %tmp5:v[2], %b:v[1] clamp 381 //! p_unit_test 5, %res5:v[2] 382 Temp tmp5 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror); 383 auto res5 = bld.vop2_e64(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp5, reg_v2), b); 384 res5->valu().clamp = true; 385 writeout(5, Operand(res5, reg_v2)); 386 387 //! v1: %res6:v[2] = v_add_f32 |%a:v[0]|, %b:v[1] row_mirror bound_ctrl:1 fi 388 //! p_unit_test 6, %res6:v[2] 389 auto tmp6 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror); 390 tmp6->dpp16().neg[0] = true; 391 auto res6 = bld.vop2_e64(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp6, reg_v2), b); 392 res6->valu().abs[0] = true; 393 writeout(6, Operand(res6, reg_v2)); 394 395 //! v1: %res7:v[2] = v_subrev_f32 %a:v[0], |%b:v[1]| row_mirror bound_ctrl:1 fi 396 //! p_unit_test 7, %res7:v[2] 397 Temp tmp7 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror); 398 auto res7 = bld.vop2_e64(aco_opcode::v_sub_f32, bld.def(v1, reg_v2), b, Operand(tmp7, reg_v2)); 399 res7->valu().abs[0] = true; 400 writeout(7, Operand(res7, reg_v2)); 401 402 //! v1: %tmp12:v[2] = v_mov_b32 -%a:v[0] row_mirror bound_ctrl:1 fi 403 //! v1: %res12:v[2] = v_add_u32 %tmp12:v[2], %b:v[1] 404 //! p_unit_test 12, %res12:v[2] 405 auto tmp12 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror); 406 tmp12->dpp16().neg[0] = true; 407 Temp res12 = bld.vop2(aco_opcode::v_add_u32, bld.def(v1, reg_v2), Operand(tmp12, reg_v2), b); 408 writeout(12, Operand(res12, reg_v2)); 409 410 //! v1: %tmp13:v[2] = v_mov_b32 -%a:v[0] row_mirror bound_ctrl:1 fi 411 //! v1: %res13:v[2] = v_add_f16 %tmp13:v[2], %b:v[1] 412 //! p_unit_test 13, %res13:v[2] 413 auto tmp13 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror); 414 tmp13->dpp16().neg[0] = true; 415 Temp res13 = bld.vop2(aco_opcode::v_add_f16, bld.def(v1, reg_v2), Operand(tmp13, reg_v2), b); 416 writeout(13, Operand(res13, reg_v2)); 417 418 /* vcc */ 419 //! v1: %res8:v[2] = v_cndmask_b32 %a:v[0], %b:v[1], %c:vcc row_mirror bound_ctrl:1 fi 420 //! p_unit_test 8, %res8:v[2] 421 Temp tmp8 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror); 422 Temp res8 = 423 bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1, reg_v2), Operand(tmp8, reg_v2), b, c); 424 writeout(8, Operand(res8, reg_v2)); 425 426 //! v1: %tmp9:v[2] = v_mov_b32 %a:v[0] row_mirror bound_ctrl:1 fi 427 //! v1: %res9:v[2] = v_cndmask_b32 %tmp9:v[2], %b:v[1], %d:s[0-1] 428 //! p_unit_test 9, %res9:v[2] 429 Temp tmp9 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror); 430 Temp res9 = 431 bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1, reg_v2), Operand(tmp9, reg_v2), b, d); 432 writeout(9, Operand(res9, reg_v2)); 433 434 /* control flow */ 435 //! BB1 436 //! /* logical preds: BB0, / linear preds: BB0, / kind: uniform, */ 437 //! v1: %res10:v[2] = v_add_f32 %a:v[0], %b:v[1] row_mirror bound_ctrl:1 fi 438 //! p_unit_test 10, %res10:v[2] 439 Temp tmp10 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror); 440 441 bld.reset(program->create_and_insert_block()); 442 program->blocks[0].linear_succs.push_back(1); 443 program->blocks[0].logical_succs.push_back(1); 444 program->blocks[1].linear_preds.push_back(0); 445 program->blocks[1].logical_preds.push_back(0); 446 447 Temp res10 = bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp10, reg_v2), b); 448 writeout(10, Operand(res10, reg_v2)); 449 450 /* can't combine if the v_mov_b32's operand is modified */ 451 //! v1: %tmp11_1:v[2] = v_mov_b32 %a:v[0] row_mirror bound_ctrl:1 fi 452 //! v1: %tmp11_2:v[0] = v_mov_b32 0 453 //! v1: %res11:v[2] = v_add_f32 %tmp11_1:v[2], %b:v[1] 454 //! p_unit_test 11, %res11_1:v[2], %tmp11_2:v[0] 455 Temp tmp11_1 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror); 456 Temp tmp11_2 = bld.vop1(aco_opcode::v_mov_b32, bld.def(v1, reg_v0), Operand::c32(0)); 457 Temp res11 = bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp11_1, reg_v2), b); 458 writeout(11, Operand(res11, reg_v2), Operand(tmp11_2, reg_v0)); 459 460 finish_optimizer_postRA_test(); 461 END_TEST 462 463 BEGIN_TEST(optimizer_postRA.dpp_across_exec) 464 for (amd_gfx_level gfx : {GFX9, GFX10}) { 465 //>> v1: %a:v[0], v1: %b:v[1] = p_startpgm 466 if (!setup_cs("v1 v1", gfx)) 467 continue; 468 469 bld.instructions->at(0)->definitions[0].setFixed(PhysReg(256)); 470 bld.instructions->at(0)->definitions[1].setFixed(PhysReg(257)); 471 472 PhysReg reg_v2(258); 473 Operand a(inputs[0], PhysReg(256)); 474 Operand b(inputs[1], PhysReg(257)); 475 476 //~gfx9! v1: %tmp0:v[2] = v_mov_b32 %a:v[0] row_mirror bound_ctrl:1 477 //! s2: %0:exec, s1: %0:scc = s_not_b64 %0:exec 478 //~gfx9! v1: %res0:v[2] = v_add_f32 %tmp0:v[2], %b:v[1] 479 //~gfx10! v1: %res0:v[2] = v_add_f32 %a:v[0], %b:v[1] row_mirror bound_ctrl:1 fi 480 //! p_unit_test 0, %res0:v[2] 481 Temp tmp0 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror); 482 bld.sop1(Builder::s_not, Definition(exec, bld.lm), Definition(scc, s1), 483 Operand(exec, bld.lm)); 484 Temp res0 = bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp0, reg_v2), b); 485 writeout(0, Operand(res0, reg_v2)); 486 487 finish_optimizer_postRA_test(); 488 } 489 END_TEST 490 491 BEGIN_TEST(optimizer_postRA.dpp_vcmpx) 492 //>> v1: %a:v[0], v1: %b:v[1] = p_startpgm 493 if (!setup_cs("v1 v1", GFX11)) 494 return; 495 496 bld.instructions->at(0)->definitions[0].setFixed(PhysReg(256)); 497 bld.instructions->at(0)->definitions[1].setFixed(PhysReg(257)); 498 499 PhysReg reg_v2(258); 500 Operand a(inputs[0], PhysReg(256)); 501 Operand b(inputs[1], PhysReg(257)); 502 503 //! v1: %tmp0:v[2] = v_mov_b32 %a:v[0] row_mirror bound_ctrl:1 fi 504 //! s2: %res0:exec = v_cmpx_lt_f32 %tmp0:v[2], %b:v[1] 505 //! p_unit_test 0, %res0:exec 506 Temp tmp0 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror); 507 Temp res0 = bld.vopc(aco_opcode::v_cmpx_lt_f32, bld.def(bld.lm, exec), Operand(tmp0, reg_v2), b); 508 writeout(0, Operand(res0, exec)); 509 510 finish_optimizer_postRA_test(); 511 END_TEST 512 513 BEGIN_TEST(optimizer_postRA.dpp_across_cf) 514 //>> v1: %a:v[0], v1: %b:v[1], v1: %c:v[2], v1: %d:v[3], s2: %e:s[0-1], s4: %f:s[4-7] = p_startpgm 515 if (!setup_cs("v1 v1 v1 v1 s2 s4", GFX10_3)) 516 return; 517 518 aco_ptr<Instruction>& startpgm = bld.instructions->at(0); 519 startpgm->definitions[0].setFixed(PhysReg(256)); 520 startpgm->definitions[1].setFixed(PhysReg(257)); 521 startpgm->definitions[2].setFixed(PhysReg(258)); 522 startpgm->definitions[3].setFixed(PhysReg(259)); 523 startpgm->definitions[4].setFixed(PhysReg(0)); 524 startpgm->definitions[5].setFixed(PhysReg(4)); 525 526 Operand a(inputs[0], PhysReg(256)); /* source for DPP */ 527 Operand b(inputs[1], PhysReg(257)); /* source for fadd */ 528 Operand c(inputs[2], PhysReg(258)); /* buffer store address */ 529 Operand d(inputs[3], PhysReg(259)); /* buffer store value */ 530 Operand e(inputs[4], PhysReg(0)); /* condition */ 531 Operand f(inputs[5], PhysReg(4)); /* buffer descriptor */ 532 PhysReg reg_v12(268); /* temporary register */ 533 534 Temp dpp_tmp = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v12), a, dpp_row_mirror); 535 536 //! s2: %saved_exec:s[84-85], s1: %0:scc, s2: %0:exec = s_and_saveexec_b64 %e:s[0-1], %0:exec 537 //! p_cbranch_nz BB1, BB2 538 539 emit_divergent_if_else( 540 program.get(), bld, e, 541 [&]() -> void __anon703f0dc20102() 542 { 543 /* --- logical then --- */ 544 //! BB1 545 //! /* logical preds: BB0, / linear preds: BB0, / kind: */ 546 //! p_logical_start 547 548 //! buffer_store_dword %f:s[4-7], %c:v[2], 0, %d:v[3] offen 549 bld.mubuf(aco_opcode::buffer_store_dword, f, c, Operand::zero(), d, 0, true); 550 551 //! v1: %res10:v[12] = v_add_f32 %a:v[0], %b:v[1] row_mirror bound_ctrl:1 fi 552 //! p_unit_test 10, %res10:v[12] 553 Temp result = 554 bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v12), Operand(dpp_tmp, reg_v12), b); 555 writeout(10, Operand(result, reg_v12)); 556 557 //! p_logical_end 558 //! p_branch BB3 559 560 /* --- linear then --- */ 561 //! BB2 562 //! /* logical preds: / linear preds: BB0, / kind: */ 563 //! p_branch BB3 564 565 /* --- invert --- */ 566 //! BB3 567 //! /* logical preds: / linear preds: BB1, BB2, / kind: invert, */ 568 //! s2: %0:exec, s1: %0:scc = s_andn2_b64 %saved_exec:s[84-85], %0:exec 569 //! p_cbranch_nz BB4, BB5 570 }, 571 [&]() -> void __anon703f0dc20202() 572 { 573 /* --- logical else --- */ 574 //! BB4 575 //! /* logical preds: BB0, / linear preds: BB3, / kind: */ 576 //! p_logical_start 577 //! p_logical_end 578 //! p_branch BB6 579 580 /* --- linear else --- */ 581 //! BB5 582 //! /* logical preds: / linear preds: BB3, / kind: */ 583 //! p_branch BB6 584 }); 585 586 /* --- merge block --- */ 587 //! BB6 588 //! /* logical preds: BB1, BB4, / linear preds: BB4, BB5, / kind: uniform, top-level, merge, */ 589 //! s2: %0:exec = p_parallelcopy %saved_exec:s[84-85] 590 591 finish_optimizer_postRA_test(); 592 END_TEST 593 594 BEGIN_TEST(optimizer_postRA.dpp_across_cf_overwritten) 595 //>> v1: %a:v[0], v1: %b:v[1], s4: %c:s[4-7], v1: %d:v[3], s2: %e:s[0-1], s1: %f:s[2] = p_startpgm 596 if (!setup_cs("v1 v1 s4 v1 s2 s1", GFX10_3)) 597 return; 598 599 aco_ptr<Instruction>& startpgm = bld.instructions->at(0); 600 startpgm->definitions[0].setFixed(PhysReg(256)); 601 startpgm->definitions[1].setFixed(PhysReg(257)); 602 startpgm->definitions[2].setFixed(PhysReg(4)); 603 startpgm->definitions[3].setFixed(PhysReg(259)); 604 startpgm->definitions[4].setFixed(PhysReg(0)); 605 startpgm->definitions[5].setFixed(PhysReg(2)); 606 607 Operand a(inputs[0], PhysReg(256)); /* source for DPP */ 608 Operand b(inputs[1], PhysReg(257)); /* source for fadd */ 609 Operand c(inputs[2], PhysReg(4)); /* buffer descriptor */ 610 Operand d(inputs[3], PhysReg(259)); /* buffer store value */ 611 Operand e(inputs[4], PhysReg(0)); /* condition */ 612 Operand f(inputs[5], PhysReg(2)); /* buffer store address (scalar) */ 613 PhysReg reg_v12(268); /* temporary register */ 614 615 //! v1: %dpp_tmp:v[12] = v_mov_b32 %a:v[0] row_mirror bound_ctrl:1 fi 616 Temp dpp_tmp = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v12), a, dpp_row_mirror); 617 618 //! s2: %saved_exec:s[84-85], s1: %0:scc, s2: %0:exec = s_and_saveexec_b64 %e:s[0-1], %0:exec 619 //! p_cbranch_nz BB1, BB2 620 621 emit_divergent_if_else( 622 program.get(), bld, e, 623 [&]() -> void __anon703f0dc20302() 624 { 625 /* --- logical then --- */ 626 //! BB1 627 //! /* logical preds: BB0, / linear preds: BB0, / kind: */ 628 //! p_logical_start 629 630 //! v1: %addr:v[0] = p_parallelcopy %f:s[2] 631 Temp addr = bld.pseudo(aco_opcode::p_parallelcopy, bld.def(v1, a.physReg()), f); 632 633 //! buffer_store_dword %c:s[4-7], %addr:v[0], 0, %d:v[3] offen 634 bld.mubuf(aco_opcode::buffer_store_dword, c, Operand(addr, a.physReg()), Operand::zero(), 635 d, 0, true); 636 637 //! p_logical_end 638 //! p_branch BB3 639 640 /* --- linear then --- */ 641 //! BB2 642 //! /* logical preds: / linear preds: BB0, / kind: */ 643 //! p_branch BB3 644 645 /* --- invert --- */ 646 //! BB3 647 //! /* logical preds: / linear preds: BB1, BB2, / kind: invert, */ 648 //! s2: %0:exec, s1: %0:scc = s_andn2_b64 %saved_exec:s[84-85], %0:exec 649 //! p_cbranch_nz BB4, BB5 650 }, 651 [&]() -> void __anon703f0dc20402() 652 { 653 /* --- logical else --- */ 654 //! BB4 655 //! /* logical preds: BB0, / linear preds: BB3, / kind: */ 656 //! p_logical_start 657 //! p_logical_end 658 //! p_branch BB6 659 660 /* --- linear else --- */ 661 //! BB5 662 //! /* logical preds: / linear preds: BB3, / kind: */ 663 //! p_branch BB6 664 }); 665 666 /* --- merge block --- */ 667 //! BB6 668 //! /* logical preds: BB1, BB4, / linear preds: BB4, BB5, / kind: uniform, top-level, merge, */ 669 //! s2: %0:exec = p_parallelcopy %saved_exec:s[84-85] 670 671 //! v1: %result:v[12] = v_add_f32 %dpp_mov_tmp:v[12], %b:v[1] 672 Temp result = 673 bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v12), Operand(dpp_tmp, reg_v12), b); 674 //! p_unit_test 10, %result:v[12] 675 writeout(10, Operand(result, reg_v12)); 676 677 finish_optimizer_postRA_test(); 678 END_TEST 679 680 BEGIN_TEST(optimizer_postRA.dpp_across_cf_linear_clobber) 681 //>> v1: %a:v[0], v1: %b:v[1], s2: %c:s[0-1] = p_startpgm 682 if (!setup_cs("v1 v1 s2", GFX10_3)) 683 return; 684 685 aco_ptr<Instruction>& startpgm = bld.instructions->at(0); 686 startpgm->definitions[0].setFixed(PhysReg(256)); 687 startpgm->definitions[1].setFixed(PhysReg(257)); 688 startpgm->definitions[2].setFixed(PhysReg(0)); 689 690 Operand a(inputs[0], PhysReg(256)); /* source for DPP */ 691 Operand b(inputs[1], PhysReg(257)); /* source for fadd */ 692 Operand c(inputs[2], PhysReg(0)); /* condition */ 693 PhysReg reg_v12(268); /* temporary register */ 694 695 //! v1: %dpp_tmp:v[12] = v_mov_b32 %a:v[0] row_mirror bound_ctrl:1 fi 696 Temp dpp_tmp = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v12), a, dpp_row_mirror); 697 698 //! s2: %saved_exec:s[84-85], s1: %0:scc, s2: %0:exec = s_and_saveexec_b64 %c:s[0-1], %0:exec 699 //! p_cbranch_nz BB1, BB2 700 701 emit_divergent_if_else( 702 program.get(), bld, c, 703 [&]() -> void __anon703f0dc20502() 704 { 705 /* --- logical then --- */ 706 //! BB1 707 //! /* logical preds: BB0, / linear preds: BB0, / kind: */ 708 //! p_logical_start 709 710 //! v1: %clobber:v[0] = p_parallelcopy 0 711 Temp clobber = 712 bld.pseudo(aco_opcode::p_parallelcopy, bld.def(v1, a.physReg()), Operand::c32(0)); 713 714 //! p_unit_test 0, %clobber:v[0] 715 writeout(0, Operand(clobber, a.physReg())); 716 717 //! p_logical_end 718 //! p_branch BB3 719 720 /* --- linear then --- */ 721 //! BB2 722 //! /* logical preds: / linear preds: BB0, / kind: */ 723 //! p_branch BB3 724 725 /* --- invert --- */ 726 //! BB3 727 //! /* logical preds: / linear preds: BB1, BB2, / kind: invert, */ 728 //! s2: %0:exec, s1: %0:scc = s_andn2_b64 %saved_exec:s[84-85], %0:exec 729 //! p_cbranch_nz BB4, BB5 730 }, 731 [&]() -> void __anon703f0dc20602() 732 { 733 /* --- logical else --- */ 734 //! BB4 735 //! /* logical preds: BB0, / linear preds: BB3, / kind: */ 736 //! p_logical_start 737 738 //! v1: %result:v[12] = v_add_f32 %dpp_mov_tmp:v[12], %b:v[1] 739 Temp result = 740 bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v12), Operand(dpp_tmp, reg_v12), b); 741 //! p_unit_test 1, %result:v[12] 742 writeout(1, Operand(result, reg_v12)); 743 744 //! p_logical_end 745 //! p_branch BB6 746 747 /* --- linear else --- */ 748 //! BB5 749 //! /* logical preds: / linear preds: BB3, / kind: */ 750 //! p_branch BB6 751 }); 752 753 /* --- merge block --- */ 754 //! BB6 755 //! /* logical preds: BB1, BB4, / linear preds: BB4, BB5, / kind: uniform, top-level, merge, */ 756 //! s2: %0:exec = p_parallelcopy %saved_exec:s[84-85] 757 758 finish_optimizer_postRA_test(); 759 END_TEST 760 761 BEGIN_TEST(optimizer_postRA.scc_nocmp_across_cf) 762 //>> s2: %a:s[2-3], v1: %c:v[2], v1: %d:v[3], s2: %e:s[0-1], s4: %f:s[4-7] = p_startpgm 763 if (!setup_cs("s2 v1 v1 s2 s4", GFX10_3)) 764 return; 765 766 aco_ptr<Instruction>& startpgm = bld.instructions->at(0); 767 startpgm->definitions[0].setFixed(PhysReg(2)); 768 startpgm->definitions[1].setFixed(PhysReg(258)); 769 startpgm->definitions[2].setFixed(PhysReg(259)); 770 startpgm->definitions[3].setFixed(PhysReg(0)); 771 startpgm->definitions[4].setFixed(PhysReg(4)); 772 773 Operand a(inputs[0], PhysReg(2)); /* source for s_and */ 774 Operand c(inputs[1], PhysReg(258)); /* buffer store address */ 775 Operand d(inputs[2], PhysReg(259)); /* buffer store value */ 776 Operand e(inputs[3], PhysReg(0)); /* condition */ 777 Operand f(inputs[4], PhysReg(4)); /* buffer descriptor */ 778 PhysReg reg_s8(8); /* temporary register */ 779 780 auto tmp_salu = bld.sop2(aco_opcode::s_and_b64, bld.def(s2, reg_s8), bld.def(s1, scc), a, 781 Operand::c32(0x40018u)); 782 783 //! s2: %saved_exec:s[84-85], s1: %0:scc, s2: %0:exec = s_and_saveexec_b64 %e:s[0-1], %0:exec 784 //! p_cbranch_nz BB1, BB2 785 786 emit_divergent_if_else( 787 program.get(), bld, e, 788 [&]() -> void __anon703f0dc20702() 789 { 790 /* --- logical then --- */ 791 //! BB1 792 //! /* logical preds: BB0, / linear preds: BB0, / kind: */ 793 //! p_logical_start 794 795 //! buffer_store_dword %f:s[4-7], %c:v[2], 0, %d:v[3] offen 796 bld.mubuf(aco_opcode::buffer_store_dword, f, c, Operand::zero(), d, 0, true); 797 798 //! p_logical_end 799 //! p_branch BB3 800 801 /* --- linear then --- */ 802 //! BB2 803 //! /* logical preds: / linear preds: BB0, / kind: */ 804 //! p_branch BB3 805 806 /* --- invert --- */ 807 //! BB3 808 //! /* logical preds: / linear preds: BB1, BB2, / kind: invert, */ 809 //! s2: %0:exec, s1: %0:scc = s_andn2_b64 %saved_exec:s[84-85], %0:exec 810 //! p_cbranch_nz BB4, BB5 811 }, 812 [&]() -> void __anon703f0dc20802() 813 { 814 /* --- logical else --- */ 815 //! BB4 816 //! /* logical preds: BB0, / linear preds: BB3, / kind: */ 817 //! p_logical_start 818 //! p_logical_end 819 //! p_branch BB6 820 821 /* --- linear else --- */ 822 //! BB5 823 //! /* logical preds: / linear preds: BB3, / kind: */ 824 //! p_branch BB6 825 }); 826 827 /* --- merge block --- */ 828 //! BB6 829 //! /* logical preds: BB1, BB4, / linear preds: BB4, BB5, / kind: uniform, top-level, merge, */ 830 //! s2: %0:exec = p_parallelcopy %saved_exec:s[84-85] 831 832 //! s2: %tmp_salu:s[8-9], s1: %br_scc:scc = s_and_b64 %a:s[2-3], 0x40018 833 //! p_cbranch_z %br_scc:scc 834 //! p_unit_test 5 835 auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u64, bld.def(s1, scc), Operand(tmp_salu, reg_s8), 836 Operand::zero()); 837 bld.branch(aco_opcode::p_cbranch_z, bld.scc(scmp)); 838 writeout(5); 839 840 finish_optimizer_postRA_test(); 841 END_TEST 842 843 BEGIN_TEST(optimizer_postRA.scc_nocmp_across_cf_partially_overwritten) 844 //>> s2: %a:s[2-3], v1: %c:v[2], v1: %d:v[3], s2: %e:s[0-1], s1: %f:s[4], s4: %g:s[8-11] = p_startpgm 845 if (!setup_cs("s2 v1 v1 s2 s1 s4", GFX10_3)) 846 return; 847 848 aco_ptr<Instruction>& startpgm = bld.instructions->at(0); 849 startpgm->definitions[0].setFixed(PhysReg(2)); 850 startpgm->definitions[1].setFixed(PhysReg(258)); 851 startpgm->definitions[2].setFixed(PhysReg(259)); 852 startpgm->definitions[3].setFixed(PhysReg(0)); 853 startpgm->definitions[4].setFixed(PhysReg(4)); 854 startpgm->definitions[5].setFixed(PhysReg(8)); 855 856 Operand a(inputs[0], PhysReg(2)); /* source for s_and */ 857 Operand c(inputs[1], PhysReg(258)); /* buffer store address */ 858 Operand d(inputs[2], PhysReg(259)); /* buffer store value */ 859 Operand e(inputs[3], PhysReg(0)); /* condition */ 860 Operand f(inputs[4], PhysReg(4)); /* overwrite value */ 861 Operand g(inputs[5], PhysReg(8)); /* buffer descriptor */ 862 PhysReg reg_s3(3); /* temporary register */ 863 PhysReg reg_s8(8); /* temporary register */ 864 865 //! s2: %tmp_salu:s[8-9], s1: %tmp_salu_scc:scc = s_and_b64 %a:s[2-3], 0x40018 866 auto tmp_salu = bld.sop2(aco_opcode::s_and_b64, bld.def(s2, reg_s8), bld.def(s1, scc), a, 867 Operand::c32(0x40018u)); 868 869 //! s2: %saved_exec:s[84-85], s1: %0:scc, s2: %0:exec = s_and_saveexec_b64 %e:s[0-1], %0:exec 870 //! p_cbranch_nz BB1, BB2 871 872 emit_divergent_if_else( 873 program.get(), bld, e, 874 [&]() -> void __anon703f0dc20902() 875 { 876 /* --- logical then --- */ 877 //! BB1 878 //! /* logical preds: BB0, / linear preds: BB0, / kind: */ 879 //! p_logical_start 880 881 //! s1: %ovrwr:s[3] = p_parallelcopy %f:s[4] 882 Temp s_addr = bld.pseudo(aco_opcode::p_parallelcopy, bld.def(s1, reg_s3), f); 883 884 //! buffer_store_dword %g:s[8-11], %c:v[2], %ovrwr:s[3], %d:v[3] offen 885 bld.mubuf(aco_opcode::buffer_store_dword, g, c, Operand(s_addr, reg_s3), d, 0, true); 886 887 //! p_logical_end 888 //! p_branch BB3 889 890 /* --- linear then --- */ 891 //! BB2 892 //! /* logical preds: / linear preds: BB0, / kind: */ 893 //! p_branch BB3 894 895 /* --- invert --- */ 896 //! BB3 897 //! /* logical preds: / linear preds: BB1, BB2, / kind: invert, */ 898 //! s2: %0:exec, s1: %0:scc = s_andn2_b64 %saved_exec:s[84-85], %0:exec 899 //! p_cbranch_nz BB4, BB5 900 }, 901 [&]() -> void __anon703f0dc20a02() 902 { 903 /* --- logical else --- */ 904 //! BB4 905 //! /* logical preds: BB0, / linear preds: BB3, / kind: */ 906 //! p_logical_start 907 //! p_logical_end 908 //! p_branch BB6 909 910 /* --- linear else --- */ 911 //! BB5 912 //! /* logical preds: / linear preds: BB3, / kind: */ 913 //! p_branch BB6 914 }); 915 916 /* --- merge block --- */ 917 //! BB6 918 //! /* logical preds: BB1, BB4, / linear preds: BB4, BB5, / kind: uniform, top-level, merge, */ 919 //! s2: %0:exec = p_parallelcopy %saved_exec:s[84-85] 920 921 //! s1: %br_scc:scc = s_cmp_lg_u64 %tmp_salu:s[8-9], 0 922 //! p_cbranch_z %br_scc:scc 923 //! p_unit_test 5 924 auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u64, bld.def(s1, scc), Operand(tmp_salu, reg_s8), 925 Operand::zero()); 926 bld.branch(aco_opcode::p_cbranch_z, bld.scc(scmp)); 927 writeout(5); 928 929 finish_optimizer_postRA_test(); 930 END_TEST 931