/*
 * Copyright © 2021 Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 */
6 
7 #include "helpers.h"
8 
9 using namespace aco;
10 
11 BEGIN_TEST(optimizer_postRA.vcmp)
12    PhysReg reg_v0(256);
13    PhysReg reg_s0(0);
14    PhysReg reg_s4(4);
15 
16    //>> v1: %a:v[0] = p_startpgm
17    ASSERTED bool setup_ok = setup_cs("v1", GFX8);
18    assert(setup_ok);
19 
20    auto& startpgm = bld.instructions->at(0);
21    assert(startpgm->opcode == aco_opcode::p_startpgm);
22    startpgm->definitions[0].setFixed(reg_v0);
23 
24    Temp v_in = inputs[0];
25 
26    {
27       /* Recognize when the result of VOPC goes to VCC, and use that for the branching then. */
28 
29       //! s2: %b:vcc = v_cmp_eq_u32 0, %a:v[0]
30       //! p_cbranch_z %b:vcc
31       //! p_unit_test 0
32       auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(),
33                            Operand(v_in, reg_v0));
34       auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp),
35                            Operand(exec, bld.lm));
36       bld.branch(aco_opcode::p_cbranch_z, bld.scc(sand.def(1).getTemp()));
37       writeout(0);
38    }
39 
40    //; del b
41 
42    {
43       /* When VCC is overwritten inbetween, don't optimize. */
44 
45       //! s2: %b:vcc = v_cmp_eq_u32 0, %a:v[0]
46       //! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:vcc, %x:exec
47       //! s2: %f:vcc = s_mov_b64 0
48       //! p_cbranch_z %d:scc
49       //! p_unit_test 1, %f:vcc
50       auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(),
51                            Operand(v_in, reg_v0));
52       auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp),
53                            Operand(exec, bld.lm));
54       auto ovrwr = bld.sop1(Builder::s_mov, bld.def(bld.lm, vcc), Operand::zero());
55       bld.branch(aco_opcode::p_cbranch_z, bld.scc(sand.def(1).getTemp()));
56       writeout(1, Operand(ovrwr, vcc));
57    }
58 
59    //; del b, c, d, f
60 
61    {
62       /* When part of VCC is overwritten inbetween, don't optimize. */
63 
64       //! s2: %b:vcc = v_cmp_eq_u32 0, %a:v[0]
65       //! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:vcc, %x:exec
66       //! s1: %f:vcc_hi = s_mov_b32 0
67       //! p_cbranch_z %d:scc
68       //! p_unit_test 1, %f:vcc_hi
69       auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(),
70                            Operand(v_in, reg_v0));
71       auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp),
72                            Operand(exec, bld.lm));
73       auto ovrwr = bld.sop1(aco_opcode::s_mov_b32, bld.def(s1, vcc_hi), Operand::zero());
74       bld.branch(aco_opcode::p_cbranch_z, bld.scc(sand.def(1).getTemp()));
75       writeout(1, Operand(ovrwr, vcc_hi));
76    }
77 
78    //; del b, c, d, f
79 
80    {
81       /* When the result of VOPC goes to an SGPR pair other than VCC, don't optimize */
82 
83       //! s2: %b:s[4-5] = v_cmp_eq_u32 0, %a:v[0]
84       //! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:s[4-5], %x:exec
85       //! p_cbranch_z %d:scc
86       //! p_unit_test 2
87       auto vcmp = bld.vopc_e64(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, reg_s4), Operand::zero(),
88                                Operand(v_in, reg_v0));
89       auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc),
90                            Operand(vcmp, reg_s4), Operand(exec, bld.lm));
91       bld.branch(aco_opcode::p_cbranch_z, bld.scc(sand.def(1).getTemp()));
92       writeout(2);
93    }
94 
95    //; del b, c, d
96 
97    {
98       /* When the VCC isn't written by VOPC, don't optimize */
99 
100       //! s2: %b:vcc, s1: %f:scc = s_or_b64 1, %0:s[4-5]
101       //! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:vcc, %x:exec
102       //! p_cbranch_z %d:scc
103       //! p_unit_test 2
104       auto salu = bld.sop2(Builder::s_or, bld.def(bld.lm, vcc), bld.def(s1, scc), Operand::c32(1u),
105                            Operand(reg_s4, bld.lm));
106       auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc),
107                            Operand(salu, vcc), Operand(exec, bld.lm));
108       bld.branch(aco_opcode::p_cbranch_z, bld.scc(sand.def(1).getTemp()));
109       writeout(2);
110    }
111 
112    //; del b, c, d, f, x
113 
114    {
115       /* When EXEC is overwritten inbetween, don't optimize. */
116 
117       //! s2: %b:vcc = v_cmp_eq_u32 0, %a:v[0]
118       //! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:vcc, %x:exec
119       //! s2: %f:exec = s_mov_b64 42
120       //! p_cbranch_z %d:scc
121       //! p_unit_test 4, %f:exec
122       auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(),
123                            Operand(v_in, reg_v0));
124       auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp),
125                            Operand(exec, bld.lm));
126       auto ovrwr = bld.sop1(Builder::s_mov, bld.def(bld.lm, exec), Operand::c32(42u));
127       bld.branch(aco_opcode::p_cbranch_z, bld.scc(sand.def(1).getTemp()));
128       writeout(4, Operand(ovrwr, exec));
129    }
130 
131    //; del b, c, d, f, x
132 
133    finish_optimizer_postRA_test();
134 END_TEST
135 
136 BEGIN_TEST(optimizer_postRA.scc_nocmp_opt)
137    //>> s1: %a, s2: %y, s1: %z = p_startpgm
138    ASSERTED bool setup_ok = setup_cs("s1 s2 s1", GFX6);
139    assert(setup_ok);
140 
141    PhysReg reg_s0{0};
142    PhysReg reg_s2{2};
143    PhysReg reg_s3{3};
144    PhysReg reg_s4{4};
145    PhysReg reg_s6{6};
146    PhysReg reg_s8{8};
147 
148    Temp in_0 = inputs[0];
149    Temp in_1 = inputs[1];
150    Temp in_2 = inputs[2];
151    Operand op_in_0(in_0);
152    op_in_0.setFixed(reg_s0);
153    Operand op_in_1(in_1);
154    op_in_1.setFixed(reg_s4);
155    Operand op_in_2(in_2);
156    op_in_2.setFixed(reg_s6);
157 
158    {
159       //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
160       //! p_cbranch_nz %e:scc
161       //! p_unit_test 0
162       auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
163                            Operand::c32(0x40018u));
164       auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2),
165                            Operand::zero());
166       bld.branch(aco_opcode::p_cbranch_z, bld.scc(scmp));
167       writeout(0);
168    }
169 
170    //; del d, e
171 
172    {
173       //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
174       //! p_cbranch_z %e:scc
175       //! p_unit_test 1
176       auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
177                            Operand::c32(0x40018u));
178       auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand(salu, reg_s2),
179                            Operand::zero());
180       bld.branch(aco_opcode::p_cbranch_z, bld.scc(scmp));
181       writeout(1);
182    }
183 
184    //; del d, e
185 
186    {
187       //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
188       //! p_cbranch_z %e:scc
189       //! p_unit_test 2
190       auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
191                            Operand::c32(0x40018u));
192       auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2),
193                            Operand::zero());
194       bld.branch(aco_opcode::p_cbranch_nz, bld.scc(scmp));
195       writeout(2);
196    }
197 
198    //; del d, e
199 
200    {
201       //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
202       //! p_cbranch_nz %e:scc
203       //! p_unit_test 3
204       auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
205                            Operand::c32(0x40018u));
206       auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand(salu, reg_s2),
207                            Operand::zero());
208       bld.branch(aco_opcode::p_cbranch_nz, bld.scc(scmp));
209       writeout(3);
210    }
211 
212    //; del d, e
213 
214    {
215       //! s2: %d:s[2-3], s1: %e:scc = s_and_b64 %y:s[4-5], 0x12345
216       //! p_cbranch_z %e:scc
217       //! p_unit_test 4
218       auto salu = bld.sop2(aco_opcode::s_and_b64, bld.def(s2, reg_s2), bld.def(s1, scc), op_in_1,
219                            Operand::c32(0x12345u));
220       auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u64, bld.def(s1, scc), Operand(salu, reg_s2),
221                            Operand::zero(8));
222       bld.branch(aco_opcode::p_cbranch_nz, bld.scc(scmp));
223       writeout(4);
224    }
225 
226    //; del d, e
227 
228    {
229       /* SCC is overwritten in between, don't optimize */
230 
231       //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
232       //! s1: %h:s[3], s1: %x:scc = s_add_u32 %a:s[0], 1
233       //! s1: %g:scc = s_cmp_eq_u32 %d:s[2], 0
234       //! p_cbranch_z %g:scc
235       //! p_unit_test 5, %h:s[3]
236       auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
237                            Operand::c32(0x40018u));
238       auto ovrw = bld.sop2(aco_opcode::s_add_u32, bld.def(s1, reg_s3), bld.def(s1, scc), op_in_0,
239                            Operand::c32(1u));
240       auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2),
241                            Operand::zero());
242       bld.branch(aco_opcode::p_cbranch_z, bld.scc(scmp));
243       writeout(5, Operand(ovrw, reg_s3));
244    }
245 
246    //; del d, e, g, h, x
247 
248    {
249       /* SCC is overwritten in between, optimize by pulling down */
250 
251       //! s1: %h:s[3], s1: %x:scc = s_add_u32 %a:s[0], 1
252       //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
253       //! p_cbranch_z %g:scc
254       //! p_unit_test 5, %h:s[3]
255       auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
256                            Operand::c32(0x40018u));
257       auto ovrw = bld.sop2(aco_opcode::s_add_u32, bld.def(s1, reg_s3), bld.def(s1, scc), op_in_0,
258                            Operand::c32(1u));
259       auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand(salu, reg_s2),
260                            Operand::zero());
261       bld.branch(aco_opcode::p_cbranch_z, bld.scc(scmp));
262       writeout(5, Operand(ovrw, reg_s3));
263    }
264 
265    //; del d, e, g, h, x
266 
267    {
268       /* SCC is overwritten in between, optimize by pulling down */
269 
270       //! s1: %h:s[3], s1: %x:scc = s_add_u32 %a:s[0], 1
271       //! s2: %d:s[8-9], s1: %e:scc = s_and_b64 %b:s[4-5], 0x40018
272       //! p_cbranch_z %g:scc
273       //! p_unit_test 5, %h:s[3]
274       auto salu = bld.sop2(aco_opcode::s_and_b64, bld.def(s2, reg_s8), bld.def(s1, scc), op_in_1,
275                            Operand::c32(0x40018u));
276       auto ovrw = bld.sop2(aco_opcode::s_add_u32, bld.def(s1, reg_s3), bld.def(s1, scc), op_in_0,
277                            Operand::c32(1u));
278       auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u64, bld.def(s1, scc), Operand(salu, reg_s8),
279                            Operand::zero());
280       bld.branch(aco_opcode::p_cbranch_z, bld.scc(scmp));
281       writeout(5, Operand(ovrw, reg_s3));
282    }
283 
284    //; del d, e, g, h, x
285 
286    {
287       //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
288       //! s1: %f:s[4] = s_cselect_b32 %z:s[6], %a:s[0], %e:scc
289       //! p_unit_test 6, %f:s[4]
290       auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
291                            Operand::c32(0x40018u));
292       auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2),
293                            Operand::zero());
294       auto br = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1, reg_s4), Operand(op_in_0),
295                          Operand(op_in_2), bld.scc(scmp));
296       writeout(6, Operand(br, reg_s4));
297    }
298 
299    //; del d, e, f
300 
301    {
302       /* SCC is overwritten in between, don't optimize */
303 
304       //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
305       //! s1: %h:s[3], s1: %x:scc = s_add_u32 %a:s[0], 1
306       //! s1: %g:scc = s_cmp_eq_u32 %d:s[2], 0
307       //! s1: %f:s[4] = s_cselect_b32 %a:s[0], %z:s[6], %g:scc
308       //! p_unit_test 7, %f:s[4], %h:s[3]
309       auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
310                            Operand::c32(0x40018u));
311       auto ovrw = bld.sop2(aco_opcode::s_add_u32, bld.def(s1, reg_s3), bld.def(s1, scc), op_in_0,
312                            Operand::c32(1u));
313       auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2),
314                            Operand::zero());
315       auto br = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1, reg_s4), Operand(op_in_0),
316                          Operand(op_in_2), bld.scc(scmp));
317       writeout(7, Operand(br, reg_s4), Operand(ovrw, reg_s3));
318    }
319 
320    //; del d, e, f, g, h, x
321 
322    finish_optimizer_postRA_test();
323 END_TEST
324 
325 BEGIN_TEST(optimizer_postRA.dpp)
326    //>> v1: %a:v[0], v1: %b:v[1], s2: %c:vcc, s2: %d:s[0-1] = p_startpgm
327    if (!setup_cs("v1 v1 s2 s2", GFX10_3))
328       return;
329 
330    bld.instructions->at(0)->definitions[0].setFixed(PhysReg(256));
331    bld.instructions->at(0)->definitions[1].setFixed(PhysReg(257));
332    bld.instructions->at(0)->definitions[2].setFixed(vcc);
333    bld.instructions->at(0)->definitions[3].setFixed(PhysReg(0));
334 
335    PhysReg reg_v0(256);
336    PhysReg reg_v2(258);
337    Operand a(inputs[0], PhysReg(256));
338    Operand b(inputs[1], PhysReg(257));
339    Operand c(inputs[2], vcc);
340    Operand d(inputs[3], PhysReg(0));
341 
342    /* basic optimization */
343    //! v1: %res0:v[2] = v_add_f32 %a:v[0], %b:v[1] row_mirror bound_ctrl:1 fi
344    //! p_unit_test 0, %res0:v[2]
345    Temp tmp0 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
346    Temp res0 = bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp0, reg_v2), b);
347    writeout(0, Operand(res0, reg_v2));
348 
349    /* operand swapping */
350    //! v1: %res1:v[2] = v_subrev_f32 %a:v[0], %b:v[1] row_mirror bound_ctrl:1 fi
351    //! p_unit_test 1, %res1:v[2]
352    Temp tmp1 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
353    Temp res1 = bld.vop2(aco_opcode::v_sub_f32, bld.def(v1, reg_v2), b, Operand(tmp1, reg_v2));
354    writeout(1, Operand(res1, reg_v2));
355 
356    //! v1: %tmp2:v[2] = v_mov_b32 %a:v[0] row_mirror bound_ctrl:1 fi
357    //! v1: %res2:v[2] = v_sub_f32 %b:v[1], %tmp2:v[2] row_half_mirror bound_ctrl:1 fi
358    //! p_unit_test 2, %res2:v[2]
359    Temp tmp2 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
360    Temp res2 = bld.vop2_dpp(aco_opcode::v_sub_f32, bld.def(v1, reg_v2), b, Operand(tmp2, reg_v2),
361                             dpp_row_half_mirror);
362    writeout(2, Operand(res2, reg_v2));
363 
364    /* modifiers */
365    //! v1: %res3:v[2] = v_add_f32 -%a:v[0], %b:v[1] row_mirror bound_ctrl:1 fi
366    //! p_unit_test 3, %res3:v[2]
367    auto tmp3 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
368    tmp3->dpp16().neg[0] = true;
369    Temp res3 = bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp3, reg_v2), b);
370    writeout(3, Operand(res3, reg_v2));
371 
372    //! v1: %res4:v[2] = v_add_f32 -%a:v[0], %b:v[1] row_mirror bound_ctrl:1 fi
373    //! p_unit_test 4, %res4:v[2]
374    Temp tmp4 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
375    auto res4 = bld.vop2_e64(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp4, reg_v2), b);
376    res4->valu().neg[0] = true;
377    writeout(4, Operand(res4, reg_v2));
378 
379    //! v1: %tmp5:v[2] = v_mov_b32 %a:v[0] row_mirror bound_ctrl:1 fi
380    //! v1: %res5:v[2] = v_add_f32 %tmp5:v[2], %b:v[1] clamp
381    //! p_unit_test 5, %res5:v[2]
382    Temp tmp5 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
383    auto res5 = bld.vop2_e64(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp5, reg_v2), b);
384    res5->valu().clamp = true;
385    writeout(5, Operand(res5, reg_v2));
386 
387    //! v1: %res6:v[2] = v_add_f32 |%a:v[0]|, %b:v[1] row_mirror bound_ctrl:1 fi
388    //! p_unit_test 6, %res6:v[2]
389    auto tmp6 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
390    tmp6->dpp16().neg[0] = true;
391    auto res6 = bld.vop2_e64(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp6, reg_v2), b);
392    res6->valu().abs[0] = true;
393    writeout(6, Operand(res6, reg_v2));
394 
395    //! v1: %res7:v[2] = v_subrev_f32 %a:v[0], |%b:v[1]| row_mirror bound_ctrl:1 fi
396    //! p_unit_test 7, %res7:v[2]
397    Temp tmp7 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
398    auto res7 = bld.vop2_e64(aco_opcode::v_sub_f32, bld.def(v1, reg_v2), b, Operand(tmp7, reg_v2));
399    res7->valu().abs[0] = true;
400    writeout(7, Operand(res7, reg_v2));
401 
402    //! v1: %tmp12:v[2] = v_mov_b32 -%a:v[0] row_mirror bound_ctrl:1 fi
403    //! v1: %res12:v[2] = v_add_u32 %tmp12:v[2], %b:v[1]
404    //! p_unit_test 12, %res12:v[2]
405    auto tmp12 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
406    tmp12->dpp16().neg[0] = true;
407    Temp res12 = bld.vop2(aco_opcode::v_add_u32, bld.def(v1, reg_v2), Operand(tmp12, reg_v2), b);
408    writeout(12, Operand(res12, reg_v2));
409 
410    //! v1: %tmp13:v[2] = v_mov_b32 -%a:v[0] row_mirror bound_ctrl:1 fi
411    //! v1: %res13:v[2] = v_add_f16 %tmp13:v[2], %b:v[1]
412    //! p_unit_test 13, %res13:v[2]
413    auto tmp13 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
414    tmp13->dpp16().neg[0] = true;
415    Temp res13 = bld.vop2(aco_opcode::v_add_f16, bld.def(v1, reg_v2), Operand(tmp13, reg_v2), b);
416    writeout(13, Operand(res13, reg_v2));
417 
418    /* vcc */
419    //! v1: %res8:v[2] = v_cndmask_b32 %a:v[0], %b:v[1], %c:vcc row_mirror bound_ctrl:1 fi
420    //! p_unit_test 8, %res8:v[2]
421    Temp tmp8 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
422    Temp res8 =
423       bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1, reg_v2), Operand(tmp8, reg_v2), b, c);
424    writeout(8, Operand(res8, reg_v2));
425 
426    //! v1: %tmp9:v[2] = v_mov_b32 %a:v[0] row_mirror bound_ctrl:1 fi
427    //! v1: %res9:v[2] = v_cndmask_b32 %tmp9:v[2], %b:v[1], %d:s[0-1]
428    //! p_unit_test 9, %res9:v[2]
429    Temp tmp9 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
430    Temp res9 =
431       bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1, reg_v2), Operand(tmp9, reg_v2), b, d);
432    writeout(9, Operand(res9, reg_v2));
433 
434    /* control flow */
435    //! BB1
436    //! /* logical preds: BB0, / linear preds: BB0, / kind: uniform, */
437    //! v1: %res10:v[2] = v_add_f32 %a:v[0], %b:v[1] row_mirror bound_ctrl:1 fi
438    //! p_unit_test 10, %res10:v[2]
439    Temp tmp10 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
440 
441    bld.reset(program->create_and_insert_block());
442    program->blocks[0].linear_succs.push_back(1);
443    program->blocks[0].logical_succs.push_back(1);
444    program->blocks[1].linear_preds.push_back(0);
445    program->blocks[1].logical_preds.push_back(0);
446 
447    Temp res10 = bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp10, reg_v2), b);
448    writeout(10, Operand(res10, reg_v2));
449 
450    /* can't combine if the v_mov_b32's operand is modified */
451    //! v1: %tmp11_1:v[2] = v_mov_b32 %a:v[0] row_mirror bound_ctrl:1 fi
452    //! v1: %tmp11_2:v[0] = v_mov_b32 0
453    //! v1: %res11:v[2] = v_add_f32 %tmp11_1:v[2], %b:v[1]
454    //! p_unit_test 11, %res11_1:v[2], %tmp11_2:v[0]
455    Temp tmp11_1 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
456    Temp tmp11_2 = bld.vop1(aco_opcode::v_mov_b32, bld.def(v1, reg_v0), Operand::c32(0));
457    Temp res11 = bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp11_1, reg_v2), b);
458    writeout(11, Operand(res11, reg_v2), Operand(tmp11_2, reg_v0));
459 
460    finish_optimizer_postRA_test();
461 END_TEST
462 
463 BEGIN_TEST(optimizer_postRA.dpp_across_exec)
464    for (amd_gfx_level gfx : {GFX9, GFX10}) {
465       //>> v1: %a:v[0], v1: %b:v[1] = p_startpgm
466       if (!setup_cs("v1 v1", gfx))
467          continue;
468 
469       bld.instructions->at(0)->definitions[0].setFixed(PhysReg(256));
470       bld.instructions->at(0)->definitions[1].setFixed(PhysReg(257));
471 
472       PhysReg reg_v2(258);
473       Operand a(inputs[0], PhysReg(256));
474       Operand b(inputs[1], PhysReg(257));
475 
476       //~gfx9! v1: %tmp0:v[2] = v_mov_b32 %a:v[0] row_mirror bound_ctrl:1
477       //! s2: %0:exec,  s1: %0:scc = s_not_b64 %0:exec
478       //~gfx9! v1: %res0:v[2] = v_add_f32 %tmp0:v[2], %b:v[1]
479       //~gfx10! v1: %res0:v[2] = v_add_f32 %a:v[0], %b:v[1] row_mirror bound_ctrl:1 fi
480       //! p_unit_test 0, %res0:v[2]
481       Temp tmp0 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
482       bld.sop1(Builder::s_not, Definition(exec, bld.lm), Definition(scc, s1),
483                Operand(exec, bld.lm));
484       Temp res0 = bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp0, reg_v2), b);
485       writeout(0, Operand(res0, reg_v2));
486 
487       finish_optimizer_postRA_test();
488    }
489 END_TEST
490 
491 BEGIN_TEST(optimizer_postRA.dpp_vcmpx)
492    //>> v1: %a:v[0], v1: %b:v[1] = p_startpgm
493    if (!setup_cs("v1 v1", GFX11))
494       return;
495 
496    bld.instructions->at(0)->definitions[0].setFixed(PhysReg(256));
497    bld.instructions->at(0)->definitions[1].setFixed(PhysReg(257));
498 
499    PhysReg reg_v2(258);
500    Operand a(inputs[0], PhysReg(256));
501    Operand b(inputs[1], PhysReg(257));
502 
503    //! v1: %tmp0:v[2] = v_mov_b32 %a:v[0] row_mirror bound_ctrl:1 fi
504    //! s2: %res0:exec = v_cmpx_lt_f32 %tmp0:v[2], %b:v[1]
505    //! p_unit_test 0, %res0:exec
506    Temp tmp0 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
507    Temp res0 = bld.vopc(aco_opcode::v_cmpx_lt_f32, bld.def(bld.lm, exec), Operand(tmp0, reg_v2), b);
508    writeout(0, Operand(res0, exec));
509 
510    finish_optimizer_postRA_test();
511 END_TEST
512 
513 BEGIN_TEST(optimizer_postRA.dpp_across_cf)
514    //>> v1: %a:v[0], v1: %b:v[1], v1: %c:v[2], v1: %d:v[3], s2: %e:s[0-1], s4: %f:s[4-7] = p_startpgm
515    if (!setup_cs("v1 v1 v1 v1 s2 s4", GFX10_3))
516       return;
517 
518    aco_ptr<Instruction>& startpgm = bld.instructions->at(0);
519    startpgm->definitions[0].setFixed(PhysReg(256));
520    startpgm->definitions[1].setFixed(PhysReg(257));
521    startpgm->definitions[2].setFixed(PhysReg(258));
522    startpgm->definitions[3].setFixed(PhysReg(259));
523    startpgm->definitions[4].setFixed(PhysReg(0));
524    startpgm->definitions[5].setFixed(PhysReg(4));
525 
526    Operand a(inputs[0], PhysReg(256)); /* source for DPP */
527    Operand b(inputs[1], PhysReg(257)); /* source for fadd */
528    Operand c(inputs[2], PhysReg(258)); /* buffer store address */
529    Operand d(inputs[3], PhysReg(259)); /* buffer store value */
530    Operand e(inputs[4], PhysReg(0));   /* condition */
531    Operand f(inputs[5], PhysReg(4));   /* buffer descriptor */
532    PhysReg reg_v12(268);               /* temporary register */
533 
534    Temp dpp_tmp = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v12), a, dpp_row_mirror);
535 
536    //! s2: %saved_exec:s[84-85],  s1: %0:scc,  s2: %0:exec = s_and_saveexec_b64 %e:s[0-1], %0:exec
537    //! p_cbranch_nz BB1, BB2
538 
539    emit_divergent_if_else(
540       program.get(), bld, e,
541       [&]() -> void
__anon703f0dc20102() 542       {
543          /* --- logical then --- */
544          //! BB1
545          //! /* logical preds: BB0, / linear preds: BB0, / kind: */
546          //! p_logical_start
547 
548          //! buffer_store_dword %f:s[4-7], %c:v[2], 0, %d:v[3] offen
549          bld.mubuf(aco_opcode::buffer_store_dword, f, c, Operand::zero(), d, 0, true);
550 
551          //! v1: %res10:v[12] = v_add_f32 %a:v[0], %b:v[1] row_mirror bound_ctrl:1 fi
552          //! p_unit_test 10, %res10:v[12]
553          Temp result =
554             bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v12), Operand(dpp_tmp, reg_v12), b);
555          writeout(10, Operand(result, reg_v12));
556 
557          //! p_logical_end
558          //! p_branch BB3
559 
560          /* --- linear then --- */
561          //! BB2
562          //! /* logical preds: / linear preds: BB0, / kind: */
563          //! p_branch BB3
564 
565          /* --- invert --- */
566          //! BB3
567          //! /* logical preds: / linear preds: BB1, BB2, / kind: invert, */
568          //! s2: %0:exec,  s1: %0:scc = s_andn2_b64 %saved_exec:s[84-85], %0:exec
569          //! p_cbranch_nz BB4, BB5
570       },
571       [&]() -> void
__anon703f0dc20202() 572       {
573          /* --- logical else --- */
574          //! BB4
575          //! /* logical preds: BB0, / linear preds: BB3, / kind: */
576          //! p_logical_start
577          //! p_logical_end
578          //! p_branch BB6
579 
580          /* --- linear else --- */
581          //! BB5
582          //! /* logical preds: / linear preds: BB3, / kind: */
583          //! p_branch BB6
584       });
585 
586    /* --- merge block --- */
587    //! BB6
588    //! /* logical preds: BB1, BB4, / linear preds: BB4, BB5, / kind: uniform, top-level, merge, */
589    //! s2: %0:exec = p_parallelcopy %saved_exec:s[84-85]
590 
591    finish_optimizer_postRA_test();
592 END_TEST
593 
594 BEGIN_TEST(optimizer_postRA.dpp_across_cf_overwritten)
595    //>> v1: %a:v[0], v1: %b:v[1], s4: %c:s[4-7], v1: %d:v[3], s2: %e:s[0-1], s1: %f:s[2] = p_startpgm
596    if (!setup_cs("v1 v1 s4 v1 s2 s1", GFX10_3))
597       return;
598 
599    aco_ptr<Instruction>& startpgm = bld.instructions->at(0);
600    startpgm->definitions[0].setFixed(PhysReg(256));
601    startpgm->definitions[1].setFixed(PhysReg(257));
602    startpgm->definitions[2].setFixed(PhysReg(4));
603    startpgm->definitions[3].setFixed(PhysReg(259));
604    startpgm->definitions[4].setFixed(PhysReg(0));
605    startpgm->definitions[5].setFixed(PhysReg(2));
606 
607    Operand a(inputs[0], PhysReg(256)); /* source for DPP */
608    Operand b(inputs[1], PhysReg(257)); /* source for fadd */
609    Operand c(inputs[2], PhysReg(4));   /* buffer descriptor */
610    Operand d(inputs[3], PhysReg(259)); /* buffer store value */
611    Operand e(inputs[4], PhysReg(0));   /* condition */
612    Operand f(inputs[5], PhysReg(2));   /* buffer store address (scalar) */
613    PhysReg reg_v12(268);               /* temporary register */
614 
615    //! v1: %dpp_tmp:v[12] = v_mov_b32 %a:v[0] row_mirror bound_ctrl:1 fi
616    Temp dpp_tmp = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v12), a, dpp_row_mirror);
617 
618    //! s2: %saved_exec:s[84-85],  s1: %0:scc,  s2: %0:exec = s_and_saveexec_b64 %e:s[0-1], %0:exec
619    //! p_cbranch_nz BB1, BB2
620 
621    emit_divergent_if_else(
622       program.get(), bld, e,
623       [&]() -> void
__anon703f0dc20302() 624       {
625          /* --- logical then --- */
626          //! BB1
627          //! /* logical preds: BB0, / linear preds: BB0, / kind: */
628          //! p_logical_start
629 
630          //! v1: %addr:v[0] = p_parallelcopy %f:s[2]
631          Temp addr = bld.pseudo(aco_opcode::p_parallelcopy, bld.def(v1, a.physReg()), f);
632 
633          //! buffer_store_dword %c:s[4-7], %addr:v[0], 0, %d:v[3] offen
634          bld.mubuf(aco_opcode::buffer_store_dword, c, Operand(addr, a.physReg()), Operand::zero(),
635                    d, 0, true);
636 
637          //! p_logical_end
638          //! p_branch BB3
639 
640          /* --- linear then --- */
641          //! BB2
642          //! /* logical preds: / linear preds: BB0, / kind: */
643          //! p_branch BB3
644 
645          /* --- invert --- */
646          //! BB3
647          //! /* logical preds: / linear preds: BB1, BB2, / kind: invert, */
648          //! s2: %0:exec,  s1: %0:scc = s_andn2_b64 %saved_exec:s[84-85], %0:exec
649          //! p_cbranch_nz BB4, BB5
650       },
651       [&]() -> void
__anon703f0dc20402() 652       {
653          /* --- logical else --- */
654          //! BB4
655          //! /* logical preds: BB0, / linear preds: BB3, / kind: */
656          //! p_logical_start
657          //! p_logical_end
658          //! p_branch BB6
659 
660          /* --- linear else --- */
661          //! BB5
662          //! /* logical preds: / linear preds: BB3, / kind: */
663          //! p_branch BB6
664       });
665 
666    /* --- merge block --- */
667    //! BB6
668    //! /* logical preds: BB1, BB4, / linear preds: BB4, BB5, / kind: uniform, top-level, merge, */
669    //! s2: %0:exec = p_parallelcopy %saved_exec:s[84-85]
670 
671    //! v1: %result:v[12] = v_add_f32 %dpp_mov_tmp:v[12], %b:v[1]
672    Temp result =
673       bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v12), Operand(dpp_tmp, reg_v12), b);
674    //! p_unit_test 10, %result:v[12]
675    writeout(10, Operand(result, reg_v12));
676 
677    finish_optimizer_postRA_test();
678 END_TEST
679 
680 BEGIN_TEST(optimizer_postRA.dpp_across_cf_linear_clobber)
       /* Scenario: a DPP v_mov_b32 is emitted before a divergent if/else, the logical
        * then-block overwrites the register the mov reads (v[0]), and the logical
        * else-block consumes the mov result with a v_add_f32. The check comments list
        * the v_mov_b32 (with its DPP modifiers) and a plain v_add_f32 as two separate
        * instructions, so the mov is presumably expected NOT to be folded into the add. */
681    //>> v1: %a:v[0], v1: %b:v[1], s2: %c:s[0-1] = p_startpgm
       /* Leave the test early when setup_cs returns false. */
682    if (!setup_cs("v1 v1 s2", GFX10_3))
683       return;
684 
       /* Pin the three p_startpgm definitions to fixed registers. Per the check line
        * above, PhysReg 256 and 257 print as v[0] and v[1], and PhysReg 0 as s[0-1]. */
685    aco_ptr<Instruction>& startpgm = bld.instructions->at(0);
686    startpgm->definitions[0].setFixed(PhysReg(256));
687    startpgm->definitions[1].setFixed(PhysReg(257));
688    startpgm->definitions[2].setFixed(PhysReg(0));
689 
       /* Operands referring to the program inputs at the registers fixed above. */
690    Operand a(inputs[0], PhysReg(256)); /* source for DPP */
691    Operand b(inputs[1], PhysReg(257)); /* source for fadd */
692    Operand c(inputs[2], PhysReg(0));   /* condition */
693    PhysReg reg_v12(268);               /* temporary register */
694 
       /* DPP mov from v[0] into v[12], emitted in the block preceding the if/else. */
695    //! v1: %dpp_tmp:v[12] = v_mov_b32 %a:v[0] row_mirror bound_ctrl:1 fi
696    Temp dpp_tmp = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v12), a, dpp_row_mirror);
697 
       /* Expected branch prologue on condition c; presumably generated by
        * emit_divergent_if_else rather than by the callbacks below. */
698    //! s2: %saved_exec:s[84-85],  s1: %0:scc,  s2: %0:exec = s_and_saveexec_b64 %c:s[0-1], %0:exec
699    //! p_cbranch_nz BB1, BB2
700 
       /* First callback fills the logical then-block, second one the logical else-block. */
701    emit_divergent_if_else(
702       program.get(), bld, c,
703       [&]() -> void
__anon703f0dc20502() 704       {
705          /* --- logical then --- */
706          //! BB1
707          //! /* logical preds: BB0, / linear preds: BB0, / kind: */
708          //! p_logical_start
709 
             /* Overwrite v[0] (a.physReg()), i.e. the register the DPP mov reads, with 0. */
710          //! v1: %clobber:v[0] = p_parallelcopy 0
711          Temp clobber =
712             bld.pseudo(aco_opcode::p_parallelcopy, bld.def(v1, a.physReg()), Operand::c32(0));
713 
             /* Pass the clobbering value to a p_unit_test instruction (id 0). */
714          //! p_unit_test 0, %clobber:v[0]
715          writeout(0, Operand(clobber, a.physReg()));
716 
717          //! p_logical_end
718          //! p_branch BB3
719 
             /* The expectations below describe blocks for which this callback emits no
              * code; presumably emit_divergent_if_else creates them. */
720          /* --- linear then --- */
721          //! BB2
722          //! /* logical preds: / linear preds: BB0, / kind: */
723          //! p_branch BB3
724 
725          /* --- invert --- */
726          //! BB3
727          //! /* logical preds: / linear preds: BB1, BB2, / kind: invert, */
728          //! s2: %0:exec,  s1: %0:scc = s_andn2_b64 %saved_exec:s[84-85], %0:exec
729          //! p_cbranch_nz BB4, BB5
730       },
731       [&]() -> void
__anon703f0dc20602() 732       {
733          /* --- logical else --- */
734          //! BB4
735          //! /* logical preds: BB0, / linear preds: BB3, / kind: */
736          //! p_logical_start
737 
             /* Add b to the DPP mov result held in v[12]; the result is written to v[12] too.
              * NOTE(review): the check line names the first operand %dpp_mov_tmp, whereas the
              * mov result was named %dpp_tmp in the earlier check line - confirm that a
              * different name is intended here. */
738          //! v1: %result:v[12] = v_add_f32 %dpp_mov_tmp:v[12], %b:v[1]
739          Temp result =
740             bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v12), Operand(dpp_tmp, reg_v12), b);
741          //! p_unit_test 1, %result:v[12]
742          writeout(1, Operand(result, reg_v12));
743 
744          //! p_logical_end
745          //! p_branch BB6
746 
747          /* --- linear else --- */
748          //! BB5
749          //! /* logical preds: / linear preds: BB3, / kind: */
750          //! p_branch BB6
751       });
752 
753    /* --- merge block --- */
754    //! BB6
755    //! /* logical preds: BB1, BB4, / linear preds: BB4, BB5, / kind: uniform, top-level, merge, */
756    //! s2: %0:exec = p_parallelcopy %saved_exec:s[84-85]
757 
       /* Presumably runs the post-RA optimizer and hands the result to the checker - see helpers.h. */
758    finish_optimizer_postRA_test();
759 END_TEST
760 
761 BEGIN_TEST(optimizer_postRA.scc_nocmp_across_cf)
       /* Scenario: an s_and_b64 that also defines SCC is emitted before a divergent
        * if/else; after the merge block its result is compared against zero with
        * s_cmp_lg_u64 and a p_cbranch_z is taken on that SCC. The test code emits only a
        * buffer_store_dword inside the branches. The check comments after the merge block
        * list an s_and_b64 followed directly by the branch on its SCC result, with no
        * s_cmp_lg_u64 in between. */
762    //>> s2: %a:s[2-3], v1: %c:v[2], v1: %d:v[3], s2: %e:s[0-1], s4: %f:s[4-7] = p_startpgm
       /* Leave the test early when setup_cs returns false. */
763    if (!setup_cs("s2 v1 v1 s2 s4", GFX10_3))
764       return;
765 
       /* Pin the p_startpgm definitions to the registers named in the check line above
        * (PhysReg 258 and 259 print as v[2] and v[3]). */
766    aco_ptr<Instruction>& startpgm = bld.instructions->at(0);
767    startpgm->definitions[0].setFixed(PhysReg(2));
768    startpgm->definitions[1].setFixed(PhysReg(258));
769    startpgm->definitions[2].setFixed(PhysReg(259));
770    startpgm->definitions[3].setFixed(PhysReg(0));
771    startpgm->definitions[4].setFixed(PhysReg(4));
772 
       /* Operands referring to the program inputs at the registers fixed above. */
773    Operand a(inputs[0], PhysReg(2));   /* source for s_and */
774    Operand c(inputs[1], PhysReg(258)); /* buffer store address */
775    Operand d(inputs[2], PhysReg(259)); /* buffer store value */
776    Operand e(inputs[3], PhysReg(0));   /* condition */
777    Operand f(inputs[4], PhysReg(4));   /* buffer descriptor */
778    PhysReg reg_s8(8);                  /* temporary register */
779 
       /* s_and_b64 of a with a constant, writing s[8-9] and SCC. No check line is attached
        * at this point; the instruction is expected further down, after the merge block. */
780    auto tmp_salu = bld.sop2(aco_opcode::s_and_b64, bld.def(s2, reg_s8), bld.def(s1, scc), a,
781                             Operand::c32(0x40018u));
782 
       /* Expected branch prologue on condition e; note that it defines SCC as well.
        * Presumably generated by emit_divergent_if_else rather than by the callbacks. */
783    //! s2: %saved_exec:s[84-85],  s1: %0:scc,  s2: %0:exec = s_and_saveexec_b64 %e:s[0-1], %0:exec
784    //! p_cbranch_nz BB1, BB2
785 
       /* First callback fills the logical then-block, second one the logical else-block. */
786    emit_divergent_if_else(
787       program.get(), bld, e,
788       [&]() -> void
__anon703f0dc20702() 789       {
790          /* --- logical then --- */
791          //! BB1
792          //! /* logical preds: BB0, / linear preds: BB0, / kind: */
793          //! p_logical_start
794 
             /* The only instruction the test emits inside the branches: a buffer store
              * using descriptor f, address c and value d. */
795          //! buffer_store_dword %f:s[4-7], %c:v[2], 0, %d:v[3] offen
796          bld.mubuf(aco_opcode::buffer_store_dword, f, c, Operand::zero(), d, 0, true);
797 
798          //! p_logical_end
799          //! p_branch BB3
800 
             /* The expectations below describe blocks for which this callback emits no
              * code; presumably emit_divergent_if_else creates them. */
801          /* --- linear then --- */
802          //! BB2
803          //! /* logical preds: / linear preds: BB0, / kind: */
804          //! p_branch BB3
805 
806          /* --- invert --- */
807          //! BB3
808          //! /* logical preds: / linear preds: BB1, BB2, / kind: invert, */
809          //! s2: %0:exec,  s1: %0:scc = s_andn2_b64 %saved_exec:s[84-85], %0:exec
810          //! p_cbranch_nz BB4, BB5
811       },
812       [&]() -> void
__anon703f0dc20802() 813       {
             /* The else callback emits nothing; only the block structure is checked. */
814          /* --- logical else --- */
815          //! BB4
816          //! /* logical preds: BB0, / linear preds: BB3, / kind: */
817          //! p_logical_start
818          //! p_logical_end
819          //! p_branch BB6
820 
821          /* --- linear else --- */
822          //! BB5
823          //! /* logical preds: / linear preds: BB3, / kind: */
824          //! p_branch BB6
825       });
826 
827    /* --- merge block --- */
828    //! BB6
829    //! /* logical preds: BB1, BB4, / linear preds: BB4, BB5, / kind: uniform, top-level, merge, */
830    //! s2: %0:exec = p_parallelcopy %saved_exec:s[84-85]
831 
       /* The code emits s_cmp_lg_u64 (tmp_salu vs. 0) and a branch on its SCC, while the
        * check lines expect the s_and_b64 at this position with the branch reading the SCC
        * it defines. */
832    //! s2: %tmp_salu:s[8-9], s1: %br_scc:scc = s_and_b64 %a:s[2-3], 0x40018
833    //! p_cbranch_z %br_scc:scc
834    //! p_unit_test 5
835    auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u64, bld.def(s1, scc), Operand(tmp_salu, reg_s8),
836                         Operand::zero());
837    bld.branch(aco_opcode::p_cbranch_z, bld.scc(scmp));
838    writeout(5);
839 
       /* Presumably runs the post-RA optimizer and hands the result to the checker - see helpers.h. */
840    finish_optimizer_postRA_test();
841 END_TEST
842 
843 BEGIN_TEST(optimizer_postRA.scc_nocmp_across_cf_partially_overwritten)
       /* Scenario: an s_and_b64 that also defines SCC is emitted before a divergent
        * if/else, and its result is compared against zero with s_cmp_lg_u64 after the
        * merge block. Inside the logical then-block, s[3] is written, which is one of the
        * two registers of the s_and_b64 source operand a (s[2-3]). The check comments keep
        * the s_and_b64 at its original position and keep the s_cmp_lg_u64 in front of the
        * branch, i.e. the expected output equals the emitted code. */
844    //>> s2: %a:s[2-3], v1: %c:v[2], v1: %d:v[3], s2: %e:s[0-1], s1: %f:s[4], s4: %g:s[8-11] = p_startpgm
       /* Leave the test early when setup_cs returns false. */
845    if (!setup_cs("s2 v1 v1 s2 s1 s4", GFX10_3))
846       return;
847 
       /* Pin the p_startpgm definitions to the registers named in the check line above
        * (PhysReg 258 and 259 print as v[2] and v[3]). */
848    aco_ptr<Instruction>& startpgm = bld.instructions->at(0);
849    startpgm->definitions[0].setFixed(PhysReg(2));
850    startpgm->definitions[1].setFixed(PhysReg(258));
851    startpgm->definitions[2].setFixed(PhysReg(259));
852    startpgm->definitions[3].setFixed(PhysReg(0));
853    startpgm->definitions[4].setFixed(PhysReg(4));
854    startpgm->definitions[5].setFixed(PhysReg(8));
855 
       /* Operands referring to the program inputs at the registers fixed above.
        * NOTE(review): reg_s8 (s[8-9], destination of the s_and_b64 below) overlaps the
        * buffer descriptor g at s[8-11] - confirm this overlap is intended. */
856    Operand a(inputs[0], PhysReg(2));   /* source for s_and */
857    Operand c(inputs[1], PhysReg(258)); /* buffer store address */
858    Operand d(inputs[2], PhysReg(259)); /* buffer store value */
859    Operand e(inputs[3], PhysReg(0));   /* condition */
860    Operand f(inputs[4], PhysReg(4));   /* overwrite value */
861    Operand g(inputs[5], PhysReg(8));   /* buffer descriptor */
862    PhysReg reg_s3(3);                  /* temporary register */
863    PhysReg reg_s8(8);                  /* temporary register */
864 
       /* s_and_b64 of a with a constant, writing s[8-9] and SCC; expected to stay here. */
865    //! s2: %tmp_salu:s[8-9], s1: %tmp_salu_scc:scc = s_and_b64 %a:s[2-3], 0x40018
866    auto tmp_salu = bld.sop2(aco_opcode::s_and_b64, bld.def(s2, reg_s8), bld.def(s1, scc), a,
867                             Operand::c32(0x40018u));
868 
       /* Expected branch prologue on condition e; note that it defines SCC as well.
        * Presumably generated by emit_divergent_if_else rather than by the callbacks. */
869    //! s2: %saved_exec:s[84-85],  s1: %0:scc,  s2: %0:exec = s_and_saveexec_b64 %e:s[0-1], %0:exec
870    //! p_cbranch_nz BB1, BB2
871 
       /* First callback fills the logical then-block, second one the logical else-block. */
872    emit_divergent_if_else(
873       program.get(), bld, e,
874       [&]() -> void
__anon703f0dc20902() 875       {
876          /* --- logical then --- */
877          //! BB1
878          //! /* logical preds: BB0, / linear preds: BB0, / kind: */
879          //! p_logical_start
880 
             /* Copy f into s[3]. This overwrites part of s[2-3], the s_and_b64 source. */
881          //! s1: %ovrwr:s[3] = p_parallelcopy %f:s[4]
882          Temp s_addr = bld.pseudo(aco_opcode::p_parallelcopy, bld.def(s1, reg_s3), f);
883 
             /* Buffer store that takes the freshly written s[3] as one of its operands. */
884          //! buffer_store_dword %g:s[8-11], %c:v[2], %ovrwr:s[3], %d:v[3] offen
885          bld.mubuf(aco_opcode::buffer_store_dword, g, c, Operand(s_addr, reg_s3), d, 0, true);
886 
887          //! p_logical_end
888          //! p_branch BB3
889 
             /* The expectations below describe blocks for which this callback emits no
              * code; presumably emit_divergent_if_else creates them. */
890          /* --- linear then --- */
891          //! BB2
892          //! /* logical preds: / linear preds: BB0, / kind: */
893          //! p_branch BB3
894 
895          /* --- invert --- */
896          //! BB3
897          //! /* logical preds: / linear preds: BB1, BB2, / kind: invert, */
898          //! s2: %0:exec,  s1: %0:scc = s_andn2_b64 %saved_exec:s[84-85], %0:exec
899          //! p_cbranch_nz BB4, BB5
900       },
901       [&]() -> void
__anon703f0dc20a02() 902       {
             /* The else callback emits nothing; only the block structure is checked. */
903          /* --- logical else --- */
904          //! BB4
905          //! /* logical preds: BB0, / linear preds: BB3, / kind: */
906          //! p_logical_start
907          //! p_logical_end
908          //! p_branch BB6
909 
910          /* --- linear else --- */
911          //! BB5
912          //! /* logical preds: / linear preds: BB3, / kind: */
913          //! p_branch BB6
914       });
915 
916    /* --- merge block --- */
917    //! BB6
918    //! /* logical preds: BB1, BB4, / linear preds: BB4, BB5, / kind: uniform, top-level, merge, */
919    //! s2: %0:exec = p_parallelcopy %saved_exec:s[84-85]
920 
       /* Compare the s_and_b64 result with zero and branch on SCC. The check lines expect
        * exactly these instructions, so the compare is expected to remain. */
921    //! s1: %br_scc:scc = s_cmp_lg_u64 %tmp_salu:s[8-9], 0
922    //! p_cbranch_z %br_scc:scc
923    //! p_unit_test 5
924    auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u64, bld.def(s1, scc), Operand(tmp_salu, reg_s8),
925                         Operand::zero());
926    bld.branch(aco_opcode::p_cbranch_z, bld.scc(scmp));
927    writeout(5);
928 
       /* Presumably runs the post-RA optimizer and hands the result to the checker - see helpers.h. */
929    finish_optimizer_postRA_test();
930 END_TEST
931