/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir/nir_builder.h"
#include "nir.h"
#include "nir_constant_expressions.h"
#include "nir_control_flow.h"
#include "nir_loop_analyze.h"

static nir_def *clone_alu_and_replace_src_defs(nir_builder *b,
                                               const nir_alu_instr *alu,
                                               nir_def **src_defs);

/**
 * Gets the single block that jumps back to the loop header.  Assumes there
 * is exactly one such block.
 */
static nir_block *
find_continue_block(nir_loop *loop)
{
   nir_block *header_block = nir_loop_first_block(loop);
   nir_block *prev_block =
      nir_cf_node_as_block(nir_cf_node_prev(&loop->cf_node));

   assert(header_block->predecessors->entries == 2);

   set_foreach(header_block->predecessors, pred_entry) {
      if (pred_entry->key != prev_block)
         return (nir_block *)pred_entry->key;
   }

   unreachable("Continue block not found!");
}

/**
 * Does a phi have one constant value from outside a loop and one from inside?
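 *
 * For example (an illustrative sketch; the ssa numbering is hypothetical),
 * given ssa_0 = load_const (0x00000000) and ssa_1 = load_const (0xffffffff),
 * a condition phi such as
 *
 *    vec1 32 ssa_3 = phi block_0: ssa_0, block_7: ssa_1
 *
 * matches, with *entry_val = false and *continue_val = true.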
 */
static bool
phi_has_constant_from_outside_and_one_from_inside_loop(nir_phi_instr *phi,
                                                       const nir_block *entry_block,
                                                       bool *entry_val,
                                                       bool *continue_val)
{
   /* We already know we have exactly one continue */
   assert(exec_list_length(&phi->srcs) == 2);

   *entry_val = false;
   *continue_val = false;

   nir_foreach_phi_src(src, phi) {
      if (!nir_src_is_const(src->src))
         return false;

      if (src->pred != entry_block) {
         *continue_val = nir_src_as_bool(src->src);
      } else {
         *entry_val = nir_src_as_bool(src->src);
      }
   }

   return true;
}

/**
 * This optimization detects if statements at the tops of loops where the
 * condition is a phi node of two constants and moves half of the if to above
 * the loop and the other half of the if to the end of the loop.  A simple for
 * loop "for (int i = 0; i < 4; i++)", when run through the SPIR-V front-end,
 * ends up looking something like this:
 *
 * vec1 32 ssa_0 = load_const (0x00000000)
 * vec1 32 ssa_1 = load_const (0xffffffff)
 * loop {
 *    block block_1:
 *    vec1 32 ssa_2 = phi block_0: ssa_0, block_7: ssa_5
 *    vec1 32 ssa_3 = phi block_0: ssa_0, block_7: ssa_1
 *    if ssa_3 {
 *       block block_2:
 *       vec1 32 ssa_4 = load_const (0x00000001)
 *       vec1 32 ssa_5 = iadd ssa_2, ssa_4
 *    } else {
 *       block block_3:
 *    }
 *    block block_4:
 *    vec1 32 ssa_6 = load_const (0x00000004)
 *    vec1 32 ssa_7 = ilt ssa_5, ssa_6
 *    if ssa_7 {
 *       block block_5:
 *    } else {
 *       block block_6:
 *       break
 *    }
 *    block block_7:
 * }
 *
 * This turns it into something like this:
 *
 * // Stuff from block 1
 * // Stuff from block 3
 * loop {
 *    block block_1:
 *    vec1 32 ssa_2 = phi block_0: ssa_0, block_7: ssa_5
 *    vec1 32 ssa_6 = load_const (0x00000004)
 *    vec1 32 ssa_7 = ilt ssa_2, ssa_6
 *    if ssa_7 {
 *       block block_5:
 *    } else {
 *       block block_6:
 *       break
 *    }
 *    block block_7:
 *    // Stuff from block 1
 *    // Stuff from block 2
 *    vec1 32 ssa_4 = load_const (0x00000001)
 *    vec1 32 ssa_5 = iadd ssa_2, ssa_4
 * }
 */
static bool
opt_peel_loop_initial_if(nir_loop *loop)
{
   nir_block *header_block = nir_loop_first_block(loop);
   nir_block *const prev_block =
      nir_cf_node_as_block(nir_cf_node_prev(&loop->cf_node));

   /* It would be insane if this were not true */
   assert(_mesa_set_search(header_block->predecessors, prev_block));

   /* The loop must have exactly one continue block which could be a block
    * ending in a continue instruction or the "natural" continue from the
    * last block in the loop back to the top.
    */
   if (header_block->predecessors->entries != 2)
      return false;

   nir_cf_node *if_node = nir_cf_node_next(&header_block->cf_node);
   if (!if_node || if_node->type != nir_cf_node_if)
      return false;

   nir_if *nif = nir_cf_node_as_if(if_node);

   nir_def *cond = nif->condition.ssa;
   if (cond->parent_instr->type != nir_instr_type_phi)
      return false;

   nir_phi_instr *cond_phi = nir_instr_as_phi(cond->parent_instr);
   if (cond->parent_instr->block != header_block)
      return false;

   bool entry_val = false, continue_val = false;
   if (!phi_has_constant_from_outside_and_one_from_inside_loop(cond_phi,
                                                               prev_block,
                                                               &entry_val,
                                                               &continue_val))
      return false;

   /* If they both execute or both don't execute, this is a job for
    * nir_dead_cf, not this pass.
    */
   if ((entry_val && continue_val) || (!entry_val && !continue_val))
      return false;

   struct exec_list *continue_list, *entry_list;
   if (continue_val) {
      continue_list = &nif->then_list;
      entry_list = &nif->else_list;
   } else {
      continue_list = &nif->else_list;
      entry_list = &nif->then_list;
   }

   /* We want to be moving the contents of entry_list to above the loop so it
    * can't contain any break or continue instructions.
    */
   foreach_list_typed(nir_cf_node, cf_node, node, entry_list) {
      nir_foreach_block_in_cf_node(block, cf_node) {
         if (nir_block_ends_in_jump(block))
            return false;
      }
   }

   /* We're about to re-arrange a bunch of blocks so make sure that we don't
    * have deref uses which cross block boundaries.  We don't want a deref
    * accidentally ending up in a phi.
    */
   nir_rematerialize_derefs_in_use_blocks_impl(
      nir_cf_node_get_function(&loop->cf_node));

   /* Before we do anything, convert the loop to LCSSA.  We're about to
    * replace a bunch of SSA defs with registers and this will prevent any of
    * it from leaking outside the loop.
    */
   nir_convert_loop_to_lcssa(loop);

   nir_block *after_if_block =
      nir_cf_node_as_block(nir_cf_node_next(&nif->cf_node));

   /* Get rid of phis in the header block since we will be duplicating it */
   nir_lower_phis_to_regs_block(header_block);
   /* Get rid of phis after the if since dominance will change */
   nir_lower_phis_to_regs_block(after_if_block);

   /* Get rid of SSA defs in the pieces we're about to move around */
   nir_lower_ssa_defs_to_regs_block(header_block);
   nir_foreach_block_in_cf_node(block, &nif->cf_node)
      nir_lower_ssa_defs_to_regs_block(block);

   nir_cf_list header, tmp;
   nir_cf_extract(&header, nir_before_block(header_block),
                  nir_after_block(header_block));

   nir_cf_list_clone(&tmp, &header, &loop->cf_node, NULL);
   nir_cf_reinsert(&tmp, nir_before_cf_node(&loop->cf_node));
   nir_cf_extract(&tmp, nir_before_cf_list(entry_list),
                  nir_after_cf_list(entry_list));
   nir_cf_reinsert(&tmp, nir_before_cf_node(&loop->cf_node));

   nir_cf_reinsert(&header,
                   nir_after_block_before_jump(find_continue_block(loop)));

   bool continue_list_jumps =
      nir_block_ends_in_jump(exec_node_data(nir_block,
                                            exec_list_get_tail(continue_list),
                                            cf_node.node));

   nir_cf_extract(&tmp, nir_before_cf_list(continue_list),
                  nir_after_cf_list(continue_list));

   /* Get the continue block again as the previous reinsert might have
    * removed the block.  Also, if both the continue list and the continue
    * block end in jump instructions, remove the jump from the latter, as it
    * will not be executed once we insert the continue list before it. */

   nir_block *continue_block = find_continue_block(loop);

   if (continue_list_jumps) {
      nir_instr *last_instr = nir_block_last_instr(continue_block);
      if (last_instr && last_instr->type == nir_instr_type_jump)
         nir_instr_remove(last_instr);
   }

   nir_cf_reinsert(&tmp,
                   nir_after_block_before_jump(continue_block));

   nir_cf_node_remove(&nif->cf_node);

   return true;
}

static bool
alu_instr_is_type_conversion(const nir_alu_instr *alu)
{
   return nir_op_infos[alu->op].num_inputs == 1 &&
          nir_op_infos[alu->op].output_type != nir_op_infos[alu->op].input_types[0];
}

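/* A "trivial" bcsel is a selection whose sources are all trivial SSA values
 * defined in the same block as the bcsel itself, and whose condition
 * (src[0]) is a phi that selects only constants.  When allow_non_phi_src is
 * set, at most one of the two value sources may be something other than a
 * phi.
 */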
static bool
is_trivial_bcsel(const nir_instr *instr, bool allow_non_phi_src)
{
   if (instr->type != nir_instr_type_alu)
      return false;

   nir_alu_instr *const bcsel = nir_instr_as_alu(instr);
   if (!nir_op_is_selection(bcsel->op))
      return false;

   for (unsigned i = 0; i < 3; i++) {
      if (!nir_alu_src_is_trivial_ssa(bcsel, i) ||
          bcsel->src[i].src.ssa->parent_instr->block != instr->block)
         return false;

      if (bcsel->src[i].src.ssa->parent_instr->type != nir_instr_type_phi) {
         /* opt_split_alu_of_phi() is able to peel that src from the loop */
         if (i == 0 || !allow_non_phi_src)
            return false;
         allow_non_phi_src = false;
      }
   }

   nir_foreach_phi_src(src, nir_instr_as_phi(bcsel->src[0].src.ssa->parent_instr)) {
      if (!nir_src_is_const(src->src))
         return false;
   }

   return true;
}

/**
 * Splits ALU instructions that have a source that is a phi node
 *
 * ALU instructions in the header block of a loop that meet the following
 * criteria can be split.
 *
 * - The loop has no continue instructions other than the "natural" continue
 *   at the bottom of the loop.
 *
 * - At least one source of the instruction is a phi node from the header block.
 *
 * - Any non-phi sources of the ALU instruction come from a block that
 *   dominates the block before the loop.  The most common failure mode for
 *   this check is sources that are generated in the loop header block.
 *
 * - The phi node selects a constant or undef from the block before the loop or
 *   the only ALU user is a trivial bcsel that gets removed by peeling the ALU
 *
 * The split process splits the original ALU instruction into two, one at the
 * bottom of the loop and one at the block before the loop. The instruction
 * before the loop computes the value on the first iteration, and the
 * instruction at the bottom computes the value on the second, third, and so
 * on. A new phi node is added to the header block that selects either the
 * instruction before the loop or the one at the end, and uses of the original
 * instruction are replaced by this phi.
 *
 * The splitting transforms a loop like:
 *
 *    vec1 32 ssa_8 = load_const (0x00000001)
 *    vec1 32 ssa_10 = load_const (0x00000000)
 *    // succs: block_1
 *    loop {
 *            block block_1:
 *            // preds: block_0 block_4
 *            vec1 32 ssa_11 = phi block_0: ssa_10, block_4: ssa_15
 *            vec1 32 ssa_12 = phi block_0: ssa_1, block_4: ssa_15
 *            vec1 32 ssa_13 = phi block_0: ssa_10, block_4: ssa_16
 *            vec1 32 ssa_14 = iadd ssa_11, ssa_8
 *            vec1 32 ssa_15 = b32csel ssa_13, ssa_14, ssa_12
 *            ...
 *            // succs: block_1
 *    }
 *
 * into:
 *
 *    vec1 32 ssa_8 = load_const (0x00000001)
 *    vec1 32 ssa_10 = load_const (0x00000000)
 *    vec1 32 ssa_22 = iadd ssa_10, ssa_8
 *    // succs: block_1
 *    loop {
 *            block block_1:
 *            // preds: block_0 block_4
 *            vec1 32 ssa_11 = phi block_0: ssa_10, block_4: ssa_15
 *            vec1 32 ssa_12 = phi block_0: ssa_1, block_4: ssa_15
 *            vec1 32 ssa_13 = phi block_0: ssa_10, block_4: ssa_16
 *            vec1 32 ssa_21 = phi block_0: ssa_22, block_4: ssa_20
 *            vec1 32 ssa_15 = b32csel ssa_13, ssa_21, ssa_12
 *            ...
 *            vec1 32 ssa_20 = iadd ssa_15, ssa_8
 *            // succs: block_1
 *    }
 */
static bool
opt_split_alu_of_phi(nir_builder *b, nir_loop *loop, nir_opt_if_options options)
{
   bool progress = false;
   nir_block *header_block = nir_loop_first_block(loop);
   nir_block *const prev_block =
      nir_cf_node_as_block(nir_cf_node_prev(&loop->cf_node));

   /* It would be insane if this were not true */
   assert(_mesa_set_search(header_block->predecessors, prev_block));

   /* The loop must have exactly one continue block which could be a block
    * ending in a continue instruction or the "natural" continue from the
    * last block in the loop back to the top.
    */
   if (header_block->predecessors->entries != 2)
      return false;

   nir_block *continue_block = find_continue_block(loop);
   if (continue_block == header_block)
      return false;

   nir_foreach_instr_safe(instr, header_block) {
      if (instr->type != nir_instr_type_alu)
         continue;

      nir_alu_instr *const alu = nir_instr_as_alu(instr);

      /* nir_op_vec{2,3,4} and nir_op_mov are excluded because they can easily
       * lead to infinite optimization loops.  Splitting comparisons can lead
       * to loop unrolling not recognizing loop terminators, and type
       * conversions also lead to regressions.
       */
      if (nir_op_is_vec_or_mov(alu->op) ||
          nir_alu_instr_is_comparison(alu) ||
          alu_instr_is_type_conversion(alu) ||
          /* Avoid fighting with nir_lower_64bit_phis */
          (alu->def.bit_size == 64 && (options & nir_opt_if_avoid_64bit_phis)))
         continue;

      bool has_phi_src_from_prev_block = false;
      bool all_non_phi_exist_in_prev_block = true;
      bool is_prev_result_undef = true;
      bool is_prev_result_const = true;
      nir_def *prev_srcs[8];     // FINISHME: Array size?
      nir_def *continue_srcs[8]; // FINISHME: Array size?

      for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
         nir_instr *const src_instr = alu->src[i].src.ssa->parent_instr;

         /* If the source is a phi in the loop header block, then the
          * prev_srcs and continue_srcs will come from the different sources
          * of the phi.
          */
         if (src_instr->type == nir_instr_type_phi &&
             src_instr->block == header_block) {
            nir_phi_instr *const phi = nir_instr_as_phi(src_instr);

            /* Only strictly need to NULL out the pointers when the assertions
             * (below) are compiled in.  Debugging a NULL pointer deref in the
             * wild is easier than debugging a random pointer deref, so set
             * NULL unconditionally just to be safe.
             */
            prev_srcs[i] = NULL;
            continue_srcs[i] = NULL;

            nir_foreach_phi_src(src_of_phi, phi) {
               if (src_of_phi->pred == prev_block) {
                  if (src_of_phi->src.ssa->parent_instr->type !=
                      nir_instr_type_undef) {
                     is_prev_result_undef = false;
                  }

                  if (src_of_phi->src.ssa->parent_instr->type !=
                      nir_instr_type_load_const) {
                     is_prev_result_const = false;
                  }

                  prev_srcs[i] = src_of_phi->src.ssa;
                  has_phi_src_from_prev_block = true;
               } else
                  continue_srcs[i] = src_of_phi->src.ssa;
            }

            assert(prev_srcs[i] != NULL);
            assert(continue_srcs[i] != NULL);
         } else {
            /* If the source is not a phi (or a phi in a block other than the
             * loop header), then the value must exist in prev_block.
             */
            if (!nir_block_dominates(src_instr->block, prev_block)) {
               all_non_phi_exist_in_prev_block = false;
               break;
            }

            prev_srcs[i] = alu->src[i].src.ssa;
            continue_srcs[i] = alu->src[i].src.ssa;
         }
      }

      if (!has_phi_src_from_prev_block || !all_non_phi_exist_in_prev_block)
         continue;

      if (!is_prev_result_undef && !is_prev_result_const) {
         /* check if the only user is a trivial bcsel */
         if (!list_is_singular(&alu->def.uses))
            continue;

         nir_src *use = list_first_entry(&alu->def.uses, nir_src, use_link);
         if (nir_src_is_if(use) || !is_trivial_bcsel(nir_src_parent_instr(use), true))
            continue;
      }

      /* Split ALU of Phi */
      b->cursor = nir_after_block(prev_block);
      nir_def *prev_value = clone_alu_and_replace_src_defs(b, alu, prev_srcs);

      /* Make a copy of the original ALU instruction.  Each source that was a
       * phi in the header block is replaced with that phi's value from the
       * continue edge; in particular, where prev_block supplied an undef,
       * the copy reads the non-undef source of the phi.
       *
       * Insert the new instruction at the end of the continue block.
       */
      b->cursor = nir_after_block_before_jump(continue_block);

      nir_def *const alu_copy =
         clone_alu_and_replace_src_defs(b, alu, continue_srcs);

      /* Make a new phi node that selects a value from prev_block and the
       * result of the new instruction from continue_block.
       */
      nir_phi_instr *const phi = nir_phi_instr_create(b->shader);
      nir_phi_instr_add_src(phi, prev_block, prev_value);
      nir_phi_instr_add_src(phi, continue_block, alu_copy);

      nir_def_init(&phi->instr, &phi->def, alu_copy->num_components,
                   alu_copy->bit_size);

      b->cursor = nir_after_phis(header_block);
      nir_builder_instr_insert(b, &phi->instr);

      /* Modify all readers of the original ALU instruction to read the
       * result of the phi.
       */
      nir_def_rewrite_uses(&alu->def,
                           &phi->def);

      /* Since the original ALU instruction no longer has any readers, just
       * remove it.
       */
      nir_instr_remove_v(&alu->instr);
      nir_instr_free(&alu->instr);

      progress = true;
   }

   return progress;
}

/**
 * Simplify a bcsel whose sources are all phi nodes from the loop header block
 *
 * bcsel instructions in a loop that meet the following criteria can be
 * converted to phi nodes:
 *
 * - The loop has no continue instructions other than the "natural" continue
 *   at the bottom of the loop.
 *
 * - All of the sources of the bcsel are phi nodes in the header block of the
 *   loop.
 *
 * - The phi node representing the condition of the bcsel instruction chooses
 *   only constant values.
 *
 * The constant value from the condition will select one of the other sources
 * when entered from outside the loop and the remaining source when entered
 * from the continue block.  Since each of these sources is also a phi node in
 * the header block, the value of the phi node can be "evaluated."  These
 * evaluated phi nodes provide the sources for a new phi node.  All users of
 * the bcsel result are updated to use the phi node result.
 *
 * The replacement transforms loops like:
 *
 *    vec1 32 ssa_7 = undefined
 *    vec1 32 ssa_8 = load_const (0x00000001)
 *    vec1 32 ssa_9 = load_const (0x000000c8)
 *    vec1 32 ssa_10 = load_const (0x00000000)
 *    // succs: block_1
 *    loop {
 *            block block_1:
 *            // preds: block_0 block_4
 *            vec1 32 ssa_11 = phi block_0: ssa_1, block_4: ssa_14
 *            vec1 32 ssa_12 = phi block_0: ssa_10, block_4: ssa_15
 *            vec1 32 ssa_13 = phi block_0: ssa_7, block_4: ssa_25
 *            vec1 32 ssa_14 = b32csel ssa_12, ssa_13, ssa_11
 *            vec1 32 ssa_16 = ige32 ssa_14, ssa_9
 *            ...
 *            vec1 32 ssa_15 = load_const (0xffffffff)
 *            ...
 *            vec1 32 ssa_25 = iadd ssa_14, ssa_8
 *            // succs: block_1
 *    }
 *
 * into:
 *
 *    vec1 32 ssa_7 = undefined
 *    vec1 32 ssa_8 = load_const (0x00000001)
 *    vec1 32 ssa_9 = load_const (0x000000c8)
 *    vec1 32 ssa_10 = load_const (0x00000000)
 *    // succs: block_1
 *    loop {
 *            block block_1:
 *            // preds: block_0 block_4
 *            vec1 32 ssa_11 = phi block_0: ssa_1, block_4: ssa_14
 *            vec1 32 ssa_12 = phi block_0: ssa_10, block_4: ssa_15
 *            vec1 32 ssa_13 = phi block_0: ssa_7, block_4: ssa_25
 *            vec1 32 ssa_26 = phi block_0: ssa_1, block_4: ssa_25
 *            vec1 32 ssa_16 = ige32 ssa_26, ssa_9
 *            ...
 *            vec1 32 ssa_15 = load_const (0xffffffff)
 *            ...
 *            vec1 32 ssa_25 = iadd ssa_26, ssa_8
 *            // succs: block_1
 *    }
 *
 * \note
 * It may be possible to modify this function to not require a phi node as the
 * source of the bcsel that is selected when entering from outside the loop.
 * The only restriction is that the source must be generated outside the loop
 * (since it will become the source of a phi node in the header block of the
 * loop).
 */
static bool
opt_simplify_bcsel_of_phi(nir_builder *b, nir_loop *loop)
{
   bool progress = false;
   nir_block *header_block = nir_loop_first_block(loop);
   nir_block *const prev_block =
      nir_cf_node_as_block(nir_cf_node_prev(&loop->cf_node));

   /* It would be insane if this were not true */
   assert(_mesa_set_search(header_block->predecessors, prev_block));

   /* The loop must have exactly one continue block which could be a block
    * ending in a continue instruction or the "natural" continue from the
    * last block in the loop back to the top.
    */
   if (header_block->predecessors->entries != 2)
      return false;

   /* We can move any bcsel that is guaranteed to execute on every iteration
    * of a loop.  For now this is accomplished by only taking bcsels from the
    * header_block.  In the future, this could be expanded to include any
    * bcsel that must come before any break.
    *
    * For more details, see
    * https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/170#note_110305
    */
   nir_foreach_instr_safe(instr, header_block) {
      if (!is_trivial_bcsel(instr, false))
         continue;

      nir_alu_instr *const bcsel = nir_instr_as_alu(instr);
      nir_phi_instr *const cond_phi =
         nir_instr_as_phi(bcsel->src[0].src.ssa->parent_instr);

      bool entry_val = false, continue_val = false;
      if (!phi_has_constant_from_outside_and_one_from_inside_loop(cond_phi,
                                                                  prev_block,
                                                                  &entry_val,
                                                                  &continue_val))
         continue;

      /* If they both execute or both don't execute, this is a job for
       * nir_dead_cf, not this pass.
       */
      if ((entry_val && continue_val) || (!entry_val && !continue_val))
         continue;

      const unsigned entry_src = entry_val ? 1 : 2;
      const unsigned continue_src = entry_val ? 2 : 1;

      /* Create a new phi node that selects the value for prev_block from
       * the bcsel source that is selected by entry_val and the value for
       * continue_block from the other bcsel source.  Both sources have
       * already been verified to be phi nodes.
       */
      nir_block *continue_block = find_continue_block(loop);
      nir_phi_instr *const phi = nir_phi_instr_create(b->shader);
      nir_phi_instr_add_src(phi, prev_block,
                            nir_phi_get_src_from_block(nir_instr_as_phi(bcsel->src[entry_src].src.ssa->parent_instr),
                                                       prev_block)
                               ->src.ssa);

      nir_phi_instr_add_src(phi, continue_block,
                            nir_phi_get_src_from_block(nir_instr_as_phi(bcsel->src[continue_src].src.ssa->parent_instr),
                                                       continue_block)
                               ->src.ssa);

      nir_def_init(&phi->instr, &phi->def,
                   bcsel->def.num_components,
                   bcsel->def.bit_size);

      b->cursor = nir_after_phis(header_block);
      nir_builder_instr_insert(b, &phi->instr);

      /* Modify all readers of the bcsel instruction to read the result of
       * the phi.
       */
      nir_def_rewrite_uses(&bcsel->def,
                           &phi->def);

      /* Since the original bcsel instruction no longer has any readers,
       * just remove it.
       */
      nir_instr_remove_v(&bcsel->instr);
      nir_instr_free(&bcsel->instr);

      progress = true;
   }

   return progress;
}

static bool
is_block_empty(nir_block *block)
{
   return nir_cf_node_is_last(&block->cf_node) &&
          exec_list_is_empty(&block->instr_list);
}

/* Walk all the phis in the block immediately following the if statement and
 * swap the blocks.
 */
static void
rewrite_phi_predecessor_blocks(nir_if *nif,
                               nir_block *old_then_block,
                               nir_block *old_else_block,
                               nir_block *new_then_block,
                               nir_block *new_else_block)
{
   nir_block *after_if_block =
      nir_cf_node_as_block(nir_cf_node_next(&nif->cf_node));

   nir_foreach_phi(phi, after_if_block) {
      nir_foreach_phi_src(src, phi) {
         if (src->pred == old_then_block) {
            src->pred = new_then_block;
         } else if (src->pred == old_else_block) {
            src->pred = new_else_block;
         }
      }
   }
}

/**
 * This optimization turns:
 *
 *     if (cond) {
 *     } else {
 *         do_work();
 *     }
 *
 * into:
 *
 *     if (!cond) {
 *         do_work();
 *     } else {
 *     }
 */
static bool
opt_if_simplification(nir_builder *b, nir_if *nif)
{
   /* Only simplify if the then block is empty and the else block is not. */
   if (!is_block_empty(nir_if_first_then_block(nif)) ||
       is_block_empty(nir_if_first_else_block(nif)))
      return false;

   /* Insert the inverted instruction and rewrite the condition. */
   b->cursor = nir_before_src(&nif->condition);
   nir_src_rewrite(&nif->condition, nir_inot(b, nif->condition.ssa));

   /* Grab pointers to the last then/else blocks for fixing up the phis. */
   nir_block *then_block = nir_if_last_then_block(nif);
   nir_block *else_block = nir_if_last_else_block(nif);

   if (nir_block_ends_in_jump(else_block)) {
      /* Even though this if statement has a jump on one side, we may still have
       * phis afterwards.  Single-source phis can be produced by loop unrolling
       * or dead control-flow passes and are perfectly legal.  Run a quick phi
       * removal on the block after the if to clean up any such phis.
       */
      nir_block *const next_block =
         nir_cf_node_as_block(nir_cf_node_next(&nif->cf_node));
      nir_opt_remove_phis_block(next_block);
   }

   rewrite_phi_predecessor_blocks(nif, then_block, else_block, else_block,
                                  then_block);

   /* Finally, move the else block to the then block. */
   nir_cf_list tmp;
   nir_cf_extract(&tmp, nir_before_cf_list(&nif->else_list),
                  nir_after_cf_list(&nif->else_list));
   nir_cf_reinsert(&tmp, nir_before_cf_list(&nif->then_list));

   return true;
}

/* Find phi statements after an if that choose between true and false, and
 * replace them with the if statement's condition (or an inot of it).
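 *
 * For example (an illustrative sketch; the ssa numbering is hypothetical),
 * with ssa_1 = load_const (true) and ssa_2 = load_const (false):
 *
 *    if ssa_0 {
 *       block block_1:
 *    } else {
 *       block block_2:
 *    }
 *    block block_3:
 *    vec1 1 ssa_3 = phi block_1: ssa_1, block_2: ssa_2
 *
 * Uses of ssa_3 can be rewritten to use ssa_0 directly; with the constants
 * swapped, they can use inot(ssa_0) instead.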
 */
static bool
opt_if_phi_is_condition(nir_builder *b, nir_if *nif)
{
   /* Grab pointers to the last then/else blocks for looking in the phis. */
   nir_block *then_block = nir_if_last_then_block(nif);
   ASSERTED nir_block *else_block = nir_if_last_else_block(nif);
   nir_def *cond = nif->condition.ssa;
   bool progress = false;

   nir_block *after_if_block = nir_cf_node_as_block(nir_cf_node_next(&nif->cf_node));
   nir_foreach_phi_safe(phi, after_if_block) {
      if (phi->def.bit_size != cond->bit_size ||
          phi->def.num_components != 1)
         continue;

      enum opt_bool {
         T,
         F,
         UNKNOWN
      } then_val = UNKNOWN,
        else_val = UNKNOWN;

      nir_foreach_phi_src(src, phi) {
         assert(src->pred == then_block || src->pred == else_block);
         enum opt_bool *pred_val = src->pred == then_block ? &then_val : &else_val;

         nir_scalar val = nir_scalar_resolved(src->src.ssa, 0);
         if (!nir_scalar_is_const(val))
            break;

         if (nir_scalar_as_int(val) == -1)
            *pred_val = T;
         else if (nir_scalar_as_uint(val) == 0)
            *pred_val = F;
         else
            break;
      }
      if (then_val == T && else_val == F) {
         nir_def_rewrite_uses(&phi->def, cond);
         progress = true;
      } else if (then_val == F && else_val == T) {
         b->cursor = nir_before_cf_node(&nif->cf_node);
         nir_def_rewrite_uses(&phi->def, nir_inot(b, cond));
         progress = true;
      }
   }

   return progress;
}

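/* Determine whether the given cursor lies inside one of the branches of
 * \p nif, where the value of the if condition is statically known: true when
 * the cursor's block is dominated by the first then block, false when it is
 * dominated by the first else block.
 */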
static bool
evaluate_if_condition(nir_if *nif, nir_cursor cursor, bool *value)
{
   nir_block *use_block = nir_cursor_current_block(cursor);
   if (nir_block_dominates(nir_if_first_then_block(nif), use_block)) {
      *value = true;
      return true;
   } else if (nir_block_dominates(nir_if_first_else_block(nif), use_block)) {
      *value = false;
      return true;
   } else {
      return false;
   }
}

static nir_def *
clone_alu_and_replace_src_defs(nir_builder *b, const nir_alu_instr *alu,
                               nir_def **src_defs)
{
   nir_alu_instr *nalu = nir_alu_instr_create(b->shader, alu->op);
   nalu->exact = alu->exact;

   nir_def_init(&nalu->instr, &nalu->def,
                alu->def.num_components,
                alu->def.bit_size);

   for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
      nalu->src[i].src = nir_src_for_ssa(src_defs[i]);
      memcpy(nalu->src[i].swizzle, alu->src[i].swizzle,
             sizeof(nalu->src[i].swizzle));
   }

   nir_builder_instr_insert(b, &nalu->instr);

   return &nalu->def;
}

/*
 * This propagates if condition evaluation down a chain of ALU instructions.
 * For example, by checking the uses of the following ALU instructions we can
 * eventually replace ssa_107 with NIR_TRUE.
 *
 *   loop {
 *      block block_1:
 *      vec1 32 ssa_85 = load_const (0x00000002)
 *      vec1 32 ssa_86 = ieq ssa_48, ssa_85
 *      vec1 32 ssa_87 = load_const (0x00000001)
 *      vec1 32 ssa_88 = ieq ssa_48, ssa_87
 *      vec1 32 ssa_89 = ior ssa_86, ssa_88
 *      vec1 32 ssa_90 = ieq ssa_48, ssa_0
 *      vec1 32 ssa_91 = ior ssa_89, ssa_90
 *      if ssa_86 {
 *         block block_2:
 *             ...
 *            break
 *      } else {
 *            block block_3:
 *      }
 *      block block_4:
 *      if ssa_88 {
 *            block block_5:
 *             ...
 *            break
 *      } else {
 *            block block_6:
 *      }
 *      block block_7:
 *      if ssa_90 {
 *            block block_8:
 *             ...
 *            break
 *      } else {
 *            block block_9:
 *      }
 *      block block_10:
 *      vec1 32 ssa_107 = inot ssa_91
 *      if ssa_107 {
 *            block block_11:
 *            break
 *      } else {
 *            block block_12:
 *      }
 *   }
 */
static bool
propagate_condition_eval(nir_builder *b, nir_if *nif, nir_src *use_src,
                         nir_src *alu_use, nir_alu_instr *alu)
{
   bool bool_value;
   b->cursor = nir_before_src(alu_use);
   if (!evaluate_if_condition(nif, b->cursor, &bool_value))
      return false;

   nir_def *def[NIR_MAX_VEC_COMPONENTS] = { 0 };
   for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
      if (alu->src[i].src.ssa == use_src->ssa) {
         def[i] = nir_imm_bool(b, bool_value);
      } else {
         def[i] = alu->src[i].src.ssa;
      }
   }

   nir_def *nalu = clone_alu_and_replace_src_defs(b, alu, def);
   nir_src_rewrite(alu_use, nalu);

   return true;
}

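/* List of ALU operations through which a known boolean condition value is
 * worth propagating: the boolean logic ops (ior, iand, inot), b2i32, and
 * only the condition source of a bcsel.
 */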
static bool
can_propagate_through_alu(nir_src *src)
{
   if (nir_src_parent_instr(src)->type != nir_instr_type_alu)
      return false;

   nir_alu_instr *alu = nir_instr_as_alu(nir_src_parent_instr(src));
   switch (alu->op) {
   case nir_op_ior:
   case nir_op_iand:
   case nir_op_inot:
   case nir_op_b2i32:
      return true;
   case nir_op_bcsel:
      return src == &alu->src[0].src;
   default:
      return false;
   }
}

static bool
evaluate_condition_use(nir_builder *b, nir_if *nif, nir_src *use_src)
{
   bool progress = false;

   b->cursor = nir_before_src(use_src);

   bool bool_value;
   if (evaluate_if_condition(nif, b->cursor, &bool_value)) {
      /* Rewrite use to use const */
      nir_src_rewrite(use_src, nir_imm_bool(b, bool_value));
      progress = true;
   }

   if (!nir_src_is_if(use_src) && can_propagate_through_alu(use_src)) {
      nir_alu_instr *alu = nir_instr_as_alu(nir_src_parent_instr(use_src));

      nir_foreach_use_including_if_safe(alu_use, &alu->def)
         progress |= propagate_condition_eval(b, nif, use_src, alu_use, alu);
   }

   return progress;
}

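/* Uses of an if condition inside either branch of that if have a statically
 * known value: true in the then branch and false in the else branch.
 * Rewrite such uses to constants and propagate the evaluation through
 * simple ALU chains.
 */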
static bool
opt_if_evaluate_condition_use(nir_builder *b, nir_if *nif)
{
   bool progress = false;

   /* Evaluate any uses of the if condition inside the if branches */
   nir_foreach_use_including_if_safe(use_src, nif->condition.ssa) {
      if (!(nir_src_is_if(use_src) && nir_src_parent_if(use_src) == nif))
         progress |= evaluate_condition_use(b, nif, use_src);
   }

   return progress;
}

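/* Rewrite uses of the component \p scalar inside the then branch of \p nif
 * (or the else branch when \p invert is set) to read \p new_scalar instead.
 * Only users that read exactly the rewritten component are touched; see the
 * comment in the loop below.
 */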
static bool
rewrite_comp_uses_within_if(nir_builder *b, nir_if *nif, bool invert,
                            nir_scalar scalar, nir_scalar new_scalar)
{
   bool progress = false;

   nir_block *first = invert ? nir_if_first_else_block(nif) : nir_if_first_then_block(nif);
   nir_block *last = invert ? nir_if_last_else_block(nif) : nir_if_last_then_block(nif);

   nir_def *new_ssa = NULL;
   nir_foreach_use_safe(use, scalar.def) {
      if (nir_src_parent_instr(use)->block->index < first->index ||
          nir_src_parent_instr(use)->block->index > last->index)
         continue;

      /* Only rewrite users which use only the new component. This is to avoid a
       * situation where copy propagation will undo the rewrite and we risk an infinite
       * loop.
       *
       * We could rewrite users which use a mix of the old and new components, but if
       * nir_src_components_read() is incomplete, then we risk the new component actually being
       * unused and some optimization later undoing the rewrite.
       */
      if (nir_src_components_read(use) != BITFIELD64_BIT(scalar.comp))
         continue;

      if (!new_ssa) {
         b->cursor = nir_before_cf_node(&nif->cf_node);
         new_ssa = nir_channel(b, new_scalar.def, new_scalar.comp);
         if (scalar.def->num_components > 1) {
            nir_def *vec = nir_undef(b, scalar.def->num_components, scalar.def->bit_size);
            new_ssa = nir_vector_insert_imm(b, vec, new_ssa, scalar.comp);
         }
      }

      nir_src_rewrite(use, new_ssa);
      progress = true;
   }

   return progress;
}

/*
 * This optimization turns:
 *
 *     if (a == (b=readfirstlane(a)))
 *        use(a)
 *     if (c == (d=load_const))
 *        use(c)
 *
 * into:
 *
 *     if (a == (b=readfirstlane(a)))
 *        use(b)
 *     if (c == (d=load_const))
 *        use(d)
 */
static bool
opt_if_rewrite_uniform_uses(nir_builder *b, nir_if *nif, nir_scalar cond, bool accept_ine)
{
   bool progress = false;

   if (!nir_scalar_is_alu(cond))
      return false;

   nir_op op = nir_scalar_alu_op(cond);
   if (op == nir_op_iand) {
      progress |= opt_if_rewrite_uniform_uses(b, nif, nir_scalar_chase_alu_src(cond, 0), false);
      progress |= opt_if_rewrite_uniform_uses(b, nif, nir_scalar_chase_alu_src(cond, 1), false);
      return progress;
   }

   if (op != nir_op_ieq && (op != nir_op_ine || !accept_ine))
      return false;

   for (unsigned i = 0; i < 2; i++) {
      nir_scalar src_uni = nir_scalar_chase_alu_src(cond, i);
      nir_scalar src_div = nir_scalar_chase_alu_src(cond, !i);

      if (nir_scalar_is_const(src_uni) && src_div.def != src_uni.def)
         return rewrite_comp_uses_within_if(b, nif, op == nir_op_ine, src_div, src_uni);

      if (!nir_scalar_is_intrinsic(src_uni))
         continue;
      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(src_uni.def->parent_instr);
      if (intrin->intrinsic != nir_intrinsic_read_first_invocation &&
          intrin->intrinsic != nir_intrinsic_read_invocation &&
          (intrin->intrinsic != nir_intrinsic_reduce || nir_intrinsic_cluster_size(intrin)))
         continue;

      nir_scalar intrin_src = { intrin->src[0].ssa, src_uni.comp };
      nir_scalar resolved_intrin_src = nir_scalar_resolved(intrin_src.def, intrin_src.comp);

      if (!nir_scalar_equal(resolved_intrin_src, src_div))
         continue;

      progress |= rewrite_comp_uses_within_if(b, nif, op == nir_op_ine, resolved_intrin_src, src_uni);
      if (!nir_scalar_equal(intrin_src, resolved_intrin_src))
         progress |= rewrite_comp_uses_within_if(b, nif, op == nir_op_ine, intrin_src, src_uni);

      return progress;
   }

   return false;
}

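/* Move the contents of the branch of \p src_if selected by \p src_if_then to
 * the end of the branch of \p dest_if selected by \p dest_if_then.
 */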
static void
simple_merge_if(nir_if *dest_if, nir_if *src_if, bool dest_if_then,
                bool src_if_then)
{
   /* Now merge the if branch */
   nir_block *dest_blk = dest_if_then ? nir_if_last_then_block(dest_if)
                                      : nir_if_last_else_block(dest_if);

   struct exec_list *list = src_if_then ? &src_if->then_list
                                        : &src_if->else_list;

   nir_cf_list if_cf_list;
   nir_cf_extract(&if_cf_list, nir_before_cf_list(list),
                  nir_after_cf_list(list));
   nir_cf_reinsert(&if_cf_list, nir_after_block(dest_blk));
}

static bool
opt_if_merge(nir_if *nif)
{
   bool progress = false;

   nir_block *next_blk = nir_cf_node_cf_tree_next(&nif->cf_node);
   if (!next_blk)
      return false;

   nir_if *next_if = nir_block_get_following_if(next_blk);
   if (!next_if)
      return false;

   /* Here we merge two consecutive ifs that have the same condition e.g:
    *
    *   if ssa_12 {
    *      ...
    *   } else {
    *      ...
    *   }
    *   if ssa_12 {
    *      ...
    *   } else {
    *      ...
    *   }
    *
    * Note: This only merges if-statements when the block between them is
    * empty. The reason we don't try to merge ifs that just have phis between
    * them is because this can result in increased register pressure. For
    * example when merging if ladders created by indirect indexing.
    */
   if (nif->condition.ssa == next_if->condition.ssa &&
       exec_list_is_empty(&next_blk->instr_list)) {

      /* This optimization isn't made to work in this case and
       * opt_if_evaluate_condition_use will optimize it later.
       */
      if (nir_block_ends_in_jump(nir_if_last_then_block(nif)) ||
          nir_block_ends_in_jump(nir_if_last_else_block(nif)))
         return false;

      simple_merge_if(nif, next_if, true, true);
      simple_merge_if(nif, next_if, false, false);

      nir_block *new_then_block = nir_if_last_then_block(nif);
      nir_block *new_else_block = nir_if_last_else_block(nif);

      nir_block *old_then_block = nir_if_last_then_block(next_if);
      nir_block *old_else_block = nir_if_last_else_block(next_if);

      /* Rewrite the predecessor block for any phis following the second
       * if-statement.
       */
      rewrite_phi_predecessor_blocks(next_if, old_then_block,
                                     old_else_block,
                                     new_then_block,
                                     new_else_block);

      /* Move phis after merged if to avoid them being deleted when we remove
       * the merged if-statement.
       */
      nir_block *after_next_if_block =
         nir_cf_node_as_block(nir_cf_node_next(&next_if->cf_node));

      nir_foreach_phi_safe(phi, after_next_if_block) {
         exec_node_remove(&phi->instr.node);
         exec_list_push_tail(&next_blk->instr_list, &phi->instr.node);
         phi->instr.block = next_blk;
      }

      nir_cf_node_remove(&next_if->cf_node);

      progress = true;
   }

   return progress;
}

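/* Recursively apply the if-optimizations that may rewrite control flow (if
 * merging, if simplification, phi-of-condition replacement, and
 * bcsel-of-phi simplification) to every if and loop in \p cf_list.
 */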
static bool
opt_if_cf_list(nir_builder *b, struct exec_list *cf_list,
               nir_opt_if_options options)
{
   bool progress = false;
   foreach_list_typed(nir_cf_node, cf_node, node, cf_list) {
      switch (cf_node->type) {
      case nir_cf_node_block:
         break;

      case nir_cf_node_if: {
         nir_if *nif = nir_cf_node_as_if(cf_node);
         progress |= opt_if_cf_list(b, &nif->then_list,
                                    options);
         progress |= opt_if_cf_list(b, &nif->else_list,
                                    options);
         progress |= opt_if_merge(nif);
         progress |= opt_if_simplification(b, nif);
         if (options & nir_opt_if_optimize_phi_true_false)
            progress |= opt_if_phi_is_condition(b, nif);
         break;
      }

      case nir_cf_node_loop: {
         nir_loop *loop = nir_cf_node_as_loop(cf_node);
         assert(!nir_loop_has_continue_construct(loop));
         progress |= opt_if_cf_list(b, &loop->body,
                                    options);
         progress |= opt_simplify_bcsel_of_phi(b, loop);
         break;
      }

      case nir_cf_node_function:
         unreachable("Invalid cf type");
      }
   }

   return progress;
}

/**
 * Optimizations which can create registers are done after other optimizations
 * which require SSA.
 */
static bool
opt_if_regs_cf_list(struct exec_list *cf_list)
{
   bool progress = false;
   foreach_list_typed(nir_cf_node, cf_node, node, cf_list) {
      switch (cf_node->type) {
      case nir_cf_node_block:
         break;

      case nir_cf_node_if: {
         nir_if *nif = nir_cf_node_as_if(cf_node);
         progress |= opt_if_regs_cf_list(&nif->then_list);
         progress |= opt_if_regs_cf_list(&nif->else_list);
         break;
      }

      case nir_cf_node_loop: {
         nir_loop *loop = nir_cf_node_as_loop(cf_node);
         assert(!nir_loop_has_continue_construct(loop));
         progress |= opt_if_regs_cf_list(&loop->body);
         progress |= opt_peel_loop_initial_if(loop);
         break;
      }

      case nir_cf_node_function:
         unreachable("Invalid cf type");
      }
   }

   return progress;
}

/**
 * These optimizations depend on nir_metadata_block_index and therefore must
 * not do anything to cause the metadata to become invalid.
 */
static bool
opt_if_safe_cf_list(nir_builder *b, struct exec_list *cf_list, nir_opt_if_options options)
{
   bool progress = false;
   foreach_list_typed(nir_cf_node, cf_node, node, cf_list) {
      switch (cf_node->type) {
      case nir_cf_node_block:
         break;

      case nir_cf_node_if: {
         nir_if *nif = nir_cf_node_as_if(cf_node);
         progress |= opt_if_safe_cf_list(b, &nif->then_list, options);
         progress |= opt_if_safe_cf_list(b, &nif->else_list, options);
         progress |= opt_if_evaluate_condition_use(b, nif);
         nir_scalar cond = nir_scalar_resolved(nif->condition.ssa, 0);
         progress |= opt_if_rewrite_uniform_uses(b, nif, cond, true);
         break;
      }

      case nir_cf_node_loop: {
         nir_loop *loop = nir_cf_node_as_loop(cf_node);
         assert(!nir_loop_has_continue_construct(loop));
         progress |= opt_if_safe_cf_list(b, &loop->body, options);
         progress |= opt_split_alu_of_phi(b, loop, options);
         break;
      }

      case nir_cf_node_function:
         unreachable("Invalid cf type");
      }
   }

   return progress;
}

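/* Run the if-statement optimizations in three phases: first the "safe"
 * optimizations that keep block indices and dominance valid, then the ones
 * that rewrite control flow, and finally the ones that temporarily create
 * registers and therefore require lowering back to SSA afterwards.
 */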
bool
nir_opt_if(nir_shader *shader, nir_opt_if_options options)
{
   bool progress = false;

   nir_foreach_function_impl(impl, shader) {
      nir_builder b = nir_builder_create(impl);

      nir_metadata_require(impl, nir_metadata_block_index |
                                    nir_metadata_dominance);
      progress |= opt_if_safe_cf_list(&b, &impl->body, options);
      nir_metadata_preserve(impl, nir_metadata_block_index |
                                     nir_metadata_dominance);

      bool preserve = true;

      if (opt_if_cf_list(&b, &impl->body, options)) {
         preserve = false;
         progress = true;
      }

      if (opt_if_regs_cf_list(&impl->body)) {
         preserve = false;
         progress = true;

         /* If that made progress, we're no longer really in SSA form.  We
          * need to convert registers back into SSA defs and clean up SSA defs
          * that don't dominate their uses.
          */
         nir_lower_reg_intrinsics_to_ssa_impl(impl);
      }

      if (preserve) {
         nir_metadata_preserve(impl, nir_metadata_all);
      } else {
         nir_metadata_preserve(impl, nir_metadata_none);
      }
   }

   return progress;
}