/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"
#include "nir_control_flow.h"
/**
 * This optimization detects if statements at the tops of loops where the
 * condition is a phi node of two constants and moves half of the if to above
 * the loop and the other half of the if to the end of the loop. A simple for
 * loop "for (int i = 0; i < 4; i++)", when run through the SPIR-V front-end,
 * ends up looking something like this:
 *
 * vec1 32 ssa_0 = load_const (0x00000000)
 * vec1 32 ssa_1 = load_const (0xffffffff)
 * loop {
 *    block block_1:
 *    vec1 32 ssa_2 = phi block_0: ssa_0, block_7: ssa_5
 *    vec1 32 ssa_3 = phi block_0: ssa_0, block_7: ssa_1
 *    if ssa_3 {
 *       block block_2:
 *       vec1 32 ssa_4 = load_const (0x00000001)
 *       vec1 32 ssa_5 = iadd ssa_2, ssa_4
 *    } else {
 *       block block_3:
 *    }
 *    block block_4:
 *    vec1 32 ssa_6 = load_const (0x00000004)
 *    vec1 32 ssa_7 = ilt ssa_5, ssa_6
 *    if ssa_7 {
 *       block block_5:
 *    } else {
 *       block block_6:
 *       break
 *    }
 *    block block_7:
 * }
 *
 * This turns it into something like this:
 *
 * // Stuff from block 1
 * // Stuff from block 3
 * loop {
 *    block block_1:
 *    vec1 32 ssa_3 = phi block_0: ssa_0, block_7: ssa_1
 *    vec1 32 ssa_6 = load_const (0x00000004)
 *    vec1 32 ssa_7 = ilt ssa_5, ssa_6
 *    if ssa_7 {
 *       block block_5:
 *    } else {
 *       block block_6:
 *       break
 *    }
 *    block block_7:
 *    // Stuff from block 1
 *    // Stuff from block 2
 *    vec1 32 ssa_4 = load_const (0x00000001)
 *    vec1 32 ssa_5 = iadd ssa_2, ssa_4
 * }
 */
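
/* For orientation, the transformation above amounts to the classic loop
 * rotation, sketched here in illustrative C-like pseudocode (not literal
 * NIR; "from_continue" stands in for the phi of the two constants):
 *
 *    // before the pass
 *    int i = 0; bool from_continue = false;
 *    while (true) {
 *       if (from_continue)
 *          i++;
 *       if (!(i < 4))
 *          break;
 *       from_continue = true;
 *    }
 *
 *    // after the pass
 *    int i = 0;
 *    while (true) {
 *       if (!(i < 4))
 *          break;
 *       i++;
 *    }
 */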
static bool
opt_peel_loop_initial_if(nir_loop *loop)
{
   nir_block *header_block = nir_loop_first_block(loop);
   nir_block *prev_block =
      nir_cf_node_as_block(nir_cf_node_prev(&loop->cf_node));

   /* It would be insane if this were not true */
   assert(_mesa_set_search(header_block->predecessors, prev_block));

   /* The loop must have exactly one continue block, which could be a block
    * ending in a continue instruction or the "natural" continue from the
    * last block in the loop back to the top.
    */
   if (header_block->predecessors->entries != 2)
      return false;

   /* The continue block is the header predecessor that isn't the block
    * right before the loop.
    */
   nir_block *continue_block = NULL;
   struct set_entry *pred_entry;
   set_foreach(header_block->predecessors, pred_entry) {
      if (pred_entry->key != prev_block)
         continue_block = (void *)pred_entry->key;
   }

   /* The header must be immediately followed by an if whose condition is a
    * phi defined in the header.
    */
   nir_cf_node *if_node = nir_cf_node_next(&header_block->cf_node);
   if (!if_node || if_node->type != nir_cf_node_if)
      return false;

   nir_if *nif = nir_cf_node_as_if(if_node);
   assert(nif->condition.is_ssa);

   nir_ssa_def *cond = nif->condition.ssa;
   if (cond->parent_instr->type != nir_instr_type_phi)
      return false;

   nir_phi_instr *cond_phi = nir_instr_as_phi(cond->parent_instr);
   if (cond->parent_instr->block != header_block)
      return false;

   /* We already know we have exactly one continue */
   assert(exec_list_length(&cond_phi->srcs) == 2);

   uint32_t entry_val = 0, continue_val = 0;
   nir_foreach_phi_src(src, cond_phi) {
      assert(src->src.is_ssa);
      nir_const_value *const_src = nir_src_as_const_value(src->src);
      if (!const_src)
         return false;

      if (src->pred == continue_block) {
         continue_val = const_src->u32[0];
      } else {
         assert(src->pred == prev_block);
         entry_val = const_src->u32[0];
      }
   }

   /* If they both execute or both don't execute, this is a job for
    * nir_dead_cf, not this pass.
    */
   if ((entry_val && continue_val) || (!entry_val && !continue_val))
      return false;

   struct exec_list *continue_list, *entry_list;
   if (continue_val) {
      continue_list = &nif->then_list;
      entry_list = &nif->else_list;
   } else {
      continue_list = &nif->else_list;
      entry_list = &nif->then_list;
   }

   /* We want to move the contents of entry_list to above the loop, so it
    * can't contain any break or continue instructions.
    */
   foreach_list_typed(nir_cf_node, cf_node, node, entry_list) {
      nir_foreach_block_in_cf_node(block, cf_node) {
         nir_instr *last_instr = nir_block_last_instr(block);
         if (last_instr && last_instr->type == nir_instr_type_jump)
            return false;
      }
   }

   /* Before we do anything, convert the loop to LCSSA. We're about to
    * replace a bunch of SSA defs with registers and this will prevent any of
    * them from leaking outside the loop.
    */
   nir_convert_loop_to_lcssa(loop);

   nir_block *after_if_block =
      nir_cf_node_as_block(nir_cf_node_next(&nif->cf_node));

   /* Get rid of phis in the header block since we will be duplicating it */
   nir_lower_phis_to_regs_block(header_block);

   /* Get rid of phis after the if since dominance will change */
   nir_lower_phis_to_regs_block(after_if_block);

   /* Get rid of SSA defs in the pieces we're about to move around */
   nir_lower_ssa_defs_to_regs_block(header_block);
   nir_foreach_block_in_cf_node(block, &nif->cf_node)
      nir_lower_ssa_defs_to_regs_block(block);

   /* Paste a copy of the loop header, followed by the entry side of the if,
    * just before the loop.
    */
   nir_cf_list header, tmp;
   nir_cf_extract(&header, nir_before_block(header_block),
                  nir_after_block(header_block));

   nir_cf_list_clone(&tmp, &header, &loop->cf_node, NULL);
   nir_cf_reinsert(&tmp, nir_before_cf_node(&loop->cf_node));
   nir_cf_extract(&tmp, nir_before_cf_list(entry_list),
                  nir_after_cf_list(entry_list));
   nir_cf_reinsert(&tmp, nir_before_cf_node(&loop->cf_node));

   /* Move the original header, followed by the continue side of the if, to
    * the bottom of the loop (the continue block).
    */
   nir_cf_reinsert(&header, nir_after_block_before_jump(continue_block));
   nir_cf_extract(&tmp, nir_before_cf_list(continue_list),
                  nir_after_cf_list(continue_list));
   nir_cf_reinsert(&tmp, nir_after_block_before_jump(continue_block));

   /* Both sides of the if are now empty, so remove it. */
   nir_cf_node_remove(&nif->cf_node);

   return true;
}

static bool
opt_if_cf_list(struct exec_list *cf_list)
{
   bool progress = false;
   foreach_list_typed(nir_cf_node, cf_node, node, cf_list) {
      switch (cf_node->type) {
      case nir_cf_node_block:
         break;

      case nir_cf_node_if: {
         nir_if *nif = nir_cf_node_as_if(cf_node);
         progress |= opt_if_cf_list(&nif->then_list);
         progress |= opt_if_cf_list(&nif->else_list);
         break;
      }

      case nir_cf_node_loop: {
         nir_loop *loop = nir_cf_node_as_loop(cf_node);
         progress |= opt_if_cf_list(&loop->body);
         progress |= opt_peel_loop_initial_if(loop);
         break;
      }

      case nir_cf_node_function:
         unreachable("Invalid cf type");
      }
   }

   return progress;
}

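/* Typical driver usage (an illustrative sketch only, not mandated by this
 * file): run nir_opt_if() inside the shader's optimization loop and repeat
 * the whole loop until no pass reports progress, e.g.
 *
 *    bool progress;
 *    do {
 *       progress = false;
 *       progress |= nir_opt_if(shader);
 *       progress |= nir_opt_dead_cf(shader);
 *       progress |= nir_opt_dce(shader);
 *    } while (progress);
 */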
bool
nir_opt_if(nir_shader *shader)
{
   bool progress = false;

   nir_foreach_function(function, shader) {
      if (function->impl == NULL)
         continue;

      if (opt_if_cf_list(&function->impl->body)) {
         nir_metadata_preserve(function->impl, nir_metadata_none);

         /* If that made progress, we're no longer really in SSA form. We
          * need to convert registers back into SSA defs and clean up SSA
          * defs that don't dominate their uses.
          */
         nir_lower_regs_to_ssa_impl(function->impl);
         progress = true;
      }
   }

   return progress;
}