/*
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "util/u_dynarray.h"
#include "nir.h"
#include "nir_builder.h"
#include "nir_control_flow.h"

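/* This pass hoists terminate_if/demote_if intrinsics, together with the
 * instructions computing their conditions, to the top of the fragment
 * shader so that invocations can be killed or demoted as early as possible.
 */
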
#define MAX_DISCARDS 254
#define MOVE_INSTR_FLAG(i) ((i) + 1)
#define STOP_PROCESSING_INSTR_FLAG 255
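
/* pass_flags encoding: 0 means "not involved in any move",
 * MOVE_INSTR_FLAG(id) (1..254) tags an instruction that moves together with
 * discard number "id", and STOP_PROCESSING_INSTR_FLAG (255) marks the
 * instruction past which no discard may be hoisted. MAX_DISCARDS is capped
 * at 254 so that the whole encoding fits in the 8-bit pass_flags field.
 */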

struct move_discard_state {
   struct util_dynarray worklist;
   unsigned discard_id;
};

/** Check recursively whether the source can be moved to the top of the shader.
 *
 * Sets instr->pass_flags to the current discard's MOVE_INSTR_FLAG and adds
 * the instruction to the given worklist.
 */
static bool
add_src_to_worklist(nir_src *src, void *state_)
{
   struct move_discard_state *state = state_;
   nir_instr *instr = src->ssa->parent_instr;
   if (instr->pass_flags)
      return true;

   /* Phi instructions can't be moved at all. Also, if we're dependent on
    * a phi then we are dependent on some other bit of control flow and
    * it's hard to figure out the proper condition.
    */
   if (instr->type == nir_instr_type_phi)
      return false;

   if (instr->type == nir_instr_type_intrinsic) {
      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
      switch (intrin->intrinsic) {
      /* Moving these intrinsics to the top can only increase the set of
       * active invocations at the point where they execute. That is safe
       * because reads from inactive invocations are undefined anyway.
       */
      case nir_intrinsic_quad_swizzle_amd:
         /* If FI=0, then these intrinsics return 0 for inactive invocations. */
         if (!nir_intrinsic_fetch_inactive(intrin))
            return false;
         FALLTHROUGH;
      case nir_intrinsic_ddx:
      case nir_intrinsic_ddy:
      case nir_intrinsic_ddx_fine:
      case nir_intrinsic_ddy_fine:
      case nir_intrinsic_ddx_coarse:
      case nir_intrinsic_ddy_coarse:
      case nir_intrinsic_quad_broadcast:
      case nir_intrinsic_quad_swap_horizontal:
      case nir_intrinsic_quad_swap_vertical:
      case nir_intrinsic_quad_swap_diagonal:
         break;
      default:
         if (!nir_intrinsic_can_reorder(intrin))
            return false;
         break;
      }
   }

   /* Set pass_flags and remember the instruction so that its own sources
    * get added later and so that the flags can be cleaned up on failure.
    */
   instr->pass_flags = MOVE_INSTR_FLAG(state->discard_id);
   util_dynarray_append(&state->worklist, nir_instr *, instr);

   return true;
}

/** Try to mark a discard or demote instruction for moving
 *
 * This function does two things. First, it searches through the dependency
 * chain to see whether this discard is an instruction that we can move up
 * to the top. Second, if the discard is one we can move, it tags the
 * discard and its dependencies by setting their pass_flags to the
 * discard's MOVE_INSTR_FLAG.
 * Demotes are handled the same way, except that they can still be moved up
 * when implicit derivatives are used.
 */
static void
try_move_discard(nir_intrinsic_instr *discard, unsigned *next_discard_id)
{
   /* We require the discard to be in the top level of control flow. We
    * could, in theory, move discards that are inside ifs or loops but that
    * would be a lot more work.
    */
   if (discard->instr.block->cf_node.parent->type != nir_cf_node_function)
      return;

   if (*next_discard_id == MAX_DISCARDS)
      return;

   discard->instr.pass_flags = MOVE_INSTR_FLAG(*next_discard_id);

   /* Build the set of all instructions the discard depends on, to be able
    * to clear the flags in case the discard cannot be moved.
    */
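   /* Stack-backed worklist: it only falls back to a heap allocation if the
    * dependency set outgrows the 64-entry buffer. */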
   nir_instr *work_[64];
   struct move_discard_state state;
   state.discard_id = *next_discard_id;
   util_dynarray_init_from_stack(&state.worklist, work_, sizeof(work_));
   util_dynarray_append(&state.worklist, nir_instr *, &discard->instr);

   unsigned next = 0;
   bool can_move_discard = true;
   while (next < util_dynarray_num_elements(&state.worklist, nir_instr *) && can_move_discard) {
      nir_instr *instr = *util_dynarray_element(&state.worklist, nir_instr *, next);
      next++;
      /* Instead of removing instructions from the worklist, we keep them so
       * that the flags can be cleared if we fail.
       */
      can_move_discard = nir_foreach_src(instr, add_src_to_worklist, &state);
   }

   if (!can_move_discard) {
      /* Moving the discard is impossible: clear the flags */
      util_dynarray_foreach(&state.worklist, nir_instr *, instr)
         (*instr)->pass_flags = 0;
   } else {
      (*next_discard_id)++;
   }

   util_dynarray_fini(&state.worklist);
}

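/* Which kinds of discard an intrinsic may safely execute after, i.e. which
 * discards may be hoisted above it. A demote keeps the invocation running
 * as a helper invocation, so it is less disruptive than a terminate.
 */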
enum intrinsic_discard_info {
   can_move_after_demote = 1 << 0,
   can_move_after_terminate = 1 << 1,
};

static enum intrinsic_discard_info
can_move_intrinsic_after_discard(nir_intrinsic_instr *intrin)
{
   if (nir_intrinsic_can_reorder(intrin))
      return can_move_after_demote | can_move_after_terminate;

   switch (intrin->intrinsic) {
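   /* Quad and derivative ops still see demoted invocations, which keep
    * running as helpers, but a terminate removes invocations from the quad
    * entirely, so these must not be reordered past a terminate. */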
   case nir_intrinsic_quad_broadcast:
   case nir_intrinsic_quad_swap_horizontal:
   case nir_intrinsic_quad_swap_vertical:
   case nir_intrinsic_quad_swap_diagonal:
   case nir_intrinsic_quad_vote_all:
   case nir_intrinsic_quad_vote_any:
   case nir_intrinsic_quad_swizzle_amd:
   case nir_intrinsic_ddx:
   case nir_intrinsic_ddx_fine:
   case nir_intrinsic_ddx_coarse:
   case nir_intrinsic_ddy:
   case nir_intrinsic_ddy_fine:
   case nir_intrinsic_ddy_coarse:
      return can_move_after_demote;
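   /* These observe a demote (the demoted invocation becomes a helper), so
    * a demote must not be hoisted above them; a terminate leaves no helpers
    * behind for the surviving invocations to observe. */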
   case nir_intrinsic_is_helper_invocation:
   case nir_intrinsic_load_helper_invocation:
      return can_move_after_terminate;
   case nir_intrinsic_load_param:
   case nir_intrinsic_load_deref:
   case nir_intrinsic_decl_reg:
   case nir_intrinsic_load_reg:
   case nir_intrinsic_load_reg_indirect:
   case nir_intrinsic_as_uniform:
   case nir_intrinsic_inverse_ballot:
   case nir_intrinsic_write_invocation_amd:
   case nir_intrinsic_mbcnt_amd:
   case nir_intrinsic_atomic_counter_read:
   case nir_intrinsic_atomic_counter_read_deref:
   case nir_intrinsic_image_deref_load:
   case nir_intrinsic_image_load:
   case nir_intrinsic_bindless_image_load:
   case nir_intrinsic_image_deref_sparse_load:
   case nir_intrinsic_image_sparse_load:
   case nir_intrinsic_bindless_image_sparse_load:
   case nir_intrinsic_image_deref_samples_identical:
   case nir_intrinsic_image_samples_identical:
   case nir_intrinsic_bindless_image_samples_identical:
   case nir_intrinsic_load_ssbo:
   case nir_intrinsic_load_output:
   case nir_intrinsic_load_shared:
   case nir_intrinsic_load_global:
   case nir_intrinsic_load_global_2x32:
   case nir_intrinsic_load_scratch:
   case nir_intrinsic_load_stack:
   case nir_intrinsic_load_buffer_amd:
   case nir_intrinsic_load_typed_buffer_amd:
   case nir_intrinsic_load_global_amd:
   case nir_intrinsic_load_shared2_amd:
      return can_move_after_demote | can_move_after_terminate;
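   /* Stores are only safe to reorder past a discard when they target
    * per-invocation storage (temps, registers, scratch); anything visible
    * to other invocations or to external memory must keep its position. */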
   case nir_intrinsic_store_deref:
      if (!nir_deref_mode_is_in_set(nir_src_as_deref(intrin->src[0]),
                                    nir_var_shader_temp | nir_var_function_temp)) {
         break;
      }
      FALLTHROUGH;
   case nir_intrinsic_store_reg:
   case nir_intrinsic_store_reg_indirect:
   case nir_intrinsic_store_scratch:
      return can_move_after_demote | can_move_after_terminate;
   default:
      break;
   }

   return 0;
}

static bool
opt_move_discards_to_top_impl(nir_function_impl *impl)
{
   bool progress = false;
   bool consider_terminates = true;
   unsigned next_discard_id = 0;

   /* Walk through the instructions and look for a discard that we can move
    * to the top of the program. If we hit any operation along the way that
    * we cannot safely move a discard above, break out of the loop and stop
    * trying to move any more discards.
    */
   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         instr->pass_flags = 0;

         switch (instr->type) {
         case nir_instr_type_alu:
         case nir_instr_type_deref:
         case nir_instr_type_load_const:
         case nir_instr_type_undef:
         case nir_instr_type_phi:
         case nir_instr_type_debug_info:
            /* These are all safe */
            continue;

         case nir_instr_type_call:
            instr->pass_flags = STOP_PROCESSING_INSTR_FLAG;
            /* We don't know what the function will do */
            goto break_all;

         case nir_instr_type_tex: {
            nir_tex_instr *tex = nir_instr_as_tex(instr);
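            /* Hoisting a terminate above this tex would kill invocations
             * that its implicit derivatives depend on; a demote leaves them
             * running as helpers, so demotes may still be moved. */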
            if (nir_tex_instr_has_implicit_derivative(tex))
               consider_terminates = false;
            continue;
         }

         case nir_instr_type_intrinsic: {
            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            switch (intrin->intrinsic) {
            case nir_intrinsic_terminate_if:
               if (!consider_terminates) {
                  /* assume that a shader either uses terminate or demote, but not both */
                  instr->pass_flags = STOP_PROCESSING_INSTR_FLAG;
                  goto break_all;
               }
               FALLTHROUGH;
            case nir_intrinsic_demote_if:
               try_move_discard(intrin, &next_discard_id);
               break;
            default: {
               enum intrinsic_discard_info info = can_move_intrinsic_after_discard(intrin);
               if (!(info & can_move_after_demote)) {
                  instr->pass_flags = STOP_PROCESSING_INSTR_FLAG;
                  goto break_all;
               } else if (!(info & can_move_after_terminate)) {
                  consider_terminates = false;
               }
               break;
            }
            }
            continue;
         }

         case nir_instr_type_jump: {
            nir_jump_instr *jump = nir_instr_as_jump(instr);
            /* A return would cause the discard to not get executed */
            if (jump->type == nir_jump_return) {
               instr->pass_flags = STOP_PROCESSING_INSTR_FLAG;
               goto break_all;
            }
            continue;
         }

         case nir_instr_type_parallel_copy:
            unreachable("Unhandled instruction type");
         }
      }
   }
break_all:

   if (next_discard_id == 0)
      return false;

   /* Walk the list of instructions and move the discard/demote and
    * everything it depends on to the top. We walk the instruction list
    * here because it ensures that everything stays in its original order.
    * This provides stability for the algorithm and ensures that we don't
    * accidentally get dependencies out-of-order.
    */
   BITSET_DECLARE(cursors_valid, MAX_DISCARDS) = { 1u };
   nir_cursor cursors_[32];
   struct util_dynarray cursors;
   util_dynarray_init_from_stack(&cursors, cursors_, sizeof(cursors_));
   if (!util_dynarray_resize(&cursors, nir_cursor, next_discard_id))
      return false;

   *util_dynarray_element(&cursors, nir_cursor, 0) = nir_before_impl(impl);

   nir_foreach_block(block, impl) {
      bool stop = false;
      nir_foreach_instr_safe(instr, block) {
         if (instr->pass_flags == 0)
            continue;

         if (instr->pass_flags == STOP_PROCESSING_INSTR_FLAG) {
            stop = true;
            break;
         }

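         /* One insertion cursor per discard id: a cursor that hasn't been
          * materialized yet starts where the nearest earlier discard's group
          * currently ends, so each group lands right after the previous one
          * and the relative order of moved instructions is preserved. */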
         unsigned index = instr->pass_flags - 1;
         nir_cursor *cursor = util_dynarray_element(&cursors, nir_cursor, index);
         if (!BITSET_TEST(cursors_valid, index)) {
            unsigned prev_idx = BITSET_LAST_BIT_BEFORE(cursors_valid, index) - 1;
            *cursor = *util_dynarray_element(&cursors, nir_cursor, prev_idx);
            BITSET_SET(cursors_valid, index);
         }

         progress |= nir_instr_move(*cursor, instr);
         *cursor = nir_after_instr(instr);
      }
      if (stop)
         break;
   }

   util_dynarray_fini(&cursors);
   return progress;
}

/* This optimization only operates on terminate_if/demote_if, so
 * nir_opt_conditional_discard and nir_lower_discard_or_demote
 * should have been called before.
 */
bool
nir_opt_move_discards_to_top(nir_shader *shader)
{
   assert(shader->info.stage == MESA_SHADER_FRAGMENT);

   bool progress = false;

   if (!shader->info.fs.uses_discard)
      return false;

   nir_foreach_function_impl(impl, shader) {
      if (opt_move_discards_to_top_impl(impl)) {
         nir_metadata_preserve(impl, nir_metadata_control_flow);
         progress = true;
      }
   }

   return progress;
}