• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2018 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "util/u_dynarray.h"
25 #include "nir.h"
26 #include "nir_builder.h"
27 #include "nir_control_flow.h"
28 
29 #define MAX_DISCARDS               254
30 #define MOVE_INSTR_FLAG(i)         ((i) + 1)
31 #define STOP_PROCESSING_INSTR_FLAG 255
32 
/* State shared by the dependency walk in try_move_discard():
 * worklist accumulates every instruction the discard depends on (so flags
 * can be cleared on failure) and discard_id identifies which discard is
 * currently being processed.
 */
struct move_discard_state {
   struct util_dynarray worklist;
   unsigned discard_id;
};
37 
38 /** Check recursively if the source can be moved to the top of the shader.
39  *  Sets instr->pass_flags to MOVE_INSTR_FLAG and adds the instr
40  *  to the given worklist
41  */
42 static bool
add_src_to_worklist(nir_src * src,void * state_)43 add_src_to_worklist(nir_src *src, void *state_)
44 {
45    struct move_discard_state *state = state_;
46    nir_instr *instr = src->ssa->parent_instr;
47    if (instr->pass_flags)
48       return true;
49 
50    /* Phi instructions can't be moved at all.  Also, if we're dependent on
51     * a phi then we are dependent on some other bit of control flow and
52     * it's hard to figure out the proper condition.
53     */
54    if (instr->type == nir_instr_type_phi)
55       return false;
56 
57    if (instr->type == nir_instr_type_intrinsic) {
58       nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
59       switch (intrin->intrinsic) {
60       /* Increasing the set of active invocations is safe for these intrinsics, which is
61        * all that moving it to the top does. This is because the read from inactive
62        * invocations is undefined.
63        */
64       case nir_intrinsic_quad_swizzle_amd:
65          /* If FI=0, then these intrinsics return 0 for inactive invocations. */
66          if (!nir_intrinsic_fetch_inactive(intrin))
67             return false;
68          FALLTHROUGH;
69       case nir_intrinsic_ddx:
70       case nir_intrinsic_ddy:
71       case nir_intrinsic_ddx_fine:
72       case nir_intrinsic_ddy_fine:
73       case nir_intrinsic_ddx_coarse:
74       case nir_intrinsic_ddy_coarse:
75       case nir_intrinsic_quad_broadcast:
76       case nir_intrinsic_quad_swap_horizontal:
77       case nir_intrinsic_quad_swap_vertical:
78       case nir_intrinsic_quad_swap_diagonal:
79          break;
80       default:
81          if (!nir_intrinsic_can_reorder(intrin))
82             return false;
83          break;
84       }
85    }
86 
87    /* Set pass_flags and remember the instruction to add it's own sources and for potential
88     * cleanup.
89     */
90    instr->pass_flags = MOVE_INSTR_FLAG(state->discard_id);
91    util_dynarray_append(&state->worklist, nir_instr *, instr);
92 
93    return true;
94 }
95 
96 /** Try to mark a discard or demote instruction for moving
97  *
98  * This function does two things.  One is that it searches through the
99  * dependency chain to see if this discard is an instruction that we can move
100  * up to the top.  Second, if the discard is one we can move, it tags the
101  * discard and its dependencies (using pass_flags = 1).
102  * Demote are handled the same way, except that they can still be moved up
103  * when implicit derivatives are used.
104  */
105 static void
try_move_discard(nir_intrinsic_instr * discard,unsigned * next_discard_id)106 try_move_discard(nir_intrinsic_instr *discard, unsigned *next_discard_id)
107 {
108    /* We require the discard to be in the top level of control flow.  We
109     * could, in theory, move discards that are inside ifs or loops but that
110     * would be a lot more work.
111     */
112    if (discard->instr.block->cf_node.parent->type != nir_cf_node_function)
113       return;
114 
115    if (*next_discard_id == MAX_DISCARDS)
116       return;
117 
118    discard->instr.pass_flags = MOVE_INSTR_FLAG(*next_discard_id);
119 
120    /* Build the set of all instructions discard depends on to be able to
121     * clear the flags in case the discard cannot be moved.
122     */
123    nir_instr *work_[64];
124    struct move_discard_state state;
125    state.discard_id = *next_discard_id;
126    util_dynarray_init_from_stack(&state.worklist, work_, sizeof(work_));
127    util_dynarray_append(&state.worklist, nir_instr *, &discard->instr);
128 
129    unsigned next = 0;
130    bool can_move_discard = true;
131    while (next < util_dynarray_num_elements(&state.worklist, nir_instr *) && can_move_discard) {
132       nir_instr *instr = *util_dynarray_element(&state.worklist, nir_instr *, next);
133       next++;
134       /* Instead of removing instructions from the worklist, we keep them so that the
135        * flags can be cleared if we fail.
136        */
137       can_move_discard = nir_foreach_src(instr, add_src_to_worklist, &state.worklist);
138    }
139 
140    if (!can_move_discard) {
141       /* Moving the discard is impossible: clear the flags */
142       util_dynarray_foreach(&state.worklist, nir_instr *, instr)
143          (*instr)->pass_flags = 0;
144    } else {
145       (*next_discard_id)++;
146    }
147 
148    util_dynarray_fini(&state.worklist);
149 }
150 
/* Bitmask returned by can_move_intrinsic_after_discard(): which kinds of
 * discard (demote vs. terminate) may be hoisted above the intrinsic without
 * changing its result.
 */
enum intrinsic_discard_info {
   can_move_after_demote = 1 << 0,
   can_move_after_terminate = 1 << 1,
};
155 
/* Classify whether a demote and/or terminate may be moved above this
 * intrinsic.  Returns a mask of intrinsic_discard_info bits; 0 means the
 * intrinsic blocks hoisting of either kind of discard past it.
 */
static enum intrinsic_discard_info
can_move_intrinsic_after_discard(nir_intrinsic_instr *intrin)
{
   /* Anything freely reorderable is trivially safe for both. */
   if (nir_intrinsic_can_reorder(intrin))
      return can_move_after_demote | can_move_after_terminate;

   switch (intrin->intrinsic) {
   /* Cross-invocation/derivative ops: demote keeps helper invocations
    * alive so lane data is still available, but terminate kills lanes
    * outright, so only demote may move above these.
    */
   case nir_intrinsic_quad_broadcast:
   case nir_intrinsic_quad_swap_horizontal:
   case nir_intrinsic_quad_swap_vertical:
   case nir_intrinsic_quad_swap_diagonal:
   case nir_intrinsic_quad_vote_all:
   case nir_intrinsic_quad_vote_any:
   case nir_intrinsic_quad_swizzle_amd:
   case nir_intrinsic_ddx:
   case nir_intrinsic_ddx_fine:
   case nir_intrinsic_ddx_coarse:
   case nir_intrinsic_ddy:
   case nir_intrinsic_ddy_fine:
   case nir_intrinsic_ddy_coarse:
      return can_move_after_demote;
   /* These observe helper-invocation state, which a demote changes, so
    * only a terminate may move above them.
    */
   case nir_intrinsic_is_helper_invocation:
   case nir_intrinsic_load_helper_invocation:
      return can_move_after_terminate;
   /* Per-lane loads and reads with no cross-invocation visibility:
    * unaffected by either kind of discard moving above them.
    */
   case nir_intrinsic_load_param:
   case nir_intrinsic_load_deref:
   case nir_intrinsic_decl_reg:
   case nir_intrinsic_load_reg:
   case nir_intrinsic_load_reg_indirect:
   case nir_intrinsic_as_uniform:
   case nir_intrinsic_inverse_ballot:
   case nir_intrinsic_write_invocation_amd:
   case nir_intrinsic_mbcnt_amd:
   case nir_intrinsic_atomic_counter_read:
   case nir_intrinsic_atomic_counter_read_deref:
   case nir_intrinsic_image_deref_load:
   case nir_intrinsic_image_load:
   case nir_intrinsic_bindless_image_load:
   case nir_intrinsic_image_deref_sparse_load:
   case nir_intrinsic_image_sparse_load:
   case nir_intrinsic_bindless_image_sparse_load:
   case nir_intrinsic_image_deref_samples_identical:
   case nir_intrinsic_image_samples_identical:
   case nir_intrinsic_bindless_image_samples_identical:
   case nir_intrinsic_load_ssbo:
   case nir_intrinsic_load_output:
   case nir_intrinsic_load_shared:
   case nir_intrinsic_load_global:
   case nir_intrinsic_load_global_2x32:
   case nir_intrinsic_load_scratch:
   case nir_intrinsic_load_stack:
   case nir_intrinsic_load_buffer_amd:
   case nir_intrinsic_load_typed_buffer_amd:
   case nir_intrinsic_load_global_amd:
   case nir_intrinsic_load_shared2_amd:
      return can_move_after_demote | can_move_after_terminate;
   case nir_intrinsic_store_deref:
      /* Only stores to purely lane-private storage are safe; anything
       * externally visible must not happen for discarded lanes.
       */
      if (!nir_deref_mode_is_in_set(nir_src_as_deref(intrin->src[0]),
                                    nir_var_shader_temp | nir_var_function_temp)) {
         break;
      }
      FALLTHROUGH;
   case nir_intrinsic_store_reg:
   case nir_intrinsic_store_reg_indirect:
   case nir_intrinsic_store_scratch:
      return can_move_after_demote | can_move_after_terminate;
   default:
      break;
   }

   /* Unknown or side-effecting intrinsic: block hoisting entirely. */
   return 0;
}
228 
/* Run the pass on a single function implementation.
 *
 * Phase 1 walks the shader in order, flagging movable discards/demotes
 * (and their dependency chains) via try_move_discard() and stopping at
 * the first instruction nothing may be hoisted past.  Phase 2 re-walks
 * the instruction list and moves every flagged instruction up, keeping
 * one insertion cursor per discard id so original relative order is
 * preserved.  Returns true if any instruction actually moved.
 */
static bool
opt_move_discards_to_top_impl(nir_function_impl *impl)
{
   bool progress = false;
   bool consider_terminates = true;
   unsigned next_discard_id = 0;

   /* Walk through the instructions and look for a discard that we can move
    * to the top of the program.  If we hit any operation along the way that
    * we cannot safely move a discard above, break out of the loop and stop
    * trying to move any more discards.
    */
   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         /* Reset flags from any previous pass; phase 2 relies on a clean
          * zero default.
          */
         instr->pass_flags = 0;

         switch (instr->type) {
         case nir_instr_type_alu:
         case nir_instr_type_deref:
         case nir_instr_type_load_const:
         case nir_instr_type_undef:
         case nir_instr_type_phi:
         case nir_instr_type_debug_info:
            /* These are all safe */
            continue;

         case nir_instr_type_call:
            instr->pass_flags = STOP_PROCESSING_INSTR_FLAG;
            /* We don't know what the function will do */
            goto break_all;

         case nir_instr_type_tex: {
            nir_tex_instr *tex = nir_instr_as_tex(instr);
            /* Implicit derivatives need helper invocations, which a
             * hoisted terminate would kill; demotes remain fine.
             */
            if (nir_tex_instr_has_implicit_derivative(tex))
               consider_terminates = false;
            continue;
         }

         case nir_instr_type_intrinsic: {
            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            switch (intrin->intrinsic) {
            case nir_intrinsic_terminate_if:
               if (!consider_terminates) {
                  /* assume that a shader either uses terminate or demote, but not both */
                  instr->pass_flags = STOP_PROCESSING_INSTR_FLAG;
                  goto break_all;
               }
            FALLTHROUGH;
            case nir_intrinsic_demote_if:
               try_move_discard(intrin, &next_discard_id);
               break;
            default: {
               enum intrinsic_discard_info info = can_move_intrinsic_after_discard(intrin);
               if (!(info & can_move_after_demote)) {
                  /* Nothing may be hoisted past this point. */
                  instr->pass_flags = STOP_PROCESSING_INSTR_FLAG;
                  goto break_all;
               } else if (!(info & can_move_after_terminate)) {
                  consider_terminates = false;
               }
               break;
            }
            }
            continue;
         }

         case nir_instr_type_jump: {
            nir_jump_instr *jump = nir_instr_as_jump(instr);
            /* A return would cause the discard to not get executed */
            if (jump->type == nir_jump_return) {
               instr->pass_flags = STOP_PROCESSING_INSTR_FLAG;
               goto break_all;
            }
            continue;
         }

         case nir_instr_type_parallel_copy:
            unreachable("Unhanded instruction type");
         }
      }
   }
break_all:

   if (next_discard_id == 0)
      return false;

   /* Walk the list of instructions and move the discard/demote and
    * everything it depends on to the top.  We walk the instruction list
    * here because it ensures that everything stays in its original order.
    * This provides stability for the algorithm and ensures that we don't
    * accidentally get dependencies out-of-order.
    */
   /* Bit i set means cursors[i] holds a valid insertion point.  Only bit 0
    * starts valid: cursor 0 is the very top of the function.
    */
   BITSET_DECLARE(cursors_valid, MAX_DISCARDS) = { 1u };
   nir_cursor cursors_[32];
   struct util_dynarray cursors;
   util_dynarray_init_from_stack(&cursors, cursors_, sizeof(cursors_));
   if (!util_dynarray_resize(&cursors, nir_cursor, next_discard_id))
      return false;

   *util_dynarray_element(&cursors, nir_cursor, 0) = nir_before_impl(impl);

   nir_foreach_block(block, impl) {
      bool stop = false;
      nir_foreach_instr_safe(instr, block) {
         if (instr->pass_flags == 0)
            continue;

         if (instr->pass_flags == STOP_PROCESSING_INSTR_FLAG) {
            stop = true;
            break;
         }

         /* pass_flags == MOVE_INSTR_FLAG(index) == index + 1 */
         unsigned index = instr->pass_flags - 1;
         nir_cursor *cursor = util_dynarray_element(&cursors, nir_cursor, index);
         if (!BITSET_TEST(cursors_valid, index)) {
            /* Lazily start this discard's cursor where the nearest
             * earlier discard's cursor currently is, so later discards
             * land after everything already hoisted for earlier ones.
             */
            unsigned prev_idx = BITSET_LAST_BIT_BEFORE(cursors_valid, index) - 1;
            *cursor = *util_dynarray_element(&cursors, nir_cursor, prev_idx);
            BITSET_SET(cursors_valid, index);
         }

         progress |= nir_instr_move(*cursor, instr);
         *cursor = nir_after_instr(instr);
      }
      if (stop)
         break;
   }

   util_dynarray_fini(&cursors);
   return progress;
}
358 
/* This optimization only operates on discard_if/demote_if so
 * nir_opt_conditional_discard and nir_lower_discard_or_demote
 * should have been called before.
 */
363 bool
nir_opt_move_discards_to_top(nir_shader * shader)364 nir_opt_move_discards_to_top(nir_shader *shader)
365 {
366    assert(shader->info.stage == MESA_SHADER_FRAGMENT);
367 
368    bool progress = false;
369 
370    if (!shader->info.fs.uses_discard)
371       return false;
372 
373    nir_foreach_function_impl(impl, shader) {
374       if (opt_move_discards_to_top_impl(impl)) {
375          nir_metadata_preserve(impl, nir_metadata_control_flow);
376          progress = true;
377       }
378    }
379 
380    return progress;
381 }
382