• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2022 Collabora, Ltd.
3  * SPDX-License-Identifier: MIT
4  */
5 
6 #include "nak_private.h"
7 #include "nir_builder.h"
8 
9 #include "util/u_dynarray.h"
10 
/* One entry on the stack of barriers that enclose the control-flow node
 * currently being walked.
 */
struct barrier {
   /* The if or loop that this barrier was placed around */
   nir_cf_node *node;
   /* Result of the bar_set_nv emitted in the block preceding the node */
   nir_def *bar_set;
   /* Register holding the current barrier value; it is initialized from
    * bar_set and rewritten by break_loop_bars() ahead of break/continue.
    */
   nir_def *bar_reg;
};
16 
/* State shared by the whole walk over a single function implementation */
struct add_barriers_state {
   /* Compiler the pass was invoked with */
   const struct nak_compiler *nak;
   /* Builder used for every instruction the pass emits */
   nir_builder builder;
   /* Stack of struct barrier, innermost enclosing barrier on top */
   struct util_dynarray barriers;
   /* Set as soon as the pass adds, changes or removes an instruction */
   bool progress;
};
23 
24 static void
add_bar_cf_node(nir_cf_node * node,struct add_barriers_state * state)25 add_bar_cf_node(nir_cf_node *node, struct add_barriers_state *state)
26 {
27    nir_builder *b = &state->builder;
28 
29    nir_block *before = nir_cf_node_as_block(nir_cf_node_prev(node));
30    nir_block *after = nir_cf_node_as_block(nir_cf_node_next(node));
31 
32    b->cursor = nir_after_block(before);
33    nir_def *bar = nir_bar_set_nv(b);
34    nir_def *bar_reg = nir_decl_reg(b, 1, 32, 0);
35    nir_store_reg(b, bar, bar_reg);
36 
37    b->cursor = nir_before_block_after_phis(after);
38    nir_bar_sync_nv(b, nir_load_reg(b, bar_reg), bar);
39 
40    struct barrier barrier = {
41       .node = node,
42       .bar_set = bar,
43       .bar_reg = bar_reg,
44    };
45    util_dynarray_append(&state->barriers, struct barrier, barrier);
46 
47    state->progress = true;
48 }
49 
50 static void
break_loop_bars(nir_block * block,struct add_barriers_state * state)51 break_loop_bars(nir_block *block, struct add_barriers_state *state)
52 {
53    if (exec_list_is_empty(&block->instr_list))
54       return;
55 
56    nir_instr *block_last_instr = nir_block_last_instr(block);
57    if (block_last_instr->type != nir_instr_type_jump)
58       return;
59 
60    nir_jump_instr *jump = nir_instr_as_jump(block_last_instr);
61    assert(jump->type == nir_jump_break ||
62           jump->type == nir_jump_continue);
63 
64    nir_builder *b = &state->builder;
65    b->cursor = nir_before_instr(&jump->instr);
66 
67    const unsigned num_bars =
68       util_dynarray_num_elements(&state->barriers, struct barrier);
69 
70    int idx = (int)num_bars - 1;
71    for (nir_cf_node *p = block->cf_node.parent;
72         p->type != nir_cf_node_loop; p = p->parent) {
73       if (idx < 0)
74          break;
75 
76       const struct barrier *bar =
77          util_dynarray_element(&state->barriers, struct barrier, idx);
78       if (bar->node == p) {
79          nir_def *bar_val = nir_load_reg(b, bar->bar_reg);
80          bar_val = nir_bar_break_nv(b, bar_val);
81          nir_store_reg(b, bar_val, bar->bar_reg);
82          idx--;
83       }
84    }
85 }
86 
87 static void
pop_bar_cf_node(nir_cf_node * node,struct add_barriers_state * state)88 pop_bar_cf_node(nir_cf_node *node, struct add_barriers_state *state)
89 {
90    if (state->barriers.size == 0)
91       return;
92 
93    const struct barrier *top =
94       util_dynarray_top_ptr(&state->barriers, struct barrier);
95    if (top->node == node)
96       (void)util_dynarray_pop(&state->barriers, struct barrier);
97 }
98 
99 static void
lower_control_barriers_block(nir_block * block,struct add_barriers_state * state)100 lower_control_barriers_block(nir_block *block,
101                              struct add_barriers_state *state)
102 {
103    nir_foreach_instr_safe(instr, block) {
104       if (instr->type != nir_instr_type_intrinsic)
105          continue;
106 
107       nir_intrinsic_instr *barrier = nir_instr_as_intrinsic(instr);
108       if (barrier->intrinsic != nir_intrinsic_barrier)
109          continue;
110 
111       mesa_scope exec_scope = nir_intrinsic_execution_scope(barrier);
112       assert(exec_scope <= SCOPE_WORKGROUP &&
113              "Control barrier with scope > WORKGROUP");
114 
115       if (exec_scope == SCOPE_WORKGROUP &&
116           nak_nir_workgroup_has_one_subgroup(state->builder.shader))
117          exec_scope = SCOPE_SUBGROUP;
118 
119       /* Because we're guaranteeing maximal convergence with this pass,
120        * subgroup barriers do nothing.
121        */
122       if (exec_scope <= SCOPE_SUBGROUP)
123          exec_scope = SCOPE_NONE;
124 
125       if (exec_scope != nir_intrinsic_execution_scope(barrier)) {
126          nir_intrinsic_set_execution_scope(barrier, exec_scope);
127          state->progress = true;
128       }
129 
130       const nir_variable_mode mem_modes = nir_intrinsic_memory_modes(barrier);
131       if (exec_scope == SCOPE_NONE && mem_modes == 0) {
132          nir_instr_remove(&barrier->instr);
133          state->progress = true;
134       } else {
135          state->builder.shader->info.uses_control_barrier = true;
136       }
137    }
138 }
139 
140 /* Checks if this CF node's immediate successor has a sync.  There's no point
141  * in adding a sync if the very next thing we do, besides dealing with phis,
142  * is to sync.
143  */
144 static bool
cf_node_imm_succ_is_sync(nir_cf_node * node)145 cf_node_imm_succ_is_sync(nir_cf_node *node)
146 {
147    nir_block *block = nir_cf_node_as_block(nir_cf_node_next(node));
148 
149    nir_foreach_instr(instr, block) {
150       if (instr->type == nir_instr_type_phi)
151          continue;
152 
153       if (instr->type != nir_instr_type_intrinsic)
154          return false;
155 
156       nir_intrinsic_instr *sync = nir_instr_as_intrinsic(instr);
157       if (sync->intrinsic == nir_intrinsic_bar_sync_nv)
158          return true;
159 
160       return false;
161    }
162 
163    /* We can only get here if the only thing in the block is phis */
164 
165    /* There's another loop or if following and we didn't find a sync */
166    if (nir_cf_node_next(&block->cf_node))
167       return false;
168 
169    /* No successor in the CF list. Check the parent. */
170    nir_cf_node *parent = block->cf_node.parent;
171    switch (parent->type) {
172    case nir_cf_node_if:
173       return cf_node_imm_succ_is_sync(parent);
174    case nir_cf_node_loop:
175       /* We want to sync before the continue to avoid additional divergence.
176        * We could possibly avoid the sync in the case where the loop is known
177        * to be divergent and therefore syncs afterwards but this seems safer
178        * for now.
179        *
180        * Note that this also catches double loops where you have something
181        * like this:
182        *
183        *    loop {
184        *       ...
185        *       if (div) {
186        *          loop {
187        *             ...
188        *          }
189        *       }
190        *    }
191        *
192        * In this case, we'll place a merge after the if and so we don't need a
193        * merge around the inner loop.
194        */
195       return false;
196    case nir_cf_node_function:
197       /* The end of the function is a natural sync point */
198       return true;
199    default:
200       unreachable("Unknown CF node type");
201    }
202 }
203 
204 static bool
block_is_merge(const nir_block * block)205 block_is_merge(const nir_block *block)
206 {
207    /* If it's unreachable, there is no merge */
208    if (block->imm_dom == NULL)
209       return false;
210 
211    unsigned num_preds = 0;
212    set_foreach(block->predecessors, entry) {
213       const nir_block *pred = entry->key;
214 
215       /* We don't care about unreachable blocks */
216       if (pred->imm_dom == NULL)
217          continue;
218 
219       num_preds++;
220    }
221 
222    return num_preds > 1;
223 }
224 
225 static void
add_barriers_cf_list(struct exec_list * cf_list,struct add_barriers_state * state)226 add_barriers_cf_list(struct exec_list *cf_list,
227                      struct add_barriers_state *state)
228 {
229    foreach_list_typed(nir_cf_node, node, node, cf_list) {
230       switch (node->type) {
231       case nir_cf_node_block: {
232          nir_block *block = nir_cf_node_as_block(node);
233 
234          lower_control_barriers_block(block, state);
235 
236          break_loop_bars(block, state);
237          break;
238       }
239       case nir_cf_node_if: {
240          nir_if *nif = nir_cf_node_as_if(node);
241 
242          if (nif->condition.ssa->divergent &&
243              block_is_merge(nir_cf_node_as_block(nir_cf_node_next(node))) &&
244              !cf_node_imm_succ_is_sync(&nif->cf_node))
245             add_bar_cf_node(&nif->cf_node, state);
246 
247          add_barriers_cf_list(&nif->then_list, state);
248          add_barriers_cf_list(&nif->else_list, state);
249 
250          pop_bar_cf_node(&nif->cf_node, state);
251          break;
252       }
253       case nir_cf_node_loop: {
254          nir_loop *loop = nir_cf_node_as_loop(node);
255 
256          if (loop->divergent && !cf_node_imm_succ_is_sync(&loop->cf_node))
257             add_bar_cf_node(&loop->cf_node, state);
258 
259          add_barriers_cf_list(&loop->body, state);
260 
261          pop_bar_cf_node(&loop->cf_node, state);
262          break;
263       }
264       default:
265          unreachable("Unknown CF node type");
266       }
267    }
268 }
269 
270 static bool
nak_nir_add_barriers_impl(nir_function_impl * impl,const struct nak_compiler * nak)271 nak_nir_add_barriers_impl(nir_function_impl *impl,
272                           const struct nak_compiler *nak)
273 {
274    nir_metadata_require(impl, nir_metadata_dominance);
275 
276    struct add_barriers_state state = {
277       .nak = nak,
278       .builder = nir_builder_create(impl),
279    };
280    util_dynarray_init(&state.barriers, NULL);
281 
282    add_barriers_cf_list(&impl->body, &state);
283 
284    util_dynarray_fini(&state.barriers);
285 
286    if (state.progress) {
287       nir_metadata_preserve(impl, nir_metadata_block_index |
288                                   nir_metadata_dominance |
289                                   nir_metadata_loop_analysis);
290 
291       nir_lower_reg_intrinsics_to_ssa_impl(impl);
292    } else {
293       nir_metadata_preserve(impl, nir_metadata_all);
294    }
295 
296    return state.progress;
297 }
298 
299 bool
nak_nir_add_barriers(nir_shader * nir,const struct nak_compiler * nak)300 nak_nir_add_barriers(nir_shader *nir, const struct nak_compiler *nak)
301 {
302    if (nak->sm < 70) {
303       nir_shader_preserve_all_metadata(nir);
304       return false;
305    }
306 
307    bool progress = false;
308 
309    nir->info.uses_control_barrier = false;
310 
311    nir_foreach_function_impl(impl, nir)
312       progress |= nak_nir_add_barriers_impl(impl, nak);
313 
314    return progress;
315 }
316