1 /*
2 * Copyright © 2022 Collabora, Ltd.
3 * SPDX-License-Identifier: MIT
4 */
5
6 #include "nak_private.h"
7 #include "nir_builder.h"
8
9 #include "util/u_dynarray.h"
10
/* One outstanding convergence barrier, pushed when we enter a divergent
 * CF node and popped when we leave it (see add_barriers_cf_list()).
 */
struct barrier {
   /* The divergent if or loop this barrier re-converges */
   nir_cf_node *node;

   /* SSA value produced by bar_set_nv just before the CF node */
   nir_def *bar_set;

   /* NIR register holding the live barrier value; rewritten by
    * bar_break_nv when a jump leaves the CF node early.
    */
   nir_def *bar_reg;
};
16
/* Per-function state for the barrier-insertion walk */
struct add_barriers_state {
   const struct nak_compiler *nak;
   nir_builder builder;

   /* Stack of struct barrier; the innermost open divergent CF node's
    * barrier is on top.
    */
   struct util_dynarray barriers;

   /* Set whenever we insert, rewrite, or remove an instruction */
   bool progress;
};
23
24 static void
add_bar_cf_node(nir_cf_node * node,struct add_barriers_state * state)25 add_bar_cf_node(nir_cf_node *node, struct add_barriers_state *state)
26 {
27 nir_builder *b = &state->builder;
28
29 nir_block *before = nir_cf_node_as_block(nir_cf_node_prev(node));
30 nir_block *after = nir_cf_node_as_block(nir_cf_node_next(node));
31
32 b->cursor = nir_after_block(before);
33 nir_def *bar = nir_bar_set_nv(b);
34 nir_def *bar_reg = nir_decl_reg(b, 1, 32, 0);
35 nir_store_reg(b, bar, bar_reg);
36
37 b->cursor = nir_before_block_after_phis(after);
38 nir_bar_sync_nv(b, nir_load_reg(b, bar_reg), bar);
39
40 struct barrier barrier = {
41 .node = node,
42 .bar_set = bar,
43 .bar_reg = bar_reg,
44 };
45 util_dynarray_append(&state->barriers, struct barrier, barrier);
46
47 state->progress = true;
48 }
49
/* If this block ends in a break or continue, emit bar_break_nv before the
 * jump for every stacked barrier whose CF node lies strictly between the
 * jump and the loop it targets.  Threads taking the jump leave those
 * nodes without reaching their bar_sync_nv, so they must be broken out of
 * the barrier value first.
 */
static void
break_loop_bars(nir_block *block, struct add_barriers_state *state)
{
   if (exec_list_is_empty(&block->instr_list))
      return;

   /* Only blocks that end in a jump are interesting */
   nir_instr *block_last_instr = nir_block_last_instr(block);
   if (block_last_instr->type != nir_instr_type_jump)
      return;

   nir_jump_instr *jump = nir_instr_as_jump(block_last_instr);
   assert(jump->type == nir_jump_break ||
          jump->type == nir_jump_continue);

   nir_builder *b = &state->builder;
   b->cursor = nir_before_instr(&jump->instr);

   const unsigned num_bars =
      util_dynarray_num_elements(&state->barriers, struct barrier);

   /* Walk ancestor CF nodes from the jump up to (but not including) the
    * innermost loop.  The barrier stack is ordered outermost-first, so we
    * match entries from the top (innermost) down; each ancestor has at
    * most one entry, consumed by decrementing idx.
    */
   int idx = (int)num_bars - 1;
   for (nir_cf_node *p = block->cf_node.parent;
        p->type != nir_cf_node_loop; p = p->parent) {
      if (idx < 0)
         break;

      const struct barrier *bar =
         util_dynarray_element(&state->barriers, struct barrier, idx);
      if (bar->node == p) {
         /* Break the jumping threads out of this barrier */
         nir_def *bar_val = nir_load_reg(b, bar->bar_reg);
         bar_val = nir_bar_break_nv(b, bar_val);
         nir_store_reg(b, bar_val, bar->bar_reg);
         idx--;
      }
   }
}
86
87 static void
pop_bar_cf_node(nir_cf_node * node,struct add_barriers_state * state)88 pop_bar_cf_node(nir_cf_node *node, struct add_barriers_state *state)
89 {
90 if (state->barriers.size == 0)
91 return;
92
93 const struct barrier *top =
94 util_dynarray_top_ptr(&state->barriers, struct barrier);
95 if (top->node == node)
96 (void)util_dynarray_pop(&state->barriers, struct barrier);
97 }
98
99 static void
lower_control_barriers_block(nir_block * block,struct add_barriers_state * state)100 lower_control_barriers_block(nir_block *block,
101 struct add_barriers_state *state)
102 {
103 nir_foreach_instr_safe(instr, block) {
104 if (instr->type != nir_instr_type_intrinsic)
105 continue;
106
107 nir_intrinsic_instr *barrier = nir_instr_as_intrinsic(instr);
108 if (barrier->intrinsic != nir_intrinsic_barrier)
109 continue;
110
111 mesa_scope exec_scope = nir_intrinsic_execution_scope(barrier);
112 assert(exec_scope <= SCOPE_WORKGROUP &&
113 "Control barrier with scope > WORKGROUP");
114
115 if (exec_scope == SCOPE_WORKGROUP &&
116 nak_nir_workgroup_has_one_subgroup(state->builder.shader))
117 exec_scope = SCOPE_SUBGROUP;
118
119 /* Because we're guaranteeing maximal convergence with this pass,
120 * subgroup barriers do nothing.
121 */
122 if (exec_scope <= SCOPE_SUBGROUP)
123 exec_scope = SCOPE_NONE;
124
125 if (exec_scope != nir_intrinsic_execution_scope(barrier)) {
126 nir_intrinsic_set_execution_scope(barrier, exec_scope);
127 state->progress = true;
128 }
129
130 const nir_variable_mode mem_modes = nir_intrinsic_memory_modes(barrier);
131 if (exec_scope == SCOPE_NONE && mem_modes == 0) {
132 nir_instr_remove(&barrier->instr);
133 state->progress = true;
134 } else {
135 state->builder.shader->info.uses_control_barrier = true;
136 }
137 }
138 }
139
/* Checks if this CF node's immediate successor has a sync. There's no point
 * in adding a sync if the very next thing we do, besides dealing with phis,
 * is to sync.
 */
static bool
cf_node_imm_succ_is_sync(nir_cf_node *node)
{
   nir_block *block = nir_cf_node_as_block(nir_cf_node_next(node));

   /* Scan past phis; the first real instruction decides */
   nir_foreach_instr(instr, block) {
      if (instr->type == nir_instr_type_phi)
         continue;

      if (instr->type != nir_instr_type_intrinsic)
         return false;

      nir_intrinsic_instr *sync = nir_instr_as_intrinsic(instr);
      if (sync->intrinsic == nir_intrinsic_bar_sync_nv)
         return true;

      return false;
   }

   /* We can only get here if the only thing in the block is phis */

   /* There's another loop or if following and we didn't find a sync */
   if (nir_cf_node_next(&block->cf_node))
      return false;

   /* No successor in the CF list. Check the parent. */
   nir_cf_node *parent = block->cf_node.parent;
   switch (parent->type) {
   case nir_cf_node_if:
      /* Recurse: whatever follows the enclosing if follows us too */
      return cf_node_imm_succ_is_sync(parent);
   case nir_cf_node_loop:
      /* We want to sync before the continue to avoid additional divergence.
       * We could possibly avoid the sync in the case where the loop is known
       * to be divergent and therefore syncs afterwards but this seems safer
       * for now.
       *
       * Note that this also catches double loops where you have something
       * like this:
       *
       *    loop {
       *       ...
       *       if (div) {
       *          loop {
       *             ...
       *          }
       *       }
       *    }
       *
       * In this case, we'll place a merge after the if and so we don't need a
       * merge around the inner loop.
       */
      return false;
   case nir_cf_node_function:
      /* The end of the function is a natural sync point */
      return true;
   default:
      unreachable("Unknown CF node type");
   }
}
203
204 static bool
block_is_merge(const nir_block * block)205 block_is_merge(const nir_block *block)
206 {
207 /* If it's unreachable, there is no merge */
208 if (block->imm_dom == NULL)
209 return false;
210
211 unsigned num_preds = 0;
212 set_foreach(block->predecessors, entry) {
213 const nir_block *pred = entry->key;
214
215 /* We don't care about unreachable blocks */
216 if (pred->imm_dom == NULL)
217 continue;
218
219 num_preds++;
220 }
221
222 return num_preds > 1;
223 }
224
/* Recursively walk a CF list, lowering control barriers in each block and
 * wrapping each divergent if or loop with bar_set_nv/bar_sync_nv.
 * Barriers are pushed before recursing into a node's body and popped
 * afterwards, so the stack mirrors the current CF nesting; break_loop_bars()
 * relies on that invariant when matching jumps against the stack.
 */
static void
add_barriers_cf_list(struct exec_list *cf_list,
                     struct add_barriers_state *state)
{
   foreach_list_typed(nir_cf_node, node, node, cf_list) {
      switch (node->type) {
      case nir_cf_node_block: {
         nir_block *block = nir_cf_node_as_block(node);

         lower_control_barriers_block(block, state);

         /* Handle any break/continue at the end of this block */
         break_loop_bars(block, state);
         break;
      }
      case nir_cf_node_if: {
         nir_if *nif = nir_cf_node_as_if(node);

         /* Only divergent ifs whose successor actually merges control
          * flow need a barrier, and only if the successor doesn't
          * immediately sync anyway.
          */
         if (nif->condition.ssa->divergent &&
             block_is_merge(nir_cf_node_as_block(nir_cf_node_next(node))) &&
             !cf_node_imm_succ_is_sync(&nif->cf_node))
            add_bar_cf_node(&nif->cf_node, state);

         add_barriers_cf_list(&nif->then_list, state);
         add_barriers_cf_list(&nif->else_list, state);

         pop_bar_cf_node(&nif->cf_node, state);
         break;
      }
      case nir_cf_node_loop: {
         nir_loop *loop = nir_cf_node_as_loop(node);

         if (loop->divergent && !cf_node_imm_succ_is_sync(&loop->cf_node))
            add_bar_cf_node(&loop->cf_node, state);

         add_barriers_cf_list(&loop->body, state);

         pop_bar_cf_node(&loop->cf_node, state);
         break;
      }
      default:
         unreachable("Unknown CF node type");
      }
   }
}
269
270 static bool
nak_nir_add_barriers_impl(nir_function_impl * impl,const struct nak_compiler * nak)271 nak_nir_add_barriers_impl(nir_function_impl *impl,
272 const struct nak_compiler *nak)
273 {
274 nir_metadata_require(impl, nir_metadata_dominance);
275
276 struct add_barriers_state state = {
277 .nak = nak,
278 .builder = nir_builder_create(impl),
279 };
280 util_dynarray_init(&state.barriers, NULL);
281
282 add_barriers_cf_list(&impl->body, &state);
283
284 util_dynarray_fini(&state.barriers);
285
286 if (state.progress) {
287 nir_metadata_preserve(impl, nir_metadata_block_index |
288 nir_metadata_dominance |
289 nir_metadata_loop_analysis);
290
291 nir_lower_reg_intrinsics_to_ssa_impl(impl);
292 } else {
293 nir_metadata_preserve(impl, nir_metadata_all);
294 }
295
296 return state.progress;
297 }
298
299 bool
nak_nir_add_barriers(nir_shader * nir,const struct nak_compiler * nak)300 nak_nir_add_barriers(nir_shader *nir, const struct nak_compiler *nak)
301 {
302 if (nak->sm < 70) {
303 nir_shader_preserve_all_metadata(nir);
304 return false;
305 }
306
307 bool progress = false;
308
309 nir->info.uses_control_barrier = false;
310
311 nir_foreach_function_impl(impl, nir)
312 progress |= nak_nir_add_barriers_impl(impl, nak);
313
314 return progress;
315 }
316