/*
 * Copyright © 2020 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"
#include "nir_worklist.h"
#include "util/u_vector.h"

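/* Default combine callback: merge barrier b into barrier a by taking the
 * union of their memory modes and semantics and the larger of their memory
 * and execution scopes, then report success so the caller removes b.
 */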
static bool
combine_all_barriers(nir_intrinsic_instr *a, nir_intrinsic_instr *b, void *_)
{
   nir_intrinsic_set_memory_modes(
      a, nir_intrinsic_memory_modes(a) | nir_intrinsic_memory_modes(b));
   nir_intrinsic_set_memory_semantics(
      a, nir_intrinsic_memory_semantics(a) | nir_intrinsic_memory_semantics(b));
   nir_intrinsic_set_memory_scope(
      a, MAX2(nir_intrinsic_memory_scope(a), nir_intrinsic_memory_scope(b)));
   nir_intrinsic_set_execution_scope(
      a, MAX2(nir_intrinsic_execution_scope(a), nir_intrinsic_execution_scope(b)));
   return true;
}

static bool
nir_opt_combine_barriers_impl(nir_function_impl *impl,
                              nir_combine_barrier_cb combine_cb,
                              void *data)
{
   bool progress = false;

   nir_foreach_block(block, impl) {
      nir_intrinsic_instr *prev = NULL;

      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic) {
            prev = NULL;
            continue;
         }

         nir_intrinsic_instr *current = nir_instr_as_intrinsic(instr);
         if (current->intrinsic != nir_intrinsic_barrier) {
            prev = NULL;
            continue;
         }

         if (prev && combine_cb(prev, current, data)) {
            nir_instr_remove(&current->instr);
            progress = true;
         } else {
            prev = current;
         }
      }
   }

   if (progress) {
      nir_metadata_preserve(impl, nir_metadata_block_index |
                                  nir_metadata_dominance |
                                  nir_metadata_live_defs);
   } else {
      nir_metadata_preserve(impl, nir_metadata_all);
   }

   return progress;
}

/* Combine adjacent scoped barriers. */
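/*
 * Backends that cannot merge arbitrary barriers can pass their own
 * combine_cb. As a hypothetical sketch (not an existing driver callback),
 * a backend that only merges barriers with matching execution scopes might
 * use:
 *
 *    static bool
 *    merge_same_exec_scope(nir_intrinsic_instr *a, nir_intrinsic_instr *b,
 *                          void *data)
 *    {
 *       if (nir_intrinsic_execution_scope(a) !=
 *           nir_intrinsic_execution_scope(b))
 *          return false;
 *
 *       nir_intrinsic_set_memory_modes(
 *          a, nir_intrinsic_memory_modes(a) | nir_intrinsic_memory_modes(b));
 *       nir_intrinsic_set_memory_semantics(
 *          a, nir_intrinsic_memory_semantics(a) |
 *             nir_intrinsic_memory_semantics(b));
 *       nir_intrinsic_set_memory_scope(
 *          a, MAX2(nir_intrinsic_memory_scope(a),
 *                  nir_intrinsic_memory_scope(b)));
 *       return true;
 *    }
 *
 *    nir_opt_combine_barriers(shader, merge_same_exec_scope, NULL);
 */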
bool
nir_opt_combine_barriers(nir_shader *shader,
                         nir_combine_barrier_cb combine_cb,
                         void *data)
{
   /* Default to combining everything. Only some backends can do better. */
   if (!combine_cb)
      combine_cb = combine_all_barriers;

   bool progress = false;

   nir_foreach_function_impl(impl, shader) {
      if (nir_opt_combine_barriers_impl(impl, combine_cb, data)) {
         progress = true;
      }
   }

   return progress;
}

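/* Returns true if barrier instruction a is guaranteed to execute before
 * instruction b: either both sit in the same block and a has the lower
 * instruction index, or a's block dominates b's. Requires valid dominance
 * and instruction-index metadata.
 */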
static bool
barrier_happens_before(const nir_instr *a, const nir_instr *b)
{
   if (a->block == b->block)
      return a->index < b->index;

   return nir_block_dominates(a->block, b->block);
}

static bool
nir_opt_barrier_modes_impl(nir_function_impl *impl)
{
   bool progress = false;

   nir_instr_worklist *barriers = nir_instr_worklist_create();
   if (!barriers)
      return false;

   struct u_vector mem_derefs;
   if (!u_vector_init(&mem_derefs, 32, sizeof(struct nir_instr *))) {
      nir_instr_worklist_destroy(barriers);
      return false;
   }

   const unsigned all_memory_modes = nir_var_image |
                                     nir_var_mem_ssbo |
                                     nir_var_mem_shared |
                                     nir_var_mem_global;

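   /* First pass: collect every barrier, and every deref that may refer to
    * one of the memory modes above (or whose type contains an atomic, since
    * those accesses behave like SSBO memory).
    */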
   nir_foreach_block_safe(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type == nir_instr_type_intrinsic) {
            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

            if (intrin->intrinsic == nir_intrinsic_barrier)
               nir_instr_worklist_push_tail(barriers, instr);

         } else if (instr->type == nir_instr_type_deref) {
            nir_deref_instr *deref = nir_instr_as_deref(instr);

            if (nir_deref_mode_may_be(deref, all_memory_modes) ||
                glsl_contains_atomic(deref->type)) {
               nir_deref_instr **tail = u_vector_add(&mem_derefs);
               *tail = deref;
            }
         }
      }
   }

   nir_foreach_instr_in_worklist(instr, barriers) {
      nir_intrinsic_instr *barrier = nir_instr_as_intrinsic(instr);

      const unsigned barrier_modes = nir_intrinsic_memory_modes(barrier);
      unsigned new_modes = barrier_modes & ~all_memory_modes;

      /* If a barrier dominates all memory accesses for a particular mode (or
       * there are none), then the barrier cannot affect those accesses. We
       * can drop that mode from the barrier.
       *
       * For each barrier, we look at the list of memory derefs, and see if
       * the barrier fails to dominate the deref. If so, then there's at
       * least one memory access that may happen before the barrier, so we
       * need to keep the mode. Any modes not kept are discarded.
       */
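      /* Concretely (illustrative), given a block like
       *
       *    store_shared ...
       *    barrier memory_modes=shared|image
       *    load_shared ...
       *    image_store ...
       *
       * every image access happens after the barrier, so the image mode can
       * be dropped, while the store_shared above the barrier forces the
       * shared mode to stay.
       */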
      nir_deref_instr **p_deref;
      u_vector_foreach(p_deref, &mem_derefs) {
         nir_deref_instr *deref = *p_deref;
         const unsigned atomic_mode =
            glsl_contains_atomic(deref->type) ? nir_var_mem_ssbo : 0;
         const unsigned deref_modes =
            (deref->modes | atomic_mode) & barrier_modes;

         if (deref_modes &&
             !barrier_happens_before(&barrier->instr, &deref->instr))
            new_modes |= deref_modes;
      }

      /* If we don't need all the modes, update the barrier. */
      if (barrier_modes != new_modes) {
         nir_intrinsic_set_memory_modes(barrier, new_modes);
         progress = true;
      }

      /* Shared memory only exists within a workgroup, so synchronizing it
       * beyond workgroup scope is nonsense.
       */
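      /* For instance (illustrative), a memory-only barrier left with just
       * the shared mode and memory_scope=SCOPE_DEVICE can be narrowed to
       * memory_scope=SCOPE_WORKGROUP.
       */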
      if (nir_intrinsic_execution_scope(barrier) == SCOPE_NONE &&
          new_modes == nir_var_mem_shared) {
         nir_intrinsic_set_memory_scope(barrier,
            MIN2(nir_intrinsic_memory_scope(barrier), SCOPE_WORKGROUP));
         progress = true;
      }
   }

   nir_instr_worklist_destroy(barriers);
   u_vector_finish(&mem_derefs);

   return progress;
}

/**
 * Reduce barriers by removing unnecessary modes and scopes.
 *
 * This pass must be called before nir_lower_explicit_io lowers derefs!
 *
 * Many shaders issue full memory barriers, which may need to synchronize
 * access to images, SSBOs, shared local memory, or global memory. However,
 * many of them only use a subset of those memory types - say, only SSBOs.
 *
 * Shaders may also have patterns such as:
 *
 * 1. shared local memory access
 * 2. barrier with full variable modes
 * 3. more shared local memory access
 * 4. image access
 *
 * In this case, the barrier is needed to ensure synchronization between the
 * shared memory operations. Image reads and writes also exist, but they are
 * all on one side of the barrier, so it is a no-op for image access, and we
 * can drop the image mode from the barrier.
 *
 * In addition, we can reduce the memory scope of shared-only barriers, as
 * shared local memory only exists within a workgroup.
 */
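/* Illustrative pipeline ordering (driver details vary): because the pass
 * needs to see derefs, it has to run before they are lowered away, e.g.
 *
 *    NIR_PASS(progress, nir, nir_opt_barrier_modes);
 *    NIR_PASS(progress, nir, nir_lower_explicit_io, nir_var_mem_ssbo,
 *             addr_format);
 *
 * where addr_format is the driver's chosen nir_address_format.
 */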
bool
nir_opt_barrier_modes(nir_shader *shader)
{
   bool progress = false;

   nir_foreach_function_impl(impl, shader) {
      nir_metadata_require(impl, nir_metadata_dominance |
                                 nir_metadata_instr_index);

      if (nir_opt_barrier_modes_impl(impl)) {
         nir_metadata_preserve(impl, nir_metadata_block_index |
                                     nir_metadata_dominance |
                                     nir_metadata_live_defs);
         progress = true;
      } else {
         nir_metadata_preserve(impl, nir_metadata_all);
      }
   }

   return progress;
}