/*
 * Copyright © 2019 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"

/* This pass optimizes GL access qualifiers. So far it does two things:
 *
 * - Infer ACCESS_NON_WRITEABLE ("readonly") when it is missing.
 * - Infer ACCESS_CAN_REORDER when all of the following are true:
 *   - Either there are no writes, or ACCESS_NON_WRITEABLE and ACCESS_RESTRICT
 *     are both set. In either case there are no writes to the underlying
 *     memory.
 *   - If ACCESS_COHERENT is set, then there must be no memory barriers
 *     involving the access. Coherent accesses may return different results
 *     before and after barriers.
 *   - ACCESS_VOLATILE is not set.
 *
 * If these conditions hold, then image and buffer reads may be treated as
 * if they were uniform buffer reads, i.e. they may be arbitrarily moved,
 * combined, rematerialized, etc.
 */
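
/* For example, given a shader whose only buffer/image accesses are loads
 * (illustrative GLSL, not taken from any particular application):
 *
 *    layout(std430, binding = 0) buffer Data { uint arr[]; };
 *    layout(rgba8, binding = 1) uniform image2D img;
 *
 *    void main() {
 *       uint x = arr[0];
 *       vec4 v = imageLoad(img, ivec2(0));
 *    }
 *
 * Nothing is ever written and there are no barriers, so this pass can mark
 * both variables ACCESS_NON_WRITEABLE and both load intrinsics
 * ACCESS_CAN_REORDER.
 */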

struct access_state {
   /* SSBO variables and image uniforms with at least one write or atomic. */
   struct set *vars_written;

   /* Whether any buffer-backed memory (SSBOs and buffer images) or any
    * non-buffer image is written at all, including through bindless handles
    * that cannot be traced back to a variable.
    */
   bool images_written;
   bool buffers_written;

   /* Whether any barrier orders buffer or image memory accesses. */
   bool image_barriers;
   bool buffer_barriers;
};

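/* First pass: walk the shader once and record which variables are written,
 * whether buffer-backed or image memory is written at all, and which memory
 * classes have barriers ordering them.
 */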
static void
gather_intrinsic(struct access_state *state, nir_intrinsic_instr *instr)
{
   nir_variable *var;
   switch (instr->intrinsic) {
   case nir_intrinsic_image_deref_store:
   case nir_intrinsic_image_deref_atomic_add:
   case nir_intrinsic_image_deref_atomic_imin:
   case nir_intrinsic_image_deref_atomic_umin:
   case nir_intrinsic_image_deref_atomic_imax:
   case nir_intrinsic_image_deref_atomic_umax:
   case nir_intrinsic_image_deref_atomic_and:
   case nir_intrinsic_image_deref_atomic_or:
   case nir_intrinsic_image_deref_atomic_xor:
   case nir_intrinsic_image_deref_atomic_exchange:
   case nir_intrinsic_image_deref_atomic_comp_swap:
   case nir_intrinsic_image_deref_atomic_fadd:
      var = nir_intrinsic_get_var(instr, 0);

      /* In OpenGL, buffer images use normal buffer objects, whereas other
       * image types use textures, which cannot alias with buffer objects.
       * Therefore we have to group buffer samplers together with SSBOs.
       */
      if (glsl_get_sampler_dim(glsl_without_array(var->type)) ==
          GLSL_SAMPLER_DIM_BUF)
         state->buffers_written = true;
      else
         state->images_written = true;

      if (var->data.mode == nir_var_uniform)
         _mesa_set_add(state->vars_written, var);
      break;

   case nir_intrinsic_bindless_image_store:
   case nir_intrinsic_bindless_image_atomic_add:
   case nir_intrinsic_bindless_image_atomic_imin:
   case nir_intrinsic_bindless_image_atomic_umin:
   case nir_intrinsic_bindless_image_atomic_imax:
   case nir_intrinsic_bindless_image_atomic_umax:
   case nir_intrinsic_bindless_image_atomic_and:
   case nir_intrinsic_bindless_image_atomic_or:
   case nir_intrinsic_bindless_image_atomic_xor:
   case nir_intrinsic_bindless_image_atomic_exchange:
   case nir_intrinsic_bindless_image_atomic_comp_swap:
   case nir_intrinsic_bindless_image_atomic_fadd:
      if (nir_intrinsic_image_dim(instr) == GLSL_SAMPLER_DIM_BUF)
         state->buffers_written = true;
      else
         state->images_written = true;
      break;

   case nir_intrinsic_store_deref:
   case nir_intrinsic_deref_atomic_add:
   case nir_intrinsic_deref_atomic_imin:
   case nir_intrinsic_deref_atomic_umin:
   case nir_intrinsic_deref_atomic_imax:
   case nir_intrinsic_deref_atomic_umax:
   case nir_intrinsic_deref_atomic_and:
   case nir_intrinsic_deref_atomic_or:
   case nir_intrinsic_deref_atomic_xor:
   case nir_intrinsic_deref_atomic_exchange:
   case nir_intrinsic_deref_atomic_comp_swap:
   case nir_intrinsic_deref_atomic_fadd:
   case nir_intrinsic_deref_atomic_fmin:
   case nir_intrinsic_deref_atomic_fmax:
   case nir_intrinsic_deref_atomic_fcomp_swap:
      var = nir_intrinsic_get_var(instr, 0);
      if (var->data.mode != nir_var_mem_ssbo)
         break;

      _mesa_set_add(state->vars_written, var);
      state->buffers_written = true;
      break;

   case nir_intrinsic_memory_barrier:
      state->buffer_barriers = true;
      state->image_barriers = true;
      break;

   case nir_intrinsic_memory_barrier_buffer:
      state->buffer_barriers = true;
      break;

   case nir_intrinsic_memory_barrier_image:
      state->image_barriers = true;
      break;

   case nir_intrinsic_scoped_barrier:
      /* TODO: Could be more granular if we had nir_var_mem_image. */
      if (nir_intrinsic_memory_modes(instr) & (nir_var_mem_ubo |
                                               nir_var_mem_ssbo |
                                               nir_var_uniform |
                                               nir_var_mem_global)) {
         state->buffer_barriers = true;
         state->image_barriers = true;
      }
      break;

   default:
      break;
   }
}

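/* Mark an SSBO or image variable ACCESS_NON_WRITEABLE if the gather pass saw
 * no writes to it. Returns true if the variable's access flags changed.
 */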
static bool
process_variable(struct access_state *state, nir_variable *var)
{
   if (var->data.mode != nir_var_mem_ssbo &&
       !(var->data.mode == nir_var_uniform &&
         glsl_type_is_image(var->type)))
      return false;

   /* Ignore variables we've already marked */
   if (var->data.access & ACCESS_CAN_REORDER)
      return false;

   if (!(var->data.access & ACCESS_NON_WRITEABLE) &&
       !_mesa_set_search(state->vars_written, var)) {
      var->data.access |= ACCESS_NON_WRITEABLE;
      return true;
   }

   return false;
}

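/* Check the reordering conditions described at the top of the file: the
 * underlying memory must never be written, and a coherent access must not
 * be separated from other accesses by a barrier.
 */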
static bool
can_reorder(struct access_state *state, enum gl_access_qualifier access,
            bool is_buffer, bool is_ssbo)
{
   bool is_any_written = is_buffer ? state->buffers_written :
                                     state->images_written;

   /* Can we guarantee that the underlying memory is never written? */
   if (!is_any_written ||
       ((access & ACCESS_NON_WRITEABLE) &&
        (access & ACCESS_RESTRICT))) {
      /* Note: memoryBarrierBuffer() is only guaranteed to flush buffer
       * variables and not imageBuffers, so we only consider the GL-level
       * type here.
       */
      bool is_any_barrier = is_ssbo ?
         state->buffer_barriers : state->image_barriers;

      return (!is_any_barrier || !(access & ACCESS_COHERENT)) &&
             !(access & ACCESS_VOLATILE);
   }

   return false;
}

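/* Second pass: add ACCESS_NON_WRITEABLE and ACCESS_CAN_REORDER to load
 * intrinsics where the gathered information proves they are safe. Returns
 * true if any access flags were added.
 */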
static bool
process_intrinsic(struct access_state *state, nir_intrinsic_instr *instr)
{
   switch (instr->intrinsic) {
   case nir_intrinsic_bindless_image_load:
      if (nir_intrinsic_access(instr) & ACCESS_CAN_REORDER)
         return false;

      /* We have less information about bindless intrinsics, since we can't
       * always trace uses back to the variable. Don't try to infer if it's
       * read-only, unless there are no image writes at all.
       */
      bool progress = false;
      bool is_buffer =
         nir_intrinsic_image_dim(instr) == GLSL_SAMPLER_DIM_BUF;

      bool is_any_written =
         is_buffer ? state->buffers_written : state->images_written;

      if (!(nir_intrinsic_access(instr) & ACCESS_NON_WRITEABLE) &&
          !is_any_written) {
         progress = true;
         nir_intrinsic_set_access(instr,
                                  nir_intrinsic_access(instr) |
                                  ACCESS_NON_WRITEABLE);
      }

      if (can_reorder(state, nir_intrinsic_access(instr), is_buffer, false)) {
         progress = true;
         nir_intrinsic_set_access(instr,
                                  nir_intrinsic_access(instr) |
                                  ACCESS_CAN_REORDER);
      }

      return progress;

   case nir_intrinsic_load_deref:
   case nir_intrinsic_image_deref_load: {
      nir_variable *var = nir_intrinsic_get_var(instr, 0);

      if (instr->intrinsic == nir_intrinsic_load_deref &&
          var->data.mode != nir_var_mem_ssbo)
         return false;

      if (nir_intrinsic_access(instr) & ACCESS_CAN_REORDER)
         return false;

      bool progress = false;

      /* Check if we were able to mark the whole variable non-writeable */
      if (!(nir_intrinsic_access(instr) & ACCESS_NON_WRITEABLE) &&
          var->data.access & ACCESS_NON_WRITEABLE) {
         progress = true;
         nir_intrinsic_set_access(instr,
                                  nir_intrinsic_access(instr) |
                                  ACCESS_NON_WRITEABLE);
      }

      bool is_ssbo = var->data.mode == nir_var_mem_ssbo;

      bool is_buffer = is_ssbo ||
         glsl_get_sampler_dim(glsl_without_array(var->type)) ==
         GLSL_SAMPLER_DIM_BUF;

      if (can_reorder(state, nir_intrinsic_access(instr), is_buffer, is_ssbo)) {
         progress = true;
         nir_intrinsic_set_access(instr,
                                  nir_intrinsic_access(instr) |
                                  ACCESS_CAN_REORDER);
      }

      return progress;
   }

   default:
      return false;
   }
}

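/* Run process_intrinsic() over every instruction in one function and
 * preserve metadata if anything changed.
 */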
static bool
opt_access_impl(struct access_state *state, nir_function_impl *impl)
{
   bool progress = false;

   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type == nir_instr_type_intrinsic)
            progress |= process_intrinsic(state,
                                          nir_instr_as_intrinsic(instr));
      }
   }

   if (progress) {
      nir_metadata_preserve(impl,
                            nir_metadata_block_index |
                            nir_metadata_dominance |
                            nir_metadata_live_ssa_defs |
                            nir_metadata_loop_analysis);
   }

   return progress;
}

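/* Entry point: gather shader-wide write/barrier information, then upgrade
 * variable and intrinsic access qualifiers. Drivers typically run this once
 * before their main optimization loop so that later passes can take
 * advantage of the inferred qualifiers.
 */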
bool
nir_opt_access(nir_shader *shader)
{
   struct access_state state = {
      .vars_written = _mesa_pointer_set_create(NULL),
   };

   bool var_progress = false;
   bool progress = false;

   nir_foreach_function(func, shader) {
      if (func->impl) {
         nir_foreach_block(block, func->impl) {
            nir_foreach_instr(instr, block) {
               if (instr->type == nir_instr_type_intrinsic)
                  gather_intrinsic(&state, nir_instr_as_intrinsic(instr));
            }
         }
      }
   }

   nir_foreach_variable_with_modes(var, shader, nir_var_uniform |
                                                nir_var_mem_ubo |
                                                nir_var_mem_ssbo)
      var_progress |= process_variable(&state, var);

   nir_foreach_function(func, shader) {
      if (func->impl) {
         progress |= opt_access_impl(&state, func->impl);

         /* If we make a change to the uniforms, update all the impls. */
         if (var_progress) {
            nir_metadata_preserve(func->impl,
                                  nir_metadata_block_index |
                                  nir_metadata_dominance |
                                  nir_metadata_live_ssa_defs |
                                  nir_metadata_loop_analysis);
         }
      }
   }

   progress |= var_progress;

   _mesa_set_destroy(state.vars_written, NULL);
   return progress;
}