/*
 * Copyright © 2019 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"
#include "nir_builder.h"

#include "util/hash_table.h"
#include "util/u_dynarray.h"

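/* One non-uniform resource handle feeding an instruction: the (possibly
 * divergent) SSA index or bindless handle, the variable deref it was taken
 * from (NULL for non-deref sources), and the subgroup-uniform copy produced
 * with read_first_invocation once the loop is emitted.
 */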
struct nu_handle {
   nir_def *handle;
   nir_deref_instr *parent_deref;
   nir_def *first;
};

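/* Hash key used to group non-uniform accesses that can share a single loop:
 * same block, same access group, same access type and the same handle defs.
 */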
struct nu_handle_key {
   uint32_t block_index;
   uint32_t access_group;
   uint32_t handle_count;
   /* We can have at most one texture and one sampler handle */
   uint32_t handle_indices[2];
   uint32_t access_type;
   /* Optional instruction index for emitting separate loops for non-reorderable instructions. */
   uint32_t instr_index;
};

DERIVE_HASH_TABLE(nu_handle_key)

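/* Per-key bucket: the handles that guard the loop plus a dynarray of
 * nu_handle_src entries, one per queued instruction, pointing at the
 * sources that need rewriting.
 */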
struct nu_handle_data {
   struct nu_handle handles[2];
   struct util_dynarray srcs;
};

struct nu_handle_src {
   nir_src *srcs[2];
};

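/* Grouping state, tracked per access type: index is the current group and
 * last_first_use the earliest use seen among its members.  A new group is
 * started whenever adding another access would move an earlier member past
 * its first use.  nu_state bundles this with the table of queued accesses.
 */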
struct nu_access_group_state {
   uint32_t last_first_use;
   uint32_t index;
};

struct nu_state {
   struct hash_table *accesses;
   struct nu_access_group_state access_groups[nir_lower_non_uniform_access_type_count];
};

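/* Captures the non-uniform handle behind a source.  Returns false (nothing
 * to lower) when the index or handle is constant or the deref is a plain
 * variable.
 */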
static bool
nu_handle_init(struct nu_handle *h, nir_src *src)
{
   nir_deref_instr *deref = nir_src_as_deref(*src);
   if (deref) {
      if (deref->deref_type == nir_deref_type_var)
         return false;

      nir_deref_instr *parent = nir_deref_instr_parent(deref);
      assert(parent->deref_type == nir_deref_type_var);

      assert(deref->deref_type == nir_deref_type_array);
      if (nir_src_is_const(deref->arr.index))
         return false;

      h->handle = deref->arr.index.ssa;
      h->parent_deref = parent;

      return true;
   } else {
      if (nir_src_is_const(*src))
         return false;

      h->handle = src->ssa;
      h->parent_deref = NULL;

      return true;
   }
}

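/* Emits read_first_invocation for the channels selected by the callback and
 * returns an i1 that is true for invocations whose handle matches the first
 * invocation's handle.  The uniform handle is recorded in handle->first.
 */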
static nir_def *
nu_handle_compare(const nir_lower_non_uniform_access_options *options,
                  nir_builder *b, struct nu_handle *handle, nir_src *src)
{
   nir_component_mask_t channel_mask = ~0;
   if (options->callback)
      channel_mask = options->callback(src, options->callback_data);
   channel_mask &= nir_component_mask(handle->handle->num_components);

   nir_def *channels[NIR_MAX_VEC_COMPONENTS];
   for (unsigned i = 0; i < handle->handle->num_components; i++)
      channels[i] = nir_channel(b, handle->handle, i);

   handle->first = handle->handle;
   nir_def *equal_first = nir_imm_true(b);
   u_foreach_bit(i, channel_mask) {
      nir_def *first = nir_read_first_invocation(b, channels[i]);
      handle->first = nir_vector_insert_imm(b, handle->first, first, i);

      equal_first = nir_iand(b, equal_first, nir_ieq(b, first, channels[i]));
   }

   return equal_first;
}

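/* Points the instruction at the subgroup-uniform handle, rebuilding the
 * array deref when the handle came from a deref chain.
 */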
static void
nu_handle_rewrite(nir_builder *b, struct nu_handle *h, nir_src *src)
{
   if (h->parent_deref) {
      /* Replicate the deref. */
      nir_deref_instr *deref =
         nir_build_deref_array(b, h->parent_deref, h->first);
      nir_src_rewrite(src, &deref->def);
   } else {
      nir_src_rewrite(src, h->first);
   }
}

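/* nir_foreach_def() callback: reduces *state to the smallest instruction
 * index found among the uses of the instruction's defs.
 */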
static bool
get_first_use(nir_def *def, void *state)
{
   uint32_t *last_first_use = state;
   nir_foreach_use(use, def)
      *last_first_use = MIN2(*last_first_use, nir_src_parent_instr(use)->index);

   return true;
}

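/* Records a non-uniform instruction in the hash table so that instructions
 * sharing the same block, access group and handles can later be wrapped in
 * a single loop.  Non-reorderable instructions get a per-instruction key and
 * therefore their own loop.
 */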
static void
add_non_uniform_instr(struct nu_state *state, struct nu_handle *handles,
                      nir_src **srcs, uint32_t handle_count, bool group,
                      enum nir_lower_non_uniform_access_type access_type)
{
   nir_instr *instr = nir_src_parent_instr(srcs[0]);

   struct nu_access_group_state *access_group = &state->access_groups[ffs(access_type) - 1];

   if (group) {
      uint32_t first_use = UINT32_MAX;
      nir_foreach_def(instr, get_first_use, &first_use);

      /* Avoid moving accesses below their first use. */
      if (instr->index >= access_group->last_first_use) {
         access_group->last_first_use = first_use;
         access_group->index++;
      } else {
         /* Adjust the access group scope so that every access dominates its first use. */
         access_group->last_first_use = MIN2(access_group->last_first_use, first_use);
      }
   }

   struct nu_handle_key key;
   memset(&key, 0, sizeof(key));
   key.block_index = instr->block->index;
   key.access_group = access_group->index;
   key.access_type = access_type;
   key.handle_count = handle_count;

   if (!group)
      key.instr_index = instr->index;

   for (uint32_t i = 0; i < handle_count; i++)
      key.handle_indices[i] = handles[i].handle->parent_instr->index;

   struct hash_entry *entry = _mesa_hash_table_search(state->accesses, &key);
   if (!entry) {
      struct nu_handle_data *data = ralloc(state->accesses, struct nu_handle_data);

      for (uint32_t i = 0; i < handle_count; i++)
         data->handles[i] = handles[i];

      util_dynarray_init(&data->srcs, state->accesses);

      struct nu_handle_key *key_copy = ralloc(state->accesses, struct nu_handle_key);
      memcpy(key_copy, &key, sizeof(key));

      entry = _mesa_hash_table_insert(state->accesses, key_copy, data);
   }

   struct nu_handle_data *data = entry->data;

   struct nu_handle_src src = { 0 };
   for (uint32_t i = 0; i < handle_count; i++)
      src.srcs[i] = srcs[i];

   util_dynarray_append(&data->srcs, struct nu_handle_src, src);
}

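/* Queues a texture instruction whose texture and/or sampler handle is
 * non-uniform and clears the non_uniform flags.  Returns true if the
 * instruction needs lowering.
 */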
static bool
lower_non_uniform_tex_access(struct nu_state *state, nir_tex_instr *tex)
{
   if (!tex->texture_non_uniform && !tex->sampler_non_uniform)
      return false;

   /* We can have at most one texture and one sampler handle */
   unsigned num_handles = 0;
   struct nu_handle handles[2];
   nir_src *srcs[2];
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      switch (tex->src[i].src_type) {
      case nir_tex_src_texture_offset:
      case nir_tex_src_texture_handle:
      case nir_tex_src_texture_deref:
         if (!tex->texture_non_uniform)
            continue;
         break;

      case nir_tex_src_sampler_offset:
      case nir_tex_src_sampler_handle:
      case nir_tex_src_sampler_deref:
         if (!tex->sampler_non_uniform)
            continue;
         break;

      default:
         continue;
      }

      assert(num_handles < ARRAY_SIZE(handles));
      srcs[num_handles] = &tex->src[i].src;
      if (nu_handle_init(&handles[num_handles], &tex->src[i].src))
         num_handles++;
   }

   if (num_handles == 0) {
      /* nu_handle_init() returned false because the handles are uniform. */
      tex->texture_non_uniform = false;
      tex->sampler_non_uniform = false;
      return false;
   }

   tex->texture_non_uniform = false;
   tex->sampler_non_uniform = false;

   add_non_uniform_instr(state, handles, srcs, num_handles, true,
                         nir_lower_non_uniform_texture_access);

   return true;
}

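/* Queues a UBO/SSBO/image intrinsic whose resource source (handle_src) is
 * marked ACCESS_NON_UNIFORM and drops that flag.  Returns true if the
 * intrinsic needs lowering.
 */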
static bool
lower_non_uniform_access_intrin(struct nu_state *state, nir_intrinsic_instr *intrin,
                                unsigned handle_src, enum nir_lower_non_uniform_access_type access_type)
{
   if (!(nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM))
      return false;

   nir_src *src = &intrin->src[handle_src];

   struct nu_handle handle;
   if (!nu_handle_init(&handle, src)) {
      nir_intrinsic_set_access(intrin, nir_intrinsic_access(intrin) & ~ACCESS_NON_UNIFORM);
      return false;
   }

   nir_intrinsic_set_access(intrin, nir_intrinsic_access(intrin) & ~ACCESS_NON_UNIFORM);

   add_non_uniform_instr(state, &handle, &src, 1, nir_intrinsic_can_reorder(intrin),
                         access_type);

   return true;
}

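/* Closes the open access groups at barriers, discard/terminate intrinsics
 * and calls so that grouping never moves an access across them.  Texture
 * groups are only closed when the instruction affects derivatives.
 */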
static void
handle_barrier(struct nu_state *state, bool affects_derivatives)
{
   enum nir_lower_non_uniform_access_type access_type =
      nir_lower_non_uniform_ssbo_access | nir_lower_non_uniform_image_access;

   if (affects_derivatives)
      access_type |= nir_lower_non_uniform_texture_access;

   u_foreach_bit(i, access_type) {
      state->access_groups[i].last_first_use = 0;
   }
}

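/* First pass: walk the impl and bucket every non-uniform access.  Second
 * pass: for each bucket, build the loop after the last collected instruction
 * and move all of the bucket's instructions into it.
 */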
static bool
nir_lower_non_uniform_access_impl(nir_function_impl *impl,
                                  const nir_lower_non_uniform_access_options *options)
{
   bool progress = false;

   struct nu_state state = {
      .accesses = nu_handle_key_table_create(NULL),
   };

   nir_metadata_require(impl, nir_metadata_instr_index | nir_metadata_block_index);

   nir_foreach_block_safe(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         switch (instr->type) {
         case nir_instr_type_tex: {
            nir_tex_instr *tex = nir_instr_as_tex(instr);
            if ((options->types & nir_lower_non_uniform_texture_access) &&
                lower_non_uniform_tex_access(&state, tex))
               progress = true;
            break;
         }

         case nir_instr_type_intrinsic: {
            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            switch (intrin->intrinsic) {
            case nir_intrinsic_terminate_if:
            case nir_intrinsic_terminate:
            case nir_intrinsic_demote_if:
            case nir_intrinsic_demote:
            case nir_intrinsic_barrier:
               handle_barrier(&state, intrin->intrinsic == nir_intrinsic_terminate_if ||
                                      intrin->intrinsic == nir_intrinsic_terminate);
               break;

            case nir_intrinsic_load_ubo:
               if ((options->types & nir_lower_non_uniform_ubo_access) &&
                   lower_non_uniform_access_intrin(&state, intrin, 0, nir_lower_non_uniform_ubo_access))
                  progress = true;
               break;

            case nir_intrinsic_load_ssbo:
            case nir_intrinsic_ssbo_atomic:
            case nir_intrinsic_ssbo_atomic_swap:
               if ((options->types & nir_lower_non_uniform_ssbo_access) &&
                   lower_non_uniform_access_intrin(&state, intrin, 0, nir_lower_non_uniform_ssbo_access))
                  progress = true;
               break;

            case nir_intrinsic_store_ssbo:
               /* SSBO Stores put the index in the second source */
               if ((options->types & nir_lower_non_uniform_ssbo_access) &&
                   lower_non_uniform_access_intrin(&state, intrin, 1, nir_lower_non_uniform_ssbo_access))
                  progress = true;
               break;

            case nir_intrinsic_get_ssbo_size:
               if ((options->types & nir_lower_non_uniform_get_ssbo_size) &&
                   lower_non_uniform_access_intrin(&state, intrin, 0, nir_lower_non_uniform_get_ssbo_size))
                  progress = true;
               break;

            case nir_intrinsic_image_load:
            case nir_intrinsic_image_sparse_load:
            case nir_intrinsic_image_store:
            case nir_intrinsic_image_atomic:
            case nir_intrinsic_image_atomic_swap:
            case nir_intrinsic_image_levels:
            case nir_intrinsic_image_size:
            case nir_intrinsic_image_samples:
            case nir_intrinsic_image_samples_identical:
            case nir_intrinsic_image_fragment_mask_load_amd:
            case nir_intrinsic_bindless_image_load:
            case nir_intrinsic_bindless_image_sparse_load:
            case nir_intrinsic_bindless_image_store:
            case nir_intrinsic_bindless_image_atomic:
            case nir_intrinsic_bindless_image_atomic_swap:
            case nir_intrinsic_bindless_image_levels:
            case nir_intrinsic_bindless_image_size:
            case nir_intrinsic_bindless_image_samples:
            case nir_intrinsic_bindless_image_samples_identical:
            case nir_intrinsic_bindless_image_fragment_mask_load_amd:
            case nir_intrinsic_image_deref_load:
            case nir_intrinsic_image_deref_sparse_load:
            case nir_intrinsic_image_deref_store:
            case nir_intrinsic_image_deref_atomic:
            case nir_intrinsic_image_deref_atomic_swap:
            case nir_intrinsic_image_deref_levels:
            case nir_intrinsic_image_deref_size:
            case nir_intrinsic_image_deref_samples:
            case nir_intrinsic_image_deref_samples_identical:
            case nir_intrinsic_image_deref_fragment_mask_load_amd:
               if ((options->types & nir_lower_non_uniform_image_access) &&
                   lower_non_uniform_access_intrin(&state, intrin, 0, nir_lower_non_uniform_image_access))
                  progress = true;
               break;

            default:
               /* Nothing to do */
               break;
            }
            break;
         }

         case nir_instr_type_call:
            handle_barrier(&state, true);
            break;

         default:
            /* Nothing to do */
            break;
         }
      }
   }

   nir_builder b = nir_builder_create(impl);

   hash_table_foreach(state.accesses, entry) {
      const struct nu_handle_key *key = entry->key;
      struct nu_handle_data data = *(struct nu_handle_data *)entry->data;

      nir_src *first_src = util_dynarray_top_ptr(&data.srcs, struct nu_handle_src)->srcs[0];
      b.cursor = nir_after_instr(nir_src_parent_instr(first_src));

      nir_push_loop(&b);

      nir_def *all_equal_first = NULL;
      for (uint32_t i = 0; i < key->handle_count; i++) {
         if (i && data.handles[i].handle == data.handles[0].handle) {
            data.handles[i].first = data.handles[0].first;
            continue;
         }

         nir_def *equal_first = nu_handle_compare(options, &b, &data.handles[i], first_src);
         if (i == 0)
            all_equal_first = equal_first;
         else
            all_equal_first = nir_iand(&b, all_equal_first, equal_first);
      }

      nir_push_if(&b, all_equal_first);

      util_dynarray_foreach(&data.srcs, struct nu_handle_src, src) {
         for (uint32_t i = 0; i < key->handle_count; i++)
            nu_handle_rewrite(&b, &data.handles[i], src->srcs[i]);

         nir_instr *instr = nir_src_parent_instr(src->srcs[0]);
         nir_instr_remove(instr);
         nir_builder_instr_insert(&b, instr);
      }

      nir_jump(&b, nir_jump_break);

      nir_pop_if(&b, NULL);
      nir_pop_loop(&b, NULL);
   }

   _mesa_hash_table_destroy(state.accesses, NULL);

   if (progress)
      nir_metadata_preserve(impl, nir_metadata_none);

   return progress;
}

/**
 * Lowers non-uniform resource access by using a loop
 *
 * This pass lowers non-uniform resource access by using subgroup operations
 * and a loop.  Most hardware requires things like textures and UBO access
 * operations to happen on a dynamically uniform (or at least subgroup
 * uniform) resource.  This pass allows for non-uniform access by placing the
 * texture instruction in a loop that looks something like this:
 *
 *    loop {
 *       bool tex_eq_first = readFirstInvocationARB(texture) == texture;
 *       bool smp_eq_first = readFirstInvocationARB(sampler) == sampler;
 *       if (tex_eq_first && smp_eq_first) {
 *          res = texture(texture, sampler, ...);
 *          break;
 *       }
 *    }
 *
 * Fortunately, because the instruction is immediately followed by the only
 * break in the loop, the block containing the instruction dominates the end
 * of the loop.  Therefore, it's safe to move the instruction into the loop
 * without fixing up SSA in any way.
 */
bool
nir_lower_non_uniform_access(nir_shader *shader,
                             const nir_lower_non_uniform_access_options *options)
{
   bool progress = false;

   nir_foreach_function_impl(impl, shader) {
      if (nir_lower_non_uniform_access_impl(impl, options))
         progress = true;
   }

   return progress;
}
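
/*
 * Usage sketch: a driver wanting all four access kinds lowered might run the
 * pass roughly like this (the option set shown here is illustrative, not
 * prescriptive; drivers pick the types their hardware cannot handle
 * natively):
 *
 *    const nir_lower_non_uniform_access_options opts = {
 *       .types = nir_lower_non_uniform_texture_access |
 *                nir_lower_non_uniform_ubo_access |
 *                nir_lower_non_uniform_ssbo_access |
 *                nir_lower_non_uniform_image_access,
 *    };
 *    NIR_PASS(progress, shader, nir_lower_non_uniform_access, &opts);
 */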