/*
 * Copyright © 2019 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"
#include "nir_builder.h"

#include "util/hash_table.h"
#include "util/u_dynarray.h"

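/* One potentially non-uniform handle used by an instruction: the original SSA
 * index or bindless handle, the parent variable deref when the handle came
 * from an array deref, and "first", the value rebuilt inside the loop from
 * read_first_invocation() results and used to rewrite the access.
 */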
struct nu_handle {
   nir_def *handle;
   nir_deref_instr *parent_deref;
   nir_def *first;
};

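/* Hash table key used to group non-uniform accesses that may share a single
 * waterfall loop: same block, same access group, same handles and access
 * type.  Non-reorderable instructions additionally key on their own
 * instruction index, so each of them gets a separate loop.
 */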
struct nu_handle_key {
   uint32_t block_index;
   uint32_t access_group;
   uint32_t handle_count;
   /* We can have at most one texture and one sampler handle */
   uint32_t handle_indices[2];
   uint32_t access_type;
   /* Optional instruction index for emitting separate loops for non-reorderable instructions. */
   uint32_t instr_index;
};

DERIVE_HASH_TABLE(nu_handle_key)

struct nu_handle_data {
   struct nu_handle handles[2];
   struct util_dynarray srcs;
};

struct nu_handle_src {
   nir_src *srcs[2];
};

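/* Tracks the running access group for one access type.  "index" identifies
 * the current group and "last_first_use" is the smallest instruction index at
 * which any access already placed in the group is first used.  Grouping an
 * access moves it down into a shared loop, so it must not cross that point.
 */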
struct nu_access_group_state {
   uint32_t last_first_use;
   uint32_t index;
};

struct nu_state {
   struct hash_table *accesses;
   struct nu_access_group_state access_groups[nir_lower_non_uniform_access_type_count];
};

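/* Extracts the potentially non-uniform handle from a source.  For an array
 * deref of a variable the handle is the array index; otherwise it is the
 * source itself (e.g. a bindless handle).  Returns false if the handle is a
 * direct variable reference or a constant, in which case no lowering is
 * needed.
 */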
static bool
nu_handle_init(struct nu_handle *h, nir_src *src)
{
   nir_deref_instr *deref = nir_src_as_deref(*src);
   if (deref) {
      if (deref->deref_type == nir_deref_type_var)
         return false;

      nir_deref_instr *parent = nir_deref_instr_parent(deref);
      assert(parent->deref_type == nir_deref_type_var);

      assert(deref->deref_type == nir_deref_type_array);
      if (nir_src_is_const(deref->arr.index))
         return false;

      h->handle = deref->arr.index.ssa;
      h->parent_deref = parent;

      return true;
   } else {
      if (nir_src_is_const(*src))
         return false;

      h->handle = src->ssa;
      h->parent_deref = NULL;

      return true;
   }
}

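/* Emits, inside the loop, read_first_invocation() of the handle's channels
 * (restricted to the channels selected by the options callback), stores the
 * resulting value in handle->first, and returns the condition that the
 * current invocation's handle equals the first invocation's.
 */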
static nir_def *
nu_handle_compare(const nir_lower_non_uniform_access_options *options,
                  nir_builder *b, struct nu_handle *handle, nir_src *src)
{
   nir_component_mask_t channel_mask = ~0;
   if (options->callback)
      channel_mask = options->callback(src, options->callback_data);
   channel_mask &= nir_component_mask(handle->handle->num_components);

   nir_def *channels[NIR_MAX_VEC_COMPONENTS];
   for (unsigned i = 0; i < handle->handle->num_components; i++)
      channels[i] = nir_channel(b, handle->handle, i);

   handle->first = handle->handle;
   nir_def *equal_first = nir_imm_true(b);
   u_foreach_bit(i, channel_mask) {
      nir_def *first = nir_read_first_invocation(b, channels[i]);
      handle->first = nir_vector_insert_imm(b, handle->first, first, i);

      equal_first = nir_iand(b, equal_first, nir_ieq(b, first, channels[i]));
   }

   return equal_first;
}

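/* Rewrites the instruction's source to use the subgroup-uniform handle,
 * rebuilding the array deref with the uniform index when necessary.
 */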
static void
nu_handle_rewrite(nir_builder *b, struct nu_handle *h, nir_src *src)
{
   if (h->parent_deref) {
      /* Replicate the deref. */
      nir_deref_instr *deref =
         nir_build_deref_array(b, h->parent_deref, h->first);
      nir_src_rewrite(src, &deref->def);
   } else {
      nir_src_rewrite(src, h->first);
   }
}

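/* nir_foreach_def() callback that records the smallest instruction index at
 * which any of the instruction's definitions is used.
 */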
static bool
get_first_use(nir_def *def, void *state)
{
   uint32_t *last_first_use = state;
   nir_foreach_use(use, def)
      *last_first_use = MIN2(*last_first_use, nir_src_parent_instr(use)->index);

   return true;
}

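/* Queues a non-uniform instruction for lowering.  Accesses are keyed by
 * block, access group, handles and access type so that compatible accesses
 * later share a single waterfall loop; non-reorderable instructions also key
 * on their own index and therefore each get a separate loop.
 */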
static void
add_non_uniform_instr(struct nu_state *state, struct nu_handle *handles,
                      nir_src **srcs, uint32_t handle_count, bool group,
                      enum nir_lower_non_uniform_access_type access_type)
{
   nir_instr *instr = nir_src_parent_instr(srcs[0]);

   struct nu_access_group_state *access_group = &state->access_groups[ffs(access_type) - 1];

   if (group) {
      uint32_t first_use = UINT32_MAX;
      nir_foreach_def(instr, get_first_use, &first_use);

      /* Avoid moving accesses below their first use. */
      if (instr->index >= access_group->last_first_use) {
         access_group->last_first_use = first_use;
         access_group->index++;
      } else {
         /* Adjust the access group scope so that every access dominates its first use. */
         access_group->last_first_use = MIN2(access_group->last_first_use, first_use);
      }
   }

   struct nu_handle_key key;
   memset(&key, 0, sizeof(key));
   key.block_index = instr->block->index;
   key.access_group = access_group->index;
   key.access_type = access_type;
   key.handle_count = handle_count;

   if (!group)
      key.instr_index = instr->index;

   for (uint32_t i = 0; i < handle_count; i++)
      key.handle_indices[i] = handles[i].handle->parent_instr->index;

   struct hash_entry *entry = _mesa_hash_table_search(state->accesses, &key);
   if (!entry) {
      struct nu_handle_data *data = ralloc(state->accesses, struct nu_handle_data);

      for (uint32_t i = 0; i < handle_count; i++)
         data->handles[i] = handles[i];

      util_dynarray_init(&data->srcs, state->accesses);

      struct nu_handle_key *key_copy = ralloc(state->accesses, struct nu_handle_key);
      memcpy(key_copy, &key, sizeof(key));

      entry = _mesa_hash_table_insert(state->accesses, key_copy, data);
   }

   struct nu_handle_data *data = entry->data;

   struct nu_handle_src src = { 0 };
   for (uint32_t i = 0; i < handle_count; i++)
      src.srcs[i] = srcs[i];

   util_dynarray_append(&data->srcs, struct nu_handle_src, src);
}

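/* Collects the non-uniform texture and/or sampler handles of a texture
 * instruction, clears its non-uniform flags and queues it for lowering.
 * Returns false if the handles turned out to be uniform or constant.
 */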
static bool
lower_non_uniform_tex_access(struct nu_state *state, nir_tex_instr *tex)
{
   if (!tex->texture_non_uniform && !tex->sampler_non_uniform)
      return false;

   /* We can have at most one texture and one sampler handle */
   unsigned num_handles = 0;
   struct nu_handle handles[2];
   nir_src *srcs[2];
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      switch (tex->src[i].src_type) {
      case nir_tex_src_texture_offset:
      case nir_tex_src_texture_handle:
      case nir_tex_src_texture_deref:
         if (!tex->texture_non_uniform)
            continue;
         break;

      case nir_tex_src_sampler_offset:
      case nir_tex_src_sampler_handle:
      case nir_tex_src_sampler_deref:
         if (!tex->sampler_non_uniform)
            continue;
         break;

      default:
         continue;
      }

      assert(num_handles < ARRAY_SIZE(handles));
      srcs[num_handles] = &tex->src[i].src;
      if (nu_handle_init(&handles[num_handles], &tex->src[i].src))
         num_handles++;
   }

   if (num_handles == 0) {
      /* nu_handle_init() returned false because the handles are uniform. */
      tex->texture_non_uniform = false;
      tex->sampler_non_uniform = false;
      return false;
   }

   tex->texture_non_uniform = false;
   tex->sampler_non_uniform = false;

   add_non_uniform_instr(state, handles, srcs, num_handles, true,
                         nir_lower_non_uniform_texture_access);

   return true;
}

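/* Handles an intrinsic whose resource source (src[handle_src]) is marked
 * ACCESS_NON_UNIFORM: clears the flag and queues the intrinsic for lowering.
 * Returns false if the handle is actually uniform or constant.
 */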
static bool
lower_non_uniform_access_intrin(struct nu_state *state, nir_intrinsic_instr *intrin,
                                unsigned handle_src, enum nir_lower_non_uniform_access_type access_type)
{
   if (!(nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM))
      return false;

   nir_src *src = &intrin->src[handle_src];

   struct nu_handle handle;
   if (!nu_handle_init(&handle, src)) {
      nir_intrinsic_set_access(intrin, nir_intrinsic_access(intrin) & ~ACCESS_NON_UNIFORM);
      return false;
   }

   nir_intrinsic_set_access(intrin, nir_intrinsic_access(intrin) & ~ACCESS_NON_UNIFORM);

   add_non_uniform_instr(state, &handle, &src, 1, nir_intrinsic_can_reorder(intrin),
                         access_type);

   return true;
}

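/* Grouping must not move accesses across barriers, discards/terminates or
 * calls.  Reset the affected access groups so that subsequent accesses start
 * a new group; instructions that affect derivatives also cut texture access
 * groups.
 */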
static void
handle_barrier(struct nu_state *state, bool affects_derivatives)
{
   enum nir_lower_non_uniform_access_type access_type =
      nir_lower_non_uniform_ssbo_access | nir_lower_non_uniform_image_access;

   if (affects_derivatives)
      access_type |= nir_lower_non_uniform_texture_access;

   u_foreach_bit(i, access_type) {
      state->access_groups[i].last_first_use = 0;
   }
}

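/* Runs the lowering on one function: first walk the shader, clear the
 * non-uniform flags and collect every non-uniform access into state.accesses,
 * grouped by nu_handle_key; then, for each group, emit a loop, make the
 * handles subgroup-uniform with nu_handle_compare() and move the grouped
 * instructions into the loop body guarded by the all-equal condition.
 */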
static bool
nir_lower_non_uniform_access_impl(nir_function_impl *impl,
                                  const nir_lower_non_uniform_access_options *options)
{
   bool progress = false;

   struct nu_state state = {
      .accesses = nu_handle_key_table_create(NULL),
   };

   nir_metadata_require(impl, nir_metadata_instr_index | nir_metadata_block_index);

   nir_foreach_block_safe(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         switch (instr->type) {
         case nir_instr_type_tex: {
            nir_tex_instr *tex = nir_instr_as_tex(instr);
            if ((options->types & nir_lower_non_uniform_texture_access) &&
                lower_non_uniform_tex_access(&state, tex))
               progress = true;
            break;
         }

         case nir_instr_type_intrinsic: {
            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            switch (intrin->intrinsic) {
            case nir_intrinsic_terminate_if:
            case nir_intrinsic_terminate:
            case nir_intrinsic_demote_if:
            case nir_intrinsic_demote:
            case nir_intrinsic_barrier:
               handle_barrier(&state, intrin->intrinsic == nir_intrinsic_terminate_if ||
                                      intrin->intrinsic == nir_intrinsic_terminate);
               break;

            case nir_intrinsic_load_ubo:
               if ((options->types & nir_lower_non_uniform_ubo_access) &&
                   lower_non_uniform_access_intrin(&state, intrin, 0, nir_lower_non_uniform_ubo_access))
                  progress = true;
               break;

            case nir_intrinsic_load_ssbo:
            case nir_intrinsic_ssbo_atomic:
            case nir_intrinsic_ssbo_atomic_swap:
               if ((options->types & nir_lower_non_uniform_ssbo_access) &&
                   lower_non_uniform_access_intrin(&state, intrin, 0, nir_lower_non_uniform_ssbo_access))
                  progress = true;
               break;

            case nir_intrinsic_store_ssbo:
               /* SSBO Stores put the index in the second source */
               if ((options->types & nir_lower_non_uniform_ssbo_access) &&
                   lower_non_uniform_access_intrin(&state, intrin, 1, nir_lower_non_uniform_ssbo_access))
                  progress = true;
               break;

            case nir_intrinsic_get_ssbo_size:
               if ((options->types & nir_lower_non_uniform_get_ssbo_size) &&
                   lower_non_uniform_access_intrin(&state, intrin, 0, nir_lower_non_uniform_get_ssbo_size))
                  progress = true;
               break;

            case nir_intrinsic_image_load:
            case nir_intrinsic_image_sparse_load:
            case nir_intrinsic_image_store:
            case nir_intrinsic_image_atomic:
            case nir_intrinsic_image_atomic_swap:
            case nir_intrinsic_image_levels:
            case nir_intrinsic_image_size:
            case nir_intrinsic_image_samples:
            case nir_intrinsic_image_samples_identical:
            case nir_intrinsic_image_fragment_mask_load_amd:
            case nir_intrinsic_bindless_image_load:
            case nir_intrinsic_bindless_image_sparse_load:
            case nir_intrinsic_bindless_image_store:
            case nir_intrinsic_bindless_image_atomic:
            case nir_intrinsic_bindless_image_atomic_swap:
            case nir_intrinsic_bindless_image_levels:
            case nir_intrinsic_bindless_image_size:
            case nir_intrinsic_bindless_image_samples:
            case nir_intrinsic_bindless_image_samples_identical:
            case nir_intrinsic_bindless_image_fragment_mask_load_amd:
            case nir_intrinsic_image_deref_load:
            case nir_intrinsic_image_deref_sparse_load:
            case nir_intrinsic_image_deref_store:
            case nir_intrinsic_image_deref_atomic:
            case nir_intrinsic_image_deref_atomic_swap:
            case nir_intrinsic_image_deref_levels:
            case nir_intrinsic_image_deref_size:
            case nir_intrinsic_image_deref_samples:
            case nir_intrinsic_image_deref_samples_identical:
            case nir_intrinsic_image_deref_fragment_mask_load_amd:
               if ((options->types & nir_lower_non_uniform_image_access) &&
                   lower_non_uniform_access_intrin(&state, intrin, 0, nir_lower_non_uniform_image_access))
                  progress = true;
               break;

            default:
               /* Nothing to do */
               break;
            }
            break;
         }

         case nir_instr_type_call:
            handle_barrier(&state, true);
            break;

         default:
            /* Nothing to do */
            break;
         }
      }
   }

   nir_builder b = nir_builder_create(impl);

   hash_table_foreach(state.accesses, entry) {
      const struct nu_handle_key *key = entry->key;
      struct nu_handle_data data = *(struct nu_handle_data *)entry->data;

      nir_src *first_src = util_dynarray_top_ptr(&data.srcs, struct nu_handle_src)->srcs[0];
      b.cursor = nir_after_instr(nir_src_parent_instr(first_src));

      nir_push_loop(&b);

      nir_def *all_equal_first = NULL;
      for (uint32_t i = 0; i < key->handle_count; i++) {
         if (i && data.handles[i].handle == data.handles[0].handle) {
            data.handles[i].first = data.handles[0].first;
            continue;
         }

         nir_def *equal_first = nu_handle_compare(options, &b, &data.handles[i], first_src);
         if (i == 0)
            all_equal_first = equal_first;
         else
            all_equal_first = nir_iand(&b, all_equal_first, equal_first);
      }

      nir_push_if(&b, all_equal_first);

      util_dynarray_foreach(&data.srcs, struct nu_handle_src, src) {
         for (uint32_t i = 0; i < key->handle_count; i++)
            nu_handle_rewrite(&b, &data.handles[i], src->srcs[i]);

         nir_instr *instr = nir_src_parent_instr(src->srcs[0]);
         nir_instr_remove(instr);
         nir_builder_instr_insert(&b, instr);
      }

      nir_jump(&b, nir_jump_break);

      nir_pop_if(&b, NULL);
      nir_pop_loop(&b, NULL);
   }

   _mesa_hash_table_destroy(state.accesses, NULL);

   if (progress)
      nir_metadata_preserve(impl, nir_metadata_none);

   return progress;
}

/**
 * Lowers non-uniform resource access by using a loop
 *
 * This pass lowers non-uniform resource access by using subgroup operations
 * and a loop.  Most hardware requires things like textures and UBO access
 * operations to happen on a dynamically uniform (or at least subgroup
 * uniform) resource.  This pass allows for non-uniform access by placing the
 * texture instruction in a loop that looks something like this:
 *
 * loop {
 *    bool tex_eq_first = readFirstInvocationARB(texture) == texture;
 *    bool smp_eq_first = readFirstInvocationARB(sampler) == sampler;
 *    if (tex_eq_first && smp_eq_first) {
 *       res = texture(texture, sampler, ...);
 *       break;
 *    }
 * }
 *
 * Fortunately, because the instruction is immediately followed by the only
 * break in the loop, the block containing the instruction dominates the end
 * of the loop.  Therefore, it's safe to move the instruction into the loop
 * without fixing up SSA in any way.
 */
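/*
 * Caller-side usage might look roughly like the sketch below.  The exact set
 * of types, and whether a channel-mask callback is needed, is driver-specific;
 * this is an illustration, not taken from any particular driver:
 *
 *    const nir_lower_non_uniform_access_options opts = {
 *       .types = nir_lower_non_uniform_texture_access |
 *                nir_lower_non_uniform_ssbo_access |
 *                nir_lower_non_uniform_image_access,
 *    };
 *    NIR_PASS(progress, shader, nir_lower_non_uniform_access, &opts);
 */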
bool
nir_lower_non_uniform_access(nir_shader *shader,
                             const nir_lower_non_uniform_access_options *options)
{
   bool progress = false;

   nir_foreach_function_impl(impl, shader) {
      if (nir_lower_non_uniform_access_impl(impl, options))
         progress = true;
   }

   return progress;
}