/*
 * Copyright © 2019 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"
#include "nir_builder.h"

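/* Bookkeeping for one potentially non-uniform resource handle.  `src` points
 * at the instruction source that supplies the handle, `handle` is the
 * non-uniform SSA index, `parent_deref` is the variable deref when the
 * source is a deref chain, and `first` receives the subgroup-uniform copy
 * built by nu_handle_compare().
 */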
struct nu_handle {
   nir_src *src;
   nir_ssa_def *handle;
   nir_deref_instr *parent_deref;
   nir_ssa_def *first;
};

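/* Captures the index used by a source operand.  Returns false when the
 * access is already uniform (a direct variable deref or a constant index),
 * in which case this source needs no lowering.
 */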
static bool
nu_handle_init(struct nu_handle *h, nir_src *src)
{
   h->src = src;

   nir_deref_instr *deref = nir_src_as_deref(*src);
   if (deref) {
      if (deref->deref_type == nir_deref_type_var)
         return false;

      nir_deref_instr *parent = nir_deref_instr_parent(deref);
      assert(parent->deref_type == nir_deref_type_var);

      assert(deref->deref_type == nir_deref_type_array);
      if (nir_src_is_const(deref->arr.index))
         return false;

      assert(deref->arr.index.is_ssa);
      h->handle = deref->arr.index.ssa;
      h->parent_deref = parent;

      return true;
   } else {
      if (nir_src_is_const(*src))
         return false;

      assert(src->is_ssa);
      h->handle = src->ssa;
      h->parent_deref = NULL;

      return true;
   }
}

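/* Splits the handle into channels, reads each channel from the first active
 * invocation, and assembles the result into handle->first.  Returns a 1-bit
 * value that is true for every invocation whose handle matches that first
 * invocation's handle.  The optional callback lets the caller limit the
 * comparison to a subset of channels.
 */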
static nir_ssa_def *
nu_handle_compare(const nir_lower_non_uniform_access_options *options,
                  nir_builder *b, struct nu_handle *handle)
{
   nir_component_mask_t channel_mask = ~0;
   if (options->callback)
      channel_mask = options->callback(handle->src, options->callback_data);
   channel_mask &= nir_component_mask(handle->handle->num_components);

   nir_ssa_def *channels[NIR_MAX_VEC_COMPONENTS];
   for (unsigned i = 0; i < handle->handle->num_components; i++)
      channels[i] = nir_channel(b, handle->handle, i);

   handle->first = handle->handle;
   nir_ssa_def *equal_first = nir_imm_true(b);
   u_foreach_bit(i, channel_mask) {
      nir_ssa_def *first = nir_read_first_invocation(b, channels[i]);
      handle->first = nir_vector_insert_imm(b, handle->first, first, i);

      equal_first = nir_iand(b, equal_first, nir_ieq(b, first, channels[i]));
   }

   return equal_first;
}

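/* Rewrites the instruction's source to use the subgroup-uniform handle,
 * rebuilding the array deref on top of the parent variable when the original
 * source was a deref.
 */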
static void
nu_handle_rewrite(nir_builder *b, struct nu_handle *h)
{
   if (h->parent_deref) {
      /* Replicate the deref. */
      nir_deref_instr *deref =
         nir_build_deref_array(b, h->parent_deref, h->first);
      *(h->src) = nir_src_for_ssa(&deref->dest.ssa);
   } else {
      *(h->src) = nir_src_for_ssa(h->first);
   }
}

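/* Lowers a texture instruction with a non-uniform texture and/or sampler
 * index by wrapping it in the loop described at the bottom of this file.
 */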
static bool
lower_non_uniform_tex_access(const nir_lower_non_uniform_access_options *options,
                             nir_builder *b, nir_tex_instr *tex)
{
   if (!tex->texture_non_uniform && !tex->sampler_non_uniform)
      return false;

   /* We can have at most one texture and one sampler handle */
   unsigned num_handles = 0;
   struct nu_handle handles[2];
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      switch (tex->src[i].src_type) {
      case nir_tex_src_texture_offset:
      case nir_tex_src_texture_handle:
      case nir_tex_src_texture_deref:
         if (!tex->texture_non_uniform)
            continue;
         break;

      case nir_tex_src_sampler_offset:
      case nir_tex_src_sampler_handle:
      case nir_tex_src_sampler_deref:
         if (!tex->sampler_non_uniform)
            continue;
         break;

      default:
         continue;
      }

      assert(num_handles < ARRAY_SIZE(handles));
      if (nu_handle_init(&handles[num_handles], &tex->src[i].src))
         num_handles++;
   }

   if (num_handles == 0)
      return false;

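   /* Pull the instruction out of the shader and leave the builder cursor at
    * its old position; it is re-inserted inside the loop below.
    */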
   b->cursor = nir_instr_remove(&tex->instr);

   nir_push_loop(b);

   nir_ssa_def *all_equal_first = nir_imm_true(b);
   for (unsigned i = 0; i < num_handles; i++) {
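      /* If this handle is the same SSA value as the first one (e.g. a
       * combined texture/sampler), reuse the uniform copy rather than
       * emitting a second comparison.
       */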
      if (i && handles[i].handle == handles[0].handle) {
         handles[i].first = handles[0].first;
         continue;
      }

      nir_ssa_def *equal_first = nu_handle_compare(options, b, &handles[i]);
      all_equal_first = nir_iand(b, all_equal_first, equal_first);
   }

   nir_push_if(b, all_equal_first);

   for (unsigned i = 0; i < num_handles; i++)
      nu_handle_rewrite(b, &handles[i]);

   nir_builder_instr_insert(b, &tex->instr);
   nir_jump(b, nir_jump_break);

   tex->texture_non_uniform = false;
   tex->sampler_non_uniform = false;

   return true;
}

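/* Same loop-based lowering for UBO/SSBO/image intrinsics.  handle_src
 * selects which source holds the resource index; see the store_ssbo case
 * below, which passes 1 because stores put the index in the second source.
 */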
static bool
lower_non_uniform_access_intrin(const nir_lower_non_uniform_access_options *options,
                                nir_builder *b, nir_intrinsic_instr *intrin,
                                unsigned handle_src)
{
   if (!(nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM))
      return false;

   struct nu_handle handle;
   if (!nu_handle_init(&handle, &intrin->src[handle_src]))
      return false;

   b->cursor = nir_instr_remove(&intrin->instr);

   nir_push_loop(b);

   nir_push_if(b, nu_handle_compare(options, b, &handle));

   nu_handle_rewrite(b, &handle);

   nir_builder_instr_insert(b, &intrin->instr);
   nir_jump(b, nir_jump_break);

   nir_intrinsic_set_access(intrin, nir_intrinsic_access(intrin) & ~ACCESS_NON_UNIFORM);

   return true;
}

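/* Walks every instruction in the impl and dispatches to the helpers above,
 * filtered by the access types the caller opted into.
 */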
static bool
nir_lower_non_uniform_access_impl(nir_function_impl *impl,
                                  const nir_lower_non_uniform_access_options *options)
{
   bool progress = false;

   nir_builder b;
   nir_builder_init(&b, impl);

   nir_foreach_block_safe(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         switch (instr->type) {
         case nir_instr_type_tex: {
            nir_tex_instr *tex = nir_instr_as_tex(instr);
            if ((options->types & nir_lower_non_uniform_texture_access) &&
                lower_non_uniform_tex_access(options, &b, tex))
               progress = true;
            break;
         }

         case nir_instr_type_intrinsic: {
            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            switch (intrin->intrinsic) {
            case nir_intrinsic_load_ubo:
               if ((options->types & nir_lower_non_uniform_ubo_access) &&
                   lower_non_uniform_access_intrin(options, &b, intrin, 0))
                  progress = true;
               break;

            case nir_intrinsic_load_ssbo:
            case nir_intrinsic_ssbo_atomic_add:
            case nir_intrinsic_ssbo_atomic_imin:
            case nir_intrinsic_ssbo_atomic_umin:
            case nir_intrinsic_ssbo_atomic_imax:
            case nir_intrinsic_ssbo_atomic_umax:
            case nir_intrinsic_ssbo_atomic_and:
            case nir_intrinsic_ssbo_atomic_or:
            case nir_intrinsic_ssbo_atomic_xor:
            case nir_intrinsic_ssbo_atomic_exchange:
            case nir_intrinsic_ssbo_atomic_comp_swap:
            case nir_intrinsic_ssbo_atomic_fadd:
            case nir_intrinsic_ssbo_atomic_fmin:
            case nir_intrinsic_ssbo_atomic_fmax:
            case nir_intrinsic_ssbo_atomic_fcomp_swap:
               if ((options->types & nir_lower_non_uniform_ssbo_access) &&
                   lower_non_uniform_access_intrin(options, &b, intrin, 0))
                  progress = true;
               break;

            case nir_intrinsic_store_ssbo:
               /* SSBO stores put the index in the second source */
               if ((options->types & nir_lower_non_uniform_ssbo_access) &&
                   lower_non_uniform_access_intrin(options, &b, intrin, 1))
                  progress = true;
               break;

            case nir_intrinsic_image_load:
            case nir_intrinsic_image_sparse_load:
            case nir_intrinsic_image_store:
            case nir_intrinsic_image_atomic_add:
            case nir_intrinsic_image_atomic_imin:
            case nir_intrinsic_image_atomic_umin:
            case nir_intrinsic_image_atomic_imax:
            case nir_intrinsic_image_atomic_umax:
            case nir_intrinsic_image_atomic_and:
            case nir_intrinsic_image_atomic_or:
            case nir_intrinsic_image_atomic_xor:
            case nir_intrinsic_image_atomic_exchange:
            case nir_intrinsic_image_atomic_comp_swap:
            case nir_intrinsic_image_atomic_fadd:
            case nir_intrinsic_image_atomic_fmin:
            case nir_intrinsic_image_atomic_fmax:
            case nir_intrinsic_image_size:
            case nir_intrinsic_image_samples:
            case nir_intrinsic_bindless_image_load:
            case nir_intrinsic_bindless_image_sparse_load:
            case nir_intrinsic_bindless_image_store:
            case nir_intrinsic_bindless_image_atomic_add:
            case nir_intrinsic_bindless_image_atomic_imin:
            case nir_intrinsic_bindless_image_atomic_umin:
            case nir_intrinsic_bindless_image_atomic_imax:
            case nir_intrinsic_bindless_image_atomic_umax:
            case nir_intrinsic_bindless_image_atomic_and:
            case nir_intrinsic_bindless_image_atomic_or:
            case nir_intrinsic_bindless_image_atomic_xor:
            case nir_intrinsic_bindless_image_atomic_exchange:
            case nir_intrinsic_bindless_image_atomic_comp_swap:
            case nir_intrinsic_bindless_image_atomic_fadd:
            case nir_intrinsic_bindless_image_atomic_fmin:
            case nir_intrinsic_bindless_image_atomic_fmax:
            case nir_intrinsic_bindless_image_size:
            case nir_intrinsic_bindless_image_samples:
            case nir_intrinsic_image_deref_load:
            case nir_intrinsic_image_deref_sparse_load:
            case nir_intrinsic_image_deref_store:
            case nir_intrinsic_image_deref_atomic_add:
            case nir_intrinsic_image_deref_atomic_umin:
            case nir_intrinsic_image_deref_atomic_imin:
            case nir_intrinsic_image_deref_atomic_umax:
            case nir_intrinsic_image_deref_atomic_imax:
            case nir_intrinsic_image_deref_atomic_and:
            case nir_intrinsic_image_deref_atomic_or:
            case nir_intrinsic_image_deref_atomic_xor:
            case nir_intrinsic_image_deref_atomic_exchange:
            case nir_intrinsic_image_deref_atomic_comp_swap:
            case nir_intrinsic_image_deref_atomic_fadd:
            case nir_intrinsic_image_deref_atomic_fmin:
            case nir_intrinsic_image_deref_atomic_fmax:
            case nir_intrinsic_image_deref_size:
            case nir_intrinsic_image_deref_samples:
               if ((options->types & nir_lower_non_uniform_image_access) &&
                   lower_non_uniform_access_intrin(options, &b, intrin, 0))
                  progress = true;
               break;

            default:
               /* Nothing to do */
               break;
            }
            break;
         }

         default:
            /* Nothing to do */
            break;
         }
      }
   }

   if (progress)
      nir_metadata_preserve(impl, nir_metadata_none);

   return progress;
}

/**
 * Lowers non-uniform resource access by using a loop
 *
 * This pass lowers non-uniform resource access by using subgroup operations
 * and a loop.  Most hardware requires operations such as texture fetches and
 * UBO accesses to use a dynamically uniform (or at least subgroup-uniform)
 * resource handle.  This pass enables non-uniform access by placing the
 * instruction in a loop that, for a texture operation, looks something like
 * this:
 *
 * loop {
 *    bool tex_eq_first = readFirstInvocationARB(texture) == texture;
 *    bool smp_eq_first = readFirstInvocationARB(sampler) == sampler;
 *    if (tex_eq_first && smp_eq_first) {
 *       res = texture(texture, sampler, ...);
 *       break;
 *    }
 * }
 *
 * Fortunately, because the instruction is immediately followed by the only
 * break in the loop, the block containing the instruction dominates the end
 * of the loop.  Therefore, it's safe to move the instruction into the loop
 * without fixing up SSA in any way.
 */
bool
nir_lower_non_uniform_access(nir_shader *shader,
                             const nir_lower_non_uniform_access_options *options)
{
   bool progress = false;

   nir_foreach_function(function, shader) {
      if (function->impl &&
          nir_lower_non_uniform_access_impl(function->impl, options))
         progress = true;
   }

   return progress;
}

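/* A hypothetical usage sketch (the caller-side variable names here are
 * illustrative, not part of this file): a driver that wants this lowering
 * for all four access types might call
 *
 *    const nir_lower_non_uniform_access_options opts = {
 *       .types = nir_lower_non_uniform_ubo_access |
 *                nir_lower_non_uniform_ssbo_access |
 *                nir_lower_non_uniform_texture_access |
 *                nir_lower_non_uniform_image_access,
 *    };
 *    NIR_PASS(progress, shader, nir_lower_non_uniform_access, &opts);
 *
 * Leaving the callback NULL makes nu_handle_compare() compare every channel
 * of each handle.
 */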