/*
 * Copyright © 2019 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"
#include "nir_builder.h"

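/* Per-source bookkeeping for one potentially non-uniform handle: the source
 * being rewritten, the SSA value of the handle (the array index when the
 * handle comes through a deref chain), the parent variable deref in the
 * deref case, and the read_first_invocation() result used for the rewrite.
 */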
struct nu_handle {
   nir_src *src;
   nir_ssa_def *handle;
   nir_deref_instr *parent_deref;
   nir_ssa_def *first;
};

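/* Fills out *h for the given source.  Returns false when the handle is
 * already uniform (a constant array index or a direct variable access), in
 * which case there is nothing to lower.
 */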
static bool
nu_handle_init(struct nu_handle *h, nir_src *src)
{
   h->src = src;

   nir_deref_instr *deref = nir_src_as_deref(*src);
   if (deref) {
      if (deref->deref_type == nir_deref_type_var)
         return false;

      nir_deref_instr *parent = nir_deref_instr_parent(deref);
      assert(parent->deref_type == nir_deref_type_var);

      assert(deref->deref_type == nir_deref_type_array);
      if (nir_src_is_const(deref->arr.index))
         return false;

      assert(deref->arr.index.is_ssa);
      h->handle = deref->arr.index.ssa;
      h->parent_deref = parent;

      return true;
   } else {
      if (nir_src_is_const(*src))
         return false;

      assert(src->is_ssa);
      h->handle = src->ssa;
      h->parent_deref = NULL;

      return true;
   }
}
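
/* Emits code comparing the handle against read_first_invocation() of itself
 * and returns a boolean that is true for the invocations whose selected
 * handle channels all match the first active invocation.  The uniform value
 * is recorded in handle->first for the later rewrite.
 */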
static nir_ssa_def *
nu_handle_compare(const nir_lower_non_uniform_access_options *options,
                  nir_builder *b, struct nu_handle *handle)
{
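   /* The optional driver callback restricts the comparison to the handle
    * components that actually select the resource; by default every
    * component must match.
    */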
   nir_component_mask_t channel_mask = ~0;
   if (options->callback)
      channel_mask = options->callback(handle->src, options->callback_data);
   channel_mask &= nir_component_mask(handle->handle->num_components);

   nir_ssa_def *channels[NIR_MAX_VEC_COMPONENTS];
   for (unsigned i = 0; i < handle->handle->num_components; i++)
      channels[i] = nir_channel(b, handle->handle, i);

   handle->first = handle->handle;
   nir_ssa_def *equal_first = nir_imm_true(b);
   u_foreach_bit(i, channel_mask) {
      nir_ssa_def *first = nir_read_first_invocation(b, channels[i]);
      handle->first = nir_vector_insert_imm(b, handle->first, first, i);

      equal_first = nir_iand(b, equal_first, nir_ieq(b, first, channels[i]));
   }

   return equal_first;
}

static void
nu_handle_rewrite(nir_builder *b, struct nu_handle *h)
{
   if (h->parent_deref) {
      /* Replicate the deref. */
      nir_deref_instr *deref =
         nir_build_deref_array(b, h->parent_deref, h->first);
      *(h->src) = nir_src_for_ssa(&deref->dest.ssa);
   } else {
      *(h->src) = nir_src_for_ssa(h->first);
   }
}

static bool
lower_non_uniform_tex_access(const nir_lower_non_uniform_access_options *options,
                             nir_builder *b, nir_tex_instr *tex)
{
   if (!tex->texture_non_uniform && !tex->sampler_non_uniform)
      return false;

   /* We can have at most one texture and one sampler handle */
   unsigned num_handles = 0;
   struct nu_handle handles[2];
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      switch (tex->src[i].src_type) {
      case nir_tex_src_texture_offset:
      case nir_tex_src_texture_handle:
      case nir_tex_src_texture_deref:
         if (!tex->texture_non_uniform)
            continue;
         break;

      case nir_tex_src_sampler_offset:
      case nir_tex_src_sampler_handle:
      case nir_tex_src_sampler_deref:
         if (!tex->sampler_non_uniform)
            continue;
         break;

      default:
         continue;
      }

      assert(num_handles < ARRAY_SIZE(handles));
      if (nu_handle_init(&handles[num_handles], &tex->src[i].src))
         num_handles++;
   }

   if (num_handles == 0)
      return false;

   b->cursor = nir_instr_remove(&tex->instr);

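   /* Re-emit the instruction inside a loop.  Each trip executes it for the
    * invocations whose handles all match those of the first active
    * invocation, then breaks, so every invocation eventually runs the
    * instruction with subgroup-uniform handles.
    */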
   nir_push_loop(b);

   nir_ssa_def *all_equal_first = nir_imm_true(b);
   for (unsigned i = 0; i < num_handles; i++) {
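      /* The texture and sampler may share one handle; compare it only once
       * and reuse the first-invocation value.
       */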
      if (i && handles[i].handle == handles[0].handle) {
         handles[i].first = handles[0].first;
         continue;
      }

      nir_ssa_def *equal_first = nu_handle_compare(options, b, &handles[i]);
      all_equal_first = nir_iand(b, all_equal_first, equal_first);
   }

   nir_push_if(b, all_equal_first);

   for (unsigned i = 0; i < num_handles; i++)
      nu_handle_rewrite(b, &handles[i]);

   nir_builder_instr_insert(b, &tex->instr);
   nir_jump(b, nir_jump_break);

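   /* The handles are subgroup-uniform inside the loop, so drop the
    * non-uniform flags; this also keeps the pass from lowering the
    * re-inserted instruction again.
    */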
   tex->texture_non_uniform = false;
   tex->sampler_non_uniform = false;

   return true;
}

static bool
lower_non_uniform_access_intrin(const nir_lower_non_uniform_access_options *options,
                                nir_builder *b, nir_intrinsic_instr *intrin,
                                unsigned handle_src)
{
   if (!(nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM))
      return false;

   struct nu_handle handle;
   if (!nu_handle_init(&handle, &intrin->src[handle_src]))
      return false;

   b->cursor = nir_instr_remove(&intrin->instr);

   nir_push_loop(b);

   nir_push_if(b, nu_handle_compare(options, b, &handle));

   nu_handle_rewrite(b, &handle);

   nir_builder_instr_insert(b, &intrin->instr);
   nir_jump(b, nir_jump_break);

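   /* As above, the handle is uniform inside the loop, so clear
    * ACCESS_NON_UNIFORM on the re-inserted intrinsic.
    */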
   nir_intrinsic_set_access(intrin,
                            nir_intrinsic_access(intrin) & ~ACCESS_NON_UNIFORM);

   return true;
}

static bool
nir_lower_non_uniform_access_impl(nir_function_impl *impl,
                                  const nir_lower_non_uniform_access_options *options)
{
   bool progress = false;

   nir_builder b;
   nir_builder_init(&b, impl);

   nir_foreach_block_safe(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         switch (instr->type) {
         case nir_instr_type_tex: {
            nir_tex_instr *tex = nir_instr_as_tex(instr);
            if ((options->types & nir_lower_non_uniform_texture_access) &&
                lower_non_uniform_tex_access(options, &b, tex))
               progress = true;
            break;
         }

         case nir_instr_type_intrinsic: {
            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            switch (intrin->intrinsic) {
            case nir_intrinsic_load_ubo:
               if ((options->types & nir_lower_non_uniform_ubo_access) &&
                   lower_non_uniform_access_intrin(options, &b, intrin, 0))
                  progress = true;
               break;

            case nir_intrinsic_load_ssbo:
            case nir_intrinsic_ssbo_atomic_add:
            case nir_intrinsic_ssbo_atomic_imin:
            case nir_intrinsic_ssbo_atomic_umin:
            case nir_intrinsic_ssbo_atomic_imax:
            case nir_intrinsic_ssbo_atomic_umax:
            case nir_intrinsic_ssbo_atomic_and:
            case nir_intrinsic_ssbo_atomic_or:
            case nir_intrinsic_ssbo_atomic_xor:
            case nir_intrinsic_ssbo_atomic_exchange:
            case nir_intrinsic_ssbo_atomic_comp_swap:
            case nir_intrinsic_ssbo_atomic_fadd:
            case nir_intrinsic_ssbo_atomic_fmin:
            case nir_intrinsic_ssbo_atomic_fmax:
            case nir_intrinsic_ssbo_atomic_fcomp_swap:
               if ((options->types & nir_lower_non_uniform_ssbo_access) &&
                   lower_non_uniform_access_intrin(options, &b, intrin, 0))
                  progress = true;
               break;

            case nir_intrinsic_store_ssbo:
               /* SSBO stores put the buffer index in the second source */
               if ((options->types & nir_lower_non_uniform_ssbo_access) &&
                   lower_non_uniform_access_intrin(options, &b, intrin, 1))
                  progress = true;
               break;

            case nir_intrinsic_image_load:
            case nir_intrinsic_image_sparse_load:
            case nir_intrinsic_image_store:
            case nir_intrinsic_image_atomic_add:
            case nir_intrinsic_image_atomic_imin:
            case nir_intrinsic_image_atomic_umin:
            case nir_intrinsic_image_atomic_imax:
            case nir_intrinsic_image_atomic_umax:
            case nir_intrinsic_image_atomic_and:
            case nir_intrinsic_image_atomic_or:
            case nir_intrinsic_image_atomic_xor:
            case nir_intrinsic_image_atomic_exchange:
            case nir_intrinsic_image_atomic_comp_swap:
            case nir_intrinsic_image_atomic_fadd:
            case nir_intrinsic_image_atomic_fmin:
            case nir_intrinsic_image_atomic_fmax:
            case nir_intrinsic_image_size:
            case nir_intrinsic_image_samples:
            case nir_intrinsic_bindless_image_load:
            case nir_intrinsic_bindless_image_sparse_load:
            case nir_intrinsic_bindless_image_store:
            case nir_intrinsic_bindless_image_atomic_add:
            case nir_intrinsic_bindless_image_atomic_imin:
            case nir_intrinsic_bindless_image_atomic_umin:
            case nir_intrinsic_bindless_image_atomic_imax:
            case nir_intrinsic_bindless_image_atomic_umax:
            case nir_intrinsic_bindless_image_atomic_and:
            case nir_intrinsic_bindless_image_atomic_or:
            case nir_intrinsic_bindless_image_atomic_xor:
            case nir_intrinsic_bindless_image_atomic_exchange:
            case nir_intrinsic_bindless_image_atomic_comp_swap:
            case nir_intrinsic_bindless_image_atomic_fadd:
            case nir_intrinsic_bindless_image_atomic_fmin:
            case nir_intrinsic_bindless_image_atomic_fmax:
            case nir_intrinsic_bindless_image_size:
            case nir_intrinsic_bindless_image_samples:
            case nir_intrinsic_image_deref_load:
            case nir_intrinsic_image_deref_sparse_load:
            case nir_intrinsic_image_deref_store:
            case nir_intrinsic_image_deref_atomic_add:
            case nir_intrinsic_image_deref_atomic_umin:
            case nir_intrinsic_image_deref_atomic_imin:
            case nir_intrinsic_image_deref_atomic_umax:
            case nir_intrinsic_image_deref_atomic_imax:
            case nir_intrinsic_image_deref_atomic_and:
            case nir_intrinsic_image_deref_atomic_or:
            case nir_intrinsic_image_deref_atomic_xor:
            case nir_intrinsic_image_deref_atomic_exchange:
            case nir_intrinsic_image_deref_atomic_comp_swap:
            case nir_intrinsic_image_deref_atomic_fadd:
            case nir_intrinsic_image_deref_atomic_fmin:
            case nir_intrinsic_image_deref_atomic_fmax:
            case nir_intrinsic_image_deref_size:
            case nir_intrinsic_image_deref_samples:
               if ((options->types & nir_lower_non_uniform_image_access) &&
                   lower_non_uniform_access_intrin(options, &b, intrin, 0))
                  progress = true;
               break;

            default:
               /* Nothing to do */
               break;
            }
            break;
         }

         default:
            /* Nothing to do */
            break;
         }
      }
   }

   if (progress)
      nir_metadata_preserve(impl, nir_metadata_none);

   return progress;
}

/**
 * Lowers non-uniform resource access by using a loop
 *
 * This pass lowers non-uniform resource access (texture, UBO, SSBO, and
 * image operations, selected via options->types) by using subgroup
 * operations and a loop.  Most hardware requires things like textures and
 * UBO access operations to happen on a dynamically uniform (or at least
 * subgroup uniform) resource.  This pass allows for non-uniform access by
 * placing the instruction in a loop that looks something like this:
 *
 *    loop {
 *       bool tex_eq_first = readFirstInvocationARB(texture) == texture;
 *       bool smp_eq_first = readFirstInvocationARB(sampler) == sampler;
 *       if (tex_eq_first && smp_eq_first) {
 *          res = texture(texture, sampler, ...);
 *          break;
 *       }
 *    }
 *
 * Fortunately, because the instruction is immediately followed by the only
 * break in the loop, the block containing the instruction dominates the end
 * of the loop.  Therefore, it's safe to move the instruction into the loop
 * without fixing up SSA in any way.
 */
bool
nir_lower_non_uniform_access(nir_shader *shader,
                             const nir_lower_non_uniform_access_options *options)
{
   bool progress = false;

   nir_foreach_function(function, shader) {
      if (function->impl &&
          nir_lower_non_uniform_access_impl(function->impl, options))
         progress = true;
   }

   return progress;
}
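
/* A typical invocation, sketched as hypothetical driver code: lower
 * non-uniform texture and SSBO indexing before handing the shader to a
 * backend that requires subgroup-uniform resource handles.  The option
 * values below are illustrative, not a recommendation for any driver:
 *
 *    const nir_lower_non_uniform_access_options opts = {
 *       .types = nir_lower_non_uniform_texture_access |
 *                nir_lower_non_uniform_ssbo_access,
 *    };
 *    NIR_PASS(progress, shader, nir_lower_non_uniform_access, &opts);
 */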