1 /*
2 * Copyright © 2018 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "nir.h"
25 #include "nir_builder.h"
26 #include "nir_deref.h"
27 #include "util/hash_table.h"
28
29 static bool
is_trivial_deref_cast(nir_deref_instr * cast)30 is_trivial_deref_cast(nir_deref_instr *cast)
31 {
32 nir_deref_instr *parent = nir_src_as_deref(cast->parent);
33 if (!parent)
34 return false;
35
36 return cast->modes == parent->modes &&
37 cast->type == parent->type &&
38 cast->dest.ssa.num_components == parent->dest.ssa.num_components &&
39 cast->dest.ssa.bit_size == parent->dest.ssa.bit_size;
40 }
41
/* Builds the chain of derefs that leads to \p deref and stores it in \p path.
 *
 * On return, path->path points at a NULL-terminated array that is ordered
 * from the outermost ancestor (index 0) down to \p deref itself (last entry
 * before the NULL).  Cast derefs accepted by is_trivial_deref_cast() are
 * skipped and do not appear in the array.
 *
 * Chains that fit are stored in the embedded path->_short_path buffer;
 * longer chains are stored in an array obtained from ralloc_array() on
 * \p mem_ctx.  \p deref must not be NULL.
 */
void
nir_deref_path_init(nir_deref_path *path,
                    nir_deref_instr *deref, void *mem_ctx)
{
   assert(deref != NULL);

   /* The length of the short path is at most ARRAY_SIZE - 1 because we need
    * room for the NULL terminator.
    */
   static const int max_short_path_len = ARRAY_SIZE(path->_short_path) - 1;

   int count = 0;

   /* The short path is filled back to front: tail stays on the terminator
    * slot at the end of the buffer while head moves towards the start.
    */
   nir_deref_instr **tail = &path->_short_path[max_short_path_len];
   nir_deref_instr **head = tail;

   *tail = NULL;
   /* First walk: count every non-trivial deref in the chain and, as long as
    * there is still room, record it in the short buffer.
    */
   for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
      if (d->deref_type == nir_deref_type_cast && is_trivial_deref_cast(d))
         continue;
      count++;
      if (count <= max_short_path_len)
         *(--head) = d;
   }

   if (count <= max_short_path_len) {
      /* If we're under max_short_path_len, just use the short path. */
      path->path = head;
      goto done;
   }

#ifndef NDEBUG
   /* Just in case someone uses short_path by accident */
   for (unsigned i = 0; i < ARRAY_SIZE(path->_short_path); i++)
      path->_short_path[i] = (void *)(uintptr_t)0xdeadbeef;
#endif

   /* Second walk: the chain did not fit, so allocate count entries plus the
    * NULL terminator and fill the new array back to front the same way.
    */
   path->path = ralloc_array(mem_ctx, nir_deref_instr *, count + 1);
   head = tail = path->path + count;
   *tail = NULL;
   for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
      if (d->deref_type == nir_deref_type_cast && is_trivial_deref_cast(d))
         continue;
      *(--head) = d;
   }

done:
   /* Whichever storage was used, head must be the start of the path and
    * tail must be its NULL terminator, exactly count entries later.
    */
   assert(head == path->path);
   assert(tail == head + count);
   assert(*tail == NULL);
}
93
94 void
nir_deref_path_finish(nir_deref_path * path)95 nir_deref_path_finish(nir_deref_path *path)
96 {
97 if (path->path < &path->_short_path[0] ||
98 path->path > &path->_short_path[ARRAY_SIZE(path->_short_path) - 1])
99 ralloc_free(path->path);
100 }
101
102 /**
103 * Recursively removes unused deref instructions
104 */
105 bool
nir_deref_instr_remove_if_unused(nir_deref_instr * instr)106 nir_deref_instr_remove_if_unused(nir_deref_instr *instr)
107 {
108 bool progress = false;
109
110 for (nir_deref_instr *d = instr; d; d = nir_deref_instr_parent(d)) {
111 /* If anyone is using this deref, leave it alone */
112 assert(d->dest.is_ssa);
113 if (!nir_ssa_def_is_unused(&d->dest.ssa))
114 break;
115
116 nir_instr_remove(&d->instr);
117 progress = true;
118 }
119
120 return progress;
121 }
122
123 bool
nir_deref_instr_has_indirect(nir_deref_instr * instr)124 nir_deref_instr_has_indirect(nir_deref_instr *instr)
125 {
126 while (instr->deref_type != nir_deref_type_var) {
127 /* Consider casts to be indirects */
128 if (instr->deref_type == nir_deref_type_cast)
129 return true;
130
131 if ((instr->deref_type == nir_deref_type_array ||
132 instr->deref_type == nir_deref_type_ptr_as_array) &&
133 !nir_src_is_const(instr->arr.index))
134 return true;
135
136 instr = nir_deref_instr_parent(instr);
137 }
138
139 return false;
140 }
141
142 bool
nir_deref_instr_is_known_out_of_bounds(nir_deref_instr * instr)143 nir_deref_instr_is_known_out_of_bounds(nir_deref_instr *instr)
144 {
145 for (; instr; instr = nir_deref_instr_parent(instr)) {
146 if (instr->deref_type == nir_deref_type_array &&
147 nir_src_is_const(instr->arr.index) &&
148 nir_src_as_uint(instr->arr.index) >=
149 glsl_get_length(nir_deref_instr_parent(instr)->type))
150 return true;
151 }
152
153 return false;
154 }
155
/* Reports whether \p deref has a "complex" use.
 *
 * Every use of the deref's SSA value is inspected.  The following uses are
 * simple and do not make the function return true:
 *   - being the parent of a struct, array or array_wildcard deref that
 *     itself has no complex use (checked recursively),
 *   - a load_deref or copy_deref,
 *   - being src[0] of a store_deref,
 *   - being src[0] / src[1] of a memcpy_deref when \p opts contains
 *     nir_deref_instr_has_complex_use_allow_memcpy_dst / _allow_memcpy_src
 *     respectively.
 * Any other use, including any use as an if condition, is complex.
 */
bool
nir_deref_instr_has_complex_use(nir_deref_instr *deref,
                                nir_deref_instr_has_complex_use_options opts)
{
   nir_foreach_use(use_src, &deref->dest.ssa) {
      nir_instr *use_instr = use_src->parent_instr;

      switch (use_instr->type) {
      case nir_instr_type_deref: {
         nir_deref_instr *use_deref = nir_instr_as_deref(use_instr);

         /* A var deref has no sources */
         assert(use_deref->deref_type != nir_deref_type_var);

         /* If a deref shows up in an array index or something like that, it's
          * a complex use.
          */
         if (use_src != &use_deref->parent)
            return true;

         /* Anything that isn't a basic struct or array deref is considered to
          * be a "complex" use.  In particular, we don't allow ptr_as_array
          * because we assume that opt_deref will turn any non-complex
          * ptr_as_array derefs into regular array derefs eventually so passes
          * which only want to handle simple derefs will pick them up in a
          * later pass.
          */
         if (use_deref->deref_type != nir_deref_type_struct &&
             use_deref->deref_type != nir_deref_type_array_wildcard &&
             use_deref->deref_type != nir_deref_type_array)
            return true;

         /* The child deref is simple in itself; its own uses decide. */
         if (nir_deref_instr_has_complex_use(use_deref, opts))
            return true;

         continue;
      }

      case nir_instr_type_intrinsic: {
         nir_intrinsic_instr *use_intrin = nir_instr_as_intrinsic(use_instr);
         switch (use_intrin->intrinsic) {
         case nir_intrinsic_load_deref:
            assert(use_src == &use_intrin->src[0]);
            continue;

         case nir_intrinsic_copy_deref:
            assert(use_src == &use_intrin->src[0] ||
                   use_src == &use_intrin->src[1]);
            continue;

         case nir_intrinsic_store_deref:
            /* A use in src[1] of a store means we're taking that pointer and
             * writing it to a variable.  Because we have no idea who will
             * read that variable and what they will do with the pointer, it's
             * considered a "complex" use.  A use in src[0], on the other
             * hand, is a simple use because we're just going to dereference
             * it and write a value there.
             */
            if (use_src == &use_intrin->src[0])
               continue;
            return true;

         case nir_intrinsic_memcpy_deref:
            /* Each side of a memcpy is simple only if the caller opted in. */
            if (use_src == &use_intrin->src[0] &&
                (opts & nir_deref_instr_has_complex_use_allow_memcpy_dst))
               continue;
            if (use_src == &use_intrin->src[1] &&
                (opts & nir_deref_instr_has_complex_use_allow_memcpy_src))
               continue;
            return true;

         default:
            return true;
         }
         /* Every case above either continues the loop or returns. */
         unreachable("Switch default failed");
      }

      default:
         return true;
      }
   }

   /* Any use as an if condition is complex: the loop body returns on the
    * first such use it visits.
    */
   nir_foreach_if_use(use, &deref->dest.ssa)
      return true;

   return false;
}
243
/* Returns the size in bytes of one scalar component of a vector, scalar or
 * matrix type.  Boolean types are reported as 4 bytes; every other type uses
 * its bit size divided by 8.
 */
static unsigned
type_scalar_size_bytes(const struct glsl_type *type)
{
   assert(glsl_type_is_vector_or_scalar(type) ||
          glsl_type_is_matrix(type));

   if (glsl_type_is_boolean(type))
      return 4;

   return glsl_get_bit_size(type) / 8;
}
251
252 unsigned
nir_deref_instr_array_stride(nir_deref_instr * deref)253 nir_deref_instr_array_stride(nir_deref_instr *deref)
254 {
255 switch (deref->deref_type) {
256 case nir_deref_type_array:
257 case nir_deref_type_array_wildcard: {
258 const struct glsl_type *arr_type = nir_deref_instr_parent(deref)->type;
259 unsigned stride = glsl_get_explicit_stride(arr_type);
260
261 if ((glsl_type_is_matrix(arr_type) &&
262 glsl_matrix_type_is_row_major(arr_type)) ||
263 (glsl_type_is_vector(arr_type) && stride == 0))
264 stride = type_scalar_size_bytes(arr_type);
265
266 return stride;
267 }
268 case nir_deref_type_ptr_as_array:
269 return nir_deref_instr_array_stride(nir_deref_instr_parent(deref));
270 case nir_deref_type_cast:
271 return deref->cast.ptr_stride;
272 default:
273 return 0;
274 }
275 }
276
277 static unsigned
type_get_array_stride(const struct glsl_type * elem_type,glsl_type_size_align_func size_align)278 type_get_array_stride(const struct glsl_type *elem_type,
279 glsl_type_size_align_func size_align)
280 {
281 unsigned elem_size, elem_align;
282 size_align(elem_type, &elem_size, &elem_align);
283 return ALIGN_POT(elem_size, elem_align);
284 }
285
286 static unsigned
struct_type_get_field_offset(const struct glsl_type * struct_type,glsl_type_size_align_func size_align,unsigned field_idx)287 struct_type_get_field_offset(const struct glsl_type *struct_type,
288 glsl_type_size_align_func size_align,
289 unsigned field_idx)
290 {
291 assert(glsl_type_is_struct_or_ifc(struct_type));
292 unsigned offset = 0;
293 for (unsigned i = 0; i <= field_idx; i++) {
294 unsigned elem_size, elem_align;
295 size_align(glsl_get_struct_field(struct_type, i), &elem_size, &elem_align);
296 offset = ALIGN_POT(offset, elem_align);
297 if (i < field_idx)
298 offset += elem_size;
299 }
300 return offset;
301 }
302
303 unsigned
nir_deref_instr_get_const_offset(nir_deref_instr * deref,glsl_type_size_align_func size_align)304 nir_deref_instr_get_const_offset(nir_deref_instr *deref,
305 glsl_type_size_align_func size_align)
306 {
307 nir_deref_path path;
308 nir_deref_path_init(&path, deref, NULL);
309
310 unsigned offset = 0;
311 for (nir_deref_instr **p = &path.path[1]; *p; p++) {
312 switch ((*p)->deref_type) {
313 case nir_deref_type_array:
314 offset += nir_src_as_uint((*p)->arr.index) *
315 type_get_array_stride((*p)->type, size_align);
316 break;
317 case nir_deref_type_struct: {
318 /* p starts at path[1], so this is safe */
319 nir_deref_instr *parent = *(p - 1);
320 offset += struct_type_get_field_offset(parent->type, size_align,
321 (*p)->strct.index);
322 break;
323 }
324 case nir_deref_type_cast:
325 /* A cast doesn't contribute to the offset */
326 break;
327 default:
328 unreachable("Unsupported deref type");
329 }
330 }
331
332 nir_deref_path_finish(&path);
333
334 return offset;
335 }
336
337 nir_ssa_def *
nir_build_deref_offset(nir_builder * b,nir_deref_instr * deref,glsl_type_size_align_func size_align)338 nir_build_deref_offset(nir_builder *b, nir_deref_instr *deref,
339 glsl_type_size_align_func size_align)
340 {
341 nir_deref_path path;
342 nir_deref_path_init(&path, deref, NULL);
343
344 nir_ssa_def *offset = nir_imm_intN_t(b, 0, deref->dest.ssa.bit_size);
345 for (nir_deref_instr **p = &path.path[1]; *p; p++) {
346 switch ((*p)->deref_type) {
347 case nir_deref_type_array:
348 case nir_deref_type_ptr_as_array: {
349 nir_ssa_def *index = nir_ssa_for_src(b, (*p)->arr.index, 1);
350 int stride = type_get_array_stride((*p)->type, size_align);
351 offset = nir_iadd(b, offset, nir_amul_imm(b, index, stride));
352 break;
353 }
354 case nir_deref_type_struct: {
355 /* p starts at path[1], so this is safe */
356 nir_deref_instr *parent = *(p - 1);
357 unsigned field_offset =
358 struct_type_get_field_offset(parent->type, size_align,
359 (*p)->strct.index);
360 offset = nir_iadd_imm(b, offset, field_offset);
361 break;
362 }
363 case nir_deref_type_cast:
364 /* A cast doesn't contribute to the offset */
365 break;
366 default:
367 unreachable("Unsupported deref type");
368 }
369 }
370
371 nir_deref_path_finish(&path);
372
373 return offset;
374 }
375
376 bool
nir_remove_dead_derefs_impl(nir_function_impl * impl)377 nir_remove_dead_derefs_impl(nir_function_impl *impl)
378 {
379 bool progress = false;
380
381 nir_foreach_block(block, impl) {
382 nir_foreach_instr_safe(instr, block) {
383 if (instr->type == nir_instr_type_deref &&
384 nir_deref_instr_remove_if_unused(nir_instr_as_deref(instr)))
385 progress = true;
386 }
387 }
388
389 if (progress) {
390 nir_metadata_preserve(impl, nir_metadata_block_index |
391 nir_metadata_dominance);
392 } else {
393 nir_metadata_preserve(impl, nir_metadata_all);
394 }
395
396 return progress;
397 }
398
399 bool
nir_remove_dead_derefs(nir_shader * shader)400 nir_remove_dead_derefs(nir_shader *shader)
401 {
402 bool progress = false;
403 nir_foreach_function(function, shader) {
404 if (function->impl && nir_remove_dead_derefs_impl(function->impl))
405 progress = true;
406 }
407
408 return progress;
409 }
410
411 void
nir_fixup_deref_modes(nir_shader * shader)412 nir_fixup_deref_modes(nir_shader *shader)
413 {
414 nir_foreach_function(function, shader) {
415 if (!function->impl)
416 continue;
417
418 nir_foreach_block(block, function->impl) {
419 nir_foreach_instr(instr, block) {
420 if (instr->type != nir_instr_type_deref)
421 continue;
422
423 nir_deref_instr *deref = nir_instr_as_deref(instr);
424 if (deref->deref_type == nir_deref_type_cast)
425 continue;
426
427 nir_variable_mode parent_modes;
428 if (deref->deref_type == nir_deref_type_var) {
429 parent_modes = deref->var->data.mode;
430 } else {
431 assert(deref->parent.is_ssa);
432 nir_deref_instr *parent =
433 nir_instr_as_deref(deref->parent.ssa->parent_instr);
434 parent_modes = parent->modes;
435 }
436
437 deref->modes = parent_modes;
438 }
439 }
440 }
441 }
442
443 static bool
modes_may_alias(nir_variable_mode a,nir_variable_mode b)444 modes_may_alias(nir_variable_mode a, nir_variable_mode b)
445 {
446 /* Generic pointers can alias with SSBOs */
447 if ((a & (nir_var_mem_ssbo | nir_var_mem_global)) &&
448 (b & (nir_var_mem_ssbo | nir_var_mem_global)))
449 return true;
450
451 /* Pointers can only alias if they share a mode. */
452 return a & b;
453 }
454
/* Compares two deref paths entry by entry, starting at index *i.
 *
 * \p i is both input and output: it is advanced as entries are consumed, so
 * a caller can run a second comparison that resumes where this one stopped.
 * \p stop_fn is optional; when given, the comparison stops at the first
 * deref for which it returns true.
 *
 * The result is a mask of nir_derefs_* bits.  The function returns
 * nir_derefs_do_not_alias as soon as it finds differing constant array
 * indices or differing struct members, and returns only
 * nir_derefs_may_alias_bit when the remaining part of either path contains
 * a cast or ptr_as_array deref.
 */
ALWAYS_INLINE static nir_deref_compare_result
compare_deref_paths(nir_deref_path *a_path, nir_deref_path *b_path,
                    unsigned *i, bool (*stop_fn)(const nir_deref_instr *))
{
   /* Start off assuming they fully compare.  We ignore equality for now.  In
    * the end, we'll determine that by containment.
    */
   nir_deref_compare_result result = nir_derefs_may_alias_bit |
                                     nir_derefs_a_contains_b_bit |
                                     nir_derefs_b_contains_a_bit;

   nir_deref_instr **a = a_path->path;
   nir_deref_instr **b = b_path->path;

   /* Skip the common prefix: entries that are the very same instruction. */
   for (; a[*i] != NULL; (*i)++) {
      if (a[*i] != b[*i])
         break;

      if (stop_fn && stop_fn(a[*i]))
         break;
   }

   /* We're at either the tail or the divergence point between the two deref
    * paths.  Look to see if either contains cast or a ptr_as_array deref.  If
    * it does we don't know how to safely make any inferences.  Hopefully,
    * nir_opt_deref will clean most of these up and we can start inferring
    * things again.
    *
    * In theory, we could do a bit better.  For instance, we could detect the
    * case where we have exactly one ptr_as_array deref in the chain after the
    * divergence point and it's matched in both chains and the two chains have
    * different constant indices.
    */
   for (unsigned j = *i; a[j] != NULL; j++) {
      if (stop_fn && stop_fn(a[j]))
         break;

      if (a[j]->deref_type == nir_deref_type_cast ||
          a[j]->deref_type == nir_deref_type_ptr_as_array)
         return nir_derefs_may_alias_bit;
   }
   for (unsigned j = *i; b[j] != NULL; j++) {
      if (stop_fn && stop_fn(b[j]))
         break;

      if (b[j]->deref_type == nir_deref_type_cast ||
          b[j]->deref_type == nir_deref_type_ptr_as_array)
         return nir_derefs_may_alias_bit;
   }

   /* Walk both paths in lockstep past the divergence point, refining the
    * containment bits or proving that the derefs cannot alias.
    */
   for (; a[*i] != NULL && b[*i] != NULL; (*i)++) {
      if (stop_fn && (stop_fn(a[*i]) || stop_fn(b[*i])))
         break;

      switch (a[*i]->deref_type) {
      case nir_deref_type_array:
      case nir_deref_type_array_wildcard: {
         assert(b[*i]->deref_type == nir_deref_type_array ||
                b[*i]->deref_type == nir_deref_type_array_wildcard);

         if (a[*i]->deref_type == nir_deref_type_array_wildcard) {
            /* a covers every element; b contains a only if b does too. */
            if (b[*i]->deref_type != nir_deref_type_array_wildcard)
               result &= ~nir_derefs_b_contains_a_bit;
         } else if (b[*i]->deref_type == nir_deref_type_array_wildcard) {
            /* b covers every element while a picks a single one. */
            if (a[*i]->deref_type != nir_deref_type_array_wildcard)
               result &= ~nir_derefs_a_contains_b_bit;
         } else {
            assert(a[*i]->deref_type == nir_deref_type_array &&
                   b[*i]->deref_type == nir_deref_type_array);
            assert(a[*i]->arr.index.is_ssa && b[*i]->arr.index.is_ssa);

            if (nir_src_is_const(a[*i]->arr.index) &&
                nir_src_is_const(b[*i]->arr.index)) {
               /* If they're both direct and have different offsets, they
                * don't even alias much less anything else.
                */
               if (nir_src_as_uint(a[*i]->arr.index) !=
                   nir_src_as_uint(b[*i]->arr.index))
                  return nir_derefs_do_not_alias;
            } else if (a[*i]->arr.index.ssa == b[*i]->arr.index.ssa) {
               /* They're the same indirect, continue on */
            } else {
               /* They're not the same index so we can't prove anything about
                * containment.
                */
               result &= ~(nir_derefs_a_contains_b_bit | nir_derefs_b_contains_a_bit);
            }
         }
         break;
      }

      case nir_deref_type_struct: {
         /* If they're different struct members, they don't even alias */
         if (a[*i]->strct.index != b[*i]->strct.index)
            return nir_derefs_do_not_alias;
         break;
      }

      default:
         unreachable("Invalid deref type");
      }
   }

   /* If a is longer than b, then it can't contain b.  If neither a[i] nor
    * b[i] are NULL then we aren't at the end of the chain and we know nothing
    * about containment.
    */
   if (a[*i] != NULL)
      result &= ~nir_derefs_a_contains_b_bit;
   if (b[*i] != NULL)
      result &= ~nir_derefs_b_contains_a_bit;

   /* If a contains b and b contains a they must be equal. */
   if ((result & nir_derefs_a_contains_b_bit) &&
       (result & nir_derefs_b_contains_a_bit))
      result |= nir_derefs_equal_bit;

   return result;
}
574
575 static bool
is_interface_struct_deref(const nir_deref_instr * deref)576 is_interface_struct_deref(const nir_deref_instr *deref)
577 {
578 if (deref->deref_type == nir_deref_type_struct) {
579 assert(glsl_type_is_struct_or_ifc(nir_deref_instr_parent(deref)->type));
580 return true;
581 } else {
582 return false;
583 }
584 }
585
/* Compares two already-built deref paths and returns a mask of nir_derefs_*
 * bits describing how they relate (may alias, containment, equality), or
 * nir_derefs_do_not_alias when they provably do not overlap.
 *
 * The roots (path[0]) are compared first, by mode and then by variable or
 * cast identity; the remaining entries are handed to compare_deref_paths().
 */
nir_deref_compare_result
nir_compare_deref_paths(nir_deref_path *a_path,
                        nir_deref_path *b_path)
{
   if (!modes_may_alias(b_path->path[0]->modes, a_path->path[0]->modes))
      return nir_derefs_do_not_alias;

   /* A variable root against a cast root: nothing can be proven. */
   if (a_path->path[0]->deref_type != b_path->path[0]->deref_type)
      return nir_derefs_may_alias_bit;

   /* Index of the first entry after the root; compare_deref_paths()
    * advances it.
    */
   unsigned path_idx = 1;
   if (a_path->path[0]->deref_type == nir_deref_type_var) {
      const nir_variable *a_var = a_path->path[0]->var;
      const nir_variable *b_var = b_path->path[0]->var;

      /* If we got here, the two variables must have the same mode.  The
       * only way modes_may_alias() can return true for two different modes
       * is if one is global and the other ssbo.  However, Global variables
       * only exist in OpenCL and SSBOs don't exist there.  No API allows
       * both for variables.
       */
      assert(a_var->data.mode == b_var->data.mode);

      switch (a_var->data.mode) {
      case nir_var_mem_ssbo: {
         /* Compare only the part of the paths before the first struct
          * deref; path_idx is left where that comparison stopped.
          */
         nir_deref_compare_result binding_compare;
         if (a_var == b_var) {
            binding_compare = compare_deref_paths(a_path, b_path, &path_idx,
                                                  is_interface_struct_deref);
         } else {
            binding_compare = nir_derefs_do_not_alias;
         }

         /* Equal so far: carry on with the rest of the paths below. */
         if (binding_compare & nir_derefs_equal_bit)
            break;

         /* If the binding derefs can't alias and at least one is RESTRICT,
          * then we know they can't alias.
          */
         if (!(binding_compare & nir_derefs_may_alias_bit) &&
             ((a_var->data.access & ACCESS_RESTRICT) ||
              (b_var->data.access & ACCESS_RESTRICT)))
            return nir_derefs_do_not_alias;

         return nir_derefs_may_alias_bit;
      }

      case nir_var_mem_shared:
         if (a_var == b_var)
            break;

         /* Per SPV_KHR_workgroup_memory_explicit_layout and
          * GL_EXT_shared_memory_block, shared blocks alias each other.
          * We will have either all blocks or all non-blocks.
          */
         if (glsl_type_is_interface(a_var->type) ||
             glsl_type_is_interface(b_var->type)) {
            assert(glsl_type_is_interface(a_var->type) &&
                   glsl_type_is_interface(b_var->type));
            return nir_derefs_may_alias_bit;
         }

         /* Otherwise, distinct shared vars don't alias */
         return nir_derefs_do_not_alias;

      default:
         /* For any other variable types, if we can chase them back to the
          * variable, and the variables are different, they don't alias.
          */
         if (a_var == b_var)
            break;

         return nir_derefs_do_not_alias;
      }
   } else {
      assert(a_path->path[0]->deref_type == nir_deref_type_cast);
      /* If they're not exactly the same cast, it's hard to compare them so we
       * just assume they alias.  Comparing casts is tricky as there are lots
       * of things such as mode, type, etc. to make sure work out; for now, we
       * just assume nir_opt_deref will combine them and compare the deref
       * instructions.
       *
       * TODO: At some point in the future, we could be clever and understand
       * that a float[] and int[] have the same layout and aliasing structure
       * but double[] and vec3[] do not and we could potentially be a bit
       * smarter here.
       */
      if (a_path->path[0] != b_path->path[0])
         return nir_derefs_may_alias_bit;
   }

   /* Same root: compare whatever is left of the two paths. */
   return compare_deref_paths(a_path, b_path, &path_idx, NULL);
}
679
680 nir_deref_compare_result
nir_compare_derefs(nir_deref_instr * a,nir_deref_instr * b)681 nir_compare_derefs(nir_deref_instr *a, nir_deref_instr *b)
682 {
683 if (a == b) {
684 return nir_derefs_equal_bit | nir_derefs_may_alias_bit |
685 nir_derefs_a_contains_b_bit | nir_derefs_b_contains_a_bit;
686 }
687
688 nir_deref_path a_path, b_path;
689 nir_deref_path_init(&a_path, a, NULL);
690 nir_deref_path_init(&b_path, b, NULL);
691 assert(a_path.path[0]->deref_type == nir_deref_type_var ||
692 a_path.path[0]->deref_type == nir_deref_type_cast);
693 assert(b_path.path[0]->deref_type == nir_deref_type_var ||
694 b_path.path[0]->deref_type == nir_deref_type_cast);
695
696 nir_deref_compare_result result = nir_compare_deref_paths(&a_path, &b_path);
697
698 nir_deref_path_finish(&a_path);
699 nir_deref_path_finish(&b_path);
700
701 return result;
702 }
703
nir_get_deref_path(void * mem_ctx,nir_deref_and_path * deref)704 nir_deref_path *nir_get_deref_path(void *mem_ctx, nir_deref_and_path *deref)
705 {
706 if (!deref->_path) {
707 deref->_path = ralloc(mem_ctx, nir_deref_path);
708 nir_deref_path_init(deref->_path, deref->instr, mem_ctx);
709 }
710 return deref->_path;
711 }
712
nir_compare_derefs_and_paths(void * mem_ctx,nir_deref_and_path * a,nir_deref_and_path * b)713 nir_deref_compare_result nir_compare_derefs_and_paths(void *mem_ctx,
714 nir_deref_and_path *a,
715 nir_deref_and_path *b)
716 {
717 if (a->instr == b->instr) /* nir_compare_derefs has a fast path if a == b */
718 return nir_compare_derefs(a->instr, b->instr);
719
720 return nir_compare_deref_paths(nir_get_deref_path(mem_ctx, a),
721 nir_get_deref_path(mem_ctx, b));
722 }
723
/* State shared by the helpers of the deref rematerialization pass. */
struct rematerialize_deref_state {
   /* Set when a source has been rewritten to use a rematerialized deref. */
   bool progress;
   /* Builder used to create and insert the new deref instructions. */
   nir_builder builder;
   /* Block being processed; derefs already located in it are used as-is. */
   nir_block *block;
   /* Pointer-keyed table looked up by original deref.  Created on first
    * use and cleared at the start of every block.
    */
   struct hash_table *cache;
};
730
731 static nir_deref_instr *
rematerialize_deref_in_block(nir_deref_instr * deref,struct rematerialize_deref_state * state)732 rematerialize_deref_in_block(nir_deref_instr *deref,
733 struct rematerialize_deref_state *state)
734 {
735 if (deref->instr.block == state->block)
736 return deref;
737
738 if (!state->cache) {
739 state->cache = _mesa_pointer_hash_table_create(NULL);
740 }
741
742 struct hash_entry *cached = _mesa_hash_table_search(state->cache, deref);
743 if (cached)
744 return cached->data;
745
746 nir_builder *b = &state->builder;
747 nir_deref_instr *new_deref =
748 nir_deref_instr_create(b->shader, deref->deref_type);
749 new_deref->modes = deref->modes;
750 new_deref->type = deref->type;
751
752 if (deref->deref_type == nir_deref_type_var) {
753 new_deref->var = deref->var;
754 } else {
755 nir_deref_instr *parent = nir_src_as_deref(deref->parent);
756 if (parent) {
757 parent = rematerialize_deref_in_block(parent, state);
758 new_deref->parent = nir_src_for_ssa(&parent->dest.ssa);
759 } else {
760 nir_src_copy(&new_deref->parent, &deref->parent);
761 }
762 }
763
764 switch (deref->deref_type) {
765 case nir_deref_type_var:
766 case nir_deref_type_array_wildcard:
767 /* Nothing more to do */
768 break;
769
770 case nir_deref_type_cast:
771 new_deref->cast.ptr_stride = deref->cast.ptr_stride;
772 break;
773
774 case nir_deref_type_array:
775 case nir_deref_type_ptr_as_array:
776 assert(!nir_src_as_deref(deref->arr.index));
777 nir_src_copy(&new_deref->arr.index, &deref->arr.index);
778 break;
779
780 case nir_deref_type_struct:
781 new_deref->strct.index = deref->strct.index;
782 break;
783
784 default:
785 unreachable("Invalid deref instruction type");
786 }
787
788 nir_ssa_dest_init(&new_deref->instr, &new_deref->dest,
789 deref->dest.ssa.num_components,
790 deref->dest.ssa.bit_size,
791 NULL);
792 nir_builder_instr_insert(b, &new_deref->instr);
793
794 return new_deref;
795 }
796
797 static bool
rematerialize_deref_src(nir_src * src,void * _state)798 rematerialize_deref_src(nir_src *src, void *_state)
799 {
800 struct rematerialize_deref_state *state = _state;
801
802 nir_deref_instr *deref = nir_src_as_deref(*src);
803 if (!deref)
804 return true;
805
806 nir_deref_instr *block_deref = rematerialize_deref_in_block(deref, state);
807 if (block_deref != deref) {
808 nir_instr_rewrite_src(src->parent_instr, src,
809 nir_src_for_ssa(&block_deref->dest.ssa));
810 nir_deref_instr_remove_if_unused(deref);
811 state->progress = true;
812 }
813
814 return true;
815 }
816
817 /** Re-materialize derefs in every block
818 *
819 * This pass re-materializes deref instructions in every block in which it is
820 * used. After this pass has been run, every use of a deref will be of a
821 * deref in the same block as the use. Also, all unused derefs will be
822 * deleted as a side-effect.
823 *
824 * Derefs used as sources of phi instructions are not rematerialized.
825 */
826 bool
nir_rematerialize_derefs_in_use_blocks_impl(nir_function_impl * impl)827 nir_rematerialize_derefs_in_use_blocks_impl(nir_function_impl *impl)
828 {
829 struct rematerialize_deref_state state = { 0 };
830 nir_builder_init(&state.builder, impl);
831
832 nir_foreach_block_unstructured(block, impl) {
833 state.block = block;
834
835 /* Start each block with a fresh cache */
836 if (state.cache)
837 _mesa_hash_table_clear(state.cache, NULL);
838
839 nir_foreach_instr_safe(instr, block) {
840 if (instr->type == nir_instr_type_deref &&
841 nir_deref_instr_remove_if_unused(nir_instr_as_deref(instr)))
842 continue;
843
844 /* If a deref is used in a phi, we can't rematerialize it, as the new
845 * derefs would appear before the phi, which is not valid.
846 */
847 if (instr->type == nir_instr_type_phi)
848 continue;
849
850 state.builder.cursor = nir_before_instr(instr);
851 nir_foreach_src(instr, rematerialize_deref_src, &state);
852 }
853
854 #ifndef NDEBUG
855 nir_if *following_if = nir_block_get_following_if(block);
856 if (following_if)
857 assert(!nir_src_as_deref(following_if->condition));
858 #endif
859 }
860
861 _mesa_hash_table_destroy(state.cache, NULL);
862
863 return state.progress;
864 }
865
866 static void
nir_deref_instr_fixup_child_types(nir_deref_instr * parent)867 nir_deref_instr_fixup_child_types(nir_deref_instr *parent)
868 {
869 nir_foreach_use(use, &parent->dest.ssa) {
870 if (use->parent_instr->type != nir_instr_type_deref)
871 continue;
872
873 nir_deref_instr *child = nir_instr_as_deref(use->parent_instr);
874 switch (child->deref_type) {
875 case nir_deref_type_var:
876 unreachable("nir_deref_type_var cannot be a child");
877
878 case nir_deref_type_array:
879 case nir_deref_type_array_wildcard:
880 child->type = glsl_get_array_element(parent->type);
881 break;
882
883 case nir_deref_type_ptr_as_array:
884 child->type = parent->type;
885 break;
886
887 case nir_deref_type_struct:
888 child->type = glsl_get_struct_field(parent->type,
889 child->strct.index);
890 break;
891
892 case nir_deref_type_cast:
893 /* We stop the recursion here */
894 continue;
895 }
896
897 /* Recurse into children */
898 nir_deref_instr_fixup_child_types(child);
899 }
900 }
901
902 static bool
opt_alu_of_cast(nir_alu_instr * alu)903 opt_alu_of_cast(nir_alu_instr *alu)
904 {
905 bool progress = false;
906
907 for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
908 assert(alu->src[i].src.is_ssa);
909 nir_instr *src_instr = alu->src[i].src.ssa->parent_instr;
910 if (src_instr->type != nir_instr_type_deref)
911 continue;
912
913 nir_deref_instr *src_deref = nir_instr_as_deref(src_instr);
914 if (src_deref->deref_type != nir_deref_type_cast)
915 continue;
916
917 assert(src_deref->parent.is_ssa);
918 nir_instr_rewrite_src_ssa(&alu->instr, &alu->src[i].src,
919 src_deref->parent.ssa);
920 progress = true;
921 }
922
923 return progress;
924 }
925
926 static bool
is_trivial_array_deref_cast(nir_deref_instr * cast)927 is_trivial_array_deref_cast(nir_deref_instr *cast)
928 {
929 assert(is_trivial_deref_cast(cast));
930
931 nir_deref_instr *parent = nir_src_as_deref(cast->parent);
932
933 if (parent->deref_type == nir_deref_type_array) {
934 return cast->cast.ptr_stride ==
935 glsl_get_explicit_stride(nir_deref_instr_parent(parent)->type);
936 } else if (parent->deref_type == nir_deref_type_ptr_as_array) {
937 return cast->cast.ptr_stride ==
938 nir_deref_instr_array_stride(parent);
939 } else {
940 return false;
941 }
942 }
943
944 static bool
is_deref_ptr_as_array(nir_instr * instr)945 is_deref_ptr_as_array(nir_instr *instr)
946 {
947 return instr->type == nir_instr_type_deref &&
948 nir_instr_as_deref(instr)->deref_type == nir_deref_type_ptr_as_array;
949 }
950
951 static bool
opt_remove_restricting_cast_alignments(nir_deref_instr * cast)952 opt_remove_restricting_cast_alignments(nir_deref_instr *cast)
953 {
954 assert(cast->deref_type == nir_deref_type_cast);
955 if (cast->cast.align_mul == 0)
956 return false;
957
958 nir_deref_instr *parent = nir_src_as_deref(cast->parent);
959 if (parent == NULL)
960 return false;
961
962 /* Don't use any default alignment for this check. We don't want to fall
963 * back to type alignment too early in case we find out later that we're
964 * somehow a child of a packed struct.
965 */
966 uint32_t parent_mul, parent_offset;
967 if (!nir_get_explicit_deref_align(parent, false /* default_to_type_align */,
968 &parent_mul, &parent_offset))
969 return false;
970
971 /* If this cast increases the alignment, we want to keep it.
972 *
973 * There is a possibility that the larger alignment provided by this cast
974 * somehow disagrees with the smaller alignment further up the deref chain.
975 * In that case, we choose to favor the alignment closer to the actual
976 * memory operation which, in this case, is the cast and not its parent so
977 * keeping the cast alignment is the right thing to do.
978 */
979 if (parent_mul < cast->cast.align_mul)
980 return false;
981
982 /* If we've gotten here, we have a parent deref with an align_mul at least
983 * as large as ours so we can potentially throw away the alignment
984 * information on this deref. There are two cases to consider here:
985 *
986 * 1. We can chase the deref all the way back to the variable. In this
987 * case, we have "perfect" knowledge, modulo indirect array derefs.
988 * Unless we've done something wrong in our indirect/wildcard stride
989 * calculations, our knowledge from the deref walk is better than the
990 * client's.
991 *
992 * 2. We can't chase it all the way back to the variable. In this case,
993 * because our call to nir_get_explicit_deref_align(parent, ...) above
994 * above passes default_to_type_align=false, the only way we can even
995 * get here is if something further up the deref chain has a cast with
996 * an alignment which can only happen if we get an alignment from the
997 * client (most likely a decoration in the SPIR-V). If the client has
998 * provided us with two conflicting alignments in the deref chain,
999 * that's their fault and we can do whatever we want.
1000 *
1001 * In either case, we should be without our rights, at this point, to throw
1002 * away the alignment information on this deref. However, to be "nice" to
1003 * weird clients, we do one more check. It really shouldn't happen but
1004 * it's possible that the parent's alignment offset disagrees with the
1005 * cast's alignment offset. In this case, we consider the cast as
1006 * providing more information (or at least more valid information) and keep
1007 * it even if the align_mul from the parent is larger.
1008 */
1009 assert(cast->cast.align_mul <= parent_mul);
1010 if (parent_offset % cast->cast.align_mul != cast->cast.align_offset)
1011 return false;
1012
1013 /* If we got here, the parent has better alignment information than the
1014 * child and we can get rid of the child alignment information.
1015 */
1016 cast->cast.align_mul = 0;
1017 cast->cast.align_offset = 0;
1018 return true;
1019 }
1020
1021 /**
1022 * Remove casts that just wrap other casts.
1023 */
1024 static bool
opt_remove_cast_cast(nir_deref_instr * cast)1025 opt_remove_cast_cast(nir_deref_instr *cast)
1026 {
1027 nir_deref_instr *first_cast = cast;
1028
1029 while (true) {
1030 nir_deref_instr *parent = nir_deref_instr_parent(first_cast);
1031 if (parent == NULL || parent->deref_type != nir_deref_type_cast)
1032 break;
1033 first_cast = parent;
1034 }
1035 if (cast == first_cast)
1036 return false;
1037
1038 nir_instr_rewrite_src(&cast->instr, &cast->parent,
1039 nir_src_for_ssa(first_cast->parent.ssa));
1040 return true;
1041 }
1042
1043 /* Restrict variable modes in casts.
1044 *
1045 * If we know from something higher up the deref chain that the deref has a
1046 * specific mode, we can cast to more general and back but we can never cast
1047 * across modes. For non-cast derefs, we should only ever do anything here if
1048 * the parent eventually comes from a cast that we restricted earlier.
1049 */
1050 static bool
opt_restrict_deref_modes(nir_deref_instr * deref)1051 opt_restrict_deref_modes(nir_deref_instr *deref)
1052 {
1053 if (deref->deref_type == nir_deref_type_var) {
1054 assert(deref->modes == deref->var->data.mode);
1055 return false;
1056 }
1057
1058 nir_deref_instr *parent = nir_src_as_deref(deref->parent);
1059 if (parent == NULL || parent->modes == deref->modes)
1060 return false;
1061
1062 assert(parent->modes & deref->modes);
1063 deref->modes &= parent->modes;
1064 return true;
1065 }
1066
1067 static bool
opt_remove_sampler_cast(nir_deref_instr * cast)1068 opt_remove_sampler_cast(nir_deref_instr *cast)
1069 {
1070 assert(cast->deref_type == nir_deref_type_cast);
1071 nir_deref_instr *parent = nir_src_as_deref(cast->parent);
1072 if (parent == NULL)
1073 return false;
1074
1075 /* Strip both types down to their non-array type and bail if there are any
1076 * discrepancies in array lengths.
1077 */
1078 const struct glsl_type *parent_type = parent->type;
1079 const struct glsl_type *cast_type = cast->type;
1080 while (glsl_type_is_array(parent_type) && glsl_type_is_array(cast_type)) {
1081 if (glsl_get_length(parent_type) != glsl_get_length(cast_type))
1082 return false;
1083 parent_type = glsl_get_array_element(parent_type);
1084 cast_type = glsl_get_array_element(cast_type);
1085 }
1086
1087 if (!glsl_type_is_sampler(parent_type))
1088 return false;
1089
1090 if (cast_type != glsl_bare_sampler_type() &&
1091 (glsl_type_is_bare_sampler(parent_type) ||
1092 cast_type != glsl_sampler_type_to_texture(parent_type)))
1093 return false;
1094
1095 /* We're a cast from a more detailed sampler type to a bare sampler or a
1096 * texture type with the same dimensionality.
1097 */
1098 nir_ssa_def_rewrite_uses(&cast->dest.ssa,
1099 &parent->dest.ssa);
1100 nir_instr_remove(&cast->instr);
1101
1102 /* Recursively crawl the deref tree and clean up types */
1103 nir_deref_instr_fixup_child_types(parent);
1104
1105 return true;
1106 }
1107
1108 /**
1109 * Is this casting a struct to a contained struct.
1110 * struct a { struct b field0 };
1111 * ssa_5 is structa;
1112 * deref_cast (structb *)ssa_5 (function_temp structb);
1113 * converts to
1114 * deref_struct &ssa_5->field0 (function_temp structb);
1115 * This allows subsequent copy propagation to work.
1116 */
1117 static bool
opt_replace_struct_wrapper_cast(nir_builder * b,nir_deref_instr * cast)1118 opt_replace_struct_wrapper_cast(nir_builder *b, nir_deref_instr *cast)
1119 {
1120 nir_deref_instr *parent = nir_src_as_deref(cast->parent);
1121 if (!parent)
1122 return false;
1123
1124 if (cast->cast.align_mul > 0)
1125 return false;
1126
1127 if (!glsl_type_is_struct(parent->type))
1128 return false;
1129
1130 /* Empty struct */
1131 if (glsl_get_length(parent->type) < 1)
1132 return false;
1133
1134 if (glsl_get_struct_field_offset(parent->type, 0) != 0)
1135 return false;
1136
1137 if (cast->type != glsl_get_struct_field(parent->type, 0))
1138 return false;
1139
1140 nir_deref_instr *replace = nir_build_deref_struct(b, parent, 0);
1141 nir_ssa_def_rewrite_uses(&cast->dest.ssa, &replace->dest.ssa);
1142 nir_deref_instr_remove_if_unused(cast);
1143 return true;
1144 }
1145
1146 static bool
opt_deref_cast(nir_builder * b,nir_deref_instr * cast)1147 opt_deref_cast(nir_builder *b, nir_deref_instr *cast)
1148 {
1149 bool progress = false;
1150
1151 progress |= opt_remove_restricting_cast_alignments(cast);
1152
1153 if (opt_replace_struct_wrapper_cast(b, cast))
1154 return true;
1155
1156 if (opt_remove_sampler_cast(cast))
1157 return true;
1158
1159 progress |= opt_remove_cast_cast(cast);
1160 if (!is_trivial_deref_cast(cast))
1161 return progress;
1162
1163 /* If this deref still contains useful alignment information, we don't want
1164 * to delete it.
1165 */
1166 if (cast->cast.align_mul > 0)
1167 return progress;
1168
1169 bool trivial_array_cast = is_trivial_array_deref_cast(cast);
1170
1171 assert(cast->dest.is_ssa);
1172 assert(cast->parent.is_ssa);
1173
1174 nir_foreach_use_safe(use_src, &cast->dest.ssa) {
1175 /* If this isn't a trivial array cast, we can't propagate into
1176 * ptr_as_array derefs.
1177 */
1178 if (is_deref_ptr_as_array(use_src->parent_instr) &&
1179 !trivial_array_cast)
1180 continue;
1181
1182 nir_instr_rewrite_src(use_src->parent_instr, use_src, cast->parent);
1183 progress = true;
1184 }
1185
1186 /* If uses would be a bit crazy */
1187 assert(list_is_empty(&cast->dest.ssa.if_uses));
1188
1189 if (nir_deref_instr_remove_if_unused(cast))
1190 progress = true;
1191
1192 return progress;
1193 }
1194
1195 static bool
opt_deref_ptr_as_array(nir_builder * b,nir_deref_instr * deref)1196 opt_deref_ptr_as_array(nir_builder *b, nir_deref_instr *deref)
1197 {
1198 assert(deref->deref_type == nir_deref_type_ptr_as_array);
1199
1200 nir_deref_instr *parent = nir_deref_instr_parent(deref);
1201
1202 if (nir_src_is_const(deref->arr.index) &&
1203 nir_src_as_int(deref->arr.index) == 0) {
1204 /* If it's a ptr_as_array deref with an index of 0, it does nothing
1205 * and we can just replace its uses with its parent, unless it has
1206 * alignment information.
1207 *
1208 * The source of a ptr_as_array deref always has a deref_type of
1209 * nir_deref_type_array or nir_deref_type_cast. If it's a cast, it
1210 * may be trivial and we may be able to get rid of that too. Any
1211 * trivial cast of trivial cast cases should be handled already by
1212 * opt_deref_cast() above.
1213 */
1214 if (parent->deref_type == nir_deref_type_cast &&
1215 parent->cast.align_mul == 0 &&
1216 is_trivial_deref_cast(parent))
1217 parent = nir_deref_instr_parent(parent);
1218 nir_ssa_def_rewrite_uses(&deref->dest.ssa,
1219 &parent->dest.ssa);
1220 nir_instr_remove(&deref->instr);
1221 return true;
1222 }
1223
1224 if (parent->deref_type != nir_deref_type_array &&
1225 parent->deref_type != nir_deref_type_ptr_as_array)
1226 return false;
1227
1228 assert(parent->parent.is_ssa);
1229 assert(parent->arr.index.is_ssa);
1230 assert(deref->arr.index.is_ssa);
1231
1232 deref->arr.in_bounds &= parent->arr.in_bounds;
1233
1234 nir_ssa_def *new_idx = nir_iadd(b, parent->arr.index.ssa,
1235 deref->arr.index.ssa);
1236
1237 deref->deref_type = parent->deref_type;
1238 nir_instr_rewrite_src(&deref->instr, &deref->parent, parent->parent);
1239 nir_instr_rewrite_src(&deref->instr, &deref->arr.index,
1240 nir_src_for_ssa(new_idx));
1241 return true;
1242 }
1243
1244 static bool
is_vector_bitcast_deref(nir_deref_instr * cast,nir_component_mask_t mask,bool is_write)1245 is_vector_bitcast_deref(nir_deref_instr *cast,
1246 nir_component_mask_t mask,
1247 bool is_write)
1248 {
1249 if (cast->deref_type != nir_deref_type_cast)
1250 return false;
1251
1252 /* Don't throw away useful alignment information */
1253 if (cast->cast.align_mul > 0)
1254 return false;
1255
1256 /* It has to be a cast of another deref */
1257 nir_deref_instr *parent = nir_src_as_deref(cast->parent);
1258 if (parent == NULL)
1259 return false;
1260
1261 /* The parent has to be a vector or scalar */
1262 if (!glsl_type_is_vector_or_scalar(parent->type))
1263 return false;
1264
1265 /* Don't bother with 1-bit types */
1266 unsigned cast_bit_size = glsl_get_bit_size(cast->type);
1267 unsigned parent_bit_size = glsl_get_bit_size(parent->type);
1268 if (cast_bit_size == 1 || parent_bit_size == 1)
1269 return false;
1270
1271 /* A strided vector type means it's not tightly packed */
1272 if (glsl_get_explicit_stride(cast->type) ||
1273 glsl_get_explicit_stride(parent->type))
1274 return false;
1275
1276 assert(cast_bit_size > 0 && cast_bit_size % 8 == 0);
1277 assert(parent_bit_size > 0 && parent_bit_size % 8 == 0);
1278 unsigned bytes_used = util_last_bit(mask) * (cast_bit_size / 8);
1279 unsigned parent_bytes = glsl_get_vector_elements(parent->type) *
1280 (parent_bit_size / 8);
1281 if (bytes_used > parent_bytes)
1282 return false;
1283
1284 if (is_write && !nir_component_mask_can_reinterpret(mask, cast_bit_size,
1285 parent_bit_size))
1286 return false;
1287
1288 return true;
1289 }
1290
1291 static nir_ssa_def *
resize_vector(nir_builder * b,nir_ssa_def * data,unsigned num_components)1292 resize_vector(nir_builder *b, nir_ssa_def *data, unsigned num_components)
1293 {
1294 if (num_components == data->num_components)
1295 return data;
1296
1297 unsigned swiz[NIR_MAX_VEC_COMPONENTS] = { 0, };
1298 for (unsigned i = 0; i < MIN2(num_components, data->num_components); i++)
1299 swiz[i] = i;
1300
1301 return nir_swizzle(b, data, swiz, num_components);
1302 }
1303
1304 static bool
opt_load_vec_deref(nir_builder * b,nir_intrinsic_instr * load)1305 opt_load_vec_deref(nir_builder *b, nir_intrinsic_instr *load)
1306 {
1307 nir_deref_instr *deref = nir_src_as_deref(load->src[0]);
1308 nir_component_mask_t read_mask =
1309 nir_ssa_def_components_read(&load->dest.ssa);
1310
1311 /* LLVM loves take advantage of the fact that vec3s in OpenCL are
1312 * vec4-aligned and so it can just read/write them as vec4s. This
1313 * results in a LOT of vec4->vec3 casts on loads and stores.
1314 */
1315 if (is_vector_bitcast_deref(deref, read_mask, false)) {
1316 const unsigned old_num_comps = load->dest.ssa.num_components;
1317 const unsigned old_bit_size = load->dest.ssa.bit_size;
1318
1319 nir_deref_instr *parent = nir_src_as_deref(deref->parent);
1320 const unsigned new_num_comps = glsl_get_vector_elements(parent->type);
1321 const unsigned new_bit_size = glsl_get_bit_size(parent->type);
1322
1323 /* Stomp it to reference the parent */
1324 nir_instr_rewrite_src(&load->instr, &load->src[0],
1325 nir_src_for_ssa(&parent->dest.ssa));
1326 assert(load->dest.is_ssa);
1327 load->dest.ssa.bit_size = new_bit_size;
1328 load->dest.ssa.num_components = new_num_comps;
1329 load->num_components = new_num_comps;
1330
1331 b->cursor = nir_after_instr(&load->instr);
1332 nir_ssa_def *data = &load->dest.ssa;
1333 if (old_bit_size != new_bit_size)
1334 data = nir_bitcast_vector(b, &load->dest.ssa, old_bit_size);
1335 data = resize_vector(b, data, old_num_comps);
1336
1337 nir_ssa_def_rewrite_uses_after(&load->dest.ssa, data,
1338 data->parent_instr);
1339 return true;
1340 }
1341
1342 return false;
1343 }
1344
1345 static bool
opt_store_vec_deref(nir_builder * b,nir_intrinsic_instr * store)1346 opt_store_vec_deref(nir_builder *b, nir_intrinsic_instr *store)
1347 {
1348 nir_deref_instr *deref = nir_src_as_deref(store->src[0]);
1349 nir_component_mask_t write_mask = nir_intrinsic_write_mask(store);
1350
1351 /* LLVM loves take advantage of the fact that vec3s in OpenCL are
1352 * vec4-aligned and so it can just read/write them as vec4s. This
1353 * results in a LOT of vec4->vec3 casts on loads and stores.
1354 */
1355 if (is_vector_bitcast_deref(deref, write_mask, true)) {
1356 assert(store->src[1].is_ssa);
1357 nir_ssa_def *data = store->src[1].ssa;
1358
1359 const unsigned old_bit_size = data->bit_size;
1360
1361 nir_deref_instr *parent = nir_src_as_deref(deref->parent);
1362 const unsigned new_num_comps = glsl_get_vector_elements(parent->type);
1363 const unsigned new_bit_size = glsl_get_bit_size(parent->type);
1364
1365 nir_instr_rewrite_src(&store->instr, &store->src[0],
1366 nir_src_for_ssa(&parent->dest.ssa));
1367
1368 /* Restrict things down as needed so the bitcast doesn't fail */
1369 data = nir_channels(b, data, (1 << util_last_bit(write_mask)) - 1);
1370 if (old_bit_size != new_bit_size)
1371 data = nir_bitcast_vector(b, data, new_bit_size);
1372 data = resize_vector(b, data, new_num_comps);
1373 nir_instr_rewrite_src(&store->instr, &store->src[1],
1374 nir_src_for_ssa(data));
1375 store->num_components = new_num_comps;
1376
1377 /* Adjust the write mask */
1378 write_mask = nir_component_mask_reinterpret(write_mask, old_bit_size,
1379 new_bit_size);
1380 nir_intrinsic_set_write_mask(store, write_mask);
1381 return true;
1382 }
1383
1384 return false;
1385 }
1386
1387 static bool
opt_known_deref_mode_is(nir_builder * b,nir_intrinsic_instr * intrin)1388 opt_known_deref_mode_is(nir_builder *b, nir_intrinsic_instr *intrin)
1389 {
1390 nir_variable_mode modes = nir_intrinsic_memory_modes(intrin);
1391 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1392 if (deref == NULL)
1393 return false;
1394
1395 nir_ssa_def *deref_is = NULL;
1396
1397 if (nir_deref_mode_must_be(deref, modes))
1398 deref_is = nir_imm_true(b);
1399
1400 if (!nir_deref_mode_may_be(deref, modes))
1401 deref_is = nir_imm_false(b);
1402
1403 if (deref_is == NULL)
1404 return false;
1405
1406 nir_ssa_def_rewrite_uses(&intrin->dest.ssa, deref_is);
1407 nir_instr_remove(&intrin->instr);
1408 return true;
1409 }
1410
1411 bool
nir_opt_deref_impl(nir_function_impl * impl)1412 nir_opt_deref_impl(nir_function_impl *impl)
1413 {
1414 bool progress = false;
1415
1416 nir_builder b;
1417 nir_builder_init(&b, impl);
1418
1419 nir_foreach_block(block, impl) {
1420 nir_foreach_instr_safe(instr, block) {
1421 b.cursor = nir_before_instr(instr);
1422
1423 switch (instr->type) {
1424 case nir_instr_type_alu: {
1425 nir_alu_instr *alu = nir_instr_as_alu(instr);
1426 if (opt_alu_of_cast(alu))
1427 progress = true;
1428 break;
1429 }
1430
1431 case nir_instr_type_deref: {
1432 nir_deref_instr *deref = nir_instr_as_deref(instr);
1433
1434 if (opt_restrict_deref_modes(deref))
1435 progress = true;
1436
1437 switch (deref->deref_type) {
1438 case nir_deref_type_ptr_as_array:
1439 if (opt_deref_ptr_as_array(&b, deref))
1440 progress = true;
1441 break;
1442
1443 case nir_deref_type_cast:
1444 if (opt_deref_cast(&b, deref))
1445 progress = true;
1446 break;
1447
1448 default:
1449 /* Do nothing */
1450 break;
1451 }
1452 break;
1453 }
1454
1455 case nir_instr_type_intrinsic: {
1456 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1457 switch (intrin->intrinsic) {
1458 case nir_intrinsic_load_deref:
1459 if (opt_load_vec_deref(&b, intrin))
1460 progress = true;
1461 break;
1462
1463 case nir_intrinsic_store_deref:
1464 if (opt_store_vec_deref(&b, intrin))
1465 progress = true;
1466 break;
1467
1468 case nir_intrinsic_deref_mode_is:
1469 if (opt_known_deref_mode_is(&b, intrin))
1470 progress = true;
1471 break;
1472
1473 default:
1474 /* Do nothing */
1475 break;
1476 }
1477 break;
1478 }
1479
1480 default:
1481 /* Do nothing */
1482 break;
1483 }
1484 }
1485 }
1486
1487 if (progress) {
1488 nir_metadata_preserve(impl, nir_metadata_block_index |
1489 nir_metadata_dominance);
1490 } else {
1491 nir_metadata_preserve(impl, nir_metadata_all);
1492 }
1493
1494 return progress;
1495 }
1496
1497 bool
nir_opt_deref(nir_shader * shader)1498 nir_opt_deref(nir_shader *shader)
1499 {
1500 bool progress = false;
1501
1502 nir_foreach_function(func, shader) {
1503 if (func->impl && nir_opt_deref_impl(func->impl))
1504 progress = true;
1505 }
1506
1507 return progress;
1508 }
1509