/*
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"
#include "nir_builder.h"
#include "nir_deref.h"
#include "util/hash_table.h"

static bool
is_trivial_deref_cast(nir_deref_instr *cast)
{
   nir_deref_instr *parent = nir_src_as_deref(cast->parent);
   if (!parent)
      return false;

   return cast->modes == parent->modes &&
          cast->type == parent->type &&
          cast->dest.ssa.num_components == parent->dest.ssa.num_components &&
          cast->dest.ssa.bit_size == parent->dest.ssa.bit_size;
}

void
nir_deref_path_init(nir_deref_path *path,
                    nir_deref_instr *deref, void *mem_ctx)
{
   assert(deref != NULL);

   /* The length of the short path is at most ARRAY_SIZE - 1 because we need
    * room for the NULL terminator.
    */
   static const int max_short_path_len = ARRAY_SIZE(path->_short_path) - 1;

   int count = 0;

   nir_deref_instr **tail = &path->_short_path[max_short_path_len];
   nir_deref_instr **head = tail;

   *tail = NULL;
   for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
      if (d->deref_type == nir_deref_type_cast && is_trivial_deref_cast(d))
         continue;
      count++;
      if (count <= max_short_path_len)
         *(--head) = d;
   }

   if (count <= max_short_path_len) {
      /* If we're under max_short_path_len, just use the short path. */
      path->path = head;
      goto done;
   }

#ifndef NDEBUG
   /* Just in case someone uses short_path by accident */
   for (unsigned i = 0; i < ARRAY_SIZE(path->_short_path); i++)
      path->_short_path[i] = (void *)(uintptr_t)0xdeadbeef;
#endif

   path->path = ralloc_array(mem_ctx, nir_deref_instr *, count + 1);
   head = tail = path->path + count;
   *tail = NULL;
   for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
      if (d->deref_type == nir_deref_type_cast && is_trivial_deref_cast(d))
         continue;
      *(--head) = d;
   }

done:
   assert(head == path->path);
   assert(tail == head + count);
   assert(*tail == NULL);
}

void
nir_deref_path_finish(nir_deref_path *path)
{
   if (path->path < &path->_short_path[0] ||
       path->path > &path->_short_path[ARRAY_SIZE(path->_short_path) - 1])
      ralloc_free(path->path);
}
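
/* Illustrative sketch (not part of this file's pass logic) of the usual
 * init/walk/finish pattern for the helpers above.  The "deref" variable is
 * assumed to come from the surrounding pass:
 *
 *    nir_deref_path path;
 *    nir_deref_path_init(&path, deref, NULL);
 *
 *    // path.path[0] is the variable or cast at the root; the array is
 *    // NULL-terminated, so this visits every deref from root to leaf.
 *    for (nir_deref_instr **p = path.path; *p; p++) {
 *       // inspect (*p)->deref_type, (*p)->type, etc.
 *    }
 *
 *    nir_deref_path_finish(&path);
 *
 * Passing NULL as mem_ctx is fine as long as nir_deref_path_finish() is
 * called; it frees the ralloc'd long path when the inline short path was not
 * big enough.
 */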

/**
 * Recursively removes unused deref instructions
 */
bool
nir_deref_instr_remove_if_unused(nir_deref_instr *instr)
{
   bool progress = false;

   for (nir_deref_instr *d = instr; d; d = nir_deref_instr_parent(d)) {
      /* If anyone is using this deref, leave it alone */
      assert(d->dest.is_ssa);
      if (!nir_ssa_def_is_unused(&d->dest.ssa))
         break;

      nir_instr_remove(&d->instr);
      progress = true;
   }

   return progress;
}
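
/* Illustrative only: the common way this helper is used by passes in this
 * file.  After a pass rewrites or removes the instruction that consumed a
 * deref, it calls nir_deref_instr_remove_if_unused() on that deref so the
 * now-dead chain (the deref plus any parents with no other users) is cleaned
 * up immediately instead of waiting for nir_remove_dead_derefs().  The
 * "load" and "deref" names below are hypothetical:
 *
 *    nir_instr_remove(&load->instr);
 *    nir_deref_instr_remove_if_unused(deref);
 */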

bool
nir_deref_instr_has_indirect(nir_deref_instr *instr)
{
   while (instr->deref_type != nir_deref_type_var) {
      /* Consider casts to be indirects */
      if (instr->deref_type == nir_deref_type_cast)
         return true;

      if ((instr->deref_type == nir_deref_type_array ||
           instr->deref_type == nir_deref_type_ptr_as_array) &&
          !nir_src_is_const(instr->arr.index))
         return true;

      instr = nir_deref_instr_parent(instr);
   }

   return false;
}

bool
nir_deref_instr_is_known_out_of_bounds(nir_deref_instr *instr)
{
   for (; instr; instr = nir_deref_instr_parent(instr)) {
      if (instr->deref_type == nir_deref_type_array &&
          nir_src_is_const(instr->arr.index) &&
          nir_src_as_uint(instr->arr.index) >=
          glsl_get_length(nir_deref_instr_parent(instr)->type))
         return true;
   }

   return false;
}

bool
nir_deref_instr_has_complex_use(nir_deref_instr *deref,
                                nir_deref_instr_has_complex_use_options opts)
{
   nir_foreach_use(use_src, &deref->dest.ssa) {
      nir_instr *use_instr = use_src->parent_instr;

      switch (use_instr->type) {
      case nir_instr_type_deref: {
         nir_deref_instr *use_deref = nir_instr_as_deref(use_instr);

         /* A var deref has no sources */
         assert(use_deref->deref_type != nir_deref_type_var);

         /* If a deref shows up in an array index or something like that, it's
          * a complex use.
          */
         if (use_src != &use_deref->parent)
            return true;

         /* Anything that isn't a basic struct or array deref is considered to
          * be a "complex" use.  In particular, we don't allow ptr_as_array
          * because we assume that opt_deref will turn any non-complex
          * ptr_as_array derefs into regular array derefs eventually so passes
          * which only want to handle simple derefs will pick them up in a
          * later pass.
          */
         if (use_deref->deref_type != nir_deref_type_struct &&
             use_deref->deref_type != nir_deref_type_array_wildcard &&
             use_deref->deref_type != nir_deref_type_array)
            return true;

         if (nir_deref_instr_has_complex_use(use_deref, opts))
            return true;

         continue;
      }

      case nir_instr_type_intrinsic: {
         nir_intrinsic_instr *use_intrin = nir_instr_as_intrinsic(use_instr);
         switch (use_intrin->intrinsic) {
         case nir_intrinsic_load_deref:
            assert(use_src == &use_intrin->src[0]);
            continue;

         case nir_intrinsic_copy_deref:
            assert(use_src == &use_intrin->src[0] ||
                   use_src == &use_intrin->src[1]);
            continue;

         case nir_intrinsic_store_deref:
            /* A use in src[1] of a store means we're taking that pointer and
             * writing it to a variable.  Because we have no idea who will
             * read that variable and what they will do with the pointer, it's
             * considered a "complex" use.  A use in src[0], on the other
             * hand, is a simple use because we're just going to dereference
             * it and write a value there.
             */
            if (use_src == &use_intrin->src[0])
               continue;
            return true;

         case nir_intrinsic_memcpy_deref:
            if (use_src == &use_intrin->src[0] &&
                (opts & nir_deref_instr_has_complex_use_allow_memcpy_dst))
               continue;
            if (use_src == &use_intrin->src[1] &&
                (opts & nir_deref_instr_has_complex_use_allow_memcpy_src))
               continue;
            return true;

         default:
            return true;
         }
         unreachable("Switch default failed");
      }

      default:
         return true;
      }
   }

   nir_foreach_if_use(use, &deref->dest.ssa)
      return true;

   return false;
}

static unsigned
type_scalar_size_bytes(const struct glsl_type *type)
{
   assert(glsl_type_is_vector_or_scalar(type) ||
          glsl_type_is_matrix(type));
   return glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;
}

unsigned
nir_deref_instr_array_stride(nir_deref_instr *deref)
{
   switch (deref->deref_type) {
   case nir_deref_type_array:
   case nir_deref_type_array_wildcard: {
      const struct glsl_type *arr_type = nir_deref_instr_parent(deref)->type;
      unsigned stride = glsl_get_explicit_stride(arr_type);

      if ((glsl_type_is_matrix(arr_type) &&
           glsl_matrix_type_is_row_major(arr_type)) ||
          (glsl_type_is_vector(arr_type) && stride == 0))
         stride = type_scalar_size_bytes(arr_type);

      return stride;
   }
   case nir_deref_type_ptr_as_array:
      return nir_deref_instr_array_stride(nir_deref_instr_parent(deref));
   case nir_deref_type_cast:
      return deref->cast.ptr_stride;
   default:
      return 0;
   }
}

static unsigned
type_get_array_stride(const struct glsl_type *elem_type,
                      glsl_type_size_align_func size_align)
{
   unsigned elem_size, elem_align;
   size_align(elem_type, &elem_size, &elem_align);
   return ALIGN_POT(elem_size, elem_align);
}

static unsigned
struct_type_get_field_offset(const struct glsl_type *struct_type,
                             glsl_type_size_align_func size_align,
                             unsigned field_idx)
{
   assert(glsl_type_is_struct_or_ifc(struct_type));
   unsigned offset = 0;
   for (unsigned i = 0; i <= field_idx; i++) {
      unsigned elem_size, elem_align;
      size_align(glsl_get_struct_field(struct_type, i), &elem_size, &elem_align);
      offset = ALIGN_POT(offset, elem_align);
      if (i < field_idx)
         offset += elem_size;
   }
   return offset;
}
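
/* Worked example (illustrative, not used by the code): for
 *
 *    struct S { float a; vec4 b; };
 *
 * and a size_align callback that reports float as size 4 / align 4 and vec4
 * as size 16 / align 16, struct_type_get_field_offset(S, size_align, 1)
 * visits field 0 (offset aligns to 0, then advances by 4) and then aligns the
 * running offset up to 16 for field 1, returning 16.  The exact numbers
 * depend entirely on the size_align callback the caller passes in.
 */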

unsigned
nir_deref_instr_get_const_offset(nir_deref_instr *deref,
                                 glsl_type_size_align_func size_align)
{
   nir_deref_path path;
   nir_deref_path_init(&path, deref, NULL);

   unsigned offset = 0;
   for (nir_deref_instr **p = &path.path[1]; *p; p++) {
      switch ((*p)->deref_type) {
      case nir_deref_type_array:
         offset += nir_src_as_uint((*p)->arr.index) *
                   type_get_array_stride((*p)->type, size_align);
         break;
      case nir_deref_type_struct: {
         /* p starts at path[1], so this is safe */
         nir_deref_instr *parent = *(p - 1);
         offset += struct_type_get_field_offset(parent->type, size_align,
                                                (*p)->strct.index);
         break;
      }
      case nir_deref_type_cast:
         /* A cast doesn't contribute to the offset */
         break;
      default:
         unreachable("Unsupported deref type");
      }
   }

   nir_deref_path_finish(&path);

   return offset;
}

nir_ssa_def *
nir_build_deref_offset(nir_builder *b, nir_deref_instr *deref,
                       glsl_type_size_align_func size_align)
{
   nir_deref_path path;
   nir_deref_path_init(&path, deref, NULL);

   nir_ssa_def *offset = nir_imm_intN_t(b, 0, deref->dest.ssa.bit_size);
   for (nir_deref_instr **p = &path.path[1]; *p; p++) {
      switch ((*p)->deref_type) {
      case nir_deref_type_array:
      case nir_deref_type_ptr_as_array: {
         nir_ssa_def *index = nir_ssa_for_src(b, (*p)->arr.index, 1);
         int stride = type_get_array_stride((*p)->type, size_align);
         offset = nir_iadd(b, offset, nir_amul_imm(b, index, stride));
         break;
      }
      case nir_deref_type_struct: {
         /* p starts at path[1], so this is safe */
         nir_deref_instr *parent = *(p - 1);
         unsigned field_offset =
            struct_type_get_field_offset(parent->type, size_align,
                                         (*p)->strct.index);
         offset = nir_iadd_imm(b, offset, field_offset);
         break;
      }
      case nir_deref_type_cast:
         /* A cast doesn't contribute to the offset */
         break;
      default:
         unreachable("Unsupported deref type");
      }
   }

   nir_deref_path_finish(&path);

   return offset;
}

bool
nir_remove_dead_derefs_impl(nir_function_impl *impl)
{
   bool progress = false;

   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type == nir_instr_type_deref &&
             nir_deref_instr_remove_if_unused(nir_instr_as_deref(instr)))
            progress = true;
      }
   }

   if (progress) {
      nir_metadata_preserve(impl, nir_metadata_block_index |
                                  nir_metadata_dominance);
   } else {
      nir_metadata_preserve(impl, nir_metadata_all);
   }

   return progress;
}

bool
nir_remove_dead_derefs(nir_shader *shader)
{
   bool progress = false;
   nir_foreach_function(function, shader) {
      if (function->impl && nir_remove_dead_derefs_impl(function->impl))
         progress = true;
   }

   return progress;
}

void
nir_fixup_deref_modes(nir_shader *shader)
{
   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;

      nir_foreach_block(block, function->impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_deref)
               continue;

            nir_deref_instr *deref = nir_instr_as_deref(instr);
            if (deref->deref_type == nir_deref_type_cast)
               continue;

            nir_variable_mode parent_modes;
            if (deref->deref_type == nir_deref_type_var) {
               parent_modes = deref->var->data.mode;
            } else {
               assert(deref->parent.is_ssa);
               nir_deref_instr *parent =
                  nir_instr_as_deref(deref->parent.ssa->parent_instr);
               parent_modes = parent->modes;
            }

            deref->modes = parent_modes;
         }
      }
   }
}

static bool
modes_may_alias(nir_variable_mode a, nir_variable_mode b)
{
   /* Generic pointers can alias with SSBOs */
   if ((a & (nir_var_mem_ssbo | nir_var_mem_global)) &&
       (b & (nir_var_mem_ssbo | nir_var_mem_global)))
      return true;

   /* Pointers can only alias if they share a mode. */
   return a & b;
}

ALWAYS_INLINE static nir_deref_compare_result
compare_deref_paths(nir_deref_path *a_path, nir_deref_path *b_path,
                    unsigned *i, bool (*stop_fn)(const nir_deref_instr *))
{
   /* Start off assuming they fully compare.  We ignore equality for now.  In
    * the end, we'll determine that by containment.
    */
   nir_deref_compare_result result = nir_derefs_may_alias_bit |
                                     nir_derefs_a_contains_b_bit |
                                     nir_derefs_b_contains_a_bit;

   nir_deref_instr **a = a_path->path;
   nir_deref_instr **b = b_path->path;

   for (; a[*i] != NULL; (*i)++) {
      if (a[*i] != b[*i])
         break;

      if (stop_fn && stop_fn(a[*i]))
         break;
   }

   /* We're at either the tail or the divergence point between the two deref
    * paths.  Look to see if either contains a cast or a ptr_as_array deref.  If
    * it does we don't know how to safely make any inferences.  Hopefully,
    * nir_opt_deref will clean most of these up and we can start inferring
    * things again.
    *
    * In theory, we could do a bit better.  For instance, we could detect the
    * case where we have exactly one ptr_as_array deref in the chain after the
    * divergence point and it's matched in both chains and the two chains have
    * different constant indices.
    */
   for (unsigned j = *i; a[j] != NULL; j++) {
      if (stop_fn && stop_fn(a[j]))
         break;

      if (a[j]->deref_type == nir_deref_type_cast ||
          a[j]->deref_type == nir_deref_type_ptr_as_array)
         return nir_derefs_may_alias_bit;
   }
   for (unsigned j = *i; b[j] != NULL; j++) {
      if (stop_fn && stop_fn(b[j]))
         break;

      if (b[j]->deref_type == nir_deref_type_cast ||
          b[j]->deref_type == nir_deref_type_ptr_as_array)
         return nir_derefs_may_alias_bit;
   }

   for (; a[*i] != NULL && b[*i] != NULL; (*i)++) {
      if (stop_fn && (stop_fn(a[*i]) || stop_fn(b[*i])))
         break;

      switch (a[*i]->deref_type) {
      case nir_deref_type_array:
      case nir_deref_type_array_wildcard: {
         assert(b[*i]->deref_type == nir_deref_type_array ||
                b[*i]->deref_type == nir_deref_type_array_wildcard);

         if (a[*i]->deref_type == nir_deref_type_array_wildcard) {
            if (b[*i]->deref_type != nir_deref_type_array_wildcard)
               result &= ~nir_derefs_b_contains_a_bit;
         } else if (b[*i]->deref_type == nir_deref_type_array_wildcard) {
            if (a[*i]->deref_type != nir_deref_type_array_wildcard)
               result &= ~nir_derefs_a_contains_b_bit;
         } else {
            assert(a[*i]->deref_type == nir_deref_type_array &&
                   b[*i]->deref_type == nir_deref_type_array);
            assert(a[*i]->arr.index.is_ssa && b[*i]->arr.index.is_ssa);

            if (nir_src_is_const(a[*i]->arr.index) &&
                nir_src_is_const(b[*i]->arr.index)) {
               /* If they're both direct and have different offsets, they
                * don't even alias much less anything else.
                */
               if (nir_src_as_uint(a[*i]->arr.index) !=
                   nir_src_as_uint(b[*i]->arr.index))
                  return nir_derefs_do_not_alias;
            } else if (a[*i]->arr.index.ssa == b[*i]->arr.index.ssa) {
               /* They're the same indirect, continue on */
            } else {
               /* They're not the same index so we can't prove anything about
                * containment.
                */
               result &= ~(nir_derefs_a_contains_b_bit | nir_derefs_b_contains_a_bit);
            }
         }
         break;
      }

      case nir_deref_type_struct: {
         /* If they're different struct members, they don't even alias */
         if (a[*i]->strct.index != b[*i]->strct.index)
            return nir_derefs_do_not_alias;
         break;
      }

      default:
         unreachable("Invalid deref type");
      }
   }

   /* If a is longer than b, then it can't contain b.  If neither a[i] nor
    * b[i] are NULL then we aren't at the end of the chain and we know nothing
    * about containment.
    */
   if (a[*i] != NULL)
      result &= ~nir_derefs_a_contains_b_bit;
   if (b[*i] != NULL)
      result &= ~nir_derefs_b_contains_a_bit;

   /* If a contains b and b contains a they must be equal. */
   if ((result & nir_derefs_a_contains_b_bit) &&
       (result & nir_derefs_b_contains_a_bit))
      result |= nir_derefs_equal_bit;

   return result;
}

static bool
is_interface_struct_deref(const nir_deref_instr *deref)
{
   if (deref->deref_type == nir_deref_type_struct) {
      assert(glsl_type_is_struct_or_ifc(nir_deref_instr_parent(deref)->type));
      return true;
   } else {
      return false;
   }
}

nir_deref_compare_result
nir_compare_deref_paths(nir_deref_path *a_path,
                        nir_deref_path *b_path)
{
   if (!modes_may_alias(b_path->path[0]->modes, a_path->path[0]->modes))
      return nir_derefs_do_not_alias;

   if (a_path->path[0]->deref_type != b_path->path[0]->deref_type)
      return nir_derefs_may_alias_bit;

   unsigned path_idx = 1;
   if (a_path->path[0]->deref_type == nir_deref_type_var) {
      const nir_variable *a_var = a_path->path[0]->var;
      const nir_variable *b_var = b_path->path[0]->var;

      /* If we got here, the two variables must have the same mode.  The
       * only way modes_may_alias() can return true for two different modes
       * is if one is global and the other ssbo.  However, Global variables
       * only exist in OpenCL and SSBOs don't exist there.  No API allows
       * both for variables.
       */
      assert(a_var->data.mode == b_var->data.mode);

      switch (a_var->data.mode) {
      case nir_var_mem_ssbo: {
         nir_deref_compare_result binding_compare;
         if (a_var == b_var) {
            binding_compare = compare_deref_paths(a_path, b_path, &path_idx,
                                                  is_interface_struct_deref);
         } else {
            binding_compare = nir_derefs_do_not_alias;
         }

         if (binding_compare & nir_derefs_equal_bit)
            break;

         /* If the binding derefs can't alias and at least one is RESTRICT,
          * then we know they can't alias.
          */
         if (!(binding_compare & nir_derefs_may_alias_bit) &&
             ((a_var->data.access & ACCESS_RESTRICT) ||
              (b_var->data.access & ACCESS_RESTRICT)))
            return nir_derefs_do_not_alias;

         return nir_derefs_may_alias_bit;
      }

      case nir_var_mem_shared:
         if (a_var == b_var)
            break;

         /* Per SPV_KHR_workgroup_memory_explicit_layout and
          * GL_EXT_shared_memory_block, shared blocks alias each other.
          * We will have either all blocks or all non-blocks.
          */
         if (glsl_type_is_interface(a_var->type) ||
             glsl_type_is_interface(b_var->type)) {
            assert(glsl_type_is_interface(a_var->type) &&
                   glsl_type_is_interface(b_var->type));
            return nir_derefs_may_alias_bit;
         }

         /* Otherwise, distinct shared vars don't alias */
         return nir_derefs_do_not_alias;

      default:
         /* For any other variable types, if we can chase them back to the
          * variable, and the variables are different, they don't alias.
          */
         if (a_var == b_var)
            break;

         return nir_derefs_do_not_alias;
      }
   } else {
      assert(a_path->path[0]->deref_type == nir_deref_type_cast);
      /* If they're not exactly the same cast, it's hard to compare them so we
       * just assume they alias.  Comparing casts is tricky as there are lots
       * of things such as mode, type, etc. to make sure work out; for now, we
       * just assume nir_opt_deref will combine them and compare the deref
       * instructions.
       *
       * TODO: At some point in the future, we could be clever and understand
       * that a float[] and int[] have the same layout and aliasing structure
       * but double[] and vec3[] do not and we could potentially be a bit
       * smarter here.
       */
      if (a_path->path[0] != b_path->path[0])
         return nir_derefs_may_alias_bit;
   }

   return compare_deref_paths(a_path, b_path, &path_idx, NULL);
}

nir_deref_compare_result
nir_compare_derefs(nir_deref_instr *a, nir_deref_instr *b)
{
   if (a == b) {
      return nir_derefs_equal_bit | nir_derefs_may_alias_bit |
             nir_derefs_a_contains_b_bit | nir_derefs_b_contains_a_bit;
   }

   nir_deref_path a_path, b_path;
   nir_deref_path_init(&a_path, a, NULL);
   nir_deref_path_init(&b_path, b, NULL);
   assert(a_path.path[0]->deref_type == nir_deref_type_var ||
          a_path.path[0]->deref_type == nir_deref_type_cast);
   assert(b_path.path[0]->deref_type == nir_deref_type_var ||
          b_path.path[0]->deref_type == nir_deref_type_cast);

   nir_deref_compare_result result = nir_compare_deref_paths(&a_path, &b_path);

   nir_deref_path_finish(&a_path);
   nir_deref_path_finish(&b_path);

   return result;
}
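
/* Illustrative sketch (assumed caller-side code, not part of this file): a
 * pass that wants to know whether a store through "dst" can clobber a load
 * through "src" typically checks the returned bits like this:
 *
 *    nir_deref_compare_result cmp = nir_compare_derefs(dst, src);
 *    if (!(cmp & nir_derefs_may_alias_bit)) {
 *       // provably disjoint memory; the store can be ignored
 *    } else if (cmp & nir_derefs_equal_bit) {
 *       // exactly the same location
 *    } else if (cmp & nir_derefs_a_contains_b_bit) {
 *       // every location src touches is also written by dst
 *    }
 */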

nir_deref_path *nir_get_deref_path(void *mem_ctx, nir_deref_and_path *deref)
{
   if (!deref->_path) {
      deref->_path = ralloc(mem_ctx, nir_deref_path);
      nir_deref_path_init(deref->_path, deref->instr, mem_ctx);
   }
   return deref->_path;
}

nir_deref_compare_result nir_compare_derefs_and_paths(void *mem_ctx,
                                                      nir_deref_and_path *a,
                                                      nir_deref_and_path *b)
{
   if (a->instr == b->instr) /* nir_compare_derefs has a fast path if a == b */
      return nir_compare_derefs(a->instr, b->instr);

   return nir_compare_deref_paths(nir_get_deref_path(mem_ctx, a),
                                  nir_get_deref_path(mem_ctx, b));
}

struct rematerialize_deref_state {
   bool progress;
   nir_builder builder;
   nir_block *block;
   struct hash_table *cache;
};

static nir_deref_instr *
rematerialize_deref_in_block(nir_deref_instr *deref,
                             struct rematerialize_deref_state *state)
{
   if (deref->instr.block == state->block)
      return deref;

   if (!state->cache) {
      state->cache = _mesa_pointer_hash_table_create(NULL);
   }

   struct hash_entry *cached = _mesa_hash_table_search(state->cache, deref);
   if (cached)
      return cached->data;

   nir_builder *b = &state->builder;
   nir_deref_instr *new_deref =
      nir_deref_instr_create(b->shader, deref->deref_type);
   new_deref->modes = deref->modes;
   new_deref->type = deref->type;

   if (deref->deref_type == nir_deref_type_var) {
      new_deref->var = deref->var;
   } else {
      nir_deref_instr *parent = nir_src_as_deref(deref->parent);
      if (parent) {
         parent = rematerialize_deref_in_block(parent, state);
         new_deref->parent = nir_src_for_ssa(&parent->dest.ssa);
      } else {
         nir_src_copy(&new_deref->parent, &deref->parent);
      }
   }

   switch (deref->deref_type) {
   case nir_deref_type_var:
   case nir_deref_type_array_wildcard:
      /* Nothing more to do */
      break;

   case nir_deref_type_cast:
      new_deref->cast.ptr_stride = deref->cast.ptr_stride;
      break;

   case nir_deref_type_array:
   case nir_deref_type_ptr_as_array:
      assert(!nir_src_as_deref(deref->arr.index));
      nir_src_copy(&new_deref->arr.index, &deref->arr.index);
      break;

   case nir_deref_type_struct:
      new_deref->strct.index = deref->strct.index;
      break;

   default:
      unreachable("Invalid deref instruction type");
   }

   nir_ssa_dest_init(&new_deref->instr, &new_deref->dest,
                     deref->dest.ssa.num_components,
                     deref->dest.ssa.bit_size,
                     NULL);
   nir_builder_instr_insert(b, &new_deref->instr);

   return new_deref;
}

static bool
rematerialize_deref_src(nir_src *src, void *_state)
{
   struct rematerialize_deref_state *state = _state;

   nir_deref_instr *deref = nir_src_as_deref(*src);
   if (!deref)
      return true;

   nir_deref_instr *block_deref = rematerialize_deref_in_block(deref, state);
   if (block_deref != deref) {
      nir_instr_rewrite_src(src->parent_instr, src,
                            nir_src_for_ssa(&block_deref->dest.ssa));
      nir_deref_instr_remove_if_unused(deref);
      state->progress = true;
   }

   return true;
}

/** Re-materialize derefs in every block
 *
 * This pass re-materializes deref instructions in every block in which it is
 * used.  After this pass has been run, every use of a deref will be of a
 * deref in the same block as the use.  Also, all unused derefs will be
 * deleted as a side-effect.
 *
 * Derefs used as sources of phi instructions are not rematerialized.
 */
bool
nir_rematerialize_derefs_in_use_blocks_impl(nir_function_impl *impl)
{
   struct rematerialize_deref_state state = { 0 };
   nir_builder_init(&state.builder, impl);

   nir_foreach_block_unstructured(block, impl) {
      state.block = block;

      /* Start each block with a fresh cache */
      if (state.cache)
         _mesa_hash_table_clear(state.cache, NULL);

      nir_foreach_instr_safe(instr, block) {
         if (instr->type == nir_instr_type_deref &&
             nir_deref_instr_remove_if_unused(nir_instr_as_deref(instr)))
            continue;

         /* If a deref is used in a phi, we can't rematerialize it, as the new
          * derefs would appear before the phi, which is not valid.
          */
         if (instr->type == nir_instr_type_phi)
            continue;

         state.builder.cursor = nir_before_instr(instr);
         nir_foreach_src(instr, rematerialize_deref_src, &state);
      }

#ifndef NDEBUG
      nir_if *following_if = nir_block_get_following_if(block);
      if (following_if)
         assert(!nir_src_as_deref(following_if->condition));
#endif
   }

   _mesa_hash_table_destroy(state.cache, NULL);

   return state.progress;
}
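
/* Illustrative only: how a caller typically runs the helper above over a
 * whole shader (the "shader" variable is assumed), mirroring the shader-level
 * wrappers used elsewhere in this file such as nir_remove_dead_derefs():
 *
 *    bool progress = false;
 *    nir_foreach_function(function, shader) {
 *       if (function->impl &&
 *           nir_rematerialize_derefs_in_use_blocks_impl(function->impl))
 *          progress = true;
 *    }
 */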

static void
nir_deref_instr_fixup_child_types(nir_deref_instr *parent)
{
   nir_foreach_use(use, &parent->dest.ssa) {
      if (use->parent_instr->type != nir_instr_type_deref)
         continue;

      nir_deref_instr *child = nir_instr_as_deref(use->parent_instr);
      switch (child->deref_type) {
      case nir_deref_type_var:
         unreachable("nir_deref_type_var cannot be a child");

      case nir_deref_type_array:
      case nir_deref_type_array_wildcard:
         child->type = glsl_get_array_element(parent->type);
         break;

      case nir_deref_type_ptr_as_array:
         child->type = parent->type;
         break;

      case nir_deref_type_struct:
         child->type = glsl_get_struct_field(parent->type,
                                             child->strct.index);
         break;

      case nir_deref_type_cast:
         /* We stop the recursion here */
         continue;
      }

      /* Recurse into children */
      nir_deref_instr_fixup_child_types(child);
   }
}

static bool
opt_alu_of_cast(nir_alu_instr *alu)
{
   bool progress = false;

   for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
      assert(alu->src[i].src.is_ssa);
      nir_instr *src_instr = alu->src[i].src.ssa->parent_instr;
      if (src_instr->type != nir_instr_type_deref)
         continue;

      nir_deref_instr *src_deref = nir_instr_as_deref(src_instr);
      if (src_deref->deref_type != nir_deref_type_cast)
         continue;

      assert(src_deref->parent.is_ssa);
      nir_instr_rewrite_src_ssa(&alu->instr, &alu->src[i].src,
                                src_deref->parent.ssa);
      progress = true;
   }

   return progress;
}

static bool
is_trivial_array_deref_cast(nir_deref_instr *cast)
{
   assert(is_trivial_deref_cast(cast));

   nir_deref_instr *parent = nir_src_as_deref(cast->parent);

   if (parent->deref_type == nir_deref_type_array) {
      return cast->cast.ptr_stride ==
             glsl_get_explicit_stride(nir_deref_instr_parent(parent)->type);
   } else if (parent->deref_type == nir_deref_type_ptr_as_array) {
      return cast->cast.ptr_stride ==
             nir_deref_instr_array_stride(parent);
   } else {
      return false;
   }
}

static bool
is_deref_ptr_as_array(nir_instr *instr)
{
   return instr->type == nir_instr_type_deref &&
          nir_instr_as_deref(instr)->deref_type == nir_deref_type_ptr_as_array;
}

static bool
opt_remove_restricting_cast_alignments(nir_deref_instr *cast)
{
   assert(cast->deref_type == nir_deref_type_cast);
   if (cast->cast.align_mul == 0)
      return false;

   nir_deref_instr *parent = nir_src_as_deref(cast->parent);
   if (parent == NULL)
      return false;

   /* Don't use any default alignment for this check.  We don't want to fall
    * back to type alignment too early in case we find out later that we're
    * somehow a child of a packed struct.
    */
   uint32_t parent_mul, parent_offset;
   if (!nir_get_explicit_deref_align(parent, false /* default_to_type_align */,
                                     &parent_mul, &parent_offset))
      return false;

   /* If this cast increases the alignment, we want to keep it.
    *
    * There is a possibility that the larger alignment provided by this cast
    * somehow disagrees with the smaller alignment further up the deref chain.
    * In that case, we choose to favor the alignment closer to the actual
    * memory operation which, in this case, is the cast and not its parent so
    * keeping the cast alignment is the right thing to do.
    */
   if (parent_mul < cast->cast.align_mul)
      return false;

   /* If we've gotten here, we have a parent deref with an align_mul at least
    * as large as ours so we can potentially throw away the alignment
    * information on this deref.  There are two cases to consider here:
    *
    *  1. We can chase the deref all the way back to the variable.  In this
    *     case, we have "perfect" knowledge, modulo indirect array derefs.
    *     Unless we've done something wrong in our indirect/wildcard stride
    *     calculations, our knowledge from the deref walk is better than the
    *     client's.
    *
    *  2. We can't chase it all the way back to the variable.  In this case,
    *     because our call to nir_get_explicit_deref_align(parent, ...) above
    *     passes default_to_type_align=false, the only way we can even
    *     get here is if something further up the deref chain has a cast with
    *     an alignment which can only happen if we get an alignment from the
    *     client (most likely a decoration in the SPIR-V).  If the client has
    *     provided us with two conflicting alignments in the deref chain,
    *     that's their fault and we can do whatever we want.
    *
    * In either case, we should be within our rights, at this point, to throw
    * away the alignment information on this deref.  However, to be "nice" to
    * weird clients, we do one more check.  It really shouldn't happen but
    * it's possible that the parent's alignment offset disagrees with the
    * cast's alignment offset.  In this case, we consider the cast as
    * providing more information (or at least more valid information) and keep
    * it even if the align_mul from the parent is larger.
    */
   assert(cast->cast.align_mul <= parent_mul);
   if (parent_offset % cast->cast.align_mul != cast->cast.align_offset)
      return false;

   /* If we got here, the parent has better alignment information than the
    * child and we can get rid of the child alignment information.
    */
   cast->cast.align_mul = 0;
   cast->cast.align_offset = 0;
   return true;
}

/**
 * Remove casts that just wrap other casts.
 */
static bool
opt_remove_cast_cast(nir_deref_instr *cast)
{
   nir_deref_instr *first_cast = cast;

   while (true) {
      nir_deref_instr *parent = nir_deref_instr_parent(first_cast);
      if (parent == NULL || parent->deref_type != nir_deref_type_cast)
         break;
      first_cast = parent;
   }
   if (cast == first_cast)
      return false;

   nir_instr_rewrite_src(&cast->instr, &cast->parent,
                         nir_src_for_ssa(first_cast->parent.ssa));
   return true;
}

/* Restrict variable modes in casts.
 *
 * If we know from something higher up the deref chain that the deref has a
 * specific mode, we can cast to more general and back but we can never cast
 * across modes.  For non-cast derefs, we should only ever do anything here if
 * the parent eventually comes from a cast that we restricted earlier.
 */
static bool
opt_restrict_deref_modes(nir_deref_instr *deref)
{
   if (deref->deref_type == nir_deref_type_var) {
      assert(deref->modes == deref->var->data.mode);
      return false;
   }

   nir_deref_instr *parent = nir_src_as_deref(deref->parent);
   if (parent == NULL || parent->modes == deref->modes)
      return false;

   assert(parent->modes & deref->modes);
   deref->modes &= parent->modes;
   return true;
}

static bool
opt_remove_sampler_cast(nir_deref_instr *cast)
{
   assert(cast->deref_type == nir_deref_type_cast);
   nir_deref_instr *parent = nir_src_as_deref(cast->parent);
   if (parent == NULL)
      return false;

   /* Strip both types down to their non-array type and bail if there are any
    * discrepancies in array lengths.
    */
   const struct glsl_type *parent_type = parent->type;
   const struct glsl_type *cast_type = cast->type;
   while (glsl_type_is_array(parent_type) && glsl_type_is_array(cast_type)) {
      if (glsl_get_length(parent_type) != glsl_get_length(cast_type))
         return false;
      parent_type = glsl_get_array_element(parent_type);
      cast_type = glsl_get_array_element(cast_type);
   }

   if (!glsl_type_is_sampler(parent_type))
      return false;

   if (cast_type != glsl_bare_sampler_type() &&
       (glsl_type_is_bare_sampler(parent_type) ||
        cast_type != glsl_sampler_type_to_texture(parent_type)))
      return false;

   /* We're a cast from a more detailed sampler type to a bare sampler or a
    * texture type with the same dimensionality.
    */
   nir_ssa_def_rewrite_uses(&cast->dest.ssa,
                            &parent->dest.ssa);
   nir_instr_remove(&cast->instr);

   /* Recursively crawl the deref tree and clean up types */
   nir_deref_instr_fixup_child_types(parent);

   return true;
}

/**
 * Is this casting a struct to a contained struct.
 * struct a { struct b field0 };
 * ssa_5 is structa;
 * deref_cast (structb *)ssa_5 (function_temp structb);
 * converts to
 * deref_struct &ssa_5->field0 (function_temp structb);
 * This allows subsequent copy propagation to work.
 */
static bool
opt_replace_struct_wrapper_cast(nir_builder *b, nir_deref_instr *cast)
{
   nir_deref_instr *parent = nir_src_as_deref(cast->parent);
   if (!parent)
      return false;

   if (cast->cast.align_mul > 0)
      return false;

   if (!glsl_type_is_struct(parent->type))
      return false;

   /* Empty struct */
   if (glsl_get_length(parent->type) < 1)
      return false;

   if (glsl_get_struct_field_offset(parent->type, 0) != 0)
      return false;

   if (cast->type != glsl_get_struct_field(parent->type, 0))
      return false;

   nir_deref_instr *replace = nir_build_deref_struct(b, parent, 0);
   nir_ssa_def_rewrite_uses(&cast->dest.ssa, &replace->dest.ssa);
   nir_deref_instr_remove_if_unused(cast);
   return true;
}

static bool
opt_deref_cast(nir_builder *b, nir_deref_instr *cast)
{
   bool progress = false;

   progress |= opt_remove_restricting_cast_alignments(cast);

   if (opt_replace_struct_wrapper_cast(b, cast))
      return true;

   if (opt_remove_sampler_cast(cast))
      return true;

   progress |= opt_remove_cast_cast(cast);
   if (!is_trivial_deref_cast(cast))
      return progress;

   /* If this deref still contains useful alignment information, we don't want
    * to delete it.
    */
   if (cast->cast.align_mul > 0)
      return progress;

   bool trivial_array_cast = is_trivial_array_deref_cast(cast);

   assert(cast->dest.is_ssa);
   assert(cast->parent.is_ssa);

   nir_foreach_use_safe(use_src, &cast->dest.ssa) {
      /* If this isn't a trivial array cast, we can't propagate into
       * ptr_as_array derefs.
       */
      if (is_deref_ptr_as_array(use_src->parent_instr) &&
          !trivial_array_cast)
         continue;

      nir_instr_rewrite_src(use_src->parent_instr, use_src, cast->parent);
      progress = true;
   }

   /* If-uses (a deref feeding an if condition) would be a bit crazy */
   assert(list_is_empty(&cast->dest.ssa.if_uses));

   if (nir_deref_instr_remove_if_unused(cast))
      progress = true;

   return progress;
}

static bool
opt_deref_ptr_as_array(nir_builder *b, nir_deref_instr *deref)
{
   assert(deref->deref_type == nir_deref_type_ptr_as_array);

   nir_deref_instr *parent = nir_deref_instr_parent(deref);

   if (nir_src_is_const(deref->arr.index) &&
       nir_src_as_int(deref->arr.index) == 0) {
      /* If it's a ptr_as_array deref with an index of 0, it does nothing
       * and we can just replace its uses with its parent, unless it has
       * alignment information.
       *
       * The source of a ptr_as_array deref always has a deref_type of
       * nir_deref_type_array or nir_deref_type_cast.  If it's a cast, it
       * may be trivial and we may be able to get rid of that too.  Any
       * trivial cast of trivial cast cases should be handled already by
       * opt_deref_cast() above.
       */
      if (parent->deref_type == nir_deref_type_cast &&
          parent->cast.align_mul == 0 &&
          is_trivial_deref_cast(parent))
         parent = nir_deref_instr_parent(parent);
      nir_ssa_def_rewrite_uses(&deref->dest.ssa,
                               &parent->dest.ssa);
      nir_instr_remove(&deref->instr);
      return true;
   }

   if (parent->deref_type != nir_deref_type_array &&
       parent->deref_type != nir_deref_type_ptr_as_array)
      return false;

   assert(parent->parent.is_ssa);
   assert(parent->arr.index.is_ssa);
   assert(deref->arr.index.is_ssa);

   deref->arr.in_bounds &= parent->arr.in_bounds;

   nir_ssa_def *new_idx = nir_iadd(b, parent->arr.index.ssa,
                                      deref->arr.index.ssa);

   deref->deref_type = parent->deref_type;
   nir_instr_rewrite_src(&deref->instr, &deref->parent, parent->parent);
   nir_instr_rewrite_src(&deref->instr, &deref->arr.index,
                         nir_src_for_ssa(new_idx));
   return true;
}

static bool
is_vector_bitcast_deref(nir_deref_instr *cast,
                        nir_component_mask_t mask,
                        bool is_write)
{
   if (cast->deref_type != nir_deref_type_cast)
      return false;

   /* Don't throw away useful alignment information */
   if (cast->cast.align_mul > 0)
      return false;

   /* It has to be a cast of another deref */
   nir_deref_instr *parent = nir_src_as_deref(cast->parent);
   if (parent == NULL)
      return false;

   /* The parent has to be a vector or scalar */
   if (!glsl_type_is_vector_or_scalar(parent->type))
      return false;

   /* Don't bother with 1-bit types */
   unsigned cast_bit_size = glsl_get_bit_size(cast->type);
   unsigned parent_bit_size = glsl_get_bit_size(parent->type);
   if (cast_bit_size == 1 || parent_bit_size == 1)
      return false;

   /* A strided vector type means it's not tightly packed */
   if (glsl_get_explicit_stride(cast->type) ||
       glsl_get_explicit_stride(parent->type))
      return false;

   assert(cast_bit_size > 0 && cast_bit_size % 8 == 0);
   assert(parent_bit_size > 0 && parent_bit_size % 8 == 0);
   unsigned bytes_used = util_last_bit(mask) * (cast_bit_size / 8);
   unsigned parent_bytes = glsl_get_vector_elements(parent->type) *
                           (parent_bit_size / 8);
   if (bytes_used > parent_bytes)
      return false;

   if (is_write && !nir_component_mask_can_reinterpret(mask, cast_bit_size,
                                                       parent_bit_size))
      return false;

   return true;
}

static nir_ssa_def *
resize_vector(nir_builder *b, nir_ssa_def *data, unsigned num_components)
{
   if (num_components == data->num_components)
      return data;

   unsigned swiz[NIR_MAX_VEC_COMPONENTS] = { 0, };
   for (unsigned i = 0; i < MIN2(num_components, data->num_components); i++)
      swiz[i] = i;

   return nir_swizzle(b, data, swiz, num_components);
}

static bool
opt_load_vec_deref(nir_builder *b, nir_intrinsic_instr *load)
{
   nir_deref_instr *deref = nir_src_as_deref(load->src[0]);
   nir_component_mask_t read_mask =
      nir_ssa_def_components_read(&load->dest.ssa);

   /* LLVM loves to take advantage of the fact that vec3s in OpenCL are
    * vec4-aligned and so it can just read/write them as vec4s.  This
    * results in a LOT of vec4->vec3 casts on loads and stores.
    */
   if (is_vector_bitcast_deref(deref, read_mask, false)) {
      const unsigned old_num_comps = load->dest.ssa.num_components;
      const unsigned old_bit_size = load->dest.ssa.bit_size;

      nir_deref_instr *parent = nir_src_as_deref(deref->parent);
      const unsigned new_num_comps = glsl_get_vector_elements(parent->type);
      const unsigned new_bit_size = glsl_get_bit_size(parent->type);

      /* Stomp it to reference the parent */
      nir_instr_rewrite_src(&load->instr, &load->src[0],
                            nir_src_for_ssa(&parent->dest.ssa));
      assert(load->dest.is_ssa);
      load->dest.ssa.bit_size = new_bit_size;
      load->dest.ssa.num_components = new_num_comps;
      load->num_components = new_num_comps;

      b->cursor = nir_after_instr(&load->instr);
      nir_ssa_def *data = &load->dest.ssa;
      if (old_bit_size != new_bit_size)
         data = nir_bitcast_vector(b, &load->dest.ssa, old_bit_size);
      data = resize_vector(b, data, old_num_comps);

      nir_ssa_def_rewrite_uses_after(&load->dest.ssa, data,
                                     data->parent_instr);
      return true;
   }

   return false;
}

static bool
opt_store_vec_deref(nir_builder *b, nir_intrinsic_instr *store)
{
   nir_deref_instr *deref = nir_src_as_deref(store->src[0]);
   nir_component_mask_t write_mask = nir_intrinsic_write_mask(store);

   /* LLVM loves to take advantage of the fact that vec3s in OpenCL are
    * vec4-aligned and so it can just read/write them as vec4s.  This
    * results in a LOT of vec4->vec3 casts on loads and stores.
    */
   if (is_vector_bitcast_deref(deref, write_mask, true)) {
      assert(store->src[1].is_ssa);
      nir_ssa_def *data = store->src[1].ssa;

      const unsigned old_bit_size = data->bit_size;

      nir_deref_instr *parent = nir_src_as_deref(deref->parent);
      const unsigned new_num_comps = glsl_get_vector_elements(parent->type);
      const unsigned new_bit_size = glsl_get_bit_size(parent->type);

      nir_instr_rewrite_src(&store->instr, &store->src[0],
                            nir_src_for_ssa(&parent->dest.ssa));

      /* Restrict things down as needed so the bitcast doesn't fail */
      data = nir_channels(b, data, (1 << util_last_bit(write_mask)) - 1);
      if (old_bit_size != new_bit_size)
         data = nir_bitcast_vector(b, data, new_bit_size);
      data = resize_vector(b, data, new_num_comps);
      nir_instr_rewrite_src(&store->instr, &store->src[1],
                            nir_src_for_ssa(data));
      store->num_components = new_num_comps;

      /* Adjust the write mask */
      write_mask = nir_component_mask_reinterpret(write_mask, old_bit_size,
                                                  new_bit_size);
      nir_intrinsic_set_write_mask(store, write_mask);
      return true;
   }

   return false;
}

static bool
opt_known_deref_mode_is(nir_builder *b, nir_intrinsic_instr *intrin)
{
   nir_variable_mode modes = nir_intrinsic_memory_modes(intrin);
   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
   if (deref == NULL)
      return false;

   nir_ssa_def *deref_is = NULL;

   if (nir_deref_mode_must_be(deref, modes))
      deref_is = nir_imm_true(b);

   if (!nir_deref_mode_may_be(deref, modes))
      deref_is = nir_imm_false(b);

   if (deref_is == NULL)
      return false;

   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, deref_is);
   nir_instr_remove(&intrin->instr);
   return true;
}

bool
nir_opt_deref_impl(nir_function_impl *impl)
{
   bool progress = false;

   nir_builder b;
   nir_builder_init(&b, impl);

   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         b.cursor = nir_before_instr(instr);

         switch (instr->type) {
         case nir_instr_type_alu: {
            nir_alu_instr *alu = nir_instr_as_alu(instr);
            if (opt_alu_of_cast(alu))
               progress = true;
            break;
         }

         case nir_instr_type_deref: {
            nir_deref_instr *deref = nir_instr_as_deref(instr);

            if (opt_restrict_deref_modes(deref))
               progress = true;

            switch (deref->deref_type) {
            case nir_deref_type_ptr_as_array:
               if (opt_deref_ptr_as_array(&b, deref))
                  progress = true;
               break;

            case nir_deref_type_cast:
               if (opt_deref_cast(&b, deref))
                  progress = true;
               break;

            default:
               /* Do nothing */
               break;
            }
            break;
         }

         case nir_instr_type_intrinsic: {
            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            switch (intrin->intrinsic) {
            case nir_intrinsic_load_deref:
               if (opt_load_vec_deref(&b, intrin))
                  progress = true;
               break;

            case nir_intrinsic_store_deref:
               if (opt_store_vec_deref(&b, intrin))
                  progress = true;
               break;

            case nir_intrinsic_deref_mode_is:
               if (opt_known_deref_mode_is(&b, intrin))
                  progress = true;
               break;

            default:
               /* Do nothing */
               break;
            }
            break;
         }

         default:
            /* Do nothing */
            break;
         }
      }
   }

   if (progress) {
      nir_metadata_preserve(impl, nir_metadata_block_index |
                                  nir_metadata_dominance);
   } else {
      nir_metadata_preserve(impl, nir_metadata_all);
   }

   return progress;
}

bool
nir_opt_deref(nir_shader *shader)
{
   bool progress = false;

   nir_foreach_function(func, shader) {
      if (func->impl && nir_opt_deref_impl(func->impl))
         progress = true;
   }

   return progress;
}