1 /*
2  * Copyright © 2018 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "nir_deref.h"
25 #include "util/hash_table.h"
26 #include "nir.h"
27 #include "nir_builder.h"
28 
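/* A cast is "trivial" when it changes nothing observable about the deref:
 * its modes, type, number of components, and bit size all match its parent.
 * Trivial casts can be looked through or removed by the passes below.
 */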
29 bool
30 nir_deref_cast_is_trivial(nir_deref_instr *cast)
31 {
32    assert(cast->deref_type == nir_deref_type_cast);
33 
34    nir_deref_instr *parent = nir_src_as_deref(cast->parent);
35    if (!parent)
36       return false;
37 
38    return cast->modes == parent->modes &&
39           cast->type == parent->type &&
40           cast->def.num_components == parent->def.num_components &&
41           cast->def.bit_size == parent->def.bit_size;
42 }
43 
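/* Builds the chain of deref instructions leading to `deref`, ordered from the
 * base (a var or cast) down to `deref` itself and terminated by NULL, with
 * trivial casts skipped.  Short chains use the embedded _short_path storage;
 * longer ones are ralloc'ed from mem_ctx, so callers must pair this with
 * nir_deref_path_finish().
 *
 * A minimal usage sketch (hypothetical caller, not part of this file):
 *
 *    nir_deref_path path;
 *    nir_deref_path_init(&path, deref, NULL);
 *    for (nir_deref_instr **p = path.path; *p; p++) {
 *       // visit each deref, base first
 *    }
 *    nir_deref_path_finish(&path);
 */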
44 void
45 nir_deref_path_init(nir_deref_path *path,
46                     nir_deref_instr *deref, void *mem_ctx)
47 {
48    assert(deref != NULL);
49 
50    /* The length of the short path is at most ARRAY_SIZE - 1 because we need
51     * room for the NULL terminator.
52     */
53    static const int max_short_path_len = ARRAY_SIZE(path->_short_path) - 1;
54 
55    int count = 0;
56 
57    nir_deref_instr **tail = &path->_short_path[max_short_path_len];
58    nir_deref_instr **head = tail;
59 
60    *tail = NULL;
61    for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
62       if (d->deref_type == nir_deref_type_cast && nir_deref_cast_is_trivial(d))
63          continue;
64       count++;
65       if (count <= max_short_path_len)
66          *(--head) = d;
67    }
68 
69    if (count <= max_short_path_len) {
70       /* If we're under max_short_path_len, just use the short path. */
71       path->path = head;
72       goto done;
73    }
74 
75 #ifndef NDEBUG
76    /* Just in case someone uses short_path by accident */
77    for (unsigned i = 0; i < ARRAY_SIZE(path->_short_path); i++)
78       path->_short_path[i] = (void *)(uintptr_t)0xdeadbeef;
79 #endif
80 
81    path->path = ralloc_array(mem_ctx, nir_deref_instr *, count + 1);
82    head = tail = path->path + count;
83    *tail = NULL;
84    for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
85       if (d->deref_type == nir_deref_type_cast && nir_deref_cast_is_trivial(d))
86          continue;
87       *(--head) = d;
88    }
89 
90 done:
91    assert(head == path->path);
92    assert(tail == head + count);
93    assert(*tail == NULL);
94 }
95 
96 void
97 nir_deref_path_finish(nir_deref_path *path)
98 {
99    if (path->path < &path->_short_path[0] ||
100        path->path > &path->_short_path[ARRAY_SIZE(path->_short_path) - 1])
101       ralloc_free(path->path);
102 }
103 
104 /**
105  * Recursively removes unused deref instructions
106  */
107 bool
108 nir_deref_instr_remove_if_unused(nir_deref_instr *instr)
109 {
110    bool progress = false;
111 
112    for (nir_deref_instr *d = instr; d; d = nir_deref_instr_parent(d)) {
113       /* If anyone is using this deref, leave it alone */
114       if (!nir_def_is_unused(&d->def))
115          break;
116 
117       nir_instr_remove(&d->instr);
118       progress = true;
119    }
120 
121    return progress;
122 }
123 
124 bool
125 nir_deref_instr_has_indirect(nir_deref_instr *instr)
126 {
127    while (instr->deref_type != nir_deref_type_var) {
128       /* Consider casts to be indirects */
129       if (instr->deref_type == nir_deref_type_cast)
130          return true;
131 
132       if ((instr->deref_type == nir_deref_type_array ||
133            instr->deref_type == nir_deref_type_ptr_as_array) &&
134           !nir_src_is_const(instr->arr.index))
135          return true;
136 
137       instr = nir_deref_instr_parent(instr);
138    }
139 
140    return false;
141 }
142 
143 bool
144 nir_deref_instr_is_known_out_of_bounds(nir_deref_instr *instr)
145 {
146    for (; instr; instr = nir_deref_instr_parent(instr)) {
147       if (instr->deref_type == nir_deref_type_array &&
148           nir_src_is_const(instr->arr.index) &&
149           nir_src_as_uint(instr->arr.index) >=
150              glsl_get_length(nir_deref_instr_parent(instr)->type))
151          return true;
152    }
153 
154    return false;
155 }
156 
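/* Returns true if the deref (or any deref built on top of it) is used as
 * anything other than a simple struct/array child deref or the address
 * operand of a load, store, or copy: for example as an array index, an
 * if-condition, data written by a store, or an unrecognized intrinsic.
 * The opts bitmask can additionally allow memcpy sources/destinations and
 * deref atomics to count as non-complex uses.
 */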
157 bool
158 nir_deref_instr_has_complex_use(nir_deref_instr *deref,
159                                 nir_deref_instr_has_complex_use_options opts)
160 {
161    nir_foreach_use_including_if(use_src, &deref->def) {
162       if (nir_src_is_if(use_src))
163          return true;
164 
165       nir_instr *use_instr = nir_src_parent_instr(use_src);
166 
167       switch (use_instr->type) {
168       case nir_instr_type_deref: {
169          nir_deref_instr *use_deref = nir_instr_as_deref(use_instr);
170 
171          /* A var deref has no sources */
172          assert(use_deref->deref_type != nir_deref_type_var);
173 
174          /* If a deref shows up in an array index or something like that, it's
175           * a complex use.
176           */
177          if (use_src != &use_deref->parent)
178             return true;
179 
180          /* Anything that isn't a basic struct or array deref is considered to
181           * be a "complex" use.  In particular, we don't allow ptr_as_array
182           * because we assume that opt_deref will turn any non-complex
183           * ptr_as_array derefs into regular array derefs eventually so passes
184           * which only want to handle simple derefs will pick them up in a
185           * later pass.
186           */
187          if (use_deref->deref_type != nir_deref_type_struct &&
188              use_deref->deref_type != nir_deref_type_array_wildcard &&
189              use_deref->deref_type != nir_deref_type_array)
190             return true;
191 
192          if (nir_deref_instr_has_complex_use(use_deref, opts))
193             return true;
194 
195          continue;
196       }
197 
198       case nir_instr_type_intrinsic: {
199          nir_intrinsic_instr *use_intrin = nir_instr_as_intrinsic(use_instr);
200          switch (use_intrin->intrinsic) {
201          case nir_intrinsic_load_deref:
202             assert(use_src == &use_intrin->src[0]);
203             continue;
204 
205          case nir_intrinsic_copy_deref:
206             assert(use_src == &use_intrin->src[0] ||
207                    use_src == &use_intrin->src[1]);
208             continue;
209 
210          case nir_intrinsic_store_deref:
211             /* A use in src[1] of a store means we're taking that pointer and
212              * writing it to a variable.  Because we have no idea who will
213              * read that variable and what they will do with the pointer, it's
214              * considered a "complex" use.  A use in src[0], on the other
215              * hand, is a simple use because we're just going to dereference
216              * it and write a value there.
217              */
218             if (use_src == &use_intrin->src[0])
219                continue;
220             return true;
221 
222          case nir_intrinsic_memcpy_deref:
223             if (use_src == &use_intrin->src[0] &&
224                 (opts & nir_deref_instr_has_complex_use_allow_memcpy_dst))
225                continue;
226             if (use_src == &use_intrin->src[1] &&
227                 (opts & nir_deref_instr_has_complex_use_allow_memcpy_src))
228                continue;
229             return true;
230 
231          case nir_intrinsic_deref_atomic:
232          case nir_intrinsic_deref_atomic_swap:
233             if (opts & nir_deref_instr_has_complex_use_allow_atomics)
234                continue;
235             return true;
236 
237          default:
238             return true;
239          }
240          unreachable("Switch default failed");
241       }
242 
243       default:
244          return true;
245       }
246    }
247 
248    return false;
249 }
250 
251 static unsigned
252 type_scalar_size_bytes(const struct glsl_type *type)
253 {
254    assert(glsl_type_is_vector_or_scalar(type) ||
255           glsl_type_is_matrix(type));
256    return glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;
257 }
258 
259 unsigned
260 nir_deref_instr_array_stride(nir_deref_instr *deref)
261 {
262    switch (deref->deref_type) {
263    case nir_deref_type_array:
264    case nir_deref_type_array_wildcard: {
265       const struct glsl_type *arr_type = nir_deref_instr_parent(deref)->type;
266       unsigned stride = glsl_get_explicit_stride(arr_type);
267 
268       if ((glsl_type_is_matrix(arr_type) &&
269            glsl_matrix_type_is_row_major(arr_type)) ||
270           (glsl_type_is_vector(arr_type) && stride == 0))
271          stride = type_scalar_size_bytes(arr_type);
272 
273       return stride;
274    }
275    case nir_deref_type_ptr_as_array:
276       return nir_deref_instr_array_stride(nir_deref_instr_parent(deref));
277    case nir_deref_type_cast:
278       return deref->cast.ptr_stride;
279    default:
280       return 0;
281    }
282 }
283 
284 static unsigned
285 type_get_array_stride(const struct glsl_type *elem_type,
286                       glsl_type_size_align_func size_align)
287 {
288    unsigned elem_size, elem_align;
289    size_align(elem_type, &elem_size, &elem_align);
290    return ALIGN_POT(elem_size, elem_align);
291 }
292 
293 static unsigned
294 struct_type_get_field_offset(const struct glsl_type *struct_type,
295                              glsl_type_size_align_func size_align,
296                              unsigned field_idx)
297 {
298    assert(glsl_type_is_struct_or_ifc(struct_type));
299    unsigned offset = 0;
300    for (unsigned i = 0; i <= field_idx; i++) {
301       unsigned elem_size, elem_align;
302       size_align(glsl_get_struct_field(struct_type, i), &elem_size, &elem_align);
303       offset = ALIGN_POT(offset, elem_align);
304       if (i < field_idx)
305          offset += elem_size;
306    }
307    return offset;
308 }
309 
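/* Walks the deref path and sums the constant offsets contributed by array
 * and struct derefs, using size_align for strides and field offsets (the
 * result is in whatever units size_align reports, typically bytes).  Every
 * array index along the path must be constant; casts contribute nothing.
 */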
310 unsigned
311 nir_deref_instr_get_const_offset(nir_deref_instr *deref,
312                                  glsl_type_size_align_func size_align)
313 {
314    nir_deref_path path;
315    nir_deref_path_init(&path, deref, NULL);
316 
317    unsigned offset = 0;
318    for (nir_deref_instr **p = &path.path[1]; *p; p++) {
319       switch ((*p)->deref_type) {
320       case nir_deref_type_array:
321          offset += nir_src_as_uint((*p)->arr.index) *
322                    type_get_array_stride((*p)->type, size_align);
323          break;
324       case nir_deref_type_struct: {
325          /* p starts at path[1], so this is safe */
326          nir_deref_instr *parent = *(p - 1);
327          offset += struct_type_get_field_offset(parent->type, size_align,
328                                                 (*p)->strct.index);
329          break;
330       }
331       case nir_deref_type_cast:
332          /* A cast doesn't contribute to the offset */
333          break;
334       default:
335          unreachable("Unsupported deref type");
336       }
337    }
338 
339    nir_deref_path_finish(&path);
340 
341    return offset;
342 }
343 
344 nir_def *
345 nir_build_deref_offset(nir_builder *b, nir_deref_instr *deref,
346                        glsl_type_size_align_func size_align)
347 {
348    nir_deref_path path;
349    nir_deref_path_init(&path, deref, NULL);
350 
351    nir_def *offset = nir_imm_intN_t(b, 0, deref->def.bit_size);
352    for (nir_deref_instr **p = &path.path[1]; *p; p++) {
353       switch ((*p)->deref_type) {
354       case nir_deref_type_array:
355       case nir_deref_type_ptr_as_array: {
356          nir_def *index = (*p)->arr.index.ssa;
357          int stride = type_get_array_stride((*p)->type, size_align);
358          offset = nir_iadd(b, offset, nir_amul_imm(b, index, stride));
359          break;
360       }
361       case nir_deref_type_struct: {
362          /* p starts at path[1], so this is safe */
363          nir_deref_instr *parent = *(p - 1);
364          unsigned field_offset =
365             struct_type_get_field_offset(parent->type, size_align,
366                                          (*p)->strct.index);
367          offset = nir_iadd_imm(b, offset, field_offset);
368          break;
369       }
370       case nir_deref_type_cast:
371          /* A cast doesn't contribute to the offset */
372          break;
373       default:
374          unreachable("Unsupported deref type");
375       }
376    }
377 
378    nir_deref_path_finish(&path);
379 
380    return offset;
381 }
382 
383 bool
384 nir_remove_dead_derefs_impl(nir_function_impl *impl)
385 {
386    bool progress = false;
387 
388    nir_foreach_block(block, impl) {
389       nir_foreach_instr_safe(instr, block) {
390          if (instr->type == nir_instr_type_deref &&
391              nir_deref_instr_remove_if_unused(nir_instr_as_deref(instr)))
392             progress = true;
393       }
394    }
395 
396    if (progress) {
397       nir_metadata_preserve(impl, nir_metadata_block_index |
398                                      nir_metadata_dominance);
399    } else {
400       nir_metadata_preserve(impl, nir_metadata_all);
401    }
402 
403    return progress;
404 }
405 
406 bool
407 nir_remove_dead_derefs(nir_shader *shader)
408 {
409    bool progress = false;
410    nir_foreach_function_impl(impl, shader) {
411       if (nir_remove_dead_derefs_impl(impl))
412          progress = true;
413    }
414 
415    return progress;
416 }
417 
418 static bool
419 nir_fixup_deref_modes_instr(UNUSED struct nir_builder *b, nir_instr *instr, UNUSED void *data)
420 {
421    if (instr->type != nir_instr_type_deref)
422       return false;
423 
424    nir_deref_instr *deref = nir_instr_as_deref(instr);
425    nir_variable_mode parent_modes;
426    if (deref->deref_type == nir_deref_type_var) {
427       parent_modes = deref->var->data.mode;
428    } else {
429       nir_deref_instr *parent = nir_src_as_deref(deref->parent);
430       if (parent == NULL) {
431          /* Cast to some non-deref value, nothing to propagate. */
432          assert(deref->deref_type == nir_deref_type_cast);
433          return false;
434       }
435 
436       /* It's safe to propagate a specific mode into a more generic one
437        * but never the other way around.
438        */
439       if (util_bitcount(parent->modes) != 1)
440          return false;
441 
442       parent_modes = parent->modes;
443    }
444 
445    if (deref->modes == parent_modes)
446       return false;
447 
448    deref->modes = parent_modes;
449    return true;
450 }
451 
452 void
453 nir_fixup_deref_modes(nir_shader *shader)
454 {
455    nir_shader_instructions_pass(shader, nir_fixup_deref_modes_instr,
456                                 nir_metadata_block_index |
457                                    nir_metadata_dominance |
458                                    nir_metadata_live_defs |
459                                    nir_metadata_instr_index,
460                                 NULL);
461 }
462 
463 static bool
464 nir_fixup_deref_types_instr(UNUSED struct nir_builder *b, nir_instr *instr, UNUSED void *data)
465 {
466    if (instr->type != nir_instr_type_deref)
467       return false;
468 
469    nir_deref_instr *deref = nir_instr_as_deref(instr);
470    const struct glsl_type *parent_derived_type;
471    if (deref->deref_type == nir_deref_type_var) {
472       parent_derived_type = deref->var->type;
473    } else if (deref->deref_type == nir_deref_type_array ||
474               deref->deref_type == nir_deref_type_struct) {
475       nir_deref_instr *parent = nir_src_as_deref(deref->parent);
476       if (deref->deref_type == nir_deref_type_array) {
477          parent_derived_type = glsl_get_array_element(parent->type);
478       } else if (deref->deref_type == nir_deref_type_struct) {
479          parent_derived_type =
480             glsl_get_struct_field(parent->type, deref->strct.index);
481       } else {
482          unreachable("Unsupported deref type");
483       }
484    } else {
485       unreachable("Unsupported deref type");
486    }
487 
488    if (deref->type == parent_derived_type)
489       return false;
490 
491    deref->type = parent_derived_type;
492    return true;
493 }
494 
495 /* Update deref types when array sizes have changed. */
496 void
497 nir_fixup_deref_types(nir_shader *shader)
498 {
499    nir_shader_instructions_pass(shader, nir_fixup_deref_types_instr,
500                                 nir_metadata_block_index |
501                                    nir_metadata_dominance |
502                                    nir_metadata_live_defs |
503                                    nir_metadata_instr_index,
504                                 NULL);
505 }
506 
507 static bool
508 modes_may_alias(nir_variable_mode a, nir_variable_mode b)
509 {
510    /* Generic pointers can alias with SSBOs */
511    if ((a & (nir_var_mem_ssbo | nir_var_mem_global)) &&
512        (b & (nir_var_mem_ssbo | nir_var_mem_global)))
513       return true;
514 
515    /* Pointers can only alias if they share a mode. */
516    return a & b;
517 }
518 
519 ALWAYS_INLINE static nir_deref_compare_result
520 compare_deref_paths(nir_deref_path *a_path, nir_deref_path *b_path,
521                     unsigned *i, bool (*stop_fn)(const nir_deref_instr *))
522 {
523    /* Start off assuming they fully compare.  We ignore equality for now.  In
524     * the end, we'll determine that by containment.
525     */
526    nir_deref_compare_result result = nir_derefs_may_alias_bit |
527                                      nir_derefs_a_contains_b_bit |
528                                      nir_derefs_b_contains_a_bit;
529 
530    nir_deref_instr **a = a_path->path;
531    nir_deref_instr **b = b_path->path;
532 
533    for (; a[*i] != NULL; (*i)++) {
534       if (a[*i] != b[*i])
535          break;
536 
537       if (stop_fn && stop_fn(a[*i]))
538          break;
539    }
540 
541    /* We're at either the tail or the divergence point between the two deref
542     * paths.  Look to see if either contains cast or a ptr_as_array deref.  If
543     * it does we don't know how to safely make any inferences.  Hopefully,
544     * nir_opt_deref will clean most of these up and we can start inferring
545     * things again.
546     *
547     * In theory, we could do a bit better.  For instance, we could detect the
548     * case where we have exactly one ptr_as_array deref in the chain after the
549     * divergence point and it's matched in both chains and the two chains have
550     * different constant indices.
551     */
552    for (unsigned j = *i; a[j] != NULL; j++) {
553       if (stop_fn && stop_fn(a[j]))
554          break;
555 
556       if (a[j]->deref_type == nir_deref_type_cast ||
557           a[j]->deref_type == nir_deref_type_ptr_as_array)
558          return nir_derefs_may_alias_bit;
559    }
560    for (unsigned j = *i; b[j] != NULL; j++) {
561       if (stop_fn && stop_fn(b[j]))
562          break;
563 
564       if (b[j]->deref_type == nir_deref_type_cast ||
565           b[j]->deref_type == nir_deref_type_ptr_as_array)
566          return nir_derefs_may_alias_bit;
567    }
568 
569    for (; a[*i] != NULL && b[*i] != NULL; (*i)++) {
570       if (stop_fn && (stop_fn(a[*i]) || stop_fn(b[*i])))
571          break;
572 
573       switch (a[*i]->deref_type) {
574       case nir_deref_type_array:
575       case nir_deref_type_array_wildcard: {
576          assert(b[*i]->deref_type == nir_deref_type_array ||
577                 b[*i]->deref_type == nir_deref_type_array_wildcard);
578 
579          if (a[*i]->deref_type == nir_deref_type_array_wildcard) {
580             if (b[*i]->deref_type != nir_deref_type_array_wildcard)
581                result &= ~nir_derefs_b_contains_a_bit;
582          } else if (b[*i]->deref_type == nir_deref_type_array_wildcard) {
583             if (a[*i]->deref_type != nir_deref_type_array_wildcard)
584                result &= ~nir_derefs_a_contains_b_bit;
585          } else {
586             assert(a[*i]->deref_type == nir_deref_type_array &&
587                    b[*i]->deref_type == nir_deref_type_array);
588 
589             if (nir_src_is_const(a[*i]->arr.index) &&
590                 nir_src_is_const(b[*i]->arr.index)) {
591                /* If they're both direct and have different offsets, they
592                 * don't even alias much less anything else.
593                 */
594                if (nir_src_as_uint(a[*i]->arr.index) !=
595                    nir_src_as_uint(b[*i]->arr.index))
596                   return nir_derefs_do_not_alias;
597             } else if (a[*i]->arr.index.ssa == b[*i]->arr.index.ssa) {
598                /* They're the same indirect, continue on */
599             } else {
600                /* They're not the same index so we can't prove anything about
601                 * containment.
602                 */
603                result &= ~(nir_derefs_a_contains_b_bit | nir_derefs_b_contains_a_bit);
604             }
605          }
606          break;
607       }
608 
609       case nir_deref_type_struct: {
610          /* If they're different struct members, they don't even alias */
611          if (a[*i]->strct.index != b[*i]->strct.index)
612             return nir_derefs_do_not_alias;
613          break;
614       }
615 
616       default:
617          unreachable("Invalid deref type");
618       }
619    }
620 
621    /* If a is longer than b, then it can't contain b.  If neither a[i] nor
622     * b[i] are NULL then we aren't at the end of the chain and we know nothing
623     * about containment.
624     */
625    if (a[*i] != NULL)
626       result &= ~nir_derefs_a_contains_b_bit;
627    if (b[*i] != NULL)
628       result &= ~nir_derefs_b_contains_a_bit;
629 
630    /* If a contains b and b contains a they must be equal. */
631    if ((result & nir_derefs_a_contains_b_bit) &&
632        (result & nir_derefs_b_contains_a_bit))
633       result |= nir_derefs_equal_bit;
634 
635    return result;
636 }
637 
638 static bool
639 is_interface_struct_deref(const nir_deref_instr *deref)
640 {
641    if (deref->deref_type == nir_deref_type_struct) {
642       assert(glsl_type_is_struct_or_ifc(nir_deref_instr_parent(deref)->type));
643       return true;
644    } else {
645       return false;
646    }
647 }
648 
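/* Compares two pre-built deref paths and returns a bitmask built from
 * nir_derefs_may_alias_bit, nir_derefs_a_contains_b_bit,
 * nir_derefs_b_contains_a_bit, and nir_derefs_equal_bit (set when each
 * contains the other).  SSBO bindings, shared blocks, and non-identical
 * casts are handled conservatively, as described in the body below.
 */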
649 nir_deref_compare_result
650 nir_compare_deref_paths(nir_deref_path *a_path,
651                         nir_deref_path *b_path)
652 {
653    if (!modes_may_alias(b_path->path[0]->modes, a_path->path[0]->modes))
654       return nir_derefs_do_not_alias;
655 
656    if (a_path->path[0]->deref_type != b_path->path[0]->deref_type)
657       return nir_derefs_may_alias_bit;
658 
659    unsigned path_idx = 1;
660    if (a_path->path[0]->deref_type == nir_deref_type_var) {
661       const nir_variable *a_var = a_path->path[0]->var;
662       const nir_variable *b_var = b_path->path[0]->var;
663 
664       /* If we got here, the two variables must have the same mode.  The
665        * only way modes_may_alias() can return true for two different modes
666        * is if one is global and the other ssbo.  However, Global variables
667        * only exist in OpenCL and SSBOs don't exist there.  No API allows
668        * both for variables.
669        */
670       assert(a_var->data.mode == b_var->data.mode);
671 
672       switch (a_var->data.mode) {
673       case nir_var_mem_ssbo: {
674          nir_deref_compare_result binding_compare;
675          if (a_var == b_var) {
676             binding_compare = compare_deref_paths(a_path, b_path, &path_idx,
677                                                   is_interface_struct_deref);
678          } else {
679             binding_compare = nir_derefs_do_not_alias;
680          }
681 
682          if (binding_compare & nir_derefs_equal_bit)
683             break;
684 
685          /* If the binding derefs can't alias and at least one is RESTRICT,
686           * then we know they can't alias.
687           */
688          if (!(binding_compare & nir_derefs_may_alias_bit) &&
689              ((a_var->data.access & ACCESS_RESTRICT) ||
690               (b_var->data.access & ACCESS_RESTRICT)))
691             return nir_derefs_do_not_alias;
692 
693          return nir_derefs_may_alias_bit;
694       }
695 
696       case nir_var_mem_shared:
697          if (a_var == b_var)
698             break;
699 
700          /* Per SPV_KHR_workgroup_memory_explicit_layout and
701           * GL_EXT_shared_memory_block, shared blocks alias each other.
702           * We will have either all blocks or all non-blocks.
703           */
704          if (glsl_type_is_interface(a_var->type) ||
705              glsl_type_is_interface(b_var->type)) {
706             assert(glsl_type_is_interface(a_var->type) &&
707                    glsl_type_is_interface(b_var->type));
708             return nir_derefs_may_alias_bit;
709          }
710 
711          /* Otherwise, distinct shared vars don't alias */
712          return nir_derefs_do_not_alias;
713 
714       default:
715          /* For any other variable types, if we can chase them back to the
716           * variable, and the variables are different, they don't alias.
717           */
718          if (a_var == b_var)
719             break;
720 
721          return nir_derefs_do_not_alias;
722       }
723    } else {
724       assert(a_path->path[0]->deref_type == nir_deref_type_cast);
725       /* If they're not exactly the same cast, it's hard to compare them so we
726        * just assume they alias.  Comparing casts is tricky as there are lots
727        * of things such as mode, type, etc. that all have to work out; for now,
728        * we just assume nir_opt_deref will combine them and compare the deref
729        * instructions.
730        *
731        * TODO: At some point in the future, we could be clever and understand
732        * that a float[] and int[] have the same layout and aliasing structure
733        * but double[] and vec3[] do not and we could potentially be a bit
734        * smarter here.
735        */
736       if (a_path->path[0] != b_path->path[0])
737          return nir_derefs_may_alias_bit;
738    }
739 
740    return compare_deref_paths(a_path, b_path, &path_idx, NULL);
741 }
742 
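/* A minimal sketch of how a pass might consume the result bits
 * (hypothetical caller, not part of this file):
 *
 *    nir_deref_compare_result cmp = nir_compare_derefs(a, b);
 *    if (cmp & nir_derefs_equal_bit) {
 *       // a and b name exactly the same memory
 *    } else if (!(cmp & nir_derefs_may_alias_bit)) {
 *       // provably disjoint; reordering the two accesses is safe
 *    }
 */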
743 nir_deref_compare_result
744 nir_compare_derefs(nir_deref_instr *a, nir_deref_instr *b)
745 {
746    if (a == b) {
747       return nir_derefs_equal_bit | nir_derefs_may_alias_bit |
748              nir_derefs_a_contains_b_bit | nir_derefs_b_contains_a_bit;
749    }
750 
751    nir_deref_path a_path, b_path;
752    nir_deref_path_init(&a_path, a, NULL);
753    nir_deref_path_init(&b_path, b, NULL);
754    assert(a_path.path[0]->deref_type == nir_deref_type_var ||
755           a_path.path[0]->deref_type == nir_deref_type_cast);
756    assert(b_path.path[0]->deref_type == nir_deref_type_var ||
757           b_path.path[0]->deref_type == nir_deref_type_cast);
758 
759    nir_deref_compare_result result = nir_compare_deref_paths(&a_path, &b_path);
760 
761    nir_deref_path_finish(&a_path);
762    nir_deref_path_finish(&b_path);
763 
764    return result;
765 }
766 
767 nir_deref_path *
768 nir_get_deref_path(void *mem_ctx, nir_deref_and_path *deref)
769 {
770    if (!deref->_path) {
771       deref->_path = ralloc(mem_ctx, nir_deref_path);
772       nir_deref_path_init(deref->_path, deref->instr, mem_ctx);
773    }
774    return deref->_path;
775 }
776 
777 nir_deref_compare_result
778 nir_compare_derefs_and_paths(void *mem_ctx,
779                              nir_deref_and_path *a,
780                              nir_deref_and_path *b)
781 {
782    if (a->instr == b->instr) /* nir_compare_derefs has a fast path if a == b */
783       return nir_compare_derefs(a->instr, b->instr);
784 
785    return nir_compare_deref_paths(nir_get_deref_path(mem_ctx, a),
786                                   nir_get_deref_path(mem_ctx, b));
787 }
788 
789 struct rematerialize_deref_state {
790    bool progress;
791    nir_builder builder;
792    nir_block *block;
793 };
794 
795 static nir_deref_instr *
796 rematerialize_deref_in_block(nir_deref_instr *deref,
797                              struct rematerialize_deref_state *state)
798 {
799    if (deref->instr.block == state->block)
800       return deref;
801 
802    nir_builder *b = &state->builder;
803    nir_deref_instr *new_deref =
804       nir_deref_instr_create(b->shader, deref->deref_type);
805    new_deref->modes = deref->modes;
806    new_deref->type = deref->type;
807 
808    if (deref->deref_type == nir_deref_type_var) {
809       new_deref->var = deref->var;
810    } else {
811       nir_deref_instr *parent = nir_src_as_deref(deref->parent);
812       if (parent) {
813          parent = rematerialize_deref_in_block(parent, state);
814          new_deref->parent = nir_src_for_ssa(&parent->def);
815       } else {
816          new_deref->parent = nir_src_for_ssa(deref->parent.ssa);
817       }
818    }
819 
820    switch (deref->deref_type) {
821    case nir_deref_type_var:
822    case nir_deref_type_array_wildcard:
823       /* Nothing more to do */
824       break;
825 
826    case nir_deref_type_cast:
827       new_deref->cast.ptr_stride = deref->cast.ptr_stride;
828       new_deref->cast.align_mul = deref->cast.align_mul;
829       new_deref->cast.align_offset = deref->cast.align_offset;
830       break;
831 
832    case nir_deref_type_array:
833    case nir_deref_type_ptr_as_array:
834       assert(!nir_src_as_deref(deref->arr.index));
835       new_deref->arr.index = nir_src_for_ssa(deref->arr.index.ssa);
836       break;
837 
838    case nir_deref_type_struct:
839       new_deref->strct.index = deref->strct.index;
840       break;
841 
842    default:
843       unreachable("Invalid deref instruction type");
844    }
845 
846    nir_def_init(&new_deref->instr, &new_deref->def,
847                 deref->def.num_components, deref->def.bit_size);
848    nir_builder_instr_insert(b, &new_deref->instr);
849 
850    return new_deref;
851 }
852 
853 static bool
854 rematerialize_deref_src(nir_src *src, void *_state)
855 {
856    struct rematerialize_deref_state *state = _state;
857 
858    nir_deref_instr *deref = nir_src_as_deref(*src);
859    if (!deref)
860       return true;
861 
862    nir_deref_instr *block_deref = rematerialize_deref_in_block(deref, state);
863    if (block_deref != deref) {
864       nir_src_rewrite(src, &block_deref->def);
865       nir_deref_instr_remove_if_unused(deref);
866       state->progress = true;
867    }
868 
869    return true;
870 }
871 
872 bool
873 nir_rematerialize_deref_in_use_blocks(nir_deref_instr *instr)
874 {
875    if (nir_deref_instr_remove_if_unused(instr))
876       return true;
877 
878    struct rematerialize_deref_state state = {
879       .builder = nir_builder_create(nir_cf_node_get_function(&instr->instr.block->cf_node)),
880    };
881 
882    nir_foreach_use_safe(use, &instr->def) {
883       nir_instr *parent = nir_src_parent_instr(use);
884       if (parent->block == instr->instr.block)
885          continue;
886 
887       /* If a deref is used in a phi, we can't rematerialize it, as the new
888        * derefs would appear before the phi, which is not valid.
889        */
890       if (parent->type == nir_instr_type_phi)
891          continue;
892 
893       state.block = parent->block;
894       state.builder.cursor = nir_before_instr(parent);
895       rematerialize_deref_src(use, &state);
896    }
897 
898    return state.progress;
899 }
900 
901 /** Re-materialize derefs in every block
902  *
903  * This pass re-materializes deref instructions in every block in which it is
904  * used.  After this pass has been run, every use of a deref will be of a
905  * deref in the same block as the use.  Also, all unused derefs will be
906  * deleted as a side-effect.
907  *
908  * Derefs used as sources of phi instructions are not rematerialized.
909  */
910 bool
911 nir_rematerialize_derefs_in_use_blocks_impl(nir_function_impl *impl)
912 {
913    bool progress = false;
914    nir_foreach_block_unstructured(block, impl) {
915       nir_foreach_instr_safe(instr, block) {
916          if (instr->type == nir_instr_type_deref) {
917             nir_deref_instr *deref = nir_instr_as_deref(instr);
918             progress |= nir_rematerialize_deref_in_use_blocks(deref);
919          }
920       }
921 
922 #ifndef NDEBUG
923       nir_if *following_if = nir_block_get_following_if(block);
924       if (following_if)
925          assert(!nir_src_as_deref(following_if->condition));
926 #endif
927    }
928 
929    return progress;
930 }
931 
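/* After a parent deref's type has been rewritten, walk all child derefs and
 * re-derive their types from the new parent type.  Recursion stops at casts,
 * which carry their own type.
 */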
932 static void
933 nir_deref_instr_fixup_child_types(nir_deref_instr *parent)
934 {
935    nir_foreach_use(use, &parent->def) {
936       if (nir_src_parent_instr(use)->type != nir_instr_type_deref)
937          continue;
938 
939       nir_deref_instr *child = nir_instr_as_deref(nir_src_parent_instr(use));
940       switch (child->deref_type) {
941       case nir_deref_type_var:
942          unreachable("nir_deref_type_var cannot be a child");
943 
944       case nir_deref_type_array:
945       case nir_deref_type_array_wildcard:
946          child->type = glsl_get_array_element(parent->type);
947          break;
948 
949       case nir_deref_type_ptr_as_array:
950          child->type = parent->type;
951          break;
952 
953       case nir_deref_type_struct:
954          child->type = glsl_get_struct_field(parent->type,
955                                              child->strct.index);
956          break;
957 
958       case nir_deref_type_cast:
959          /* We stop the recursion here */
960          continue;
961       }
962 
963       /* Recurse into children */
964       nir_deref_instr_fixup_child_types(child);
965    }
966 }
967 
968 static bool
969 opt_alu_of_cast(nir_alu_instr *alu)
970 {
971    bool progress = false;
972 
973    for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
974       nir_instr *src_instr = alu->src[i].src.ssa->parent_instr;
975       if (src_instr->type != nir_instr_type_deref)
976          continue;
977 
978       nir_deref_instr *src_deref = nir_instr_as_deref(src_instr);
979       if (src_deref->deref_type != nir_deref_type_cast)
980          continue;
981 
982       nir_src_rewrite(&alu->src[i].src, src_deref->parent.ssa);
983       progress = true;
984    }
985 
986    return progress;
987 }
988 
989 static bool
990 is_trivial_array_deref_cast(nir_deref_instr *cast)
991 {
992    assert(nir_deref_cast_is_trivial(cast));
993 
994    nir_deref_instr *parent = nir_src_as_deref(cast->parent);
995 
996    if (parent->deref_type == nir_deref_type_array) {
997       return cast->cast.ptr_stride ==
998              glsl_get_explicit_stride(nir_deref_instr_parent(parent)->type);
999    } else if (parent->deref_type == nir_deref_type_ptr_as_array) {
1000       return cast->cast.ptr_stride ==
1001              nir_deref_instr_array_stride(parent);
1002    } else {
1003       return false;
1004    }
1005 }
1006 
1007 static bool
1008 is_deref_ptr_as_array(nir_instr *instr)
1009 {
1010    return instr->type == nir_instr_type_deref &&
1011           nir_instr_as_deref(instr)->deref_type == nir_deref_type_ptr_as_array;
1012 }
1013 
1014 static bool
1015 opt_remove_restricting_cast_alignments(nir_deref_instr *cast)
1016 {
1017    assert(cast->deref_type == nir_deref_type_cast);
1018    if (cast->cast.align_mul == 0)
1019       return false;
1020 
1021    nir_deref_instr *parent = nir_src_as_deref(cast->parent);
1022    if (parent == NULL)
1023       return false;
1024 
1025    /* Don't use any default alignment for this check.  We don't want to fall
1026     * back to type alignment too early in case we find out later that we're
1027     * somehow a child of a packed struct.
1028     */
1029    uint32_t parent_mul, parent_offset;
1030    if (!nir_get_explicit_deref_align(parent, false /* default_to_type_align */,
1031                                      &parent_mul, &parent_offset))
1032       return false;
1033 
1034    /* If this cast increases the alignment, we want to keep it.
1035     *
1036     * There is a possibility that the larger alignment provided by this cast
1037     * somehow disagrees with the smaller alignment further up the deref chain.
1038     * In that case, we choose to favor the alignment closer to the actual
1039     * memory operation which, in this case, is the cast and not its parent so
1040     * keeping the cast alignment is the right thing to do.
1041     */
1042    if (parent_mul < cast->cast.align_mul)
1043       return false;
1044 
1045    /* If we've gotten here, we have a parent deref with an align_mul at least
1046     * as large as ours so we can potentially throw away the alignment
1047     * information on this deref.  There are two cases to consider here:
1048     *
1049     *  1. We can chase the deref all the way back to the variable.  In this
1050     *     case, we have "perfect" knowledge, modulo indirect array derefs.
1051     *     Unless we've done something wrong in our indirect/wildcard stride
1052     *     calculations, our knowledge from the deref walk is better than the
1053     *     client's.
1054     *
1055     *  2. We can't chase it all the way back to the variable.  In this case,
1056     *     because our call to nir_get_explicit_deref_align(parent, ...) above
1057     *     passes default_to_type_align=false, the only way we can even
1058     *     get here is if something further up the deref chain has a cast with
1059     *     an alignment which can only happen if we get an alignment from the
1060     *     client (most likely a decoration in the SPIR-V).  If the client has
1061     *     provided us with two conflicting alignments in the deref chain,
1062     *     that's their fault and we can do whatever we want.
1063     *
1064     * In either case, we should be well within our rights, at this point, to throw
1065     * away the alignment information on this deref.  However, to be "nice" to
1066     * weird clients, we do one more check.  It really shouldn't happen but
1067     * it's possible that the parent's alignment offset disagrees with the
1068     * cast's alignment offset.  In this case, we consider the cast as
1069     * providing more information (or at least more valid information) and keep
1070     * it even if the align_mul from the parent is larger.
1071     */
1072    assert(cast->cast.align_mul <= parent_mul);
1073    if (parent_offset % cast->cast.align_mul != cast->cast.align_offset)
1074       return false;
1075 
1076    /* If we got here, the parent has better alignment information than the
1077     * child and we can get rid of the child alignment information.
1078     */
1079    cast->cast.align_mul = 0;
1080    cast->cast.align_offset = 0;
1081    return true;
1082 }
1083 
1084 /**
1085  * Remove casts that just wrap other casts.
1086  */
1087 static bool
1088 opt_remove_cast_cast(nir_deref_instr *cast)
1089 {
1090    nir_deref_instr *parent = nir_deref_instr_parent(cast);
1091    if (parent == NULL || parent->deref_type != nir_deref_type_cast)
1092       return false;
1093 
1094    /* Copy align info from the parent cast if needed
1095     *
1096     * In the case that align_mul = 0, the alignment for this cast is inherited
1097     * from the parent deref (if any). If we aren't careful, removing our
1098     * parent cast from the chain may lose alignment information so we need to
1099     * copy the parent's alignment information (if any).
1100     *
1101     * opt_remove_restricting_cast_alignments() above is run before this pass
1102     * and will have cleared our alignment (set align_mul = 0) in the case
1103     * where the parent's alignment information is somehow superior.
1104     */
1105    if (cast->cast.align_mul == 0) {
1106       cast->cast.align_mul = parent->cast.align_mul;
1107       cast->cast.align_offset = parent->cast.align_offset;
1108    }
1109 
1110    nir_src_rewrite(&cast->parent, parent->parent.ssa);
1111    return true;
1112 }
1113 
1114 /* Restrict variable modes in casts.
1115  *
1116  * If we know from something higher up the deref chain that the deref has a
1117  * specific mode, we can cast to more general and back but we can never cast
1118  * across modes.  For non-cast derefs, we should only ever do anything here if
1119  * the parent eventually comes from a cast that we restricted earlier.
1120  */
1121 static bool
1122 opt_restrict_deref_modes(nir_deref_instr *deref)
1123 {
1124    if (deref->deref_type == nir_deref_type_var) {
1125       assert(deref->modes == deref->var->data.mode);
1126       return false;
1127    }
1128 
1129    nir_deref_instr *parent = nir_src_as_deref(deref->parent);
1130    if (parent == NULL || parent->modes == deref->modes)
1131       return false;
1132 
1133    assert(parent->modes & deref->modes);
1134    deref->modes &= parent->modes;
1135    return true;
1136 }
1137 
1138 static bool
1139 opt_remove_sampler_cast(nir_deref_instr *cast)
1140 {
1141    assert(cast->deref_type == nir_deref_type_cast);
1142    nir_deref_instr *parent = nir_src_as_deref(cast->parent);
1143    if (parent == NULL)
1144       return false;
1145 
1146    /* Strip both types down to their non-array type and bail if there are any
1147     * discrepancies in array lengths.
1148     */
1149    const struct glsl_type *parent_type = parent->type;
1150    const struct glsl_type *cast_type = cast->type;
1151    while (glsl_type_is_array(parent_type) && glsl_type_is_array(cast_type)) {
1152       if (glsl_get_length(parent_type) != glsl_get_length(cast_type))
1153          return false;
1154       parent_type = glsl_get_array_element(parent_type);
1155       cast_type = glsl_get_array_element(cast_type);
1156    }
1157 
1158    if (!glsl_type_is_sampler(parent_type))
1159       return false;
1160 
1161    if (cast_type != glsl_bare_sampler_type() &&
1162        (glsl_type_is_bare_sampler(parent_type) ||
1163         cast_type != glsl_sampler_type_to_texture(parent_type)))
1164       return false;
1165 
1166    /* We're a cast from a more detailed sampler type to a bare sampler or a
1167     * texture type with the same dimensionality.
1168     */
1169    nir_def_rewrite_uses(&cast->def,
1170                         &parent->def);
1171    nir_instr_remove(&cast->instr);
1172 
1173    /* Recursively crawl the deref tree and clean up types */
1174    nir_deref_instr_fixup_child_types(parent);
1175 
1176    return true;
1177 }
1178 
1179 /**
1180  * Is this casting a struct to a contained struct.
1181  * struct a { struct b field0 };
1182  * ssa_5 is structa;
1183  * deref_cast (structb *)ssa_5 (function_temp structb);
1184  * converts to
1185  * deref_struct &ssa_5->field0 (function_temp structb);
1186  * This allows subsequent copy propagation to work.
1187  */
1188 static bool
1189 opt_replace_struct_wrapper_cast(nir_builder *b, nir_deref_instr *cast)
1190 {
1191    nir_deref_instr *parent = nir_src_as_deref(cast->parent);
1192    if (!parent)
1193       return false;
1194 
1195    if (cast->cast.align_mul > 0)
1196       return false;
1197 
1198    if (!glsl_type_is_struct(parent->type))
1199       return false;
1200 
1201    /* Empty struct */
1202    if (glsl_get_length(parent->type) < 1)
1203       return false;
1204 
1205    if (glsl_get_struct_field_offset(parent->type, 0) != 0)
1206       return false;
1207 
1208    const struct glsl_type *field_type = glsl_get_struct_field(parent->type, 0);
1209    if (cast->type != field_type)
1210       return false;
1211 
1212    /* we can't drop the stride information */
1213    if (cast->cast.ptr_stride != glsl_get_explicit_stride(field_type))
1214       return false;
1215 
1216    nir_deref_instr *replace = nir_build_deref_struct(b, parent, 0);
1217    nir_def_rewrite_uses(&cast->def, &replace->def);
1218    nir_deref_instr_remove_if_unused(cast);
1219    return true;
1220 }
1221 
1222 static bool
1223 opt_deref_cast(nir_builder *b, nir_deref_instr *cast)
1224 {
1225    bool progress = false;
1226 
1227    progress |= opt_remove_restricting_cast_alignments(cast);
1228 
1229    if (opt_replace_struct_wrapper_cast(b, cast))
1230       return true;
1231 
1232    if (opt_remove_sampler_cast(cast))
1233       return true;
1234 
1235    progress |= opt_remove_cast_cast(cast);
1236    if (!nir_deref_cast_is_trivial(cast))
1237       return progress;
1238 
1239    /* If this deref still contains useful alignment information, we don't want
1240     * to delete it.
1241     */
1242    if (cast->cast.align_mul > 0)
1243       return progress;
1244 
1245    bool trivial_array_cast = is_trivial_array_deref_cast(cast);
1246 
1247    nir_foreach_use_including_if_safe(use_src, &cast->def) {
1248       assert(!nir_src_is_if(use_src) && "there cannot be if-uses");
1249 
1250       /* If this isn't a trivial array cast, we can't propagate into
1251        * ptr_as_array derefs.
1252        */
1253       if (is_deref_ptr_as_array(nir_src_parent_instr(use_src)) &&
1254           !trivial_array_cast)
1255          continue;
1256 
1257       nir_src_rewrite(use_src, cast->parent.ssa);
1258       progress = true;
1259    }
1260 
1261    if (nir_deref_instr_remove_if_unused(cast))
1262       progress = true;
1263 
1264    return progress;
1265 }
1266 
1267 static bool
1268 opt_deref_ptr_as_array(nir_builder *b, nir_deref_instr *deref)
1269 {
1270    assert(deref->deref_type == nir_deref_type_ptr_as_array);
1271 
1272    nir_deref_instr *parent = nir_deref_instr_parent(deref);
1273 
1274    if (nir_src_is_const(deref->arr.index) &&
1275        nir_src_as_int(deref->arr.index) == 0) {
1276       /* If it's a ptr_as_array deref with an index of 0, it does nothing
1277        * and we can just replace its uses with its parent, unless it has
1278        * alignment information.
1279        *
1280        * The source of a ptr_as_array deref always has a deref_type of
1281        * nir_deref_type_array or nir_deref_type_cast.  If it's a cast, it
1282        * may be trivial and we may be able to get rid of that too.  Any
1283        * trivial cast of trivial cast cases should be handled already by
1284        * opt_deref_cast() above.
1285        */
1286       if (parent->deref_type == nir_deref_type_cast &&
1287           parent->cast.align_mul == 0 &&
1288           nir_deref_cast_is_trivial(parent))
1289          parent = nir_deref_instr_parent(parent);
1290       nir_def_rewrite_uses(&deref->def,
1291                            &parent->def);
1292       nir_instr_remove(&deref->instr);
1293       return true;
1294    }
1295 
1296    if (parent->deref_type != nir_deref_type_array &&
1297        parent->deref_type != nir_deref_type_ptr_as_array)
1298       return false;
1299 
1300    deref->arr.in_bounds &= parent->arr.in_bounds;
1301 
1302    nir_def *new_idx = nir_iadd(b, parent->arr.index.ssa,
1303                                deref->arr.index.ssa);
1304 
1305    deref->deref_type = parent->deref_type;
1306    nir_src_rewrite(&deref->parent, parent->parent.ssa);
1307    nir_src_rewrite(&deref->arr.index, new_idx);
1308    return true;
1309 }
1310 
1311 static bool
1312 is_vector_bitcast_deref(nir_deref_instr *cast,
1313                         nir_component_mask_t mask,
1314                         bool is_write)
1315 {
1316    if (cast->deref_type != nir_deref_type_cast)
1317       return false;
1318 
1319    /* Don't throw away useful alignment information */
1320    if (cast->cast.align_mul > 0)
1321       return false;
1322 
1323    /* It has to be a cast of another deref */
1324    nir_deref_instr *parent = nir_src_as_deref(cast->parent);
1325    if (parent == NULL)
1326       return false;
1327 
1328    /* The parent has to be a vector or scalar */
1329    if (!glsl_type_is_vector_or_scalar(parent->type))
1330       return false;
1331 
1332    /* Don't bother with 1-bit types */
1333    unsigned cast_bit_size = glsl_get_bit_size(cast->type);
1334    unsigned parent_bit_size = glsl_get_bit_size(parent->type);
1335    if (cast_bit_size == 1 || parent_bit_size == 1)
1336       return false;
1337 
1338    /* A strided vector type means it's not tightly packed */
1339    if (glsl_get_explicit_stride(cast->type) ||
1340        glsl_get_explicit_stride(parent->type))
1341       return false;
1342 
1343    assert(cast_bit_size > 0 && cast_bit_size % 8 == 0);
1344    assert(parent_bit_size > 0 && parent_bit_size % 8 == 0);
1345    unsigned bytes_used = util_last_bit(mask) * (cast_bit_size / 8);
1346    unsigned parent_bytes = glsl_get_vector_elements(parent->type) *
1347                            (parent_bit_size / 8);
1348    if (bytes_used > parent_bytes)
1349       return false;
1350 
1351    if (is_write && !nir_component_mask_can_reinterpret(mask, cast_bit_size,
1352                                                        parent_bit_size))
1353       return false;
1354 
1355    return true;
1356 }
1357 
1358 static nir_def *
1359 resize_vector(nir_builder *b, nir_def *data, unsigned num_components)
1360 {
1361    if (num_components == data->num_components)
1362       return data;
1363 
1364    unsigned swiz[NIR_MAX_VEC_COMPONENTS] = {
1365       0,
1366    };
1367    for (unsigned i = 0; i < MIN2(num_components, data->num_components); i++)
1368       swiz[i] = i;
1369 
1370    return nir_swizzle(b, data, swiz, num_components);
1371 }
1372 
1373 static bool
1374 opt_load_vec_deref(nir_builder *b, nir_intrinsic_instr *load)
1375 {
1376    nir_deref_instr *deref = nir_src_as_deref(load->src[0]);
1377    nir_component_mask_t read_mask =
1378       nir_def_components_read(&load->def);
1379 
1380    /* LLVM loves to take advantage of the fact that vec3s in OpenCL are
1381     * vec4-aligned and so it can just read/write them as vec4s.  This
1382     * results in a LOT of vec4->vec3 casts on loads and stores.
1383     */
1384    if (is_vector_bitcast_deref(deref, read_mask, false)) {
1385       const unsigned old_num_comps = load->def.num_components;
1386       const unsigned old_bit_size = load->def.bit_size;
1387 
1388       nir_deref_instr *parent = nir_src_as_deref(deref->parent);
1389       const unsigned new_num_comps = glsl_get_vector_elements(parent->type);
1390       const unsigned new_bit_size = glsl_get_bit_size(parent->type);
1391 
1392       /* Stomp it to reference the parent */
1393       nir_src_rewrite(&load->src[0], &parent->def);
1394       load->def.bit_size = new_bit_size;
1395       load->def.num_components = new_num_comps;
1396       load->num_components = new_num_comps;
1397 
1398       b->cursor = nir_after_instr(&load->instr);
1399       nir_def *data = &load->def;
1400       if (old_bit_size != new_bit_size)
1401          data = nir_bitcast_vector(b, &load->def, old_bit_size);
1402       data = resize_vector(b, data, old_num_comps);
1403 
1404       nir_def_rewrite_uses_after(&load->def, data,
1405                                  data->parent_instr);
1406       return true;
1407    }
1408 
1409    return false;
1410 }
1411 
1412 static bool
1413 opt_store_vec_deref(nir_builder *b, nir_intrinsic_instr *store)
1414 {
1415    nir_deref_instr *deref = nir_src_as_deref(store->src[0]);
1416    nir_component_mask_t write_mask = nir_intrinsic_write_mask(store);
1417 
1418    /* LLVM loves to take advantage of the fact that vec3s in OpenCL are
1419     * vec4-aligned and so it can just read/write them as vec4s.  This
1420     * results in a LOT of vec4->vec3 casts on loads and stores.
1421     */
1422    if (is_vector_bitcast_deref(deref, write_mask, true)) {
1423       nir_def *data = store->src[1].ssa;
1424 
1425       const unsigned old_bit_size = data->bit_size;
1426 
1427       nir_deref_instr *parent = nir_src_as_deref(deref->parent);
1428       const unsigned new_num_comps = glsl_get_vector_elements(parent->type);
1429       const unsigned new_bit_size = glsl_get_bit_size(parent->type);
1430 
1431       nir_src_rewrite(&store->src[0], &parent->def);
1432 
1433       /* Restrict things down as needed so the bitcast doesn't fail */
1434       data = nir_trim_vector(b, data, util_last_bit(write_mask));
1435       if (old_bit_size != new_bit_size)
1436          data = nir_bitcast_vector(b, data, new_bit_size);
1437       data = resize_vector(b, data, new_num_comps);
1438       nir_src_rewrite(&store->src[1], data);
1439       store->num_components = new_num_comps;
1440 
1441       /* Adjust the write mask */
1442       write_mask = nir_component_mask_reinterpret(write_mask, old_bit_size,
1443                                                   new_bit_size);
1444       nir_intrinsic_set_write_mask(store, write_mask);
1445       return true;
1446    }
1447 
1448    return false;
1449 }
1450 
1451 static bool
1452 opt_known_deref_mode_is(nir_builder *b, nir_intrinsic_instr *intrin)
1453 {
1454    nir_variable_mode modes = nir_intrinsic_memory_modes(intrin);
1455    nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1456    if (deref == NULL)
1457       return false;
1458 
1459    nir_def *deref_is = NULL;
1460 
1461    if (nir_deref_mode_must_be(deref, modes))
1462       deref_is = nir_imm_true(b);
1463 
1464    if (!nir_deref_mode_may_be(deref, modes))
1465       deref_is = nir_imm_false(b);
1466 
1467    if (deref_is == NULL)
1468       return false;
1469 
1470    nir_def_rewrite_uses(&intrin->def, deref_is);
1471    nir_instr_remove(&intrin->instr);
1472    return true;
1473 }
1474 
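/* Runs the deref optimizations defined above over one function: cast
 * cleanup, ptr_as_array folding, mode restriction, ALU-of-cast source
 * rewrites, vector-bitcast rewrites of load/store_deref, and
 * constant-folding of deref_mode_is.
 */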
1475 bool
1476 nir_opt_deref_impl(nir_function_impl *impl)
1477 {
1478    bool progress = false;
1479 
1480    nir_builder b = nir_builder_create(impl);
1481 
1482    nir_foreach_block(block, impl) {
1483       nir_foreach_instr_safe(instr, block) {
1484          b.cursor = nir_before_instr(instr);
1485 
1486          switch (instr->type) {
1487          case nir_instr_type_alu: {
1488             nir_alu_instr *alu = nir_instr_as_alu(instr);
1489             if (opt_alu_of_cast(alu))
1490                progress = true;
1491             break;
1492          }
1493 
1494          case nir_instr_type_deref: {
1495             nir_deref_instr *deref = nir_instr_as_deref(instr);
1496 
1497             if (opt_restrict_deref_modes(deref))
1498                progress = true;
1499 
1500             switch (deref->deref_type) {
1501             case nir_deref_type_ptr_as_array:
1502                if (opt_deref_ptr_as_array(&b, deref))
1503                   progress = true;
1504                break;
1505 
1506             case nir_deref_type_cast:
1507                if (opt_deref_cast(&b, deref))
1508                   progress = true;
1509                break;
1510 
1511             default:
1512                /* Do nothing */
1513                break;
1514             }
1515             break;
1516          }
1517 
1518          case nir_instr_type_intrinsic: {
1519             nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1520             switch (intrin->intrinsic) {
1521             case nir_intrinsic_load_deref:
1522                if (opt_load_vec_deref(&b, intrin))
1523                   progress = true;
1524                break;
1525 
1526             case nir_intrinsic_store_deref:
1527                if (opt_store_vec_deref(&b, intrin))
1528                   progress = true;
1529                break;
1530 
1531             case nir_intrinsic_deref_mode_is:
1532                if (opt_known_deref_mode_is(&b, intrin))
1533                   progress = true;
1534                break;
1535 
1536             default:
1537                /* Do nothing */
1538                break;
1539             }
1540             break;
1541          }
1542 
1543          default:
1544             /* Do nothing */
1545             break;
1546          }
1547       }
1548    }
1549 
1550    if (progress) {
1551       nir_metadata_preserve(impl, nir_metadata_block_index |
1552                                      nir_metadata_dominance);
1553    } else {
1554       nir_metadata_preserve(impl, nir_metadata_all);
1555    }
1556 
1557    return progress;
1558 }
1559 
1560 bool
1561 nir_opt_deref(nir_shader *shader)
1562 {
1563    bool progress = false;
1564 
1565    nir_foreach_function_impl(impl, shader) {
1566       if (nir_opt_deref_impl(impl))
1567          progress = true;
1568    }
1569 
1570    return progress;
1571 }
1572