1 /*
2  * Copyright © 2018 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "nir.h"
25 #include "nir_builder.h"
26 #include "nir_deref.h"
27 #include "util/hash_table.h"
28 
29 static bool
30 is_trivial_deref_cast(nir_deref_instr *cast)
31 {
32    nir_deref_instr *parent = nir_src_as_deref(cast->parent);
33    if (!parent)
34       return false;
35 
36    return cast->modes == parent->modes &&
37           cast->type == parent->type &&
38           cast->dest.ssa.num_components == parent->dest.ssa.num_components &&
39           cast->dest.ssa.bit_size == parent->dest.ssa.bit_size;
40 }
41 
42 void
43 nir_deref_path_init(nir_deref_path *path,
44                     nir_deref_instr *deref, void *mem_ctx)
45 {
46    assert(deref != NULL);
47 
48    /* The length of the short path is at most ARRAY_SIZE - 1 because we need
49     * room for the NULL terminator.
50     */
51    static const int max_short_path_len = ARRAY_SIZE(path->_short_path) - 1;
52 
53    int count = 0;
54 
55    nir_deref_instr **tail = &path->_short_path[max_short_path_len];
56    nir_deref_instr **head = tail;
57 
58    *tail = NULL;
59    for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
60       if (d->deref_type == nir_deref_type_cast && is_trivial_deref_cast(d))
61          continue;
62       count++;
63       if (count <= max_short_path_len)
64          *(--head) = d;
65    }
66 
67    if (count <= max_short_path_len) {
68       /* If we're under max_short_path_len, just use the short path. */
69       path->path = head;
70       goto done;
71    }
72 
73 #ifndef NDEBUG
74    /* Just in case someone uses short_path by accident */
75    for (unsigned i = 0; i < ARRAY_SIZE(path->_short_path); i++)
76       path->_short_path[i] = (void *)(uintptr_t)0xdeadbeef;
77 #endif
78 
79    path->path = ralloc_array(mem_ctx, nir_deref_instr *, count + 1);
80    head = tail = path->path + count;
81    *tail = NULL;
82    for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
83       if (d->deref_type == nir_deref_type_cast && is_trivial_deref_cast(d))
84          continue;
85       *(--head) = d;
86    }
87 
88 done:
89    assert(head == path->path);
90    assert(tail == head + count);
91    assert(*tail == NULL);
92 }
93 
94 void
95 nir_deref_path_finish(nir_deref_path *path)
96 {
97    if (path->path < &path->_short_path[0] ||
98        path->path > &path->_short_path[ARRAY_SIZE(path->_short_path) - 1])
99       ralloc_free(path->path);
100 }
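/* Illustrative sketch (not part of the original file): typical usage of the
 * two path helpers above.  path.path is a NULL-terminated array whose first
 * entry is the base deref (a variable or cast), so it can be walked front to
 * back.
 */
static void
example_walk_deref_path(nir_deref_instr *deref, void *mem_ctx)
{
   nir_deref_path path;
   nir_deref_path_init(&path, deref, mem_ctx);
   for (nir_deref_instr **p = path.path; *p; p++) {
      /* *p walks from the base variable/cast down to 'deref' itself */
   }
   nir_deref_path_finish(&path);
}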
101 
102 /**
103  * Recursively removes unused deref instructions
104  */
105 bool
106 nir_deref_instr_remove_if_unused(nir_deref_instr *instr)
107 {
108    bool progress = false;
109 
110    for (nir_deref_instr *d = instr; d; d = nir_deref_instr_parent(d)) {
111       /* If anyone is using this deref, leave it alone */
112       assert(d->dest.is_ssa);
113       if (!nir_ssa_def_is_unused(&d->dest.ssa))
114          break;
115 
116       nir_instr_remove(&d->instr);
117       progress = true;
118    }
119 
120    return progress;
121 }
122 
123 bool
124 nir_deref_instr_has_indirect(nir_deref_instr *instr)
125 {
126    while (instr->deref_type != nir_deref_type_var) {
127       /* Consider casts to be indirects */
128       if (instr->deref_type == nir_deref_type_cast)
129          return true;
130 
131       if ((instr->deref_type == nir_deref_type_array ||
132            instr->deref_type == nir_deref_type_ptr_as_array) &&
133           !nir_src_is_const(instr->arr.index))
134          return true;
135 
136       instr = nir_deref_instr_parent(instr);
137    }
138 
139    return false;
140 }
141 
142 bool
143 nir_deref_instr_is_known_out_of_bounds(nir_deref_instr *instr)
144 {
145    for (; instr; instr = nir_deref_instr_parent(instr)) {
146       if (instr->deref_type == nir_deref_type_array &&
147           nir_src_is_const(instr->arr.index) &&
148            nir_src_as_uint(instr->arr.index) >=
149            glsl_get_length(nir_deref_instr_parent(instr)->type))
150          return true;
151    }
152 
153    return false;
154 }
155 
156 bool
157 nir_deref_instr_has_complex_use(nir_deref_instr *deref)
158 {
159    nir_foreach_use(use_src, &deref->dest.ssa) {
160       nir_instr *use_instr = use_src->parent_instr;
161 
162       switch (use_instr->type) {
163       case nir_instr_type_deref: {
164          nir_deref_instr *use_deref = nir_instr_as_deref(use_instr);
165 
166          /* A var deref has no sources */
167          assert(use_deref->deref_type != nir_deref_type_var);
168 
169          /* If a deref shows up in an array index or something like that, it's
170           * a complex use.
171           */
172          if (use_src != &use_deref->parent)
173             return true;
174 
175          /* Anything that isn't a basic struct or array deref is considered to
176           * be a "complex" use.  In particular, we don't allow ptr_as_array
177           * because we assume that opt_deref will turn any non-complex
178           * ptr_as_array derefs into regular array derefs eventually so passes
179           * which only want to handle simple derefs will pick them up in a
180           * later pass.
181           */
182          if (use_deref->deref_type != nir_deref_type_struct &&
183              use_deref->deref_type != nir_deref_type_array_wildcard &&
184              use_deref->deref_type != nir_deref_type_array)
185             return true;
186 
187          if (nir_deref_instr_has_complex_use(use_deref))
188             return true;
189 
190          continue;
191       }
192 
193       case nir_instr_type_intrinsic: {
194          nir_intrinsic_instr *use_intrin = nir_instr_as_intrinsic(use_instr);
195          switch (use_intrin->intrinsic) {
196          case nir_intrinsic_load_deref:
197             assert(use_src == &use_intrin->src[0]);
198             continue;
199 
200          case nir_intrinsic_copy_deref:
201             assert(use_src == &use_intrin->src[0] ||
202                    use_src == &use_intrin->src[1]);
203             continue;
204 
205          case nir_intrinsic_store_deref:
206             /* A use in src[1] of a store means we're taking that pointer and
207              * writing it to a variable.  Because we have no idea who will
208              * read that variable and what they will do with the pointer, it's
209              * considered a "complex" use.  A use in src[0], on the other
210              * hand, is a simple use because we're just going to dereference
211              * it and write a value there.
212              */
213             if (use_src == &use_intrin->src[0])
214                continue;
215             return true;
216 
217          default:
218             return true;
219          }
220          unreachable("Switch default failed");
221       }
222 
223       default:
224          return true;
225       }
226    }
227 
228    nir_foreach_if_use(use, &deref->dest.ssa)
229       return true;
230 
231    return false;
232 }
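/* Illustrative sketch (not part of the original file): a lowering pass that
 * only understands plain struct/array chains and load/store/copy uses would
 * typically use the helper above as a guard before rewriting a variable.
 * The helper name below is hypothetical.
 */
static bool
example_deref_is_simple_to_lower(nir_deref_instr *deref)
{
   /* Bail if the deref is ever used as data, as an array index, or through
    * anything other than struct/array derefs and load/store/copy intrinsics.
    */
   return !nir_deref_instr_has_complex_use(deref);
}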
233 
234 static unsigned
235 type_scalar_size_bytes(const struct glsl_type *type)
236 {
237    assert(glsl_type_is_vector_or_scalar(type) ||
238           glsl_type_is_matrix(type));
239    return glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;
240 }
241 
242 unsigned
243 nir_deref_instr_array_stride(nir_deref_instr *deref)
244 {
245    switch (deref->deref_type) {
246    case nir_deref_type_array:
247    case nir_deref_type_array_wildcard: {
248       const struct glsl_type *arr_type = nir_deref_instr_parent(deref)->type;
249       unsigned stride = glsl_get_explicit_stride(arr_type);
250 
251       if ((glsl_type_is_matrix(arr_type) &&
252            glsl_matrix_type_is_row_major(arr_type)) ||
253           (glsl_type_is_vector(arr_type) && stride == 0))
254          stride = type_scalar_size_bytes(arr_type);
255 
256       return stride;
257    }
258    case nir_deref_type_ptr_as_array:
259       return nir_deref_instr_array_stride(nir_deref_instr_parent(deref));
260    case nir_deref_type_cast:
261       return deref->cast.ptr_stride;
262    default:
263       return 0;
264    }
265 }
266 
267 static unsigned
268 type_get_array_stride(const struct glsl_type *elem_type,
269                       glsl_type_size_align_func size_align)
270 {
271    unsigned elem_size, elem_align;
272    size_align(elem_type, &elem_size, &elem_align);
273    return ALIGN_POT(elem_size, elem_align);
274 }
275 
276 static unsigned
277 struct_type_get_field_offset(const struct glsl_type *struct_type,
278                              glsl_type_size_align_func size_align,
279                              unsigned field_idx)
280 {
281    assert(glsl_type_is_struct_or_ifc(struct_type));
282    unsigned offset = 0;
283    for (unsigned i = 0; i <= field_idx; i++) {
284       unsigned elem_size, elem_align;
285       size_align(glsl_get_struct_field(struct_type, i), &elem_size, &elem_align);
286       offset = ALIGN_POT(offset, elem_align);
287       if (i < field_idx)
288          offset += elem_size;
289    }
290    return offset;
291 }
292 
293 unsigned
294 nir_deref_instr_get_const_offset(nir_deref_instr *deref,
295                                  glsl_type_size_align_func size_align)
296 {
297    nir_deref_path path;
298    nir_deref_path_init(&path, deref, NULL);
299 
300    unsigned offset = 0;
301    for (nir_deref_instr **p = &path.path[1]; *p; p++) {
302       switch ((*p)->deref_type) {
303       case nir_deref_type_array:
304          offset += nir_src_as_uint((*p)->arr.index) *
305                    type_get_array_stride((*p)->type, size_align);
306          break;
307       case nir_deref_type_struct: {
308          /* p starts at path[1], so this is safe */
309          nir_deref_instr *parent = *(p - 1);
310          offset += struct_type_get_field_offset(parent->type, size_align,
311                                                 (*p)->strct.index);
312          break;
313       }
314       case nir_deref_type_cast:
315          /* A cast doesn't contribute to the offset */
316          break;
317       default:
318          unreachable("Unsupported deref type");
319       }
320    }
321 
322    nir_deref_path_finish(&path);
323 
324    return offset;
325 }
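/* Worked example (illustrative, not part of the original file): for
 *
 *    struct S { float a; float b[2]; };
 *
 * and a size/align callback that reports floats as size 4, alignment 4,
 * struct_type_get_field_offset(S, cb, 1) is 4, so a fully-constant chain
 * like &s.b[1] yields 4 (field offset) + 1 * 4 (array stride) = 8 from
 * nir_deref_instr_get_const_offset().
 */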
326 
327 nir_ssa_def *
328 nir_build_deref_offset(nir_builder *b, nir_deref_instr *deref,
329                        glsl_type_size_align_func size_align)
330 {
331    nir_deref_path path;
332    nir_deref_path_init(&path, deref, NULL);
333 
334    nir_ssa_def *offset = nir_imm_intN_t(b, 0, deref->dest.ssa.bit_size);
335    for (nir_deref_instr **p = &path.path[1]; *p; p++) {
336       switch ((*p)->deref_type) {
337       case nir_deref_type_array:
338       case nir_deref_type_ptr_as_array: {
339          nir_ssa_def *index = nir_ssa_for_src(b, (*p)->arr.index, 1);
340          int stride = type_get_array_stride((*p)->type, size_align);
341          offset = nir_iadd(b, offset, nir_amul_imm(b, index, stride));
342          break;
343       }
344       case nir_deref_type_struct: {
345          /* p starts at path[1], so this is safe */
346          nir_deref_instr *parent = *(p - 1);
347          unsigned field_offset =
348             struct_type_get_field_offset(parent->type, size_align,
349                                          (*p)->strct.index);
350          offset = nir_iadd_imm(b, offset, field_offset);
351          break;
352       }
353       case nir_deref_type_cast:
354          /* A cast doesn't contribute to the offset */
355          break;
356       default:
357          unreachable("Unsupported deref type");
358       }
359    }
360 
361    nir_deref_path_finish(&path);
362 
363    return offset;
364 }
365 
366 bool
367 nir_remove_dead_derefs_impl(nir_function_impl *impl)
368 {
369    bool progress = false;
370 
371    nir_foreach_block(block, impl) {
372       nir_foreach_instr_safe(instr, block) {
373          if (instr->type == nir_instr_type_deref &&
374              nir_deref_instr_remove_if_unused(nir_instr_as_deref(instr)))
375             progress = true;
376       }
377    }
378 
379    if (progress)
380       nir_metadata_preserve(impl, nir_metadata_block_index |
381                                   nir_metadata_dominance);
382 
383    return progress;
384 }
385 
386 bool
387 nir_remove_dead_derefs(nir_shader *shader)
388 {
389    bool progress = false;
390    nir_foreach_function(function, shader) {
391       if (function->impl && nir_remove_dead_derefs_impl(function->impl))
392          progress = true;
393    }
394 
395    return progress;
396 }
397 
398 void
399 nir_fixup_deref_modes(nir_shader *shader)
400 {
401    nir_foreach_function(function, shader) {
402       if (!function->impl)
403          continue;
404 
405       nir_foreach_block(block, function->impl) {
406          nir_foreach_instr(instr, block) {
407             if (instr->type != nir_instr_type_deref)
408                continue;
409 
410             nir_deref_instr *deref = nir_instr_as_deref(instr);
411             if (deref->deref_type == nir_deref_type_cast)
412                continue;
413 
414             nir_variable_mode parent_modes;
415             if (deref->deref_type == nir_deref_type_var) {
416                parent_modes = deref->var->data.mode;
417             } else {
418                assert(deref->parent.is_ssa);
419                nir_deref_instr *parent =
420                   nir_instr_as_deref(deref->parent.ssa->parent_instr);
421                parent_modes = parent->modes;
422             }
423 
424             deref->modes = parent_modes;
425          }
426       }
427    }
428 }
429 
430 static bool
431 modes_may_alias(nir_variable_mode a, nir_variable_mode b)
432 {
433    /* Generic pointers can alias with SSBOs */
434    if ((a & (nir_var_mem_ssbo | nir_var_mem_global)) &&
435        (b & (nir_var_mem_ssbo | nir_var_mem_global)))
436       return true;
437 
438    /* Pointers can only alias if they share a mode. */
439    return a & b;
440 }
441 
442 static bool
443 deref_path_contains_coherent_decoration(nir_deref_path *path)
444 {
445    assert(path->path[0]->deref_type == nir_deref_type_var);
446 
447    if (path->path[0]->var->data.access & ACCESS_COHERENT)
448       return true;
449 
450    for (nir_deref_instr **p = &path->path[1]; *p; p++) {
451       if ((*p)->deref_type != nir_deref_type_struct)
452          continue;
453 
454       const struct glsl_type *struct_type = (*(p - 1))->type;
455       const struct glsl_struct_field *field =
456          glsl_get_struct_field_data(struct_type, (*p)->strct.index);
457       if (field->memory_coherent)
458          return true;
459    }
460 
461    return false;
462 }
463 
464 nir_deref_compare_result
465 nir_compare_deref_paths(nir_deref_path *a_path,
466                         nir_deref_path *b_path)
467 {
468    if (!modes_may_alias(b_path->path[0]->modes, a_path->path[0]->modes))
469       return nir_derefs_do_not_alias;
470 
471    if (a_path->path[0]->deref_type != b_path->path[0]->deref_type)
472       return nir_derefs_may_alias_bit;
473 
474    if (a_path->path[0]->deref_type == nir_deref_type_var) {
475       if (a_path->path[0]->var != b_path->path[0]->var) {
476          /* Shader and function temporaries aren't backed by memory so two
477           * distinct variables never alias.
478           */
479          static const nir_variable_mode temp_var_modes =
480             nir_var_shader_temp | nir_var_function_temp;
481          if (!(a_path->path[0]->modes & ~temp_var_modes) ||
482              !(b_path->path[0]->modes & ~temp_var_modes))
483             return nir_derefs_do_not_alias;
484 
485          /* If they are both declared coherent or have coherent somewhere in
486           * their path (due to a member of an interface being declared
487           * coherent), we have to assume that we could have any kind of
488           * aliasing.  Otherwise, they could still alias but the client didn't
489           * tell us and that's their fault.
490           */
491          if (deref_path_contains_coherent_decoration(a_path) &&
492              deref_path_contains_coherent_decoration(b_path))
493             return nir_derefs_may_alias_bit;
494 
495          /* Per SPV_KHR_workgroup_memory_explicit_layout and GL_EXT_shared_memory_block,
496           * shared blocks alias each other.
497           */
498          if (a_path->path[0]->modes & nir_var_mem_shared &&
499              b_path->path[0]->modes & nir_var_mem_shared &&
500              (glsl_type_is_interface(a_path->path[0]->var->type) ||
501               glsl_type_is_interface(b_path->path[0]->var->type))) {
502             assert(glsl_type_is_interface(a_path->path[0]->var->type) &&
503                    glsl_type_is_interface(b_path->path[0]->var->type));
504             return nir_derefs_may_alias_bit;
505          }
506 
507          /* If we can chase the deref all the way back to the variable and
508           * they're not the same variable and at least one is not declared
509           * coherent, we know they can't possibly alias.
510           */
511          return nir_derefs_do_not_alias;
512       }
513    } else {
514       assert(a_path->path[0]->deref_type == nir_deref_type_cast);
515       /* If they're not exactly the same cast, it's hard to compare them so we
516        * just assume they alias.  Comparing casts is tricky as there are lots
517        * of things such as mode, type, etc. that have to match up; for now, we
518        * just assume nir_opt_deref will combine them and compare the deref
519        * instructions.
520        *
521        * TODO: At some point in the future, we could be clever and understand
522        * that a float[] and int[] have the same layout and aliasing structure
523        * but double[] and vec3[] do not and we could potentially be a bit
524        * smarter here.
525        */
526       if (a_path->path[0] != b_path->path[0])
527          return nir_derefs_may_alias_bit;
528    }
529 
530    /* Start off assuming they fully compare.  We ignore equality for now.  In
531     * the end, we'll determine that by containment.
532     */
533    nir_deref_compare_result result = nir_derefs_may_alias_bit |
534                                      nir_derefs_a_contains_b_bit |
535                                      nir_derefs_b_contains_a_bit;
536 
537    nir_deref_instr **a_p = &a_path->path[1];
538    nir_deref_instr **b_p = &b_path->path[1];
539    while (*a_p != NULL && *a_p == *b_p) {
540       a_p++;
541       b_p++;
542    }
543 
544    /* We're at either the tail or the divergence point between the two deref
545     * paths.  Look to see if either contains cast or a ptr_as_array deref.  If
546     * it does we don't know how to safely make any inferences.  Hopefully,
547     * nir_opt_deref will clean most of these up and we can start inferring
548     * things again.
549     *
550     * In theory, we could do a bit better.  For instance, we could detect the
551     * case where we have exactly one ptr_as_array deref in the chain after the
552     * divergence point and it's matched in both chains and the two chains have
553     * different constant indices.
554     */
555    for (nir_deref_instr **t_p = a_p; *t_p; t_p++) {
556       if ((*t_p)->deref_type == nir_deref_type_cast ||
557           (*t_p)->deref_type == nir_deref_type_ptr_as_array)
558          return nir_derefs_may_alias_bit;
559    }
560    for (nir_deref_instr **t_p = b_p; *t_p; t_p++) {
561       if ((*t_p)->deref_type == nir_deref_type_cast ||
562           (*t_p)->deref_type == nir_deref_type_ptr_as_array)
563          return nir_derefs_may_alias_bit;
564    }
565 
566    while (*a_p != NULL && *b_p != NULL) {
567       nir_deref_instr *a_tail = *(a_p++);
568       nir_deref_instr *b_tail = *(b_p++);
569 
570       switch (a_tail->deref_type) {
571       case nir_deref_type_array:
572       case nir_deref_type_array_wildcard: {
573          assert(b_tail->deref_type == nir_deref_type_array ||
574                 b_tail->deref_type == nir_deref_type_array_wildcard);
575 
576          if (a_tail->deref_type == nir_deref_type_array_wildcard) {
577             if (b_tail->deref_type != nir_deref_type_array_wildcard)
578                result &= ~nir_derefs_b_contains_a_bit;
579          } else if (b_tail->deref_type == nir_deref_type_array_wildcard) {
580             if (a_tail->deref_type != nir_deref_type_array_wildcard)
581                result &= ~nir_derefs_a_contains_b_bit;
582          } else {
583             assert(a_tail->deref_type == nir_deref_type_array &&
584                    b_tail->deref_type == nir_deref_type_array);
585             assert(a_tail->arr.index.is_ssa && b_tail->arr.index.is_ssa);
586 
587             if (nir_src_is_const(a_tail->arr.index) &&
588                 nir_src_is_const(b_tail->arr.index)) {
589                /* If they're both direct and have different offsets, they
590                 * don't even alias much less anything else.
591                 */
592                if (nir_src_as_uint(a_tail->arr.index) !=
593                    nir_src_as_uint(b_tail->arr.index))
594                   return nir_derefs_do_not_alias;
595             } else if (a_tail->arr.index.ssa == b_tail->arr.index.ssa) {
596                /* They're the same indirect, continue on */
597             } else {
598                /* They're not the same index so we can't prove anything about
599                 * containment.
600                 */
601                result &= ~(nir_derefs_a_contains_b_bit | nir_derefs_b_contains_a_bit);
602             }
603          }
604          break;
605       }
606 
607       case nir_deref_type_struct: {
608          /* If they're different struct members, they don't even alias */
609          if (a_tail->strct.index != b_tail->strct.index)
610             return nir_derefs_do_not_alias;
611          break;
612       }
613 
614       default:
615          unreachable("Invalid deref type");
616       }
617    }
618 
619    /* If a is longer than b, then it can't contain b */
620    if (*a_p != NULL)
621       result &= ~nir_derefs_a_contains_b_bit;
622    if (*b_p != NULL)
623       result &= ~nir_derefs_b_contains_a_bit;
624 
625    /* If a contains b and b contains a they must be equal. */
626    if ((result & nir_derefs_a_contains_b_bit) && (result & nir_derefs_b_contains_a_bit))
627       result |= nir_derefs_equal_bit;
628 
629    return result;
630 }
631 
632 nir_deref_compare_result
633 nir_compare_derefs(nir_deref_instr *a, nir_deref_instr *b)
634 {
635    if (a == b) {
636       return nir_derefs_equal_bit | nir_derefs_may_alias_bit |
637              nir_derefs_a_contains_b_bit | nir_derefs_b_contains_a_bit;
638    }
639 
640    nir_deref_path a_path, b_path;
641    nir_deref_path_init(&a_path, a, NULL);
642    nir_deref_path_init(&b_path, b, NULL);
643    assert(a_path.path[0]->deref_type == nir_deref_type_var ||
644           a_path.path[0]->deref_type == nir_deref_type_cast);
645    assert(b_path.path[0]->deref_type == nir_deref_type_var ||
646           b_path.path[0]->deref_type == nir_deref_type_cast);
647 
648    nir_deref_compare_result result = nir_compare_deref_paths(&a_path, &b_path);
649 
650    nir_deref_path_finish(&a_path);
651    nir_deref_path_finish(&b_path);
652 
653    return result;
654 }
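/* Illustrative sketch (not part of the original file): how a caller might
 * consume the comparison bits, e.g. when deciding whether a full write
 * through deref 'a' makes an earlier write through deref 'b' dead.  The
 * helper name is hypothetical.
 */
static bool
example_write_to_a_overwrites_b(nir_deref_instr *a, nir_deref_instr *b)
{
   nir_deref_compare_result cmp = nir_compare_derefs(a, b);

   /* Writing all of 'a' can only overwrite 'b' if 'a' covers every location
    * 'b' may refer to; nir_derefs_equal_bit is a special case of this.
    */
   return (cmp & nir_derefs_a_contains_b_bit) != 0;
}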
655 
656 nir_deref_path *nir_get_deref_path(void *mem_ctx, nir_deref_and_path *deref)
657 {
658    if (!deref->_path) {
659       deref->_path = ralloc(mem_ctx, nir_deref_path);
660       nir_deref_path_init(deref->_path, deref->instr, mem_ctx);
661    }
662    return deref->_path;
663 }
664 
665 nir_deref_compare_result nir_compare_derefs_and_paths(void *mem_ctx,
666                                                       nir_deref_and_path *a,
667                                                       nir_deref_and_path *b)
668 {
669    if (a->instr == b->instr) /* nir_compare_derefs has a fast path if a == b */
670       return nir_compare_derefs(a->instr, b->instr);
671 
672    return nir_compare_deref_paths(nir_get_deref_path(mem_ctx, a),
673                                   nir_get_deref_path(mem_ctx, b));
674 }
675 
676 struct rematerialize_deref_state {
677    bool progress;
678    nir_builder builder;
679    nir_block *block;
680    struct hash_table *cache;
681 };
682 
683 static nir_deref_instr *
684 rematerialize_deref_in_block(nir_deref_instr *deref,
685                              struct rematerialize_deref_state *state)
686 {
687    if (deref->instr.block == state->block)
688       return deref;
689 
690    if (!state->cache) {
691       state->cache = _mesa_pointer_hash_table_create(NULL);
692    }
693 
694    struct hash_entry *cached = _mesa_hash_table_search(state->cache, deref);
695    if (cached)
696       return cached->data;
697 
698    nir_builder *b = &state->builder;
699    nir_deref_instr *new_deref =
700       nir_deref_instr_create(b->shader, deref->deref_type);
701    new_deref->modes = deref->modes;
702    new_deref->type = deref->type;
703 
704    if (deref->deref_type == nir_deref_type_var) {
705       new_deref->var = deref->var;
706    } else {
707       nir_deref_instr *parent = nir_src_as_deref(deref->parent);
708       if (parent) {
709          parent = rematerialize_deref_in_block(parent, state);
710          new_deref->parent = nir_src_for_ssa(&parent->dest.ssa);
711       } else {
712          nir_src_copy(&new_deref->parent, &deref->parent);
713       }
714    }
715 
716    switch (deref->deref_type) {
717    case nir_deref_type_var:
718    case nir_deref_type_array_wildcard:
719       /* Nothing more to do */
720       break;
721 
722    case nir_deref_type_cast:
723       new_deref->cast.ptr_stride = deref->cast.ptr_stride;
724       break;
725 
726    case nir_deref_type_array:
727    case nir_deref_type_ptr_as_array:
728       assert(!nir_src_as_deref(deref->arr.index));
729       nir_src_copy(&new_deref->arr.index, &deref->arr.index);
730       break;
731 
732    case nir_deref_type_struct:
733       new_deref->strct.index = deref->strct.index;
734       break;
735 
736    default:
737       unreachable("Invalid deref instruction type");
738    }
739 
740    nir_ssa_dest_init(&new_deref->instr, &new_deref->dest,
741                      deref->dest.ssa.num_components,
742                      deref->dest.ssa.bit_size,
743                      NULL);
744    nir_builder_instr_insert(b, &new_deref->instr);
745 
746    return new_deref;
747 }
748 
749 static bool
750 rematerialize_deref_src(nir_src *src, void *_state)
751 {
752    struct rematerialize_deref_state *state = _state;
753 
754    nir_deref_instr *deref = nir_src_as_deref(*src);
755    if (!deref)
756       return true;
757 
758    nir_deref_instr *block_deref = rematerialize_deref_in_block(deref, state);
759    if (block_deref != deref) {
760       nir_instr_rewrite_src(src->parent_instr, src,
761                             nir_src_for_ssa(&block_deref->dest.ssa));
762       nir_deref_instr_remove_if_unused(deref);
763       state->progress = true;
764    }
765 
766    return true;
767 }
768 
769 /** Re-materialize derefs in every block
770  *
771  * This pass re-materializes deref instructions in every block in which they
772  * are used.  After this pass has been run, every use of a deref will be of a
773  * deref in the same block as the use.  Also, all unused derefs will be
774  * deleted as a side-effect.
775  *
776  * Derefs used as sources of phi instructions are not rematerialized.
777  */
778 bool
779 nir_rematerialize_derefs_in_use_blocks_impl(nir_function_impl *impl)
780 {
781    struct rematerialize_deref_state state = { 0 };
782    nir_builder_init(&state.builder, impl);
783 
784    nir_foreach_block_unstructured(block, impl) {
785       state.block = block;
786 
787       /* Start each block with a fresh cache */
788       if (state.cache)
789          _mesa_hash_table_clear(state.cache, NULL);
790 
791       nir_foreach_instr_safe(instr, block) {
792          if (instr->type == nir_instr_type_deref &&
793              nir_deref_instr_remove_if_unused(nir_instr_as_deref(instr)))
794             continue;
795 
796          /* If a deref is used in a phi, we can't rematerialize it, as the new
797           * derefs would appear before the phi, which is not valid.
798           */
799          if (instr->type == nir_instr_type_phi)
800             continue;
801 
802          state.builder.cursor = nir_before_instr(instr);
803          nir_foreach_src(instr, rematerialize_deref_src, &state);
804       }
805 
806 #ifndef NDEBUG
807       nir_if *following_if = nir_block_get_following_if(block);
808       if (following_if)
809          assert(!nir_src_as_deref(following_if->condition));
810 #endif
811    }
812 
813    _mesa_hash_table_destroy(state.cache, NULL);
814 
815    return state.progress;
816 }
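/* Illustrative sketch (not part of the original file): a shader-level wrapper
 * in the same style as the other passes in this file, assuming a driver wants
 * to run the re-materialization over every function.
 */
static bool
example_rematerialize_derefs_in_use_blocks(nir_shader *shader)
{
   bool progress = false;
   nir_foreach_function(function, shader) {
      if (function->impl &&
          nir_rematerialize_derefs_in_use_blocks_impl(function->impl))
         progress = true;
   }
   return progress;
}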
817 
818 static void
819 nir_deref_instr_fixup_child_types(nir_deref_instr *parent)
820 {
821    nir_foreach_use(use, &parent->dest.ssa) {
822       if (use->parent_instr->type != nir_instr_type_deref)
823          continue;
824 
825       nir_deref_instr *child = nir_instr_as_deref(use->parent_instr);
826       switch (child->deref_type) {
827       case nir_deref_type_var:
828          unreachable("nir_deref_type_var cannot be a child");
829 
830       case nir_deref_type_array:
831       case nir_deref_type_array_wildcard:
832          child->type = glsl_get_array_element(parent->type);
833          break;
834 
835       case nir_deref_type_ptr_as_array:
836          child->type = parent->type;
837          break;
838 
839       case nir_deref_type_struct:
840          child->type = glsl_get_struct_field(parent->type,
841                                              child->strct.index);
842          break;
843 
844       case nir_deref_type_cast:
845          /* We stop the recursion here */
846          continue;
847       }
848 
849       /* Recurse into children */
850       nir_deref_instr_fixup_child_types(child);
851    }
852 }
853 
854 static bool
855 is_trivial_array_deref_cast(nir_deref_instr *cast)
856 {
857    assert(is_trivial_deref_cast(cast));
858 
859    nir_deref_instr *parent = nir_src_as_deref(cast->parent);
860 
861    if (parent->deref_type == nir_deref_type_array) {
862       return cast->cast.ptr_stride ==
863              glsl_get_explicit_stride(nir_deref_instr_parent(parent)->type);
864    } else if (parent->deref_type == nir_deref_type_ptr_as_array) {
865       return cast->cast.ptr_stride ==
866              nir_deref_instr_array_stride(parent);
867    } else {
868       return false;
869    }
870 }
871 
872 static bool
873 is_deref_ptr_as_array(nir_instr *instr)
874 {
875    return instr->type == nir_instr_type_deref &&
876           nir_instr_as_deref(instr)->deref_type == nir_deref_type_ptr_as_array;
877 }
878 
879 static bool
880 opt_remove_restricting_cast_alignments(nir_deref_instr *cast)
881 {
882    assert(cast->deref_type == nir_deref_type_cast);
883    if (cast->cast.align_mul == 0)
884       return false;
885 
886    nir_deref_instr *parent = nir_src_as_deref(cast->parent);
887    if (parent == NULL)
888       return false;
889 
890    /* Don't use any default alignment for this check.  We don't want to fall
891     * back to type alignment too early in case we find out later that we're
892     * somehow a child of a packed struct.
893     */
894    uint32_t parent_mul, parent_offset;
895    if (!nir_get_explicit_deref_align(parent, false /* default_to_type_align */,
896                                      &parent_mul, &parent_offset))
897       return false;
898 
899    /* If this cast increases the alignment, we want to keep it.
900     *
901     * There is a possibility that the larger alignment provided by this cast
902     * somehow disagrees with the smaller alignment further up the deref chain.
903     * In that case, we choose to favor the alignment closer to the actual
904     * memory operation which, in this case, is the cast and not its parent so
905     * keeping the cast alignment is the right thing to do.
906     */
907    if (parent_mul < cast->cast.align_mul)
908       return false;
909 
910    /* If we've gotten here, we have a parent deref with an align_mul at least
911     * as large as ours so we can potentially throw away the alignment
912     * information on this deref.  There are two cases to consider here:
913     *
914     *  1. We can chase the deref all the way back to the variable.  In this
915     *     case, we have "perfect" knowledge, modulo indirect array derefs.
916     *     Unless we've done something wrong in our indirect/wildcard stride
917     *     calculations, our knowledge from the deref walk is better than the
918     *     client's.
919     *
920     *  2. We can't chase it all the way back to the variable.  In this case,
921     *     because our call to nir_get_explicit_deref_align(parent, ...) above
922     *     passes default_to_type_align=false, the only way we can even
923     *     get here is if something further up the deref chain has a cast with
924     *     an alignment which can only happen if we get an alignment from the
925     *     client (most likely a decoration in the SPIR-V).  If the client has
926     *     provided us with two conflicting alignments in the deref chain,
927     *     that's their fault and we can do whatever we want.
928     *
929     * In either case, we should be within our rights, at this point, to throw
930     * away the alignment information on this deref.  However, to be "nice" to
931     * weird clients, we do one more check.  It really shouldn't happen but
932     * it's possible that the parent's alignment offset disagrees with the
933     * cast's alignment offset.  In this case, we consider the cast as
934     * providing more information (or at least more valid information) and keep
935     * it even if the align_mul from the parent is larger.
936     */
937    assert(cast->cast.align_mul <= parent_mul);
938    if (parent_offset % cast->cast.align_mul != cast->cast.align_offset)
939       return false;
940 
941    /* If we got here, the parent has better alignment information than the
942     * child and we can get rid of the child alignment information.
943     */
944    cast->cast.align_mul = 0;
945    cast->cast.align_offset = 0;
946    return true;
947 }
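/* Worked example (illustrative, not part of the original file): if the walk
 * up the parent chain proves 16-byte alignment at byte offset 4
 * (parent_mul = 16, parent_offset = 4) and the cast claims align_mul = 8,
 * align_offset = 4, then 4 % 8 == 4 matches the cast's offset and the cast's
 * alignment is dropped.  If the cast instead claimed align_offset = 0, the
 * offsets would disagree and the cast's alignment would be kept.
 */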
948 
949 /**
950  * Remove casts that just wrap other casts.
951  */
952 static bool
953 opt_remove_cast_cast(nir_deref_instr *cast)
954 {
955    nir_deref_instr *first_cast = cast;
956 
957    while (true) {
958       nir_deref_instr *parent = nir_deref_instr_parent(first_cast);
959       if (parent == NULL || parent->deref_type != nir_deref_type_cast)
960          break;
961       first_cast = parent;
962    }
963    if (cast == first_cast)
964       return false;
965 
966    nir_instr_rewrite_src(&cast->instr, &cast->parent,
967                          nir_src_for_ssa(first_cast->parent.ssa));
968    return true;
969 }
970 
971 /* Restrict variable modes in casts.
972  *
973  * If we know from something higher up the deref chain that the deref has a
974  * specific mode, we can cast to more general and back but we can never cast
975  * across modes.  For non-cast derefs, we should only ever do anything here if
976  * the parent eventually comes from a cast that we restricted earlier.
977  */
978 static bool
979 opt_restrict_deref_modes(nir_deref_instr *deref)
980 {
981    if (deref->deref_type == nir_deref_type_var) {
982       assert(deref->modes == deref->var->data.mode);
983       return false;
984    }
985 
986    nir_deref_instr *parent = nir_src_as_deref(deref->parent);
987    if (parent == NULL || parent->modes == deref->modes)
988       return false;
989 
990    assert(parent->modes & deref->modes);
991    deref->modes &= parent->modes;
992    return true;
993 }
994 
995 static bool
996 opt_remove_sampler_cast(nir_deref_instr *cast)
997 {
998    assert(cast->deref_type == nir_deref_type_cast);
999    nir_deref_instr *parent = nir_src_as_deref(cast->parent);
1000    if (parent == NULL)
1001       return false;
1002 
1003    /* Strip both types down to their non-array type and bail if there are any
1004     * discrepancies in array lengths.
1005     */
1006    const struct glsl_type *parent_type = parent->type;
1007    const struct glsl_type *cast_type = cast->type;
1008    while (glsl_type_is_array(parent_type) && glsl_type_is_array(cast_type)) {
1009       if (glsl_get_length(parent_type) != glsl_get_length(cast_type))
1010          return false;
1011       parent_type = glsl_get_array_element(parent_type);
1012       cast_type = glsl_get_array_element(cast_type);
1013    }
1014 
1015    if (glsl_type_is_array(parent_type) || glsl_type_is_array(cast_type))
1016       return false;
1017 
1018    if (!glsl_type_is_sampler(parent_type) ||
1019        cast_type != glsl_bare_sampler_type())
1020       return false;
1021 
1022    /* We're a cast from a more detailed sampler type to a bare sampler */
1023    nir_ssa_def_rewrite_uses(&cast->dest.ssa,
1024                             &parent->dest.ssa);
1025    nir_instr_remove(&cast->instr);
1026 
1027    /* Recursively crawl the deref tree and clean up types */
1028    nir_deref_instr_fixup_child_types(parent);
1029 
1030    return true;
1031 }
1032 
1033 /**
1034  * Is this casting a struct to its first contained struct?
1035  * struct a { struct b field0 };
1036  * ssa_5 is a deref of struct a;
1037  * deref_cast (structb *)ssa_5 (function_temp structb);
1038  * converts to
1039  * deref_struct &ssa_5->field0 (function_temp structb);
1040  * This allows subsequent copy propagation to work.
1041  */
1042 static bool
1043 opt_replace_struct_wrapper_cast(nir_builder *b, nir_deref_instr *cast)
1044 {
1045    nir_deref_instr *parent = nir_src_as_deref(cast->parent);
1046    if (!parent)
1047       return false;
1048 
1049    if (cast->cast.align_mul > 0)
1050       return false;
1051 
1052    if (!glsl_type_is_struct(parent->type))
1053       return false;
1054 
1055    /* Empty struct */
1056    if (glsl_get_length(parent->type) < 1)
1057       return false;
1058 
1059    if (glsl_get_struct_field_offset(parent->type, 0) != 0)
1060       return false;
1061 
1062    if (cast->type != glsl_get_struct_field(parent->type, 0))
1063       return false;
1064 
1065    nir_deref_instr *replace = nir_build_deref_struct(b, parent, 0);
1066    nir_ssa_def_rewrite_uses(&cast->dest.ssa, &replace->dest.ssa);
1067    nir_deref_instr_remove_if_unused(cast);
1068    return true;
1069 }
1070 
1071 static bool
1072 opt_deref_cast(nir_builder *b, nir_deref_instr *cast)
1073 {
1074    bool progress = false;
1075 
1076    progress |= opt_remove_restricting_cast_alignments(cast);
1077 
1078    if (opt_replace_struct_wrapper_cast(b, cast))
1079       return true;
1080 
1081    if (opt_remove_sampler_cast(cast))
1082       return true;
1083 
1084    progress |= opt_remove_cast_cast(cast);
1085    if (!is_trivial_deref_cast(cast))
1086       return progress;
1087 
1088    /* If this deref still contains useful alignment information, we don't want
1089     * to delete it.
1090     */
1091    if (cast->cast.align_mul > 0)
1092       return progress;
1093 
1094    bool trivial_array_cast = is_trivial_array_deref_cast(cast);
1095 
1096    assert(cast->dest.is_ssa);
1097    assert(cast->parent.is_ssa);
1098 
1099    nir_foreach_use_safe(use_src, &cast->dest.ssa) {
1100       /* If this isn't a trivial array cast, we can't propagate into
1101        * ptr_as_array derefs.
1102        */
1103       if (is_deref_ptr_as_array(use_src->parent_instr) &&
1104           !trivial_array_cast)
1105          continue;
1106 
1107       nir_instr_rewrite_src(use_src->parent_instr, use_src, cast->parent);
1108       progress = true;
1109    }
1110 
1111    /* An if_use of a cast would be a bit crazy */
1112    assert(list_is_empty(&cast->dest.ssa.if_uses));
1113 
1114    if (nir_deref_instr_remove_if_unused(cast))
1115       progress = true;
1116 
1117    return progress;
1118 }
1119 
1120 static bool
1121 opt_deref_ptr_as_array(nir_builder *b, nir_deref_instr *deref)
1122 {
1123    assert(deref->deref_type == nir_deref_type_ptr_as_array);
1124 
1125    nir_deref_instr *parent = nir_deref_instr_parent(deref);
1126 
1127    if (nir_src_is_const(deref->arr.index) &&
1128        nir_src_as_int(deref->arr.index) == 0) {
1129       /* If it's a ptr_as_array deref with an index of 0, it does nothing
1130        * and we can just replace its uses with its parent, unless it has
1131        * alignment information.
1132        *
1133        * The source of a ptr_as_array deref always has a deref_type of
1134        * nir_deref_type_array or nir_deref_type_cast.  If it's a cast, it
1135        * may be trivial and we may be able to get rid of that too.  Any
1136        * trivial cast of trivial cast cases should be handled already by
1137        * opt_deref_cast() above.
1138        */
1139       if (parent->deref_type == nir_deref_type_cast &&
1140           parent->cast.align_mul == 0 &&
1141           is_trivial_deref_cast(parent))
1142          parent = nir_deref_instr_parent(parent);
1143       nir_ssa_def_rewrite_uses(&deref->dest.ssa,
1144                                &parent->dest.ssa);
1145       nir_instr_remove(&deref->instr);
1146       return true;
1147    }
1148 
1149    if (parent->deref_type != nir_deref_type_array &&
1150        parent->deref_type != nir_deref_type_ptr_as_array)
1151       return false;
1152 
1153    assert(parent->parent.is_ssa);
1154    assert(parent->arr.index.is_ssa);
1155    assert(deref->arr.index.is_ssa);
1156 
1157    nir_ssa_def *new_idx = nir_iadd(b, parent->arr.index.ssa,
1158                                       deref->arr.index.ssa);
1159 
1160    deref->deref_type = parent->deref_type;
1161    nir_instr_rewrite_src(&deref->instr, &deref->parent, parent->parent);
1162    nir_instr_rewrite_src(&deref->instr, &deref->arr.index,
1163                          nir_src_for_ssa(new_idx));
1164    return true;
1165 }
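/* Illustrative sketch (not part of the original file): the effect of the
 * optimization above on a chain of array-style derefs, in pseudo-NIR:
 *
 *    ssa_2 = deref_array &(*ssa_1)[i]
 *    ssa_3 = deref_ptr_as_array &(*ssa_2)[j]
 *
 * becomes
 *
 *    ssa_3 = deref_array &(*ssa_1)[i + j]
 *
 * while a ptr_as_array deref with a constant index of 0 is simply replaced
 * by its parent.
 */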
1166 
1167 static bool
1168 is_vector_bitcast_deref(nir_deref_instr *cast,
1169                         nir_component_mask_t mask,
1170                         bool is_write)
1171 {
1172    if (cast->deref_type != nir_deref_type_cast)
1173       return false;
1174 
1175    /* Don't throw away useful alignment information */
1176    if (cast->cast.align_mul > 0)
1177       return false;
1178 
1179    /* It has to be a cast of another deref */
1180    nir_deref_instr *parent = nir_src_as_deref(cast->parent);
1181    if (parent == NULL)
1182       return false;
1183 
1184    /* The parent has to be a vector or scalar */
1185    if (!glsl_type_is_vector_or_scalar(parent->type))
1186       return false;
1187 
1188    /* Don't bother with 1-bit types */
1189    unsigned cast_bit_size = glsl_get_bit_size(cast->type);
1190    unsigned parent_bit_size = glsl_get_bit_size(parent->type);
1191    if (cast_bit_size == 1 || parent_bit_size == 1)
1192       return false;
1193 
1194    /* A strided vector type means it's not tightly packed */
1195    if (glsl_get_explicit_stride(cast->type) ||
1196        glsl_get_explicit_stride(parent->type))
1197       return false;
1198 
1199    assert(cast_bit_size > 0 && cast_bit_size % 8 == 0);
1200    assert(parent_bit_size > 0 && parent_bit_size % 8 == 0);
1201    unsigned bytes_used = util_last_bit(mask) * (cast_bit_size / 8);
1202    unsigned parent_bytes = glsl_get_vector_elements(parent->type) *
1203                            (parent_bit_size / 8);
1204    if (bytes_used > parent_bytes)
1205       return false;
1206 
1207    if (is_write && !nir_component_mask_can_reinterpret(mask, cast_bit_size,
1208                                                        parent_bit_size))
1209       return false;
1210 
1211    return true;
1212 }
1213 
1214 static nir_ssa_def *
1215 resize_vector(nir_builder *b, nir_ssa_def *data, unsigned num_components)
1216 {
1217    if (num_components == data->num_components)
1218       return data;
1219 
1220    unsigned swiz[NIR_MAX_VEC_COMPONENTS] = { 0, };
1221    for (unsigned i = 0; i < MIN2(num_components, data->num_components); i++)
1222       swiz[i] = i;
1223 
1224    return nir_swizzle(b, data, swiz, num_components);
1225 }
1226 
1227 static bool
1228 opt_load_vec_deref(nir_builder *b, nir_intrinsic_instr *load)
1229 {
1230    nir_deref_instr *deref = nir_src_as_deref(load->src[0]);
1231    nir_component_mask_t read_mask =
1232       nir_ssa_def_components_read(&load->dest.ssa);
1233 
1234    /* LLVM loves to take advantage of the fact that vec3s in OpenCL are
1235     * vec4-aligned and so it can just read/write them as vec4s.  This
1236     * results in a LOT of vec4->vec3 casts on loads and stores.
1237     */
1238    if (is_vector_bitcast_deref(deref, read_mask, false)) {
1239       const unsigned old_num_comps = load->dest.ssa.num_components;
1240       const unsigned old_bit_size = load->dest.ssa.bit_size;
1241 
1242       nir_deref_instr *parent = nir_src_as_deref(deref->parent);
1243       const unsigned new_num_comps = glsl_get_vector_elements(parent->type);
1244       const unsigned new_bit_size = glsl_get_bit_size(parent->type);
1245 
1246       /* Stomp it to reference the parent */
1247       nir_instr_rewrite_src(&load->instr, &load->src[0],
1248                             nir_src_for_ssa(&parent->dest.ssa));
1249       assert(load->dest.is_ssa);
1250       load->dest.ssa.bit_size = new_bit_size;
1251       load->dest.ssa.num_components = new_num_comps;
1252       load->num_components = new_num_comps;
1253 
1254       b->cursor = nir_after_instr(&load->instr);
1255       nir_ssa_def *data = &load->dest.ssa;
1256       if (old_bit_size != new_bit_size)
1257          data = nir_bitcast_vector(b, &load->dest.ssa, old_bit_size);
1258       data = resize_vector(b, data, old_num_comps);
1259 
1260       nir_ssa_def_rewrite_uses_after(&load->dest.ssa, data,
1261                                      data->parent_instr);
1262       return true;
1263    }
1264 
1265    return false;
1266 }
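/* Illustrative sketch (not part of the original file): in pseudo-NIR, the
 * rewrite above turns
 *
 *    ssa_2 = deref_cast (vec3 *)ssa_1     (ssa_1 is a vec4 deref)
 *    ssa_3 = load_deref ssa_2
 *
 * into a vec4 load straight from ssa_1, followed by a bitcast (if the bit
 * sizes differ) and a swizzle back down to the components the original
 * vec3 load actually consumed.
 */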
1267 
1268 static bool
1269 opt_store_vec_deref(nir_builder *b, nir_intrinsic_instr *store)
1270 {
1271    nir_deref_instr *deref = nir_src_as_deref(store->src[0]);
1272    nir_component_mask_t write_mask = nir_intrinsic_write_mask(store);
1273 
1274    /* LLVM loves to take advantage of the fact that vec3s in OpenCL are
1275     * vec4-aligned and so it can just read/write them as vec4s.  This
1276     * results in a LOT of vec4->vec3 casts on loads and stores.
1277     */
1278    if (is_vector_bitcast_deref(deref, write_mask, true)) {
1279       assert(store->src[1].is_ssa);
1280       nir_ssa_def *data = store->src[1].ssa;
1281 
1282       const unsigned old_bit_size = data->bit_size;
1283 
1284       nir_deref_instr *parent = nir_src_as_deref(deref->parent);
1285       const unsigned new_num_comps = glsl_get_vector_elements(parent->type);
1286       const unsigned new_bit_size = glsl_get_bit_size(parent->type);
1287 
1288       nir_instr_rewrite_src(&store->instr, &store->src[0],
1289                             nir_src_for_ssa(&parent->dest.ssa));
1290 
1291       /* Restrict things down as needed so the bitcast doesn't fail */
1292       data = nir_channels(b, data, (1 << util_last_bit(write_mask)) - 1);
1293       if (old_bit_size != new_bit_size)
1294          data = nir_bitcast_vector(b, data, new_bit_size);
1295       data = resize_vector(b, data, new_num_comps);
1296       nir_instr_rewrite_src(&store->instr, &store->src[1],
1297                             nir_src_for_ssa(data));
1298       store->num_components = new_num_comps;
1299 
1300       /* Adjust the write mask */
1301       write_mask = nir_component_mask_reinterpret(write_mask, old_bit_size,
1302                                                   new_bit_size);
1303       nir_intrinsic_set_write_mask(store, write_mask);
1304       return true;
1305    }
1306 
1307    return false;
1308 }
1309 
1310 static bool
1311 opt_known_deref_mode_is(nir_builder *b, nir_intrinsic_instr *intrin)
1312 {
1313    nir_variable_mode modes = nir_intrinsic_memory_modes(intrin);
1314    nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1315    if (deref == NULL)
1316       return false;
1317 
1318    nir_ssa_def *deref_is = NULL;
1319 
1320    if (nir_deref_mode_must_be(deref, modes))
1321       deref_is = nir_imm_true(b);
1322 
1323    if (!nir_deref_mode_may_be(deref, modes))
1324       deref_is = nir_imm_false(b);
1325 
1326    if (deref_is == NULL)
1327       return false;
1328 
1329    nir_ssa_def_rewrite_uses(&intrin->dest.ssa, deref_is);
1330    nir_instr_remove(&intrin->instr);
1331    return true;
1332 }
1333 
1334 bool
1335 nir_opt_deref_impl(nir_function_impl *impl)
1336 {
1337    bool progress = false;
1338 
1339    nir_builder b;
1340    nir_builder_init(&b, impl);
1341 
1342    nir_foreach_block(block, impl) {
1343       nir_foreach_instr_safe(instr, block) {
1344          b.cursor = nir_before_instr(instr);
1345 
1346          switch (instr->type) {
1347          case nir_instr_type_deref: {
1348             nir_deref_instr *deref = nir_instr_as_deref(instr);
1349 
1350             if (opt_restrict_deref_modes(deref))
1351                progress = true;
1352 
1353             switch (deref->deref_type) {
1354             case nir_deref_type_ptr_as_array:
1355                if (opt_deref_ptr_as_array(&b, deref))
1356                   progress = true;
1357                break;
1358 
1359             case nir_deref_type_cast:
1360                if (opt_deref_cast(&b, deref))
1361                   progress = true;
1362                break;
1363 
1364             default:
1365                /* Do nothing */
1366                break;
1367             }
1368             break;
1369          }
1370 
1371          case nir_instr_type_intrinsic: {
1372             nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1373             switch (intrin->intrinsic) {
1374             case nir_intrinsic_load_deref:
1375                if (opt_load_vec_deref(&b, intrin))
1376                   progress = true;
1377                break;
1378 
1379             case nir_intrinsic_store_deref:
1380                if (opt_store_vec_deref(&b, intrin))
1381                   progress = true;
1382                break;
1383 
1384             case nir_intrinsic_deref_mode_is:
1385                if (opt_known_deref_mode_is(&b, intrin))
1386                   progress = true;
1387                break;
1388 
1389             default:
1390                /* Do nothing */
1391                break;
1392             }
1393             break;
1394          }
1395 
1396          default:
1397             /* Do nothing */
1398             break;
1399          }
1400       }
1401    }
1402 
1403    if (progress) {
1404       nir_metadata_preserve(impl, nir_metadata_block_index |
1405                                   nir_metadata_dominance);
1406    } else {
1407       nir_metadata_preserve(impl, nir_metadata_all);
1408    }
1409 
1410    return progress;
1411 }
1412 
1413 bool
1414 nir_opt_deref(nir_shader *shader)
1415 {
1416    bool progress = false;
1417 
1418    nir_foreach_function(func, shader) {
1419       if (func->impl && nir_opt_deref_impl(func->impl))
1420          progress = true;
1421    }
1422 
1423    return progress;
1424 }
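/* Illustrative sketch (not part of the original file): nir_opt_deref is
 * normally run inside a driver's optimization loop until it stops making
 * progress, for example:
 *
 *    bool progress;
 *    do {
 *       progress = false;
 *       progress |= nir_opt_deref(shader);
 *       progress |= nir_remove_dead_derefs(shader);
 *    } while (progress);
 */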
1425