/*
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"
#include "nir_builder.h"
#include "nir_deref.h"
#include "nir_vla.h"

#include "util/set.h"
#include "util/u_math.h"

static bool
is_array_deref_of_vec(nir_deref_instr *deref)
{
   if (deref->deref_type != nir_deref_type_array &&
       deref->deref_type != nir_deref_type_array_wildcard)
      return false;

   nir_deref_instr *parent = nir_deref_instr_parent(deref);
   return glsl_type_is_vector_or_scalar(parent->type);
}


static struct set *
get_complex_used_vars(nir_shader *shader, void *mem_ctx)
{
   struct set *complex_vars = _mesa_pointer_set_create(mem_ctx);

   nir_foreach_function_impl(impl, shader) {
      nir_foreach_block(block, impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_deref)
               continue;

            nir_deref_instr *deref = nir_instr_as_deref(instr);

            /* We only need to consider var derefs because
             * nir_deref_instr_has_complex_use is recursive.
             */
            if (deref->deref_type == nir_deref_type_var &&
                nir_deref_instr_has_complex_use(deref,
                                                nir_deref_instr_has_complex_use_allow_atomics))
               _mesa_set_add(complex_vars, deref->var);
         }
      }
   }

   return complex_vars;
}

struct split_var_state {
   void *mem_ctx;

   nir_shader *shader;
   nir_function_impl *impl;

   nir_variable *base_var;
};

struct field {
   struct field *parent;

   const struct glsl_type *type;

   unsigned num_fields;
   struct field *fields;

   /* The field currently being recursed */
   unsigned current_index;

   nir_variable *var;
};

static int
num_array_levels_in_array_of_vector_type(const struct glsl_type *type)
{
   int num_levels = 0;
   while (true) {
      if (glsl_type_is_array_or_matrix(type)) {
         num_levels++;
         type = glsl_get_array_element(type);
      } else if (glsl_type_is_vector_or_scalar(type) &&
                 !glsl_type_is_cmat(type)) {
         /* glsl_type_is_vector_or_scalar would more accurately be called
          * "can be an r-value that isn't an array, structure, or matrix."
          * This optimization pass really shouldn't do anything to cooperative
          * matrices. These matrices will eventually be lowered to something
          * else (dependent on the backend), and that thing may (or may not)
          * be handled by this or another pass.
          */
         return num_levels;
      } else {
         /* Not an array of vectors */
         return -1;
      }
   }
}
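
/* For illustration: a GLSL-style "vec4 v[3][2]" has two array levels above a
 * vector and yields 2, "mat4 m[8]" counts the matrix's column dimension as a
 * second level and also yields 2, while an array of structs yields -1 because
 * the leaf type is not a vector or scalar.
 */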

static nir_constant *
gather_constant_initializers(nir_constant *src,
                             nir_variable *var,
                             const struct glsl_type *type,
                             struct field *field,
                             struct split_var_state *state)
{
   if (!src)
      return NULL;
   if (glsl_type_is_array(type)) {
      const struct glsl_type *element = glsl_get_array_element(type);
      assert(src->num_elements == glsl_get_length(type));
      nir_constant *dst = rzalloc(var, nir_constant);
      dst->num_elements = src->num_elements;
      dst->elements = rzalloc_array(var, nir_constant *, src->num_elements);
      for (unsigned i = 0; i < src->num_elements; ++i) {
         dst->elements[i] = gather_constant_initializers(src->elements[i], var, element, field, state);
      }
      return dst;
   } else if (glsl_type_is_struct(type)) {
      const struct glsl_type *element = glsl_get_struct_field(type, field->current_index);
      return gather_constant_initializers(src->elements[field->current_index], var, element, &field->fields[field->current_index], state);
   } else {
      return nir_constant_clone(src, var);
   }
}

static void
init_field_for_type(struct field *field, struct field *parent,
                    const struct glsl_type *type,
                    const char *name,
                    struct split_var_state *state)
{
   *field = (struct field){
      .parent = parent,
      .type = type,
   };

   const struct glsl_type *struct_type = glsl_without_array(type);
   if (glsl_type_is_struct_or_ifc(struct_type)) {
      field->num_fields = glsl_get_length(struct_type),
      field->fields = ralloc_array(state->mem_ctx, struct field,
                                   field->num_fields);
      for (unsigned i = 0; i < field->num_fields; i++) {
         char *field_name = NULL;
         if (name) {
            field_name = ralloc_asprintf(state->mem_ctx, "%s_%s", name,
                                         glsl_get_struct_elem_name(struct_type, i));
         } else {
            field_name = ralloc_asprintf(state->mem_ctx, "{unnamed %s}_%s",
                                         glsl_get_type_name(struct_type),
                                         glsl_get_struct_elem_name(struct_type, i));
         }
         field->current_index = i;
         init_field_for_type(&field->fields[i], field,
                             glsl_get_struct_field(struct_type, i),
                             field_name, state);
      }
   } else {
      const struct glsl_type *var_type = type;
      struct field *root = field;
      for (struct field *f = field->parent; f; f = f->parent) {
         var_type = glsl_type_wrap_in_arrays(var_type, f->type);
         root = f;
      }

      nir_variable_mode mode = state->base_var->data.mode;
      if (mode == nir_var_function_temp) {
         field->var = nir_local_variable_create(state->impl, var_type, name);
      } else {
         field->var = nir_variable_create(state->shader, mode, var_type, name);
      }
      field->var->data.ray_query = state->base_var->data.ray_query;
      field->var->constant_initializer = gather_constant_initializers(state->base_var->constant_initializer,
                                                                      field->var, state->base_var->type,
                                                                      root, state);
   }
}
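
/* A rough example of the resulting split: for a variable
 *
 *    struct { vec4 a; float b; } s[4];
 *
 * the leaves become two new variables, "s_a" and "s_b", whose types are the
 * leaf types re-wrapped in the enclosing array dimensions (vec4[4] and
 * float[4] here), each inheriting ray_query data and a per-member slice of
 * any constant initializer.
 */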

static bool
split_var_list_structs(nir_shader *shader,
                       nir_function_impl *impl,
                       struct exec_list *vars,
                       nir_variable_mode mode,
                       struct hash_table *var_field_map,
                       struct set **complex_vars,
                       void *mem_ctx)
{
   struct split_var_state state = {
      .mem_ctx = mem_ctx,
      .shader = shader,
      .impl = impl,
   };

   struct exec_list split_vars;
   exec_list_make_empty(&split_vars);

   /* To avoid list confusion (we'll be adding things as we split variables),
    * pull all of the variables we plan to split off of the list
    */
   nir_foreach_variable_in_list_safe(var, vars) {
      if (var->data.mode != mode)
         continue;

      if (!glsl_type_is_struct_or_ifc(glsl_without_array(var->type)))
         continue;

      if (*complex_vars == NULL)
         *complex_vars = get_complex_used_vars(shader, mem_ctx);

      /* We can't split a variable that's referenced with a deref that has any
       * sort of complex usage.
       */
      if (_mesa_set_search(*complex_vars, var))
         continue;

      exec_node_remove(&var->node);
      exec_list_push_tail(&split_vars, &var->node);
   }

   nir_foreach_variable_in_list(var, &split_vars) {
      state.base_var = var;

      struct field *root_field = ralloc(mem_ctx, struct field);
      init_field_for_type(root_field, NULL, var->type, var->name, &state);
      _mesa_hash_table_insert(var_field_map, var, root_field);
   }

   return !exec_list_is_empty(&split_vars);
}

static void
split_struct_derefs_impl(nir_function_impl *impl,
                         struct hash_table *var_field_map,
                         nir_variable_mode modes,
                         void *mem_ctx)
{
   nir_builder b = nir_builder_create(impl);

   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_deref)
            continue;

         nir_deref_instr *deref = nir_instr_as_deref(instr);
         if (!nir_deref_mode_may_be(deref, modes))
            continue;

         /* Clean up any dead derefs we find lying around.  They may refer to
          * variables we're planning to split.
          */
         if (nir_deref_instr_remove_if_unused(deref))
            continue;

         if (!glsl_type_is_vector_or_scalar(deref->type))
            continue;

         nir_variable *base_var = nir_deref_instr_get_variable(deref);
         /* If we can't chase back to the variable, then we're a complex use.
          * This should have been detected by get_complex_used_vars() and the
          * variable should not have been split.  However, we have no way of
          * knowing that here, so we just have to trust it.
          */
         if (base_var == NULL)
            continue;

         struct hash_entry *entry =
            _mesa_hash_table_search(var_field_map, base_var);
         if (!entry)
            continue;

         struct field *root_field = entry->data;

         nir_deref_path path;
         nir_deref_path_init(&path, deref, mem_ctx);

         struct field *tail_field = root_field;
         for (unsigned i = 0; path.path[i]; i++) {
            if (path.path[i]->deref_type != nir_deref_type_struct)
               continue;

            assert(i > 0);
            assert(glsl_type_is_struct_or_ifc(path.path[i - 1]->type));
            assert(path.path[i - 1]->type ==
                   glsl_without_array(tail_field->type));

            tail_field = &tail_field->fields[path.path[i]->strct.index];
         }
         nir_variable *split_var = tail_field->var;

         nir_deref_instr *new_deref = NULL;
         for (unsigned i = 0; path.path[i]; i++) {
            nir_deref_instr *p = path.path[i];
            b.cursor = nir_after_instr(&p->instr);

            switch (p->deref_type) {
            case nir_deref_type_var:
               assert(new_deref == NULL);
               new_deref = nir_build_deref_var(&b, split_var);
               break;

            case nir_deref_type_array:
            case nir_deref_type_array_wildcard:
               new_deref = nir_build_deref_follower(&b, new_deref, p);
               break;

            case nir_deref_type_struct:
               /* Nothing to do; we're splitting structs */
               break;

            default:
               unreachable("Invalid deref type in path");
            }
         }

         assert(new_deref->type == deref->type);
         nir_def_rewrite_uses(&deref->def,
                              &new_deref->def);
         nir_deref_instr_remove_if_unused(deref);
      }
   }
}
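
/* For example, a deref chain
 *
 *    var(s) -> array(3) -> struct(.a)
 *
 * is rebuilt against the member's own variable as
 *
 *    var(s_a) -> array(3)
 *
 * The struct step simply disappears because each member now has its own
 * variable.
 */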

/** A pass for splitting structs into multiple variables
 *
 * This pass splits arrays of structs into multiple variables, one for each
 * (possibly nested) structure member.  After this pass completes, no
 * variables of the given mode will contain a struct type.
 */
bool
nir_split_struct_vars(nir_shader *shader, nir_variable_mode modes)
{
   void *mem_ctx = ralloc_context(NULL);
   struct hash_table *var_field_map =
      _mesa_pointer_hash_table_create(mem_ctx);
   struct set *complex_vars = NULL;

   bool has_global_splits = false;
   nir_variable_mode global_modes = modes & ~nir_var_function_temp;
   if (global_modes) {
      has_global_splits = split_var_list_structs(shader, NULL,
                                                 &shader->variables,
                                                 global_modes,
                                                 var_field_map,
                                                 &complex_vars,
                                                 mem_ctx);
   }

   bool progress = false;
   nir_foreach_function_impl(impl, shader) {
      bool has_local_splits = false;
      if (modes & nir_var_function_temp) {
         has_local_splits = split_var_list_structs(shader, impl,
                                                   &impl->locals,
                                                   nir_var_function_temp,
                                                   var_field_map,
                                                   &complex_vars,
                                                   mem_ctx);
      }

      if (has_global_splits || has_local_splits) {
         split_struct_derefs_impl(impl, var_field_map,
                                  modes, mem_ctx);

         nir_metadata_preserve(impl, nir_metadata_block_index |
                                        nir_metadata_dominance);
         progress = true;
      } else {
         nir_metadata_preserve(impl, nir_metadata_all);
      }
   }

   ralloc_free(mem_ctx);

   return progress;
}
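
/* A sketch of typical driver usage (the pass-loop idiom here is illustrative,
 * not something this file defines):
 *
 *    NIR_PASS(progress, shader, nir_split_struct_vars, nir_var_function_temp);
 *    NIR_PASS(progress, shader, nir_split_array_vars, nir_var_function_temp);
 *
 * with the struct split run first so that the array split only ever sees
 * arrays of vectors.
 */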

struct array_level_info {
   unsigned array_len;
   bool split;
};

struct array_split {
   /* Only set if this is the tail end of the splitting */
   nir_variable *var;

   unsigned num_splits;
   struct array_split *splits;
};

struct array_var_info {
   nir_variable *base_var;

   const struct glsl_type *split_var_type;

   bool split_var;
   struct array_split root_split;

   unsigned num_levels;
   struct array_level_info levels[0];
};

static bool
init_var_list_array_infos(nir_shader *shader,
                          struct exec_list *vars,
                          nir_variable_mode mode,
                          struct hash_table *var_info_map,
                          struct set **complex_vars,
                          void *mem_ctx)
{
   bool has_array = false;

   nir_foreach_variable_in_list(var, vars) {
      if (var->data.mode != mode)
         continue;

      int num_levels = num_array_levels_in_array_of_vector_type(var->type);
      if (num_levels <= 0)
         continue;

      if (*complex_vars == NULL)
         *complex_vars = get_complex_used_vars(shader, mem_ctx);

      /* We can't split a variable that's referenced with a deref that has any
       * sort of complex usage.
       */
      if (_mesa_set_search(*complex_vars, var))
         continue;

      struct array_var_info *info =
         rzalloc_size(mem_ctx, sizeof(*info) +
                                  num_levels * sizeof(info->levels[0]));

      info->base_var = var;
      info->num_levels = num_levels;

      const struct glsl_type *type = var->type;
      for (int i = 0; i < num_levels; i++) {
         info->levels[i].array_len = glsl_get_length(type);
         type = glsl_get_array_element(type);

         /* All levels start out initially as split */
         info->levels[i].split = true;
      }

      _mesa_hash_table_insert(var_info_map, var, info);
      has_array = true;
   }

   return has_array;
}

static struct array_var_info *
get_array_var_info(nir_variable *var,
                   struct hash_table *var_info_map)
{
   struct hash_entry *entry =
      _mesa_hash_table_search(var_info_map, var);
   return entry ? entry->data : NULL;
}

static struct array_var_info *
get_array_deref_info(nir_deref_instr *deref,
                     struct hash_table *var_info_map,
                     nir_variable_mode modes)
{
   if (!nir_deref_mode_may_be(deref, modes))
      return NULL;

   nir_variable *var = nir_deref_instr_get_variable(deref);
   if (var == NULL)
      return NULL;

   return get_array_var_info(var, var_info_map);
}

static void
mark_array_deref_used(nir_deref_instr *deref,
                      struct hash_table *var_info_map,
                      nir_variable_mode modes,
                      void *mem_ctx)
{
   struct array_var_info *info =
      get_array_deref_info(deref, var_info_map, modes);
   if (!info)
      return;

   nir_deref_path path;
   nir_deref_path_init(&path, deref, mem_ctx);

   /* Walk the path and look for indirects.  If we have an array deref with an
    * indirect, mark the given level as not being split.
    */
   for (unsigned i = 0; i < info->num_levels; i++) {
      nir_deref_instr *p = path.path[i + 1];
      if (p->deref_type == nir_deref_type_array &&
          !nir_src_is_const(p->arr.index))
         info->levels[i].split = false;
   }
}
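
/* Example of the heuristic: for an access like "arr[i][2]" where "i" is not
 * constant, level 0 loses its split flag while level 1 stays splittable; a
 * fully constant access such as "arr[1][2]" leaves both levels split.
 */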

static void
mark_array_usage_impl(nir_function_impl *impl,
                      struct hash_table *var_info_map,
                      nir_variable_mode modes,
                      void *mem_ctx)
{
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_copy_deref:
            mark_array_deref_used(nir_src_as_deref(intrin->src[1]),
                                  var_info_map, modes, mem_ctx);
            FALLTHROUGH;

         case nir_intrinsic_load_deref:
         case nir_intrinsic_store_deref:
            mark_array_deref_used(nir_src_as_deref(intrin->src[0]),
                                  var_info_map, modes, mem_ctx);
            break;

         default:
            break;
         }
      }
   }
}

static void
create_split_array_vars(struct array_var_info *var_info,
                        unsigned level,
                        struct array_split *split,
                        const char *name,
                        nir_shader *shader,
                        nir_function_impl *impl,
                        void *mem_ctx)
{
   while (level < var_info->num_levels && !var_info->levels[level].split) {
      name = ralloc_asprintf(mem_ctx, "%s[*]", name);
      level++;
   }

   if (level == var_info->num_levels) {
      /* We add parens to the variable name so it looks like "(foo[2][*])" so
       * that further derefs will look like "(foo[2][*])[ssa_6]"
       */
      name = ralloc_asprintf(mem_ctx, "(%s)", name);

      nir_variable_mode mode = var_info->base_var->data.mode;
      if (mode == nir_var_function_temp) {
         split->var = nir_local_variable_create(impl,
                                                var_info->split_var_type, name);
      } else {
         split->var = nir_variable_create(shader, mode,
                                          var_info->split_var_type, name);
      }
      split->var->data.ray_query = var_info->base_var->data.ray_query;
   } else {
      assert(var_info->levels[level].split);
      split->num_splits = var_info->levels[level].array_len;
      split->splits = rzalloc_array(mem_ctx, struct array_split,
                                    split->num_splits);
      for (unsigned i = 0; i < split->num_splits; i++) {
         create_split_array_vars(var_info, level + 1, &split->splits[i],
                                 ralloc_asprintf(mem_ctx, "%s[%d]", name, i),
                                 shader, impl, mem_ctx);
      }
   }
}
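
/* As a naming example, splitting "foo[2][3]" where only the outer level is
 * split produces "(foo[0][*])" and "(foo[1][*])", each of the remaining
 * inner array type; the "[*]" and parentheses exist only to keep later deref
 * printing readable.
 */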

static bool
split_var_list_arrays(nir_shader *shader,
                      nir_function_impl *impl,
                      struct exec_list *vars,
                      nir_variable_mode mode,
                      struct hash_table *var_info_map,
                      void *mem_ctx)
{
   struct exec_list split_vars;
   exec_list_make_empty(&split_vars);

   nir_foreach_variable_in_list_safe(var, vars) {
      if (var->data.mode != mode)
         continue;

      struct array_var_info *info = get_array_var_info(var, var_info_map);
      if (!info)
         continue;

      bool has_split = false;
      const struct glsl_type *split_type =
         glsl_without_array_or_matrix(var->type);
      for (int i = info->num_levels - 1; i >= 0; i--) {
         if (info->levels[i].split) {
            has_split = true;
            continue;
         }

         /* If the original type was a matrix type, we'd like to keep that so
          * we don't convert matrices into arrays.
          */
         if (i == info->num_levels - 1 &&
             glsl_type_is_matrix(glsl_without_array(var->type))) {
            split_type = glsl_matrix_type(glsl_get_base_type(split_type),
                                          glsl_get_components(split_type),
                                          info->levels[i].array_len);
         } else {
            split_type = glsl_array_type(split_type, info->levels[i].array_len, 0);
         }
      }

      if (has_split) {
         info->split_var_type = split_type;
         /* To avoid list confusion (we'll be adding things as we split
          * variables), pull all of the variables we plan to split off of the
          * main variable list.
          */
         exec_node_remove(&var->node);
         exec_list_push_tail(&split_vars, &var->node);
      } else {
         assert(split_type == glsl_get_bare_type(var->type));
         /* If we're not modifying this variable, delete the info so we skip
          * it faster in later passes.
          */
         _mesa_hash_table_remove_key(var_info_map, var);
      }
   }

   nir_foreach_variable_in_list(var, &split_vars) {
      struct array_var_info *info = get_array_var_info(var, var_info_map);
      create_split_array_vars(info, 0, &info->root_split, var->name,
                              shader, impl, mem_ctx);
   }

   return !exec_list_is_empty(&split_vars);
}

static bool
deref_has_split_wildcard(nir_deref_path *path,
                         struct array_var_info *info)
{
   if (info == NULL)
      return false;

   assert(path->path[0]->var == info->base_var);
   for (unsigned i = 0; i < info->num_levels; i++) {
      if (path->path[i + 1]->deref_type == nir_deref_type_array_wildcard &&
          info->levels[i].split)
         return true;
   }

   return false;
}

static bool
array_path_is_out_of_bounds(nir_deref_path *path,
                            struct array_var_info *info)
{
   if (info == NULL)
      return false;

   assert(path->path[0]->var == info->base_var);
   for (unsigned i = 0; i < info->num_levels; i++) {
      nir_deref_instr *p = path->path[i + 1];
      if (p->deref_type == nir_deref_type_array_wildcard)
         continue;

      if (nir_src_is_const(p->arr.index) &&
          nir_src_as_uint(p->arr.index) >= info->levels[i].array_len)
         return true;
   }

   return false;
}

static void
emit_split_copies(nir_builder *b,
                  struct array_var_info *dst_info, nir_deref_path *dst_path,
                  unsigned dst_level, nir_deref_instr *dst,
                  struct array_var_info *src_info, nir_deref_path *src_path,
                  unsigned src_level, nir_deref_instr *src)
{
   nir_deref_instr *dst_p, *src_p;

   while ((dst_p = dst_path->path[dst_level + 1])) {
      if (dst_p->deref_type == nir_deref_type_array_wildcard)
         break;

      dst = nir_build_deref_follower(b, dst, dst_p);
      dst_level++;
   }

   while ((src_p = src_path->path[src_level + 1])) {
      if (src_p->deref_type == nir_deref_type_array_wildcard)
         break;

      src = nir_build_deref_follower(b, src, src_p);
      src_level++;
   }

   if (src_p == NULL || dst_p == NULL) {
      assert(src_p == NULL && dst_p == NULL);
      nir_copy_deref(b, dst, src);
   } else {
      assert(dst_p->deref_type == nir_deref_type_array_wildcard &&
             src_p->deref_type == nir_deref_type_array_wildcard);

      if ((dst_info && dst_info->levels[dst_level].split) ||
          (src_info && src_info->levels[src_level].split)) {
         /* At least one of the source or the destination has no indirects at
          * this level, so we are lowering it.
          */
         assert(glsl_get_length(dst_path->path[dst_level]->type) ==
                glsl_get_length(src_path->path[src_level]->type));
         unsigned len = glsl_get_length(dst_path->path[dst_level]->type);
         for (unsigned i = 0; i < len; i++) {
            emit_split_copies(b, dst_info, dst_path, dst_level + 1,
                              nir_build_deref_array_imm(b, dst, i),
                              src_info, src_path, src_level + 1,
                              nir_build_deref_array_imm(b, src, i));
         }
      } else {
         /* Neither side is being split so we just keep going */
         emit_split_copies(b, dst_info, dst_path, dst_level + 1,
                           nir_build_deref_array_wildcard(b, dst),
                           src_info, src_path, src_level + 1,
                           nir_build_deref_array_wildcard(b, src));
      }
   }
}
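
/* For example, a "copy_deref dst[*], src[*]" whose level is split on either
 * side gets expanded into one copy per constant index:
 *
 *    copy_deref dst[0], src[0]
 *    copy_deref dst[1], src[1]
 *    ...
 *
 * while levels that remain unsplit keep their wildcard and are copied in one
 * go.
 */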

static void
split_array_copies_impl(nir_function_impl *impl,
                        struct hash_table *var_info_map,
                        nir_variable_mode modes,
                        void *mem_ctx)
{
   nir_builder b = nir_builder_create(impl);

   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *copy = nir_instr_as_intrinsic(instr);
         if (copy->intrinsic != nir_intrinsic_copy_deref)
            continue;

         nir_deref_instr *dst_deref = nir_src_as_deref(copy->src[0]);
         nir_deref_instr *src_deref = nir_src_as_deref(copy->src[1]);

         struct array_var_info *dst_info =
            get_array_deref_info(dst_deref, var_info_map, modes);
         struct array_var_info *src_info =
            get_array_deref_info(src_deref, var_info_map, modes);

         if (!src_info && !dst_info)
            continue;

         nir_deref_path dst_path, src_path;
         nir_deref_path_init(&dst_path, dst_deref, mem_ctx);
         nir_deref_path_init(&src_path, src_deref, mem_ctx);

         if (!deref_has_split_wildcard(&dst_path, dst_info) &&
             !deref_has_split_wildcard(&src_path, src_info))
            continue;

         b.cursor = nir_instr_remove(&copy->instr);

         emit_split_copies(&b, dst_info, &dst_path, 0, dst_path.path[0],
                           src_info, &src_path, 0, src_path.path[0]);
      }
   }
}

static void
split_array_access_impl(nir_function_impl *impl,
                        struct hash_table *var_info_map,
                        nir_variable_mode modes,
                        void *mem_ctx)
{
   nir_builder b = nir_builder_create(impl);

   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type == nir_instr_type_deref) {
            /* Clean up any dead derefs we find lying around.  They may refer
             * to variables we're planning to split.
             */
            nir_deref_instr *deref = nir_instr_as_deref(instr);
            if (nir_deref_mode_may_be(deref, modes))
               nir_deref_instr_remove_if_unused(deref);
            continue;
         }

         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         if (intrin->intrinsic != nir_intrinsic_load_deref &&
             intrin->intrinsic != nir_intrinsic_store_deref &&
             intrin->intrinsic != nir_intrinsic_copy_deref)
            continue;

         const unsigned num_derefs =
            intrin->intrinsic == nir_intrinsic_copy_deref ? 2 : 1;

         for (unsigned d = 0; d < num_derefs; d++) {
            nir_deref_instr *deref = nir_src_as_deref(intrin->src[d]);

            struct array_var_info *info =
               get_array_deref_info(deref, var_info_map, modes);
            if (!info)
               continue;

            nir_deref_path path;
            nir_deref_path_init(&path, deref, mem_ctx);

            b.cursor = nir_before_instr(&intrin->instr);

            if (array_path_is_out_of_bounds(&path, info)) {
               /* If one of the derefs is out-of-bounds, we just delete the
                * instruction.  If a destination is out of bounds, then it may
                * have been in-bounds prior to shrinking so we don't want to
                * accidentally stomp something.  However, we've already proven
                * that it will never be read so it's safe to delete.  If a
                * source is out of bounds then it is loading random garbage.
                * For loads, we replace their uses with an undef instruction
                * and for copies we just delete the copy since it was writing
                * undefined garbage anyway and we may as well leave the random
                * garbage in the destination alone.
                */
               if (intrin->intrinsic == nir_intrinsic_load_deref) {
                  nir_def *u =
                     nir_undef(&b, intrin->def.num_components,
                               intrin->def.bit_size);
                  nir_def_rewrite_uses(&intrin->def,
                                       u);
               }
               nir_instr_remove(&intrin->instr);
               for (unsigned i = 0; i < num_derefs; i++)
                  nir_deref_instr_remove_if_unused(nir_src_as_deref(intrin->src[i]));
               break;
            }

            struct array_split *split = &info->root_split;
            for (unsigned i = 0; i < info->num_levels; i++) {
               if (info->levels[i].split) {
                  nir_deref_instr *p = path.path[i + 1];
                  unsigned index = nir_src_as_uint(p->arr.index);
                  assert(index < info->levels[i].array_len);
                  split = &split->splits[index];
               }
            }
            assert(!split->splits && split->var);

            nir_deref_instr *new_deref = nir_build_deref_var(&b, split->var);
            for (unsigned i = 0; i < info->num_levels; i++) {
               if (!info->levels[i].split) {
                  new_deref = nir_build_deref_follower(&b, new_deref,
                                                       path.path[i + 1]);
               }
            }

            if (is_array_deref_of_vec(deref))
               new_deref = nir_build_deref_follower(&b, new_deref, deref);

            assert(new_deref->type == deref->type);

            /* Rewrite the deref source to point to the split one */
            nir_src_rewrite(&intrin->src[d], &new_deref->def);
            nir_deref_instr_remove_if_unused(deref);
         }
      }
   }
}

/** A pass for splitting arrays of vectors into multiple variables
 *
 * This pass looks at arrays (possibly multiple levels) of vectors (not
 * structures or other types) and tries to split them into piles of variables,
 * one for each array element.  The heuristic used is simple: If a given array
 * level is never used with an indirect, that array level will get split.
 *
 * This pass could probably handle structures easily enough, but making a pass
 * that could see through an array of structures of arrays would be difficult,
 * so it's best to just run nir_split_struct_vars first.
 */
bool
nir_split_array_vars(nir_shader *shader, nir_variable_mode modes)
{
   void *mem_ctx = ralloc_context(NULL);
   struct hash_table *var_info_map = _mesa_pointer_hash_table_create(mem_ctx);
   struct set *complex_vars = NULL;

   bool has_global_array = false;
   if (modes & (~nir_var_function_temp)) {
      has_global_array = init_var_list_array_infos(shader,
                                                   &shader->variables,
                                                   modes,
                                                   var_info_map,
                                                   &complex_vars,
                                                   mem_ctx);
   }

   bool has_any_array = false;
   nir_foreach_function_impl(impl, shader) {
      bool has_local_array = false;
      if (modes & nir_var_function_temp) {
         has_local_array = init_var_list_array_infos(shader,
                                                     &impl->locals,
                                                     nir_var_function_temp,
                                                     var_info_map,
                                                     &complex_vars,
                                                     mem_ctx);
      }

      if (has_global_array || has_local_array) {
         has_any_array = true;
         mark_array_usage_impl(impl, var_info_map, modes, mem_ctx);
      }
   }

   /* If we failed to find any arrays of vectors, bail early. */
   if (!has_any_array) {
      ralloc_free(mem_ctx);
      nir_shader_preserve_all_metadata(shader);
      return false;
   }

   bool has_global_splits = false;
   if (modes & (~nir_var_function_temp)) {
      has_global_splits = split_var_list_arrays(shader, NULL,
                                                &shader->variables,
                                                modes,
                                                var_info_map, mem_ctx);
   }

   bool progress = false;
   nir_foreach_function_impl(impl, shader) {
      bool has_local_splits = false;
      if (modes & nir_var_function_temp) {
         has_local_splits = split_var_list_arrays(shader, impl,
                                                  &impl->locals,
                                                  nir_var_function_temp,
                                                  var_info_map, mem_ctx);
      }

      if (has_global_splits || has_local_splits) {
         split_array_copies_impl(impl, var_info_map, modes, mem_ctx);
         split_array_access_impl(impl, var_info_map, modes, mem_ctx);

         nir_metadata_preserve(impl, nir_metadata_block_index |
                                        nir_metadata_dominance);
         progress = true;
      } else {
         nir_metadata_preserve(impl, nir_metadata_all);
      }
   }

   ralloc_free(mem_ctx);

   return progress;
}

struct array_level_usage {
   unsigned array_len;

   /* The value UINT_MAX will be used to indicate an indirect */
   unsigned max_read;
   unsigned max_written;

   /* True if there is a copy that isn't to/from a shrinkable array */
   bool has_external_copy;
   struct set *levels_copied;
};

struct vec_var_usage {
   /* Convenience set of all components this variable has */
   nir_component_mask_t all_comps;

   nir_component_mask_t comps_read;
   nir_component_mask_t comps_written;

   nir_component_mask_t comps_kept;

   /* True if there is a copy that isn't to/from a shrinkable vector */
   bool has_external_copy;
   bool has_complex_use;
   struct set *vars_copied;

   unsigned num_levels;
   struct array_level_usage levels[0];
};

static struct vec_var_usage *
get_vec_var_usage(nir_variable *var,
                  struct hash_table *var_usage_map,
                  bool add_usage_entry, void *mem_ctx)
{
   struct hash_entry *entry = _mesa_hash_table_search(var_usage_map, var);
   if (entry)
      return entry->data;

   if (!add_usage_entry)
      return NULL;

   /* Check to make sure that we are working with an array of vectors.  We
    * don't bother to shrink single vectors because we figure that we can
    * clean it up better with SSA than by inserting piles of vecN instructions
    * to compact results.
    */
   int num_levels = num_array_levels_in_array_of_vector_type(var->type);
   if (num_levels < 1)
      return NULL; /* Not an array of vectors */

   struct vec_var_usage *usage =
      rzalloc_size(mem_ctx, sizeof(*usage) +
                               num_levels * sizeof(usage->levels[0]));

   usage->num_levels = num_levels;
   const struct glsl_type *type = var->type;
   for (unsigned i = 0; i < num_levels; i++) {
      usage->levels[i].array_len = glsl_get_length(type);
      type = glsl_get_array_element(type);
   }
   assert(glsl_type_is_vector_or_scalar(type));

   usage->all_comps = (1 << glsl_get_components(type)) - 1;

   _mesa_hash_table_insert(var_usage_map, var, usage);

   return usage;
}
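
/* For reference, all_comps is simply a full write-mask for the leaf vector:
 * a vec3 leaf gives 0x7 and a scalar gives 0x1.  comps_read and comps_written
 * are accumulated against this mask as uses are scanned.
 */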

static struct vec_var_usage *
get_vec_deref_usage(nir_deref_instr *deref,
                    struct hash_table *var_usage_map,
                    nir_variable_mode modes,
                    bool add_usage_entry, void *mem_ctx)
{
   if (!nir_deref_mode_may_be(deref, modes))
      return NULL;

   nir_variable *var = nir_deref_instr_get_variable(deref);
   if (var == NULL)
      return NULL;

   return get_vec_var_usage(nir_deref_instr_get_variable(deref),
                            var_usage_map, add_usage_entry, mem_ctx);
}

static void
mark_deref_if_complex(nir_deref_instr *deref,
                      struct hash_table *var_usage_map,
                      nir_variable_mode modes,
                      void *mem_ctx)
{
   /* Only bother with var derefs because nir_deref_instr_has_complex_use is
    * recursive.
    */
   if (deref->deref_type != nir_deref_type_var)
      return;

   if (!(deref->var->data.mode & modes))
      return;

   if (!nir_deref_instr_has_complex_use(deref, nir_deref_instr_has_complex_use_allow_atomics))
      return;

   struct vec_var_usage *usage =
      get_vec_var_usage(deref->var, var_usage_map, true, mem_ctx);
   if (!usage)
      return;

   usage->has_complex_use = true;
}

static void
mark_deref_used(nir_deref_instr *deref,
                nir_component_mask_t comps_read,
                nir_component_mask_t comps_written,
                nir_deref_instr *copy_deref,
                struct hash_table *var_usage_map,
                nir_variable_mode modes,
                void *mem_ctx)
{
   if (!nir_deref_mode_may_be(deref, modes))
      return;

   nir_variable *var = nir_deref_instr_get_variable(deref);
   if (var == NULL)
      return;

   struct vec_var_usage *usage =
      get_vec_var_usage(var, var_usage_map, true, mem_ctx);
   if (!usage)
      return;

   if (is_array_deref_of_vec(deref)) {
      if (comps_read)
         comps_read = usage->all_comps;
      if (comps_written)
         comps_written = usage->all_comps;
   }

   usage->comps_read |= comps_read & usage->all_comps;
   usage->comps_written |= comps_written & usage->all_comps;

   struct vec_var_usage *copy_usage = NULL;
   if (copy_deref) {
      copy_usage = get_vec_deref_usage(copy_deref, var_usage_map, modes,
                                       true, mem_ctx);
      if (copy_usage) {
         if (usage->vars_copied == NULL) {
            usage->vars_copied = _mesa_pointer_set_create(mem_ctx);
         }
         _mesa_set_add(usage->vars_copied, copy_usage);
      } else {
         usage->has_external_copy = true;
      }
   }

   nir_deref_path path;
   nir_deref_path_init(&path, deref, mem_ctx);

   nir_deref_path copy_path;
   if (copy_usage)
      nir_deref_path_init(&copy_path, copy_deref, mem_ctx);

   unsigned copy_i = 0;
   for (unsigned i = 0; i < usage->num_levels; i++) {
      struct array_level_usage *level = &usage->levels[i];
      nir_deref_instr *deref = path.path[i + 1];
      assert(deref->deref_type == nir_deref_type_array ||
             deref->deref_type == nir_deref_type_array_wildcard);

      unsigned max_used;
      if (deref->deref_type == nir_deref_type_array) {
         max_used = nir_src_is_const(deref->arr.index) ? nir_src_as_uint(deref->arr.index) : UINT_MAX;
      } else {
         /* For wildcards, we read or wrote the whole thing. */
         assert(deref->deref_type == nir_deref_type_array_wildcard);
         max_used = level->array_len - 1;

         if (copy_usage) {
            /* Match each wildcard level with the level on copy_usage */
            for (; copy_path.path[copy_i + 1]; copy_i++) {
               if (copy_path.path[copy_i + 1]->deref_type ==
                   nir_deref_type_array_wildcard)
                  break;
            }
            struct array_level_usage *copy_level =
               &copy_usage->levels[copy_i++];

            if (level->levels_copied == NULL) {
               level->levels_copied = _mesa_pointer_set_create(mem_ctx);
            }
            _mesa_set_add(level->levels_copied, copy_level);
         } else {
            /* We have a wildcard and it comes from a variable we aren't
             * tracking; flag it and we'll know to not shorten this array.
             */
            level->has_external_copy = true;
         }
      }

      if (comps_written)
         level->max_written = MAX2(level->max_written, max_used);
      if (comps_read)
         level->max_read = MAX2(level->max_read, max_used);
   }
}
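
/* A worked example: a store to "v[7].x" records max_written = 7 for that
 * level and component x in comps_written.  An indirect store "v[i].x = ..."
 * records max_written = UINT_MAX, which later blocks shrinking that level,
 * whereas an indirect load only pushes max_read to UINT_MAX and the level
 * can still be shrunk down to what was actually written.
 */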

static bool
src_is_load_deref(nir_src src, nir_src deref_src)
{
   nir_intrinsic_instr *load = nir_src_as_intrinsic(src);
   if (load == NULL || load->intrinsic != nir_intrinsic_load_deref)
      return false;

   return load->src[0].ssa == deref_src.ssa;
}

/* Returns all non-self-referential components of a store instruction.  A
 * component is self-referential if it comes from the same component of a load
 * instruction on the same deref.  If the only data in a particular component
 * of a variable came directly from that component then it's undefined.  The
 * only way to get defined data into a component of a variable is for it to
 * get written there by something outside or from a different component.
 *
 * This is a fairly common pattern in shaders that come from either GLSL IR or
 * GLSLang because both glsl_to_nir and GLSLang implement write-masking with
 * load-vec-store.
 */
static nir_component_mask_t
get_non_self_referential_store_comps(nir_intrinsic_instr *store)
{
   nir_component_mask_t comps = nir_intrinsic_write_mask(store);

   nir_instr *src_instr = store->src[1].ssa->parent_instr;
   if (src_instr->type != nir_instr_type_alu)
      return comps;

   nir_alu_instr *src_alu = nir_instr_as_alu(src_instr);

   if (src_alu->op == nir_op_mov) {
      /* If it's just a swizzle of a load from the same deref, discount any
       * channels that don't move in the swizzle.
       */
      if (src_is_load_deref(src_alu->src[0].src, store->src[0])) {
         for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++) {
            if (src_alu->src[0].swizzle[i] == i)
               comps &= ~(1u << i);
         }
      }
   } else if (nir_op_is_vec(src_alu->op)) {
      /* If it's a vec, discount any channels that are just loads from the
       * same deref put in the same spot.
       */
      for (unsigned i = 0; i < nir_op_infos[src_alu->op].num_inputs; i++) {
         if (src_is_load_deref(src_alu->src[i].src, store->src[0]) &&
             src_alu->src[i].swizzle[0] == i)
            comps &= ~(1u << i);
      }
   }

   return comps;
}
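
/* Example of the pattern this catches: write-masked GLSL such as "v.zw = f()"
 * often reaches NIR as
 *
 *    %t = load_deref v
 *    %r = vec4(%t.x, %t.y, f().x, f().y)
 *    store_deref v, %r, writemask 0xf
 *
 * Components x and y only feed themselves back, so this helper reports just
 * z and w as genuinely written.
 */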

static void
find_used_components_impl(nir_function_impl *impl,
                          struct hash_table *var_usage_map,
                          nir_variable_mode modes,
                          void *mem_ctx)
{
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type == nir_instr_type_deref) {
            mark_deref_if_complex(nir_instr_as_deref(instr),
                                  var_usage_map, modes, mem_ctx);
         }

         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_load_deref:
            mark_deref_used(nir_src_as_deref(intrin->src[0]),
                            nir_def_components_read(&intrin->def), 0,
                            NULL, var_usage_map, modes, mem_ctx);
            break;

         case nir_intrinsic_store_deref:
            mark_deref_used(nir_src_as_deref(intrin->src[0]),
                            0, get_non_self_referential_store_comps(intrin),
                            NULL, var_usage_map, modes, mem_ctx);
            break;

         case nir_intrinsic_copy_deref: {
            /* Just mark everything used for copies. */
            nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
            nir_deref_instr *src = nir_src_as_deref(intrin->src[1]);
            mark_deref_used(dst, 0, ~0, src, var_usage_map, modes, mem_ctx);
            mark_deref_used(src, ~0, 0, dst, var_usage_map, modes, mem_ctx);
            break;
         }

         default:
            break;
         }
      }
   }
}
1298 
1299 static bool
shrink_vec_var_list(struct exec_list * vars,nir_variable_mode mode,struct hash_table * var_usage_map)1300 shrink_vec_var_list(struct exec_list *vars,
1301                     nir_variable_mode mode,
1302                     struct hash_table *var_usage_map)
1303 {
1304    /* Initialize the components kept field of each variable.  This is the
1305     * AND of the components written and components read.  If a component is
1306     * written but never read, it's dead.  If it is read but never written,
1307     * then all values read are undefined garbage and we may as well not read
1308     * them.
1309     *
1310     * The same logic applies to the array length.  We make the array length
1311     * the minimum needed required length between read and write and plan to
1312     * discard any OOB access.  The one exception here is indirect writes
1313     * because we don't know where they will land and we can't shrink an array
1314     * with indirect writes because previously in-bounds writes may become
1315     * out-of-bounds and have undefined behavior.
1316     *
1317     * Also, if we have a copy that to/from something we can't shrink, we need
1318     * to leave components and array_len of any wildcards alone.
1319     */
1320    nir_foreach_variable_in_list(var, vars) {
1321       if (var->data.mode != mode)
1322          continue;
1323 
1324       struct vec_var_usage *usage =
1325          get_vec_var_usage(var, var_usage_map, false, NULL);
1326       if (!usage)
1327          continue;
1328 
1329       assert(usage->comps_kept == 0);
1330       if (usage->has_external_copy || usage->has_complex_use)
1331          usage->comps_kept = usage->all_comps;
1332       else
1333          usage->comps_kept = usage->comps_read & usage->comps_written;
1334 
1335       for (unsigned i = 0; i < usage->num_levels; i++) {
1336          struct array_level_usage *level = &usage->levels[i];
1337          assert(level->array_len > 0);
1338 
1339          if (level->max_written == UINT_MAX || level->has_external_copy ||
1340              usage->has_complex_use)
1341             continue; /* Can't shrink */
1342 
1343          unsigned max_used = MIN2(level->max_read, level->max_written);
1344          level->array_len = MIN2(max_used, level->array_len - 1) + 1;
1345       }
1346    }
1347 
1348    /* In order for variable copies to work, we have to have the same data type
1349     * on the source and the destination.  In order to satisfy this, we run a
1350     * little fixed-point algorithm to transitively ensure that we get enough
1351     * components and array elements for this to hold for all copies.
1352     */
1353    bool fp_progress;
1354    do {
1355       fp_progress = false;
1356       nir_foreach_variable_in_list(var, vars) {
1357          if (var->data.mode != mode)
1358             continue;
1359 
1360          struct vec_var_usage *var_usage =
1361             get_vec_var_usage(var, var_usage_map, false, NULL);
1362          if (!var_usage || !var_usage->vars_copied)
1363             continue;
1364 
1365          set_foreach(var_usage->vars_copied, copy_entry) {
1366             struct vec_var_usage *copy_usage = (void *)copy_entry->key;
1367             if (copy_usage->comps_kept != var_usage->comps_kept) {
1368                nir_component_mask_t comps_kept =
1369                   (var_usage->comps_kept | copy_usage->comps_kept);
1370                var_usage->comps_kept = comps_kept;
1371                copy_usage->comps_kept = comps_kept;
1372                fp_progress = true;
1373             }
1374          }
1375 
1376          for (unsigned i = 0; i < var_usage->num_levels; i++) {
1377             struct array_level_usage *var_level = &var_usage->levels[i];
1378             if (!var_level->levels_copied)
1379                continue;
1380 
1381             set_foreach(var_level->levels_copied, copy_entry) {
1382                struct array_level_usage *copy_level = (void *)copy_entry->key;
1383                if (var_level->array_len != copy_level->array_len) {
1384                   unsigned array_len =
1385                      MAX2(var_level->array_len, copy_level->array_len);
1386                   var_level->array_len = array_len;
1387                   copy_level->array_len = array_len;
1388                   fp_progress = true;
1389                }
1390             }
1391          }
1392       }
1393    } while (fp_progress);
1394 
1395    bool vars_shrunk = false;
1396    nir_foreach_variable_in_list_safe(var, vars) {
1397       if (var->data.mode != mode)
1398          continue;
1399 
1400       struct vec_var_usage *usage =
1401          get_vec_var_usage(var, var_usage_map, false, NULL);
1402       if (!usage)
1403          continue;
1404 
1405       bool shrunk = false;
1406       const struct glsl_type *vec_type = var->type;
1407       for (unsigned i = 0; i < usage->num_levels; i++) {
1408          /* If we've reduced the array to zero elements at some level, just
1409           * set comps_kept to 0 and delete the variable.
1410           */
1411          if (usage->levels[i].array_len == 0) {
1412             usage->comps_kept = 0;
1413             break;
1414          }
1415 
1416          assert(usage->levels[i].array_len <= glsl_get_length(vec_type));
1417          if (usage->levels[i].array_len < glsl_get_length(vec_type))
1418             shrunk = true;
1419          vec_type = glsl_get_array_element(vec_type);
1420       }
1421       assert(glsl_type_is_vector_or_scalar(vec_type));
1422 
1423       assert(usage->comps_kept == (usage->comps_kept & usage->all_comps));
1424       if (usage->comps_kept != usage->all_comps)
1425          shrunk = true;
1426 
1427       if (usage->comps_kept == 0) {
1428          /* This variable is dead, remove it */
1429          vars_shrunk = true;
1430          exec_node_remove(&var->node);
1431          continue;
1432       }
1433 
1434       if (!shrunk) {
1435          /* This variable doesn't need to be shrunk.  Remove it from the
1436           * hash table so later steps will ignore it.
1437           */
1438          _mesa_hash_table_remove_key(var_usage_map, var);
1439          continue;
1440       }
1441 
1442       /* Build the new var type */
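           /* (e.g. a vec4[10] accessed only at constant indices 0..3 and only
            * in .xz is rebuilt below as a vec2[4]; the access rewrite later in
            * this pass updates loads, stores and derefs to match.)
            */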
1443       unsigned new_num_comps = util_bitcount(usage->comps_kept);
1444       const struct glsl_type *new_type =
1445          glsl_vector_type(glsl_get_base_type(vec_type), new_num_comps);
1446       for (int i = usage->num_levels - 1; i >= 0; i--) {
1447          assert(usage->levels[i].array_len > 0);
1448          /* If the original type was a matrix type, we'd like to keep that so
1449           * we don't convert matrices into arrays.
1450           */
1451          if (i == usage->num_levels - 1 &&
1452              glsl_type_is_matrix(glsl_without_array(var->type)) &&
1453              new_num_comps > 1 && usage->levels[i].array_len > 1) {
1454             new_type = glsl_matrix_type(glsl_get_base_type(new_type),
1455                                         new_num_comps,
1456                                         usage->levels[i].array_len);
1457          } else {
1458             new_type = glsl_array_type(new_type, usage->levels[i].array_len, 0);
1459          }
1460       }
1461       var->type = new_type;
1462 
1463       vars_shrunk = true;
1464    }
1465 
1466    return vars_shrunk;
1467 }
1468 
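     /* Returns true if the deref uses a constant array index that lies at or
      * beyond the shrunk array_len at some level, i.e. an access we have
      * decided to discard as out-of-bounds.
      */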
1469 static bool
1470 vec_deref_is_oob(nir_deref_instr *deref,
1471                  struct vec_var_usage *usage)
1472 {
1473    nir_deref_path path;
1474    nir_deref_path_init(&path, deref, NULL);
1475 
1476    bool oob = false;
1477    for (unsigned i = 0; i < usage->num_levels; i++) {
1478       nir_deref_instr *p = path.path[i + 1];
1479       if (p->deref_type == nir_deref_type_array_wildcard)
1480          continue;
1481 
1482       if (nir_src_is_const(p->arr.index) &&
1483           nir_src_as_uint(p->arr.index) >= usage->levels[i].array_len) {
1484          oob = true;
1485          break;
1486       }
1487    }
1488 
1489    nir_deref_path_finish(&path);
1490 
1491    return oob;
1492 }
1493 
1494 static bool
1495 vec_deref_is_dead_or_oob(nir_deref_instr *deref,
1496                          struct hash_table *var_usage_map,
1497                          nir_variable_mode modes)
1498 {
1499    struct vec_var_usage *usage =
1500       get_vec_deref_usage(deref, var_usage_map, modes, false, NULL);
1501    if (!usage)
1502       return false;
1503 
1504    return usage->comps_kept == 0 || vec_deref_is_oob(deref, usage);
1505 }
1506 
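     /* Rewrites derefs, loads, stores and copies to match the shrunk variable
      * types: dead or OOB accesses are removed and the remaining vector
      * accesses are compacted down to the kept components.
      */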
1507 static void
1508 shrink_vec_var_access_impl(nir_function_impl *impl,
1509                            struct hash_table *var_usage_map,
1510                            nir_variable_mode modes)
1511 {
1512    nir_builder b = nir_builder_create(impl);
1513 
1514    nir_foreach_block(block, impl) {
1515       nir_foreach_instr_safe(instr, block) {
1516          switch (instr->type) {
1517          case nir_instr_type_deref: {
1518             nir_deref_instr *deref = nir_instr_as_deref(instr);
1519             if (!nir_deref_mode_may_be(deref, modes))
1520                break;
1521 
1522             /* Clean up any dead derefs we find lying around.  They may refer
1523              * to variables we've deleted.
1524              */
1525             if (nir_deref_instr_remove_if_unused(deref))
1526                break;
1527 
1528             /* Update the type in the deref to keep the types consistent as
1529              * you walk down the chain.  We don't need to check if this is one
1530              * of the derefs we're shrinking because this is a no-op if it
1531              * isn't.  The worst that could happen is that we accidentally fix
1532              * an invalid deref.
1533              */
1534             if (deref->deref_type == nir_deref_type_var) {
1535                deref->type = deref->var->type;
1536             } else if (deref->deref_type == nir_deref_type_array ||
1537                        deref->deref_type == nir_deref_type_array_wildcard) {
1538                nir_deref_instr *parent = nir_deref_instr_parent(deref);
1539                assert(glsl_type_is_array(parent->type) ||
1540                       glsl_type_is_matrix(parent->type) ||
1541                       glsl_type_is_vector(parent->type));
1542                deref->type = glsl_get_array_element(parent->type);
1543             }
1544             break;
1545          }
1546 
1547          case nir_instr_type_intrinsic: {
1548             nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1549 
1550             /* If we have a copy whose source or destination has been deleted
1551              * because we determined the variable was dead, then we just
1552              * delete the copy instruction.  If the source variable was dead
1553              * then it was writing undefined garbage anyway and if it's the
1554              * destination variable that's dead then the write isn't needed.
1555              */
1556             if (intrin->intrinsic == nir_intrinsic_copy_deref) {
1557                nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
1558                nir_deref_instr *src = nir_src_as_deref(intrin->src[1]);
1559                if (vec_deref_is_dead_or_oob(dst, var_usage_map, modes) ||
1560                    vec_deref_is_dead_or_oob(src, var_usage_map, modes)) {
1561                   nir_instr_remove(&intrin->instr);
1562                   nir_deref_instr_remove_if_unused(dst);
1563                   nir_deref_instr_remove_if_unused(src);
1564                }
1565                continue;
1566             }
1567 
1568             if (intrin->intrinsic != nir_intrinsic_load_deref &&
1569                 intrin->intrinsic != nir_intrinsic_store_deref)
1570                continue;
1571 
1572             nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1573             if (!nir_deref_mode_may_be(deref, modes))
1574                continue;
1575 
1576             struct vec_var_usage *usage =
1577                get_vec_deref_usage(deref, var_usage_map, modes, false, NULL);
1578             if (!usage)
1579                continue;
1580 
1581             if (usage->comps_kept == 0 || vec_deref_is_oob(deref, usage)) {
1582                if (intrin->intrinsic == nir_intrinsic_load_deref) {
1583                   nir_def *u =
1584                      nir_undef(&b, intrin->def.num_components,
1585                                intrin->def.bit_size);
1586                   nir_def_rewrite_uses(&intrin->def,
1587                                        u);
1588                }
1589                nir_instr_remove(&intrin->instr);
1590                nir_deref_instr_remove_if_unused(deref);
1591                continue;
1592             }
1593 
1594             /* If we're not dropping any components, there's no need to
1595              * compact vectors.
1596              */
1597             if (usage->comps_kept == usage->all_comps)
1598                continue;
1599 
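                 /* Loads are shrunk to the kept components and a full-width
                  * vector (undef in the dropped lanes) is rebuilt for the
                  * existing users; e.g. keeping yz of a vec4 load yields a
                  * 2-component load plus vec4(undef, y, z, undef).
                  */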
1600             if (intrin->intrinsic == nir_intrinsic_load_deref) {
1601                b.cursor = nir_after_instr(&intrin->instr);
1602 
1603                nir_def *undef =
1604                   nir_undef(&b, 1, intrin->def.bit_size);
1605                nir_def *vec_srcs[NIR_MAX_VEC_COMPONENTS];
1606                unsigned c = 0;
1607                for (unsigned i = 0; i < intrin->num_components; i++) {
1608                   if (usage->comps_kept & (1u << i))
1609                      vec_srcs[i] = nir_channel(&b, &intrin->def, c++);
1610                   else
1611                      vec_srcs[i] = undef;
1612                }
1613                nir_def *vec = nir_vec(&b, vec_srcs, intrin->num_components);
1614 
1615                nir_def_rewrite_uses_after(&intrin->def,
1616                                           vec,
1617                                           vec->parent_instr);
1618 
1619                /* The SSA def is now only used by the swizzle.  It's safe to
1620                 * shrink the number of components.
1621                 */
1622                assert(list_length(&intrin->def.uses) == c);
1623                intrin->num_components = c;
1624                intrin->def.num_components = c;
1625             } else {
1626                nir_component_mask_t write_mask =
1627                   nir_intrinsic_write_mask(intrin);
1628 
1629                unsigned swizzle[NIR_MAX_VEC_COMPONENTS];
1630                nir_component_mask_t new_write_mask = 0;
1631                unsigned c = 0;
1632                for (unsigned i = 0; i < intrin->num_components; i++) {
1633                   if (usage->comps_kept & (1u << i)) {
1634                      swizzle[c] = i;
1635                      if (write_mask & (1u << i))
1636                         new_write_mask |= 1u << c;
1637                      c++;
1638                   }
1639                }
1640 
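                    /* e.g. with comps_kept == xz and an original write_mask
                     * of xy, the store shrinks to 2 components, the source is
                     * swizzled to (x, z) and the new write_mask keeps only
                     * bit 0 (the old x component).
                     */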
1641                b.cursor = nir_before_instr(&intrin->instr);
1642 
1643                nir_def *swizzled =
1644                   nir_swizzle(&b, intrin->src[1].ssa, swizzle, c);
1645 
1646                /* Rewrite to use the compacted source */
1647                nir_src_rewrite(&intrin->src[1], swizzled);
1648                nir_intrinsic_set_write_mask(intrin, new_write_mask);
1649                intrin->num_components = c;
1650             }
1651             break;
1652          }
1653 
1654          default:
1655             break;
1656          }
1657       }
1658    }
1659 }
1660 
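     /* Quick check for whether there are any variables the requested modes
      * could apply to: shader-level variables for non-function_temp modes and
      * impl->locals for nir_var_function_temp.
      */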
1661 static bool
1662 function_impl_has_vars_with_modes(nir_function_impl *impl,
1663                                   nir_variable_mode modes)
1664 {
1665    nir_shader *shader = impl->function->shader;
1666 
1667    if (modes & ~nir_var_function_temp) {
1668       nir_foreach_variable_with_modes(var, shader,
1669                                       modes & ~nir_var_function_temp)
1670          return true;
1671    }
1672 
1673    if ((modes & nir_var_function_temp) && !exec_list_is_empty(&impl->locals))
1674       return true;
1675 
1676    return false;
1677 }
1678 
1679 /** Attempt to shrink arrays of vectors
1680  *
1681  * This pass looks at variables which contain a vector or an array (possibly
1682  * multiple dimensions) of vectors and attempts to lower each to a smaller vector
1683  * or array.  If the pass can prove that a component of a vector (or array of
1684  * vectors) is never really used, then that component will be removed.
1685  * Similarly, the pass attempts to shorten arrays based on what elements it
1686  * can prove are never read or never contain valid data.
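      *
      * For example, a temporary declared as vec4[16] that is accessed only at
      * constant indices 0..7 and only in .xy is rewritten as vec2[8], with
      * every load, store and deref chain updated to match.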
1687  */
1688 bool
1689 nir_shrink_vec_array_vars(nir_shader *shader, nir_variable_mode modes)
1690 {
1691    assert((modes & (nir_var_shader_temp | nir_var_function_temp)) == modes);
1692 
1693    void *mem_ctx = ralloc_context(NULL);
1694 
1695    struct hash_table *var_usage_map =
1696       _mesa_pointer_hash_table_create(mem_ctx);
1697 
1698    bool has_vars_to_shrink = false;
1699    nir_foreach_function_impl(impl, shader) {
1700       /* Don't even bother crawling the IR if we don't have any variables.
1701        * Given that this pass deletes any unused variables, it's likely that
1702        * we will be in this scenario eventually.
1703        */
1704       if (function_impl_has_vars_with_modes(impl, modes)) {
1705          has_vars_to_shrink = true;
1706          find_used_components_impl(impl, var_usage_map,
1707                                    modes, mem_ctx);
1708       }
1709    }
1710    if (!has_vars_to_shrink) {
1711       ralloc_free(mem_ctx);
1712       nir_shader_preserve_all_metadata(shader);
1713       return false;
1714    }
1715 
1716    bool globals_shrunk = false;
1717    if (modes & nir_var_shader_temp) {
1718       globals_shrunk = shrink_vec_var_list(&shader->variables,
1719                                            nir_var_shader_temp,
1720                                            var_usage_map);
1721    }
1722 
1723    bool progress = false;
1724    nir_foreach_function_impl(impl, shader) {
1725       bool locals_shrunk = false;
1726       if (modes & nir_var_function_temp) {
1727          locals_shrunk = shrink_vec_var_list(&impl->locals,
1728                                              nir_var_function_temp,
1729                                              var_usage_map);
1730       }
1731 
1732       if (globals_shrunk || locals_shrunk) {
1733          shrink_vec_var_access_impl(impl, var_usage_map, modes);
1734 
1735          nir_metadata_preserve(impl, nir_metadata_block_index |
1736                                         nir_metadata_dominance);
1737          progress = true;
1738       } else {
1739          nir_metadata_preserve(impl, nir_metadata_all);
1740       }
1741    }
1742 
1743    ralloc_free(mem_ctx);
1744 
1745    return progress;
1746 }
1747