1 /*
2 * Copyright © 2018 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "nir.h"
25 #include "nir_builder.h"
26 #include "nir_deref.h"
27 #include "nir_vla.h"
28
29 #include "util/set.h"
30 #include "util/u_math.h"
31
32 static bool
33 is_array_deref_of_vec(nir_deref_instr *deref)
34 {
35 if (deref->deref_type != nir_deref_type_array &&
36 deref->deref_type != nir_deref_type_array_wildcard)
37 return false;
38
39 nir_deref_instr *parent = nir_deref_instr_parent(deref);
40 return glsl_type_is_vector_or_scalar(parent->type);
41 }
42
43
44 static struct set *
45 get_complex_used_vars(nir_shader *shader, void *mem_ctx)
46 {
47 struct set *complex_vars = _mesa_pointer_set_create(mem_ctx);
48
49 nir_foreach_function_impl(impl, shader) {
50 nir_foreach_block(block, impl) {
51 nir_foreach_instr(instr, block) {
52 if (instr->type != nir_instr_type_deref)
53 continue;
54
55 nir_deref_instr *deref = nir_instr_as_deref(instr);
56
57 /* We only need to consider var derefs because
58 * nir_deref_instr_has_complex_use is recursive.
59 */
60 if (deref->deref_type == nir_deref_type_var &&
61 nir_deref_instr_has_complex_use(deref,
62 nir_deref_instr_has_complex_use_allow_atomics))
63 _mesa_set_add(complex_vars, deref->var);
64 }
65 }
66 }
67
68 return complex_vars;
69 }
70
71 struct split_var_state {
72 void *mem_ctx;
73
74 nir_shader *shader;
75 nir_function_impl *impl;
76
77 nir_variable *base_var;
78 };
79
80 struct field {
81 struct field *parent;
82
83 const struct glsl_type *type;
84
85 unsigned num_fields;
86 struct field *fields;
87
88 /* The field currently being recursed */
89 unsigned current_index;
90
91 nir_variable *var;
92 };
93
94 static int
95 num_array_levels_in_array_of_vector_type(const struct glsl_type *type)
96 {
97 int num_levels = 0;
98 while (true) {
99 if (glsl_type_is_array_or_matrix(type)) {
100 num_levels++;
101 type = glsl_get_array_element(type);
102 } else if (glsl_type_is_vector_or_scalar(type) &&
103 !glsl_type_is_cmat(type)) {
104 /* glsl_type_is_vector_or_scalar would more accurately be called "can
105 * be an r-value that isn't an array, structure, or matrix". This
106 * optimization pass really shouldn't do anything to cooperative
107 * matrices. These matrices will eventually be lowered to something
108 * else (dependent on the backend), and that thing may (or may not)
109 * be handled by this or another pass.
110 */
111 return num_levels;
112 } else {
113 /* Not an array of vectors */
114 return -1;
115 }
116 }
117 }
118
119 static nir_constant *
120 gather_constant_initializers(nir_constant *src,
121 nir_variable *var,
122 const struct glsl_type *type,
123 struct field *field,
124 struct split_var_state *state)
125 {
126 if (!src)
127 return NULL;
128 if (glsl_type_is_array(type)) {
129 const struct glsl_type *element = glsl_get_array_element(type);
130 assert(src->num_elements == glsl_get_length(type));
131 nir_constant *dst = rzalloc(var, nir_constant);
132 dst->num_elements = src->num_elements;
133 dst->elements = rzalloc_array(var, nir_constant *, src->num_elements);
134 for (unsigned i = 0; i < src->num_elements; ++i) {
135 dst->elements[i] = gather_constant_initializers(src->elements[i], var, element, field, state);
136 }
137 return dst;
138 } else if (glsl_type_is_struct(type)) {
139 const struct glsl_type *element = glsl_get_struct_field(type, field->current_index);
140 return gather_constant_initializers(src->elements[field->current_index], var, element, &field->fields[field->current_index], state);
141 } else {
142 return nir_constant_clone(src, var);
143 }
144 }
145
146 static void
147 init_field_for_type(struct field *field, struct field *parent,
148 const struct glsl_type *type,
149 const char *name,
150 struct split_var_state *state)
151 {
152 *field = (struct field){
153 .parent = parent,
154 .type = type,
155 };
156
157 const struct glsl_type *struct_type = glsl_without_array(type);
158 if (glsl_type_is_struct_or_ifc(struct_type)) {
159 field->num_fields = glsl_get_length(struct_type);
160 field->fields = ralloc_array(state->mem_ctx, struct field,
161 field->num_fields);
162 for (unsigned i = 0; i < field->num_fields; i++) {
163 char *field_name = NULL;
164 if (name) {
165 field_name = ralloc_asprintf(state->mem_ctx, "%s_%s", name,
166 glsl_get_struct_elem_name(struct_type, i));
167 } else {
168 field_name = ralloc_asprintf(state->mem_ctx, "{unnamed %s}_%s",
169 glsl_get_type_name(struct_type),
170 glsl_get_struct_elem_name(struct_type, i));
171 }
172 field->current_index = i;
173 init_field_for_type(&field->fields[i], field,
174 glsl_get_struct_field(struct_type, i),
175 field_name, state);
176 }
177 } else {
178 const struct glsl_type *var_type = type;
179 struct field *root = field;
180 for (struct field *f = field->parent; f; f = f->parent) {
181 var_type = glsl_type_wrap_in_arrays(var_type, f->type);
182 root = f;
183 }
184
185 nir_variable_mode mode = state->base_var->data.mode;
186 if (mode == nir_var_function_temp) {
187 field->var = nir_local_variable_create(state->impl, var_type, name);
188 } else {
189 field->var = nir_variable_create(state->shader, mode, var_type, name);
190 }
191 field->var->data.ray_query = state->base_var->data.ray_query;
192 field->var->constant_initializer = gather_constant_initializers(state->base_var->constant_initializer,
193 field->var, state->base_var->type,
194 root, state);
195 }
196 }
197
198 static bool
199 split_var_list_structs(nir_shader *shader,
200 nir_function_impl *impl,
201 struct exec_list *vars,
202 nir_variable_mode mode,
203 struct hash_table *var_field_map,
204 struct set **complex_vars,
205 void *mem_ctx)
206 {
207 struct split_var_state state = {
208 .mem_ctx = mem_ctx,
209 .shader = shader,
210 .impl = impl,
211 };
212
213 struct exec_list split_vars;
214 exec_list_make_empty(&split_vars);
215
216 /* To avoid list confusion (we'll be adding things as we split variables),
217 * pull all of the variables we plan to split off of the list
218 */
219 nir_foreach_variable_in_list_safe(var, vars) {
220 if (var->data.mode != mode)
221 continue;
222
223 if (!glsl_type_is_struct_or_ifc(glsl_without_array(var->type)))
224 continue;
225
226 if (*complex_vars == NULL)
227 *complex_vars = get_complex_used_vars(shader, mem_ctx);
228
229 /* We can't split a variable that's referenced with a deref that has any
230 * sort of complex usage.
231 */
232 if (_mesa_set_search(*complex_vars, var))
233 continue;
234
235 exec_node_remove(&var->node);
236 exec_list_push_tail(&split_vars, &var->node);
237 }
238
239 nir_foreach_variable_in_list(var, &split_vars) {
240 state.base_var = var;
241
242 struct field *root_field = ralloc(mem_ctx, struct field);
243 init_field_for_type(root_field, NULL, var->type, var->name, &state);
244 _mesa_hash_table_insert(var_field_map, var, root_field);
245 }
246
247 return !exec_list_is_empty(&split_vars);
248 }
249
250 static void
251 split_struct_derefs_impl(nir_function_impl *impl,
252 struct hash_table *var_field_map,
253 nir_variable_mode modes,
254 void *mem_ctx)
255 {
256 nir_builder b = nir_builder_create(impl);
257
258 nir_foreach_block(block, impl) {
259 nir_foreach_instr_safe(instr, block) {
260 if (instr->type != nir_instr_type_deref)
261 continue;
262
263 nir_deref_instr *deref = nir_instr_as_deref(instr);
264 if (!nir_deref_mode_may_be(deref, modes))
265 continue;
266
267 /* Clean up any dead derefs we find lying around. They may refer to
268 * variables we're planning to split.
269 */
270 if (nir_deref_instr_remove_if_unused(deref))
271 continue;
272
273 if (!glsl_type_is_vector_or_scalar(deref->type))
274 continue;
275
276 nir_variable *base_var = nir_deref_instr_get_variable(deref);
277 /* If we can't chase back to the variable, then we're a complex use.
278 * This should have been detected by get_complex_used_vars() and the
279 * variable should not have been split. However, we have no way of
280 * knowing that here, so we just have to trust it.
281 */
282 if (base_var == NULL)
283 continue;
284
285 struct hash_entry *entry =
286 _mesa_hash_table_search(var_field_map, base_var);
287 if (!entry)
288 continue;
289
290 struct field *root_field = entry->data;
291
292 nir_deref_path path;
293 nir_deref_path_init(&path, deref, mem_ctx);
294
295 struct field *tail_field = root_field;
296 for (unsigned i = 0; path.path[i]; i++) {
297 if (path.path[i]->deref_type != nir_deref_type_struct)
298 continue;
299
300 assert(i > 0);
301 assert(glsl_type_is_struct_or_ifc(path.path[i - 1]->type));
302 assert(path.path[i - 1]->type ==
303 glsl_without_array(tail_field->type));
304
305 tail_field = &tail_field->fields[path.path[i]->strct.index];
306 }
307 nir_variable *split_var = tail_field->var;
308
309 nir_deref_instr *new_deref = NULL;
310 for (unsigned i = 0; path.path[i]; i++) {
311 nir_deref_instr *p = path.path[i];
312 b.cursor = nir_after_instr(&p->instr);
313
314 switch (p->deref_type) {
315 case nir_deref_type_var:
316 assert(new_deref == NULL);
317 new_deref = nir_build_deref_var(&b, split_var);
318 break;
319
320 case nir_deref_type_array:
321 case nir_deref_type_array_wildcard:
322 new_deref = nir_build_deref_follower(&b, new_deref, p);
323 break;
324
325 case nir_deref_type_struct:
326 /* Nothing to do; we're splitting structs */
327 break;
328
329 default:
330 unreachable("Invalid deref type in path");
331 }
332 }
333
334 assert(new_deref->type == deref->type);
335 nir_def_rewrite_uses(&deref->def,
336 &new_deref->def);
337 nir_deref_instr_remove_if_unused(deref);
338 }
339 }
340 }
341
342 /** A pass for splitting structs into multiple variables
343 *
344 * This pass splits arrays of structs into multiple variables, one for each
345 * (possibly nested) structure member. After this pass completes, no
346 * variables of the given mode will contain a struct type.
347 */
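/* Illustrative sketch (not taken from a real shader): a function_temp
 * variable declared as
 *
 *     struct { vec4 color; float depth; } s[8];
 *
 * is replaced by two variables roughly equivalent to
 *
 *     vec4 s_color[8];
 *     float s_depth[8];
 *
 * and every struct deref chain rooted at "s" is rewritten to point at the
 * matching split variable.
 */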
348 bool
349 nir_split_struct_vars(nir_shader *shader, nir_variable_mode modes)
350 {
351 void *mem_ctx = ralloc_context(NULL);
352 struct hash_table *var_field_map =
353 _mesa_pointer_hash_table_create(mem_ctx);
354 struct set *complex_vars = NULL;
355
356 bool has_global_splits = false;
357 nir_variable_mode global_modes = modes & ~nir_var_function_temp;
358 if (global_modes) {
359 has_global_splits = split_var_list_structs(shader, NULL,
360 &shader->variables,
361 global_modes,
362 var_field_map,
363 &complex_vars,
364 mem_ctx);
365 }
366
367 bool progress = false;
368 nir_foreach_function_impl(impl, shader) {
369 bool has_local_splits = false;
370 if (modes & nir_var_function_temp) {
371 has_local_splits = split_var_list_structs(shader, impl,
372 &impl->locals,
373 nir_var_function_temp,
374 var_field_map,
375 &complex_vars,
376 mem_ctx);
377 }
378
379 if (has_global_splits || has_local_splits) {
380 split_struct_derefs_impl(impl, var_field_map,
381 modes, mem_ctx);
382
383 nir_metadata_preserve(impl, nir_metadata_block_index |
384 nir_metadata_dominance);
385 progress = true;
386 } else {
387 nir_metadata_preserve(impl, nir_metadata_all);
388 }
389 }
390
391 ralloc_free(mem_ctx);
392
393 return progress;
394 }
395
396 struct array_level_info {
397 unsigned array_len;
398 bool split;
399 };
400
401 struct array_split {
402 /* Only set if this is the tail end of the splitting */
403 nir_variable *var;
404
405 unsigned num_splits;
406 struct array_split *splits;
407 };
408
409 struct array_var_info {
410 nir_variable *base_var;
411
412 const struct glsl_type *split_var_type;
413
414 bool split_var;
415 struct array_split root_split;
416
417 unsigned num_levels;
418 struct array_level_info levels[0];
419 };
420
421 static bool
422 init_var_list_array_infos(nir_shader *shader,
423 struct exec_list *vars,
424 nir_variable_mode mode,
425 struct hash_table *var_info_map,
426 struct set **complex_vars,
427 void *mem_ctx)
428 {
429 bool has_array = false;
430
431 nir_foreach_variable_in_list(var, vars) {
432 if (var->data.mode != mode)
433 continue;
434
435 int num_levels = num_array_levels_in_array_of_vector_type(var->type);
436 if (num_levels <= 0)
437 continue;
438
439 if (*complex_vars == NULL)
440 *complex_vars = get_complex_used_vars(shader, mem_ctx);
441
442 /* We can't split a variable that's referenced with a deref that has any
443 * sort of complex usage.
444 */
445 if (_mesa_set_search(*complex_vars, var))
446 continue;
447
448 struct array_var_info *info =
449 rzalloc_size(mem_ctx, sizeof(*info) +
450 num_levels * sizeof(info->levels[0]));
451
452 info->base_var = var;
453 info->num_levels = num_levels;
454
455 const struct glsl_type *type = var->type;
456 for (int i = 0; i < num_levels; i++) {
457 info->levels[i].array_len = glsl_get_length(type);
458 type = glsl_get_array_element(type);
459
460 /* All levels start out initially as split */
461 info->levels[i].split = true;
462 }
463
464 _mesa_hash_table_insert(var_info_map, var, info);
465 has_array = true;
466 }
467
468 return has_array;
469 }
470
471 static struct array_var_info *
472 get_array_var_info(nir_variable *var,
473 struct hash_table *var_info_map)
474 {
475 struct hash_entry *entry =
476 _mesa_hash_table_search(var_info_map, var);
477 return entry ? entry->data : NULL;
478 }
479
480 static struct array_var_info *
481 get_array_deref_info(nir_deref_instr *deref,
482 struct hash_table *var_info_map,
483 nir_variable_mode modes)
484 {
485 if (!nir_deref_mode_may_be(deref, modes))
486 return NULL;
487
488 nir_variable *var = nir_deref_instr_get_variable(deref);
489 if (var == NULL)
490 return NULL;
491
492 return get_array_var_info(var, var_info_map);
493 }
494
495 static void
496 mark_array_deref_used(nir_deref_instr *deref,
497 struct hash_table *var_info_map,
498 nir_variable_mode modes,
499 void *mem_ctx)
500 {
501 struct array_var_info *info =
502 get_array_deref_info(deref, var_info_map, modes);
503 if (!info)
504 return;
505
506 nir_deref_path path;
507 nir_deref_path_init(&path, deref, mem_ctx);
508
509 /* Walk the path and look for indirects. If we have an array deref with an
510 * indirect, mark the given level as not being split.
511 */
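/* Illustrative example: for a deref path like arr[2][i], level 0 uses the
 * constant index 2 and stays split, while level 1 uses the non-constant
 * index i and gets marked as not split.
 */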
512 for (unsigned i = 0; i < info->num_levels; i++) {
513 nir_deref_instr *p = path.path[i + 1];
514 if (p->deref_type == nir_deref_type_array &&
515 !nir_src_is_const(p->arr.index))
516 info->levels[i].split = false;
517 }
518 }
519
520 static void
521 mark_array_usage_impl(nir_function_impl *impl,
522 struct hash_table *var_info_map,
523 nir_variable_mode modes,
524 void *mem_ctx)
525 {
526 nir_foreach_block(block, impl) {
527 nir_foreach_instr(instr, block) {
528 if (instr->type != nir_instr_type_intrinsic)
529 continue;
530
531 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
532 switch (intrin->intrinsic) {
533 case nir_intrinsic_copy_deref:
534 mark_array_deref_used(nir_src_as_deref(intrin->src[1]),
535 var_info_map, modes, mem_ctx);
536 FALLTHROUGH;
537
538 case nir_intrinsic_load_deref:
539 case nir_intrinsic_store_deref:
540 mark_array_deref_used(nir_src_as_deref(intrin->src[0]),
541 var_info_map, modes, mem_ctx);
542 break;
543
544 default:
545 break;
546 }
547 }
548 }
549 }
550
551 static void
552 create_split_array_vars(struct array_var_info *var_info,
553 unsigned level,
554 struct array_split *split,
555 const char *name,
556 nir_shader *shader,
557 nir_function_impl *impl,
558 void *mem_ctx)
559 {
560 while (level < var_info->num_levels && !var_info->levels[level].split) {
561 name = ralloc_asprintf(mem_ctx, "%s[*]", name);
562 level++;
563 }
564
565 if (level == var_info->num_levels) {
566 /* We add parens to the variable name so it looks like "(foo[2][*])" so
567 * that further derefs will look like "(foo[2][*])[ssa_6]"
568 */
569 name = ralloc_asprintf(mem_ctx, "(%s)", name);
570
571 nir_variable_mode mode = var_info->base_var->data.mode;
572 if (mode == nir_var_function_temp) {
573 split->var = nir_local_variable_create(impl,
574 var_info->split_var_type, name);
575 } else {
576 split->var = nir_variable_create(shader, mode,
577 var_info->split_var_type, name);
578 }
579 split->var->data.ray_query = var_info->base_var->data.ray_query;
580 } else {
581 assert(var_info->levels[level].split);
582 split->num_splits = var_info->levels[level].array_len;
583 split->splits = rzalloc_array(mem_ctx, struct array_split,
584 split->num_splits);
585 for (unsigned i = 0; i < split->num_splits; i++) {
586 create_split_array_vars(var_info, level + 1, &split->splits[i],
587 ralloc_asprintf(mem_ctx, "%s[%d]", name, i),
588 shader, impl, mem_ctx);
589 }
590 }
591 }
592
593 static bool
594 split_var_list_arrays(nir_shader *shader,
595 nir_function_impl *impl,
596 struct exec_list *vars,
597 nir_variable_mode mode,
598 struct hash_table *var_info_map,
599 void *mem_ctx)
600 {
601 struct exec_list split_vars;
602 exec_list_make_empty(&split_vars);
603
604 nir_foreach_variable_in_list_safe(var, vars) {
605 if (var->data.mode != mode)
606 continue;
607
608 struct array_var_info *info = get_array_var_info(var, var_info_map);
609 if (!info)
610 continue;
611
612 bool has_split = false;
613 const struct glsl_type *split_type =
614 glsl_without_array_or_matrix(var->type);
615 for (int i = info->num_levels - 1; i >= 0; i--) {
616 if (info->levels[i].split) {
617 has_split = true;
618 continue;
619 }
620
621 /* If the original type was a matrix type, we'd like to keep that so
622 * we don't convert matrices into arrays.
623 */
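/* Illustrative sketch (hypothetical variable): for "mat4 m[5]" where the
 * outer array level is split but the column level is indexed indirectly,
 * the innermost level is rebuilt as mat4 rather than vec4[4], so the pass
 * ends up creating five mat4 variables.
 */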
624 if (i == info->num_levels - 1 &&
625 glsl_type_is_matrix(glsl_without_array(var->type))) {
626 split_type = glsl_matrix_type(glsl_get_base_type(split_type),
627 glsl_get_components(split_type),
628 info->levels[i].array_len);
629 } else {
630 split_type = glsl_array_type(split_type, info->levels[i].array_len, 0);
631 }
632 }
633
634 if (has_split) {
635 info->split_var_type = split_type;
636 /* To avoid list confusion (we'll be adding things as we split
637 * variables), pull all of the variables we plan to split off of the
638 * main variable list.
639 */
640 exec_node_remove(&var->node);
641 exec_list_push_tail(&split_vars, &var->node);
642 } else {
643 assert(split_type == glsl_get_bare_type(var->type));
644 /* If we're not modifying this variable, delete the info so we skip
645 * it faster in later passes.
646 */
647 _mesa_hash_table_remove_key(var_info_map, var);
648 }
649 }
650
651 nir_foreach_variable_in_list(var, &split_vars) {
652 struct array_var_info *info = get_array_var_info(var, var_info_map);
653 create_split_array_vars(info, 0, &info->root_split, var->name,
654 shader, impl, mem_ctx);
655 }
656
657 return !exec_list_is_empty(&split_vars);
658 }
659
660 static bool
661 deref_has_split_wildcard(nir_deref_path *path,
662 struct array_var_info *info)
663 {
664 if (info == NULL)
665 return false;
666
667 assert(path->path[0]->var == info->base_var);
668 for (unsigned i = 0; i < info->num_levels; i++) {
669 if (path->path[i + 1]->deref_type == nir_deref_type_array_wildcard &&
670 info->levels[i].split)
671 return true;
672 }
673
674 return false;
675 }
676
677 static bool
678 array_path_is_out_of_bounds(nir_deref_path *path,
679 struct array_var_info *info)
680 {
681 if (info == NULL)
682 return false;
683
684 assert(path->path[0]->var == info->base_var);
685 for (unsigned i = 0; i < info->num_levels; i++) {
686 nir_deref_instr *p = path->path[i + 1];
687 if (p->deref_type == nir_deref_type_array_wildcard)
688 continue;
689
690 if (nir_src_is_const(p->arr.index) &&
691 nir_src_as_uint(p->arr.index) >= info->levels[i].array_len)
692 return true;
693 }
694
695 return false;
696 }
697
698 static void
699 emit_split_copies(nir_builder *b,
700 struct array_var_info *dst_info, nir_deref_path *dst_path,
701 unsigned dst_level, nir_deref_instr *dst,
702 struct array_var_info *src_info, nir_deref_path *src_path,
703 unsigned src_level, nir_deref_instr *src)
704 {
705 nir_deref_instr *dst_p, *src_p;
706
707 while ((dst_p = dst_path->path[dst_level + 1])) {
708 if (dst_p->deref_type == nir_deref_type_array_wildcard)
709 break;
710
711 dst = nir_build_deref_follower(b, dst, dst_p);
712 dst_level++;
713 }
714
715 while ((src_p = src_path->path[src_level + 1])) {
716 if (src_p->deref_type == nir_deref_type_array_wildcard)
717 break;
718
719 src = nir_build_deref_follower(b, src, src_p);
720 src_level++;
721 }
722
723 if (src_p == NULL || dst_p == NULL) {
724 assert(src_p == NULL && dst_p == NULL);
725 nir_copy_deref(b, dst, src);
726 } else {
727 assert(dst_p->deref_type == nir_deref_type_array_wildcard &&
728 src_p->deref_type == nir_deref_type_array_wildcard);
729
730 if ((dst_info && dst_info->levels[dst_level].split) ||
731 (src_info && src_info->levels[src_level].split)) {
732 /* There are no indirects at this level on at least one of the source
733 * or the destination, so we are lowering it.
734 */
735 assert(glsl_get_length(dst_path->path[dst_level]->type) ==
736 glsl_get_length(src_path->path[src_level]->type));
737 unsigned len = glsl_get_length(dst_path->path[dst_level]->type);
738 for (unsigned i = 0; i < len; i++) {
739 emit_split_copies(b, dst_info, dst_path, dst_level + 1,
740 nir_build_deref_array_imm(b, dst, i),
741 src_info, src_path, src_level + 1,
742 nir_build_deref_array_imm(b, src, i));
743 }
744 } else {
745 /* Neither side is being split so we just keep going */
746 emit_split_copies(b, dst_info, dst_path, dst_level + 1,
747 nir_build_deref_array_wildcard(b, dst),
748 src_info, src_path, src_level + 1,
749 nir_build_deref_array_wildcard(b, src));
750 }
751 }
752 }
753
754 static void
755 split_array_copies_impl(nir_function_impl *impl,
756 struct hash_table *var_info_map,
757 nir_variable_mode modes,
758 void *mem_ctx)
759 {
760 nir_builder b = nir_builder_create(impl);
761
762 nir_foreach_block(block, impl) {
763 nir_foreach_instr_safe(instr, block) {
764 if (instr->type != nir_instr_type_intrinsic)
765 continue;
766
767 nir_intrinsic_instr *copy = nir_instr_as_intrinsic(instr);
768 if (copy->intrinsic != nir_intrinsic_copy_deref)
769 continue;
770
771 nir_deref_instr *dst_deref = nir_src_as_deref(copy->src[0]);
772 nir_deref_instr *src_deref = nir_src_as_deref(copy->src[1]);
773
774 struct array_var_info *dst_info =
775 get_array_deref_info(dst_deref, var_info_map, modes);
776 struct array_var_info *src_info =
777 get_array_deref_info(src_deref, var_info_map, modes);
778
779 if (!src_info && !dst_info)
780 continue;
781
782 nir_deref_path dst_path, src_path;
783 nir_deref_path_init(&dst_path, dst_deref, mem_ctx);
784 nir_deref_path_init(&src_path, src_deref, mem_ctx);
785
786 if (!deref_has_split_wildcard(&dst_path, dst_info) &&
787 !deref_has_split_wildcard(&src_path, src_info))
788 continue;
789
790 b.cursor = nir_instr_remove(&copy->instr);
791
792 emit_split_copies(&b, dst_info, &dst_path, 0, dst_path.path[0],
793 src_info, &src_path, 0, src_path.path[0]);
794 }
795 }
796 }
797
798 static void
799 split_array_access_impl(nir_function_impl *impl,
800 struct hash_table *var_info_map,
801 nir_variable_mode modes,
802 void *mem_ctx)
803 {
804 nir_builder b = nir_builder_create(impl);
805
806 nir_foreach_block(block, impl) {
807 nir_foreach_instr_safe(instr, block) {
808 if (instr->type == nir_instr_type_deref) {
809 /* Clean up any dead derefs we find lying around. They may refer
810 * to variables we're planning to split.
811 */
812 nir_deref_instr *deref = nir_instr_as_deref(instr);
813 if (nir_deref_mode_may_be(deref, modes))
814 nir_deref_instr_remove_if_unused(deref);
815 continue;
816 }
817
818 if (instr->type != nir_instr_type_intrinsic)
819 continue;
820
821 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
822 if (intrin->intrinsic != nir_intrinsic_load_deref &&
823 intrin->intrinsic != nir_intrinsic_store_deref &&
824 intrin->intrinsic != nir_intrinsic_copy_deref)
825 continue;
826
827 const unsigned num_derefs =
828 intrin->intrinsic == nir_intrinsic_copy_deref ? 2 : 1;
829
830 for (unsigned d = 0; d < num_derefs; d++) {
831 nir_deref_instr *deref = nir_src_as_deref(intrin->src[d]);
832
833 struct array_var_info *info =
834 get_array_deref_info(deref, var_info_map, modes);
835 if (!info)
836 continue;
837
838 nir_deref_path path;
839 nir_deref_path_init(&path, deref, mem_ctx);
840
841 b.cursor = nir_before_instr(&intrin->instr);
842
843 if (array_path_is_out_of_bounds(&path, info)) {
844 /* If one of the derefs is out-of-bounds, we just delete the
845 * instruction. If a destination is out of bounds, then it may
846 * have been in-bounds prior to shrinking so we don't want to
847 * accidentally stomp something. However, we've already proven
848 * that it will never be read so it's safe to delete. If a
849 * source is out of bounds then it is loading random garbage.
850 * For loads, we replace their uses with an undef instruction
851 * and for copies we just delete the copy since it was writing
852 * undefined garbage anyway and we may as well leave the random
853 * garbage in the destination alone.
854 */
855 if (intrin->intrinsic == nir_intrinsic_load_deref) {
856 nir_def *u =
857 nir_undef(&b, intrin->def.num_components,
858 intrin->def.bit_size);
859 nir_def_rewrite_uses(&intrin->def,
860 u);
861 }
862 nir_instr_remove(&intrin->instr);
863 for (unsigned i = 0; i < num_derefs; i++)
864 nir_deref_instr_remove_if_unused(nir_src_as_deref(intrin->src[i]));
865 break;
866 }
867
868 struct array_split *split = &info->root_split;
869 for (unsigned i = 0; i < info->num_levels; i++) {
870 if (info->levels[i].split) {
871 nir_deref_instr *p = path.path[i + 1];
872 unsigned index = nir_src_as_uint(p->arr.index);
873 assert(index < info->levels[i].array_len);
874 split = &split->splits[index];
875 }
876 }
877 assert(!split->splits && split->var);
878
879 nir_deref_instr *new_deref = nir_build_deref_var(&b, split->var);
880 for (unsigned i = 0; i < info->num_levels; i++) {
881 if (!info->levels[i].split) {
882 new_deref = nir_build_deref_follower(&b, new_deref,
883 path.path[i + 1]);
884 }
885 }
886
887 if (is_array_deref_of_vec(deref))
888 new_deref = nir_build_deref_follower(&b, new_deref, deref);
889
890 assert(new_deref->type == deref->type);
891
892 /* Rewrite the deref source to point to the split one */
893 nir_src_rewrite(&intrin->src[d], &new_deref->def);
894 nir_deref_instr_remove_if_unused(deref);
895 }
896 }
897 }
898 }
899
900 /** A pass for splitting arrays of vectors into multiple variables
901 *
902 * This pass looks at arrays (possibly multiple levels) of vectors (not
903 * structures or other types) and tries to split them into piles of variables,
904 * one for each array element. The heuristic used is simple: If a given array
905 * level is never used with an indirect, that array level will get split.
906 *
907 * This pass probably could handle structures easily enough, but making a pass
908 * that could see through an array of structures of arrays would be difficult
909 * so it's best to just run nir_split_struct_vars first.
910 */
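/* Illustrative example: given
 *
 *     vec4 v[3];
 *
 * that is only ever indexed with constants, the pass creates three vec4
 * variables named "(v[0])", "(v[1])" and "(v[2])" and rewrites the derefs.
 * If any level is indexed with a non-constant value (e.g. v[i]), that level
 * is left as an array.
 *
 * Typical driver usage looks something like the following (an illustrative
 * sketch; the modes and pass ordering are up to the backend):
 *
 *     NIR_PASS(progress, nir, nir_split_struct_vars, nir_var_function_temp);
 *     NIR_PASS(progress, nir, nir_split_array_vars, nir_var_function_temp);
 */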
911 bool
912 nir_split_array_vars(nir_shader *shader, nir_variable_mode modes)
913 {
914 void *mem_ctx = ralloc_context(NULL);
915 struct hash_table *var_info_map = _mesa_pointer_hash_table_create(mem_ctx);
916 struct set *complex_vars = NULL;
917
918 bool has_global_array = false;
919 if (modes & (~nir_var_function_temp)) {
920 has_global_array = init_var_list_array_infos(shader,
921 &shader->variables,
922 modes,
923 var_info_map,
924 &complex_vars,
925 mem_ctx);
926 }
927
928 bool has_any_array = false;
929 nir_foreach_function_impl(impl, shader) {
930 bool has_local_array = false;
931 if (modes & nir_var_function_temp) {
932 has_local_array = init_var_list_array_infos(shader,
933 &impl->locals,
934 nir_var_function_temp,
935 var_info_map,
936 &complex_vars,
937 mem_ctx);
938 }
939
940 if (has_global_array || has_local_array) {
941 has_any_array = true;
942 mark_array_usage_impl(impl, var_info_map, modes, mem_ctx);
943 }
944 }
945
946 /* If we failed to find any arrays of vectors, bail early. */
947 if (!has_any_array) {
948 ralloc_free(mem_ctx);
949 nir_shader_preserve_all_metadata(shader);
950 return false;
951 }
952
953 bool has_global_splits = false;
954 if (modes & (~nir_var_function_temp)) {
955 has_global_splits = split_var_list_arrays(shader, NULL,
956 &shader->variables,
957 modes,
958 var_info_map, mem_ctx);
959 }
960
961 bool progress = false;
962 nir_foreach_function_impl(impl, shader) {
963 bool has_local_splits = false;
964 if (modes & nir_var_function_temp) {
965 has_local_splits = split_var_list_arrays(shader, impl,
966 &impl->locals,
967 nir_var_function_temp,
968 var_info_map, mem_ctx);
969 }
970
971 if (has_global_splits || has_local_splits) {
972 split_array_copies_impl(impl, var_info_map, modes, mem_ctx);
973 split_array_access_impl(impl, var_info_map, modes, mem_ctx);
974
975 nir_metadata_preserve(impl, nir_metadata_block_index |
976 nir_metadata_dominance);
977 progress = true;
978 } else {
979 nir_metadata_preserve(impl, nir_metadata_all);
980 }
981 }
982
983 ralloc_free(mem_ctx);
984
985 return progress;
986 }
987
988 struct array_level_usage {
989 unsigned array_len;
990
991 /* The value UINT_MAX will be used to indicate an indirect */
992 unsigned max_read;
993 unsigned max_written;
994
995 /* True if there is a copy that isn't to/from a shrinkable array */
996 bool has_external_copy;
997 struct set *levels_copied;
998 };
999
1000 struct vec_var_usage {
1001 /* Convenience set of all components this variable has */
1002 nir_component_mask_t all_comps;
1003
1004 nir_component_mask_t comps_read;
1005 nir_component_mask_t comps_written;
1006
1007 nir_component_mask_t comps_kept;
1008
1009 /* True if there is a copy that isn't to/from a shrinkable vector */
1010 bool has_external_copy;
1011 bool has_complex_use;
1012 struct set *vars_copied;
1013
1014 unsigned num_levels;
1015 struct array_level_usage levels[0];
1016 };
1017
1018 static struct vec_var_usage *
1019 get_vec_var_usage(nir_variable *var,
1020 struct hash_table *var_usage_map,
1021 bool add_usage_entry, void *mem_ctx)
1022 {
1023 struct hash_entry *entry = _mesa_hash_table_search(var_usage_map, var);
1024 if (entry)
1025 return entry->data;
1026
1027 if (!add_usage_entry)
1028 return NULL;
1029
1030 /* Check to make sure that we are working with an array of vectors. We
1031 * don't bother to shrink single vectors because we figure that we can
1032 * clean it up better with SSA than by inserting piles of vecN instructions
1033 * to compact results.
1034 */
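/* (Illustrative consequence: a lone "vec4 v" with an unused .w is left for
 * SSA-level cleanup rather than being shrunk to vec3 by this pass.)
 */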
1035 int num_levels = num_array_levels_in_array_of_vector_type(var->type);
1036 if (num_levels < 1)
1037 return NULL; /* Not an array of vectors */
1038
1039 struct vec_var_usage *usage =
1040 rzalloc_size(mem_ctx, sizeof(*usage) +
1041 num_levels * sizeof(usage->levels[0]));
1042
1043 usage->num_levels = num_levels;
1044 const struct glsl_type *type = var->type;
1045 for (unsigned i = 0; i < num_levels; i++) {
1046 usage->levels[i].array_len = glsl_get_length(type);
1047 type = glsl_get_array_element(type);
1048 }
1049 assert(glsl_type_is_vector_or_scalar(type));
1050
1051 usage->all_comps = (1 << glsl_get_components(type)) - 1;
1052
1053 _mesa_hash_table_insert(var_usage_map, var, usage);
1054
1055 return usage;
1056 }
1057
1058 static struct vec_var_usage *
1059 get_vec_deref_usage(nir_deref_instr *deref,
1060 struct hash_table *var_usage_map,
1061 nir_variable_mode modes,
1062 bool add_usage_entry, void *mem_ctx)
1063 {
1064 if (!nir_deref_mode_may_be(deref, modes))
1065 return NULL;
1066
1067 nir_variable *var = nir_deref_instr_get_variable(deref);
1068 if (var == NULL)
1069 return NULL;
1070
1071 return get_vec_var_usage(nir_deref_instr_get_variable(deref),
1072 var_usage_map, add_usage_entry, mem_ctx);
1073 }
1074
1075 static void
1076 mark_deref_if_complex(nir_deref_instr *deref,
1077 struct hash_table *var_usage_map,
1078 nir_variable_mode modes,
1079 void *mem_ctx)
1080 {
1081 /* Only bother with var derefs because nir_deref_instr_has_complex_use is
1082 * recursive.
1083 */
1084 if (deref->deref_type != nir_deref_type_var)
1085 return;
1086
1087 if (!(deref->var->data.mode & modes))
1088 return;
1089
1090 if (!nir_deref_instr_has_complex_use(deref, nir_deref_instr_has_complex_use_allow_atomics))
1091 return;
1092
1093 struct vec_var_usage *usage =
1094 get_vec_var_usage(deref->var, var_usage_map, true, mem_ctx);
1095 if (!usage)
1096 return;
1097
1098 usage->has_complex_use = true;
1099 }
1100
1101 static void
1102 mark_deref_used(nir_deref_instr *deref,
1103 nir_component_mask_t comps_read,
1104 nir_component_mask_t comps_written,
1105 nir_deref_instr *copy_deref,
1106 struct hash_table *var_usage_map,
1107 nir_variable_mode modes,
1108 void *mem_ctx)
1109 {
1110 if (!nir_deref_mode_may_be(deref, modes))
1111 return;
1112
1113 nir_variable *var = nir_deref_instr_get_variable(deref);
1114 if (var == NULL)
1115 return;
1116
1117 struct vec_var_usage *usage =
1118 get_vec_var_usage(var, var_usage_map, true, mem_ctx);
1119 if (!usage)
1120 return;
1121
1122 if (is_array_deref_of_vec(deref)) {
1123 if (comps_read)
1124 comps_read = usage->all_comps;
1125 if (comps_written)
1126 comps_written = usage->all_comps;
1127 }
1128
1129 usage->comps_read |= comps_read & usage->all_comps;
1130 usage->comps_written |= comps_written & usage->all_comps;
1131
1132 struct vec_var_usage *copy_usage = NULL;
1133 if (copy_deref) {
1134 copy_usage = get_vec_deref_usage(copy_deref, var_usage_map, modes,
1135 true, mem_ctx);
1136 if (copy_usage) {
1137 if (usage->vars_copied == NULL) {
1138 usage->vars_copied = _mesa_pointer_set_create(mem_ctx);
1139 }
1140 _mesa_set_add(usage->vars_copied, copy_usage);
1141 } else {
1142 usage->has_external_copy = true;
1143 }
1144 }
1145
1146 nir_deref_path path;
1147 nir_deref_path_init(&path, deref, mem_ctx);
1148
1149 nir_deref_path copy_path;
1150 if (copy_usage)
1151 nir_deref_path_init(&copy_path, copy_deref, mem_ctx);
1152
1153 unsigned copy_i = 0;
1154 for (unsigned i = 0; i < usage->num_levels; i++) {
1155 struct array_level_usage *level = &usage->levels[i];
1156 nir_deref_instr *deref = path.path[i + 1];
1157 assert(deref->deref_type == nir_deref_type_array ||
1158 deref->deref_type == nir_deref_type_array_wildcard);
1159
1160 unsigned max_used;
1161 if (deref->deref_type == nir_deref_type_array) {
1162 max_used = nir_src_is_const(deref->arr.index) ? nir_src_as_uint(deref->arr.index) : UINT_MAX;
1163 } else {
1164 /* For wildcards, we read or wrote the whole thing. */
1165 assert(deref->deref_type == nir_deref_type_array_wildcard);
1166 max_used = level->array_len - 1;
1167
1168 if (copy_usage) {
1169 /* Match each wildcard level with the level on copy_usage */
1170 for (; copy_path.path[copy_i + 1]; copy_i++) {
1171 if (copy_path.path[copy_i + 1]->deref_type ==
1172 nir_deref_type_array_wildcard)
1173 break;
1174 }
1175 struct array_level_usage *copy_level =
1176 &copy_usage->levels[copy_i++];
1177
1178 if (level->levels_copied == NULL) {
1179 level->levels_copied = _mesa_pointer_set_create(mem_ctx);
1180 }
1181 _mesa_set_add(level->levels_copied, copy_level);
1182 } else {
1183 /* We have a wildcard and it comes from a variable we aren't
1184 * tracking; flag it and we'll know to not shorten this array.
1185 */
1186 level->has_external_copy = true;
1187 }
1188 }
1189
1190 if (comps_written)
1191 level->max_written = MAX2(level->max_written, max_used);
1192 if (comps_read)
1193 level->max_read = MAX2(level->max_read, max_used);
1194 }
1195 }
1196
1197 static bool
1198 src_is_load_deref(nir_src src, nir_src deref_src)
1199 {
1200 nir_intrinsic_instr *load = nir_src_as_intrinsic(src);
1201 if (load == NULL || load->intrinsic != nir_intrinsic_load_deref)
1202 return false;
1203
1204 return load->src[0].ssa == deref_src.ssa;
1205 }
1206
1207 /* Returns all non-self-referential components of a store instruction. A
1208 * component is self-referential if it comes from the same component of a load
1209 * instruction on the same deref. If the only data in a particular component
1210 * of a variable came directly from that component then it's undefined. The
1211 * only way to get defined data into a component of a variable is for it to
1212 * get written there by something outside or from a different component.
1213 *
1214 * This is a fairly common pattern in shaders that come from either GLSL IR or
1215 * GLSLang because both glsl_to_nir and GLSLang implement write-masking with
1216 * load-vec-store.
1217 */
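/* Illustrative example: the write-masked GLSL assignment "v.z = x" often
 * reaches NIR as
 *
 *     vec4 tmp = load_deref(v);
 *     store_deref(v, vec4(tmp.x, tmp.y, x, tmp.w), wrmask = xyzw);
 *
 * Only .z carries new data here; .x, .y and .w come straight back from the
 * load of the same deref, so they are not counted as written.
 */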
1218 static nir_component_mask_t
1219 get_non_self_referential_store_comps(nir_intrinsic_instr *store)
1220 {
1221 nir_component_mask_t comps = nir_intrinsic_write_mask(store);
1222
1223 nir_instr *src_instr = store->src[1].ssa->parent_instr;
1224 if (src_instr->type != nir_instr_type_alu)
1225 return comps;
1226
1227 nir_alu_instr *src_alu = nir_instr_as_alu(src_instr);
1228
1229 if (src_alu->op == nir_op_mov) {
1230 /* If it's just a swizzle of a load from the same deref, discount any
1231 * channels that don't move in the swizzle.
1232 */
1233 if (src_is_load_deref(src_alu->src[0].src, store->src[0])) {
1234 for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++) {
1235 if (src_alu->src[0].swizzle[i] == i)
1236 comps &= ~(1u << i);
1237 }
1238 }
1239 } else if (nir_op_is_vec(src_alu->op)) {
1240 /* If it's a vec, discount any channels that are just loads from the
1241 * same deref put in the same spot.
1242 */
1243 for (unsigned i = 0; i < nir_op_infos[src_alu->op].num_inputs; i++) {
1244 if (src_is_load_deref(src_alu->src[i].src, store->src[0]) &&
1245 src_alu->src[i].swizzle[0] == i)
1246 comps &= ~(1u << i);
1247 }
1248 }
1249
1250 return comps;
1251 }
1252
1253 static void
1254 find_used_components_impl(nir_function_impl *impl,
1255 struct hash_table *var_usage_map,
1256 nir_variable_mode modes,
1257 void *mem_ctx)
1258 {
1259 nir_foreach_block(block, impl) {
1260 nir_foreach_instr(instr, block) {
1261 if (instr->type == nir_instr_type_deref) {
1262 mark_deref_if_complex(nir_instr_as_deref(instr),
1263 var_usage_map, modes, mem_ctx);
1264 }
1265
1266 if (instr->type != nir_instr_type_intrinsic)
1267 continue;
1268
1269 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1270 switch (intrin->intrinsic) {
1271 case nir_intrinsic_load_deref:
1272 mark_deref_used(nir_src_as_deref(intrin->src[0]),
1273 nir_def_components_read(&intrin->def), 0,
1274 NULL, var_usage_map, modes, mem_ctx);
1275 break;
1276
1277 case nir_intrinsic_store_deref:
1278 mark_deref_used(nir_src_as_deref(intrin->src[0]),
1279 0, get_non_self_referential_store_comps(intrin),
1280 NULL, var_usage_map, modes, mem_ctx);
1281 break;
1282
1283 case nir_intrinsic_copy_deref: {
1284 /* Just mark everything used for copies. */
1285 nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
1286 nir_deref_instr *src = nir_src_as_deref(intrin->src[1]);
1287 mark_deref_used(dst, 0, ~0, src, var_usage_map, modes, mem_ctx);
1288 mark_deref_used(src, ~0, 0, dst, var_usage_map, modes, mem_ctx);
1289 break;
1290 }
1291
1292 default:
1293 break;
1294 }
1295 }
1296 }
1297 }
1298
1299 static bool
1300 shrink_vec_var_list(struct exec_list *vars,
1301 nir_variable_mode mode,
1302 struct hash_table *var_usage_map)
1303 {
1304 /* Initialize the components kept field of each variable. This is the
1305 * AND of the components written and components read. If a component is
1306 * written but never read, it's dead. If it is read but never written,
1307 * then all values read are undefined garbage and we may as well not read
1308 * them.
1309 *
1310 * The same logic applies to the array length. We make the array length
1311 * the minimum required length between reads and writes and plan to
1312 * discard any OOB access. The one exception here is indirect writes
1313 * because we don't know where they will land and we can't shrink an array
1314 * with indirect writes because previously in-bounds writes may become
1315 * out-of-bounds and have undefined behavior.
1316 *
1317 * Also, if we have a copy that to/from something we can't shrink, we need
1318 * to leave components and array_len of any wildcards alone.
1319 */
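/* Worked example: comps_read = 0b0111 (xyz) and comps_written = 0b1101
 * (xzw) give comps_kept = 0b0101 (xz); y is never written and w is never
 * read, so neither component needs to be kept.
 */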
1320 nir_foreach_variable_in_list(var, vars) {
1321 if (var->data.mode != mode)
1322 continue;
1323
1324 struct vec_var_usage *usage =
1325 get_vec_var_usage(var, var_usage_map, false, NULL);
1326 if (!usage)
1327 continue;
1328
1329 assert(usage->comps_kept == 0);
1330 if (usage->has_external_copy || usage->has_complex_use)
1331 usage->comps_kept = usage->all_comps;
1332 else
1333 usage->comps_kept = usage->comps_read & usage->comps_written;
1334
1335 for (unsigned i = 0; i < usage->num_levels; i++) {
1336 struct array_level_usage *level = &usage->levels[i];
1337 assert(level->array_len > 0);
1338
1339 if (level->max_written == UINT_MAX || level->has_external_copy ||
1340 usage->has_complex_use)
1341 continue; /* Can't shrink */
1342
1343 unsigned max_used = MIN2(level->max_read, level->max_written);
1344 level->array_len = MIN2(max_used, level->array_len - 1) + 1;
1345 }
1346 }
1347
1348 /* In order for variable copies to work, we have to have the same data type
1349 * on the source and the destination. In order to satisfy this, we run a
1350 * little fixed-point algorithm to transitively ensure that we get enough
1351 * components and array elements for this to hold for all copies.
1352 */
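/* Illustrative sketch: if variable A keeps .xy, variable B keeps .yz, and a
 * copy_deref exists between them, the loop below widens both masks to .xyz
 * so the copy still sees matching types; array lengths are reconciled the
 * same way, and the whole thing repeats until nothing changes.
 */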
1353 bool fp_progress;
1354 do {
1355 fp_progress = false;
1356 nir_foreach_variable_in_list(var, vars) {
1357 if (var->data.mode != mode)
1358 continue;
1359
1360 struct vec_var_usage *var_usage =
1361 get_vec_var_usage(var, var_usage_map, false, NULL);
1362 if (!var_usage || !var_usage->vars_copied)
1363 continue;
1364
1365 set_foreach(var_usage->vars_copied, copy_entry) {
1366 struct vec_var_usage *copy_usage = (void *)copy_entry->key;
1367 if (copy_usage->comps_kept != var_usage->comps_kept) {
1368 nir_component_mask_t comps_kept =
1369 (var_usage->comps_kept | copy_usage->comps_kept);
1370 var_usage->comps_kept = comps_kept;
1371 copy_usage->comps_kept = comps_kept;
1372 fp_progress = true;
1373 }
1374 }
1375
1376 for (unsigned i = 0; i < var_usage->num_levels; i++) {
1377 struct array_level_usage *var_level = &var_usage->levels[i];
1378 if (!var_level->levels_copied)
1379 continue;
1380
1381 set_foreach(var_level->levels_copied, copy_entry) {
1382 struct array_level_usage *copy_level = (void *)copy_entry->key;
1383 if (var_level->array_len != copy_level->array_len) {
1384 unsigned array_len =
1385 MAX2(var_level->array_len, copy_level->array_len);
1386 var_level->array_len = array_len;
1387 copy_level->array_len = array_len;
1388 fp_progress = true;
1389 }
1390 }
1391 }
1392 }
1393 } while (fp_progress);
1394
1395 bool vars_shrunk = false;
1396 nir_foreach_variable_in_list_safe(var, vars) {
1397 if (var->data.mode != mode)
1398 continue;
1399
1400 struct vec_var_usage *usage =
1401 get_vec_var_usage(var, var_usage_map, false, NULL);
1402 if (!usage)
1403 continue;
1404
1405 bool shrunk = false;
1406 const struct glsl_type *vec_type = var->type;
1407 for (unsigned i = 0; i < usage->num_levels; i++) {
1408 /* If we've reduced the array to zero elements at some level, just
1409 * set comps_kept to 0 and delete the variable.
1410 */
1411 if (usage->levels[i].array_len == 0) {
1412 usage->comps_kept = 0;
1413 break;
1414 }
1415
1416 assert(usage->levels[i].array_len <= glsl_get_length(vec_type));
1417 if (usage->levels[i].array_len < glsl_get_length(vec_type))
1418 shrunk = true;
1419 vec_type = glsl_get_array_element(vec_type);
1420 }
1421 assert(glsl_type_is_vector_or_scalar(vec_type));
1422
1423 assert(usage->comps_kept == (usage->comps_kept & usage->all_comps));
1424 if (usage->comps_kept != usage->all_comps)
1425 shrunk = true;
1426
1427 if (usage->comps_kept == 0) {
1428 /* This variable is dead, remove it */
1429 vars_shrunk = true;
1430 exec_node_remove(&var->node);
1431 continue;
1432 }
1433
1434 if (!shrunk) {
1435 /* This variable doesn't need to be shrunk. Remove it from the
1436 * hash table so later steps will ignore it.
1437 */
1438 _mesa_hash_table_remove_key(var_usage_map, var);
1439 continue;
1440 }
1441
1442 /* Build the new var type */
1443 unsigned new_num_comps = util_bitcount(usage->comps_kept);
1444 const struct glsl_type *new_type =
1445 glsl_vector_type(glsl_get_base_type(vec_type), new_num_comps);
1446 for (int i = usage->num_levels - 1; i >= 0; i--) {
1447 assert(usage->levels[i].array_len > 0);
1448 /* If the original type was a matrix type, we'd like to keep that so
1449 * we don't convert matrices into arrays.
1450 */
1451 if (i == usage->num_levels - 1 &&
1452 glsl_type_is_matrix(glsl_without_array(var->type)) &&
1453 new_num_comps > 1 && usage->levels[i].array_len > 1) {
1454 new_type = glsl_matrix_type(glsl_get_base_type(new_type),
1455 new_num_comps,
1456 usage->levels[i].array_len);
1457 } else {
1458 new_type = glsl_array_type(new_type, usage->levels[i].array_len, 0);
1459 }
1460 }
1461 var->type = new_type;
1462
1463 vars_shrunk = true;
1464 }
1465
1466 return vars_shrunk;
1467 }
1468
1469 static bool
1470 vec_deref_is_oob(nir_deref_instr *deref,
1471 struct vec_var_usage *usage)
1472 {
1473 nir_deref_path path;
1474 nir_deref_path_init(&path, deref, NULL);
1475
1476 bool oob = false;
1477 for (unsigned i = 0; i < usage->num_levels; i++) {
1478 nir_deref_instr *p = path.path[i + 1];
1479 if (p->deref_type == nir_deref_type_array_wildcard)
1480 continue;
1481
1482 if (nir_src_is_const(p->arr.index) &&
1483 nir_src_as_uint(p->arr.index) >= usage->levels[i].array_len) {
1484 oob = true;
1485 break;
1486 }
1487 }
1488
1489 nir_deref_path_finish(&path);
1490
1491 return oob;
1492 }
1493
1494 static bool
1495 vec_deref_is_dead_or_oob(nir_deref_instr *deref,
1496 struct hash_table *var_usage_map,
1497 nir_variable_mode modes)
1498 {
1499 struct vec_var_usage *usage =
1500 get_vec_deref_usage(deref, var_usage_map, modes, false, NULL);
1501 if (!usage)
1502 return false;
1503
1504 return usage->comps_kept == 0 || vec_deref_is_oob(deref, usage);
1505 }
1506
1507 static void
1508 shrink_vec_var_access_impl(nir_function_impl *impl,
1509 struct hash_table *var_usage_map,
1510 nir_variable_mode modes)
1511 {
1512 nir_builder b = nir_builder_create(impl);
1513
1514 nir_foreach_block(block, impl) {
1515 nir_foreach_instr_safe(instr, block) {
1516 switch (instr->type) {
1517 case nir_instr_type_deref: {
1518 nir_deref_instr *deref = nir_instr_as_deref(instr);
1519 if (!nir_deref_mode_may_be(deref, modes))
1520 break;
1521
1522 /* Clean up any dead derefs we find lying around. They may refer
1523 * to variables we've deleted.
1524 */
1525 if (nir_deref_instr_remove_if_unused(deref))
1526 break;
1527
1528 /* Update the type in the deref to keep the types consistent as
1529 * you walk down the chain. We don't need to check if this is one
1530 * of the derefs we're shrinking because this is a no-op if it
1531 * isn't. The worst that could happen is that we accidentally fix
1532 * an invalid deref.
1533 */
1534 if (deref->deref_type == nir_deref_type_var) {
1535 deref->type = deref->var->type;
1536 } else if (deref->deref_type == nir_deref_type_array ||
1537 deref->deref_type == nir_deref_type_array_wildcard) {
1538 nir_deref_instr *parent = nir_deref_instr_parent(deref);
1539 assert(glsl_type_is_array(parent->type) ||
1540 glsl_type_is_matrix(parent->type) ||
1541 glsl_type_is_vector(parent->type));
1542 deref->type = glsl_get_array_element(parent->type);
1543 }
1544 break;
1545 }
1546
1547 case nir_instr_type_intrinsic: {
1548 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1549
1550 /* If we have a copy whose source or destination has been deleted
1551 * because we determined the variable was dead, then we just
1552 * delete the copy instruction. If the source variable was dead
1553 * then it was writing undefined garbage anyway and if it's the
1554 * destination variable that's dead then the write isn't needed.
1555 */
1556 if (intrin->intrinsic == nir_intrinsic_copy_deref) {
1557 nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
1558 nir_deref_instr *src = nir_src_as_deref(intrin->src[1]);
1559 if (vec_deref_is_dead_or_oob(dst, var_usage_map, modes) ||
1560 vec_deref_is_dead_or_oob(src, var_usage_map, modes)) {
1561 nir_instr_remove(&intrin->instr);
1562 nir_deref_instr_remove_if_unused(dst);
1563 nir_deref_instr_remove_if_unused(src);
1564 }
1565 continue;
1566 }
1567
1568 if (intrin->intrinsic != nir_intrinsic_load_deref &&
1569 intrin->intrinsic != nir_intrinsic_store_deref)
1570 continue;
1571
1572 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1573 if (!nir_deref_mode_may_be(deref, modes))
1574 continue;
1575
1576 struct vec_var_usage *usage =
1577 get_vec_deref_usage(deref, var_usage_map, modes, false, NULL);
1578 if (!usage)
1579 continue;
1580
1581 if (usage->comps_kept == 0 || vec_deref_is_oob(deref, usage)) {
1582 if (intrin->intrinsic == nir_intrinsic_load_deref) {
1583 nir_def *u =
1584 nir_undef(&b, intrin->def.num_components,
1585 intrin->def.bit_size);
1586 nir_def_rewrite_uses(&intrin->def,
1587 u);
1588 }
1589 nir_instr_remove(&intrin->instr);
1590 nir_deref_instr_remove_if_unused(deref);
1591 continue;
1592 }
1593
1594 /* If we're not dropping any components, there's no need to
1595 * compact vectors.
1596 */
1597 if (usage->comps_kept == usage->all_comps)
1598 continue;
1599
1600 if (intrin->intrinsic == nir_intrinsic_load_deref) {
1601 b.cursor = nir_after_instr(&intrin->instr);
1602
1603 nir_def *undef =
1604 nir_undef(&b, 1, intrin->def.bit_size);
1605 nir_def *vec_srcs[NIR_MAX_VEC_COMPONENTS];
1606 unsigned c = 0;
1607 for (unsigned i = 0; i < intrin->num_components; i++) {
1608 if (usage->comps_kept & (1u << i))
1609 vec_srcs[i] = nir_channel(&b, &intrin->def, c++);
1610 else
1611 vec_srcs[i] = undef;
1612 }
1613 nir_def *vec = nir_vec(&b, vec_srcs, intrin->num_components);
1614
1615 nir_def_rewrite_uses_after(&intrin->def,
1616 vec,
1617 vec->parent_instr);
1618
1619 /* The SSA def is now only used by the swizzle. It's safe to
1620 * shrink the number of components.
1621 */
1622 assert(list_length(&intrin->def.uses) == c);
1623 intrin->num_components = c;
1624 intrin->def.num_components = c;
1625 } else {
1626 nir_component_mask_t write_mask =
1627 nir_intrinsic_write_mask(intrin);
1628
1629 unsigned swizzle[NIR_MAX_VEC_COMPONENTS];
1630 nir_component_mask_t new_write_mask = 0;
1631 unsigned c = 0;
1632 for (unsigned i = 0; i < intrin->num_components; i++) {
1633 if (usage->comps_kept & (1u << i)) {
1634 swizzle[c] = i;
1635 if (write_mask & (1u << i))
1636 new_write_mask |= 1u << c;
1637 c++;
1638 }
1639 }
1640
1641 b.cursor = nir_before_instr(&intrin->instr);
1642
1643 nir_def *swizzled =
1644 nir_swizzle(&b, intrin->src[1].ssa, swizzle, c);
1645
1646 /* Rewrite to use the compacted source */
1647 nir_src_rewrite(&intrin->src[1], swizzled);
1648 nir_intrinsic_set_write_mask(intrin, new_write_mask);
1649 intrin->num_components = c;
1650 }
1651 break;
1652 }
1653
1654 default:
1655 break;
1656 }
1657 }
1658 }
1659 }
1660
1661 static bool
1662 function_impl_has_vars_with_modes(nir_function_impl *impl,
1663 nir_variable_mode modes)
1664 {
1665 nir_shader *shader = impl->function->shader;
1666
1667 if (modes & ~nir_var_function_temp) {
1668 nir_foreach_variable_with_modes(var, shader,
1669 modes & ~nir_var_function_temp)
1670 return true;
1671 }
1672
1673 if ((modes & nir_var_function_temp) && !exec_list_is_empty(&impl->locals))
1674 return true;
1675
1676 return false;
1677 }
1678
1679 /** Attempt to shrink arrays of vectors
1680 *
1681 * This pass looks at variables which contain a vector or an array (possibly
1682 * multiple dimensions) of vectors and attempts to lower to a smaller vector
1683 * or array. If the pass can prove that a component of a vector (or array of
1684 * vectors) is never really used, then that component will be removed.
1685 * Similarly, the pass attempts to shorten arrays based on what elements it
1686 * can prove are never read or never contain valid data.
1687 */
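/* Illustrative example: for a shader_temp "vec4 t[8]" where only .xy is
 * ever both read and written, no index above 3 is ever accessed, and there
 * are no indirect writes, the variable becomes roughly "vec2 t[4]"; loads
 * and stores are re-swizzled to the packed components and any access the
 * pass proves out of bounds is removed.
 */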
1688 bool
1689 nir_shrink_vec_array_vars(nir_shader *shader, nir_variable_mode modes)
1690 {
1691 assert((modes & (nir_var_shader_temp | nir_var_function_temp)) == modes);
1692
1693 void *mem_ctx = ralloc_context(NULL);
1694
1695 struct hash_table *var_usage_map =
1696 _mesa_pointer_hash_table_create(mem_ctx);
1697
1698 bool has_vars_to_shrink = false;
1699 nir_foreach_function_impl(impl, shader) {
1700 /* Don't even bother crawling the IR if we don't have any variables.
1701 * Given that this pass deletes any unused variables, it's likely that
1702 * we will be in this scenario eventually.
1703 */
1704 if (function_impl_has_vars_with_modes(impl, modes)) {
1705 has_vars_to_shrink = true;
1706 find_used_components_impl(impl, var_usage_map,
1707 modes, mem_ctx);
1708 }
1709 }
1710 if (!has_vars_to_shrink) {
1711 ralloc_free(mem_ctx);
1712 nir_shader_preserve_all_metadata(shader);
1713 return false;
1714 }
1715
1716 bool globals_shrunk = false;
1717 if (modes & nir_var_shader_temp) {
1718 globals_shrunk = shrink_vec_var_list(&shader->variables,
1719 nir_var_shader_temp,
1720 var_usage_map);
1721 }
1722
1723 bool progress = false;
1724 nir_foreach_function_impl(impl, shader) {
1725 bool locals_shrunk = false;
1726 if (modes & nir_var_function_temp) {
1727 locals_shrunk = shrink_vec_var_list(&impl->locals,
1728 nir_var_function_temp,
1729 var_usage_map);
1730 }
1731
1732 if (globals_shrunk || locals_shrunk) {
1733 shrink_vec_var_access_impl(impl, var_usage_map, modes);
1734
1735 nir_metadata_preserve(impl, nir_metadata_block_index |
1736 nir_metadata_dominance);
1737 progress = true;
1738 } else {
1739 nir_metadata_preserve(impl, nir_metadata_all);
1740 }
1741 }
1742
1743 ralloc_free(mem_ctx);
1744
1745 return progress;
1746 }
1747