1 /*
2 * Copyright © 2018 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "nir_deref.h"
25 #include "util/hash_table.h"
26 #include "nir.h"
27 #include "nir_builder.h"
28
29 bool
nir_deref_cast_is_trivial(nir_deref_instr * cast)30 nir_deref_cast_is_trivial(nir_deref_instr *cast)
31 {
32 assert(cast->deref_type == nir_deref_type_cast);
33
34 nir_deref_instr *parent = nir_src_as_deref(cast->parent);
35 if (!parent)
36 return false;
37
38 return cast->modes == parent->modes &&
39 cast->type == parent->type &&
40 cast->def.num_components == parent->def.num_components &&
41 cast->def.bit_size == parent->def.bit_size;
42 }
43
44 void
nir_deref_path_init(nir_deref_path * path,nir_deref_instr * deref,void * mem_ctx)45 nir_deref_path_init(nir_deref_path *path,
46 nir_deref_instr *deref, void *mem_ctx)
47 {
48 assert(deref != NULL);
49
50 /* The length of the short path is at most ARRAY_SIZE - 1 because we need
51 * room for the NULL terminator.
52 */
53 static const int max_short_path_len = ARRAY_SIZE(path->_short_path) - 1;
54
55 int count = 0;
56
57 nir_deref_instr **tail = &path->_short_path[max_short_path_len];
58 nir_deref_instr **head = tail;
59
60 *tail = NULL;
61 for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
62 if (d->deref_type == nir_deref_type_cast && nir_deref_cast_is_trivial(d))
63 continue;
64 count++;
65 if (count <= max_short_path_len)
66 *(--head) = d;
67 }
68
69 if (count <= max_short_path_len) {
70 /* If we're under max_short_path_len, just use the short path. */
71 path->path = head;
72 goto done;
73 }
74
75 #ifndef NDEBUG
76 /* Just in case someone uses short_path by accident */
77 for (unsigned i = 0; i < ARRAY_SIZE(path->_short_path); i++)
78 path->_short_path[i] = (void *)(uintptr_t)0xdeadbeef;
79 #endif
80
81 path->path = ralloc_array(mem_ctx, nir_deref_instr *, count + 1);
82 head = tail = path->path + count;
83 *tail = NULL;
84 for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
85 if (d->deref_type == nir_deref_type_cast && nir_deref_cast_is_trivial(d))
86 continue;
87 *(--head) = d;
88 }
89
90 done:
91 assert(head == path->path);
92 assert(tail == head + count);
93 assert(*tail == NULL);
94 }
95
96 void
nir_deref_path_finish(nir_deref_path * path)97 nir_deref_path_finish(nir_deref_path *path)
98 {
99 if (path->path < &path->_short_path[0] ||
100 path->path > &path->_short_path[ARRAY_SIZE(path->_short_path) - 1])
101 ralloc_free(path->path);
102 }
103
104 /**
105 * Recursively removes unused deref instructions
106 */
107 bool
nir_deref_instr_remove_if_unused(nir_deref_instr * instr)108 nir_deref_instr_remove_if_unused(nir_deref_instr *instr)
109 {
110 bool progress = false;
111
112 for (nir_deref_instr *d = instr; d; d = nir_deref_instr_parent(d)) {
113 /* If anyone is using this deref, leave it alone */
114 if (!nir_def_is_unused(&d->def))
115 break;
116
117 nir_instr_remove(&d->instr);
118 progress = true;
119 }
120
121 return progress;
122 }
123
124 bool
nir_deref_instr_has_indirect(nir_deref_instr * instr)125 nir_deref_instr_has_indirect(nir_deref_instr *instr)
126 {
127 while (instr->deref_type != nir_deref_type_var) {
128 /* Consider casts to be indirects */
129 if (instr->deref_type == nir_deref_type_cast)
130 return true;
131
132 if ((instr->deref_type == nir_deref_type_array ||
133 instr->deref_type == nir_deref_type_ptr_as_array) &&
134 !nir_src_is_const(instr->arr.index))
135 return true;
136
137 instr = nir_deref_instr_parent(instr);
138 }
139
140 return false;
141 }
142
143 bool
nir_deref_instr_is_known_out_of_bounds(nir_deref_instr * instr)144 nir_deref_instr_is_known_out_of_bounds(nir_deref_instr *instr)
145 {
146 for (; instr; instr = nir_deref_instr_parent(instr)) {
147 if (instr->deref_type == nir_deref_type_array &&
148 nir_src_is_const(instr->arr.index) &&
149 nir_src_as_uint(instr->arr.index) >=
150 glsl_get_length(nir_deref_instr_parent(instr)->type))
151 return true;
152 }
153
154 return false;
155 }
156
157 bool
nir_deref_instr_has_complex_use(nir_deref_instr * deref,nir_deref_instr_has_complex_use_options opts)158 nir_deref_instr_has_complex_use(nir_deref_instr *deref,
159 nir_deref_instr_has_complex_use_options opts)
160 {
161 nir_foreach_use_including_if(use_src, &deref->def) {
162 if (nir_src_is_if(use_src))
163 return true;
164
165 nir_instr *use_instr = nir_src_parent_instr(use_src);
166
167 switch (use_instr->type) {
168 case nir_instr_type_deref: {
169 nir_deref_instr *use_deref = nir_instr_as_deref(use_instr);
170
171 /* A var deref has no sources */
172 assert(use_deref->deref_type != nir_deref_type_var);
173
174 /* If a deref shows up in an array index or something like that, it's
175 * a complex use.
176 */
177 if (use_src != &use_deref->parent)
178 return true;
179
180 /* Anything that isn't a basic struct or array deref is considered to
181 * be a "complex" use. In particular, we don't allow ptr_as_array
182 * because we assume that opt_deref will turn any non-complex
183 * ptr_as_array derefs into regular array derefs eventually so passes
184 * which only want to handle simple derefs will pick them up in a
185 * later pass.
186 */
187 if (use_deref->deref_type != nir_deref_type_struct &&
188 use_deref->deref_type != nir_deref_type_array_wildcard &&
189 use_deref->deref_type != nir_deref_type_array)
190 return true;
191
192 if (nir_deref_instr_has_complex_use(use_deref, opts))
193 return true;
194
195 continue;
196 }
197
198 case nir_instr_type_intrinsic: {
199 nir_intrinsic_instr *use_intrin = nir_instr_as_intrinsic(use_instr);
200 switch (use_intrin->intrinsic) {
201 case nir_intrinsic_load_deref:
202 assert(use_src == &use_intrin->src[0]);
203 continue;
204
205 case nir_intrinsic_copy_deref:
206 assert(use_src == &use_intrin->src[0] ||
207 use_src == &use_intrin->src[1]);
208 continue;
209
210 case nir_intrinsic_store_deref:
211 /* A use in src[1] of a store means we're taking that pointer and
212 * writing it to a variable. Because we have no idea who will
213 * read that variable and what they will do with the pointer, it's
214 * considered a "complex" use. A use in src[0], on the other
215 * hand, is a simple use because we're just going to dereference
216 * it and write a value there.
217 */
218 if (use_src == &use_intrin->src[0])
219 continue;
220 return true;
221
222 case nir_intrinsic_memcpy_deref:
223 if (use_src == &use_intrin->src[0] &&
224 (opts & nir_deref_instr_has_complex_use_allow_memcpy_dst))
225 continue;
226 if (use_src == &use_intrin->src[1] &&
227 (opts & nir_deref_instr_has_complex_use_allow_memcpy_src))
228 continue;
229 return true;
230
231 case nir_intrinsic_deref_atomic:
232 case nir_intrinsic_deref_atomic_swap:
233 if (opts & nir_deref_instr_has_complex_use_allow_atomics)
234 continue;
235 return true;
236
237 default:
238 return true;
239 }
240 unreachable("Switch default failed");
241 }
242
243 default:
244 return true;
245 }
246 }
247
248 return false;
249 }
250
251 static unsigned
type_scalar_size_bytes(const struct glsl_type * type)252 type_scalar_size_bytes(const struct glsl_type *type)
253 {
254 assert(glsl_type_is_vector_or_scalar(type) ||
255 glsl_type_is_matrix(type));
256 return glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;
257 }
258
259 unsigned
nir_deref_instr_array_stride(nir_deref_instr * deref)260 nir_deref_instr_array_stride(nir_deref_instr *deref)
261 {
262 switch (deref->deref_type) {
263 case nir_deref_type_array:
264 case nir_deref_type_array_wildcard: {
265 const struct glsl_type *arr_type = nir_deref_instr_parent(deref)->type;
266 unsigned stride = glsl_get_explicit_stride(arr_type);
267
268 if ((glsl_type_is_matrix(arr_type) &&
269 glsl_matrix_type_is_row_major(arr_type)) ||
270 (glsl_type_is_vector(arr_type) && stride == 0))
271 stride = type_scalar_size_bytes(arr_type);
272
273 return stride;
274 }
275 case nir_deref_type_ptr_as_array:
276 return nir_deref_instr_array_stride(nir_deref_instr_parent(deref));
277 case nir_deref_type_cast:
278 return deref->cast.ptr_stride;
279 default:
280 return 0;
281 }
282 }
283
284 static unsigned
type_get_array_stride(const struct glsl_type * elem_type,glsl_type_size_align_func size_align)285 type_get_array_stride(const struct glsl_type *elem_type,
286 glsl_type_size_align_func size_align)
287 {
288 unsigned elem_size, elem_align;
289 size_align(elem_type, &elem_size, &elem_align);
290 return ALIGN_POT(elem_size, elem_align);
291 }
292
293 static unsigned
struct_type_get_field_offset(const struct glsl_type * struct_type,glsl_type_size_align_func size_align,unsigned field_idx)294 struct_type_get_field_offset(const struct glsl_type *struct_type,
295 glsl_type_size_align_func size_align,
296 unsigned field_idx)
297 {
298 assert(glsl_type_is_struct_or_ifc(struct_type));
299 unsigned offset = 0;
300 for (unsigned i = 0; i <= field_idx; i++) {
301 unsigned elem_size, elem_align;
302 size_align(glsl_get_struct_field(struct_type, i), &elem_size, &elem_align);
303 offset = ALIGN_POT(offset, elem_align);
304 if (i < field_idx)
305 offset += elem_size;
306 }
307 return offset;
308 }
309
310 unsigned
nir_deref_instr_get_const_offset(nir_deref_instr * deref,glsl_type_size_align_func size_align)311 nir_deref_instr_get_const_offset(nir_deref_instr *deref,
312 glsl_type_size_align_func size_align)
313 {
314 nir_deref_path path;
315 nir_deref_path_init(&path, deref, NULL);
316
317 unsigned offset = 0;
318 for (nir_deref_instr **p = &path.path[1]; *p; p++) {
319 switch ((*p)->deref_type) {
320 case nir_deref_type_array:
321 offset += nir_src_as_uint((*p)->arr.index) *
322 type_get_array_stride((*p)->type, size_align);
323 break;
324 case nir_deref_type_struct: {
325 /* p starts at path[1], so this is safe */
326 nir_deref_instr *parent = *(p - 1);
327 offset += struct_type_get_field_offset(parent->type, size_align,
328 (*p)->strct.index);
329 break;
330 }
331 case nir_deref_type_cast:
332 /* A cast doesn't contribute to the offset */
333 break;
334 default:
335 unreachable("Unsupported deref type");
336 }
337 }
338
339 nir_deref_path_finish(&path);
340
341 return offset;
342 }
343
344 nir_def *
nir_build_deref_offset(nir_builder * b,nir_deref_instr * deref,glsl_type_size_align_func size_align)345 nir_build_deref_offset(nir_builder *b, nir_deref_instr *deref,
346 glsl_type_size_align_func size_align)
347 {
348 nir_deref_path path;
349 nir_deref_path_init(&path, deref, NULL);
350
351 nir_def *offset = nir_imm_intN_t(b, 0, deref->def.bit_size);
352 for (nir_deref_instr **p = &path.path[1]; *p; p++) {
353 switch ((*p)->deref_type) {
354 case nir_deref_type_array:
355 case nir_deref_type_ptr_as_array: {
356 nir_def *index = (*p)->arr.index.ssa;
357 int stride = type_get_array_stride((*p)->type, size_align);
358 offset = nir_iadd(b, offset, nir_amul_imm(b, index, stride));
359 break;
360 }
361 case nir_deref_type_struct: {
362 /* p starts at path[1], so this is safe */
363 nir_deref_instr *parent = *(p - 1);
364 unsigned field_offset =
365 struct_type_get_field_offset(parent->type, size_align,
366 (*p)->strct.index);
367 offset = nir_iadd_imm(b, offset, field_offset);
368 break;
369 }
370 case nir_deref_type_cast:
371 /* A cast doesn't contribute to the offset */
372 break;
373 default:
374 unreachable("Unsupported deref type");
375 }
376 }
377
378 nir_deref_path_finish(&path);
379
380 return offset;
381 }
382
383 bool
nir_remove_dead_derefs_impl(nir_function_impl * impl)384 nir_remove_dead_derefs_impl(nir_function_impl *impl)
385 {
386 bool progress = false;
387
388 nir_foreach_block(block, impl) {
389 nir_foreach_instr_safe(instr, block) {
390 if (instr->type == nir_instr_type_deref &&
391 nir_deref_instr_remove_if_unused(nir_instr_as_deref(instr)))
392 progress = true;
393 }
394 }
395
396 if (progress) {
397 nir_metadata_preserve(impl, nir_metadata_block_index |
398 nir_metadata_dominance);
399 } else {
400 nir_metadata_preserve(impl, nir_metadata_all);
401 }
402
403 return progress;
404 }
405
406 bool
nir_remove_dead_derefs(nir_shader * shader)407 nir_remove_dead_derefs(nir_shader *shader)
408 {
409 bool progress = false;
410 nir_foreach_function_impl(impl, shader) {
411 if (nir_remove_dead_derefs_impl(impl))
412 progress = true;
413 }
414
415 return progress;
416 }
417
418 static bool
nir_fixup_deref_modes_instr(UNUSED struct nir_builder * b,nir_instr * instr,UNUSED void * data)419 nir_fixup_deref_modes_instr(UNUSED struct nir_builder *b, nir_instr *instr, UNUSED void *data)
420 {
421 if (instr->type != nir_instr_type_deref)
422 return false;
423
424 nir_deref_instr *deref = nir_instr_as_deref(instr);
425 nir_variable_mode parent_modes;
426 if (deref->deref_type == nir_deref_type_var) {
427 parent_modes = deref->var->data.mode;
428 } else {
429 nir_deref_instr *parent = nir_src_as_deref(deref->parent);
430 if (parent == NULL) {
431 /* Cast to some non-deref value, nothing to propagate. */
432 assert(deref->deref_type == nir_deref_type_cast);
433 return false;
434 }
435
436 /* It's safe to propagate a specific mode into a more generic one
437 * but never the other way around.
438 */
439 if (util_bitcount(parent->modes) != 1)
440 return false;
441
442 parent_modes = parent->modes;
443 }
444
445 if (deref->modes == parent_modes)
446 return false;
447
448 deref->modes = parent_modes;
449 return true;
450 }
451
452 void
nir_fixup_deref_modes(nir_shader * shader)453 nir_fixup_deref_modes(nir_shader *shader)
454 {
455 nir_shader_instructions_pass(shader, nir_fixup_deref_modes_instr,
456 nir_metadata_block_index |
457 nir_metadata_dominance |
458 nir_metadata_live_defs |
459 nir_metadata_instr_index,
460 NULL);
461 }
462
463 static bool
nir_fixup_deref_types_instr(UNUSED struct nir_builder * b,nir_instr * instr,UNUSED void * data)464 nir_fixup_deref_types_instr(UNUSED struct nir_builder *b, nir_instr *instr, UNUSED void *data)
465 {
466 if (instr->type != nir_instr_type_deref)
467 return false;
468
469 nir_deref_instr *deref = nir_instr_as_deref(instr);
470 const struct glsl_type *parent_derived_type;
471 if (deref->deref_type == nir_deref_type_var) {
472 parent_derived_type = deref->var->type;
473 } else if (deref->deref_type == nir_deref_type_array ||
474 deref->deref_type == nir_deref_type_struct) {
475 nir_deref_instr *parent = nir_src_as_deref(deref->parent);
476 if (deref->deref_type == nir_deref_type_array) {
477 parent_derived_type = glsl_get_array_element(parent->type);
478 } else if (deref->deref_type == nir_deref_type_struct) {
479 parent_derived_type =
480 glsl_get_struct_field(parent->type, deref->strct.index);
481 } else {
482 unreachable("Unsupported deref type");
483 }
484 } else {
485 unreachable("Unsupported deref type");
486 }
487
488 if (deref->type == parent_derived_type)
489 return false;
490
491 deref->type = parent_derived_type;
492 return true;
493 }
494
495 /* Update deref types when array sizes have changed. */
496 void
nir_fixup_deref_types(nir_shader * shader)497 nir_fixup_deref_types(nir_shader *shader)
498 {
499 nir_shader_instructions_pass(shader, nir_fixup_deref_types_instr,
500 nir_metadata_block_index |
501 nir_metadata_dominance |
502 nir_metadata_live_defs |
503 nir_metadata_instr_index,
504 NULL);
505 }
506
507 static bool
modes_may_alias(nir_variable_mode a,nir_variable_mode b)508 modes_may_alias(nir_variable_mode a, nir_variable_mode b)
509 {
510 /* Generic pointers can alias with SSBOs */
511 if ((a & (nir_var_mem_ssbo | nir_var_mem_global)) &&
512 (b & (nir_var_mem_ssbo | nir_var_mem_global)))
513 return true;
514
515 /* Pointers can only alias if they share a mode. */
516 return a & b;
517 }
518
519 ALWAYS_INLINE static nir_deref_compare_result
compare_deref_paths(nir_deref_path * a_path,nir_deref_path * b_path,unsigned * i,bool (* stop_fn)(const nir_deref_instr *))520 compare_deref_paths(nir_deref_path *a_path, nir_deref_path *b_path,
521 unsigned *i, bool (*stop_fn)(const nir_deref_instr *))
522 {
523 /* Start off assuming they fully compare. We ignore equality for now. In
524 * the end, we'll determine that by containment.
525 */
526 nir_deref_compare_result result = nir_derefs_may_alias_bit |
527 nir_derefs_a_contains_b_bit |
528 nir_derefs_b_contains_a_bit;
529
530 nir_deref_instr **a = a_path->path;
531 nir_deref_instr **b = b_path->path;
532
533 for (; a[*i] != NULL; (*i)++) {
534 if (a[*i] != b[*i])
535 break;
536
537 if (stop_fn && stop_fn(a[*i]))
538 break;
539 }
540
541 /* We're at either the tail or the divergence point between the two deref
542 * paths. Look to see if either contains cast or a ptr_as_array deref. If
543 * it does we don't know how to safely make any inferences. Hopefully,
544 * nir_opt_deref will clean most of these up and we can start inferring
545 * things again.
546 *
547 * In theory, we could do a bit better. For instance, we could detect the
548 * case where we have exactly one ptr_as_array deref in the chain after the
549 * divergence point and it's matched in both chains and the two chains have
550 * different constant indices.
551 */
552 for (unsigned j = *i; a[j] != NULL; j++) {
553 if (stop_fn && stop_fn(a[j]))
554 break;
555
556 if (a[j]->deref_type == nir_deref_type_cast ||
557 a[j]->deref_type == nir_deref_type_ptr_as_array)
558 return nir_derefs_may_alias_bit;
559 }
560 for (unsigned j = *i; b[j] != NULL; j++) {
561 if (stop_fn && stop_fn(b[j]))
562 break;
563
564 if (b[j]->deref_type == nir_deref_type_cast ||
565 b[j]->deref_type == nir_deref_type_ptr_as_array)
566 return nir_derefs_may_alias_bit;
567 }
568
569 for (; a[*i] != NULL && b[*i] != NULL; (*i)++) {
570 if (stop_fn && (stop_fn(a[*i]) || stop_fn(b[*i])))
571 break;
572
573 switch (a[*i]->deref_type) {
574 case nir_deref_type_array:
575 case nir_deref_type_array_wildcard: {
576 assert(b[*i]->deref_type == nir_deref_type_array ||
577 b[*i]->deref_type == nir_deref_type_array_wildcard);
578
579 if (a[*i]->deref_type == nir_deref_type_array_wildcard) {
580 if (b[*i]->deref_type != nir_deref_type_array_wildcard)
581 result &= ~nir_derefs_b_contains_a_bit;
582 } else if (b[*i]->deref_type == nir_deref_type_array_wildcard) {
583 if (a[*i]->deref_type != nir_deref_type_array_wildcard)
584 result &= ~nir_derefs_a_contains_b_bit;
585 } else {
586 assert(a[*i]->deref_type == nir_deref_type_array &&
587 b[*i]->deref_type == nir_deref_type_array);
588
589 if (nir_src_is_const(a[*i]->arr.index) &&
590 nir_src_is_const(b[*i]->arr.index)) {
591 /* If they're both direct and have different offsets, they
592 * don't even alias much less anything else.
593 */
594 if (nir_src_as_uint(a[*i]->arr.index) !=
595 nir_src_as_uint(b[*i]->arr.index))
596 return nir_derefs_do_not_alias;
597 } else if (a[*i]->arr.index.ssa == b[*i]->arr.index.ssa) {
598 /* They're the same indirect, continue on */
599 } else {
600 /* They're not the same index so we can't prove anything about
601 * containment.
602 */
603 result &= ~(nir_derefs_a_contains_b_bit | nir_derefs_b_contains_a_bit);
604 }
605 }
606 break;
607 }
608
609 case nir_deref_type_struct: {
610 /* If they're different struct members, they don't even alias */
611 if (a[*i]->strct.index != b[*i]->strct.index)
612 return nir_derefs_do_not_alias;
613 break;
614 }
615
616 default:
617 unreachable("Invalid deref type");
618 }
619 }
620
621 /* If a is longer than b, then it can't contain b. If neither a[i] nor
622 * b[i] are NULL then we aren't at the end of the chain and we know nothing
623 * about containment.
624 */
625 if (a[*i] != NULL)
626 result &= ~nir_derefs_a_contains_b_bit;
627 if (b[*i] != NULL)
628 result &= ~nir_derefs_b_contains_a_bit;
629
630 /* If a contains b and b contains a they must be equal. */
631 if ((result & nir_derefs_a_contains_b_bit) &&
632 (result & nir_derefs_b_contains_a_bit))
633 result |= nir_derefs_equal_bit;
634
635 return result;
636 }
637
638 static bool
is_interface_struct_deref(const nir_deref_instr * deref)639 is_interface_struct_deref(const nir_deref_instr *deref)
640 {
641 if (deref->deref_type == nir_deref_type_struct) {
642 assert(glsl_type_is_struct_or_ifc(nir_deref_instr_parent(deref)->type));
643 return true;
644 } else {
645 return false;
646 }
647 }
648
649 nir_deref_compare_result
nir_compare_deref_paths(nir_deref_path * a_path,nir_deref_path * b_path)650 nir_compare_deref_paths(nir_deref_path *a_path,
651 nir_deref_path *b_path)
652 {
653 if (!modes_may_alias(b_path->path[0]->modes, a_path->path[0]->modes))
654 return nir_derefs_do_not_alias;
655
656 if (a_path->path[0]->deref_type != b_path->path[0]->deref_type)
657 return nir_derefs_may_alias_bit;
658
659 unsigned path_idx = 1;
660 if (a_path->path[0]->deref_type == nir_deref_type_var) {
661 const nir_variable *a_var = a_path->path[0]->var;
662 const nir_variable *b_var = b_path->path[0]->var;
663
664 /* If we got here, the two variables must have the same mode. The
665 * only way modes_may_alias() can return true for two different modes
666 * is if one is global and the other ssbo. However, Global variables
667 * only exist in OpenCL and SSBOs don't exist there. No API allows
668 * both for variables.
669 */
670 assert(a_var->data.mode == b_var->data.mode);
671
672 switch (a_var->data.mode) {
673 case nir_var_mem_ssbo: {
674 nir_deref_compare_result binding_compare;
675 if (a_var == b_var) {
676 binding_compare = compare_deref_paths(a_path, b_path, &path_idx,
677 is_interface_struct_deref);
678 } else {
679 binding_compare = nir_derefs_do_not_alias;
680 }
681
682 if (binding_compare & nir_derefs_equal_bit)
683 break;
684
685 /* If the binding derefs can't alias and at least one is RESTRICT,
686 * then we know they can't alias.
687 */
688 if (!(binding_compare & nir_derefs_may_alias_bit) &&
689 ((a_var->data.access & ACCESS_RESTRICT) ||
690 (b_var->data.access & ACCESS_RESTRICT)))
691 return nir_derefs_do_not_alias;
692
693 return nir_derefs_may_alias_bit;
694 }
695
696 case nir_var_mem_shared:
697 if (a_var == b_var)
698 break;
699
700 /* Per SPV_KHR_workgroup_memory_explicit_layout and
701 * GL_EXT_shared_memory_block, shared blocks alias each other.
702 * We will have either all blocks or all non-blocks.
703 */
704 if (glsl_type_is_interface(a_var->type) ||
705 glsl_type_is_interface(b_var->type)) {
706 assert(glsl_type_is_interface(a_var->type) &&
707 glsl_type_is_interface(b_var->type));
708 return nir_derefs_may_alias_bit;
709 }
710
711 /* Otherwise, distinct shared vars don't alias */
712 return nir_derefs_do_not_alias;
713
714 default:
715 /* For any other variable types, if we can chase them back to the
716 * variable, and the variables are different, they don't alias.
717 */
718 if (a_var == b_var)
719 break;
720
721 return nir_derefs_do_not_alias;
722 }
723 } else {
724 assert(a_path->path[0]->deref_type == nir_deref_type_cast);
725 /* If they're not exactly the same cast, it's hard to compare them so we
726 * just assume they alias. Comparing casts is tricky as there are lots
727 * of things such as mode, type, etc. to make sure work out; for now, we
728 * just assume nit_opt_deref will combine them and compare the deref
729 * instructions.
730 *
731 * TODO: At some point in the future, we could be clever and understand
732 * that a float[] and int[] have the same layout and aliasing structure
733 * but double[] and vec3[] do not and we could potentially be a bit
734 * smarter here.
735 */
736 if (a_path->path[0] != b_path->path[0])
737 return nir_derefs_may_alias_bit;
738 }
739
740 return compare_deref_paths(a_path, b_path, &path_idx, NULL);
741 }
742
743 nir_deref_compare_result
nir_compare_derefs(nir_deref_instr * a,nir_deref_instr * b)744 nir_compare_derefs(nir_deref_instr *a, nir_deref_instr *b)
745 {
746 if (a == b) {
747 return nir_derefs_equal_bit | nir_derefs_may_alias_bit |
748 nir_derefs_a_contains_b_bit | nir_derefs_b_contains_a_bit;
749 }
750
751 nir_deref_path a_path, b_path;
752 nir_deref_path_init(&a_path, a, NULL);
753 nir_deref_path_init(&b_path, b, NULL);
754 assert(a_path.path[0]->deref_type == nir_deref_type_var ||
755 a_path.path[0]->deref_type == nir_deref_type_cast);
756 assert(b_path.path[0]->deref_type == nir_deref_type_var ||
757 b_path.path[0]->deref_type == nir_deref_type_cast);
758
759 nir_deref_compare_result result = nir_compare_deref_paths(&a_path, &b_path);
760
761 nir_deref_path_finish(&a_path);
762 nir_deref_path_finish(&b_path);
763
764 return result;
765 }
766
767 nir_deref_path *
nir_get_deref_path(void * mem_ctx,nir_deref_and_path * deref)768 nir_get_deref_path(void *mem_ctx, nir_deref_and_path *deref)
769 {
770 if (!deref->_path) {
771 deref->_path = ralloc(mem_ctx, nir_deref_path);
772 nir_deref_path_init(deref->_path, deref->instr, mem_ctx);
773 }
774 return deref->_path;
775 }
776
777 nir_deref_compare_result
nir_compare_derefs_and_paths(void * mem_ctx,nir_deref_and_path * a,nir_deref_and_path * b)778 nir_compare_derefs_and_paths(void *mem_ctx,
779 nir_deref_and_path *a,
780 nir_deref_and_path *b)
781 {
782 if (a->instr == b->instr) /* nir_compare_derefs has a fast path if a == b */
783 return nir_compare_derefs(a->instr, b->instr);
784
785 return nir_compare_deref_paths(nir_get_deref_path(mem_ctx, a),
786 nir_get_deref_path(mem_ctx, b));
787 }
788
789 struct rematerialize_deref_state {
790 bool progress;
791 nir_builder builder;
792 nir_block *block;
793 };
794
795 static nir_deref_instr *
rematerialize_deref_in_block(nir_deref_instr * deref,struct rematerialize_deref_state * state)796 rematerialize_deref_in_block(nir_deref_instr *deref,
797 struct rematerialize_deref_state *state)
798 {
799 if (deref->instr.block == state->block)
800 return deref;
801
802 nir_builder *b = &state->builder;
803 nir_deref_instr *new_deref =
804 nir_deref_instr_create(b->shader, deref->deref_type);
805 new_deref->modes = deref->modes;
806 new_deref->type = deref->type;
807
808 if (deref->deref_type == nir_deref_type_var) {
809 new_deref->var = deref->var;
810 } else {
811 nir_deref_instr *parent = nir_src_as_deref(deref->parent);
812 if (parent) {
813 parent = rematerialize_deref_in_block(parent, state);
814 new_deref->parent = nir_src_for_ssa(&parent->def);
815 } else {
816 new_deref->parent = nir_src_for_ssa(deref->parent.ssa);
817 }
818 }
819
820 switch (deref->deref_type) {
821 case nir_deref_type_var:
822 case nir_deref_type_array_wildcard:
823 /* Nothing more to do */
824 break;
825
826 case nir_deref_type_cast:
827 new_deref->cast.ptr_stride = deref->cast.ptr_stride;
828 new_deref->cast.align_mul = deref->cast.align_mul;
829 new_deref->cast.align_offset = deref->cast.align_offset;
830 break;
831
832 case nir_deref_type_array:
833 case nir_deref_type_ptr_as_array:
834 assert(!nir_src_as_deref(deref->arr.index));
835 new_deref->arr.index = nir_src_for_ssa(deref->arr.index.ssa);
836 break;
837
838 case nir_deref_type_struct:
839 new_deref->strct.index = deref->strct.index;
840 break;
841
842 default:
843 unreachable("Invalid deref instruction type");
844 }
845
846 nir_def_init(&new_deref->instr, &new_deref->def,
847 deref->def.num_components, deref->def.bit_size);
848 nir_builder_instr_insert(b, &new_deref->instr);
849
850 return new_deref;
851 }
852
853 static bool
rematerialize_deref_src(nir_src * src,void * _state)854 rematerialize_deref_src(nir_src *src, void *_state)
855 {
856 struct rematerialize_deref_state *state = _state;
857
858 nir_deref_instr *deref = nir_src_as_deref(*src);
859 if (!deref)
860 return true;
861
862 nir_deref_instr *block_deref = rematerialize_deref_in_block(deref, state);
863 if (block_deref != deref) {
864 nir_src_rewrite(src, &block_deref->def);
865 nir_deref_instr_remove_if_unused(deref);
866 state->progress = true;
867 }
868
869 return true;
870 }
871
872 bool
nir_rematerialize_deref_in_use_blocks(nir_deref_instr * instr)873 nir_rematerialize_deref_in_use_blocks(nir_deref_instr *instr)
874 {
875 if (nir_deref_instr_remove_if_unused(instr))
876 return true;
877
878 struct rematerialize_deref_state state = {
879 .builder = nir_builder_create(nir_cf_node_get_function(&instr->instr.block->cf_node)),
880 };
881
882 nir_foreach_use_safe(use, &instr->def) {
883 nir_instr *parent = nir_src_parent_instr(use);
884 if (parent->block == instr->instr.block)
885 continue;
886
887 /* If a deref is used in a phi, we can't rematerialize it, as the new
888 * derefs would appear before the phi, which is not valid.
889 */
890 if (parent->type == nir_instr_type_phi)
891 continue;
892
893 state.block = parent->block;
894 state.builder.cursor = nir_before_instr(parent);
895 rematerialize_deref_src(use, &state);
896 }
897
898 return state.progress;
899 }
900
901 /** Re-materialize derefs in every block
902 *
903 * This pass re-materializes deref instructions in every block in which it is
904 * used. After this pass has been run, every use of a deref will be of a
905 * deref in the same block as the use. Also, all unused derefs will be
906 * deleted as a side-effect.
907 *
908 * Derefs used as sources of phi instructions are not rematerialized.
909 */
910 bool
nir_rematerialize_derefs_in_use_blocks_impl(nir_function_impl * impl)911 nir_rematerialize_derefs_in_use_blocks_impl(nir_function_impl *impl)
912 {
913 bool progress = false;
914 nir_foreach_block_unstructured(block, impl) {
915 nir_foreach_instr_safe(instr, block) {
916 if (instr->type == nir_instr_type_deref) {
917 nir_deref_instr *deref = nir_instr_as_deref(instr);
918 progress |= nir_rematerialize_deref_in_use_blocks(deref);
919 }
920 }
921
922 #ifndef NDEBUG
923 nir_if *following_if = nir_block_get_following_if(block);
924 if (following_if)
925 assert(!nir_src_as_deref(following_if->condition));
926 #endif
927 }
928
929 return progress;
930 }
931
932 static void
nir_deref_instr_fixup_child_types(nir_deref_instr * parent)933 nir_deref_instr_fixup_child_types(nir_deref_instr *parent)
934 {
935 nir_foreach_use(use, &parent->def) {
936 if (nir_src_parent_instr(use)->type != nir_instr_type_deref)
937 continue;
938
939 nir_deref_instr *child = nir_instr_as_deref(nir_src_parent_instr(use));
940 switch (child->deref_type) {
941 case nir_deref_type_var:
942 unreachable("nir_deref_type_var cannot be a child");
943
944 case nir_deref_type_array:
945 case nir_deref_type_array_wildcard:
946 child->type = glsl_get_array_element(parent->type);
947 break;
948
949 case nir_deref_type_ptr_as_array:
950 child->type = parent->type;
951 break;
952
953 case nir_deref_type_struct:
954 child->type = glsl_get_struct_field(parent->type,
955 child->strct.index);
956 break;
957
958 case nir_deref_type_cast:
959 /* We stop the recursion here */
960 continue;
961 }
962
963 /* Recurse into children */
964 nir_deref_instr_fixup_child_types(child);
965 }
966 }
967
968 static bool
opt_alu_of_cast(nir_alu_instr * alu)969 opt_alu_of_cast(nir_alu_instr *alu)
970 {
971 bool progress = false;
972
973 for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
974 nir_instr *src_instr = alu->src[i].src.ssa->parent_instr;
975 if (src_instr->type != nir_instr_type_deref)
976 continue;
977
978 nir_deref_instr *src_deref = nir_instr_as_deref(src_instr);
979 if (src_deref->deref_type != nir_deref_type_cast)
980 continue;
981
982 nir_src_rewrite(&alu->src[i].src, src_deref->parent.ssa);
983 progress = true;
984 }
985
986 return progress;
987 }
988
989 static bool
is_trivial_array_deref_cast(nir_deref_instr * cast)990 is_trivial_array_deref_cast(nir_deref_instr *cast)
991 {
992 assert(nir_deref_cast_is_trivial(cast));
993
994 nir_deref_instr *parent = nir_src_as_deref(cast->parent);
995
996 if (parent->deref_type == nir_deref_type_array) {
997 return cast->cast.ptr_stride ==
998 glsl_get_explicit_stride(nir_deref_instr_parent(parent)->type);
999 } else if (parent->deref_type == nir_deref_type_ptr_as_array) {
1000 return cast->cast.ptr_stride ==
1001 nir_deref_instr_array_stride(parent);
1002 } else {
1003 return false;
1004 }
1005 }
1006
1007 static bool
is_deref_ptr_as_array(nir_instr * instr)1008 is_deref_ptr_as_array(nir_instr *instr)
1009 {
1010 return instr->type == nir_instr_type_deref &&
1011 nir_instr_as_deref(instr)->deref_type == nir_deref_type_ptr_as_array;
1012 }
1013
1014 static bool
opt_remove_restricting_cast_alignments(nir_deref_instr * cast)1015 opt_remove_restricting_cast_alignments(nir_deref_instr *cast)
1016 {
1017 assert(cast->deref_type == nir_deref_type_cast);
1018 if (cast->cast.align_mul == 0)
1019 return false;
1020
1021 nir_deref_instr *parent = nir_src_as_deref(cast->parent);
1022 if (parent == NULL)
1023 return false;
1024
1025 /* Don't use any default alignment for this check. We don't want to fall
1026 * back to type alignment too early in case we find out later that we're
1027 * somehow a child of a packed struct.
1028 */
1029 uint32_t parent_mul, parent_offset;
1030 if (!nir_get_explicit_deref_align(parent, false /* default_to_type_align */,
1031 &parent_mul, &parent_offset))
1032 return false;
1033
1034 /* If this cast increases the alignment, we want to keep it.
1035 *
1036 * There is a possibility that the larger alignment provided by this cast
1037 * somehow disagrees with the smaller alignment further up the deref chain.
1038 * In that case, we choose to favor the alignment closer to the actual
1039 * memory operation which, in this case, is the cast and not its parent so
1040 * keeping the cast alignment is the right thing to do.
1041 */
1042 if (parent_mul < cast->cast.align_mul)
1043 return false;
1044
1045 /* If we've gotten here, we have a parent deref with an align_mul at least
1046 * as large as ours so we can potentially throw away the alignment
1047 * information on this deref. There are two cases to consider here:
1048 *
1049 * 1. We can chase the deref all the way back to the variable. In this
1050 * case, we have "perfect" knowledge, modulo indirect array derefs.
1051 * Unless we've done something wrong in our indirect/wildcard stride
1052 * calculations, our knowledge from the deref walk is better than the
1053 * client's.
1054 *
1055 * 2. We can't chase it all the way back to the variable. In this case,
1056 * because our call to nir_get_explicit_deref_align(parent, ...) above
1057 * above passes default_to_type_align=false, the only way we can even
1058 * get here is if something further up the deref chain has a cast with
1059 * an alignment which can only happen if we get an alignment from the
1060 * client (most likely a decoration in the SPIR-V). If the client has
1061 * provided us with two conflicting alignments in the deref chain,
1062 * that's their fault and we can do whatever we want.
1063 *
1064 * In either case, we should be without our rights, at this point, to throw
1065 * away the alignment information on this deref. However, to be "nice" to
1066 * weird clients, we do one more check. It really shouldn't happen but
1067 * it's possible that the parent's alignment offset disagrees with the
1068 * cast's alignment offset. In this case, we consider the cast as
1069 * providing more information (or at least more valid information) and keep
1070 * it even if the align_mul from the parent is larger.
1071 */
1072 assert(cast->cast.align_mul <= parent_mul);
1073 if (parent_offset % cast->cast.align_mul != cast->cast.align_offset)
1074 return false;
1075
1076 /* If we got here, the parent has better alignment information than the
1077 * child and we can get rid of the child alignment information.
1078 */
1079 cast->cast.align_mul = 0;
1080 cast->cast.align_offset = 0;
1081 return true;
1082 }
1083
1084 /**
1085 * Remove casts that just wrap other casts.
1086 */
1087 static bool
opt_remove_cast_cast(nir_deref_instr * cast)1088 opt_remove_cast_cast(nir_deref_instr *cast)
1089 {
1090 nir_deref_instr *parent = nir_deref_instr_parent(cast);
1091 if (parent == NULL || parent->deref_type != nir_deref_type_cast)
1092 return false;
1093
1094 /* Copy align info from the parent cast if needed
1095 *
1096 * In the case that align_mul = 0, the alignment for this cast is inhereted
1097 * from the parent deref (if any). If we aren't careful, removing our
1098 * parent cast from the chain may lose alignment information so we need to
1099 * copy the parent's alignment information (if any).
1100 *
1101 * opt_remove_restricting_cast_alignments() above is run before this pass
1102 * and will will have cleared our alignment (set align_mul = 0) in the case
1103 * where the parent's alignment information is somehow superior.
1104 */
1105 if (cast->cast.align_mul == 0) {
1106 cast->cast.align_mul = parent->cast.align_mul;
1107 cast->cast.align_offset = parent->cast.align_offset;
1108 }
1109
1110 nir_src_rewrite(&cast->parent, parent->parent.ssa);
1111 return true;
1112 }
1113
1114 /* Restrict variable modes in casts.
1115 *
1116 * If we know from something higher up the deref chain that the deref has a
1117 * specific mode, we can cast to more general and back but we can never cast
1118 * across modes. For non-cast derefs, we should only ever do anything here if
1119 * the parent eventually comes from a cast that we restricted earlier.
1120 */
1121 static bool
opt_restrict_deref_modes(nir_deref_instr * deref)1122 opt_restrict_deref_modes(nir_deref_instr *deref)
1123 {
1124 if (deref->deref_type == nir_deref_type_var) {
1125 assert(deref->modes == deref->var->data.mode);
1126 return false;
1127 }
1128
1129 nir_deref_instr *parent = nir_src_as_deref(deref->parent);
1130 if (parent == NULL || parent->modes == deref->modes)
1131 return false;
1132
1133 assert(parent->modes & deref->modes);
1134 deref->modes &= parent->modes;
1135 return true;
1136 }
1137
1138 static bool
opt_remove_sampler_cast(nir_deref_instr * cast)1139 opt_remove_sampler_cast(nir_deref_instr *cast)
1140 {
1141 assert(cast->deref_type == nir_deref_type_cast);
1142 nir_deref_instr *parent = nir_src_as_deref(cast->parent);
1143 if (parent == NULL)
1144 return false;
1145
1146 /* Strip both types down to their non-array type and bail if there are any
1147 * discrepancies in array lengths.
1148 */
1149 const struct glsl_type *parent_type = parent->type;
1150 const struct glsl_type *cast_type = cast->type;
1151 while (glsl_type_is_array(parent_type) && glsl_type_is_array(cast_type)) {
1152 if (glsl_get_length(parent_type) != glsl_get_length(cast_type))
1153 return false;
1154 parent_type = glsl_get_array_element(parent_type);
1155 cast_type = glsl_get_array_element(cast_type);
1156 }
1157
1158 if (!glsl_type_is_sampler(parent_type))
1159 return false;
1160
1161 if (cast_type != glsl_bare_sampler_type() &&
1162 (glsl_type_is_bare_sampler(parent_type) ||
1163 cast_type != glsl_sampler_type_to_texture(parent_type)))
1164 return false;
1165
1166 /* We're a cast from a more detailed sampler type to a bare sampler or a
1167 * texture type with the same dimensionality.
1168 */
1169 nir_def_rewrite_uses(&cast->def,
1170 &parent->def);
1171 nir_instr_remove(&cast->instr);
1172
1173 /* Recursively crawl the deref tree and clean up types */
1174 nir_deref_instr_fixup_child_types(parent);
1175
1176 return true;
1177 }
1178
1179 /**
1180 * Is this casting a struct to a contained struct.
1181 * struct a { struct b field0 };
1182 * ssa_5 is structa;
1183 * deref_cast (structb *)ssa_5 (function_temp structb);
1184 * converts to
1185 * deref_struct &ssa_5->field0 (function_temp structb);
1186 * This allows subsequent copy propagation to work.
1187 */
1188 static bool
opt_replace_struct_wrapper_cast(nir_builder * b,nir_deref_instr * cast)1189 opt_replace_struct_wrapper_cast(nir_builder *b, nir_deref_instr *cast)
1190 {
1191 nir_deref_instr *parent = nir_src_as_deref(cast->parent);
1192 if (!parent)
1193 return false;
1194
1195 if (cast->cast.align_mul > 0)
1196 return false;
1197
1198 if (!glsl_type_is_struct(parent->type))
1199 return false;
1200
1201 /* Empty struct */
1202 if (glsl_get_length(parent->type) < 1)
1203 return false;
1204
1205 if (glsl_get_struct_field_offset(parent->type, 0) != 0)
1206 return false;
1207
1208 const struct glsl_type *field_type = glsl_get_struct_field(parent->type, 0);
1209 if (cast->type != field_type)
1210 return false;
1211
1212 /* we can't drop the stride information */
1213 if (cast->cast.ptr_stride != glsl_get_explicit_stride(field_type))
1214 return false;
1215
1216 nir_deref_instr *replace = nir_build_deref_struct(b, parent, 0);
1217 nir_def_rewrite_uses(&cast->def, &replace->def);
1218 nir_deref_instr_remove_if_unused(cast);
1219 return true;
1220 }
1221
1222 static bool
opt_deref_cast(nir_builder * b,nir_deref_instr * cast)1223 opt_deref_cast(nir_builder *b, nir_deref_instr *cast)
1224 {
1225 bool progress = false;
1226
1227 progress |= opt_remove_restricting_cast_alignments(cast);
1228
1229 if (opt_replace_struct_wrapper_cast(b, cast))
1230 return true;
1231
1232 if (opt_remove_sampler_cast(cast))
1233 return true;
1234
1235 progress |= opt_remove_cast_cast(cast);
1236 if (!nir_deref_cast_is_trivial(cast))
1237 return progress;
1238
1239 /* If this deref still contains useful alignment information, we don't want
1240 * to delete it.
1241 */
1242 if (cast->cast.align_mul > 0)
1243 return progress;
1244
1245 bool trivial_array_cast = is_trivial_array_deref_cast(cast);
1246
1247 nir_foreach_use_including_if_safe(use_src, &cast->def) {
1248 assert(!nir_src_is_if(use_src) && "there cannot be if-uses");
1249
1250 /* If this isn't a trivial array cast, we can't propagate into
1251 * ptr_as_array derefs.
1252 */
1253 if (is_deref_ptr_as_array(nir_src_parent_instr(use_src)) &&
1254 !trivial_array_cast)
1255 continue;
1256
1257 nir_src_rewrite(use_src, cast->parent.ssa);
1258 progress = true;
1259 }
1260
1261 if (nir_deref_instr_remove_if_unused(cast))
1262 progress = true;
1263
1264 return progress;
1265 }
1266
1267 static bool
opt_deref_ptr_as_array(nir_builder * b,nir_deref_instr * deref)1268 opt_deref_ptr_as_array(nir_builder *b, nir_deref_instr *deref)
1269 {
1270 assert(deref->deref_type == nir_deref_type_ptr_as_array);
1271
1272 nir_deref_instr *parent = nir_deref_instr_parent(deref);
1273
1274 if (nir_src_is_const(deref->arr.index) &&
1275 nir_src_as_int(deref->arr.index) == 0) {
1276 /* If it's a ptr_as_array deref with an index of 0, it does nothing
1277 * and we can just replace its uses with its parent, unless it has
1278 * alignment information.
1279 *
1280 * The source of a ptr_as_array deref always has a deref_type of
1281 * nir_deref_type_array or nir_deref_type_cast. If it's a cast, it
1282 * may be trivial and we may be able to get rid of that too. Any
1283 * trivial cast of trivial cast cases should be handled already by
1284 * opt_deref_cast() above.
1285 */
1286 if (parent->deref_type == nir_deref_type_cast &&
1287 parent->cast.align_mul == 0 &&
1288 nir_deref_cast_is_trivial(parent))
1289 parent = nir_deref_instr_parent(parent);
1290 nir_def_rewrite_uses(&deref->def,
1291 &parent->def);
1292 nir_instr_remove(&deref->instr);
1293 return true;
1294 }
1295
1296 if (parent->deref_type != nir_deref_type_array &&
1297 parent->deref_type != nir_deref_type_ptr_as_array)
1298 return false;
1299
1300 deref->arr.in_bounds &= parent->arr.in_bounds;
1301
1302 nir_def *new_idx = nir_iadd(b, parent->arr.index.ssa,
1303 deref->arr.index.ssa);
1304
1305 deref->deref_type = parent->deref_type;
1306 nir_src_rewrite(&deref->parent, parent->parent.ssa);
1307 nir_src_rewrite(&deref->arr.index, new_idx);
1308 return true;
1309 }
1310
1311 static bool
is_vector_bitcast_deref(nir_deref_instr * cast,nir_component_mask_t mask,bool is_write)1312 is_vector_bitcast_deref(nir_deref_instr *cast,
1313 nir_component_mask_t mask,
1314 bool is_write)
1315 {
1316 if (cast->deref_type != nir_deref_type_cast)
1317 return false;
1318
1319 /* Don't throw away useful alignment information */
1320 if (cast->cast.align_mul > 0)
1321 return false;
1322
1323 /* It has to be a cast of another deref */
1324 nir_deref_instr *parent = nir_src_as_deref(cast->parent);
1325 if (parent == NULL)
1326 return false;
1327
1328 /* The parent has to be a vector or scalar */
1329 if (!glsl_type_is_vector_or_scalar(parent->type))
1330 return false;
1331
1332 /* Don't bother with 1-bit types */
1333 unsigned cast_bit_size = glsl_get_bit_size(cast->type);
1334 unsigned parent_bit_size = glsl_get_bit_size(parent->type);
1335 if (cast_bit_size == 1 || parent_bit_size == 1)
1336 return false;
1337
1338 /* A strided vector type means it's not tightly packed */
1339 if (glsl_get_explicit_stride(cast->type) ||
1340 glsl_get_explicit_stride(parent->type))
1341 return false;
1342
1343 assert(cast_bit_size > 0 && cast_bit_size % 8 == 0);
1344 assert(parent_bit_size > 0 && parent_bit_size % 8 == 0);
1345 unsigned bytes_used = util_last_bit(mask) * (cast_bit_size / 8);
1346 unsigned parent_bytes = glsl_get_vector_elements(parent->type) *
1347 (parent_bit_size / 8);
1348 if (bytes_used > parent_bytes)
1349 return false;
1350
1351 if (is_write && !nir_component_mask_can_reinterpret(mask, cast_bit_size,
1352 parent_bit_size))
1353 return false;
1354
1355 return true;
1356 }
1357
1358 static nir_def *
resize_vector(nir_builder * b,nir_def * data,unsigned num_components)1359 resize_vector(nir_builder *b, nir_def *data, unsigned num_components)
1360 {
1361 if (num_components == data->num_components)
1362 return data;
1363
1364 unsigned swiz[NIR_MAX_VEC_COMPONENTS] = {
1365 0,
1366 };
1367 for (unsigned i = 0; i < MIN2(num_components, data->num_components); i++)
1368 swiz[i] = i;
1369
1370 return nir_swizzle(b, data, swiz, num_components);
1371 }
1372
1373 static bool
opt_load_vec_deref(nir_builder * b,nir_intrinsic_instr * load)1374 opt_load_vec_deref(nir_builder *b, nir_intrinsic_instr *load)
1375 {
1376 nir_deref_instr *deref = nir_src_as_deref(load->src[0]);
1377 nir_component_mask_t read_mask =
1378 nir_def_components_read(&load->def);
1379
1380 /* LLVM loves take advantage of the fact that vec3s in OpenCL are
1381 * vec4-aligned and so it can just read/write them as vec4s. This
1382 * results in a LOT of vec4->vec3 casts on loads and stores.
1383 */
1384 if (is_vector_bitcast_deref(deref, read_mask, false)) {
1385 const unsigned old_num_comps = load->def.num_components;
1386 const unsigned old_bit_size = load->def.bit_size;
1387
1388 nir_deref_instr *parent = nir_src_as_deref(deref->parent);
1389 const unsigned new_num_comps = glsl_get_vector_elements(parent->type);
1390 const unsigned new_bit_size = glsl_get_bit_size(parent->type);
1391
1392 /* Stomp it to reference the parent */
1393 nir_src_rewrite(&load->src[0], &parent->def);
1394 load->def.bit_size = new_bit_size;
1395 load->def.num_components = new_num_comps;
1396 load->num_components = new_num_comps;
1397
1398 b->cursor = nir_after_instr(&load->instr);
1399 nir_def *data = &load->def;
1400 if (old_bit_size != new_bit_size)
1401 data = nir_bitcast_vector(b, &load->def, old_bit_size);
1402 data = resize_vector(b, data, old_num_comps);
1403
1404 nir_def_rewrite_uses_after(&load->def, data,
1405 data->parent_instr);
1406 return true;
1407 }
1408
1409 return false;
1410 }
1411
1412 static bool
opt_store_vec_deref(nir_builder * b,nir_intrinsic_instr * store)1413 opt_store_vec_deref(nir_builder *b, nir_intrinsic_instr *store)
1414 {
1415 nir_deref_instr *deref = nir_src_as_deref(store->src[0]);
1416 nir_component_mask_t write_mask = nir_intrinsic_write_mask(store);
1417
1418 /* LLVM loves take advantage of the fact that vec3s in OpenCL are
1419 * vec4-aligned and so it can just read/write them as vec4s. This
1420 * results in a LOT of vec4->vec3 casts on loads and stores.
1421 */
1422 if (is_vector_bitcast_deref(deref, write_mask, true)) {
1423 nir_def *data = store->src[1].ssa;
1424
1425 const unsigned old_bit_size = data->bit_size;
1426
1427 nir_deref_instr *parent = nir_src_as_deref(deref->parent);
1428 const unsigned new_num_comps = glsl_get_vector_elements(parent->type);
1429 const unsigned new_bit_size = glsl_get_bit_size(parent->type);
1430
1431 nir_src_rewrite(&store->src[0], &parent->def);
1432
1433 /* Restrict things down as needed so the bitcast doesn't fail */
1434 data = nir_trim_vector(b, data, util_last_bit(write_mask));
1435 if (old_bit_size != new_bit_size)
1436 data = nir_bitcast_vector(b, data, new_bit_size);
1437 data = resize_vector(b, data, new_num_comps);
1438 nir_src_rewrite(&store->src[1], data);
1439 store->num_components = new_num_comps;
1440
1441 /* Adjust the write mask */
1442 write_mask = nir_component_mask_reinterpret(write_mask, old_bit_size,
1443 new_bit_size);
1444 nir_intrinsic_set_write_mask(store, write_mask);
1445 return true;
1446 }
1447
1448 return false;
1449 }
1450
1451 static bool
opt_known_deref_mode_is(nir_builder * b,nir_intrinsic_instr * intrin)1452 opt_known_deref_mode_is(nir_builder *b, nir_intrinsic_instr *intrin)
1453 {
1454 nir_variable_mode modes = nir_intrinsic_memory_modes(intrin);
1455 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1456 if (deref == NULL)
1457 return false;
1458
1459 nir_def *deref_is = NULL;
1460
1461 if (nir_deref_mode_must_be(deref, modes))
1462 deref_is = nir_imm_true(b);
1463
1464 if (!nir_deref_mode_may_be(deref, modes))
1465 deref_is = nir_imm_false(b);
1466
1467 if (deref_is == NULL)
1468 return false;
1469
1470 nir_def_rewrite_uses(&intrin->def, deref_is);
1471 nir_instr_remove(&intrin->instr);
1472 return true;
1473 }
1474
1475 bool
nir_opt_deref_impl(nir_function_impl * impl)1476 nir_opt_deref_impl(nir_function_impl *impl)
1477 {
1478 bool progress = false;
1479
1480 nir_builder b = nir_builder_create(impl);
1481
1482 nir_foreach_block(block, impl) {
1483 nir_foreach_instr_safe(instr, block) {
1484 b.cursor = nir_before_instr(instr);
1485
1486 switch (instr->type) {
1487 case nir_instr_type_alu: {
1488 nir_alu_instr *alu = nir_instr_as_alu(instr);
1489 if (opt_alu_of_cast(alu))
1490 progress = true;
1491 break;
1492 }
1493
1494 case nir_instr_type_deref: {
1495 nir_deref_instr *deref = nir_instr_as_deref(instr);
1496
1497 if (opt_restrict_deref_modes(deref))
1498 progress = true;
1499
1500 switch (deref->deref_type) {
1501 case nir_deref_type_ptr_as_array:
1502 if (opt_deref_ptr_as_array(&b, deref))
1503 progress = true;
1504 break;
1505
1506 case nir_deref_type_cast:
1507 if (opt_deref_cast(&b, deref))
1508 progress = true;
1509 break;
1510
1511 default:
1512 /* Do nothing */
1513 break;
1514 }
1515 break;
1516 }
1517
1518 case nir_instr_type_intrinsic: {
1519 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1520 switch (intrin->intrinsic) {
1521 case nir_intrinsic_load_deref:
1522 if (opt_load_vec_deref(&b, intrin))
1523 progress = true;
1524 break;
1525
1526 case nir_intrinsic_store_deref:
1527 if (opt_store_vec_deref(&b, intrin))
1528 progress = true;
1529 break;
1530
1531 case nir_intrinsic_deref_mode_is:
1532 if (opt_known_deref_mode_is(&b, intrin))
1533 progress = true;
1534 break;
1535
1536 default:
1537 /* Do nothing */
1538 break;
1539 }
1540 break;
1541 }
1542
1543 default:
1544 /* Do nothing */
1545 break;
1546 }
1547 }
1548 }
1549
1550 if (progress) {
1551 nir_metadata_preserve(impl, nir_metadata_block_index |
1552 nir_metadata_dominance);
1553 } else {
1554 nir_metadata_preserve(impl, nir_metadata_all);
1555 }
1556
1557 return progress;
1558 }
1559
1560 bool
nir_opt_deref(nir_shader * shader)1561 nir_opt_deref(nir_shader *shader)
1562 {
1563 bool progress = false;
1564
1565 nir_foreach_function_impl(impl, shader) {
1566 if (nir_opt_deref_impl(impl))
1567 progress = true;
1568 }
1569
1570 return progress;
1571 }
1572