/*
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"
#include "nir_builder.h"
#include "nir_deref.h"
#include "util/hash_table.h"

static bool
is_trivial_deref_cast(nir_deref_instr *cast)
{
   nir_deref_instr *parent = nir_src_as_deref(cast->parent);
   if (!parent)
      return false;

   return cast->modes == parent->modes &&
          cast->type == parent->type &&
          cast->dest.ssa.num_components == parent->dest.ssa.num_components &&
          cast->dest.ssa.bit_size == parent->dest.ssa.bit_size;
}

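/**
 * Builds the deref path for a deref: the chain of deref instructions from the
 * variable or cast at the root down to the given deref, skipping trivial
 * casts. The path is stored in the embedded _short_path array when it fits
 * and is otherwise allocated out of mem_ctx; in both cases it is
 * NULL-terminated.
 */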
void
nir_deref_path_init(nir_deref_path *path,
                    nir_deref_instr *deref, void *mem_ctx)
{
   assert(deref != NULL);

   /* The length of the short path is at most ARRAY_SIZE - 1 because we need
    * room for the NULL terminator.
    */
   static const int max_short_path_len = ARRAY_SIZE(path->_short_path) - 1;

   int count = 0;

   nir_deref_instr **tail = &path->_short_path[max_short_path_len];
   nir_deref_instr **head = tail;

   *tail = NULL;
   for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
      if (d->deref_type == nir_deref_type_cast && is_trivial_deref_cast(d))
         continue;
      count++;
      if (count <= max_short_path_len)
         *(--head) = d;
   }

   if (count <= max_short_path_len) {
      /* If we're under max_short_path_len, just use the short path. */
      path->path = head;
      goto done;
   }

#ifndef NDEBUG
   /* Just in case someone uses short_path by accident */
   for (unsigned i = 0; i < ARRAY_SIZE(path->_short_path); i++)
      path->_short_path[i] = (void *)(uintptr_t)0xdeadbeef;
#endif

   path->path = ralloc_array(mem_ctx, nir_deref_instr *, count + 1);
   head = tail = path->path + count;
   *tail = NULL;
   for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
      if (d->deref_type == nir_deref_type_cast && is_trivial_deref_cast(d))
         continue;
      *(--head) = d;
   }

done:
   assert(head == path->path);
   assert(tail == head + count);
   assert(*tail == NULL);
}

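/**
 * Frees any memory allocated by nir_deref_path_init. The path array is only
 * freed if it did not fit in the embedded short path.
 */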
void
nir_deref_path_finish(nir_deref_path *path)
{
   if (path->path < &path->_short_path[0] ||
       path->path > &path->_short_path[ARRAY_SIZE(path->_short_path) - 1])
      ralloc_free(path->path);
}

/**
 * Recursively removes unused deref instructions
 */
bool
nir_deref_instr_remove_if_unused(nir_deref_instr *instr)
{
   bool progress = false;

   for (nir_deref_instr *d = instr; d; d = nir_deref_instr_parent(d)) {
      /* If anyone is using this deref, leave it alone */
      assert(d->dest.is_ssa);
      if (!nir_ssa_def_is_unused(&d->dest.ssa))
         break;

      nir_instr_remove(&d->instr);
      progress = true;
   }

   return progress;
}

bool
nir_deref_instr_has_indirect(nir_deref_instr *instr)
{
   while (instr->deref_type != nir_deref_type_var) {
      /* Consider casts to be indirects */
      if (instr->deref_type == nir_deref_type_cast)
         return true;

      if ((instr->deref_type == nir_deref_type_array ||
           instr->deref_type == nir_deref_type_ptr_as_array) &&
          !nir_src_is_const(instr->arr.index))
         return true;

      instr = nir_deref_instr_parent(instr);
   }

   return false;
}

bool
nir_deref_instr_is_known_out_of_bounds(nir_deref_instr *instr)
{
   for (; instr; instr = nir_deref_instr_parent(instr)) {
      if (instr->deref_type == nir_deref_type_array &&
          nir_src_is_const(instr->arr.index) &&
          nir_src_as_uint(instr->arr.index) >=
             glsl_get_length(nir_deref_instr_parent(instr)->type))
         return true;
   }

   return false;
}

bool
nir_deref_instr_has_complex_use(nir_deref_instr *deref)
{
   nir_foreach_use(use_src, &deref->dest.ssa) {
      nir_instr *use_instr = use_src->parent_instr;

      switch (use_instr->type) {
      case nir_instr_type_deref: {
         nir_deref_instr *use_deref = nir_instr_as_deref(use_instr);

         /* A var deref has no sources */
         assert(use_deref->deref_type != nir_deref_type_var);

         /* If a deref shows up in an array index or something like that, it's
          * a complex use.
          */
         if (use_src != &use_deref->parent)
            return true;

         /* Anything that isn't a basic struct or array deref is considered to
          * be a "complex" use. In particular, we don't allow ptr_as_array
          * because we assume that opt_deref will turn any non-complex
          * ptr_as_array derefs into regular array derefs eventually so passes
          * which only want to handle simple derefs will pick them up in a
          * later pass.
          */
         if (use_deref->deref_type != nir_deref_type_struct &&
             use_deref->deref_type != nir_deref_type_array_wildcard &&
             use_deref->deref_type != nir_deref_type_array)
            return true;

         if (nir_deref_instr_has_complex_use(use_deref))
            return true;

         continue;
      }

      case nir_instr_type_intrinsic: {
         nir_intrinsic_instr *use_intrin = nir_instr_as_intrinsic(use_instr);
         switch (use_intrin->intrinsic) {
         case nir_intrinsic_load_deref:
            assert(use_src == &use_intrin->src[0]);
            continue;

         case nir_intrinsic_copy_deref:
            assert(use_src == &use_intrin->src[0] ||
                   use_src == &use_intrin->src[1]);
            continue;

         case nir_intrinsic_store_deref:
            /* A use in src[1] of a store means we're taking that pointer and
             * writing it to a variable. Because we have no idea who will
             * read that variable and what they will do with the pointer, it's
             * considered a "complex" use. A use in src[0], on the other
             * hand, is a simple use because we're just going to dereference
             * it and write a value there.
             */
            if (use_src == &use_intrin->src[0])
               continue;
            return true;

         default:
            return true;
         }
         unreachable("Switch default failed");
      }

      default:
         return true;
      }
   }

   nir_foreach_if_use(use, &deref->dest.ssa)
      return true;

   return false;
}

static unsigned
type_scalar_size_bytes(const struct glsl_type *type)
{
   assert(glsl_type_is_vector_or_scalar(type) ||
          glsl_type_is_matrix(type));
   return glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;
}

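/**
 * Returns the array stride, in bytes, that applies to this deref: the
 * explicit stride of the parent type for array and array-wildcard derefs
 * (falling back to the scalar size for tightly packed vectors and row-major
 * matrices), the parent's stride for ptr_as_array derefs, the cast's
 * ptr_stride for casts, and 0 when no stride is known.
 */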
unsigned
nir_deref_instr_array_stride(nir_deref_instr *deref)
{
   switch (deref->deref_type) {
   case nir_deref_type_array:
   case nir_deref_type_array_wildcard: {
      const struct glsl_type *arr_type = nir_deref_instr_parent(deref)->type;
      unsigned stride = glsl_get_explicit_stride(arr_type);

      if ((glsl_type_is_matrix(arr_type) &&
           glsl_matrix_type_is_row_major(arr_type)) ||
          (glsl_type_is_vector(arr_type) && stride == 0))
         stride = type_scalar_size_bytes(arr_type);

      return stride;
   }
   case nir_deref_type_ptr_as_array:
      return nir_deref_instr_array_stride(nir_deref_instr_parent(deref));
   case nir_deref_type_cast:
      return deref->cast.ptr_stride;
   default:
      return 0;
   }
}

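/* Stride of one array element: its size padded up to its alignment, both as
 * reported by the given size/align callback.
 */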
static unsigned
type_get_array_stride(const struct glsl_type *elem_type,
                      glsl_type_size_align_func size_align)
{
   unsigned elem_size, elem_align;
   size_align(elem_type, &elem_size, &elem_align);
   return ALIGN_POT(elem_size, elem_align);
}

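/* Byte offset of field field_idx within struct_type, laying out each field at
 * the next multiple of its alignment as reported by size_align.
 */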
static unsigned
struct_type_get_field_offset(const struct glsl_type *struct_type,
                             glsl_type_size_align_func size_align,
                             unsigned field_idx)
{
   assert(glsl_type_is_struct_or_ifc(struct_type));
   unsigned offset = 0;
   for (unsigned i = 0; i <= field_idx; i++) {
      unsigned elem_size, elem_align;
      size_align(glsl_get_struct_field(struct_type, i), &elem_size, &elem_align);
      offset = ALIGN_POT(offset, elem_align);
      if (i < field_idx)
         offset += elem_size;
   }
   return offset;
}

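/**
 * Returns the constant byte offset of a fully-direct deref chain, relative to
 * the start of the underlying variable, using size_align to lay out arrays
 * and structs. All array indices in the chain must be constant.
 */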
unsigned
nir_deref_instr_get_const_offset(nir_deref_instr *deref,
                                 glsl_type_size_align_func size_align)
{
   nir_deref_path path;
   nir_deref_path_init(&path, deref, NULL);

   unsigned offset = 0;
   for (nir_deref_instr **p = &path.path[1]; *p; p++) {
      switch ((*p)->deref_type) {
      case nir_deref_type_array:
         offset += nir_src_as_uint((*p)->arr.index) *
                   type_get_array_stride((*p)->type, size_align);
         break;
      case nir_deref_type_struct: {
         /* p starts at path[1], so this is safe */
         nir_deref_instr *parent = *(p - 1);
         offset += struct_type_get_field_offset(parent->type, size_align,
                                                (*p)->strct.index);
         break;
      }
      case nir_deref_type_cast:
         /* A cast doesn't contribute to the offset */
         break;
      default:
         unreachable("Unsupported deref type");
      }
   }

   nir_deref_path_finish(&path);

   return offset;
}

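/**
 * Emits code computing the byte offset of a deref chain at run time, handling
 * indirect array and ptr_as_array indices as well as constant ones.
 */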
nir_ssa_def *
nir_build_deref_offset(nir_builder *b, nir_deref_instr *deref,
                       glsl_type_size_align_func size_align)
{
   nir_deref_path path;
   nir_deref_path_init(&path, deref, NULL);

   nir_ssa_def *offset = nir_imm_intN_t(b, 0, deref->dest.ssa.bit_size);
   for (nir_deref_instr **p = &path.path[1]; *p; p++) {
      switch ((*p)->deref_type) {
      case nir_deref_type_array:
      case nir_deref_type_ptr_as_array: {
         nir_ssa_def *index = nir_ssa_for_src(b, (*p)->arr.index, 1);
         int stride = type_get_array_stride((*p)->type, size_align);
         offset = nir_iadd(b, offset, nir_amul_imm(b, index, stride));
         break;
      }
      case nir_deref_type_struct: {
         /* p starts at path[1], so this is safe */
         nir_deref_instr *parent = *(p - 1);
         unsigned field_offset =
            struct_type_get_field_offset(parent->type, size_align,
                                         (*p)->strct.index);
         offset = nir_iadd_imm(b, offset, field_offset);
         break;
      }
      case nir_deref_type_cast:
         /* A cast doesn't contribute to the offset */
         break;
      default:
         unreachable("Unsupported deref type");
      }
   }

   nir_deref_path_finish(&path);

   return offset;
}

bool
nir_remove_dead_derefs_impl(nir_function_impl *impl)
{
   bool progress = false;

   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type == nir_instr_type_deref &&
             nir_deref_instr_remove_if_unused(nir_instr_as_deref(instr)))
            progress = true;
      }
   }

   if (progress)
      nir_metadata_preserve(impl, nir_metadata_block_index |
                                  nir_metadata_dominance);

   return progress;
}

bool
nir_remove_dead_derefs(nir_shader *shader)
{
   bool progress = false;
   nir_foreach_function(function, shader) {
      if (function->impl && nir_remove_dead_derefs_impl(function->impl))
         progress = true;
   }

   return progress;
}

void
nir_fixup_deref_modes(nir_shader *shader)
{
   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;

      nir_foreach_block(block, function->impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_deref)
               continue;

            nir_deref_instr *deref = nir_instr_as_deref(instr);
            if (deref->deref_type == nir_deref_type_cast)
               continue;

            nir_variable_mode parent_modes;
            if (deref->deref_type == nir_deref_type_var) {
               parent_modes = deref->var->data.mode;
            } else {
               assert(deref->parent.is_ssa);
               nir_deref_instr *parent =
                  nir_instr_as_deref(deref->parent.ssa->parent_instr);
               parent_modes = parent->modes;
            }

            deref->modes = parent_modes;
         }
      }
   }
}

static bool
modes_may_alias(nir_variable_mode a, nir_variable_mode b)
{
   /* Generic pointers can alias with SSBOs */
   if ((a & (nir_var_mem_ssbo | nir_var_mem_global)) &&
       (b & (nir_var_mem_ssbo | nir_var_mem_global)))
      return true;

   /* Pointers can only alias if they share a mode. */
   return a & b;
}

static bool
deref_path_contains_coherent_decoration(nir_deref_path *path)
{
   assert(path->path[0]->deref_type == nir_deref_type_var);

   if (path->path[0]->var->data.access & ACCESS_COHERENT)
      return true;

   for (nir_deref_instr **p = &path->path[1]; *p; p++) {
      if ((*p)->deref_type != nir_deref_type_struct)
         continue;

      const struct glsl_type *struct_type = (*(p - 1))->type;
      const struct glsl_struct_field *field =
         glsl_get_struct_field_data(struct_type, (*p)->strct.index);
      if (field->memory_coherent)
         return true;
   }

   return false;
}

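/**
 * Compares two deref paths and returns a bitmask describing the relationship
 * between the memory they reference: whether they may alias, whether one
 * contains the other, and whether they are provably equal.
 */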
nir_deref_compare_result
nir_compare_deref_paths(nir_deref_path *a_path,
                        nir_deref_path *b_path)
{
   if (!modes_may_alias(b_path->path[0]->modes, a_path->path[0]->modes))
      return nir_derefs_do_not_alias;

   if (a_path->path[0]->deref_type != b_path->path[0]->deref_type)
      return nir_derefs_may_alias_bit;

   if (a_path->path[0]->deref_type == nir_deref_type_var) {
      if (a_path->path[0]->var != b_path->path[0]->var) {
         /* Shader and function temporaries aren't backed by memory so two
          * distinct variables never alias.
          */
         static const nir_variable_mode temp_var_modes =
            nir_var_shader_temp | nir_var_function_temp;
         if (!(a_path->path[0]->modes & ~temp_var_modes) ||
             !(b_path->path[0]->modes & ~temp_var_modes))
            return nir_derefs_do_not_alias;

         /* If they are both declared coherent or have coherent somewhere in
          * their path (due to a member of an interface being declared
          * coherent), we have to assume that we could have any kind of
          * aliasing. Otherwise, they could still alias but the client didn't
          * tell us and that's their fault.
          */
         if (deref_path_contains_coherent_decoration(a_path) &&
             deref_path_contains_coherent_decoration(b_path))
            return nir_derefs_may_alias_bit;

         /* Per SPV_KHR_workgroup_memory_explicit_layout and GL_EXT_shared_memory_block,
          * shared blocks alias each other.
          */
         if (a_path->path[0]->modes & nir_var_mem_shared &&
             b_path->path[0]->modes & nir_var_mem_shared &&
             (glsl_type_is_interface(a_path->path[0]->var->type) ||
              glsl_type_is_interface(b_path->path[0]->var->type))) {
            assert(glsl_type_is_interface(a_path->path[0]->var->type) &&
                   glsl_type_is_interface(b_path->path[0]->var->type));
            return nir_derefs_may_alias_bit;
         }

         /* If we can chase the deref all the way back to the variable and
          * they're not the same variable and at least one is not declared
          * coherent, we know they can't possibly alias.
          */
         return nir_derefs_do_not_alias;
      }
   } else {
      assert(a_path->path[0]->deref_type == nir_deref_type_cast);
      /* If they're not exactly the same cast, it's hard to compare them so we
       * just assume they alias. Comparing casts is tricky as there are lots
       * of things such as mode, type, etc. that have to work out; for now, we
       * just assume nir_opt_deref will combine them and compare the deref
       * instructions.
       *
       * TODO: At some point in the future, we could be clever and understand
       * that a float[] and int[] have the same layout and aliasing structure
       * but double[] and vec3[] do not and we could potentially be a bit
       * smarter here.
       */
      if (a_path->path[0] != b_path->path[0])
         return nir_derefs_may_alias_bit;
   }

   /* Start off assuming they fully compare. We ignore equality for now. In
    * the end, we'll determine that by containment.
    */
   nir_deref_compare_result result = nir_derefs_may_alias_bit |
                                     nir_derefs_a_contains_b_bit |
                                     nir_derefs_b_contains_a_bit;

   nir_deref_instr **a_p = &a_path->path[1];
   nir_deref_instr **b_p = &b_path->path[1];
   while (*a_p != NULL && *a_p == *b_p) {
      a_p++;
      b_p++;
   }

   /* We're at either the tail or the divergence point between the two deref
    * paths. Look to see if either contains a cast or a ptr_as_array deref. If
    * it does we don't know how to safely make any inferences. Hopefully,
    * nir_opt_deref will clean most of these up and we can start inferring
    * things again.
    *
    * In theory, we could do a bit better. For instance, we could detect the
    * case where we have exactly one ptr_as_array deref in the chain after the
    * divergence point and it's matched in both chains and the two chains have
    * different constant indices.
    */
   for (nir_deref_instr **t_p = a_p; *t_p; t_p++) {
      if ((*t_p)->deref_type == nir_deref_type_cast ||
          (*t_p)->deref_type == nir_deref_type_ptr_as_array)
         return nir_derefs_may_alias_bit;
   }
   for (nir_deref_instr **t_p = b_p; *t_p; t_p++) {
      if ((*t_p)->deref_type == nir_deref_type_cast ||
          (*t_p)->deref_type == nir_deref_type_ptr_as_array)
         return nir_derefs_may_alias_bit;
   }

   while (*a_p != NULL && *b_p != NULL) {
      nir_deref_instr *a_tail = *(a_p++);
      nir_deref_instr *b_tail = *(b_p++);

      switch (a_tail->deref_type) {
      case nir_deref_type_array:
      case nir_deref_type_array_wildcard: {
         assert(b_tail->deref_type == nir_deref_type_array ||
                b_tail->deref_type == nir_deref_type_array_wildcard);

         if (a_tail->deref_type == nir_deref_type_array_wildcard) {
            if (b_tail->deref_type != nir_deref_type_array_wildcard)
               result &= ~nir_derefs_b_contains_a_bit;
         } else if (b_tail->deref_type == nir_deref_type_array_wildcard) {
            if (a_tail->deref_type != nir_deref_type_array_wildcard)
               result &= ~nir_derefs_a_contains_b_bit;
         } else {
            assert(a_tail->deref_type == nir_deref_type_array &&
                   b_tail->deref_type == nir_deref_type_array);
            assert(a_tail->arr.index.is_ssa && b_tail->arr.index.is_ssa);

            if (nir_src_is_const(a_tail->arr.index) &&
                nir_src_is_const(b_tail->arr.index)) {
               /* If they're both direct and have different offsets, they
                * don't even alias much less anything else.
                */
               if (nir_src_as_uint(a_tail->arr.index) !=
                   nir_src_as_uint(b_tail->arr.index))
                  return nir_derefs_do_not_alias;
            } else if (a_tail->arr.index.ssa == b_tail->arr.index.ssa) {
               /* They're the same indirect, continue on */
            } else {
               /* They're not the same index so we can't prove anything about
                * containment.
                */
               result &= ~(nir_derefs_a_contains_b_bit | nir_derefs_b_contains_a_bit);
            }
         }
         break;
      }

      case nir_deref_type_struct: {
         /* If they're different struct members, they don't even alias */
         if (a_tail->strct.index != b_tail->strct.index)
            return nir_derefs_do_not_alias;
         break;
      }

      default:
         unreachable("Invalid deref type");
      }
   }

   /* If a is longer than b, then it can't contain b */
   if (*a_p != NULL)
      result &= ~nir_derefs_a_contains_b_bit;
   if (*b_p != NULL)
      result &= ~nir_derefs_b_contains_a_bit;

   /* If a contains b and b contains a they must be equal. */
   if ((result & nir_derefs_a_contains_b_bit) && (result & nir_derefs_b_contains_a_bit))
      result |= nir_derefs_equal_bit;

   return result;
}

nir_deref_compare_result
nir_compare_derefs(nir_deref_instr *a, nir_deref_instr *b)
{
   if (a == b) {
      return nir_derefs_equal_bit | nir_derefs_may_alias_bit |
             nir_derefs_a_contains_b_bit | nir_derefs_b_contains_a_bit;
   }

   nir_deref_path a_path, b_path;
   nir_deref_path_init(&a_path, a, NULL);
   nir_deref_path_init(&b_path, b, NULL);
   assert(a_path.path[0]->deref_type == nir_deref_type_var ||
          a_path.path[0]->deref_type == nir_deref_type_cast);
   assert(b_path.path[0]->deref_type == nir_deref_type_var ||
          b_path.path[0]->deref_type == nir_deref_type_cast);

   nir_deref_compare_result result = nir_compare_deref_paths(&a_path, &b_path);

   nir_deref_path_finish(&a_path);
   nir_deref_path_finish(&b_path);

   return result;
}

nir_deref_path *nir_get_deref_path(void *mem_ctx, nir_deref_and_path *deref)
{
   if (!deref->_path) {
      deref->_path = ralloc(mem_ctx, nir_deref_path);
      nir_deref_path_init(deref->_path, deref->instr, mem_ctx);
   }
   return deref->_path;
}

nir_deref_compare_result nir_compare_derefs_and_paths(void *mem_ctx,
                                                      nir_deref_and_path *a,
                                                      nir_deref_and_path *b)
{
   if (a->instr == b->instr) /* nir_compare_derefs has a fast path if a == b */
      return nir_compare_derefs(a->instr, b->instr);

   return nir_compare_deref_paths(nir_get_deref_path(mem_ctx, a),
                                  nir_get_deref_path(mem_ctx, b));
}

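/* Per-impl state for nir_rematerialize_derefs_in_use_blocks_impl: the block
 * currently being processed and a cache of derefs already re-materialized in
 * that block.
 */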
struct rematerialize_deref_state {
   bool progress;
   nir_builder builder;
   nir_block *block;
   struct hash_table *cache;
};

static nir_deref_instr *
rematerialize_deref_in_block(nir_deref_instr *deref,
                             struct rematerialize_deref_state *state)
{
   if (deref->instr.block == state->block)
      return deref;

   if (!state->cache) {
      state->cache = _mesa_pointer_hash_table_create(NULL);
   }

   struct hash_entry *cached = _mesa_hash_table_search(state->cache, deref);
   if (cached)
      return cached->data;

   nir_builder *b = &state->builder;
   nir_deref_instr *new_deref =
      nir_deref_instr_create(b->shader, deref->deref_type);
   new_deref->modes = deref->modes;
   new_deref->type = deref->type;

   if (deref->deref_type == nir_deref_type_var) {
      new_deref->var = deref->var;
   } else {
      nir_deref_instr *parent = nir_src_as_deref(deref->parent);
      if (parent) {
         parent = rematerialize_deref_in_block(parent, state);
         new_deref->parent = nir_src_for_ssa(&parent->dest.ssa);
      } else {
         nir_src_copy(&new_deref->parent, &deref->parent);
      }
   }

   switch (deref->deref_type) {
   case nir_deref_type_var:
   case nir_deref_type_array_wildcard:
      /* Nothing more to do */
      break;

   case nir_deref_type_cast:
      new_deref->cast.ptr_stride = deref->cast.ptr_stride;
      break;

   case nir_deref_type_array:
   case nir_deref_type_ptr_as_array:
      assert(!nir_src_as_deref(deref->arr.index));
      nir_src_copy(&new_deref->arr.index, &deref->arr.index);
      break;

   case nir_deref_type_struct:
      new_deref->strct.index = deref->strct.index;
      break;

   default:
      unreachable("Invalid deref instruction type");
   }

   nir_ssa_dest_init(&new_deref->instr, &new_deref->dest,
                     deref->dest.ssa.num_components,
                     deref->dest.ssa.bit_size,
                     NULL);
   nir_builder_instr_insert(b, &new_deref->instr);

   return new_deref;
}

static bool
rematerialize_deref_src(nir_src *src, void *_state)
{
   struct rematerialize_deref_state *state = _state;

   nir_deref_instr *deref = nir_src_as_deref(*src);
   if (!deref)
      return true;

   nir_deref_instr *block_deref = rematerialize_deref_in_block(deref, state);
   if (block_deref != deref) {
      nir_instr_rewrite_src(src->parent_instr, src,
                            nir_src_for_ssa(&block_deref->dest.ssa));
      nir_deref_instr_remove_if_unused(deref);
      state->progress = true;
   }

   return true;
}

/** Re-materialize derefs in every block
 *
 * This pass re-materializes deref instructions in every block in which they
 * are used. After this pass has been run, every use of a deref will be of a
 * deref in the same block as the use. Also, all unused derefs will be
 * deleted as a side-effect.
 *
 * Derefs used as sources of phi instructions are not rematerialized.
 */
bool
nir_rematerialize_derefs_in_use_blocks_impl(nir_function_impl *impl)
{
   struct rematerialize_deref_state state = { 0 };
   nir_builder_init(&state.builder, impl);

   nir_foreach_block_unstructured(block, impl) {
      state.block = block;

      /* Start each block with a fresh cache */
      if (state.cache)
         _mesa_hash_table_clear(state.cache, NULL);

      nir_foreach_instr_safe(instr, block) {
         if (instr->type == nir_instr_type_deref &&
             nir_deref_instr_remove_if_unused(nir_instr_as_deref(instr)))
            continue;

         /* If a deref is used in a phi, we can't rematerialize it, as the new
          * derefs would appear before the phi, which is not valid.
          */
         if (instr->type == nir_instr_type_phi)
            continue;

         state.builder.cursor = nir_before_instr(instr);
         nir_foreach_src(instr, rematerialize_deref_src, &state);
      }

#ifndef NDEBUG
      nir_if *following_if = nir_block_get_following_if(block);
      if (following_if)
         assert(!nir_src_as_deref(following_if->condition));
#endif
   }

   _mesa_hash_table_destroy(state.cache, NULL);

   return state.progress;
}

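/* After a deref's type has changed, recursively update the types of its child
 * derefs to match. The recursion stops at casts, which define their own type.
 */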
static void
nir_deref_instr_fixup_child_types(nir_deref_instr *parent)
{
   nir_foreach_use(use, &parent->dest.ssa) {
      if (use->parent_instr->type != nir_instr_type_deref)
         continue;

      nir_deref_instr *child = nir_instr_as_deref(use->parent_instr);
      switch (child->deref_type) {
      case nir_deref_type_var:
         unreachable("nir_deref_type_var cannot be a child");

      case nir_deref_type_array:
      case nir_deref_type_array_wildcard:
         child->type = glsl_get_array_element(parent->type);
         break;

      case nir_deref_type_ptr_as_array:
         child->type = parent->type;
         break;

      case nir_deref_type_struct:
         child->type = glsl_get_struct_field(parent->type,
                                             child->strct.index);
         break;

      case nir_deref_type_cast:
         /* We stop the recursion here */
         continue;
      }

      /* Recurse into children */
      nir_deref_instr_fixup_child_types(child);
   }
}

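/* A trivial cast is also a trivial array cast if its pointer stride matches
 * the stride implied by the array or ptr_as_array deref it wraps.
 */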
static bool
is_trivial_array_deref_cast(nir_deref_instr *cast)
{
   assert(is_trivial_deref_cast(cast));

   nir_deref_instr *parent = nir_src_as_deref(cast->parent);

   if (parent->deref_type == nir_deref_type_array) {
      return cast->cast.ptr_stride ==
             glsl_get_explicit_stride(nir_deref_instr_parent(parent)->type);
   } else if (parent->deref_type == nir_deref_type_ptr_as_array) {
      return cast->cast.ptr_stride ==
             nir_deref_instr_array_stride(parent);
   } else {
      return false;
   }
}

static bool
is_deref_ptr_as_array(nir_instr *instr)
{
   return instr->type == nir_instr_type_deref &&
          nir_instr_as_deref(instr)->deref_type == nir_deref_type_ptr_as_array;
}

static bool
opt_remove_restricting_cast_alignments(nir_deref_instr *cast)
{
   assert(cast->deref_type == nir_deref_type_cast);
   if (cast->cast.align_mul == 0)
      return false;

   nir_deref_instr *parent = nir_src_as_deref(cast->parent);
   if (parent == NULL)
      return false;

   /* Don't use any default alignment for this check. We don't want to fall
    * back to type alignment too early in case we find out later that we're
    * somehow a child of a packed struct.
    */
   uint32_t parent_mul, parent_offset;
   if (!nir_get_explicit_deref_align(parent, false /* default_to_type_align */,
                                     &parent_mul, &parent_offset))
      return false;

   /* If this cast increases the alignment, we want to keep it.
    *
    * There is a possibility that the larger alignment provided by this cast
    * somehow disagrees with the smaller alignment further up the deref chain.
    * In that case, we choose to favor the alignment closer to the actual
    * memory operation which, in this case, is the cast and not its parent so
    * keeping the cast alignment is the right thing to do.
    */
   if (parent_mul < cast->cast.align_mul)
      return false;

   /* If we've gotten here, we have a parent deref with an align_mul at least
    * as large as ours so we can potentially throw away the alignment
    * information on this deref. There are two cases to consider here:
    *
    * 1. We can chase the deref all the way back to the variable. In this
    *    case, we have "perfect" knowledge, modulo indirect array derefs.
    *    Unless we've done something wrong in our indirect/wildcard stride
    *    calculations, our knowledge from the deref walk is better than the
    *    client's.
    *
    * 2. We can't chase it all the way back to the variable. In this case,
    *    because our call to nir_get_explicit_deref_align(parent, ...) above
    *    passes default_to_type_align=false, the only way we can even get
    *    here is if something further up the deref chain has a cast with an
    *    alignment, which can only happen if we get an alignment from the
    *    client (most likely a decoration in the SPIR-V). If the client has
    *    provided us with two conflicting alignments in the deref chain,
    *    that's their fault and we can do whatever we want.
    *
    * In either case, we should be well within our rights, at this point, to
    * throw away the alignment information on this deref. However, to be
    * "nice" to weird clients, we do one more check. It really shouldn't
    * happen but it's possible that the parent's alignment offset disagrees
    * with the cast's alignment offset. In this case, we consider the cast as
    * providing more information (or at least more valid information) and
    * keep it even if the align_mul from the parent is larger.
    */
   assert(cast->cast.align_mul <= parent_mul);
   if (parent_offset % cast->cast.align_mul != cast->cast.align_offset)
      return false;

   /* If we got here, the parent has better alignment information than the
    * child and we can get rid of the child alignment information.
    */
   cast->cast.align_mul = 0;
   cast->cast.align_offset = 0;
   return true;
}

/**
 * Remove casts that just wrap other casts.
 */
static bool
opt_remove_cast_cast(nir_deref_instr *cast)
{
   nir_deref_instr *first_cast = cast;

   while (true) {
      nir_deref_instr *parent = nir_deref_instr_parent(first_cast);
      if (parent == NULL || parent->deref_type != nir_deref_type_cast)
         break;
      first_cast = parent;
   }
   if (cast == first_cast)
      return false;

   nir_instr_rewrite_src(&cast->instr, &cast->parent,
                         nir_src_for_ssa(first_cast->parent.ssa));
   return true;
}

/* Restrict variable modes in casts.
 *
 * If we know from something higher up the deref chain that the deref has a
 * specific mode, we can cast to more general and back but we can never cast
 * across modes. For non-cast derefs, we should only ever do anything here if
 * the parent eventually comes from a cast that we restricted earlier.
 */
static bool
opt_restrict_deref_modes(nir_deref_instr *deref)
{
   if (deref->deref_type == nir_deref_type_var) {
      assert(deref->modes == deref->var->data.mode);
      return false;
   }

   nir_deref_instr *parent = nir_src_as_deref(deref->parent);
   if (parent == NULL || parent->modes == deref->modes)
      return false;

   assert(parent->modes & deref->modes);
   deref->modes &= parent->modes;
   return true;
}

static bool
opt_remove_sampler_cast(nir_deref_instr *cast)
{
   assert(cast->deref_type == nir_deref_type_cast);
   nir_deref_instr *parent = nir_src_as_deref(cast->parent);
   if (parent == NULL)
      return false;

   /* Strip both types down to their non-array type and bail if there are any
    * discrepancies in array lengths.
    */
   const struct glsl_type *parent_type = parent->type;
   const struct glsl_type *cast_type = cast->type;
   while (glsl_type_is_array(parent_type) && glsl_type_is_array(cast_type)) {
      if (glsl_get_length(parent_type) != glsl_get_length(cast_type))
         return false;
      parent_type = glsl_get_array_element(parent_type);
      cast_type = glsl_get_array_element(cast_type);
   }

   if (glsl_type_is_array(parent_type) || glsl_type_is_array(cast_type))
      return false;

   if (!glsl_type_is_sampler(parent_type) ||
       cast_type != glsl_bare_sampler_type())
      return false;

   /* We're a cast from a more detailed sampler type to a bare sampler */
   nir_ssa_def_rewrite_uses(&cast->dest.ssa,
                            &parent->dest.ssa);
   nir_instr_remove(&cast->instr);

   /* Recursively crawl the deref tree and clean up types */
   nir_deref_instr_fixup_child_types(parent);

   return true;
}

/**
 * Is this casting a struct to a contained struct?
 * struct a { struct b field0 };
 * ssa_5 is structa;
 * deref_cast (structb *)ssa_5 (function_temp structb);
 * converts to
 * deref_struct &ssa_5->field0 (function_temp structb);
 * This allows subsequent copy propagation to work.
 */
static bool
opt_replace_struct_wrapper_cast(nir_builder *b, nir_deref_instr *cast)
{
   nir_deref_instr *parent = nir_src_as_deref(cast->parent);
   if (!parent)
      return false;

   if (cast->cast.align_mul > 0)
      return false;

   if (!glsl_type_is_struct(parent->type))
      return false;

   /* Empty struct */
   if (glsl_get_length(parent->type) < 1)
      return false;

   if (glsl_get_struct_field_offset(parent->type, 0) != 0)
      return false;

   if (cast->type != glsl_get_struct_field(parent->type, 0))
      return false;

   nir_deref_instr *replace = nir_build_deref_struct(b, parent, 0);
   nir_ssa_def_rewrite_uses(&cast->dest.ssa, &replace->dest.ssa);
   nir_deref_instr_remove_if_unused(cast);
   return true;
}

static bool
opt_deref_cast(nir_builder *b, nir_deref_instr *cast)
{
   bool progress = false;

   progress |= opt_remove_restricting_cast_alignments(cast);

   if (opt_replace_struct_wrapper_cast(b, cast))
      return true;

   if (opt_remove_sampler_cast(cast))
      return true;

   progress |= opt_remove_cast_cast(cast);
   if (!is_trivial_deref_cast(cast))
      return progress;

   /* If this deref still contains useful alignment information, we don't want
    * to delete it.
    */
   if (cast->cast.align_mul > 0)
      return progress;

   bool trivial_array_cast = is_trivial_array_deref_cast(cast);

   assert(cast->dest.is_ssa);
   assert(cast->parent.is_ssa);

   nir_foreach_use_safe(use_src, &cast->dest.ssa) {
      /* If this isn't a trivial array cast, we can't propagate into
       * ptr_as_array derefs.
       */
      if (is_deref_ptr_as_array(use_src->parent_instr) &&
          !trivial_array_cast)
         continue;

      nir_instr_rewrite_src(use_src->parent_instr, use_src, cast->parent);
      progress = true;
   }

   /* If uses would be a bit crazy */
   assert(list_is_empty(&cast->dest.ssa.if_uses));

   if (nir_deref_instr_remove_if_unused(cast))
      progress = true;

   return progress;
}

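/* Fold a ptr_as_array deref into its parent: an index of 0 is a no-op and can
 * be dropped entirely, and an index on top of an array or ptr_as_array parent
 * can be added into the parent's index.
 */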
static bool
opt_deref_ptr_as_array(nir_builder *b, nir_deref_instr *deref)
{
   assert(deref->deref_type == nir_deref_type_ptr_as_array);

   nir_deref_instr *parent = nir_deref_instr_parent(deref);

   if (nir_src_is_const(deref->arr.index) &&
       nir_src_as_int(deref->arr.index) == 0) {
      /* If it's a ptr_as_array deref with an index of 0, it does nothing
       * and we can just replace its uses with its parent, unless it has
       * alignment information.
       *
       * The source of a ptr_as_array deref always has a deref_type of
       * nir_deref_type_array or nir_deref_type_cast. If it's a cast, it
       * may be trivial and we may be able to get rid of that too. Any
       * trivial cast of trivial cast cases should be handled already by
       * opt_deref_cast() above.
       */
      if (parent->deref_type == nir_deref_type_cast &&
          parent->cast.align_mul == 0 &&
          is_trivial_deref_cast(parent))
         parent = nir_deref_instr_parent(parent);
      nir_ssa_def_rewrite_uses(&deref->dest.ssa,
                               &parent->dest.ssa);
      nir_instr_remove(&deref->instr);
      return true;
   }

   if (parent->deref_type != nir_deref_type_array &&
       parent->deref_type != nir_deref_type_ptr_as_array)
      return false;

   assert(parent->parent.is_ssa);
   assert(parent->arr.index.is_ssa);
   assert(deref->arr.index.is_ssa);

   nir_ssa_def *new_idx = nir_iadd(b, parent->arr.index.ssa,
                                   deref->arr.index.ssa);

   deref->deref_type = parent->deref_type;
   nir_instr_rewrite_src(&deref->instr, &deref->parent, parent->parent);
   nir_instr_rewrite_src(&deref->instr, &deref->arr.index,
                         nir_src_for_ssa(new_idx));
   return true;
}

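/* Returns true if the cast is a pure vector bitcast: a cast of a vector or
 * scalar deref whose components named in mask fit entirely within the parent
 * vector and, for writes, can be re-expressed as a write mask at the parent's
 * bit size.
 */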
static bool
is_vector_bitcast_deref(nir_deref_instr *cast,
                        nir_component_mask_t mask,
                        bool is_write)
{
   if (cast->deref_type != nir_deref_type_cast)
      return false;

   /* Don't throw away useful alignment information */
   if (cast->cast.align_mul > 0)
      return false;

   /* It has to be a cast of another deref */
   nir_deref_instr *parent = nir_src_as_deref(cast->parent);
   if (parent == NULL)
      return false;

   /* The parent has to be a vector or scalar */
   if (!glsl_type_is_vector_or_scalar(parent->type))
      return false;

   /* Don't bother with 1-bit types */
   unsigned cast_bit_size = glsl_get_bit_size(cast->type);
   unsigned parent_bit_size = glsl_get_bit_size(parent->type);
   if (cast_bit_size == 1 || parent_bit_size == 1)
      return false;

   /* A strided vector type means it's not tightly packed */
   if (glsl_get_explicit_stride(cast->type) ||
       glsl_get_explicit_stride(parent->type))
      return false;

   assert(cast_bit_size > 0 && cast_bit_size % 8 == 0);
   assert(parent_bit_size > 0 && parent_bit_size % 8 == 0);
   unsigned bytes_used = util_last_bit(mask) * (cast_bit_size / 8);
   unsigned parent_bytes = glsl_get_vector_elements(parent->type) *
                           (parent_bit_size / 8);
   if (bytes_used > parent_bytes)
      return false;

   if (is_write && !nir_component_mask_can_reinterpret(mask, cast_bit_size,
                                                       parent_bit_size))
      return false;

   return true;
}

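/* Grows or shrinks data to num_components components. Any added components
 * are copies of component 0; callers only add components that the read/write
 * masks guarantee are never actually used.
 */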
static nir_ssa_def *
resize_vector(nir_builder *b, nir_ssa_def *data, unsigned num_components)
{
   if (num_components == data->num_components)
      return data;

   unsigned swiz[NIR_MAX_VEC_COMPONENTS] = { 0, };
   for (unsigned i = 0; i < MIN2(num_components, data->num_components); i++)
      swiz[i] = i;

   return nir_swizzle(b, data, swiz, num_components);
}

static bool
opt_load_vec_deref(nir_builder *b, nir_intrinsic_instr *load)
{
   nir_deref_instr *deref = nir_src_as_deref(load->src[0]);
   nir_component_mask_t read_mask =
      nir_ssa_def_components_read(&load->dest.ssa);

   /* LLVM loves to take advantage of the fact that vec3s in OpenCL are
    * vec4-aligned and so it can just read/write them as vec4s. This
    * results in a LOT of vec4->vec3 casts on loads and stores.
    */
   if (is_vector_bitcast_deref(deref, read_mask, false)) {
      const unsigned old_num_comps = load->dest.ssa.num_components;
      const unsigned old_bit_size = load->dest.ssa.bit_size;

      nir_deref_instr *parent = nir_src_as_deref(deref->parent);
      const unsigned new_num_comps = glsl_get_vector_elements(parent->type);
      const unsigned new_bit_size = glsl_get_bit_size(parent->type);

      /* Stomp it to reference the parent */
      nir_instr_rewrite_src(&load->instr, &load->src[0],
                            nir_src_for_ssa(&parent->dest.ssa));
      assert(load->dest.is_ssa);
      load->dest.ssa.bit_size = new_bit_size;
      load->dest.ssa.num_components = new_num_comps;
      load->num_components = new_num_comps;

      b->cursor = nir_after_instr(&load->instr);
      nir_ssa_def *data = &load->dest.ssa;
      if (old_bit_size != new_bit_size)
         data = nir_bitcast_vector(b, &load->dest.ssa, old_bit_size);
      data = resize_vector(b, data, old_num_comps);

      nir_ssa_def_rewrite_uses_after(&load->dest.ssa, data,
                                     data->parent_instr);
      return true;
   }

   return false;
}

static bool
opt_store_vec_deref(nir_builder *b, nir_intrinsic_instr *store)
{
   nir_deref_instr *deref = nir_src_as_deref(store->src[0]);
   nir_component_mask_t write_mask = nir_intrinsic_write_mask(store);

   /* LLVM loves to take advantage of the fact that vec3s in OpenCL are
    * vec4-aligned and so it can just read/write them as vec4s. This
    * results in a LOT of vec4->vec3 casts on loads and stores.
    */
   if (is_vector_bitcast_deref(deref, write_mask, true)) {
      assert(store->src[1].is_ssa);
      nir_ssa_def *data = store->src[1].ssa;

      const unsigned old_bit_size = data->bit_size;

      nir_deref_instr *parent = nir_src_as_deref(deref->parent);
      const unsigned new_num_comps = glsl_get_vector_elements(parent->type);
      const unsigned new_bit_size = glsl_get_bit_size(parent->type);

      nir_instr_rewrite_src(&store->instr, &store->src[0],
                            nir_src_for_ssa(&parent->dest.ssa));

      /* Restrict things down as needed so the bitcast doesn't fail */
      data = nir_channels(b, data, (1 << util_last_bit(write_mask)) - 1);
      if (old_bit_size != new_bit_size)
         data = nir_bitcast_vector(b, data, new_bit_size);
      data = resize_vector(b, data, new_num_comps);
      nir_instr_rewrite_src(&store->instr, &store->src[1],
                            nir_src_for_ssa(data));
      store->num_components = new_num_comps;

      /* Adjust the write mask */
      write_mask = nir_component_mask_reinterpret(write_mask, old_bit_size,
                                                  new_bit_size);
      nir_intrinsic_set_write_mask(store, write_mask);
      return true;
   }

   return false;
}

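/* If the mode of a deref is statically known, replace deref_mode_is with an
 * immediate true or false.
 */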
static bool
opt_known_deref_mode_is(nir_builder *b, nir_intrinsic_instr *intrin)
{
   nir_variable_mode modes = nir_intrinsic_memory_modes(intrin);
   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
   if (deref == NULL)
      return false;

   nir_ssa_def *deref_is = NULL;

   if (nir_deref_mode_must_be(deref, modes))
      deref_is = nir_imm_true(b);

   if (!nir_deref_mode_may_be(deref, modes))
      deref_is = nir_imm_false(b);

   if (deref_is == NULL)
      return false;

   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, deref_is);
   nir_instr_remove(&intrin->instr);
   return true;
}

bool
nir_opt_deref_impl(nir_function_impl *impl)
{
   bool progress = false;

   nir_builder b;
   nir_builder_init(&b, impl);

   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         b.cursor = nir_before_instr(instr);

         switch (instr->type) {
         case nir_instr_type_deref: {
            nir_deref_instr *deref = nir_instr_as_deref(instr);

            if (opt_restrict_deref_modes(deref))
               progress = true;

            switch (deref->deref_type) {
            case nir_deref_type_ptr_as_array:
               if (opt_deref_ptr_as_array(&b, deref))
                  progress = true;
               break;

            case nir_deref_type_cast:
               if (opt_deref_cast(&b, deref))
                  progress = true;
               break;

            default:
               /* Do nothing */
               break;
            }
            break;
         }

         case nir_instr_type_intrinsic: {
            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            switch (intrin->intrinsic) {
            case nir_intrinsic_load_deref:
               if (opt_load_vec_deref(&b, intrin))
                  progress = true;
               break;

            case nir_intrinsic_store_deref:
               if (opt_store_vec_deref(&b, intrin))
                  progress = true;
               break;

            case nir_intrinsic_deref_mode_is:
               if (opt_known_deref_mode_is(&b, intrin))
                  progress = true;
               break;

            default:
               /* Do nothing */
               break;
            }
            break;
         }

         default:
            /* Do nothing */
            break;
         }
      }
   }

   if (progress) {
      nir_metadata_preserve(impl, nir_metadata_block_index |
                                  nir_metadata_dominance);
   } else {
      nir_metadata_preserve(impl, nir_metadata_all);
   }

   return progress;
}

bool
nir_opt_deref(nir_shader *shader)
{
   bool progress = false;

   nir_foreach_function(func, shader) {
      if (func->impl && nir_opt_deref_impl(func->impl))
         progress = true;
   }

   return progress;
}