1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "nir.h"
25 #include "nir_deref.h"
26 #include "main/menums.h"
27
28 #include "util/set.h"
29
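/* Returns true if the source value comes straight from a load_invocation_id
 * intrinsic, i.e. the per-vertex slot is indexed by the current TCS
 * invocation rather than some other invocation.
 */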
30 static bool
31 src_is_invocation_id(const nir_src *src)
32 {
33 assert(src->is_ssa);
34 if (src->ssa->parent_instr->type != nir_instr_type_intrinsic)
35 return false;
36
37 return nir_instr_as_intrinsic(src->ssa->parent_instr)->intrinsic ==
38 nir_intrinsic_load_invocation_id;
39 }
40
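/* Returns true if the source value comes straight from a
 * load_local_invocation_index intrinsic, i.e. the arrayed output is indexed
 * by the current mesh shader invocation.
 */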
41 static bool
42 src_is_local_invocation_index(const nir_src *src)
43 {
44 assert(src->is_ssa);
45 if (src->ssa->parent_instr->type != nir_instr_type_intrinsic)
46 return false;
47
48 return nir_instr_as_intrinsic(src->ssa->parent_instr)->intrinsic ==
49 nir_intrinsic_load_local_invocation_index;
50 }
51
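/* Walk the deref path of an I/O access and report two properties: whether the
 * outer vertex/primitive index refers to an invocation other than the current
 * one (cross_invocation), and whether any array index along the path is not a
 * compile-time constant (indirect).
 */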
52 static void
53 get_deref_info(nir_shader *shader, nir_variable *var, nir_deref_instr *deref,
54 bool *cross_invocation, bool *indirect)
55 {
56 *cross_invocation = false;
57 *indirect = false;
58
59 const bool is_arrayed = nir_is_arrayed_io(var, shader->info.stage);
60
61 nir_deref_path path;
62 nir_deref_path_init(&path, deref, NULL);
63 assert(path.path[0]->deref_type == nir_deref_type_var);
64 nir_deref_instr **p = &path.path[1];
65
66 /* Vertex index is the outermost array index. */
67 if (is_arrayed) {
68 assert((*p)->deref_type == nir_deref_type_array);
69 if (shader->info.stage == MESA_SHADER_TESS_CTRL)
70 *cross_invocation = !src_is_invocation_id(&(*p)->arr.index);
71 else if (shader->info.stage == MESA_SHADER_MESH)
72 *cross_invocation = !src_is_local_invocation_index(&(*p)->arr.index);
73 p++;
74 }
75
76 /* We always lower indirect dereferences for "compact" array vars. */
77 if (!path.path[0]->var->data.compact) {
78 /* Non-compact array vars: find out if they are indirect. */
79 for (; *p; p++) {
80 if ((*p)->deref_type == nir_deref_type_array) {
81 *indirect |= !nir_src_is_const((*p)->arr.index);
82 } else if ((*p)->deref_type == nir_deref_type_struct) {
83 /* Struct indices are always constant. */
84 } else {
85 unreachable("Unsupported deref type");
86 }
87 }
88 }
89
90 nir_deref_path_finish(&path);
91 }
92
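/* Mark "len" consecutive slots of "var", starting "offset" slots past its
 * location, as read or written in the corresponding shader_info bitmasks
 * (inputs_read, outputs_written, the patch_*, indirect and cross-invocation
 * variants, plus a few fragment-shader-specific flags).
 */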
93 static void
94 set_io_mask(nir_shader *shader, nir_variable *var, int offset, int len,
95 nir_deref_instr *deref, bool is_output_read)
96 {
97 for (int i = 0; i < len; i++) {
98 /* Varyings might not have been assigned values yet so abort. */
99 if (var->data.location == -1)
100 return;
101
102 int idx = var->data.location + offset + i;
103 bool is_patch_generic = var->data.patch &&
104 idx != VARYING_SLOT_TESS_LEVEL_INNER &&
105 idx != VARYING_SLOT_TESS_LEVEL_OUTER &&
106 idx != VARYING_SLOT_BOUNDING_BOX0 &&
107 idx != VARYING_SLOT_BOUNDING_BOX1;
108 uint64_t bitfield;
109
110 if (is_patch_generic) {
111 /* Varyings might still have temp locations so abort */
112 if (idx < VARYING_SLOT_PATCH0 || idx >= VARYING_SLOT_TESS_MAX)
113 return;
114
115 bitfield = BITFIELD64_BIT(idx - VARYING_SLOT_PATCH0);
116 }
117 else {
118 /* Varyings might still have temp locations so abort */
119 if (idx >= VARYING_SLOT_MAX)
120 return;
121
122 bitfield = BITFIELD64_BIT(idx);
123 }
124
125 bool cross_invocation;
126 bool indirect;
127 get_deref_info(shader, var, deref, &cross_invocation, &indirect);
128
129 if (var->data.mode == nir_var_shader_in) {
130 if (is_patch_generic) {
131 shader->info.patch_inputs_read |= bitfield;
132 if (indirect)
133 shader->info.patch_inputs_read_indirectly |= bitfield;
134 } else {
135 shader->info.inputs_read |= bitfield;
136 if (indirect)
137 shader->info.inputs_read_indirectly |= bitfield;
138 }
139
140 if (cross_invocation && shader->info.stage == MESA_SHADER_TESS_CTRL)
141 shader->info.tess.tcs_cross_invocation_inputs_read |= bitfield;
142
143 if (shader->info.stage == MESA_SHADER_FRAGMENT) {
144 shader->info.fs.uses_sample_qualifier |= var->data.sample;
145 }
146 } else {
147 assert(var->data.mode == nir_var_shader_out);
148 if (is_output_read) {
149 if (is_patch_generic) {
150 shader->info.patch_outputs_read |= bitfield;
151 if (indirect)
152 shader->info.patch_outputs_accessed_indirectly |= bitfield;
153 } else {
154 shader->info.outputs_read |= bitfield;
155 if (indirect)
156 shader->info.outputs_accessed_indirectly |= bitfield;
157 }
158
159 if (cross_invocation && shader->info.stage == MESA_SHADER_TESS_CTRL)
160 shader->info.tess.tcs_cross_invocation_outputs_read |= bitfield;
161 } else {
162 if (is_patch_generic) {
163 shader->info.patch_outputs_written |= bitfield;
164 if (indirect)
165 shader->info.patch_outputs_accessed_indirectly |= bitfield;
166 } else if (!var->data.read_only) {
167 shader->info.outputs_written |= bitfield;
168 if (indirect)
169 shader->info.outputs_accessed_indirectly |= bitfield;
170 }
171 }
172
173 if (cross_invocation && shader->info.stage == MESA_SHADER_MESH)
174 shader->info.mesh.ms_cross_invocation_output_access |= bitfield;
175
176 if (var->data.fb_fetch_output) {
177 shader->info.outputs_read |= bitfield;
178 if (shader->info.stage == MESA_SHADER_FRAGMENT)
179 shader->info.fs.uses_fbfetch_output = true;
180 }
181
182 if (shader->info.stage == MESA_SHADER_FRAGMENT &&
183 !is_output_read && var->data.index == 1)
184 shader->info.fs.color_is_dual_source = true;
185 }
186 }
187 }
188
189 /**
190 * Mark an entire variable as used. Caller must ensure that the variable
191 * represents a shader input or output.
192 */
193 static void
194 mark_whole_variable(nir_shader *shader, nir_variable *var,
195 nir_deref_instr *deref, bool is_output_read)
196 {
197 const struct glsl_type *type = var->type;
198
199 if (nir_is_arrayed_io(var, shader->info.stage) ||
200 /* For NV_mesh_shader. */
201 (shader->info.stage == MESA_SHADER_MESH &&
202 var->data.location == VARYING_SLOT_PRIMITIVE_INDICES &&
203 !var->data.per_primitive)) {
204 assert(glsl_type_is_array(type));
205 type = glsl_get_array_element(type);
206 }
207
208 if (var->data.per_view) {
209 assert(glsl_type_is_array(type));
210 type = glsl_get_array_element(type);
211 }
212
213 const unsigned slots =
214 var->data.compact ? DIV_ROUND_UP(glsl_get_length(type), 4)
215 : glsl_count_attribute_slots(type, false);
216
217 set_io_mask(shader, var, 0, slots, deref, is_output_read);
218 }
219
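/* Compute the constant slot offset of "deref" relative to the start of the
 * variable, skipping the outer vertex/primitive index for arrayed I/O.
 * Compact arrays (e.g. clip distances or tess levels) pack four scalars per
 * slot. Returns -1 if a relevant array index is not a compile-time constant.
 */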
220 static unsigned
221 get_io_offset(nir_deref_instr *deref, nir_variable *var, bool is_arrayed,
222 bool skip_non_arrayed)
223 {
224 if (var->data.compact) {
225 if (deref->deref_type == nir_deref_type_var) {
226 assert(glsl_type_is_array(var->type));
227 return 0;
228 }
229 assert(deref->deref_type == nir_deref_type_array);
230 return nir_src_is_const(deref->arr.index) ?
231 (nir_src_as_uint(deref->arr.index) + var->data.location_frac) / 4u :
232 (unsigned)-1;
233 }
234
235 unsigned offset = 0;
236
237 for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
238 if (d->deref_type == nir_deref_type_array) {
239 if (is_arrayed && nir_deref_instr_parent(d)->deref_type == nir_deref_type_var)
240 break;
241
242 if (!is_arrayed && skip_non_arrayed)
243 break;
244
245 if (!nir_src_is_const(d->arr.index))
246 return -1;
247
248 offset += glsl_count_attribute_slots(d->type, false) *
249 nir_src_as_uint(d->arr.index);
250 } else if (d->deref_type == nir_deref_type_struct) {
251 const struct glsl_type *parent_type = nir_deref_instr_parent(d)->type;
252 for (unsigned i = 0; i < d->strct.index; i++) {
253 const struct glsl_type *field_type = glsl_get_struct_field(parent_type, i);
254 offset += glsl_count_attribute_slots(field_type, false);
255 }
256 }
257 }
258
259 return offset;
260 }
261
262 /**
263 * Try to mark a portion of the given varying as used. Caller must ensure
264 * that the variable represents a shader input or output.
265 *
266 * If the index can't be interpreted as a constant, or some other problem
267 * occurs, then nothing will be marked and false will be returned.
268 */
269 static bool
270 try_mask_partial_io(nir_shader *shader, nir_variable *var,
271 nir_deref_instr *deref, bool is_output_read)
272 {
273 const struct glsl_type *type = var->type;
274 bool is_arrayed = nir_is_arrayed_io(var, shader->info.stage);
275 bool skip_non_arrayed = shader->info.stage == MESA_SHADER_MESH;
276
277 if (is_arrayed) {
278 assert(glsl_type_is_array(type));
279 type = glsl_get_array_element(type);
280 }
281
282 /* Per-view variables are considered as a whole. */
283 if (var->data.per_view)
284 return false;
285
286 unsigned offset = get_io_offset(deref, var, is_arrayed, skip_non_arrayed);
287 if (offset == -1)
288 return false;
289
290 const unsigned slots =
291 var->data.compact ? DIV_ROUND_UP(glsl_get_length(type), 4)
292 : glsl_count_attribute_slots(type, false);
293
294 if (offset >= slots) {
295 /* Constant index outside the bounds of the matrix/array. This could
296 * arise as a result of constant folding of a legal GLSL program.
297 *
298 * Even though the spec says that indexing outside the bounds of a
299 * matrix/array results in undefined behaviour, we don't want to pass
300 * out-of-range values to set_io_mask() (since this could result in
301 * slots that don't exist being marked as used), so just let the caller
302 * mark the whole variable as used.
303 */
304 return false;
305 }
306
307 unsigned len = glsl_count_attribute_slots(deref->type, false);
308 set_io_mask(shader, var, offset, len, deref, is_output_read);
309 return true;
310 }
311
312 /** Returns true if the given intrinsic writes external memory
313 *
314 * Only returns true for writes to globally visible memory, not scratch and
315 * not shared.
316 */
317 bool
318 nir_intrinsic_writes_external_memory(const nir_intrinsic_instr *instr)
319 {
320 switch (instr->intrinsic) {
321 case nir_intrinsic_atomic_counter_inc:
322 case nir_intrinsic_atomic_counter_inc_deref:
323 case nir_intrinsic_atomic_counter_add:
324 case nir_intrinsic_atomic_counter_add_deref:
325 case nir_intrinsic_atomic_counter_pre_dec:
326 case nir_intrinsic_atomic_counter_pre_dec_deref:
327 case nir_intrinsic_atomic_counter_post_dec:
328 case nir_intrinsic_atomic_counter_post_dec_deref:
329 case nir_intrinsic_atomic_counter_min:
330 case nir_intrinsic_atomic_counter_min_deref:
331 case nir_intrinsic_atomic_counter_max:
332 case nir_intrinsic_atomic_counter_max_deref:
333 case nir_intrinsic_atomic_counter_and:
334 case nir_intrinsic_atomic_counter_and_deref:
335 case nir_intrinsic_atomic_counter_or:
336 case nir_intrinsic_atomic_counter_or_deref:
337 case nir_intrinsic_atomic_counter_xor:
338 case nir_intrinsic_atomic_counter_xor_deref:
339 case nir_intrinsic_atomic_counter_exchange:
340 case nir_intrinsic_atomic_counter_exchange_deref:
341 case nir_intrinsic_atomic_counter_comp_swap:
342 case nir_intrinsic_atomic_counter_comp_swap_deref:
343 case nir_intrinsic_bindless_image_atomic_add:
344 case nir_intrinsic_bindless_image_atomic_and:
345 case nir_intrinsic_bindless_image_atomic_comp_swap:
346 case nir_intrinsic_bindless_image_atomic_dec_wrap:
347 case nir_intrinsic_bindless_image_atomic_exchange:
348 case nir_intrinsic_bindless_image_atomic_fadd:
349 case nir_intrinsic_bindless_image_atomic_imax:
350 case nir_intrinsic_bindless_image_atomic_imin:
351 case nir_intrinsic_bindless_image_atomic_inc_wrap:
352 case nir_intrinsic_bindless_image_atomic_or:
353 case nir_intrinsic_bindless_image_atomic_umax:
354 case nir_intrinsic_bindless_image_atomic_umin:
355 case nir_intrinsic_bindless_image_atomic_xor:
356 case nir_intrinsic_bindless_image_store:
357 case nir_intrinsic_bindless_image_store_raw_intel:
358 case nir_intrinsic_global_atomic_add:
359 case nir_intrinsic_global_atomic_and:
360 case nir_intrinsic_global_atomic_comp_swap:
361 case nir_intrinsic_global_atomic_exchange:
362 case nir_intrinsic_global_atomic_fadd:
363 case nir_intrinsic_global_atomic_fcomp_swap:
364 case nir_intrinsic_global_atomic_fmax:
365 case nir_intrinsic_global_atomic_fmin:
366 case nir_intrinsic_global_atomic_imax:
367 case nir_intrinsic_global_atomic_imin:
368 case nir_intrinsic_global_atomic_or:
369 case nir_intrinsic_global_atomic_umax:
370 case nir_intrinsic_global_atomic_umin:
371 case nir_intrinsic_global_atomic_xor:
372 case nir_intrinsic_global_atomic_add_ir3:
373 case nir_intrinsic_global_atomic_and_ir3:
374 case nir_intrinsic_global_atomic_comp_swap_ir3:
375 case nir_intrinsic_global_atomic_exchange_ir3:
376 case nir_intrinsic_global_atomic_imax_ir3:
377 case nir_intrinsic_global_atomic_imin_ir3:
378 case nir_intrinsic_global_atomic_or_ir3:
379 case nir_intrinsic_global_atomic_umax_ir3:
380 case nir_intrinsic_global_atomic_umin_ir3:
381 case nir_intrinsic_global_atomic_xor_ir3:
382 case nir_intrinsic_image_atomic_add:
383 case nir_intrinsic_image_atomic_and:
384 case nir_intrinsic_image_atomic_comp_swap:
385 case nir_intrinsic_image_atomic_dec_wrap:
386 case nir_intrinsic_image_atomic_exchange:
387 case nir_intrinsic_image_atomic_fadd:
388 case nir_intrinsic_image_atomic_imax:
389 case nir_intrinsic_image_atomic_imin:
390 case nir_intrinsic_image_atomic_inc_wrap:
391 case nir_intrinsic_image_atomic_or:
392 case nir_intrinsic_image_atomic_umax:
393 case nir_intrinsic_image_atomic_umin:
394 case nir_intrinsic_image_atomic_xor:
395 case nir_intrinsic_image_deref_atomic_add:
396 case nir_intrinsic_image_deref_atomic_and:
397 case nir_intrinsic_image_deref_atomic_comp_swap:
398 case nir_intrinsic_image_deref_atomic_dec_wrap:
399 case nir_intrinsic_image_deref_atomic_exchange:
400 case nir_intrinsic_image_deref_atomic_fadd:
401 case nir_intrinsic_image_deref_atomic_imax:
402 case nir_intrinsic_image_deref_atomic_imin:
403 case nir_intrinsic_image_deref_atomic_inc_wrap:
404 case nir_intrinsic_image_deref_atomic_or:
405 case nir_intrinsic_image_deref_atomic_umax:
406 case nir_intrinsic_image_deref_atomic_umin:
407 case nir_intrinsic_image_deref_atomic_xor:
408 case nir_intrinsic_image_deref_store:
409 case nir_intrinsic_image_deref_store_raw_intel:
410 case nir_intrinsic_image_store:
411 case nir_intrinsic_image_store_raw_intel:
412 case nir_intrinsic_ssbo_atomic_add:
413 case nir_intrinsic_ssbo_atomic_add_ir3:
414 case nir_intrinsic_ssbo_atomic_and:
415 case nir_intrinsic_ssbo_atomic_and_ir3:
416 case nir_intrinsic_ssbo_atomic_comp_swap:
417 case nir_intrinsic_ssbo_atomic_comp_swap_ir3:
418 case nir_intrinsic_ssbo_atomic_exchange:
419 case nir_intrinsic_ssbo_atomic_exchange_ir3:
420 case nir_intrinsic_ssbo_atomic_fadd:
421 case nir_intrinsic_ssbo_atomic_fcomp_swap:
422 case nir_intrinsic_ssbo_atomic_fmax:
423 case nir_intrinsic_ssbo_atomic_fmin:
424 case nir_intrinsic_ssbo_atomic_imax:
425 case nir_intrinsic_ssbo_atomic_imax_ir3:
426 case nir_intrinsic_ssbo_atomic_imin:
427 case nir_intrinsic_ssbo_atomic_imin_ir3:
428 case nir_intrinsic_ssbo_atomic_or:
429 case nir_intrinsic_ssbo_atomic_or_ir3:
430 case nir_intrinsic_ssbo_atomic_umax:
431 case nir_intrinsic_ssbo_atomic_umax_ir3:
432 case nir_intrinsic_ssbo_atomic_umin:
433 case nir_intrinsic_ssbo_atomic_umin_ir3:
434 case nir_intrinsic_ssbo_atomic_xor:
435 case nir_intrinsic_ssbo_atomic_xor_ir3:
436 case nir_intrinsic_store_global:
437 case nir_intrinsic_store_global_ir3:
438 case nir_intrinsic_store_global_amd:
439 case nir_intrinsic_store_ssbo:
440 case nir_intrinsic_store_ssbo_ir3:
441 return true;
442
443 case nir_intrinsic_store_deref:
444 case nir_intrinsic_deref_atomic_add:
445 case nir_intrinsic_deref_atomic_imin:
446 case nir_intrinsic_deref_atomic_umin:
447 case nir_intrinsic_deref_atomic_imax:
448 case nir_intrinsic_deref_atomic_umax:
449 case nir_intrinsic_deref_atomic_and:
450 case nir_intrinsic_deref_atomic_or:
451 case nir_intrinsic_deref_atomic_xor:
452 case nir_intrinsic_deref_atomic_exchange:
453 case nir_intrinsic_deref_atomic_comp_swap:
454 case nir_intrinsic_deref_atomic_fadd:
455 case nir_intrinsic_deref_atomic_fmin:
456 case nir_intrinsic_deref_atomic_fmax:
457 case nir_intrinsic_deref_atomic_fcomp_swap:
458 return nir_deref_mode_may_be(nir_src_as_deref(instr->src[0]),
459 nir_var_mem_ssbo | nir_var_mem_global);
460
461 default:
462 return false;
463 }
464 }
465
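/* Gather the shader_info that can be derived from a single intrinsic: I/O
 * read/write masks, system values, discard/demote usage, barrier usage and
 * the external-memory-write flag.
 */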
466 static void
467 gather_intrinsic_info(nir_intrinsic_instr *instr, nir_shader *shader,
468 void *dead_ctx)
469 {
470 uint64_t slot_mask = 0;
471 uint16_t slot_mask_16bit = 0;
472
473 if (nir_intrinsic_infos[instr->intrinsic].index_map[NIR_INTRINSIC_IO_SEMANTICS] > 0) {
474 nir_io_semantics semantics = nir_intrinsic_io_semantics(instr);
475
476 if (semantics.location >= VARYING_SLOT_PATCH0 &&
477 semantics.location <= VARYING_SLOT_PATCH31) {
478 /* Generic per-patch I/O. */
479 assert((shader->info.stage == MESA_SHADER_TESS_EVAL &&
480 instr->intrinsic == nir_intrinsic_load_input) ||
481 (shader->info.stage == MESA_SHADER_TESS_CTRL &&
482 (instr->intrinsic == nir_intrinsic_load_output ||
483 instr->intrinsic == nir_intrinsic_store_output)));
484
485 semantics.location -= VARYING_SLOT_PATCH0;
486 }
487
488 if (semantics.location >= VARYING_SLOT_VAR0_16BIT &&
489 semantics.location <= VARYING_SLOT_VAR15_16BIT) {
490 /* Convert num_slots from the units of half vectors to full vectors. */
491 unsigned num_slots = (semantics.num_slots + semantics.high_16bits + 1) / 2;
492 slot_mask_16bit =
493 BITFIELD_RANGE(semantics.location - VARYING_SLOT_VAR0_16BIT, num_slots);
494 } else {
495 slot_mask = BITFIELD64_RANGE(semantics.location, semantics.num_slots);
496 assert(util_bitcount64(slot_mask) == semantics.num_slots);
497 }
498 }
499
500 switch (instr->intrinsic) {
501 case nir_intrinsic_demote:
502 case nir_intrinsic_demote_if:
503 shader->info.fs.uses_demote = true;
504 FALLTHROUGH; /* quads with helper lanes only might be discarded entirely */
505 case nir_intrinsic_discard:
506 case nir_intrinsic_discard_if:
507 /* Freedreno uses the discard_if intrinsic to end GS invocations that
508 * don't produce a vertex, so we only set uses_discard if executing on
509 * a fragment shader. */
510 if (shader->info.stage == MESA_SHADER_FRAGMENT)
511 shader->info.fs.uses_discard = true;
512 break;
513
514 case nir_intrinsic_terminate:
515 case nir_intrinsic_terminate_if:
516 assert(shader->info.stage == MESA_SHADER_FRAGMENT);
517 shader->info.fs.uses_discard = true;
518 break;
519
520 case nir_intrinsic_interp_deref_at_centroid:
521 case nir_intrinsic_interp_deref_at_sample:
522 case nir_intrinsic_interp_deref_at_offset:
523 case nir_intrinsic_interp_deref_at_vertex:
524 case nir_intrinsic_load_deref:
525 case nir_intrinsic_store_deref:
526 case nir_intrinsic_copy_deref: {
527 nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
528 if (nir_deref_mode_is_one_of(deref, nir_var_shader_in |
529 nir_var_shader_out)) {
530 nir_variable *var = nir_deref_instr_get_variable(deref);
531 bool is_output_read = false;
532 if (var->data.mode == nir_var_shader_out &&
533 instr->intrinsic == nir_intrinsic_load_deref)
534 is_output_read = true;
535
536 if (!try_mask_partial_io(shader, var, deref, is_output_read))
537 mark_whole_variable(shader, var, deref, is_output_read);
538
539 /* We need to track which inputs_read bits correspond to a
540 * dvec3/dvec4 input attribute. */
541 if (shader->info.stage == MESA_SHADER_VERTEX &&
542 var->data.mode == nir_var_shader_in &&
543 glsl_type_is_dual_slot(glsl_without_array(var->type))) {
544 for (unsigned i = 0; i < glsl_count_attribute_slots(var->type, false); i++) {
545 int idx = var->data.location + i;
546 shader->info.vs.double_inputs |= BITFIELD64_BIT(idx);
547 }
548 }
549 }
550 if (nir_intrinsic_writes_external_memory(instr))
551 shader->info.writes_memory = true;
552 break;
553 }
554 case nir_intrinsic_image_deref_load: {
555 nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
556 nir_variable *var = nir_deref_instr_get_variable(deref);
557 enum glsl_sampler_dim dim = glsl_get_sampler_dim(glsl_without_array(var->type));
558 if (dim != GLSL_SAMPLER_DIM_SUBPASS &&
559 dim != GLSL_SAMPLER_DIM_SUBPASS_MS)
560 break;
561
562 var->data.fb_fetch_output = true;
563 shader->info.fs.uses_fbfetch_output = true;
564 break;
565 }
566
567 case nir_intrinsic_load_input:
568 case nir_intrinsic_load_per_vertex_input:
569 case nir_intrinsic_load_input_vertex:
570 case nir_intrinsic_load_interpolated_input:
571 if (shader->info.stage == MESA_SHADER_TESS_EVAL &&
572 instr->intrinsic == nir_intrinsic_load_input) {
573 shader->info.patch_inputs_read |= slot_mask;
574 if (!nir_src_is_const(*nir_get_io_offset_src(instr)))
575 shader->info.patch_inputs_read_indirectly |= slot_mask;
576 } else {
577 shader->info.inputs_read |= slot_mask;
578 shader->info.inputs_read_16bit |= slot_mask_16bit;
579 if (!nir_src_is_const(*nir_get_io_offset_src(instr))) {
580 shader->info.inputs_read_indirectly |= slot_mask;
581 shader->info.inputs_read_indirectly_16bit |= slot_mask_16bit;
582 }
583 }
584
585 if (shader->info.stage == MESA_SHADER_TESS_CTRL &&
586 instr->intrinsic == nir_intrinsic_load_per_vertex_input &&
587 !src_is_invocation_id(nir_get_io_arrayed_index_src(instr)))
588 shader->info.tess.tcs_cross_invocation_inputs_read |= slot_mask;
589 break;
590
591 case nir_intrinsic_load_output:
592 case nir_intrinsic_load_per_vertex_output:
593 case nir_intrinsic_load_per_primitive_output:
594 if (shader->info.stage == MESA_SHADER_TESS_CTRL &&
595 instr->intrinsic == nir_intrinsic_load_output) {
596 shader->info.patch_outputs_read |= slot_mask;
597 if (!nir_src_is_const(*nir_get_io_offset_src(instr)))
598 shader->info.patch_outputs_accessed_indirectly |= slot_mask;
599 } else {
600 shader->info.outputs_read |= slot_mask;
601 shader->info.outputs_read_16bit |= slot_mask_16bit;
602 if (!nir_src_is_const(*nir_get_io_offset_src(instr))) {
603 shader->info.outputs_accessed_indirectly |= slot_mask;
604 shader->info.outputs_accessed_indirectly_16bit |= slot_mask_16bit;
605 }
606 }
607
608 if (shader->info.stage == MESA_SHADER_TESS_CTRL &&
609 instr->intrinsic == nir_intrinsic_load_per_vertex_output &&
610 !src_is_invocation_id(nir_get_io_arrayed_index_src(instr)))
611 shader->info.tess.tcs_cross_invocation_outputs_read |= slot_mask;
612
613 /* NV_mesh_shader: mesh shaders can load their outputs. */
614 if (shader->info.stage == MESA_SHADER_MESH &&
615 (instr->intrinsic == nir_intrinsic_load_per_vertex_output ||
616 instr->intrinsic == nir_intrinsic_load_per_primitive_output) &&
617 !src_is_local_invocation_index(nir_get_io_arrayed_index_src(instr)))
618 shader->info.mesh.ms_cross_invocation_output_access |= slot_mask;
619
620 if (shader->info.stage == MESA_SHADER_FRAGMENT &&
621 nir_intrinsic_io_semantics(instr).fb_fetch_output)
622 shader->info.fs.uses_fbfetch_output = true;
623 break;
624
625 case nir_intrinsic_store_output:
626 case nir_intrinsic_store_per_vertex_output:
627 case nir_intrinsic_store_per_primitive_output:
628 if (shader->info.stage == MESA_SHADER_TESS_CTRL &&
629 instr->intrinsic == nir_intrinsic_store_output) {
630 shader->info.patch_outputs_written |= slot_mask;
631 if (!nir_src_is_const(*nir_get_io_offset_src(instr)))
632 shader->info.patch_outputs_accessed_indirectly |= slot_mask;
633 } else {
634 shader->info.outputs_written |= slot_mask;
635 shader->info.outputs_written_16bit |= slot_mask_16bit;
636 if (!nir_src_is_const(*nir_get_io_offset_src(instr))) {
637 shader->info.outputs_accessed_indirectly |= slot_mask;
638 shader->info.outputs_accessed_indirectly_16bit |= slot_mask_16bit;
639 }
640 }
641
642 if (shader->info.stage == MESA_SHADER_MESH &&
643 (instr->intrinsic == nir_intrinsic_store_per_vertex_output ||
644 instr->intrinsic == nir_intrinsic_store_per_primitive_output) &&
645 !src_is_local_invocation_index(nir_get_io_arrayed_index_src(instr)))
646 shader->info.mesh.ms_cross_invocation_output_access |= slot_mask;
647
648 if (shader->info.stage == MESA_SHADER_FRAGMENT &&
649 nir_intrinsic_io_semantics(instr).dual_source_blend_index)
650 shader->info.fs.color_is_dual_source = true;
651 break;
652
653 case nir_intrinsic_load_color0:
654 case nir_intrinsic_load_color1:
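/* Relies on VARYING_SLOT_COL0 being 1 and VARYING_SLOT_COL1 being 2:
 * shifting left by one when the intrinsic is load_color1 selects COL1. */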
655 shader->info.inputs_read |=
656 BITFIELD64_BIT(VARYING_SLOT_COL0 <<
657 (instr->intrinsic == nir_intrinsic_load_color1));
658 FALLTHROUGH;
659 case nir_intrinsic_load_subgroup_size:
660 case nir_intrinsic_load_subgroup_invocation:
661 case nir_intrinsic_load_subgroup_eq_mask:
662 case nir_intrinsic_load_subgroup_ge_mask:
663 case nir_intrinsic_load_subgroup_gt_mask:
664 case nir_intrinsic_load_subgroup_le_mask:
665 case nir_intrinsic_load_subgroup_lt_mask:
666 case nir_intrinsic_load_num_subgroups:
667 case nir_intrinsic_load_subgroup_id:
668 case nir_intrinsic_load_vertex_id:
669 case nir_intrinsic_load_instance_id:
670 case nir_intrinsic_load_vertex_id_zero_base:
671 case nir_intrinsic_load_base_vertex:
672 case nir_intrinsic_load_first_vertex:
673 case nir_intrinsic_load_is_indexed_draw:
674 case nir_intrinsic_load_base_instance:
675 case nir_intrinsic_load_draw_id:
676 case nir_intrinsic_load_invocation_id:
677 case nir_intrinsic_load_frag_coord:
678 case nir_intrinsic_load_frag_shading_rate:
679 case nir_intrinsic_load_point_coord:
680 case nir_intrinsic_load_line_coord:
681 case nir_intrinsic_load_front_face:
682 case nir_intrinsic_load_sample_id:
683 case nir_intrinsic_load_sample_pos:
684 case nir_intrinsic_load_sample_pos_or_center:
685 case nir_intrinsic_load_sample_mask_in:
686 case nir_intrinsic_load_helper_invocation:
687 case nir_intrinsic_load_tess_coord:
688 case nir_intrinsic_load_patch_vertices_in:
689 case nir_intrinsic_load_primitive_id:
690 case nir_intrinsic_load_tess_level_outer:
691 case nir_intrinsic_load_tess_level_inner:
692 case nir_intrinsic_load_tess_level_outer_default:
693 case nir_intrinsic_load_tess_level_inner_default:
694 case nir_intrinsic_load_local_invocation_id:
695 case nir_intrinsic_load_local_invocation_index:
696 case nir_intrinsic_load_global_invocation_id:
697 case nir_intrinsic_load_base_global_invocation_id:
698 case nir_intrinsic_load_global_invocation_index:
699 case nir_intrinsic_load_workgroup_id:
700 case nir_intrinsic_load_workgroup_index:
701 case nir_intrinsic_load_num_workgroups:
702 case nir_intrinsic_load_workgroup_size:
703 case nir_intrinsic_load_work_dim:
704 case nir_intrinsic_load_user_data_amd:
705 case nir_intrinsic_load_view_index:
706 case nir_intrinsic_load_barycentric_model:
707 case nir_intrinsic_load_ray_launch_id:
708 case nir_intrinsic_load_ray_launch_size:
709 case nir_intrinsic_load_ray_launch_size_addr_amd:
710 case nir_intrinsic_load_ray_world_origin:
711 case nir_intrinsic_load_ray_world_direction:
712 case nir_intrinsic_load_ray_object_origin:
713 case nir_intrinsic_load_ray_object_direction:
714 case nir_intrinsic_load_ray_t_min:
715 case nir_intrinsic_load_ray_t_max:
716 case nir_intrinsic_load_ray_object_to_world:
717 case nir_intrinsic_load_ray_world_to_object:
718 case nir_intrinsic_load_ray_hit_kind:
719 case nir_intrinsic_load_ray_flags:
720 case nir_intrinsic_load_ray_geometry_index:
721 case nir_intrinsic_load_ray_instance_custom_index:
722 case nir_intrinsic_load_mesh_view_count:
723 case nir_intrinsic_load_gs_header_ir3:
724 case nir_intrinsic_load_tcs_header_ir3:
725 BITSET_SET(shader->info.system_values_read,
726 nir_system_value_from_intrinsic(instr->intrinsic));
727 break;
728
729 case nir_intrinsic_load_barycentric_pixel:
730 if (nir_intrinsic_interp_mode(instr) == INTERP_MODE_SMOOTH ||
731 nir_intrinsic_interp_mode(instr) == INTERP_MODE_NONE) {
732 BITSET_SET(shader->info.system_values_read,
733 SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL);
734 } else if (nir_intrinsic_interp_mode(instr) == INTERP_MODE_NOPERSPECTIVE) {
735 BITSET_SET(shader->info.system_values_read,
736 SYSTEM_VALUE_BARYCENTRIC_LINEAR_PIXEL);
737 }
738 break;
739
740 case nir_intrinsic_load_barycentric_centroid:
741 if (nir_intrinsic_interp_mode(instr) == INTERP_MODE_SMOOTH ||
742 nir_intrinsic_interp_mode(instr) == INTERP_MODE_NONE) {
743 BITSET_SET(shader->info.system_values_read,
744 SYSTEM_VALUE_BARYCENTRIC_PERSP_CENTROID);
745 } else if (nir_intrinsic_interp_mode(instr) == INTERP_MODE_NOPERSPECTIVE) {
746 BITSET_SET(shader->info.system_values_read,
747 SYSTEM_VALUE_BARYCENTRIC_LINEAR_CENTROID);
748 }
749 break;
750
751 case nir_intrinsic_load_barycentric_sample:
752 if (nir_intrinsic_interp_mode(instr) == INTERP_MODE_SMOOTH ||
753 nir_intrinsic_interp_mode(instr) == INTERP_MODE_NONE) {
754 BITSET_SET(shader->info.system_values_read,
755 SYSTEM_VALUE_BARYCENTRIC_PERSP_SAMPLE);
756 } else if (nir_intrinsic_interp_mode(instr) == INTERP_MODE_NOPERSPECTIVE) {
757 BITSET_SET(shader->info.system_values_read,
758 SYSTEM_VALUE_BARYCENTRIC_LINEAR_SAMPLE);
759 }
760 if (shader->info.stage == MESA_SHADER_FRAGMENT)
761 shader->info.fs.uses_sample_qualifier = true;
762 break;
763
764 case nir_intrinsic_quad_broadcast:
765 case nir_intrinsic_quad_swap_horizontal:
766 case nir_intrinsic_quad_swap_vertical:
767 case nir_intrinsic_quad_swap_diagonal:
768 case nir_intrinsic_quad_swizzle_amd:
769 if (shader->info.stage == MESA_SHADER_FRAGMENT)
770 shader->info.fs.needs_quad_helper_invocations = true;
771 break;
772
773 case nir_intrinsic_vote_any:
774 case nir_intrinsic_vote_all:
775 case nir_intrinsic_vote_feq:
776 case nir_intrinsic_vote_ieq:
777 case nir_intrinsic_ballot:
778 case nir_intrinsic_ballot_bit_count_exclusive:
779 case nir_intrinsic_ballot_bit_count_inclusive:
780 case nir_intrinsic_ballot_bitfield_extract:
781 case nir_intrinsic_ballot_bit_count_reduce:
782 case nir_intrinsic_ballot_find_lsb:
783 case nir_intrinsic_ballot_find_msb:
784 case nir_intrinsic_first_invocation:
785 case nir_intrinsic_read_invocation:
786 case nir_intrinsic_read_first_invocation:
787 case nir_intrinsic_elect:
788 case nir_intrinsic_reduce:
789 case nir_intrinsic_inclusive_scan:
790 case nir_intrinsic_exclusive_scan:
791 case nir_intrinsic_shuffle:
792 case nir_intrinsic_shuffle_xor:
793 case nir_intrinsic_shuffle_up:
794 case nir_intrinsic_shuffle_down:
795 case nir_intrinsic_write_invocation_amd:
796 if (shader->info.stage == MESA_SHADER_FRAGMENT)
797 shader->info.fs.needs_all_helper_invocations = true;
798 if (shader->info.stage == MESA_SHADER_COMPUTE)
799 shader->info.cs.uses_wide_subgroup_intrinsics = true;
800 break;
801
802 case nir_intrinsic_end_primitive:
803 case nir_intrinsic_end_primitive_with_counter:
804 assert(shader->info.stage == MESA_SHADER_GEOMETRY);
805 shader->info.gs.uses_end_primitive = 1;
806 FALLTHROUGH;
807
808 case nir_intrinsic_emit_vertex:
809 case nir_intrinsic_emit_vertex_with_counter:
810 shader->info.gs.active_stream_mask |= 1 << nir_intrinsic_stream_id(instr);
811
812 break;
813
814 case nir_intrinsic_control_barrier:
815 shader->info.uses_control_barrier = true;
816 break;
817
818 case nir_intrinsic_scoped_barrier:
819 shader->info.uses_control_barrier |=
820 nir_intrinsic_execution_scope(instr) != NIR_SCOPE_NONE;
821
822 shader->info.uses_memory_barrier |=
823 nir_intrinsic_memory_scope(instr) != NIR_SCOPE_NONE;
824 break;
825
826 case nir_intrinsic_memory_barrier:
827 case nir_intrinsic_group_memory_barrier:
828 case nir_intrinsic_memory_barrier_atomic_counter:
829 case nir_intrinsic_memory_barrier_buffer:
830 case nir_intrinsic_memory_barrier_image:
831 case nir_intrinsic_memory_barrier_shared:
832 case nir_intrinsic_memory_barrier_tcs_patch:
833 shader->info.uses_memory_barrier = true;
834 break;
835
836 default:
837 if (nir_intrinsic_writes_external_memory(instr))
838 shader->info.writes_memory = true;
839 break;
840 }
841 }
842
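/* Gather shader_info derived from texture instructions: implicit-derivative
 * texturing in fragment shaders requires quad helper invocations, and tg4
 * sets uses_texture_gather.
 */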
843 static void
844 gather_tex_info(nir_tex_instr *instr, nir_shader *shader)
845 {
846 if (shader->info.stage == MESA_SHADER_FRAGMENT &&
847 nir_tex_instr_has_implicit_derivative(instr))
848 shader->info.fs.needs_quad_helper_invocations = true;
849
850 switch (instr->op) {
851 case nir_texop_tg4:
852 shader->info.uses_texture_gather = true;
853 break;
854 default:
855 break;
856 }
857 }
858
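/* Gather shader_info derived from ALU instructions: derivative usage and the
 * set of float/integer bit sizes seen in sources and destinations.
 */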
859 static void
860 gather_alu_info(nir_alu_instr *instr, nir_shader *shader)
861 {
862 switch (instr->op) {
863 case nir_op_fddx:
864 case nir_op_fddy:
865 shader->info.uses_fddx_fddy = true;
866 FALLTHROUGH;
867 case nir_op_fddx_fine:
868 case nir_op_fddy_fine:
869 case nir_op_fddx_coarse:
870 case nir_op_fddy_coarse:
871 if (shader->info.stage == MESA_SHADER_FRAGMENT)
872 shader->info.fs.needs_quad_helper_invocations = true;
873 break;
874 default:
875 break;
876 }
877
878 const nir_op_info *info = &nir_op_infos[instr->op];
879
880 for (unsigned i = 0; i < info->num_inputs; i++) {
881 if (nir_alu_type_get_base_type(info->input_types[i]) == nir_type_float)
882 shader->info.bit_sizes_float |= nir_src_bit_size(instr->src[i].src);
883 else
884 shader->info.bit_sizes_int |= nir_src_bit_size(instr->src[i].src);
885 }
886 if (nir_alu_type_get_base_type(info->output_type) == nir_type_float)
887 shader->info.bit_sizes_float |= nir_dest_bit_size(instr->dest.dest);
888 else
889 shader->info.bit_sizes_int |= nir_dest_bit_size(instr->dest.dest);
890 }
891
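/* Recursively gather info for a function implementation and everything it
 * calls, using visited_funcs so each nir_function_impl is visited only once.
 */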
892 static void
893 gather_func_info(nir_function_impl *func, nir_shader *shader,
894 struct set *visited_funcs, void *dead_ctx)
895 {
896 if (_mesa_set_search(visited_funcs, func))
897 return;
898
899 _mesa_set_add(visited_funcs, func);
900
901 nir_foreach_block(block, func) {
902 nir_foreach_instr(instr, block) {
903 switch (instr->type) {
904 case nir_instr_type_alu:
905 gather_alu_info(nir_instr_as_alu(instr), shader);
906 break;
907 case nir_instr_type_intrinsic:
908 gather_intrinsic_info(nir_instr_as_intrinsic(instr), shader, dead_ctx);
909 break;
910 case nir_instr_type_tex:
911 gather_tex_info(nir_instr_as_tex(instr), shader);
912 break;
913 case nir_instr_type_call: {
914 nir_call_instr *call = nir_instr_as_call(instr);
915 nir_function_impl *impl = call->callee->impl;
916
917 assert(impl || !"nir_shader_gather_info only works with linked shaders");
918 gather_func_info(impl, shader, visited_funcs, dead_ctx);
919 break;
920 }
921 default:
922 break;
923 }
924 }
925 }
926 }
927
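/* Re-derive the shader_info fields that depend on the shader body (I/O masks,
 * system values, texture/image counts, per-primitive masks, ray query counts)
 * by scanning every instruction reachable from the given entrypoint.
 */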
928 void
929 nir_shader_gather_info(nir_shader *shader, nir_function_impl *entrypoint)
930 {
931 shader->info.num_textures = 0;
932 shader->info.num_images = 0;
933 shader->info.bit_sizes_float = 0;
934 shader->info.bit_sizes_int = 0;
935
936 nir_foreach_variable_with_modes(var, shader, nir_var_image | nir_var_uniform) {
937 /* Bindless textures and images don't use non-bindless slots.
938 * Interface blocks imply inputs, outputs, UBO, or SSBO, which can only
939 * mean bindless.
940 */
941 if (var->data.bindless || var->interface_type)
942 continue;
943
944 shader->info.num_textures += glsl_type_get_sampler_count(var->type);
945 shader->info.num_images += glsl_type_get_image_count(var->type);
946 }
947
948 shader->info.inputs_read = 0;
949 shader->info.outputs_written = 0;
950 shader->info.outputs_read = 0;
951 shader->info.inputs_read_16bit = 0;
952 shader->info.outputs_written_16bit = 0;
953 shader->info.outputs_read_16bit = 0;
954 shader->info.inputs_read_indirectly_16bit = 0;
955 shader->info.outputs_accessed_indirectly_16bit = 0;
956 shader->info.patch_outputs_read = 0;
957 shader->info.patch_inputs_read = 0;
958 shader->info.patch_outputs_written = 0;
959 BITSET_ZERO(shader->info.system_values_read);
960 shader->info.inputs_read_indirectly = 0;
961 shader->info.outputs_accessed_indirectly = 0;
962 shader->info.patch_inputs_read_indirectly = 0;
963 shader->info.patch_outputs_accessed_indirectly = 0;
964
965 if (shader->info.stage == MESA_SHADER_VERTEX) {
966 shader->info.vs.double_inputs = 0;
967 }
968 if (shader->info.stage == MESA_SHADER_FRAGMENT) {
969 shader->info.fs.uses_sample_qualifier = false;
970 shader->info.fs.uses_discard = false;
971 shader->info.fs.uses_demote = false;
972 shader->info.fs.color_is_dual_source = false;
973 shader->info.fs.uses_fbfetch_output = false;
974 shader->info.fs.needs_quad_helper_invocations = false;
975 shader->info.fs.needs_all_helper_invocations = false;
976 }
977 if (shader->info.stage == MESA_SHADER_TESS_CTRL) {
978 shader->info.tess.tcs_cross_invocation_inputs_read = 0;
979 shader->info.tess.tcs_cross_invocation_outputs_read = 0;
980 }
981 if (shader->info.stage == MESA_SHADER_MESH) {
982 shader->info.mesh.ms_cross_invocation_output_access = 0;
983 }
984
985 if (shader->info.stage != MESA_SHADER_FRAGMENT)
986 shader->info.writes_memory = shader->info.has_transform_feedback_varyings;
987
988 void *dead_ctx = ralloc_context(NULL);
989 struct set *visited_funcs = _mesa_pointer_set_create(dead_ctx);
990 gather_func_info(entrypoint, shader, visited_funcs, dead_ctx);
991 ralloc_free(dead_ctx);
992
993 shader->info.per_primitive_outputs = 0;
994 if (shader->info.stage == MESA_SHADER_MESH) {
995 nir_foreach_shader_out_variable(var, shader) {
996 if (var->data.per_primitive) {
997 assert(nir_is_arrayed_io(var, shader->info.stage));
998 const unsigned slots =
999 glsl_count_attribute_slots(glsl_get_array_element(var->type), false);
1000 shader->info.per_primitive_outputs |= BITFIELD64_RANGE(var->data.location, slots);
1001 }
1002 }
1003 }
1004
1005 shader->info.per_primitive_inputs = 0;
1006 if (shader->info.stage == MESA_SHADER_FRAGMENT) {
1007 nir_foreach_shader_in_variable(var, shader) {
1008 if (var->data.per_primitive) {
1009 const unsigned slots =
1010 glsl_count_attribute_slots(var->type, false);
1011 shader->info.per_primitive_inputs |= BITFIELD64_RANGE(var->data.location, slots);
1012 }
1013 }
1014 }
1015
1016 shader->info.ray_queries = 0;
1017 nir_foreach_variable_in_shader(var, shader) {
1018 if (!var->data.ray_query)
1019 continue;
1020
1021 shader->info.ray_queries += MAX2(glsl_get_aoa_size(var->type), 1);
1022 }
1023 nir_foreach_function(func, shader) {
1024 if (!func->impl)
1025 continue;
1026 nir_foreach_function_temp_variable(var, func->impl) {
1027 if (!var->data.ray_query)
1028 continue;
1029
1030 shader->info.ray_queries += MAX2(glsl_get_aoa_size(var->type), 1);
1031 }
1032 }
1033 }
1034