• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2021 Advanced Micro Devices, Inc.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * on the rights to use, copy, modify, merge, publish, distribute, sub
9  * license, and/or sell copies of the Software, and to permit persons to whom
10  * the Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22  * USE OR OTHER DEALINGS IN THE SOFTWARE.
23  */
24 
25 #include "si_pipe.h"
26 #include "util/mesa-sha1.h"
27 #include "util/u_prim.h"
28 #include "sid.h"
29 
30 
31 struct si_shader_profile {
32    uint32_t sha1[SHA1_DIGEST_LENGTH32];
33    uint32_t options;
34 };
35 
36 static struct si_shader_profile profiles[] =
37 {
38    {
39       /* Plot3D */
40       {0x485320cd, 0x87a9ba05, 0x24a60e4f, 0x25aa19f7, 0xf5287451},
41       SI_PROFILE_VS_NO_BINNING,
42    },
43    {
44       /* Viewperf/Energy isn't affected by the discard bug. */
45       {0x17118671, 0xd0102e0c, 0x947f3592, 0xb2057e7b, 0x4da5d9b0},
46       SI_PROFILE_IGNORE_LLVM13_DISCARD_BUG,
47    },
48    {
49       /* Viewperf/Medical */
50       {0x4dce4331, 0x38f778d5, 0x1b75a717, 0x3e454fb9, 0xeb1527f0},
51       SI_PROFILE_PS_NO_BINNING,
52    },
53    {
54       /* Viewperf/Medical, a shader with a divergent loop doesn't benefit from Wave32,
55        * probably due to interpolation performance.
56        */
57       {0x29f0f4a0, 0x0672258d, 0x47ccdcfd, 0x31e67dcc, 0xdcb1fda8},
58       SI_PROFILE_WAVE64,
59    },
60    {
61       /* Viewperf/Creo */
62       {0x1f288a73, 0xba46cce5, 0xbf68e6c6, 0x58543651, 0xca3c8efd},
63       SI_PROFILE_CLAMP_DIV_BY_ZERO,
64    },
65 };
66 
/**
 * Return the tess factor channels written by one intrinsic.
 *
 * Only store_output is considered. Its write mask is shifted by the component
 * index; writes to the inner tess level are returned as-is and writes to the
 * outer tess level are returned shifted left by 4 bits, so both sets can be
 * tracked in a single mask. Every other instruction or location yields 0.
 */
static unsigned get_inst_tessfactor_writemask(nir_intrinsic_instr *intrin)
{
   if (intrin->intrinsic != nir_intrinsic_store_output)
      return 0;

   const unsigned channels = nir_intrinsic_write_mask(intrin) << nir_intrinsic_component(intrin);

   switch (nir_intrinsic_io_semantics(intrin).location) {
   case VARYING_SLOT_TESS_LEVEL_OUTER:
      return channels << 4;
   case VARYING_SLOT_TESS_LEVEL_INNER:
      return channels;
   default:
      return 0;
   }
}
82 
/**
 * Recursively walk one control-flow node of a tess control shader and record
 * which tess factor channels are written unconditionally versus conditionally.
 *
 * \param cf_node                   node to scan (block, if or loop)
 * \param upper_block_tf_writemask  channels written by the code level that
 *                                  contains this node
 * \param cond_block_tf_writemask   channels written somewhere inside nested
 *                                  control flow
 * \param tessfactors_are_def_in_all_invocs  accumulated verdict; only ever
 *                                  cleared here, never set
 * \param is_nested_cf              true when cf_node lives inside an if or loop
 */
static void scan_tess_ctrl(nir_cf_node *cf_node, unsigned *upper_block_tf_writemask,
                           unsigned *cond_block_tf_writemask,
                           bool *tessfactors_are_def_in_all_invocs, bool is_nested_cf)
{
   switch (cf_node->type) {
   case nir_cf_node_block: {
      nir_block *blk = nir_cf_node_as_block(cf_node);

      nir_foreach_instr (instr, blk) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

         /* Anything that is not a barrier just contributes its tess factor writes. */
         if (intrin->intrinsic != nir_intrinsic_control_barrier) {
            *upper_block_tf_writemask |= get_inst_tessfactor_writemask(intrin);
            continue;
         }

         /* A barrier inside nested control flow goes in the too hard basket.
          * GLSL can't express it, but SPIR-V can.
          */
         if (is_nested_cf) {
            *tessfactors_are_def_in_all_invocs = false;
            return;
         }

         /* The following case must be prevented:
          *    gl_TessLevelInner = ...;
          *    barrier();
          *    if (gl_InvocationID == 1)
          *       gl_TessLevelInner = ...;
          *
          * Treat the code between two barriers as an independent segment: a
          * segment that writes tess factor channels has to write the same
          * channels on every codepath inside it.
          */
         if (!*upper_block_tf_writemask && !*cond_block_tf_writemask)
            continue;

         /* Fold this segment's verdict into the running result. */
         *tessfactors_are_def_in_all_invocs &=
            !(*cond_block_tf_writemask & ~(*upper_block_tf_writemask));

         /* Start the next segment with a clean slate. */
         *upper_block_tf_writemask = 0;
         *cond_block_tf_writemask = 0;
      }
      break;
   }

   case nir_cf_node_if: {
      nir_if *if_stmt = nir_cf_node_as_if(cf_node);
      unsigned then_mask = 0;
      unsigned else_mask = 0;

      foreach_list_typed(nir_cf_node, child, node, &if_stmt->then_list)
      {
         scan_tess_ctrl(child, &then_mask, cond_block_tf_writemask,
                        tessfactors_are_def_in_all_invocs, true);
      }

      foreach_list_typed(nir_cf_node, child, node, &if_stmt->else_list)
      {
         scan_tess_ctrl(child, &else_mask, cond_block_tf_writemask,
                        tessfactors_are_def_in_all_invocs, true);
      }

      /* Channels written by both branches count as written by the enclosing
       * level; channels written by either branch count as conditional writes.
       * When neither branch writes anything both updates are no-ops.
       */
      *upper_block_tf_writemask |= then_mask & else_mask;
      *cond_block_tf_writemask |= then_mask | else_mask;
      break;
   }

   case nir_cf_node_loop: {
      nir_loop *loop = nir_cf_node_as_loop(cf_node);

      /* Everything written in a loop body is accounted as a conditional write,
       * hence the conditional mask is passed for both mask parameters.
       */
      foreach_list_typed(nir_cf_node, child, node, &loop->body)
      {
         scan_tess_ctrl(child, cond_block_tf_writemask, cond_block_tf_writemask,
                        tessfactors_are_def_in_all_invocs, true);
      }
      break;
   }

   default:
      unreachable("unknown cf node type");
   }
}
172 
are_tessfactors_def_in_all_invocs(const struct nir_shader * nir)173 static bool are_tessfactors_def_in_all_invocs(const struct nir_shader *nir)
174 {
175    assert(nir->info.stage == MESA_SHADER_TESS_CTRL);
176 
177    /* The pass works as follows:
178     * If all codepaths write tess factors, we can say that all
179     * invocations define tess factors.
180     *
181     * Each tess factor channel is tracked separately.
182     */
183    unsigned main_block_tf_writemask = 0; /* if main block writes tess factors */
184    unsigned cond_block_tf_writemask = 0; /* if cond block writes tess factors */
185 
186    /* Initial value = true. Here the pass will accumulate results from
187     * multiple segments surrounded by barriers. If tess factors aren't
188     * written at all, it's a shader bug and we don't care if this will be
189     * true.
190     */
191    bool tessfactors_are_def_in_all_invocs = true;
192 
193    nir_foreach_function (function, nir) {
194       if (function->impl) {
195          foreach_list_typed(nir_cf_node, node, node, &function->impl->body)
196          {
197             scan_tess_ctrl(node, &main_block_tf_writemask, &cond_block_tf_writemask,
198                            &tessfactors_are_def_in_all_invocs, false);
199          }
200       }
201    }
202 
203    /* Accumulate the result for the last code segment separated by a
204     * barrier.
205     */
206    if (main_block_tf_writemask || cond_block_tf_writemask) {
207       tessfactors_are_def_in_all_invocs &= !(cond_block_tf_writemask & ~main_block_tf_writemask);
208    }
209 
210    return tessfactors_are_def_in_all_invocs;
211 }
212 
get_texture_src(nir_tex_instr * instr,nir_tex_src_type type)213 static const nir_src *get_texture_src(nir_tex_instr *instr, nir_tex_src_type type)
214 {
215    for (unsigned i = 0; i < instr->num_srcs; i++) {
216       if (instr->src[i].src_type == type)
217          return &instr->src[i].src;
218    }
219    return NULL;
220 }
221 
/**
 * Record what a single input/output load or store intrinsic touches.
 *
 * For inputs this fills info->input[] (semantic, interpolation mode, usage
 * mask, 16-bit half validity) and grows info->num_inputs. For outputs it
 * fills the output_* arrays, the per-stream component counters, the streamout
 * buffer mask and, for fragment shader color outputs, the 16-bit color types.
 *
 * \param nir       shader being scanned (only the stage is consulted)
 * \param info      shader info to update
 * \param intr      the load/store intrinsic
 * \param is_input  true for input loads, false for output loads and stores
 */
static void scan_io_usage(const nir_shader *nir, struct si_shader_info *info,
                          nir_intrinsic_instr *intr, bool is_input)
{
   /* load_input uses flat shading; only interpolated loads carry another mode. */
   unsigned interp = INTERP_MODE_FLAT;

   if (intr->intrinsic == nir_intrinsic_load_interpolated_input) {
      nir_intrinsic_instr *baryc = nir_instr_as_intrinsic(intr->src[0].ssa->parent_instr);

      if (!baryc)
         unreachable("unknown barycentric expression");

      if (nir_intrinsic_infos[baryc->intrinsic].index_map[NIR_INTRINSIC_INTERP_MODE] > 0)
         interp = nir_intrinsic_interp_mode(baryc);
      else
         unreachable("unknown barycentric intrinsic");
   }

   /* Intrinsics with a write mask are stores, everything else is a load. */
   const bool is_store = nir_intrinsic_has_write_mask(intr);
   unsigned mask = is_store ? nir_intrinsic_write_mask(intr)
                            : nir_ssa_def_components_read(&intr->dest.ssa);
   const unsigned bit_size = is_store ? nir_src_bit_size(intr->src[0])
                                      : intr->dest.ssa.bit_size;
   const bool is_output_load = !is_store && !is_input;

   assert(bit_size != 64 && !(mask & ~0xf) && "64-bit IO should have been lowered");

   /* Convert the 16-bit component mask to a 32-bit component mask except for VS inputs
    * where the mask is untyped: two 16-bit components share one 32-bit channel.
    */
   if (bit_size == 16 && !is_input)
      mask = ((mask & 0x3) ? 0x1 : 0) | ((mask & 0xc) ? 0x2 : 0);

   mask <<= nir_intrinsic_component(intr);

   nir_src offset = *nir_get_io_offset_src(intr);
   const bool indirect = !nir_src_is_const(offset);
   assert(indirect || nir_src_as_uint(offset) == 0);

   /* VS inputs don't have semantics. */
   const bool is_vs_input = nir->info.stage == MESA_SHADER_VERTEX && is_input;
   unsigned semantic = is_vs_input ? 0 : nir_intrinsic_io_semantics(intr).location;

   if (nir->info.stage == MESA_SHADER_FRAGMENT && !is_input) {
      /* Never use FRAG_RESULT_COLOR directly. */
      if (semantic == FRAG_RESULT_COLOR)
         semantic = FRAG_RESULT_DATA0;
      semantic += nir_intrinsic_io_semantics(intr).dual_source_blend_index;
   }

   const unsigned driver_location = nir_intrinsic_base(intr);
   const unsigned num_slots = indirect ? nir_intrinsic_io_semantics(intr).num_slots : 1;

   if (is_input) {
      assert(driver_location + num_slots <= ARRAY_SIZE(info->input));

      for (unsigned slot = 0; slot < num_slots; slot++) {
         const unsigned loc = driver_location + slot;

         info->input[loc].semantic = semantic + slot;
         info->input[loc].interpolate =
            semantic == VARYING_SLOT_PRIMITIVE_ID ? INTERP_MODE_FLAT : interp;

         if (!mask)
            continue;

         info->input[loc].usage_mask |= mask;
         if (bit_size == 16) {
            info->input[loc].fp16_lo_hi_valid |=
               nir_intrinsic_io_semantics(intr).high_16bits ? 0x2 : 0x1;
         }
         info->num_inputs = MAX2(info->num_inputs, loc + 1);
      }
      return;
   }

   /* Outputs. */
   assert(driver_location + num_slots <= ARRAY_SIZE(info->output_usagemask));

   for (unsigned slot = 0; slot < num_slots; slot++) {
      const unsigned loc = driver_location + slot;

      info->output_semantic[loc] = semantic + slot;

      if (is_output_load) {
         /* Output loads have only a few things that we need to track. */
         info->output_readmask[loc] |= mask;
         continue;
      }

      if (!mask)
         continue;

      /* Output stores. */
      const unsigned gs_streams = (uint32_t)nir_intrinsic_io_semantics(intr).gs_streams <<
                                  (nir_intrinsic_component(intr) * 2);
      /* Components that no earlier store to this slot has claimed yet. */
      const unsigned new_mask = mask & ~info->output_usagemask[loc];

      /* Iterate over all components. */
      for (unsigned chan = 0; chan < 4; chan++) {
         const unsigned stream = (gs_streams >> (chan * 2)) & 0x3;

         if (new_mask & (1 << chan)) {
            info->output_streams[loc] |= stream << (chan * 2);
            info->num_stream_output_components[stream]++;
         }

         if (nir_intrinsic_has_io_xfb(intr)) {
            /* Components 0-1 are described by io_xfb, components 2-3 by io_xfb2. */
            nir_io_xfb xfb = chan < 2 ? nir_intrinsic_io_xfb(intr) :
                                        nir_intrinsic_io_xfb2(intr);
            if (xfb.out[chan % 2].num_components) {
               info->enabled_streamout_buffer_mask |=
                  BITFIELD_BIT(stream * 4 + xfb.out[chan % 2].buffer);
            }
         }
      }

      if (nir_intrinsic_has_src_type(intr))
         info->output_type[loc] = nir_intrinsic_src_type(intr);
      else if (nir_intrinsic_has_dest_type(intr))
         info->output_type[loc] = nir_intrinsic_dest_type(intr);
      else
         info->output_type[loc] = nir_type_float32;

      info->output_usagemask[loc] |= mask;
      info->num_outputs = MAX2(info->num_outputs, loc + 1);

      if (nir->info.stage == MESA_SHADER_FRAGMENT &&
          semantic >= FRAG_RESULT_DATA0 && semantic <= FRAG_RESULT_DATA7) {
         const unsigned index = semantic - FRAG_RESULT_DATA0;

         /* Each color output owns a 2-bit field describing its 16-bit type. */
         switch (nir_intrinsic_src_type(intr)) {
         case nir_type_float16:
            info->output_color_types |= SI_TYPE_FLOAT16 << (index * 2);
            break;
         case nir_type_int16:
            info->output_color_types |= SI_TYPE_INT16 << (index * 2);
            break;
         case nir_type_uint16:
            info->output_color_types |= SI_TYPE_UINT16 << (index * 2);
            break;
         default:
            break;
         }
      }
   }
}
375 
/**
 * Tell whether a bindless handle may come from an indirectly indexed source.
 *
 * \param src  instruction that produces the handle
 * \return false only when the handle is produced by a load_ubo whose first
 *         source is constant; true for a load_ubo with a non-constant first
 *         source and, as the worst-case answer, for any other instruction.
 */
static bool is_bindless_handle_indirect(nir_instr *src)
{
   /* Some other instruction. Return the worst-case result. */
   if (src->type != nir_instr_type_intrinsic)
      return true;

   nir_intrinsic_instr *producer = nir_instr_as_intrinsic(src);
   if (producer->intrinsic != nir_intrinsic_load_ubo)
      return true;

   /* The handle comes from load_ubo: indirect iff its first source isn't constant. */
   return !nir_src_is_const(producer->src[0]);
}
389 
390 /* TODO: convert to nir_shader_instructions_pass */
/**
 * Gather shader info from a single instruction.
 *
 * Texture instructions update the VMEM usage flags and the bindless/indirect
 * descriptor flags. Intrinsics additionally update memory store counts,
 * compute ID usage, color input usage, barycentric usage and, through
 * scan_io_usage(), the input/output tables. Other instruction types are
 * ignored.
 */
static void scan_instruction(const struct nir_shader *nir, struct si_shader_info *info,
                             nir_instr *instr)
{
   if (instr->type == nir_instr_type_tex) {
      nir_tex_instr *tex = nir_instr_as_tex(instr);
      const nir_src *bindless_handle = get_texture_src(tex, nir_tex_src_texture_handle);

      /* Gather the types of used VMEM instructions that return something. */
      const bool is_sampler_op = tex->op == nir_texop_tex || tex->op == nir_texop_txb ||
                                 tex->op == nir_texop_txl || tex->op == nir_texop_txd ||
                                 tex->op == nir_texop_lod || tex->op == nir_texop_tg4;
      if (is_sampler_op)
         info->uses_vmem_sampler_or_bvh = true;
      else
         info->uses_vmem_load_other = true;

      if (bindless_handle) {
         info->uses_bindless_samplers = true;

         if (is_bindless_handle_indirect(bindless_handle->ssa->parent_instr))
            info->uses_indirect_descriptor = true;
      } else {
         /* NOTE(review): assumes a non-bindless texture always has a deref source. */
         const nir_src *deref = get_texture_src(tex, nir_tex_src_texture_deref);

         if (nir_deref_instr_has_indirect(nir_src_as_deref(*deref)))
            info->uses_indirect_descriptor = true;
      }
      return;
   }

   if (instr->type != nir_instr_type_intrinsic)
      return;

   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   const char *name = nir_intrinsic_infos[intr->intrinsic].name;

   /* Classify the intrinsic by its name: "ssbo" may appear anywhere, the
    * image and bindless image names are recognized by their prefix.
    */
   const bool is_ssbo = strstr(name, "ssbo") != NULL;
   const bool is_image = strncmp(name, "image", strlen("image")) == 0;
   const bool is_bindless_image =
      strncmp(name, "bindless_image", strlen("bindless_image")) == 0;

   /* Gather the types of used VMEM instructions that return something. */
   if (nir_intrinsic_infos[intr->intrinsic].has_dest) {
      switch (intr->intrinsic) {
      case nir_intrinsic_load_ubo:
         if (!nir_src_is_const(intr->src[1]))
            info->uses_vmem_load_other = true;
         break;

      case nir_intrinsic_load_input:
      case nir_intrinsic_load_input_vertex:
      case nir_intrinsic_load_per_vertex_input:
         if (nir->info.stage == MESA_SHADER_VERTEX ||
             nir->info.stage == MESA_SHADER_TESS_EVAL)
            info->uses_vmem_load_other = true;
         break;

      case nir_intrinsic_load_constant:
      case nir_intrinsic_load_barycentric_at_sample: /* This loads sample positions. */
      case nir_intrinsic_load_buffer_amd:
         info->uses_vmem_load_other = true;
         break;

      default: {
         const bool is_global = strncmp(name, "global", strlen("global")) == 0 ||
                                intr->intrinsic == nir_intrinsic_load_global ||
                                intr->intrinsic == nir_intrinsic_store_global;

         if (is_image || is_bindless_image || is_ssbo || is_global ||
             strstr(name, "scratch"))
            info->uses_vmem_load_other = true;
         break;
      }
      }
   }

   if (is_bindless_image)
      info->uses_bindless_images = true;

   if (nir_intrinsic_writes_external_memory(intr))
      info->num_memory_stores++;

   /* Indirect descriptor detection for images and SSBOs. */
   if (is_image && nir_deref_instr_has_indirect(nir_src_as_deref(intr->src[0])))
      info->uses_indirect_descriptor = true;

   if (is_bindless_image && is_bindless_handle_indirect(intr->src[0].ssa->parent_instr))
      info->uses_indirect_descriptor = true;

   /* store_ssbo is checked in the switch below using its second source. */
   if (intr->intrinsic != nir_intrinsic_store_ssbo && is_ssbo &&
       !nir_src_is_const(intr->src[0]))
      info->uses_indirect_descriptor = true;

   switch (intr->intrinsic) {
   case nir_intrinsic_store_ssbo:
      if (!nir_src_is_const(intr->src[1]))
         info->uses_indirect_descriptor = true;
      break;

   case nir_intrinsic_load_ubo:
      if (!nir_src_is_const(intr->src[0]))
         info->uses_indirect_descriptor = true;
      break;

   case nir_intrinsic_load_local_invocation_id:
   case nir_intrinsic_load_workgroup_id: {
      const bool is_workgroup_id = intr->intrinsic == nir_intrinsic_load_workgroup_id;
      unsigned components = nir_ssa_def_components_read(&intr->dest.ssa);

      /* Flag every component of the ID that is actually read. */
      while (components) {
         unsigned comp = u_bit_scan(&components);

         if (is_workgroup_id)
            info->uses_block_id[comp] = true;
         else
            info->uses_thread_id[comp] = true;
      }
      break;
   }

   case nir_intrinsic_load_color0:
   case nir_intrinsic_load_color1: {
      unsigned index = intr->intrinsic == nir_intrinsic_load_color1;
      uint8_t mask = nir_ssa_def_components_read(&intr->dest.ssa);
      info->colors_read |= mask << (index * 4);

      const bool at_sample =
         info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_SAMPLE;
      const bool at_centroid =
         info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_CENTROID;
      const bool at_center =
         info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_CENTER;

      switch (info->color_interpolate[index]) {
      case INTERP_MODE_SMOOTH:
         if (at_sample)
            info->uses_persp_sample = true;
         else if (at_centroid)
            info->uses_persp_centroid = true;
         else if (at_center)
            info->uses_persp_center = true;
         break;
      case INTERP_MODE_NOPERSPECTIVE:
         if (at_sample)
            info->uses_linear_sample = true;
         else if (at_centroid)
            info->uses_linear_centroid = true;
         else if (at_center)
            info->uses_linear_center = true;
         break;
      case INTERP_MODE_COLOR:
         /* We don't know the final value. This will be FLAT if flatshading is enabled
          * in the rasterizer state, otherwise it will be SMOOTH.
          */
         info->uses_interp_color = true;
         if (at_sample)
            info->uses_persp_sample_color = true;
         else if (at_centroid)
            info->uses_persp_centroid_color = true;
         else if (at_center)
            info->uses_persp_center_color = true;
         break;
      }
      break;
   }

   case nir_intrinsic_load_barycentric_at_offset:   /* uses center */
   case nir_intrinsic_load_barycentric_at_sample:   /* uses center */
      if (nir_intrinsic_interp_mode(intr) == INTERP_MODE_FLAT)
         break;

      if (nir_intrinsic_interp_mode(intr) == INTERP_MODE_NOPERSPECTIVE)
         info->uses_linear_center = true;
      else
         info->uses_persp_center = true;

      if (intr->intrinsic == nir_intrinsic_load_barycentric_at_sample)
         info->uses_interp_at_sample = true;
      break;

   case nir_intrinsic_load_input:
   case nir_intrinsic_load_per_vertex_input:
   case nir_intrinsic_load_input_vertex:
   case nir_intrinsic_load_interpolated_input:
      scan_io_usage(nir, info, intr, true);
      break;

   case nir_intrinsic_load_output:
   case nir_intrinsic_load_per_vertex_output:
   case nir_intrinsic_store_output:
   case nir_intrinsic_store_per_vertex_output:
      scan_io_usage(nir, info, intr, false);
      break;

   case nir_intrinsic_load_deref:
   case nir_intrinsic_store_deref:
      /* These can only occur if there is indirect temp indexing. */
      break;

   case nir_intrinsic_interp_deref_at_centroid:
   case nir_intrinsic_interp_deref_at_sample:
   case nir_intrinsic_interp_deref_at_offset:
      unreachable("these opcodes should have been lowered");
      break;

   default:
      break;
   }
}
581 
si_nir_scan_shader(struct si_screen * sscreen,const struct nir_shader * nir,struct si_shader_info * info)582 void si_nir_scan_shader(struct si_screen *sscreen, const struct nir_shader *nir,
583                         struct si_shader_info *info)
584 {
585    memset(info, 0, sizeof(*info));
586    info->base = nir->info;
587 
588    /* Get options from shader profiles. */
589    for (unsigned i = 0; i < ARRAY_SIZE(profiles); i++) {
590       if (_mesa_printed_sha1_equal(info->base.source_sha1, profiles[i].sha1)) {
591          info->options = profiles[i].options;
592          break;
593       }
594    }
595 
596    if (nir->info.stage == MESA_SHADER_FRAGMENT) {
597       /* post_depth_coverage implies early_fragment_tests */
598       info->base.fs.early_fragment_tests |= info->base.fs.post_depth_coverage;
599 
600       info->color_interpolate[0] = nir->info.fs.color0_interp;
601       info->color_interpolate[1] = nir->info.fs.color1_interp;
602       for (unsigned i = 0; i < 2; i++) {
603          if (info->color_interpolate[i] == INTERP_MODE_NONE)
604             info->color_interpolate[i] = INTERP_MODE_COLOR;
605       }
606 
607       info->color_interpolate_loc[0] = nir->info.fs.color0_sample ? TGSI_INTERPOLATE_LOC_SAMPLE :
608                                        nir->info.fs.color0_centroid ? TGSI_INTERPOLATE_LOC_CENTROID :
609                                                                       TGSI_INTERPOLATE_LOC_CENTER;
610       info->color_interpolate_loc[1] = nir->info.fs.color1_sample ? TGSI_INTERPOLATE_LOC_SAMPLE :
611                                        nir->info.fs.color1_centroid ? TGSI_INTERPOLATE_LOC_CENTROID :
612                                                                       TGSI_INTERPOLATE_LOC_CENTER;
613       /* Set an invalid value. Will be determined at draw time if needed when the expected
614        * conditions are met.
615        */
616       info->writes_1_if_tex_is_1 = nir->info.writes_memory ? 0 : 0xff;
617    }
618 
619    info->constbuf0_num_slots = nir->num_uniforms;
620 
621    if (nir->info.stage == MESA_SHADER_TESS_CTRL) {
622       info->tessfactors_are_def_in_all_invocs = are_tessfactors_def_in_all_invocs(nir);
623    }
624 
625    /* tess factors are loaded as input instead of system value */
626    info->reads_tess_factors = nir->info.patch_inputs_read &
627       (BITFIELD64_BIT(VARYING_SLOT_TESS_LEVEL_INNER) |
628        BITFIELD64_BIT(VARYING_SLOT_TESS_LEVEL_OUTER));
629 
630    info->uses_frontface = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRONT_FACE);
631    info->uses_instanceid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_INSTANCE_ID);
632    info->uses_base_vertex = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX);
633    info->uses_base_instance = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BASE_INSTANCE);
634    info->uses_invocationid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_INVOCATION_ID);
635    info->uses_grid_size = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_NUM_WORKGROUPS);
636    info->uses_subgroup_info = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_LOCAL_INVOCATION_INDEX) ||
637                               BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SUBGROUP_ID) ||
638                               BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_NUM_SUBGROUPS);
639    info->uses_variable_block_size = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_WORKGROUP_SIZE);
640    info->uses_drawid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_DRAW_ID);
641    info->uses_primid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID) ||
642                        nir->info.inputs_read & VARYING_BIT_PRIMITIVE_ID;
643    info->reads_samplemask = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN);
644    info->uses_linear_sample = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BARYCENTRIC_LINEAR_SAMPLE);
645    info->uses_linear_centroid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BARYCENTRIC_LINEAR_CENTROID);
646    info->uses_linear_center = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BARYCENTRIC_LINEAR_PIXEL);
647    info->uses_persp_sample = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BARYCENTRIC_PERSP_SAMPLE);
648    info->uses_persp_centroid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BARYCENTRIC_PERSP_CENTROID);
649    info->uses_persp_center = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL);
650 
651    if (nir->info.stage == MESA_SHADER_FRAGMENT) {
652       info->writes_z = nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH);
653       info->writes_stencil = nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL);
654       info->writes_samplemask = nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK);
655 
656       info->colors_written = nir->info.outputs_written >> FRAG_RESULT_DATA0;
657       if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_COLOR)) {
658          info->color0_writes_all_cbufs = true;
659          info->colors_written |= 0x1;
660       }
661       if (nir->info.fs.color_is_dual_source)
662          info->colors_written |= 0x2;
663    } else {
664       info->writes_primid = nir->info.outputs_written & VARYING_BIT_PRIMITIVE_ID;
665       info->writes_viewport_index = nir->info.outputs_written & VARYING_BIT_VIEWPORT;
666       info->writes_layer = nir->info.outputs_written & VARYING_BIT_LAYER;
667       info->writes_psize = nir->info.outputs_written & VARYING_BIT_PSIZ;
668       info->writes_clipvertex = nir->info.outputs_written & VARYING_BIT_CLIP_VERTEX;
669       info->writes_edgeflag = nir->info.outputs_written & VARYING_BIT_EDGE;
670       info->writes_position = nir->info.outputs_written & VARYING_BIT_POS;
671    }
672 
673    nir_function_impl *impl = nir_shader_get_entrypoint((nir_shader*)nir);
674    nir_foreach_block (block, impl) {
675       nir_foreach_instr (instr, block)
676          scan_instruction(nir, info, instr);
677    }
678 
679    if (nir->info.stage == MESA_SHADER_VERTEX || nir->info.stage == MESA_SHADER_TESS_EVAL) {
680       /* Add the PrimitiveID output, but don't increment num_outputs.
681        * The driver inserts PrimitiveID only when it's used by the pixel shader,
682        * and si_emit_spi_map uses this unconditionally when such a pixel shader is used.
683        */
684       info->output_semantic[info->num_outputs] = VARYING_SLOT_PRIMITIVE_ID;
685       info->output_type[info->num_outputs] = nir_type_uint32;
686       info->output_usagemask[info->num_outputs] = 0x1;
687    }
688 
689    if (nir->info.stage == MESA_SHADER_FRAGMENT) {
690       info->allow_flat_shading = !(info->uses_persp_center || info->uses_persp_centroid ||
691                                    info->uses_persp_sample || info->uses_linear_center ||
692                                    info->uses_linear_centroid || info->uses_linear_sample ||
693                                    info->uses_interp_at_sample || nir->info.writes_memory ||
694                                    nir->info.fs.uses_fbfetch_output ||
695                                    nir->info.fs.needs_quad_helper_invocations ||
696                                    BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRAG_COORD) ||
697                                    BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_POINT_COORD) ||
698                                    BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID) ||
699                                    BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_POS) ||
700                                    BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN) ||
701                                    BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_HELPER_INVOCATION));
702 
703       info->uses_vmem_load_other |= info->base.fs.uses_fbfetch_output;
704 
705       /* Add both front and back color inputs. */
706       unsigned num_inputs_with_colors = info->num_inputs;
707       for (unsigned back = 0; back < 2; back++) {
708          for (unsigned i = 0; i < 2; i++) {
709             if ((info->colors_read >> (i * 4)) & 0xf) {
710                unsigned index = num_inputs_with_colors;
711 
712                info->input[index].semantic = (back ? VARYING_SLOT_BFC0 : VARYING_SLOT_COL0) + i;
713                info->input[index].interpolate = info->color_interpolate[i];
714                info->input[index].usage_mask = info->colors_read >> (i * 4);
715                num_inputs_with_colors++;
716 
717                /* Back-face colors don't increment num_inputs. si_emit_spi_map will use
718                 * back-face colors conditionally only when they are needed.
719                 */
720                if (!back)
721                   info->num_inputs = num_inputs_with_colors;
722             }
723          }
724       }
725    }
726 
727    info->uses_vmem_load_other |= info->uses_indirect_descriptor;
728 
729    /* Trim output read masks based on write masks. */
730    for (unsigned i = 0; i < info->num_outputs; i++)
731       info->output_readmask[i] &= info->output_usagemask[i];
732 
733    info->has_divergent_loop = nir_has_divergent_loop((nir_shader*)nir);
734 
735    if (nir->info.stage == MESA_SHADER_VERTEX ||
736        nir->info.stage == MESA_SHADER_TESS_CTRL ||
737        nir->info.stage == MESA_SHADER_TESS_EVAL ||
738        nir->info.stage == MESA_SHADER_GEOMETRY) {
739       if (nir->info.stage == MESA_SHADER_TESS_CTRL) {
740          /* Always reserve space for these. */
741          info->patch_outputs_written |=
742             (1ull << si_shader_io_get_unique_index_patch(VARYING_SLOT_TESS_LEVEL_INNER)) |
743             (1ull << si_shader_io_get_unique_index_patch(VARYING_SLOT_TESS_LEVEL_OUTER));
744       }
745       for (unsigned i = 0; i < info->num_outputs; i++) {
746          unsigned semantic = info->output_semantic[i];
747 
748          if (semantic == VARYING_SLOT_TESS_LEVEL_INNER ||
749              semantic == VARYING_SLOT_TESS_LEVEL_OUTER ||
750              (semantic >= VARYING_SLOT_PATCH0 && semantic < VARYING_SLOT_TESS_MAX)) {
751             info->patch_outputs_written |= 1ull << si_shader_io_get_unique_index_patch(semantic);
752          } else if ((semantic <= VARYING_SLOT_VAR31 || semantic >= VARYING_SLOT_VAR0_16BIT) &&
753                     semantic != VARYING_SLOT_EDGE) {
754             info->outputs_written |= 1ull << si_shader_io_get_unique_index(semantic, false);
755 
756             /* Ignore outputs that are not passed from VS to PS. */
757             if (semantic != VARYING_SLOT_POS &&
758                 semantic != VARYING_SLOT_PSIZ &&
759                 semantic != VARYING_SLOT_CLIP_VERTEX) {
760                info->outputs_written_before_ps |= 1ull
761                                                   << si_shader_io_get_unique_index(semantic, true);
762             }
763          }
764       }
765    }
766 
767    if (nir->info.stage == MESA_SHADER_VERTEX) {
768       info->num_vs_inputs =
769          nir->info.stage == MESA_SHADER_VERTEX && !info->base.vs.blit_sgprs_amd ? info->num_inputs : 0;
770       unsigned num_vbos_in_sgprs = si_num_vbos_in_user_sgprs_inline(sscreen->info.gfx_level);
771       info->num_vbos_in_user_sgprs = MIN2(info->num_vs_inputs, num_vbos_in_sgprs);
772 
773       /* The prolog is a no-op if there are no inputs. */
774       info->vs_needs_prolog = info->num_inputs && !info->base.vs.blit_sgprs_amd;
775    }
776 
777    if (nir->info.stage == MESA_SHADER_VERTEX ||
778        nir->info.stage == MESA_SHADER_TESS_CTRL ||
779        nir->info.stage == MESA_SHADER_TESS_EVAL) {
780       info->esgs_itemsize = util_last_bit64(info->outputs_written) * 16;
781       info->lshs_vertex_stride = info->esgs_itemsize;
782 
783       /* Add 1 dword to reduce LDS bank conflicts, so that each vertex
784        * will start on a different bank. (except for the maximum 32*16).
785        */
786       if (info->lshs_vertex_stride < 32 * 16)
787          info->lshs_vertex_stride += 4;
788 
789       /* For the ESGS ring in LDS, add 1 dword to reduce LDS bank
790        * conflicts, i.e. each vertex will start at a different bank.
791        */
792       if (sscreen->info.gfx_level >= GFX9)
793          info->esgs_itemsize += 4;
794 
795       assert(((info->esgs_itemsize / 4) & C_028AAC_ITEMSIZE) == 0);
796 
797       info->tcs_vgpr_only_inputs = ~info->base.tess.tcs_cross_invocation_inputs_read &
798                                    ~info->base.inputs_read_indirectly &
799                                    info->base.inputs_read;
800    }
801 
802    if (nir->info.stage == MESA_SHADER_GEOMETRY) {
803       info->gsvs_vertex_size = info->num_outputs * 16;
804       info->max_gsvs_emit_size = info->gsvs_vertex_size * info->base.gs.vertices_out;
805       info->gs_input_verts_per_prim =
806          u_vertices_per_prim((enum pipe_prim_type)info->base.gs.input_primitive);
807    }
808 
809    info->clipdist_mask = info->writes_clipvertex ? SI_USER_CLIP_PLANE_MASK :
810                          u_bit_consecutive(0, info->base.clip_distance_array_size);
811    info->culldist_mask = u_bit_consecutive(0, info->base.cull_distance_array_size) <<
812                          info->base.clip_distance_array_size;
813 
814    if (nir->info.stage == MESA_SHADER_FRAGMENT) {
815       for (unsigned i = 0; i < info->num_inputs; i++) {
816          unsigned semantic = info->input[i].semantic;
817 
818          if ((semantic <= VARYING_SLOT_VAR31 || semantic >= VARYING_SLOT_VAR0_16BIT) &&
819              semantic != VARYING_SLOT_PNTC) {
820             info->inputs_read |= 1ull << si_shader_io_get_unique_index(semantic, true);
821          }
822       }
823 
824       for (unsigned i = 0; i < 8; i++)
825          if (info->colors_written & (1 << i))
826             info->colors_written_4bit |= 0xf << (4 * i);
827 
828       for (unsigned i = 0; i < info->num_inputs; i++) {
829          if (info->input[i].semantic == VARYING_SLOT_COL0)
830             info->color_attr_index[0] = i;
831          else if (info->input[i].semantic == VARYING_SLOT_COL1)
832             info->color_attr_index[1] = i;
833       }
834    }
835 }
836