• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2021 Advanced Micro Devices, Inc.
3  *
4  * SPDX-License-Identifier: MIT
5  */
6 
7 #include "si_pipe.h"
8 #include "si_shader_internal.h"
9 #include "util/mesa-sha1.h"
10 #include "sid.h"
11 #include "nir.h"
12 #include "nir_xfb_info.h"
13 #include "aco_interface.h"
14 #include "ac_nir.h"
15 
16 struct si_shader_profile si_shader_profiles[] =
17 {
18    {
19       /* Plot3D */
20       {0x38c94662, 0x7b634109, 0x50f8254a, 0x0f4986a9, 0x11e59716, 0x3081e1a2, 0xbb2a0c59, 0xc29e853a},
21       SI_PROFILE_VS_NO_BINNING,
22    },
23    {
24       /* Viewperf/Energy */
25       {0x3279654e, 0xf51c358d, 0xc526e175, 0xd198eb26, 0x75c36c86, 0xd796398b, 0xc99b5e92, 0xddc31503},
26       SI_PROFILE_NO_OPT_UNIFORM_VARYINGS,    /* Uniform propagation regresses performance. */
27    },
28    {
29       /* Viewperf/Medical */
30       {0x4a041ad8, 0xe105a058, 0x2e9f7a38, 0xef4d1c2f, 0xb8aee798, 0x821f166b, 0x17b42668, 0xa4d1cc0a},
31       SI_PROFILE_GFX9_GFX10_PS_NO_BINNING,
32    },
33    {
34       /* Viewperf/Medical, a shader with a divergent loop doesn't benefit from Wave32,
35        * probably due to interpolation performance.
36        */
37       {0xa9c7e2c2, 0x3e01de01, 0x886cab63, 0x24327678, 0xe247c394, 0x2ecc4bf9, 0xc196d978, 0x2ba7a89c},
38       SI_PROFILE_GFX10_WAVE64,
39    },
40    {
41       /* Viewperf/Creo */
42       {0x182bd6b3, 0x5e8fba11, 0xa7b74071, 0xc69f6153, 0xc57aef8c, 0x9076492a, 0x53dc83ee, 0x921fb114},
43       SI_PROFILE_CLAMP_DIV_BY_ZERO,
44    },
45 };
46 
si_get_num_shader_profiles(void)47 unsigned si_get_num_shader_profiles(void)
48 {
49    return ARRAY_SIZE(si_shader_profiles);
50 }
51 
get_texture_src(nir_tex_instr * instr,nir_tex_src_type type)52 static const nir_src *get_texture_src(nir_tex_instr *instr, nir_tex_src_type type)
53 {
54    for (unsigned i = 0; i < instr->num_srcs; i++) {
55       if (instr->src[i].src_type == type)
56          return &instr->src[i].src;
57    }
58    return NULL;
59 }
60 
/**
 * Record in \p info how a single input/output intrinsic uses its IO slots.
 *
 * For input loads this updates info->input[] and info->num_inputs (or
 * info->colors_read for fragment color inputs when colors are not lowered).
 * For outputs it updates the per-location output_* arrays, stream/streamout
 * bookkeeping, the "written" bitmasks used between stages, the fragment
 * output color types, and the fragment depth output flags.
 *
 * \param nir             shader being scanned (only nir->info.stage is read)
 * \param info            shader info to update
 * \param intr            the load/store IO intrinsic
 * \param is_input        true for input loads, false for output loads/stores
 * \param colors_lowered  if false, fragment COL0/COL1/BFC0/BFC1 inputs are
 *                        accumulated into info->colors_read instead of
 *                        info->input[]
 */
static void scan_io_usage(const nir_shader *nir, struct si_shader_info *info,
                          nir_intrinsic_instr *intr, bool is_input, bool colors_lowered)
{
   unsigned mask, bit_size;
   bool is_output_load;

   if (nir_intrinsic_has_write_mask(intr)) {
      mask = nir_intrinsic_write_mask(intr); /* store */
      bit_size = nir_src_bit_size(intr->src[0]);
      is_output_load = false;
   } else {
      mask = nir_def_components_read(&intr->def); /* load */
      bit_size = intr->def.bit_size;
      is_output_load = !is_input;
   }
   assert(bit_size != 64 && !(mask & ~0xf) && "64-bit IO should have been lowered");

   /* Convert the 16-bit component mask to a 32-bit component mask except for VS inputs
    * where the mask is untyped.
    */
   if (bit_size == 16 && !is_input) {
      unsigned new_mask = 0;
      for (unsigned i = 0; i < 4; i++) {
         if (mask & (1 << i))
            new_mask |= 0x1 << (i / 2);
      }
      mask = new_mask;
   }

   mask <<= nir_intrinsic_component(intr);

   nir_src offset = *nir_get_io_offset_src(intr);
   bool indirect = !nir_src_is_const(offset);
   if (!indirect)
      assert(nir_src_as_uint(offset) == 0);

   unsigned semantic = 0;
   /* VS doesn't have semantics. */
   if (nir->info.stage != MESA_SHADER_VERTEX || !is_input)
      semantic = nir_intrinsic_io_semantics(intr).location;

   if (nir->info.stage == MESA_SHADER_FRAGMENT && is_input) {
      /* Gather color PS inputs. We can only get here after lowering colors in monolithic
       * shaders. This must match what we do for nir_intrinsic_load_color0/1.
       */
      if (!colors_lowered &&
          (semantic == VARYING_SLOT_COL0 || semantic == VARYING_SLOT_COL1 ||
           semantic == VARYING_SLOT_BFC0 || semantic == VARYING_SLOT_BFC1)) {
         unsigned index = semantic == VARYING_SLOT_COL1 || semantic == VARYING_SLOT_BFC1;
         info->colors_read |= mask << (index * 4);
         return;
      }
   }

   if (nir->info.stage == MESA_SHADER_FRAGMENT && !is_input) {
      /* Never use FRAG_RESULT_COLOR directly. */
      if (semantic == FRAG_RESULT_COLOR)
         semantic = FRAG_RESULT_DATA0;
      semantic += nir_intrinsic_io_semantics(intr).dual_source_blend_index;
   }

   unsigned driver_location = nir_intrinsic_base(intr);
   /* Indirectly-indexed IO touches every slot the semantics cover. */
   unsigned num_slots = indirect ? nir_intrinsic_io_semantics(intr).num_slots : 1;

   if (is_input) {
      assert(driver_location + num_slots <= ARRAY_SIZE(info->input));

      for (unsigned slot = 0; slot < num_slots; slot++) {
         unsigned loc = driver_location + slot;

         info->input[loc].semantic = semantic + slot;

         if (mask) {
            info->input[loc].usage_mask |= mask;
            info->num_inputs = MAX2(info->num_inputs, loc + 1);
         }
      }
   } else {
      /* Outputs. */
      assert(driver_location + num_slots <= ARRAY_SIZE(info->output_usagemask));

      for (unsigned slot = 0; slot < num_slots; slot++) {
         unsigned loc = driver_location + slot;
         unsigned slot_semantic = semantic + slot;

         /* Call the translation functions to validate the semantic (call assertions in them). */
         if (nir->info.stage != MESA_SHADER_FRAGMENT &&
             semantic != VARYING_SLOT_EDGE) {
            if (semantic == VARYING_SLOT_TESS_LEVEL_INNER ||
                semantic == VARYING_SLOT_TESS_LEVEL_OUTER ||
                (semantic >= VARYING_SLOT_PATCH0 && semantic <= VARYING_SLOT_PATCH31)) {
               ac_shader_io_get_unique_index_patch(semantic);
               ac_shader_io_get_unique_index_patch(slot_semantic);
            } else {
               si_shader_io_get_unique_index(semantic);
               si_shader_io_get_unique_index(slot_semantic);
            }
         }

         info->output_semantic[loc] = slot_semantic;

         if (!is_output_load && mask) {
            /* Output stores. */
            unsigned gs_streams = (uint32_t)nir_intrinsic_io_semantics(intr).gs_streams <<
                                  (nir_intrinsic_component(intr) * 2);
            /* Components of this location that no earlier store has written. */
            unsigned new_mask = mask & ~info->output_usagemask[loc];
            const bool has_xfb = nir_intrinsic_has_io_xfb(intr);

            /* Iterate over all components. */
            for (unsigned comp = 0; comp < 4; comp++) {
               unsigned stream = (gs_streams >> (comp * 2)) & 0x3;

               if (new_mask & (1 << comp)) {
                  info->output_streams[loc] |= stream << (comp * 2);
                  info->num_stream_output_components[stream]++;
               }

               if (has_xfb) {
                  /* Components 0-1 are described by io_xfb, components 2-3 by io_xfb2. */
                  nir_io_xfb xfb = comp < 2 ? nir_intrinsic_io_xfb(intr) :
                                              nir_intrinsic_io_xfb2(intr);
                  if (xfb.out[comp % 2].num_components) {
                     info->enabled_streamout_buffer_mask |=
                        BITFIELD_BIT(stream * 4 + xfb.out[comp % 2].buffer);
                  }
               }
            }

            /* The xfb write mask doesn't depend on the component, so OR it in once. */
            if (has_xfb)
               info->output_xfb_writemask[loc] |= nir_instr_xfb_write_mask(intr);

            if (nir_intrinsic_has_src_type(intr))
               info->output_type[loc] = nir_intrinsic_src_type(intr);
            else if (nir_intrinsic_has_dest_type(intr))
               info->output_type[loc] = nir_intrinsic_dest_type(intr);
            else
               info->output_type[loc] = nir_type_float32;

            info->output_usagemask[loc] |= mask;
            info->num_outputs = MAX2(info->num_outputs, loc + 1);

            if (nir->info.stage == MESA_SHADER_VERTEX ||
                nir->info.stage == MESA_SHADER_TESS_CTRL ||
                nir->info.stage == MESA_SHADER_TESS_EVAL ||
                nir->info.stage == MESA_SHADER_GEOMETRY) {
               if (slot_semantic == VARYING_SLOT_TESS_LEVEL_INNER ||
                   slot_semantic == VARYING_SLOT_TESS_LEVEL_OUTER) {
                  if (!nir_intrinsic_io_semantics(intr).no_varying) {
                     info->tess_levels_written_for_tes |=
                        BITFIELD_BIT(ac_shader_io_get_unique_index_patch(slot_semantic));
                  }
               } else if (slot_semantic >= VARYING_SLOT_PATCH0 &&
                          slot_semantic < VARYING_SLOT_TESS_MAX) {
                  if (!nir_intrinsic_io_semantics(intr).no_varying) {
                     info->patch_outputs_written_for_tes |=
                        BITFIELD_BIT(ac_shader_io_get_unique_index_patch(slot_semantic));
                  }
               } else if ((slot_semantic <= VARYING_SLOT_VAR31 ||
                           slot_semantic >= VARYING_SLOT_VAR0_16BIT) &&
                          slot_semantic != VARYING_SLOT_EDGE) {
                  uint64_t bit = BITFIELD64_BIT(si_shader_io_get_unique_index(slot_semantic));

                  /* Ignore outputs that are not passed from VS to PS. */
                  if (slot_semantic != VARYING_SLOT_POS &&
                      slot_semantic != VARYING_SLOT_PSIZ &&
                      slot_semantic != VARYING_SLOT_CLIP_VERTEX &&
                      slot_semantic != VARYING_SLOT_LAYER)
                     info->outputs_written_before_ps |= bit;

                  /* LAYER and VIEWPORT have no effect if they don't feed the rasterizer. */
                  if (slot_semantic != VARYING_SLOT_LAYER &&
                      slot_semantic != VARYING_SLOT_VIEWPORT) {
                     info->ls_es_outputs_written |= bit;

                     if (!nir_intrinsic_io_semantics(intr).no_varying)
                        info->tcs_outputs_written_for_tes |= bit;
                  }
               }
            }

            if (nir->info.stage == MESA_SHADER_FRAGMENT &&
                semantic >= FRAG_RESULT_DATA0 && semantic <= FRAG_RESULT_DATA7) {
               unsigned index = semantic - FRAG_RESULT_DATA0;

               /* Two bits per color output; only 16-bit source types are recorded here. */
               if (nir_intrinsic_src_type(intr) == nir_type_float16)
                  info->output_color_types |= SI_TYPE_FLOAT16 << (index * 2);
               else if (nir_intrinsic_src_type(intr) == nir_type_int16)
                  info->output_color_types |= SI_TYPE_INT16 << (index * 2);
               else if (nir_intrinsic_src_type(intr) == nir_type_uint16)
                  info->output_color_types |= SI_TYPE_UINT16 << (index * 2);
            }
         }
      }
   }

   /* Track whether every depth output is just the input fragment Z. */
   if (nir->info.stage == MESA_SHADER_FRAGMENT && !is_input && semantic == FRAG_RESULT_DEPTH) {
      if (nir_def_is_frag_coord_z(intr->src[0].ssa))
         info->output_z_equals_input_z = true;
      else
         info->output_z_is_not_input_z = true;
   }
}
261 
/**
 * Decide whether a bindless handle must be treated as indirectly loaded.
 *
 * \param src  instruction that produces the handle
 * \return false only when \p src is a load_ubo intrinsic whose src[0] is
 *         constant; true for a load_ubo with a non-constant src[0] and for
 *         every other kind of instruction (worst-case assumption)
 */
static bool is_bindless_handle_indirect(nir_instr *src)
{
   /* Some other instruction. Return the worst-case result. */
   if (src->type != nir_instr_type_intrinsic)
      return true;

   nir_intrinsic_instr *load = nir_instr_as_intrinsic(src);
   if (load->intrinsic != nir_intrinsic_load_ubo)
      return true;

   /* The handle comes from load_ubo: it is indirect unless src[0] is constant. */
   return !nir_src_is_const(load->src[0]);
}
275 
276 /* TODO: convert to nir_shader_instructions_pass */
scan_instruction(const struct nir_shader * nir,struct si_shader_info * info,nir_instr * instr,bool colors_lowered)277 static void scan_instruction(const struct nir_shader *nir, struct si_shader_info *info,
278                              nir_instr *instr, bool colors_lowered)
279 {
280    if (instr->type == nir_instr_type_tex) {
281       nir_tex_instr *tex = nir_instr_as_tex(instr);
282       const nir_src *handle = get_texture_src(tex, nir_tex_src_texture_handle);
283 
284       /* Gather the types of used VMEM instructions that return something. */
285       switch (tex->op) {
286       case nir_texop_tex:
287       case nir_texop_txb:
288       case nir_texop_txl:
289       case nir_texop_txd:
290       case nir_texop_lod:
291       case nir_texop_tg4:
292          info->uses_vmem_sampler_or_bvh = true;
293          break;
294       default:
295          info->uses_vmem_load_other = true;
296          break;
297       }
298 
299       if (handle) {
300          info->uses_bindless_samplers = true;
301 
302          if (is_bindless_handle_indirect(handle->ssa->parent_instr))
303             info->uses_indirect_descriptor = true;
304       } else {
305          const nir_src *deref = get_texture_src(tex, nir_tex_src_texture_deref);
306 
307          if (nir_deref_instr_has_indirect(nir_src_as_deref(*deref)))
308             info->uses_indirect_descriptor = true;
309       }
310 
311       info->has_non_uniform_tex_access |=
312          tex->texture_non_uniform || tex->sampler_non_uniform;
313 
314       info->has_shadow_comparison |= tex->is_shadow;
315    } else if (instr->type == nir_instr_type_intrinsic) {
316       nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
317       const char *intr_name = nir_intrinsic_infos[intr->intrinsic].name;
318       bool is_ssbo = strstr(intr_name, "ssbo");
319       bool is_image = strstr(intr_name, "image") == intr_name;
320       bool is_bindless_image = strstr(intr_name, "bindless_image") == intr_name;
321 
322       /* Gather the types of used VMEM instructions that return something. */
323       if (nir_intrinsic_infos[intr->intrinsic].has_dest) {
324          switch (intr->intrinsic) {
325          case nir_intrinsic_load_ubo:
326             if (!nir_src_is_const(intr->src[1]))
327                info->uses_vmem_load_other = true;
328             break;
329 
330          case nir_intrinsic_load_input:
331          case nir_intrinsic_load_input_vertex:
332          case nir_intrinsic_load_per_vertex_input:
333             if (nir->info.stage == MESA_SHADER_VERTEX ||
334                 nir->info.stage == MESA_SHADER_TESS_EVAL)
335                info->uses_vmem_load_other = true;
336             break;
337 
338          case nir_intrinsic_load_constant:
339          case nir_intrinsic_load_barycentric_at_sample: /* This loads sample positions. */
340          case nir_intrinsic_load_buffer_amd:
341             info->uses_vmem_load_other = true;
342             break;
343 
344          default:
345             if (is_image ||
346                 is_bindless_image ||
347                 is_ssbo ||
348                 (strstr(intr_name, "global") == intr_name ||
349                  intr->intrinsic == nir_intrinsic_load_global ||
350                  intr->intrinsic == nir_intrinsic_store_global) ||
351                 strstr(intr_name, "scratch"))
352                info->uses_vmem_load_other = true;
353             break;
354          }
355       }
356 
357       if (is_bindless_image)
358          info->uses_bindless_images = true;
359 
360       if (is_image && nir_deref_instr_has_indirect(nir_src_as_deref(intr->src[0])))
361          info->uses_indirect_descriptor = true;
362 
363       if (is_bindless_image && is_bindless_handle_indirect(intr->src[0].ssa->parent_instr))
364          info->uses_indirect_descriptor = true;
365 
366       if (intr->intrinsic != nir_intrinsic_store_ssbo && is_ssbo &&
367           !nir_src_is_const(intr->src[0]))
368          info->uses_indirect_descriptor = true;
369 
370       if (nir_intrinsic_has_atomic_op(intr)) {
371          if (nir_intrinsic_atomic_op(intr) == nir_atomic_op_ordered_add_gfx12_amd)
372             info->uses_atomic_ordered_add = true;
373       }
374 
375       switch (intr->intrinsic) {
376       case nir_intrinsic_store_ssbo:
377          if (!nir_src_is_const(intr->src[1]))
378             info->uses_indirect_descriptor = true;
379          break;
380       case nir_intrinsic_load_ubo:
381          if (!nir_src_is_const(intr->src[0]))
382             info->uses_indirect_descriptor = true;
383          break;
384       case nir_intrinsic_load_local_invocation_id:
385       case nir_intrinsic_load_workgroup_id: {
386          unsigned mask = nir_def_components_read(&intr->def);
387          while (mask) {
388             unsigned i = u_bit_scan(&mask);
389 
390             if (intr->intrinsic == nir_intrinsic_load_workgroup_id)
391                info->uses_block_id[i] = true;
392             else
393                info->uses_thread_id[i] = true;
394          }
395          break;
396       }
397       case nir_intrinsic_load_color0:
398       case nir_intrinsic_load_color1: {
399          unsigned index = intr->intrinsic == nir_intrinsic_load_color1;
400          uint8_t mask = nir_def_components_read(&intr->def);
401          info->colors_read |= mask << (index * 4);
402 
403          switch (info->color_interpolate[index]) {
404          case INTERP_MODE_SMOOTH:
405             if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_SAMPLE)
406                info->uses_persp_sample = true;
407             else if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_CENTROID)
408                info->uses_persp_centroid = true;
409             else if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_CENTER)
410                info->uses_persp_center = true;
411             break;
412          case INTERP_MODE_NOPERSPECTIVE:
413             if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_SAMPLE)
414                info->uses_linear_sample = true;
415             else if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_CENTROID)
416                info->uses_linear_centroid = true;
417             else if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_CENTER)
418                info->uses_linear_center = true;
419             break;
420          case INTERP_MODE_COLOR:
421             /* We don't know the final value. This will be FLAT if flatshading is enabled
422              * in the rasterizer state, otherwise it will be SMOOTH.
423              */
424             info->uses_interp_color = true;
425             if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_SAMPLE)
426                info->uses_persp_sample_color = true;
427             else if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_CENTROID)
428                info->uses_persp_centroid_color = true;
429             else if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_CENTER)
430                info->uses_persp_center_color = true;
431             break;
432          }
433          break;
434       }
435       case nir_intrinsic_load_barycentric_at_offset:   /* uses center */
436       case nir_intrinsic_load_barycentric_at_sample:   /* uses center */
437          if (nir_intrinsic_interp_mode(intr) == INTERP_MODE_FLAT)
438             break;
439 
440          if (nir_intrinsic_interp_mode(intr) == INTERP_MODE_NOPERSPECTIVE) {
441             info->uses_linear_center = true;
442          } else {
443             info->uses_persp_center = true;
444          }
445          if (intr->intrinsic == nir_intrinsic_load_barycentric_at_offset)
446             info->uses_interp_at_offset = true;
447          if (intr->intrinsic == nir_intrinsic_load_barycentric_at_sample)
448             info->uses_interp_at_sample = true;
449          break;
450       case nir_intrinsic_load_frag_coord:
451          info->reads_frag_coord_mask |= nir_def_components_read(&intr->def);
452          break;
453       case nir_intrinsic_load_input:
454       case nir_intrinsic_load_per_vertex_input:
455       case nir_intrinsic_load_input_vertex:
456       case nir_intrinsic_load_interpolated_input:
457          scan_io_usage(nir, info, intr, true, colors_lowered);
458          break;
459       case nir_intrinsic_load_output:
460       case nir_intrinsic_load_per_vertex_output:
461       case nir_intrinsic_store_output:
462       case nir_intrinsic_store_per_vertex_output:
463          scan_io_usage(nir, info, intr, false, colors_lowered);
464          break;
465       case nir_intrinsic_load_deref:
466       case nir_intrinsic_store_deref:
467          /* These can only occur if there is indirect temp indexing. */
468          break;
469       case nir_intrinsic_interp_deref_at_centroid:
470       case nir_intrinsic_interp_deref_at_sample:
471       case nir_intrinsic_interp_deref_at_offset:
472          unreachable("these opcodes should have been lowered");
473          break;
474       case nir_intrinsic_ordered_add_loop_gfx12_amd:
475          info->uses_atomic_ordered_add = true;
476          break;
477       default:
478          break;
479       }
480    }
481 }
482 
si_nir_scan_shader(struct si_screen * sscreen,struct nir_shader * nir,struct si_shader_info * info,bool colors_lowered)483 void si_nir_scan_shader(struct si_screen *sscreen, struct nir_shader *nir,
484                         struct si_shader_info *info, bool colors_lowered)
485 {
486    bool force_use_aco = false;
487    if (sscreen->force_shader_use_aco) {
488       if (!memcmp(sscreen->use_aco_shader_blake, nir->info.source_blake3,
489                   sizeof(sscreen->use_aco_shader_blake))) {
490          force_use_aco = true;
491       }
492    }
493 
494    nir->info.use_aco_amd = aco_is_gpu_supported(&sscreen->info) &&
495                            sscreen->info.has_image_opcodes &&
496                            (sscreen->use_aco || nir->info.use_aco_amd || force_use_aco ||
497                             /* Use ACO for streamout on gfx12 because it's faster. */
498                             (sscreen->info.gfx_level >= GFX12 && nir->xfb_info &&
499                              nir->xfb_info->output_count));
500 
501    if (nir->info.stage == MESA_SHADER_FRAGMENT) {
502       /* post_depth_coverage implies early_fragment_tests */
503       nir->info.fs.early_fragment_tests |= nir->info.fs.post_depth_coverage;
504    }
505 
506    memset(info, 0, sizeof(*info));
507    info->base = nir->info;
508 
509    /* Get options from shader profiles. */
510    for (unsigned i = 0; i < ARRAY_SIZE(si_shader_profiles); i++) {
511       if (_mesa_printed_blake3_equal(nir->info.source_blake3, si_shader_profiles[i].blake3)) {
512          info->options = si_shader_profiles[i].options;
513          break;
514       }
515    }
516 
517    if (nir->info.stage == MESA_SHADER_FRAGMENT) {
518       info->color_interpolate[0] = nir->info.fs.color0_interp;
519       info->color_interpolate[1] = nir->info.fs.color1_interp;
520       for (unsigned i = 0; i < 2; i++) {
521          if (info->color_interpolate[i] == INTERP_MODE_NONE)
522             info->color_interpolate[i] = INTERP_MODE_COLOR;
523       }
524 
525       info->color_interpolate_loc[0] = nir->info.fs.color0_sample ? TGSI_INTERPOLATE_LOC_SAMPLE :
526                                        nir->info.fs.color0_centroid ? TGSI_INTERPOLATE_LOC_CENTROID :
527                                                                       TGSI_INTERPOLATE_LOC_CENTER;
528       info->color_interpolate_loc[1] = nir->info.fs.color1_sample ? TGSI_INTERPOLATE_LOC_SAMPLE :
529                                        nir->info.fs.color1_centroid ? TGSI_INTERPOLATE_LOC_CENTROID :
530                                                                       TGSI_INTERPOLATE_LOC_CENTER;
531       /* Set an invalid value. Will be determined at draw time if needed when the expected
532        * conditions are met.
533        */
534       info->writes_1_if_tex_is_1 = nir->info.writes_memory ? 0 : 0xff;
535    }
536 
537    info->constbuf0_num_slots = nir->num_uniforms;
538 
539    if (nir->info.stage == MESA_SHADER_TESS_CTRL) {
540       nir_tcs_info tcs_info;
541       nir_gather_tcs_info(nir, &tcs_info, nir->info.tess._primitive_mode,
542                           nir->info.tess.spacing);
543 
544       info->tessfactors_are_def_in_all_invocs = tcs_info.all_invocations_define_tess_levels;
545    }
546 
547    /* tess factors are loaded as input instead of system value */
548    info->reads_tess_factors = nir->info.inputs_read &
549       (BITFIELD64_BIT(VARYING_SLOT_TESS_LEVEL_INNER) |
550        BITFIELD64_BIT(VARYING_SLOT_TESS_LEVEL_OUTER));
551 
552    info->uses_frontface = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRONT_FACE) |
553                           BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRONT_FACE_FSIGN);
554    info->uses_instanceid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_INSTANCE_ID);
555    info->uses_base_vertex = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX);
556    info->uses_base_instance = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BASE_INSTANCE);
557    info->uses_invocationid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_INVOCATION_ID);
558    info->uses_grid_size = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_NUM_WORKGROUPS);
559    info->uses_tg_size = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_NUM_SUBGROUPS);
560    if (sscreen->info.gfx_level < GFX12) {
561       info->uses_tg_size |= BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_LOCAL_INVOCATION_INDEX) ||
562                             BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SUBGROUP_ID) ||
563                             si_should_clear_lds(sscreen, nir);
564    }
565    info->uses_variable_block_size = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_WORKGROUP_SIZE);
566    info->uses_drawid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_DRAW_ID);
567    info->uses_primid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID) ||
568                        nir->info.inputs_read & VARYING_BIT_PRIMITIVE_ID;
569    info->reads_samplemask = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN);
570    info->uses_linear_sample = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BARYCENTRIC_LINEAR_SAMPLE);
571    info->uses_linear_centroid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BARYCENTRIC_LINEAR_CENTROID);
572    info->uses_linear_center = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BARYCENTRIC_LINEAR_PIXEL);
573    info->uses_persp_sample = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BARYCENTRIC_PERSP_SAMPLE);
574    info->uses_persp_centroid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BARYCENTRIC_PERSP_CENTROID);
575    info->uses_persp_center = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL);
576 
577    if (nir->info.stage == MESA_SHADER_FRAGMENT) {
578       info->writes_z = nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH);
579       info->writes_stencil = nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL);
580       info->writes_samplemask = nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK);
581 
582       info->colors_written = nir->info.outputs_written >> FRAG_RESULT_DATA0;
583       if (nir->info.fs.color_is_dual_source)
584          info->colors_written |= 0x2;
585       if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_COLOR)) {
586          info->colors_written |= 0x1;
587          info->color0_writes_all_cbufs = info->colors_written == 0x1;
588 
589       }
590    } else {
591       info->writes_primid = nir->info.outputs_written & VARYING_BIT_PRIMITIVE_ID;
592       info->writes_viewport_index = nir->info.outputs_written & VARYING_BIT_VIEWPORT;
593       info->writes_layer = nir->info.outputs_written & VARYING_BIT_LAYER;
594       info->writes_psize = nir->info.outputs_written & VARYING_BIT_PSIZ;
595       info->writes_clipvertex = nir->info.outputs_written & VARYING_BIT_CLIP_VERTEX;
596       info->writes_edgeflag = nir->info.outputs_written & VARYING_BIT_EDGE;
597       info->writes_position = nir->info.outputs_written & VARYING_BIT_POS;
598    }
599 
600    nir_function_impl *impl = nir_shader_get_entrypoint((nir_shader*)nir);
601    nir_foreach_block (block, impl) {
602       nir_foreach_instr (instr, block)
603          scan_instruction(nir, info, instr, colors_lowered);
604    }
605 
606    if (nir->info.stage == MESA_SHADER_VERTEX || nir->info.stage == MESA_SHADER_TESS_EVAL ||
607        nir->info.stage == MESA_SHADER_GEOMETRY) {
608       info->num_streamout_components = 0;
609       for (unsigned i = 0; i < info->num_outputs; i++)
610          info->num_streamout_components += util_bitcount(info->output_xfb_writemask[i]);
611    }
612 
613    if (nir->info.stage == MESA_SHADER_VERTEX || nir->info.stage == MESA_SHADER_TESS_EVAL) {
614       /* Add the PrimitiveID output, but don't increment num_outputs.
615        * The driver inserts PrimitiveID only when it's used by the pixel shader,
616        * and si_emit_spi_map uses this unconditionally when such a pixel shader is used.
617        */
618       info->output_semantic[info->num_outputs] = VARYING_SLOT_PRIMITIVE_ID;
619       info->output_type[info->num_outputs] = nir_type_uint32;
620       info->output_usagemask[info->num_outputs] = 0x1;
621    }
622 
623    if (nir->info.stage == MESA_SHADER_FRAGMENT) {
624       info->output_z_equals_input_z &= !info->output_z_is_not_input_z;
625       info->allow_flat_shading = !(info->uses_persp_center || info->uses_persp_centroid ||
626                                    info->uses_persp_sample || info->uses_linear_center ||
627                                    info->uses_linear_centroid || info->uses_linear_sample ||
628                                    info->uses_interp_at_sample || nir->info.writes_memory ||
629                                    nir->info.fs.uses_fbfetch_output ||
630                                    nir->info.fs.needs_quad_helper_invocations ||
631                                    BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRAG_COORD) ||
632                                    BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_POINT_COORD) ||
633                                    BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID) ||
634                                    BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_POS) ||
635                                    BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN) ||
636                                    BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_HELPER_INVOCATION));
637 
638       info->uses_vmem_load_other |= nir->info.fs.uses_fbfetch_output;
639 
640       /* Add both front and back color inputs. */
641       unsigned num_inputs_with_colors = info->num_inputs;
642       for (unsigned back = 0; back < 2; back++) {
643          for (unsigned i = 0; i < 2; i++) {
644             if ((info->colors_read >> (i * 4)) & 0xf) {
645                unsigned index = num_inputs_with_colors;
646 
647                info->input[index].semantic = (back ? VARYING_SLOT_BFC0 : VARYING_SLOT_COL0) + i;
648                info->input[index].usage_mask = info->colors_read >> (i * 4);
649                num_inputs_with_colors++;
650 
651                /* Back-face color don't increment num_inputs. si_emit_spi_map will use
652                 * back-face colors conditionally only when they are needed.
653                 */
654                if (!back)
655                   info->num_inputs = num_inputs_with_colors;
656             }
657          }
658       }
659    }
660 
661    info->uses_vmem_load_other |= info->uses_indirect_descriptor;
662    info->has_divergent_loop = nir_has_divergent_loop((nir_shader*)nir);
663 
664    if (nir->info.stage == MESA_SHADER_VERTEX) {
665       info->num_vs_inputs =
666          nir->info.stage == MESA_SHADER_VERTEX && !nir->info.vs.blit_sgprs_amd ? info->num_inputs : 0;
667       unsigned num_vbos_in_sgprs = si_num_vbos_in_user_sgprs_inline(sscreen->info.gfx_level);
668       info->num_vbos_in_user_sgprs = MIN2(info->num_vs_inputs, num_vbos_in_sgprs);
669    }
670 
671    if (nir->info.stage == MESA_SHADER_VERTEX ||
672        nir->info.stage == MESA_SHADER_TESS_CTRL ||
673        nir->info.stage == MESA_SHADER_TESS_EVAL) {
674       info->esgs_vertex_stride =
675          util_last_bit64(info->ls_es_outputs_written) * 16;
676 
677       /* For the ESGS ring in LDS, add 1 dword to reduce LDS bank
678        * conflicts, i.e. each vertex will start on a different bank.
679        */
680       if (sscreen->info.gfx_level >= GFX9) {
681          if (info->esgs_vertex_stride)
682             info->esgs_vertex_stride += 4;
683       } else {
684          assert(((info->esgs_vertex_stride / 4) & C_028AAC_ITEMSIZE) == 0);
685       }
686 
687       info->tcs_inputs_via_temp = nir->info.tess.tcs_same_invocation_inputs_read;
688       info->tcs_inputs_via_lds = nir->info.tess.tcs_cross_invocation_inputs_read |
689                                  (nir->info.tess.tcs_same_invocation_inputs_read &
690                                   nir->info.inputs_read_indirectly);
691    }
692 
693    if (nir->info.stage == MESA_SHADER_GEOMETRY) {
694       info->gsvs_vertex_size = info->num_outputs * 16;
695       info->max_gsvs_emit_size = info->gsvs_vertex_size * nir->info.gs.vertices_out;
696       info->gs_input_verts_per_prim =
697          mesa_vertices_per_prim(nir->info.gs.input_primitive);
698    }
699 
700    info->clipdist_mask = info->writes_clipvertex ? SI_USER_CLIP_PLANE_MASK :
701                          u_bit_consecutive(0, nir->info.clip_distance_array_size);
702    info->culldist_mask = u_bit_consecutive(0, nir->info.cull_distance_array_size) <<
703                          nir->info.clip_distance_array_size;
704 
705    if (nir->info.stage == MESA_SHADER_FRAGMENT) {
706       for (unsigned i = 0; i < info->num_inputs; i++) {
707          unsigned semantic = info->input[i].semantic;
708 
709          if ((semantic <= VARYING_SLOT_VAR31 || semantic >= VARYING_SLOT_VAR0_16BIT) &&
710              semantic != VARYING_SLOT_PNTC) {
711             info->inputs_read |= 1ull << si_shader_io_get_unique_index(semantic);
712          }
713       }
714 
715       for (unsigned i = 0; i < 8; i++)
716          if (info->colors_written & (1 << i))
717             info->colors_written_4bit |= 0xf << (4 * i);
718 
719       for (unsigned i = 0; i < info->num_inputs; i++) {
720          if (info->input[i].semantic == VARYING_SLOT_COL0)
721             info->color_attr_index[0] = i;
722          else if (info->input[i].semantic == VARYING_SLOT_COL1)
723             info->color_attr_index[1] = i;
724       }
725    }
726 }
727 
728 enum ac_hw_stage
si_select_hw_stage(const gl_shader_stage stage,const union si_shader_key * const key,const enum amd_gfx_level gfx_level)729 si_select_hw_stage(const gl_shader_stage stage, const union si_shader_key *const key,
730                    const enum amd_gfx_level gfx_level)
731 {
732    switch (stage) {
733    case MESA_SHADER_VERTEX:
734    case MESA_SHADER_TESS_EVAL:
735       if (key->ge.as_ngg)
736          return AC_HW_NEXT_GEN_GEOMETRY_SHADER;
737       else if (key->ge.as_es)
738          return gfx_level >= GFX9 ? AC_HW_LEGACY_GEOMETRY_SHADER : AC_HW_EXPORT_SHADER;
739       else if (key->ge.as_ls)
740          return gfx_level >= GFX9 ? AC_HW_HULL_SHADER : AC_HW_LOCAL_SHADER;
741       else
742          return AC_HW_VERTEX_SHADER;
743    case MESA_SHADER_TESS_CTRL:
744       return AC_HW_HULL_SHADER;
745    case MESA_SHADER_GEOMETRY:
746       if (key->ge.as_ngg)
747          return AC_HW_NEXT_GEN_GEOMETRY_SHADER;
748       else
749          return AC_HW_LEGACY_GEOMETRY_SHADER;
750    case MESA_SHADER_FRAGMENT:
751       return AC_HW_PIXEL_SHADER;
752    case MESA_SHADER_COMPUTE:
753    case MESA_SHADER_KERNEL:
754       return AC_HW_COMPUTE_SHADER;
755    default:
756       unreachable("Unsupported HW stage");
757    }
758 }
759