• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2021 Advanced Micro Devices, Inc.
3  *
4  * SPDX-License-Identifier: MIT
5  */
6 
7 #include "si_pipe.h"
8 #include "si_shader_internal.h"
9 #include "util/mesa-sha1.h"
10 #include "sid.h"
11 #include "nir.h"
12 
/* Per-application shader profiles: each entry maps the SHA-1 of a shader's
 * source (matched against info->base.source_sha1 in si_nir_scan_shader())
 * to driver workaround/optimization flags.
 */
struct si_shader_profile si_shader_profiles[] =
{
   {
      /* Plot3D */
      {0x485320cd, 0x87a9ba05, 0x24a60e4f, 0x25aa19f7, 0xf5287451},
      SI_PROFILE_VS_NO_BINNING,
   },
   {
      /* Viewperf/Medical */
      {0x4dce4331, 0x38f778d5, 0x1b75a717, 0x3e454fb9, 0xeb1527f0},
      SI_PROFILE_GFX9_GFX10_PS_NO_BINNING,
   },
   {
      /* Viewperf/Medical, a shader with a divergent loop doesn't benefit from Wave32,
       * probably due to interpolation performance.
       */
      {0x29f0f4a0, 0x0672258d, 0x47ccdcfd, 0x31e67dcc, 0xdcb1fda8},
      SI_PROFILE_GFX10_WAVE64,
   },
   {
      /* Viewperf/Creo */
      {0x1f288a73, 0xba46cce5, 0xbf68e6c6, 0x58543651, 0xca3c8efd},
      SI_PROFILE_CLAMP_DIV_BY_ZERO,
   },
};
38 
/* Return the number of entries in the si_shader_profiles table. */
unsigned si_get_num_shader_profiles(void)
{
   return ARRAY_SIZE(si_shader_profiles);
}
43 
/* Return an 8-bit writemask of tess factor channels written by this
 * instruction: bits [3:0] = inner tess levels, bits [7:4] = outer tess
 * levels. Anything that isn't a tess level store yields 0.
 */
static unsigned get_inst_tessfactor_writemask(nir_intrinsic_instr *intrin)
{
   if (intrin->intrinsic != nir_intrinsic_store_output)
      return 0;

   unsigned chan_mask = nir_intrinsic_write_mask(intrin) << nir_intrinsic_component(intrin);

   switch (nir_intrinsic_io_semantics(intrin).location) {
   case VARYING_SLOT_TESS_LEVEL_OUTER:
      return chan_mask << 4;
   case VARYING_SLOT_TESS_LEVEL_INNER:
      return chan_mask;
   default:
      return 0;
   }
}
59 
/* Recursively walk a TCS control-flow node, tracking which tess factor
 * channels are written unconditionally (*upper_block_tf_writemask) versus
 * conditionally (*cond_block_tf_writemask) within the current
 * barrier-delimited code segment. Clears *tessfactors_are_def_in_all_invocs
 * when a conditionally written channel isn't also written on all paths in
 * the same segment (or when a barrier appears in nested control flow).
 */
static void scan_tess_ctrl(nir_cf_node *cf_node, unsigned *upper_block_tf_writemask,
                           unsigned *cond_block_tf_writemask,
                           bool *tessfactors_are_def_in_all_invocs, bool is_nested_cf)
{
   switch (cf_node->type) {
   case nir_cf_node_block: {
      nir_block *block = nir_cf_node_as_block(cf_node);
      nir_foreach_instr (instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         if (intrin->intrinsic == nir_intrinsic_barrier &&
             nir_intrinsic_execution_scope(intrin) >= SCOPE_WORKGROUP) {

            /* If we find a barrier in nested control flow put this in the
             * too hard basket. In GLSL this is not possible but it is in
             * SPIR-V.
             */
            if (is_nested_cf) {
               *tessfactors_are_def_in_all_invocs = false;
               return;
            }

            /* The following case must be prevented:
             *    gl_TessLevelInner = ...;
             *    barrier();
             *    if (gl_InvocationID == 1)
             *       gl_TessLevelInner = ...;
             *
             * If you consider disjoint code segments separated by barriers, each
             * such segment that writes tess factor channels should write the same
             * channels in all codepaths within that segment.
             */
            if (*upper_block_tf_writemask || *cond_block_tf_writemask) {
               /* Accumulate the result: */
               *tessfactors_are_def_in_all_invocs &=
                  !(*cond_block_tf_writemask & ~(*upper_block_tf_writemask));

               /* Analyze the next code segment from scratch. */
               *upper_block_tf_writemask = 0;
               *cond_block_tf_writemask = 0;
            }
         } else
            *upper_block_tf_writemask |= get_inst_tessfactor_writemask(intrin);
      }

      break;
   }
   case nir_cf_node_if: {
      unsigned then_tessfactor_writemask = 0;
      unsigned else_tessfactor_writemask = 0;

      nir_if *if_stmt = nir_cf_node_as_if(cf_node);
      foreach_list_typed(nir_cf_node, nested_node, node, &if_stmt->then_list)
      {
         scan_tess_ctrl(nested_node, &then_tessfactor_writemask, cond_block_tf_writemask,
                        tessfactors_are_def_in_all_invocs, true);
      }

      foreach_list_typed(nir_cf_node, nested_node, node, &if_stmt->else_list)
      {
         scan_tess_ctrl(nested_node, &else_tessfactor_writemask, cond_block_tf_writemask,
                        tessfactors_are_def_in_all_invocs, true);
      }

      if (then_tessfactor_writemask || else_tessfactor_writemask) {
         /* If both statements write the same tess factor channels,
          * we can say that the upper block writes them too.
          */
         *upper_block_tf_writemask |= then_tessfactor_writemask & else_tessfactor_writemask;
         *cond_block_tf_writemask |= then_tessfactor_writemask | else_tessfactor_writemask;
      }

      break;
   }
   case nir_cf_node_loop: {
      nir_loop *loop = nir_cf_node_as_loop(cf_node);
      assert(!nir_loop_has_continue_construct(loop));
      /* Loop bodies may execute zero times, so their writes only ever count
       * as conditional: both masks alias the conditional mask here.
       */
      foreach_list_typed(nir_cf_node, nested_node, node, &loop->body)
      {
         scan_tess_ctrl(nested_node, cond_block_tf_writemask, cond_block_tf_writemask,
                        tessfactors_are_def_in_all_invocs, true);
      }

      break;
   }
   default:
      unreachable("unknown cf node type");
   }
}
151 
/* Return whether every TCS invocation provably writes the same tess factor
 * channels in all codepaths of every barrier-delimited code segment.
 */
static bool are_tessfactors_def_in_all_invocs(const struct nir_shader *nir)
{
   assert(nir->info.stage == MESA_SHADER_TESS_CTRL);

   /* The pass works as follows:
    * If all codepaths write tess factors, we can say that all
    * invocations define tess factors.
    *
    * Each tess factor channel is tracked separately.
    */
   unsigned main_block_tf_writemask = 0; /* if main block writes tess factors */
   unsigned cond_block_tf_writemask = 0; /* if cond block writes tess factors */

   /* Initial value = true. Here the pass will accumulate results from
    * multiple segments surrounded by barriers. If tess factors aren't
    * written at all, it's a shader bug and we don't care if this will be
    * true.
    */
   bool tessfactors_are_def_in_all_invocs = true;

   nir_foreach_function (function, nir) {
      if (function->impl) {
         foreach_list_typed(nir_cf_node, node, node, &function->impl->body)
         {
            scan_tess_ctrl(node, &main_block_tf_writemask, &cond_block_tf_writemask,
                           &tessfactors_are_def_in_all_invocs, false);
         }
      }
   }

   /* Accumulate the result for the last code segment separated by a
    * barrier.
    */
   if (main_block_tf_writemask || cond_block_tf_writemask) {
      tessfactors_are_def_in_all_invocs &= !(cond_block_tf_writemask & ~main_block_tf_writemask);
   }

   return tessfactors_are_def_in_all_invocs;
}
191 
get_texture_src(nir_tex_instr * instr,nir_tex_src_type type)192 static const nir_src *get_texture_src(nir_tex_instr *instr, nir_tex_src_type type)
193 {
194    for (unsigned i = 0; i < instr->num_srcs; i++) {
195       if (instr->src[i].src_type == type)
196          return &instr->src[i].src;
197    }
198    return NULL;
199 }
200 
/* Gather usage info from one input/output load/store intrinsic into "info".
 *
 * Records, per driver-location slot: semantic, interpolation mode, usage and
 * read masks, 16-bit half validity, GS stream assignments, enabled streamout
 * buffers, output types, and PS color input/output information.
 *
 * nir      - the shader being scanned (read-only)
 * info     - accumulated shader info (updated in place)
 * intr     - the IO intrinsic to scan
 * is_input - true for input intrinsics, false for output loads/stores
 */
static void scan_io_usage(const nir_shader *nir, struct si_shader_info *info,
                          nir_intrinsic_instr *intr, bool is_input)
{
   unsigned interp = INTERP_MODE_FLAT; /* load_input uses flat shading */

   if (intr->intrinsic == nir_intrinsic_load_interpolated_input) {
      /* The interpolation mode is carried by the barycentric source intrinsic. */
      nir_instr *src_instr = intr->src[0].ssa->parent_instr;
      if (src_instr->type == nir_instr_type_intrinsic) {
         nir_intrinsic_instr *baryc = nir_instr_as_intrinsic(src_instr);
         if (nir_intrinsic_infos[baryc->intrinsic].index_map[NIR_INTRINSIC_INTERP_MODE] > 0)
            interp = nir_intrinsic_interp_mode(baryc);
         else
            unreachable("unknown barycentric intrinsic");
      } else {
         /* May get here when si_update_shader_binary_info() after ps lower bc_optimize
          * which select center and centroid. Set to any value is OK because we don't
          * care this when si_update_shader_binary_info().
          */
         interp = INTERP_MODE_SMOOTH;
      }
   }

   unsigned mask, bit_size;
   bool is_output_load;

   if (nir_intrinsic_has_write_mask(intr)) {
      mask = nir_intrinsic_write_mask(intr); /* store */
      bit_size = nir_src_bit_size(intr->src[0]);
      is_output_load = false;
   } else {
      mask = nir_def_components_read(&intr->def); /* load */
      bit_size = intr->def.bit_size;
      is_output_load = !is_input;
   }
   assert(bit_size != 64 && !(mask & ~0xf) && "64-bit IO should have been lowered");

   /* Convert the 16-bit component mask to a 32-bit component mask except for VS inputs
    * where the mask is untyped.
    */
   if (bit_size == 16 && !is_input) {
      unsigned new_mask = 0;
      for (unsigned i = 0; i < 4; i++) {
         if (mask & (1 << i))
            new_mask |= 0x1 << (i / 2);
      }
      mask = new_mask;
   }

   mask <<= nir_intrinsic_component(intr);

   nir_src offset = *nir_get_io_offset_src(intr);
   bool indirect = !nir_src_is_const(offset);
   if (!indirect)
      assert(nir_src_as_uint(offset) == 0);

   unsigned semantic = 0;
   /* VS doesn't have semantics. */
   if (nir->info.stage != MESA_SHADER_VERTEX || !is_input)
      semantic = nir_intrinsic_io_semantics(intr).location;

   if (nir->info.stage == MESA_SHADER_FRAGMENT && is_input) {
      /* The PARAM_GEN input shouldn't be scanned. */
      if (nir_intrinsic_io_semantics(intr).no_varying)
         return;

      /* Gather color PS inputs. We can only get here after lowering colors in monolithic
       * shaders. This must match what we do for nir_intrinsic_load_color0/1.
       */
      if (semantic == VARYING_SLOT_COL0 || semantic == VARYING_SLOT_COL1 ||
          semantic == VARYING_SLOT_BFC0 || semantic == VARYING_SLOT_BFC1) {
         unsigned index = semantic == VARYING_SLOT_COL1 || semantic == VARYING_SLOT_BFC1;
         info->colors_read |= mask << (index * 4);
         return;
      }
   }

   if (nir->info.stage == MESA_SHADER_FRAGMENT && !is_input) {
      /* Never use FRAG_RESULT_COLOR directly. */
      if (semantic == FRAG_RESULT_COLOR)
         semantic = FRAG_RESULT_DATA0;
      semantic += nir_intrinsic_io_semantics(intr).dual_source_blend_index;
   }

   unsigned driver_location = nir_intrinsic_base(intr);
   unsigned num_slots = indirect ? nir_intrinsic_io_semantics(intr).num_slots : 1;

   if (is_input) {
      assert(driver_location + num_slots <= ARRAY_SIZE(info->input));

      for (unsigned i = 0; i < num_slots; i++) {
         unsigned loc = driver_location + i;

         info->input[loc].semantic = semantic + i;

         if (semantic == VARYING_SLOT_PRIMITIVE_ID)
            info->input[loc].interpolate = INTERP_MODE_FLAT;
         else
            info->input[loc].interpolate = interp;

         if (mask) {
            info->input[loc].usage_mask |= mask;
            if (bit_size == 16) {
               if (nir_intrinsic_io_semantics(intr).high_16bits)
                  info->input[loc].fp16_lo_hi_valid |= 0x2;
               else
                  info->input[loc].fp16_lo_hi_valid |= 0x1;
            }
            info->num_inputs = MAX2(info->num_inputs, loc + 1);
         }
      }
   } else {
      /* Outputs. */
      assert(driver_location + num_slots <= ARRAY_SIZE(info->output_usagemask));

      for (unsigned i = 0; i < num_slots; i++) {
         unsigned loc = driver_location + i;

         info->output_semantic[loc] = semantic + i;

         if (is_output_load) {
            /* Output loads have only a few things that we need to track. */
            info->output_readmask[loc] |= mask;
         } else if (mask) {
            /* Output stores. */
            unsigned gs_streams = (uint32_t)nir_intrinsic_io_semantics(intr).gs_streams <<
                                  (nir_intrinsic_component(intr) * 2);
            unsigned new_mask = mask & ~info->output_usagemask[loc];

            /* Iterate over all components. (Named "chan" so it doesn't shadow
             * the slot loop variable "i" above.)
             */
            for (unsigned chan = 0; chan < 4; chan++) {
               unsigned stream = (gs_streams >> (chan * 2)) & 0x3;

               if (new_mask & (1 << chan)) {
                  info->output_streams[loc] |= stream << (chan * 2);
                  info->num_stream_output_components[stream]++;
               }

               if (nir_intrinsic_has_io_xfb(intr)) {
                  /* io_xfb describes components 0-1, io_xfb2 components 2-3. */
                  nir_io_xfb xfb = chan < 2 ? nir_intrinsic_io_xfb(intr) :
                                              nir_intrinsic_io_xfb2(intr);
                  if (xfb.out[chan % 2].num_components) {
                     info->enabled_streamout_buffer_mask |=
                        BITFIELD_BIT(stream * 4 + xfb.out[chan % 2].buffer);
                  }
               }
            }

            if (nir_intrinsic_has_src_type(intr))
               info->output_type[loc] = nir_intrinsic_src_type(intr);
            else if (nir_intrinsic_has_dest_type(intr))
               info->output_type[loc] = nir_intrinsic_dest_type(intr);
            else
               info->output_type[loc] = nir_type_float32;

            info->output_usagemask[loc] |= mask;
            info->num_outputs = MAX2(info->num_outputs, loc + 1);

            if (nir->info.stage == MESA_SHADER_FRAGMENT &&
                semantic >= FRAG_RESULT_DATA0 && semantic <= FRAG_RESULT_DATA7) {
               unsigned index = semantic - FRAG_RESULT_DATA0;

               if (nir_intrinsic_src_type(intr) == nir_type_float16)
                  info->output_color_types |= SI_TYPE_FLOAT16 << (index * 2);
               else if (nir_intrinsic_src_type(intr) == nir_type_int16)
                  info->output_color_types |= SI_TYPE_INT16 << (index * 2);
               else if (nir_intrinsic_src_type(intr) == nir_type_uint16)
                  info->output_color_types |= SI_TYPE_UINT16 << (index * 2);
            }
         }
      }
   }
}
374 
is_bindless_handle_indirect(nir_instr * src)375 static bool is_bindless_handle_indirect(nir_instr *src)
376 {
377    /* Check if the bindless handle comes from indirect load_ubo. */
378    if (src->type == nir_instr_type_intrinsic &&
379        nir_instr_as_intrinsic(src)->intrinsic == nir_intrinsic_load_ubo) {
380       if (!nir_src_is_const(nir_instr_as_intrinsic(src)->src[0]))
381          return true;
382    } else {
383       /* Some other instruction. Return the worst-case result. */
384       return true;
385    }
386    return false;
387 }
388 
389 /* TODO: convert to nir_shader_instructions_pass */
scan_instruction(const struct nir_shader * nir,struct si_shader_info * info,nir_instr * instr)390 static void scan_instruction(const struct nir_shader *nir, struct si_shader_info *info,
391                              nir_instr *instr)
392 {
393    if (instr->type == nir_instr_type_tex) {
394       nir_tex_instr *tex = nir_instr_as_tex(instr);
395       const nir_src *handle = get_texture_src(tex, nir_tex_src_texture_handle);
396 
397       /* Gather the types of used VMEM instructions that return something. */
398       switch (tex->op) {
399       case nir_texop_tex:
400       case nir_texop_txb:
401       case nir_texop_txl:
402       case nir_texop_txd:
403       case nir_texop_lod:
404       case nir_texop_tg4:
405          info->uses_vmem_sampler_or_bvh = true;
406          break;
407       default:
408          info->uses_vmem_load_other = true;
409          break;
410       }
411 
412       if (handle) {
413          info->uses_bindless_samplers = true;
414 
415          if (is_bindless_handle_indirect(handle->ssa->parent_instr))
416             info->uses_indirect_descriptor = true;
417       } else {
418          const nir_src *deref = get_texture_src(tex, nir_tex_src_texture_deref);
419 
420          if (nir_deref_instr_has_indirect(nir_src_as_deref(*deref)))
421             info->uses_indirect_descriptor = true;
422       }
423 
424       info->has_non_uniform_tex_access =
425          tex->texture_non_uniform || tex->sampler_non_uniform;
426    } else if (instr->type == nir_instr_type_intrinsic) {
427       nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
428       const char *intr_name = nir_intrinsic_infos[intr->intrinsic].name;
429       bool is_ssbo = strstr(intr_name, "ssbo");
430       bool is_image = strstr(intr_name, "image") == intr_name;
431       bool is_bindless_image = strstr(intr_name, "bindless_image") == intr_name;
432 
433       /* Gather the types of used VMEM instructions that return something. */
434       if (nir_intrinsic_infos[intr->intrinsic].has_dest) {
435          switch (intr->intrinsic) {
436          case nir_intrinsic_load_ubo:
437             if (!nir_src_is_const(intr->src[1]))
438                info->uses_vmem_load_other = true;
439             break;
440 
441          case nir_intrinsic_load_input:
442          case nir_intrinsic_load_input_vertex:
443          case nir_intrinsic_load_per_vertex_input:
444             if (nir->info.stage == MESA_SHADER_VERTEX ||
445                 nir->info.stage == MESA_SHADER_TESS_EVAL)
446                info->uses_vmem_load_other = true;
447             break;
448 
449          case nir_intrinsic_load_constant:
450          case nir_intrinsic_load_barycentric_at_sample: /* This loads sample positions. */
451          case nir_intrinsic_load_buffer_amd:
452             info->uses_vmem_load_other = true;
453             break;
454 
455          default:
456             if (is_image ||
457                 is_bindless_image ||
458                 is_ssbo ||
459                 (strstr(intr_name, "global") == intr_name ||
460                  intr->intrinsic == nir_intrinsic_load_global ||
461                  intr->intrinsic == nir_intrinsic_store_global) ||
462                 strstr(intr_name, "scratch"))
463                info->uses_vmem_load_other = true;
464             break;
465          }
466       }
467 
468       if (is_bindless_image)
469          info->uses_bindless_images = true;
470 
471       if (nir_intrinsic_writes_external_memory(intr))
472          info->num_memory_stores++;
473 
474       if (is_image && nir_deref_instr_has_indirect(nir_src_as_deref(intr->src[0])))
475          info->uses_indirect_descriptor = true;
476 
477       if (is_bindless_image && is_bindless_handle_indirect(intr->src[0].ssa->parent_instr))
478          info->uses_indirect_descriptor = true;
479 
480       if (intr->intrinsic != nir_intrinsic_store_ssbo && is_ssbo &&
481           !nir_src_is_const(intr->src[0]))
482          info->uses_indirect_descriptor = true;
483 
484       switch (intr->intrinsic) {
485       case nir_intrinsic_store_ssbo:
486          if (!nir_src_is_const(intr->src[1]))
487             info->uses_indirect_descriptor = true;
488          break;
489       case nir_intrinsic_load_ubo:
490          if (!nir_src_is_const(intr->src[0]))
491             info->uses_indirect_descriptor = true;
492          break;
493       case nir_intrinsic_load_local_invocation_id:
494       case nir_intrinsic_load_workgroup_id: {
495          unsigned mask = nir_def_components_read(&intr->def);
496          while (mask) {
497             unsigned i = u_bit_scan(&mask);
498 
499             if (intr->intrinsic == nir_intrinsic_load_workgroup_id)
500                info->uses_block_id[i] = true;
501             else
502                info->uses_thread_id[i] = true;
503          }
504          break;
505       }
506       case nir_intrinsic_load_color0:
507       case nir_intrinsic_load_color1: {
508          unsigned index = intr->intrinsic == nir_intrinsic_load_color1;
509          uint8_t mask = nir_def_components_read(&intr->def);
510          info->colors_read |= mask << (index * 4);
511 
512          switch (info->color_interpolate[index]) {
513          case INTERP_MODE_SMOOTH:
514             if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_SAMPLE)
515                info->uses_persp_sample = true;
516             else if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_CENTROID)
517                info->uses_persp_centroid = true;
518             else if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_CENTER)
519                info->uses_persp_center = true;
520             break;
521          case INTERP_MODE_NOPERSPECTIVE:
522             if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_SAMPLE)
523                info->uses_linear_sample = true;
524             else if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_CENTROID)
525                info->uses_linear_centroid = true;
526             else if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_CENTER)
527                info->uses_linear_center = true;
528             break;
529          case INTERP_MODE_COLOR:
530             /* We don't know the final value. This will be FLAT if flatshading is enabled
531              * in the rasterizer state, otherwise it will be SMOOTH.
532              */
533             info->uses_interp_color = true;
534             if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_SAMPLE)
535                info->uses_persp_sample_color = true;
536             else if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_CENTROID)
537                info->uses_persp_centroid_color = true;
538             else if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_CENTER)
539                info->uses_persp_center_color = true;
540             break;
541          }
542          break;
543       }
544       case nir_intrinsic_load_vector_arg_amd:
545          /* Non-monolithic lowered PS can have this. We need to record color usage. */
546          if (nir_intrinsic_flags(intr) & SI_VECTOR_ARG_IS_COLOR) {
547             /* The channel can be between 0 and 7. */
548             unsigned chan = SI_GET_VECTOR_ARG_COLOR_COMPONENT(nir_intrinsic_flags(intr));
549             info->colors_read |= BITFIELD_BIT(chan);
550          }
551          break;
552       case nir_intrinsic_load_barycentric_at_offset:   /* uses center */
553       case nir_intrinsic_load_barycentric_at_sample:   /* uses center */
554          if (nir_intrinsic_interp_mode(intr) == INTERP_MODE_FLAT)
555             break;
556 
557          if (nir_intrinsic_interp_mode(intr) == INTERP_MODE_NOPERSPECTIVE) {
558             info->uses_linear_center = true;
559          } else {
560             info->uses_persp_center = true;
561          }
562          if (intr->intrinsic == nir_intrinsic_load_barycentric_at_sample)
563             info->uses_interp_at_sample = true;
564          break;
565       case nir_intrinsic_load_frag_coord:
566          info->reads_frag_coord_mask |= nir_def_components_read(&intr->def);
567          break;
568       case nir_intrinsic_load_sample_pos:
569          info->reads_sample_pos_mask |= nir_def_components_read(&intr->def);
570          break;
571       case nir_intrinsic_load_input:
572       case nir_intrinsic_load_per_vertex_input:
573       case nir_intrinsic_load_input_vertex:
574       case nir_intrinsic_load_interpolated_input:
575          scan_io_usage(nir, info, intr, true);
576          break;
577       case nir_intrinsic_load_output:
578       case nir_intrinsic_load_per_vertex_output:
579       case nir_intrinsic_store_output:
580       case nir_intrinsic_store_per_vertex_output:
581          scan_io_usage(nir, info, intr, false);
582          break;
583       case nir_intrinsic_load_deref:
584       case nir_intrinsic_store_deref:
585          /* These can only occur if there is indirect temp indexing. */
586          break;
587       case nir_intrinsic_interp_deref_at_centroid:
588       case nir_intrinsic_interp_deref_at_sample:
589       case nir_intrinsic_interp_deref_at_offset:
590          unreachable("these opcodes should have been lowered");
591          break;
592       default:
593          break;
594       }
595    }
596 }
597 
si_nir_scan_shader(struct si_screen * sscreen,const struct nir_shader * nir,struct si_shader_info * info)598 void si_nir_scan_shader(struct si_screen *sscreen, const struct nir_shader *nir,
599                         struct si_shader_info *info)
600 {
601    memset(info, 0, sizeof(*info));
602    info->base = nir->info;
603 
604    /* Get options from shader profiles. */
605    for (unsigned i = 0; i < ARRAY_SIZE(si_shader_profiles); i++) {
606       if (_mesa_printed_sha1_equal(info->base.source_sha1, si_shader_profiles[i].sha1)) {
607          info->options = si_shader_profiles[i].options;
608          break;
609       }
610    }
611 
612    if (nir->info.stage == MESA_SHADER_FRAGMENT) {
613       /* post_depth_coverage implies early_fragment_tests */
614       info->base.fs.early_fragment_tests |= info->base.fs.post_depth_coverage;
615 
616       info->color_interpolate[0] = nir->info.fs.color0_interp;
617       info->color_interpolate[1] = nir->info.fs.color1_interp;
618       for (unsigned i = 0; i < 2; i++) {
619          if (info->color_interpolate[i] == INTERP_MODE_NONE)
620             info->color_interpolate[i] = INTERP_MODE_COLOR;
621       }
622 
623       info->color_interpolate_loc[0] = nir->info.fs.color0_sample ? TGSI_INTERPOLATE_LOC_SAMPLE :
624                                        nir->info.fs.color0_centroid ? TGSI_INTERPOLATE_LOC_CENTROID :
625                                                                       TGSI_INTERPOLATE_LOC_CENTER;
626       info->color_interpolate_loc[1] = nir->info.fs.color1_sample ? TGSI_INTERPOLATE_LOC_SAMPLE :
627                                        nir->info.fs.color1_centroid ? TGSI_INTERPOLATE_LOC_CENTROID :
628                                                                       TGSI_INTERPOLATE_LOC_CENTER;
629       /* Set an invalid value. Will be determined at draw time if needed when the expected
630        * conditions are met.
631        */
632       info->writes_1_if_tex_is_1 = nir->info.writes_memory ? 0 : 0xff;
633    }
634 
635    info->constbuf0_num_slots = nir->num_uniforms;
636 
637    if (nir->info.stage == MESA_SHADER_TESS_CTRL) {
638       info->tessfactors_are_def_in_all_invocs = are_tessfactors_def_in_all_invocs(nir);
639    }
640 
641    /* tess factors are loaded as input instead of system value */
642    info->reads_tess_factors = nir->info.inputs_read &
643       (BITFIELD64_BIT(VARYING_SLOT_TESS_LEVEL_INNER) |
644        BITFIELD64_BIT(VARYING_SLOT_TESS_LEVEL_OUTER));
645 
646    info->uses_frontface = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRONT_FACE);
647    info->uses_instanceid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_INSTANCE_ID);
648    info->uses_base_vertex = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX);
649    info->uses_base_instance = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BASE_INSTANCE);
650    info->uses_invocationid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_INVOCATION_ID);
651    info->uses_grid_size = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_NUM_WORKGROUPS);
652    info->uses_tg_size = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_NUM_SUBGROUPS) ||
653                         BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_LOCAL_INVOCATION_INDEX) ||
654                         BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SUBGROUP_ID) ||
655                         si_should_clear_lds(sscreen, nir);
656    info->uses_variable_block_size = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_WORKGROUP_SIZE);
657    info->uses_drawid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_DRAW_ID);
658    info->uses_primid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID) ||
659                        nir->info.inputs_read & VARYING_BIT_PRIMITIVE_ID;
660    info->reads_samplemask = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN);
661    info->uses_linear_sample = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BARYCENTRIC_LINEAR_SAMPLE);
662    info->uses_linear_centroid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BARYCENTRIC_LINEAR_CENTROID);
663    info->uses_linear_center = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BARYCENTRIC_LINEAR_PIXEL);
664    info->uses_persp_sample = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BARYCENTRIC_PERSP_SAMPLE);
665    info->uses_persp_centroid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BARYCENTRIC_PERSP_CENTROID);
666    info->uses_persp_center = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL);
667    info->uses_sampleid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID);
668    info->uses_layer_id = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_LAYER_ID);
669 
670    if (nir->info.stage == MESA_SHADER_FRAGMENT) {
671       info->writes_z = nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH);
672       info->writes_stencil = nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL);
673       info->writes_samplemask = nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK);
674 
675       info->colors_written = nir->info.outputs_written >> FRAG_RESULT_DATA0;
676       if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_COLOR)) {
677          info->color0_writes_all_cbufs = true;
678          info->colors_written |= 0x1;
679       }
680       if (nir->info.fs.color_is_dual_source)
681          info->colors_written |= 0x2;
682    } else {
683       info->writes_primid = nir->info.outputs_written & VARYING_BIT_PRIMITIVE_ID;
684       info->writes_viewport_index = nir->info.outputs_written & VARYING_BIT_VIEWPORT;
685       info->writes_layer = nir->info.outputs_written & VARYING_BIT_LAYER;
686       info->writes_psize = nir->info.outputs_written & VARYING_BIT_PSIZ;
687       info->writes_clipvertex = nir->info.outputs_written & VARYING_BIT_CLIP_VERTEX;
688       info->writes_edgeflag = nir->info.outputs_written & VARYING_BIT_EDGE;
689       info->writes_position = nir->info.outputs_written & VARYING_BIT_POS;
690    }
691 
692    nir_function_impl *impl = nir_shader_get_entrypoint((nir_shader*)nir);
693    nir_foreach_block (block, impl) {
694       nir_foreach_instr (instr, block)
695          scan_instruction(nir, info, instr);
696    }
697 
698    if (nir->info.stage == MESA_SHADER_VERTEX || nir->info.stage == MESA_SHADER_TESS_EVAL) {
699       /* Add the PrimitiveID output, but don't increment num_outputs.
700        * The driver inserts PrimitiveID only when it's used by the pixel shader,
701        * and si_emit_spi_map uses this unconditionally when such a pixel shader is used.
702        */
703       info->output_semantic[info->num_outputs] = VARYING_SLOT_PRIMITIVE_ID;
704       info->output_type[info->num_outputs] = nir_type_uint32;
705       info->output_usagemask[info->num_outputs] = 0x1;
706    }
707 
708    if (nir->info.stage == MESA_SHADER_FRAGMENT) {
709       info->allow_flat_shading = !(info->uses_persp_center || info->uses_persp_centroid ||
710                                    info->uses_persp_sample || info->uses_linear_center ||
711                                    info->uses_linear_centroid || info->uses_linear_sample ||
712                                    info->uses_interp_at_sample || nir->info.writes_memory ||
713                                    nir->info.fs.uses_fbfetch_output ||
714                                    nir->info.fs.needs_quad_helper_invocations ||
715                                    BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRAG_COORD) ||
716                                    BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_POINT_COORD) ||
717                                    BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID) ||
718                                    BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_POS) ||
719                                    BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN) ||
720                                    BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_HELPER_INVOCATION));
721 
722       info->uses_vmem_load_other |= info->base.fs.uses_fbfetch_output;
723 
724       /* Add both front and back color inputs. */
725       unsigned num_inputs_with_colors = info->num_inputs;
726       for (unsigned back = 0; back < 2; back++) {
727          for (unsigned i = 0; i < 2; i++) {
728             if ((info->colors_read >> (i * 4)) & 0xf) {
729                unsigned index = num_inputs_with_colors;
730 
731                info->input[index].semantic = (back ? VARYING_SLOT_BFC0 : VARYING_SLOT_COL0) + i;
732                info->input[index].interpolate = info->color_interpolate[i];
733                info->input[index].usage_mask = info->colors_read >> (i * 4);
734                num_inputs_with_colors++;
735 
736                /* Back-face color don't increment num_inputs. si_emit_spi_map will use
737                 * back-face colors conditionally only when they are needed.
738                 */
739                if (!back)
740                   info->num_inputs = num_inputs_with_colors;
741             }
742          }
743       }
744    }
745 
746    info->uses_vmem_load_other |= info->uses_indirect_descriptor;
747 
748    /* Trim output read masks based on write masks. */
749    for (unsigned i = 0; i < info->num_outputs; i++)
750       info->output_readmask[i] &= info->output_usagemask[i];
751 
752    info->has_divergent_loop = nir_has_divergent_loop((nir_shader*)nir);
753 
754    if (nir->info.stage == MESA_SHADER_VERTEX ||
755        nir->info.stage == MESA_SHADER_TESS_CTRL ||
756        nir->info.stage == MESA_SHADER_TESS_EVAL ||
757        nir->info.stage == MESA_SHADER_GEOMETRY) {
758       if (nir->info.stage == MESA_SHADER_TESS_CTRL) {
759          /* Always reserve space for these. */
760          info->patch_outputs_written |=
761             (1ull << ac_shader_io_get_unique_index_patch(VARYING_SLOT_TESS_LEVEL_INNER)) |
762             (1ull << ac_shader_io_get_unique_index_patch(VARYING_SLOT_TESS_LEVEL_OUTER));
763       }
764       for (unsigned i = 0; i < info->num_outputs; i++) {
765          unsigned semantic = info->output_semantic[i];
766 
767          if (semantic == VARYING_SLOT_TESS_LEVEL_INNER ||
768              semantic == VARYING_SLOT_TESS_LEVEL_OUTER ||
769              (semantic >= VARYING_SLOT_PATCH0 && semantic < VARYING_SLOT_TESS_MAX)) {
770             info->patch_outputs_written |= 1ull << ac_shader_io_get_unique_index_patch(semantic);
771          } else if ((semantic <= VARYING_SLOT_VAR31 || semantic >= VARYING_SLOT_VAR0_16BIT) &&
772                     semantic != VARYING_SLOT_EDGE) {
773             /* Ignore outputs that are not passed from VS to PS. */
774             if (semantic != VARYING_SLOT_POS &&
775                 semantic != VARYING_SLOT_PSIZ &&
776                 semantic != VARYING_SLOT_CLIP_VERTEX &&
777                 semantic != VARYING_SLOT_LAYER) {
778                info->outputs_written_before_ps |= 1ull
779                                                   << si_shader_io_get_unique_index(semantic);
780             }
781 
782             /* LAYER and VIEWPORT have no effect if they don't feed the rasterizer. */
783             if (semantic != VARYING_SLOT_LAYER &&
784                 semantic != VARYING_SLOT_VIEWPORT) {
785                info->outputs_written_before_tes_gs |=
786                   BITFIELD64_BIT(si_shader_io_get_unique_index(semantic));
787             }
788          }
789       }
790    }
791 
792    if (nir->info.stage == MESA_SHADER_VERTEX) {
793       info->num_vs_inputs =
794          nir->info.stage == MESA_SHADER_VERTEX && !info->base.vs.blit_sgprs_amd ? info->num_inputs : 0;
795       unsigned num_vbos_in_sgprs = si_num_vbos_in_user_sgprs_inline(sscreen->info.gfx_level);
796       info->num_vbos_in_user_sgprs = MIN2(info->num_vs_inputs, num_vbos_in_sgprs);
797    }
798 
799    if (nir->info.stage == MESA_SHADER_VERTEX ||
800        nir->info.stage == MESA_SHADER_TESS_CTRL ||
801        nir->info.stage == MESA_SHADER_TESS_EVAL) {
802       info->esgs_vertex_stride = info->lshs_vertex_stride =
803          util_last_bit64(info->outputs_written_before_tes_gs) * 16;
804 
805       /* Add 1 dword to reduce LDS bank conflicts, so that each vertex
806        * will start on a different bank. (except for the maximum 32*16).
807        */
808       info->lshs_vertex_stride += 4;
809 
810       /* For the ESGS ring in LDS, add 1 dword to reduce LDS bank
811        * conflicts, i.e. each vertex will start on a different bank.
812        */
813       if (sscreen->info.gfx_level >= GFX9)
814          info->esgs_vertex_stride += 4;
815       else
816          assert(((info->esgs_vertex_stride / 4) & C_028AAC_ITEMSIZE) == 0);
817 
818       info->tcs_vgpr_only_inputs = ~info->base.tess.tcs_cross_invocation_inputs_read &
819                                    ~info->base.inputs_read_indirectly &
820                                    info->base.inputs_read;
821    }
822 
823    if (nir->info.stage == MESA_SHADER_GEOMETRY) {
824       info->gsvs_vertex_size = info->num_outputs * 16;
825       info->max_gsvs_emit_size = info->gsvs_vertex_size * info->base.gs.vertices_out;
826       info->gs_input_verts_per_prim =
827          mesa_vertices_per_prim(info->base.gs.input_primitive);
828    }
829 
830    info->clipdist_mask = info->writes_clipvertex ? SI_USER_CLIP_PLANE_MASK :
831                          u_bit_consecutive(0, info->base.clip_distance_array_size);
832    info->culldist_mask = u_bit_consecutive(0, info->base.cull_distance_array_size) <<
833                          info->base.clip_distance_array_size;
834 
835    if (nir->info.stage == MESA_SHADER_FRAGMENT) {
836       for (unsigned i = 0; i < info->num_inputs; i++) {
837          unsigned semantic = info->input[i].semantic;
838 
839          if ((semantic <= VARYING_SLOT_VAR31 || semantic >= VARYING_SLOT_VAR0_16BIT) &&
840              semantic != VARYING_SLOT_PNTC) {
841             info->inputs_read |= 1ull << si_shader_io_get_unique_index(semantic);
842          }
843       }
844 
845       for (unsigned i = 0; i < 8; i++)
846          if (info->colors_written & (1 << i))
847             info->colors_written_4bit |= 0xf << (4 * i);
848 
849       for (unsigned i = 0; i < info->num_inputs; i++) {
850          if (info->input[i].semantic == VARYING_SLOT_COL0)
851             info->color_attr_index[0] = i;
852          else if (info->input[i].semantic == VARYING_SLOT_COL1)
853             info->color_attr_index[1] = i;
854       }
855    }
856 }
857 
858 enum ac_hw_stage
si_select_hw_stage(const gl_shader_stage stage,const union si_shader_key * const key,const enum amd_gfx_level gfx_level)859 si_select_hw_stage(const gl_shader_stage stage, const union si_shader_key *const key,
860                    const enum amd_gfx_level gfx_level)
861 {
862    switch (stage) {
863    case MESA_SHADER_VERTEX:
864    case MESA_SHADER_TESS_EVAL:
865       if (key->ge.as_ngg)
866          return AC_HW_NEXT_GEN_GEOMETRY_SHADER;
867       else if (key->ge.as_es)
868          return gfx_level >= GFX9 ? AC_HW_LEGACY_GEOMETRY_SHADER : AC_HW_EXPORT_SHADER;
869       else if (key->ge.as_ls)
870          return gfx_level >= GFX9 ? AC_HW_HULL_SHADER : AC_HW_LOCAL_SHADER;
871       else
872          return AC_HW_VERTEX_SHADER;
873    case MESA_SHADER_TESS_CTRL:
874       return AC_HW_HULL_SHADER;
875    case MESA_SHADER_GEOMETRY:
876       if (key->ge.as_ngg)
877          return AC_HW_NEXT_GEN_GEOMETRY_SHADER;
878       else
879          return AC_HW_LEGACY_GEOMETRY_SHADER;
880    case MESA_SHADER_FRAGMENT:
881       return AC_HW_PIXEL_SHADER;
882    case MESA_SHADER_COMPUTE:
883    case MESA_SHADER_KERNEL:
884       return AC_HW_COMPUTE_SHADER;
885    default:
886       unreachable("Unsupported HW stage");
887    }
888 }
889