• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2019 Valve Corporation.
3  * Copyright © 2016 Red Hat.
4  * Copyright © 2016 Bas Nieuwenhuizen
5  *
6  * based in part on anv driver which is:
7  * Copyright © 2015 Intel Corporation
8  *
9  * Permission is hereby granted, free of charge, to any person obtaining a
10  * copy of this software and associated documentation files (the "Software"),
11  * to deal in the Software without restriction, including without limitation
12  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
13  * and/or sell copies of the Software, and to permit persons to whom the
14  * Software is furnished to do so, subject to the following conditions:
15  *
16  * The above copyright notice and this permission notice (including the next
17  * paragraph) shall be included in all copies or substantial portions of the
18  * Software.
19  *
20  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
23  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
25  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
26  * IN THE SOFTWARE.
27  */
28 
29 #include "radv_shader_args.h"
30 #include "radv_private.h"
31 #include "radv_shader.h"
32 
33 static void
set_loc(struct radv_userdata_info * ud_info,uint8_t * sgpr_idx,uint8_t num_sgprs)34 set_loc(struct radv_userdata_info *ud_info, uint8_t *sgpr_idx, uint8_t num_sgprs)
35 {
36    ud_info->sgpr_idx = *sgpr_idx;
37    ud_info->num_sgprs = num_sgprs;
38    *sgpr_idx += num_sgprs;
39 }
40 
41 static void
set_loc_shader(struct radv_shader_args * args,int idx,uint8_t * sgpr_idx,uint8_t num_sgprs)42 set_loc_shader(struct radv_shader_args *args, int idx, uint8_t *sgpr_idx, uint8_t num_sgprs)
43 {
44    struct radv_userdata_info *ud_info = &args->user_sgprs_locs.shader_data[idx];
45    assert(ud_info);
46 
47    set_loc(ud_info, sgpr_idx, num_sgprs);
48 }
49 
50 static void
set_loc_shader_ptr(struct radv_shader_args * args,int idx,uint8_t * sgpr_idx)51 set_loc_shader_ptr(struct radv_shader_args *args, int idx, uint8_t *sgpr_idx)
52 {
53    bool use_32bit_pointers = idx != AC_UD_SCRATCH_RING_OFFSETS &&
54                              idx != AC_UD_CS_TASK_RING_OFFSETS && idx != AC_UD_CS_SBT_DESCRIPTORS &&
55                              idx != AC_UD_CS_RAY_LAUNCH_SIZE_ADDR;
56 
57    set_loc_shader(args, idx, sgpr_idx, use_32bit_pointers ? 1 : 2);
58 }
59 
60 static void
set_loc_desc(struct radv_shader_args * args,int idx,uint8_t * sgpr_idx)61 set_loc_desc(struct radv_shader_args *args, int idx, uint8_t *sgpr_idx)
62 {
63    struct radv_userdata_locations *locs = &args->user_sgprs_locs;
64    struct radv_userdata_info *ud_info = &locs->descriptor_sets[idx];
65    assert(ud_info);
66 
67    set_loc(ud_info, sgpr_idx, 1);
68 
69    locs->descriptor_sets_enabled |= 1u << idx;
70 }
71 
/* Result of the user-SGPR budgeting pass (allocate_user_sgprs). */
struct user_sgpr_info {
   /* Bitmask of push constants that will be passed inline in user SGPRs. */
   uint64_t inline_push_constant_mask;
   /* True when every push constant was inlined, so the push-constant pointer
    * SGPR is not needed at all.
    */
   bool inlined_all_push_consts;
   /* True when the used descriptor sets did not fit in user SGPRs and are
    * instead reached through a single indirect pointer.
    */
   bool indirect_all_descriptor_sets;
   /* User SGPRs still unassigned after the fixed allocations. */
   uint8_t remaining_sgprs;
};
78 
79 static uint8_t
count_vs_user_sgprs(const struct radv_shader_info * info)80 count_vs_user_sgprs(const struct radv_shader_info *info)
81 {
82    uint8_t count = 1; /* vertex offset */
83 
84    if (info->vs.vb_desc_usage_mask)
85       count++;
86    if (info->vs.needs_draw_id)
87       count++;
88    if (info->vs.needs_base_instance)
89       count++;
90 
91    return count;
92 }
93 
94 static uint8_t
count_ms_user_sgprs(const struct radv_shader_info * info)95 count_ms_user_sgprs(const struct radv_shader_info *info)
96 {
97    uint8_t count = 1 + 3; /* firstTask + num_work_groups[3] */
98 
99    if (info->vs.needs_draw_id)
100       count++;
101    if (info->cs.uses_task_rings)
102       count++;
103 
104    return count;
105 }
106 
107 static unsigned
count_ngg_sgprs(const struct radv_shader_info * info,bool has_ngg_query)108 count_ngg_sgprs(const struct radv_shader_info *info, bool has_ngg_query)
109 {
110    unsigned count = 0;
111 
112    if (has_ngg_query)
113       count += 1; /* ngg_query_state */
114    if (info->has_ngg_culling)
115       count += 5; /* ngg_culling_settings + 4x ngg_viewport_* */
116 
117    return count;
118 }
119 
/* Decide which push constants can be passed inline in user SGPRs instead of
 * being read through the push-constant pointer, and update the remaining SGPR
 * budget in user_sgpr_info accordingly.
 */
static void
allocate_inline_push_consts(const struct radv_shader_info *info,
                            struct user_sgpr_info *user_sgpr_info)
{
   uint8_t remaining_sgprs = user_sgpr_info->remaining_sgprs;

   /* Nothing to inline. */
   if (!info->inline_push_constant_mask)
      return;

   uint64_t mask = info->inline_push_constant_mask;
   uint8_t num_push_consts = util_bitcount64(mask);

   /* Disable the default push constants path if all constants can be inlined and if shaders don't
    * use dynamic descriptors.
    */
   if (num_push_consts <= MIN2(remaining_sgprs + 1, AC_MAX_INLINE_PUSH_CONSTS) &&
       info->can_inline_all_push_constants && !info->loads_dynamic_offsets) {
      user_sgpr_info->inlined_all_push_consts = true;
      /* The SGPR that would have held the push-constant pointer is freed,
       * hence the +1 above and the increment here.
       */
      remaining_sgprs++;
   } else {
      /* Clamp to the maximum number of allowed inlined push constants. */
      while (num_push_consts > MIN2(remaining_sgprs, AC_MAX_INLINE_PUSH_CONSTS_WITH_INDIRECT)) {
         num_push_consts--;
         /* Drop the highest set bit, i.e. stop inlining the last constant. */
         mask &= ~BITFIELD64_BIT(util_last_bit64(mask) - 1);
      }
   }

   user_sgpr_info->remaining_sgprs = remaining_sgprs - util_bitcount64(mask);
   user_sgpr_info->inline_push_constant_mask = mask;
}
150 
/* Compute how many user SGPRs the fixed features of this stage need, then
 * derive the budget left for descriptor sets and inline push constants.
 * When the used descriptor sets do not fit, fall back to a single indirect
 * pointer covering all of them.
 */
static void
allocate_user_sgprs(enum amd_gfx_level gfx_level, const struct radv_shader_info *info,
                    struct radv_shader_args *args, gl_shader_stage stage, bool has_previous_stage,
                    gl_shader_stage previous_stage, bool needs_view_index, bool has_ngg_query,
                    struct user_sgpr_info *user_sgpr_info)
{
   uint8_t user_sgpr_count = 0;

   memset(user_sgpr_info, 0, sizeof(struct user_sgpr_info));

   /* 2 user sgprs will always be allocated for scratch/rings */
   user_sgpr_count += 2;

   if (stage == MESA_SHADER_TASK)
      user_sgpr_count += 2; /* task descriptors */

   /* prolog inputs */
   if (info->vs.has_prolog)
      user_sgpr_count += 2;

   /* Stage-specific fixed allocations. */
   switch (stage) {
   case MESA_SHADER_COMPUTE:
   case MESA_SHADER_TASK:
      if (info->cs.uses_sbt)
         user_sgpr_count += 2;
      if (info->cs.uses_grid_size)
         user_sgpr_count += args->load_grid_size_from_user_sgpr ? 3 : 2;
      if (info->cs.uses_ray_launch_size)
         user_sgpr_count += 2;
      if (info->vs.needs_draw_id)
         user_sgpr_count += 1;
      if (info->cs.uses_task_rings)
         user_sgpr_count += 4; /* ring_entry, 2x ib_addr, ib_stride */
      break;
   case MESA_SHADER_FRAGMENT:
      /* epilog continue PC */
      if (info->ps.has_epilog)
         user_sgpr_count += 1;
      break;
   case MESA_SHADER_VERTEX:
      if (!args->is_gs_copy_shader)
         user_sgpr_count += count_vs_user_sgprs(info);
      break;
   case MESA_SHADER_TESS_CTRL:
      /* Merged VS+TCS carries the VS inputs along. */
      if (has_previous_stage) {
         if (previous_stage == MESA_SHADER_VERTEX)
            user_sgpr_count += count_vs_user_sgprs(info);
      }
      break;
   case MESA_SHADER_TESS_EVAL:
      break;
   case MESA_SHADER_GEOMETRY:
      /* Merged (ES+)GS; NGG shaders are also handled here. */
      if (has_previous_stage) {
         if (info->is_ngg)
            user_sgpr_count += count_ngg_sgprs(info, has_ngg_query);

         if (previous_stage == MESA_SHADER_VERTEX) {
            user_sgpr_count += count_vs_user_sgprs(info);
         } else if (previous_stage == MESA_SHADER_MESH) {
            user_sgpr_count += count_ms_user_sgprs(info);
         }
      }
      break;
   default:
      break;
   }

   if (needs_view_index)
      user_sgpr_count++;

   if (info->force_vrs_per_vertex)
      user_sgpr_count++;

   if (info->loads_push_constants)
      user_sgpr_count++;

   if (info->so.num_outputs)
      user_sgpr_count++;

   /* GFX9+ merged graphics stages have 32 user SGPRs available; compute/task
    * (and pre-GFX9) only 16.
    */
   uint32_t available_sgprs =
      gfx_level >= GFX9 && stage != MESA_SHADER_COMPUTE && stage != MESA_SHADER_TASK ? 32 : 16;
   /* NOTE(review): assumes user_sgpr_count <= available_sgprs, otherwise this
    * subtraction wraps — presumably guaranteed by the fixed counts above.
    */
   uint32_t remaining_sgprs = available_sgprs - user_sgpr_count;
   uint32_t num_desc_set = util_bitcount(info->desc_set_used_mask);

   if (remaining_sgprs < num_desc_set) {
      /* Not enough room for one SGPR per set: use one indirect pointer for
       * all sets instead. NOTE(review): assumes remaining_sgprs >= 1 here so
       * the -1 (for that pointer) does not wrap — confirm callers never
       * exhaust the whole budget.
       */
      user_sgpr_info->indirect_all_descriptor_sets = true;
      user_sgpr_info->remaining_sgprs = remaining_sgprs - 1;
   } else {
      user_sgpr_info->remaining_sgprs = remaining_sgprs - num_desc_set;
   }

   allocate_inline_push_consts(info, user_sgpr_info);
}
244 
245 static void
declare_global_input_sgprs(const struct radv_shader_info * info,const struct user_sgpr_info * user_sgpr_info,struct radv_shader_args * args)246 declare_global_input_sgprs(const struct radv_shader_info *info,
247                            const struct user_sgpr_info *user_sgpr_info,
248                            struct radv_shader_args *args)
249 {
250    /* 1 for each descriptor set */
251    if (!user_sgpr_info->indirect_all_descriptor_sets) {
252       uint32_t mask = info->desc_set_used_mask;
253 
254       while (mask) {
255          int i = u_bit_scan(&mask);
256 
257          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR, &args->descriptor_sets[i]);
258       }
259    } else {
260       ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR_PTR, &args->descriptor_sets[0]);
261    }
262 
263    if (info->loads_push_constants && !user_sgpr_info->inlined_all_push_consts) {
264       /* 1 for push constants and dynamic descriptors */
265       ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR, &args->ac.push_constants);
266    }
267 
268    for (unsigned i = 0; i < util_bitcount64(user_sgpr_info->inline_push_constant_mask); i++) {
269       ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.inline_push_consts[i]);
270    }
271    args->ac.inline_push_const_mask = user_sgpr_info->inline_push_constant_mask;
272 
273    if (info->so.num_outputs) {
274       ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR, &args->streamout_buffers);
275    }
276 }
277 
278 static void
declare_vs_specific_input_sgprs(const struct radv_shader_info * info,struct radv_shader_args * args,gl_shader_stage stage,bool has_previous_stage,gl_shader_stage previous_stage)279 declare_vs_specific_input_sgprs(const struct radv_shader_info *info, struct radv_shader_args *args,
280                                 gl_shader_stage stage, bool has_previous_stage,
281                                 gl_shader_stage previous_stage)
282 {
283    if (info->vs.has_prolog)
284       ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_INT, &args->prolog_inputs);
285 
286    if (!args->is_gs_copy_shader && (stage == MESA_SHADER_VERTEX ||
287                                     (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) {
288       if (info->vs.vb_desc_usage_mask) {
289          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR, &args->ac.vertex_buffers);
290       }
291       ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.base_vertex);
292       if (info->vs.needs_draw_id) {
293          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.draw_id);
294       }
295       if (info->vs.needs_base_instance) {
296          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.start_instance);
297       }
298    }
299 }
300 
/* Declare the VS input VGPRs. The slot layout is dictated by the hardware and
 * differs per GFX level and per shader configuration (merged LS, NGG, or
 * plain VS), so the argument order below must not be changed.
 */
static void
declare_vs_input_vgprs(enum amd_gfx_level gfx_level, const struct radv_shader_info *info,
                       struct radv_shader_args *args)
{
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vertex_id);
   if (!args->is_gs_copy_shader) {
      if (info->vs.as_ls) {
         /* VS merged into TCS (LS). */
         if (gfx_level >= GFX11) {
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user VGPR */
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user VGPR */
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
         } else if (gfx_level >= GFX10) {
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_rel_patch_id);
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
         } else {
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_rel_patch_id);
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */
         }
      } else {
         if (gfx_level >= GFX10) {
            if (info->is_ngg) {
               ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
               ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
               ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
            } else {
               ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */
               ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_prim_id);
               ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
            }
         } else {
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_prim_id);
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */
         }
      }
   }

   if (info->vs.dynamic_inputs) {
      /* With dynamic vertex input, one 4-component VGPR argument is declared
       * per attribute fetched by the prolog.
       */
      assert(info->vs.use_per_attribute_vb_descs);
      unsigned num_attributes = util_last_bit(info->vs.vb_desc_usage_mask);
      for (unsigned i = 0; i < num_attributes; i++)
         ac_add_arg(&args->ac, AC_ARG_VGPR, 4, AC_ARG_INT, &args->vs_inputs[i]);
      /* Ensure the main shader doesn't use less vgprs than the prolog. The prolog requires one
       * VGPR more than the number of shader arguments in the case of non-trivial divisors on GFX8.
       */
      ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL);
   }
}
352 
353 static void
declare_streamout_sgprs(const struct radv_shader_info * info,struct radv_shader_args * args,gl_shader_stage stage)354 declare_streamout_sgprs(const struct radv_shader_info *info, struct radv_shader_args *args,
355                         gl_shader_stage stage)
356 {
357    int i;
358 
359    /* Streamout SGPRs. */
360    if (info->so.num_outputs) {
361       assert(stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_TESS_EVAL);
362 
363       ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.streamout_config);
364       ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.streamout_write_index);
365    } else if (stage == MESA_SHADER_TESS_EVAL) {
366       ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
367    }
368 
369    /* A streamout buffer offset is loaded if the stride is non-zero. */
370    for (i = 0; i < 4; i++) {
371       if (!info->so.strides[i])
372          continue;
373 
374       ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.streamout_offset[i]);
375    }
376 }
377 
378 static void
declare_tes_input_vgprs(struct radv_shader_args * args)379 declare_tes_input_vgprs(struct radv_shader_args *args)
380 {
381    ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.tes_u);
382    ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.tes_v);
383    ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tes_rel_patch_id);
384    ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tes_patch_id);
385 }
386 
387 static void
declare_ms_input_sgprs(const struct radv_shader_info * info,struct radv_shader_args * args)388 declare_ms_input_sgprs(const struct radv_shader_info *info, struct radv_shader_args *args)
389 {
390    ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.base_vertex);
391    ac_add_arg(&args->ac, AC_ARG_SGPR, 3, AC_ARG_INT, &args->ac.num_work_groups);
392    if (info->vs.needs_draw_id) {
393       ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.draw_id);
394    }
395    if (info->cs.uses_task_rings) {
396       ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.task_ring_entry);
397    }
398 }
399 
400 static void
declare_ms_input_vgprs(struct radv_shader_args * args)401 declare_ms_input_vgprs(struct radv_shader_args *args)
402 {
403    ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vertex_id);
404    ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
405    ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
406    ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* instance_id */
407 }
408 
/* Declare the fixed PS input VGPR layout (barycentrics, frag coords, system
 * values), then optionally remap/skip the VGPR arguments according to
 * spi_ps_input so the argument list matches what the hardware actually loads.
 */
static void
declare_ps_input_vgprs(const struct radv_shader_info *info, struct radv_shader_args *args)
{
   unsigned spi_ps_input = info->ps.spi_ps_input;

   /* The full, fixed-order set of potential PS inputs; unused ones may be
    * skipped below via the remap pass.
    */
   ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_sample);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_center);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_centroid);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_INT, &args->ac.pull_model);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_sample);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_center);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_centroid);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, NULL); /* line stipple tex */
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[0]);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[1]);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[2]);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[3]);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.front_face);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.ancillary);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.sample_coverage);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* fixed pt */

   if (args->remap_spi_ps_input) {
      /* LLVM optimizes away unused FS inputs and computes spi_ps_input_addr itself and then
       * communicates the results back via the ELF binary. Mirror what LLVM does by re-mapping the
       * VGPR arguments here.
       */
      unsigned arg_count = 0;
      /* vgpr_arg counts VGPR arguments (bit index into spi_ps_input);
       * vgpr_reg tracks the physical VGPR offset of the kept arguments.
       */
      for (unsigned i = 0, vgpr_arg = 0, vgpr_reg = 0; i < args->ac.arg_count; i++) {
         if (args->ac.args[i].file != AC_ARG_VGPR) {
            arg_count++;
            continue;
         }

         if (!(spi_ps_input & (1 << vgpr_arg))) {
            /* Input not enabled in SPI_PS_INPUT: drop the argument. */
            args->ac.args[i].skip = true;
         } else {
            args->ac.args[i].offset = vgpr_reg;
            vgpr_reg += args->ac.args[i].size;
            arg_count++;
         }
         vgpr_arg++;
      }
   }

   if (info->ps.has_epilog) {
      /* FIXME: Ensure the main shader doesn't have less VGPRs than the epilog */
      for (unsigned i = 0; i < MAX_RTS; i++)
         ac_add_arg(&args->ac, AC_ARG_VGPR, 4, AC_ARG_INT, NULL);
   }
}
460 
461 static void
declare_ngg_sgprs(const struct radv_shader_info * info,struct radv_shader_args * args,bool has_ngg_query)462 declare_ngg_sgprs(const struct radv_shader_info *info, struct radv_shader_args *args,
463                   bool has_ngg_query)
464 {
465    if (has_ngg_query)
466       ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_query_state);
467 
468    if (info->has_ngg_culling) {
469       ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_culling_settings);
470       ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_viewport_scale[0]);
471       ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_viewport_scale[1]);
472       ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_viewport_translate[0]);
473       ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_viewport_translate[1]);
474    }
475 }
476 
477 static void
set_global_input_locs(struct radv_shader_args * args,const struct user_sgpr_info * user_sgpr_info,uint8_t * user_sgpr_idx)478 set_global_input_locs(struct radv_shader_args *args, const struct user_sgpr_info *user_sgpr_info,
479                       uint8_t *user_sgpr_idx)
480 {
481    if (!user_sgpr_info->indirect_all_descriptor_sets) {
482       for (unsigned i = 0; i < ARRAY_SIZE(args->descriptor_sets); i++) {
483          if (args->descriptor_sets[i].used)
484             set_loc_desc(args, i, user_sgpr_idx);
485       }
486    } else {
487       set_loc_shader_ptr(args, AC_UD_INDIRECT_DESCRIPTOR_SETS, user_sgpr_idx);
488    }
489 
490    if (args->ac.push_constants.used) {
491       set_loc_shader_ptr(args, AC_UD_PUSH_CONSTANTS, user_sgpr_idx);
492    }
493 
494    if (user_sgpr_info->inline_push_constant_mask) {
495       set_loc_shader(args, AC_UD_INLINE_PUSH_CONSTANTS, user_sgpr_idx,
496                      util_bitcount64(user_sgpr_info->inline_push_constant_mask));
497    }
498 
499    if (args->streamout_buffers.used) {
500       set_loc_shader_ptr(args, AC_UD_STREAMOUT_BUFFERS, user_sgpr_idx);
501    }
502 }
503 
504 static void
set_vs_specific_input_locs(struct radv_shader_args * args,gl_shader_stage stage,bool has_previous_stage,gl_shader_stage previous_stage,uint8_t * user_sgpr_idx)505 set_vs_specific_input_locs(struct radv_shader_args *args, gl_shader_stage stage,
506                            bool has_previous_stage, gl_shader_stage previous_stage,
507                            uint8_t *user_sgpr_idx)
508 {
509    if (args->prolog_inputs.used)
510       set_loc_shader(args, AC_UD_VS_PROLOG_INPUTS, user_sgpr_idx, 2);
511 
512    if (!args->is_gs_copy_shader && (stage == MESA_SHADER_VERTEX ||
513                                     (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) {
514       if (args->ac.vertex_buffers.used) {
515          set_loc_shader_ptr(args, AC_UD_VS_VERTEX_BUFFERS, user_sgpr_idx);
516       }
517 
518       unsigned vs_num = args->ac.base_vertex.used + args->ac.draw_id.used +
519                         args->ac.start_instance.used;
520       set_loc_shader(args, AC_UD_VS_BASE_VERTEX_START_INSTANCE, user_sgpr_idx, vs_num);
521    }
522 }
523 
524 static void
set_ms_input_locs(struct radv_shader_args * args,uint8_t * user_sgpr_idx)525 set_ms_input_locs(struct radv_shader_args *args, uint8_t *user_sgpr_idx)
526 {
527    unsigned vs_num =
528       args->ac.base_vertex.used + 3 * args->ac.num_work_groups.used + args->ac.draw_id.used;
529    set_loc_shader(args, AC_UD_VS_BASE_VERTEX_START_INSTANCE, user_sgpr_idx, vs_num);
530 
531    if (args->ac.task_ring_entry.used)
532       set_loc_shader(args, AC_UD_TASK_RING_ENTRY, user_sgpr_idx, 1);
533 }
534 
535 void
radv_declare_shader_args(enum amd_gfx_level gfx_level,const struct radv_pipeline_key * key,const struct radv_shader_info * info,gl_shader_stage stage,bool has_previous_stage,gl_shader_stage previous_stage,struct radv_shader_args * args)536 radv_declare_shader_args(enum amd_gfx_level gfx_level, const struct radv_pipeline_key *key,
537                          const struct radv_shader_info *info, gl_shader_stage stage,
538                          bool has_previous_stage, gl_shader_stage previous_stage,
539                          struct radv_shader_args *args)
540 {
541    struct user_sgpr_info user_sgpr_info;
542    bool needs_view_index = info->uses_view_index;
543    bool has_ngg_query = stage == MESA_SHADER_GEOMETRY || key->primitives_generated_query;
544 
545    if (gfx_level >= GFX10 && info->is_ngg && stage != MESA_SHADER_GEOMETRY) {
546       /* Handle all NGG shaders as GS to simplify the code here. */
547       previous_stage = stage;
548       stage = MESA_SHADER_GEOMETRY;
549       has_previous_stage = true;
550    }
551 
552    for (int i = 0; i < MAX_SETS; i++)
553       args->user_sgprs_locs.descriptor_sets[i].sgpr_idx = -1;
554    for (int i = 0; i < AC_UD_MAX_UD; i++)
555       args->user_sgprs_locs.shader_data[i].sgpr_idx = -1;
556 
557    allocate_user_sgprs(gfx_level, info, args, stage, has_previous_stage, previous_stage,
558                        needs_view_index, has_ngg_query, &user_sgpr_info);
559 
560    if (args->explicit_scratch_args) {
561       ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_DESC_PTR, &args->ring_offsets);
562    }
563    if (stage == MESA_SHADER_TASK) {
564       ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_DESC_PTR, &args->task_ring_offsets);
565    }
566 
567    /* To ensure prologs match the main VS, VS specific input SGPRs have to be placed before other
568     * sgprs.
569     */
570 
571    switch (stage) {
572    case MESA_SHADER_COMPUTE:
573    case MESA_SHADER_TASK:
574       declare_global_input_sgprs(info, &user_sgpr_info, args);
575 
576       if (info->cs.uses_sbt) {
577          ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_PTR, &args->ac.sbt_descriptors);
578       }
579 
580       if (info->cs.uses_grid_size) {
581          if (args->load_grid_size_from_user_sgpr)
582             ac_add_arg(&args->ac, AC_ARG_SGPR, 3, AC_ARG_INT, &args->ac.num_work_groups);
583          else
584             ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_PTR, &args->ac.num_work_groups);
585       }
586 
587       if (info->cs.uses_ray_launch_size) {
588          ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_PTR, &args->ac.ray_launch_size_addr);
589       }
590 
591       if (info->vs.needs_draw_id) {
592          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.draw_id);
593       }
594 
595       if (info->cs.uses_task_rings) {
596          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.task_ring_entry);
597          ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_INT, &args->task_ib_addr);
598          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->task_ib_stride);
599       }
600 
601       for (int i = 0; i < 3; i++) {
602          if (info->cs.uses_block_id[i]) {
603             ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.workgroup_ids[i]);
604          }
605       }
606 
607       if (info->cs.uses_local_invocation_idx) {
608          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tg_size);
609       }
610 
611       if (args->explicit_scratch_args && gfx_level < GFX11) {
612          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
613       }
614 
615       if (gfx_level >= GFX11)
616          ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.local_invocation_ids);
617       else
618          ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_INT, &args->ac.local_invocation_ids);
619       break;
620    case MESA_SHADER_VERTEX:
621       /* NGG is handled by the GS case */
622       assert(!info->is_ngg);
623 
624       declare_vs_specific_input_sgprs(info, args, stage, has_previous_stage, previous_stage);
625 
626       declare_global_input_sgprs(info, &user_sgpr_info, args);
627 
628       if (needs_view_index) {
629          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index);
630       }
631 
632       if (info->force_vrs_per_vertex) {
633          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.force_vrs_rates);
634       }
635 
636       if (info->vs.as_es) {
637          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.es2gs_offset);
638       } else if (info->vs.as_ls) {
639          /* no extra parameters */
640       } else {
641          declare_streamout_sgprs(info, args, stage);
642       }
643 
644       if (args->explicit_scratch_args) {
645          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
646       }
647 
648       declare_vs_input_vgprs(gfx_level, info, args);
649       break;
650    case MESA_SHADER_TESS_CTRL:
651       if (has_previous_stage) {
652          // First 6 system regs
653          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
654          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.merged_wave_info);
655          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tcs_factor_offset);
656 
657          if (gfx_level >= GFX11) {
658             ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tcs_wave_id);
659          } else {
660             ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
661          }
662 
663          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
664          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
665 
666          declare_vs_specific_input_sgprs(info, args, stage, has_previous_stage, previous_stage);
667 
668          declare_global_input_sgprs(info, &user_sgpr_info, args);
669 
670          if (needs_view_index) {
671             ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index);
672          }
673 
674          ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_patch_id);
675          ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_rel_ids);
676 
677          declare_vs_input_vgprs(gfx_level, info, args);
678       } else {
679          declare_global_input_sgprs(info, &user_sgpr_info, args);
680 
681          if (needs_view_index) {
682             ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index);
683          }
684 
685          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
686          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tcs_factor_offset);
687          if (args->explicit_scratch_args) {
688             ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
689          }
690          ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_patch_id);
691          ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_rel_ids);
692       }
693       break;
694    case MESA_SHADER_TESS_EVAL:
695       /* NGG is handled by the GS case */
696       assert(!info->is_ngg);
697 
698       declare_global_input_sgprs(info, &user_sgpr_info, args);
699 
700       if (needs_view_index)
701          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index);
702 
703       if (info->tes.as_es) {
704          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
705          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
706          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.es2gs_offset);
707       } else {
708          declare_streamout_sgprs(info, args, stage);
709          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
710       }
711       if (args->explicit_scratch_args) {
712          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
713       }
714       declare_tes_input_vgprs(args);
715       break;
716    case MESA_SHADER_GEOMETRY:
717       if (has_previous_stage) {
718          // First 6 system regs
719          if (info->is_ngg) {
720             ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs_tg_info);
721          } else {
722             ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs2vs_offset);
723          }
724 
725          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.merged_wave_info);
726          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
727 
728          if (gfx_level < GFX11) {
729             ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
730          }
731 
732          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
733          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
734 
735          if (previous_stage == MESA_SHADER_VERTEX) {
736             declare_vs_specific_input_sgprs(info, args, stage, has_previous_stage, previous_stage);
737          } else if (previous_stage == MESA_SHADER_MESH) {
738             declare_ms_input_sgprs(info, args);
739          }
740 
741          declare_global_input_sgprs(info, &user_sgpr_info, args);
742 
743          if (needs_view_index) {
744             ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index);
745          }
746 
747          if (info->force_vrs_per_vertex) {
748             ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.force_vrs_rates);
749          }
750 
751          if (info->is_ngg) {
752             declare_ngg_sgprs(info, args, has_ngg_query);
753          }
754 
755          ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[0]);
756          ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[1]);
757          ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_prim_id);
758          ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_invocation_id);
759          ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[2]);
760 
761          if (previous_stage == MESA_SHADER_VERTEX) {
762             declare_vs_input_vgprs(gfx_level, info, args);
763          } else if (previous_stage == MESA_SHADER_TESS_EVAL) {
764             declare_tes_input_vgprs(args);
765          } else if (previous_stage == MESA_SHADER_MESH) {
766             declare_ms_input_vgprs(args);
767          }
768       } else {
769          declare_global_input_sgprs(info, &user_sgpr_info, args);
770 
771          if (needs_view_index) {
772             ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index);
773          }
774 
775          if (info->force_vrs_per_vertex) {
776             ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.force_vrs_rates);
777          }
778 
779          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs2vs_offset);
780          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs_wave_id);
781          if (args->explicit_scratch_args) {
782             ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
783          }
784          ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[0]);
785          ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[1]);
786          ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_prim_id);
787          ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[2]);
788          ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[3]);
789          ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[4]);
790          ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[5]);
791          ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_invocation_id);
792       }
793       break;
794    case MESA_SHADER_FRAGMENT:
795       declare_global_input_sgprs(info, &user_sgpr_info, args);
796 
797       if (info->ps.has_epilog) {
798          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ps_epilog_pc);
799       }
800 
801       ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.prim_mask);
802       if (args->explicit_scratch_args && gfx_level < GFX11) {
803          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
804       }
805 
806       declare_ps_input_vgprs(info, args);
807       break;
808    default:
809       unreachable("Shader stage not implemented");
810    }
811 
812    uint8_t user_sgpr_idx = 0;
813 
814    set_loc_shader_ptr(args, AC_UD_SCRATCH_RING_OFFSETS, &user_sgpr_idx);
815    if (stage == MESA_SHADER_TASK) {
816       set_loc_shader_ptr(args, AC_UD_CS_TASK_RING_OFFSETS, &user_sgpr_idx);
817    }
818 
819    /* For merged shaders the user SGPRs start at 8, with 8 system SGPRs in front (including
820     * the rw_buffers at s0/s1. With user SGPR0 = s8, lets restart the count from 0 */
821    if (has_previous_stage)
822       user_sgpr_idx = 0;
823 
824    if (stage == MESA_SHADER_VERTEX || (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))
825       set_vs_specific_input_locs(args, stage, has_previous_stage, previous_stage, &user_sgpr_idx);
826    else if (has_previous_stage && previous_stage == MESA_SHADER_MESH)
827       set_ms_input_locs(args, &user_sgpr_idx);
828 
829    set_global_input_locs(args, &user_sgpr_info, &user_sgpr_idx);
830 
831    switch (stage) {
832    case MESA_SHADER_COMPUTE:
833    case MESA_SHADER_TASK:
834       if (args->ac.sbt_descriptors.used) {
835          set_loc_shader_ptr(args, AC_UD_CS_SBT_DESCRIPTORS, &user_sgpr_idx);
836       }
837       if (args->ac.num_work_groups.used) {
838          set_loc_shader(args, AC_UD_CS_GRID_SIZE, &user_sgpr_idx,
839                         args->load_grid_size_from_user_sgpr ? 3 : 2);
840       }
841       if (args->ac.ray_launch_size_addr.used) {
842          set_loc_shader_ptr(args, AC_UD_CS_RAY_LAUNCH_SIZE_ADDR, &user_sgpr_idx);
843       }
844       if (args->ac.draw_id.used) {
845          set_loc_shader(args, AC_UD_CS_TASK_DRAW_ID, &user_sgpr_idx, 1);
846       }
847       if (args->ac.task_ring_entry.used) {
848          set_loc_shader(args, AC_UD_TASK_RING_ENTRY, &user_sgpr_idx, 1);
849       }
850       if (args->task_ib_addr.used) {
851          assert(args->task_ib_stride.used);
852          set_loc_shader(args, AC_UD_CS_TASK_IB, &user_sgpr_idx, 3);
853       }
854       break;
855    case MESA_SHADER_VERTEX:
856       if (args->ac.view_index.used)
857          set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
858       if (args->ac.force_vrs_rates.used)
859          set_loc_shader(args, AC_UD_FORCE_VRS_RATES, &user_sgpr_idx, 1);
860       break;
861    case MESA_SHADER_TESS_CTRL:
862       if (args->ac.view_index.used)
863          set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
864       break;
865    case MESA_SHADER_TESS_EVAL:
866       if (args->ac.view_index.used)
867          set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
868       break;
869    case MESA_SHADER_GEOMETRY:
870       if (args->ac.view_index.used)
871          set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
872 
873       if (args->ac.force_vrs_rates.used)
874          set_loc_shader(args, AC_UD_FORCE_VRS_RATES, &user_sgpr_idx, 1);
875 
876       if (args->ngg_query_state.used) {
877          set_loc_shader(args, AC_UD_NGG_QUERY_STATE, &user_sgpr_idx, 1);
878       }
879 
880       if (args->ngg_culling_settings.used) {
881          set_loc_shader(args, AC_UD_NGG_CULLING_SETTINGS, &user_sgpr_idx, 1);
882       }
883 
884       if (args->ngg_viewport_scale[0].used) {
885          assert(args->ngg_viewport_scale[1].used &&
886                 args->ngg_viewport_translate[0].used &&
887                 args->ngg_viewport_translate[1].used);
888          set_loc_shader(args, AC_UD_NGG_VIEWPORT, &user_sgpr_idx, 4);
889       }
890       break;
891    case MESA_SHADER_FRAGMENT:
892       if (args->ps_epilog_pc.used)
893          set_loc_shader(args, AC_UD_PS_EPILOG_PC, &user_sgpr_idx, 1);
894       break;
895    default:
896       unreachable("Shader stage not implemented");
897    }
898 
899    args->num_user_sgprs = user_sgpr_idx;
900 }
901 
902 void
radv_declare_ps_epilog_args(enum amd_gfx_level gfx_level,const struct radv_ps_epilog_key * key,struct radv_shader_args * args)903 radv_declare_ps_epilog_args(enum amd_gfx_level gfx_level, const struct radv_ps_epilog_key *key,
904                             struct radv_shader_args *args)
905 {
906    unsigned num_inputs = 0;
907 
908    ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_DESC_PTR, &args->ring_offsets);
909    if (gfx_level < GFX11)
910       ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
911 
912    /* Declare VGPR arguments for color exports. */
913    for (unsigned i = 0; i < MAX_RTS; i++) {
914       unsigned col_format = (key->spi_shader_col_format >> (i * 4)) & 0xf;
915 
916       if (col_format == V_028714_SPI_SHADER_ZERO)
917          continue;
918 
919       ac_add_arg(&args->ac, AC_ARG_VGPR, 4, AC_ARG_FLOAT, &args->ps_epilog_inputs[num_inputs]);
920       num_inputs++;
921    }
922 }
923