/*
 * Copyright © 2019 Valve Corporation.
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 */

#include "radv_shader_args.h"
#include "radv_device.h"
#include "radv_physical_device.h"
#include "radv_shader.h"

struct user_sgpr_info {
   uint64_t inline_push_constant_mask;
   bool inlined_all_push_consts;
   bool indirect_all_descriptor_sets;
   uint8_t remaining_sgprs;
};

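/* Decide how many push constants can be promoted to inline user SGPRs with the SGPRs that remain.
 * If everything fits and the shader doesn't need dynamic descriptor offsets, the regular push
 * constant pointer is dropped entirely; otherwise the inline mask is clamped to what fits.
 */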
static void
allocate_inline_push_consts(const struct radv_shader_info *info, struct user_sgpr_info *user_sgpr_info)
{
   uint8_t remaining_sgprs = user_sgpr_info->remaining_sgprs;

   if (!info->inline_push_constant_mask)
      return;

   uint64_t mask = info->inline_push_constant_mask;
   uint8_t num_push_consts = util_bitcount64(mask);

   /* Disable the default push constants path if all constants can be inlined and if shaders don't
    * use dynamic descriptors.
    */
   if (num_push_consts <= MIN2(remaining_sgprs + 1, AC_MAX_INLINE_PUSH_CONSTS) && info->can_inline_all_push_constants &&
       !info->loads_dynamic_offsets) {
      user_sgpr_info->inlined_all_push_consts = true;
      remaining_sgprs++;
   } else {
      /* Clamp to the maximum number of allowed inlined push constants. */
      while (num_push_consts > MIN2(remaining_sgprs, AC_MAX_INLINE_PUSH_CONSTS_WITH_INDIRECT)) {
         num_push_consts--;
         mask &= ~BITFIELD64_BIT(util_last_bit64(mask) - 1);
      }
   }

   user_sgpr_info->remaining_sgprs = remaining_sgprs - util_bitcount64(mask);
   user_sgpr_info->inline_push_constant_mask = mask;
}

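/* Add a user data SGPR argument and record its location for the given user data slot. */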
static void
add_ud_arg(struct radv_shader_args *args, unsigned size, enum ac_arg_type type, struct ac_arg *arg,
           enum radv_ud_index ud)
{
   ac_add_arg(&args->ac, AC_ARG_SGPR, size, type, arg);

   struct radv_userdata_info *ud_info = &args->user_sgprs_locs.shader_data[ud];

   if (ud_info->sgpr_idx == -1)
      ud_info->sgpr_idx = args->num_user_sgprs;

   ud_info->num_sgprs += size;

   args->num_user_sgprs += size;
}

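/* Add a user SGPR holding the pointer of the given descriptor set and mark the set as enabled. */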
static void
add_descriptor_set(struct radv_shader_args *args, enum ac_arg_type type, struct ac_arg *arg, uint32_t set)
{
   ac_add_arg(&args->ac, AC_ARG_SGPR, 1, type, arg);

   struct radv_userdata_info *ud_info = &args->user_sgprs_locs.descriptor_sets[set];
   ud_info->sgpr_idx = args->num_user_sgprs;
   ud_info->num_sgprs = 1;

   args->user_sgprs_locs.descriptor_sets_enabled |= 1u << set;
   args->num_user_sgprs++;
}

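/* Declare the user SGPRs that are common to all stages: descriptor sets (direct or indirect),
 * push constants (pointer and/or inlined values) and streamout buffers.
 */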
static void
declare_global_input_sgprs(const enum amd_gfx_level gfx_level, const struct radv_shader_info *info,
                           const struct user_sgpr_info *user_sgpr_info, struct radv_shader_args *args)
{
   if (user_sgpr_info) {
      /* 1 for each descriptor set */
      if (!user_sgpr_info->indirect_all_descriptor_sets) {
         uint32_t mask = info->desc_set_used_mask;

         while (mask) {
            int i = u_bit_scan(&mask);

            add_descriptor_set(args, AC_ARG_CONST_PTR, &args->descriptor_sets[i], i);
         }
      } else {
         add_ud_arg(args, 1, AC_ARG_CONST_PTR_PTR, &args->descriptor_sets[0], AC_UD_INDIRECT_DESCRIPTOR_SETS);
      }

      if (info->merged_shader_compiled_separately ||
          (info->loads_push_constants && !user_sgpr_info->inlined_all_push_consts)) {
         /* 1 for push constants and dynamic descriptors */
         add_ud_arg(args, 1, AC_ARG_CONST_PTR, &args->ac.push_constants, AC_UD_PUSH_CONSTANTS);
      }

      for (unsigned i = 0; i < util_bitcount64(user_sgpr_info->inline_push_constant_mask); i++) {
         add_ud_arg(args, 1, AC_ARG_INT, &args->ac.inline_push_consts[i], AC_UD_INLINE_PUSH_CONSTANTS);
      }
      args->ac.inline_push_const_mask = user_sgpr_info->inline_push_constant_mask;
   }

   const bool needs_streamout_buffers =
      info->so.num_outputs ||
      (info->merged_shader_compiled_separately &&
       ((info->stage == MESA_SHADER_VERTEX && info->vs.as_es) ||
        (info->stage == MESA_SHADER_TESS_EVAL && info->tes.as_es) || info->stage == MESA_SHADER_GEOMETRY));

   if (needs_streamout_buffers) {
      add_ud_arg(args, 1, AC_ARG_CONST_DESC_PTR, &args->streamout_buffers, AC_UD_STREAMOUT_BUFFERS);

      if (gfx_level >= GFX12)
         add_ud_arg(args, 1, AC_ARG_CONST_DESC_PTR, &args->streamout_state, AC_UD_STREAMOUT_STATE);
   }
}

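/* Declare the VS-specific user SGPRs: prolog inputs, vertex buffer descriptors, base vertex,
 * draw ID and start instance.
 */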
static void
declare_vs_specific_input_sgprs(const struct radv_shader_info *info, struct radv_shader_args *args)
{
   if (info->vs.has_prolog)
      add_ud_arg(args, 2, AC_ARG_INT, &args->prolog_inputs, AC_UD_VS_PROLOG_INPUTS);

   if (info->type != RADV_SHADER_TYPE_GS_COPY) {
      if (info->vs.vb_desc_usage_mask) {
         add_ud_arg(args, 1, AC_ARG_CONST_DESC_PTR, &args->ac.vertex_buffers, AC_UD_VS_VERTEX_BUFFERS);
      }

      add_ud_arg(args, 1, AC_ARG_INT, &args->ac.base_vertex, AC_UD_VS_BASE_VERTEX_START_INSTANCE);
      if (info->vs.needs_draw_id) {
         add_ud_arg(args, 1, AC_ARG_INT, &args->ac.draw_id, AC_UD_VS_BASE_VERTEX_START_INSTANCE);
      }
      if (info->vs.needs_base_instance) {
         add_ud_arg(args, 1, AC_ARG_INT, &args->ac.start_instance, AC_UD_VS_BASE_VERTEX_START_INSTANCE);
      }
   }
}

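/* Declare the VS input VGPRs. The position of instance ID and the extra user VGPRs depends on the
 * GFX level and on how the VS is going to be used (LS, ES, NGG, ...).
 */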
static void
declare_vs_input_vgprs(enum amd_gfx_level gfx_level, const struct radv_shader_info *info, struct radv_shader_args *args,
                       bool merged_vs_tcs)
{
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vertex_id);
   if (info->type != RADV_SHADER_TYPE_GS_COPY) {
      if (gfx_level >= GFX12) {
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
      } else if (info->vs.as_ls || merged_vs_tcs) {
         if (gfx_level >= GFX11) {
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user VGPR */
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user VGPR */
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
         } else if (gfx_level >= GFX10) {
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_rel_patch_id);
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
         } else {
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_rel_patch_id);
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */
         }
      } else {
         if (gfx_level >= GFX10) {
            if (info->is_ngg) {
               ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
               ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
               ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
            } else {
               ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */
               ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_prim_id);
               ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
            }
         } else {
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_prim_id);
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */
         }
      }
   }

   if (info->vs.dynamic_inputs) {
      assert(info->vs.use_per_attribute_vb_descs);
      unsigned num_attributes = util_last_bit(info->vs.input_slot_usage_mask);
      for (unsigned i = 0; i < num_attributes; i++) {
         ac_add_arg(&args->ac, AC_ARG_VGPR, 4, AC_ARG_INT, &args->vs_inputs[i]);
         args->ac.args[args->vs_inputs[i].arg_index].pending_vmem = true;
      }
   }
}

static void
declare_streamout_sgprs(const struct radv_shader_info *info, struct radv_shader_args *args, gl_shader_stage stage)
{
   int i;

   /* Streamout SGPRs. */
   if (info->so.num_outputs) {
      assert(stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_TESS_EVAL);

      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.streamout_config);
      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.streamout_write_index);
   } else if (stage == MESA_SHADER_TESS_EVAL) {
      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
   }

   /* A streamout buffer offset is loaded if the stride is non-zero. */
   for (i = 0; i < 4; i++) {
      if (!info->so.strides[i])
         continue;

      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.streamout_offset[i]);
   }
}

static void
declare_tes_input_vgprs(struct radv_shader_args *args)
{
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.tes_u);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.tes_v);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tes_rel_patch_id);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tes_patch_id);
}

static void
declare_ms_input_sgprs(const struct radv_shader_info *info, struct radv_shader_args *args)
{
   if (info->cs.uses_grid_size) {
      add_ud_arg(args, 3, AC_ARG_INT, &args->ac.num_work_groups, AC_UD_VS_BASE_VERTEX_START_INSTANCE);
   }
   if (info->vs.needs_draw_id) {
      add_ud_arg(args, 1, AC_ARG_INT, &args->ac.draw_id, AC_UD_VS_BASE_VERTEX_START_INSTANCE);
   }
   if (info->ms.has_task) {
      add_ud_arg(args, 1, AC_ARG_INT, &args->ac.task_ring_entry, AC_UD_TASK_RING_ENTRY);
   }
}

static void
declare_ms_input_vgprs(const struct radv_device *device, struct radv_shader_args *args)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);

   if (pdev->mesh_fast_launch_2) {
      ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.local_invocation_ids_packed);
   } else {
      ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vertex_id);
      ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
      ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
      ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* instance_id */
   }
}

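/* Declare the full set of PS input VGPRs (barycentrics, fragment position, etc.) and optionally
 * compact them according to which inputs are actually enabled in SPI_PS_INPUT_ENA.
 */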
static void
declare_ps_input_vgprs(const struct radv_shader_info *info, struct radv_shader_args *args)
{
   ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_sample);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_center);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_centroid);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_INT, &args->ac.pull_model);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_sample);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_center);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_centroid);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, NULL); /* line stipple tex */
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[0]);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[1]);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[2]);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[3]);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.front_face);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.ancillary);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.sample_coverage);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.pos_fixed_pt);

   if (args->remap_spi_ps_input)
      ac_compact_ps_vgpr_args(&args->ac, info->ps.spi_ps_input_ena);
}

static void
declare_ngg_sgprs(const struct radv_shader_info *info, struct radv_shader_args *args, bool ngg_needs_state_sgpr)
{
   if (ngg_needs_state_sgpr)
      add_ud_arg(args, 1, AC_ARG_INT, &args->ngg_state, AC_UD_NGG_STATE);

   if (info->has_ngg_culling) {
      add_ud_arg(args, 1, AC_ARG_INT, &args->ngg_culling_settings, AC_UD_NGG_CULLING_SETTINGS);
      add_ud_arg(args, 1, AC_ARG_INT, &args->ngg_viewport_scale[0], AC_UD_NGG_VIEWPORT);
      add_ud_arg(args, 1, AC_ARG_INT, &args->ngg_viewport_scale[1], AC_UD_NGG_VIEWPORT);
      add_ud_arg(args, 1, AC_ARG_INT, &args->ngg_viewport_translate[0], AC_UD_NGG_VIEWPORT);
      add_ud_arg(args, 1, AC_ARG_INT, &args->ngg_viewport_translate[1], AC_UD_NGG_VIEWPORT);
   }
}

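/* Reset the argument layout and mark all user SGPR locations as unused. */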
static void
radv_init_shader_args(const struct radv_device *device, gl_shader_stage stage, struct radv_shader_args *args)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);
   memset(args, 0, sizeof(*args));

   args->explicit_scratch_args = !radv_use_llvm_for_stage(pdev, stage);
   args->remap_spi_ps_input = !radv_use_llvm_for_stage(pdev, stage);
   args->load_grid_size_from_user_sgpr = device->load_grid_size_from_user_sgpr;

   for (int i = 0; i < MAX_SETS; i++)
      args->user_sgprs_locs.descriptor_sets[i].sgpr_idx = -1;
   for (int i = 0; i < AC_UD_MAX_UD; i++)
      args->user_sgprs_locs.shader_data[i].sgpr_idx = -1;
}

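/* Declare the fixed argument layout that is shared by all ray tracing stages. */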
void
radv_declare_rt_shader_args(enum amd_gfx_level gfx_level, struct radv_shader_args *args)
{
   add_ud_arg(args, 2, AC_ARG_CONST_PTR, &args->ac.rt.uniform_shader_addr, AC_UD_SCRATCH_RING_OFFSETS);
   add_ud_arg(args, 1, AC_ARG_CONST_PTR_PTR, &args->descriptor_sets[0], AC_UD_INDIRECT_DESCRIPTOR_SETS);
   ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR, &args->ac.push_constants);
   ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_DESC_PTR, &args->ac.rt.sbt_descriptors);
   ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_PTR, &args->ac.rt.traversal_shader_addr);

   for (uint32_t i = 0; i < ARRAY_SIZE(args->ac.rt.launch_sizes); i++)
      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.rt.launch_sizes[i]);

   if (gfx_level < GFX9) {
      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
      ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_DESC_PTR, &args->ac.ring_offsets);
   }

   for (uint32_t i = 0; i < ARRAY_SIZE(args->ac.rt.launch_ids); i++)
      ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.rt.launch_ids[i]);

   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.rt.dynamic_callable_stack_base);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_CONST_PTR, &args->ac.rt.shader_addr);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_CONST_PTR, &args->ac.rt.shader_record);

   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.rt.payload_offset);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_FLOAT, &args->ac.rt.ray_origin);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_FLOAT, &args->ac.rt.ray_direction);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.rt.ray_tmin);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.rt.ray_tmax);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.rt.cull_mask_and_flags);

   ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_CONST_PTR, &args->ac.rt.accel_struct);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.rt.sbt_offset);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.rt.sbt_stride);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.rt.miss_index);

   ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_CONST_PTR, &args->ac.rt.instance_addr);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.rt.primitive_id);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.rt.geometry_id_and_flags);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.rt.hit_kind);
}

static bool
radv_tcs_needs_state_sgpr(const struct radv_shader_info *info, const struct radv_graphics_state_key *gfx_state)
{
   /* Some values are loaded from a SGPR when dynamic states are used or when the shader is unlinked. */
   return !gfx_state->ts.patch_control_points || !info->num_tess_patches || !info->inputs_linked;
}

static bool
radv_tes_needs_state_sgpr(const struct radv_shader_info *info)
{
   /* Some values are loaded from a SGPR when dynamic states are used or when the shader is unlinked. */
   return !info->num_tess_patches || !info->tes.tcs_vertices_out || !info->inputs_linked;
}

static bool
radv_ps_needs_state_sgpr(const struct radv_shader_info *info, const struct radv_graphics_state_key *gfx_state)
{
   if (info->ps.needs_sample_positions && gfx_state->dynamic_rasterization_samples)
      return true;

   if (gfx_state->dynamic_line_rast_mode)
      return true;

   if (info->ps.reads_sample_mask_in && (info->ps.uses_sample_shading || gfx_state->ms.sample_shading_enable))
      return true;

   /* For computing barycentrics when the primitive topology is unknown at compile time (GPL). */
   if (info->ps.load_rasterization_prim && gfx_state->unknown_rast_prim)
      return true;

   return false;
}

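/* Declare the argument layout of a TCS compiled separately from its VS part (merged shaders
 * compiled separately). Both parts rely on this fixed layout, and the SGPRs/VGPRs that both need
 * are marked as preserved.
 */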
static void
declare_unmerged_vs_tcs_args(const enum amd_gfx_level gfx_level, const struct radv_shader_info *info,
                             const struct user_sgpr_info *user_sgpr_info, struct radv_shader_args *args)
{
   /* SGPRs */
   add_ud_arg(args, 2, AC_ARG_INT, &args->prolog_inputs, AC_UD_VS_PROLOG_INPUTS);
   add_ud_arg(args, 1, AC_ARG_CONST_DESC_PTR, &args->ac.vertex_buffers, AC_UD_VS_VERTEX_BUFFERS);
   add_ud_arg(args, 1, AC_ARG_INT, &args->ac.base_vertex, AC_UD_VS_BASE_VERTEX_START_INSTANCE);
   add_ud_arg(args, 1, AC_ARG_INT, &args->ac.draw_id, AC_UD_VS_BASE_VERTEX_START_INSTANCE);
   add_ud_arg(args, 1, AC_ARG_INT, &args->ac.start_instance, AC_UD_VS_BASE_VERTEX_START_INSTANCE);

   declare_global_input_sgprs(gfx_level, info, user_sgpr_info, args);

   add_ud_arg(args, 1, AC_ARG_INT, &args->ac.view_index, AC_UD_VIEW_INDEX);
   add_ud_arg(args, 1, AC_ARG_INT, &args->tcs_offchip_layout, AC_UD_TCS_OFFCHIP_LAYOUT);
   add_ud_arg(args, 1, AC_ARG_INT, &args->epilog_pc, AC_UD_EPILOG_PC);
   add_ud_arg(args, 1, AC_ARG_INT, &args->next_stage_pc, AC_UD_NEXT_STAGE_PC);

   /* VGPRs (TCS first, then VS) */
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_patch_id);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_rel_ids);

   declare_vs_input_vgprs(gfx_level, info, args, true);

   /* Preserved SGPRs */
   ac_add_preserved(&args->ac, &args->ac.ring_offsets);
   ac_add_preserved(&args->ac, &args->ac.tess_offchip_offset);
   ac_add_preserved(&args->ac, &args->ac.merged_wave_info);
   ac_add_preserved(&args->ac, &args->ac.tcs_factor_offset);

   if (gfx_level >= GFX11) {
      ac_add_preserved(&args->ac, &args->ac.tcs_wave_id);
   } else {
      ac_add_preserved(&args->ac, &args->ac.scratch_offset);
   }

   ac_add_preserved(&args->ac, &args->descriptor_sets[0]);
   ac_add_preserved(&args->ac, &args->ac.push_constants);
   ac_add_preserved(&args->ac, &args->ac.view_index);
   ac_add_preserved(&args->ac, &args->tcs_offchip_layout);
   ac_add_preserved(&args->ac, &args->epilog_pc);

   /* Preserved VGPRs */
   ac_add_preserved(&args->ac, &args->ac.tcs_patch_id);
   ac_add_preserved(&args->ac, &args->ac.tcs_rel_ids);
}

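/* Same as above, but for a legacy or NGG GS compiled separately from its VS or TES part. */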
static void
declare_unmerged_vs_tes_gs_args(const enum amd_gfx_level gfx_level, const struct radv_shader_info *info,
                                const struct user_sgpr_info *user_sgpr_info, struct radv_shader_args *args)
{
   /* SGPRs */
   add_ud_arg(args, 2, AC_ARG_INT, &args->prolog_inputs, AC_UD_VS_PROLOG_INPUTS);
   add_ud_arg(args, 1, AC_ARG_CONST_DESC_PTR, &args->ac.vertex_buffers, AC_UD_VS_VERTEX_BUFFERS);
   add_ud_arg(args, 1, AC_ARG_INT, &args->ac.base_vertex, AC_UD_VS_BASE_VERTEX_START_INSTANCE);
   add_ud_arg(args, 1, AC_ARG_INT, &args->ac.draw_id, AC_UD_VS_BASE_VERTEX_START_INSTANCE);
   add_ud_arg(args, 1, AC_ARG_INT, &args->ac.start_instance, AC_UD_VS_BASE_VERTEX_START_INSTANCE);

   declare_global_input_sgprs(gfx_level, info, user_sgpr_info, args);

   add_ud_arg(args, 1, AC_ARG_INT, &args->ac.view_index, AC_UD_VIEW_INDEX);
   add_ud_arg(args, 1, AC_ARG_INT, &args->tcs_offchip_layout, AC_UD_TCS_OFFCHIP_LAYOUT);

   if (info->is_ngg) {
      add_ud_arg(args, 1, AC_ARG_INT, &args->ngg_state, AC_UD_NGG_STATE);
      if (gfx_level >= GFX12)
         add_ud_arg(args, 1, AC_ARG_INT, &args->ngg_query_buf_va, AC_UD_NGG_QUERY_BUF_VA);
   }
   add_ud_arg(args, 1, AC_ARG_INT, &args->vgt_esgs_ring_itemsize, AC_UD_VGT_ESGS_RING_ITEMSIZE);
   add_ud_arg(args, 1, AC_ARG_INT, &args->ngg_lds_layout, AC_UD_NGG_LDS_LAYOUT);
   add_ud_arg(args, 1, AC_ARG_INT, &args->next_stage_pc, AC_UD_NEXT_STAGE_PC);

   /* VGPRs (GS) */
   if (gfx_level >= GFX12) {
      ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[0]);
      ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_prim_id);
      ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[1]);
   } else {
      ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[0]);
      ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[1]);
      ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_prim_id);
      ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_invocation_id);
      ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[2]);
   }

   /* Preserved SGPRs */
   ac_add_preserved(&args->ac, &args->ac.ring_offsets);
   if (info->is_ngg) {
      ac_add_preserved(&args->ac, &args->ac.gs_tg_info);
   } else {
      ac_add_preserved(&args->ac, &args->ac.gs2vs_offset);
   }
   ac_add_preserved(&args->ac, &args->ac.merged_wave_info);
   ac_add_preserved(&args->ac, &args->ac.tess_offchip_offset);

   if (gfx_level >= GFX11) {
      ac_add_preserved(&args->ac, &args->ac.gs_attr_offset);
   } else {
      ac_add_preserved(&args->ac, &args->ac.scratch_offset);
   }

   ac_add_preserved(&args->ac, &args->descriptor_sets[0]);
   ac_add_preserved(&args->ac, &args->ac.push_constants);
   ac_add_preserved(&args->ac, &args->streamout_buffers);
   if (gfx_level >= GFX12)
      ac_add_preserved(&args->ac, &args->streamout_state);
   ac_add_preserved(&args->ac, &args->ac.view_index);
   ac_add_preserved(&args->ac, &args->tcs_offchip_layout);
   if (info->is_ngg) {
      ac_add_preserved(&args->ac, &args->ngg_state);
      if (gfx_level >= GFX12)
         ac_add_preserved(&args->ac, &args->ngg_query_buf_va);
   }
   ac_add_preserved(&args->ac, &args->vgt_esgs_ring_itemsize);
   ac_add_preserved(&args->ac, &args->ngg_lds_layout);

   /* Preserved VGPRs */
   ac_add_preserved(&args->ac, &args->ac.gs_vtx_offset[0]);
   ac_add_preserved(&args->ac, &args->ac.gs_vtx_offset[1]);
   ac_add_preserved(&args->ac, &args->ac.gs_prim_id);

   if (gfx_level < GFX12) {
      ac_add_preserved(&args->ac, &args->ac.gs_invocation_id);
      ac_add_preserved(&args->ac, &args->ac.gs_vtx_offset[2]);
   }
}

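/* Declare all arguments for the given stage. This runs twice: a first pass with
 * user_sgpr_info == NULL to count the user SGPRs that are always present, and a second pass that
 * also declares descriptor sets and (inline) push constants.
 */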
static void
declare_shader_args(const struct radv_device *device, const struct radv_graphics_state_key *gfx_state,
                    const struct radv_shader_info *info, gl_shader_stage stage, gl_shader_stage previous_stage,
                    struct radv_shader_args *args, struct user_sgpr_info *user_sgpr_info)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);
   const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
   bool has_shader_query = info->has_prim_query || info->has_xfb_query ||
                           (stage == MESA_SHADER_GEOMETRY && info->gs.has_pipeline_stat_query) ||
                           (stage == MESA_SHADER_MESH && info->ms.has_query) ||
                           (stage == MESA_SHADER_TASK && info->cs.has_query);
   bool has_ngg_provoking_vtx =
      (stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_GEOMETRY) && gfx_state->dynamic_provoking_vtx_mode;

   if (gfx_level >= GFX10 && info->is_ngg && stage != MESA_SHADER_GEOMETRY) {
      /* Handle all NGG shaders as GS to simplify the code here. */
      previous_stage = stage;
      stage = MESA_SHADER_GEOMETRY;
   }

   if (info->merged_shader_compiled_separately) {
      /* Update the stage for merged shaders compiled separately with ESO on GFX9+. */
      if (stage == MESA_SHADER_VERTEX && info->vs.as_ls) {
         previous_stage = MESA_SHADER_VERTEX;
         stage = MESA_SHADER_TESS_CTRL;
      } else if (stage == MESA_SHADER_VERTEX && info->vs.as_es) {
         previous_stage = MESA_SHADER_VERTEX;
         stage = MESA_SHADER_GEOMETRY;
      } else if (stage == MESA_SHADER_TESS_EVAL && info->tes.as_es) {
         previous_stage = MESA_SHADER_TESS_EVAL;
         stage = MESA_SHADER_GEOMETRY;
      }
   }

   radv_init_shader_args(device, stage, args);

   if (gl_shader_stage_is_rt(stage)) {
      radv_declare_rt_shader_args(gfx_level, args);
      return;
   }

   add_ud_arg(args, 2, AC_ARG_CONST_DESC_PTR, &args->ac.ring_offsets, AC_UD_SCRATCH_RING_OFFSETS);
   if (stage == MESA_SHADER_TASK) {
      add_ud_arg(args, 2, AC_ARG_CONST_DESC_PTR, &args->task_ring_offsets, AC_UD_CS_TASK_RING_OFFSETS);
   }

   /* For merged shaders, the user SGPRs start at 8, with 8 system SGPRs in front (including the
    * rw_buffers at s0/s1). With user SGPR0 = s8, let's restart the count from 0.
    */
   if (previous_stage != MESA_SHADER_NONE)
      args->num_user_sgprs = 0;

   /* To ensure prologs match the main VS, VS specific input SGPRs have to be placed before other
    * SGPRs.
    */

   switch (stage) {
   case MESA_SHADER_COMPUTE:
   case MESA_SHADER_TASK:
      declare_global_input_sgprs(gfx_level, info, user_sgpr_info, args);

      if (info->cs.uses_grid_size) {
         if (args->load_grid_size_from_user_sgpr)
            add_ud_arg(args, 3, AC_ARG_INT, &args->ac.num_work_groups, AC_UD_CS_GRID_SIZE);
         else
            add_ud_arg(args, 2, AC_ARG_CONST_PTR, &args->ac.num_work_groups, AC_UD_CS_GRID_SIZE);
      }

      if (info->type == RADV_SHADER_TYPE_RT_PROLOG) {
         add_ud_arg(args, 2, AC_ARG_CONST_DESC_PTR, &args->ac.rt.sbt_descriptors, AC_UD_CS_SBT_DESCRIPTORS);
         add_ud_arg(args, 2, AC_ARG_CONST_PTR, &args->ac.rt.traversal_shader_addr, AC_UD_CS_TRAVERSAL_SHADER_ADDR);
         add_ud_arg(args, 2, AC_ARG_CONST_PTR, &args->ac.rt.launch_size_addr, AC_UD_CS_RAY_LAUNCH_SIZE_ADDR);
         add_ud_arg(args, 1, AC_ARG_INT, &args->ac.rt.dynamic_callable_stack_base,
                    AC_UD_CS_RAY_DYNAMIC_CALLABLE_STACK_BASE);
      }

      if (info->vs.needs_draw_id) {
         add_ud_arg(args, 1, AC_ARG_INT, &args->ac.draw_id, AC_UD_CS_TASK_DRAW_ID);
      }

      if (stage == MESA_SHADER_TASK) {
         add_ud_arg(args, 1, AC_ARG_INT, &args->ac.task_ring_entry, AC_UD_TASK_RING_ENTRY);

         if (has_shader_query) {
            add_ud_arg(args, 1, AC_ARG_INT, &args->task_state, AC_UD_TASK_STATE);
         }
      }

      for (int i = 0; i < 3; i++) {
         if (info->cs.uses_block_id[i]) {
            if (gfx_level >= GFX12)
               args->ac.workgroup_ids[i].used = true;
            else
               ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.workgroup_ids[i]);
         }
      }

      if (info->cs.uses_local_invocation_idx) {
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tg_size);
      }

      if (args->explicit_scratch_args && gfx_level < GFX11) {
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
      }

      if (gfx_level >= GFX11) {
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.local_invocation_ids_packed);
      } else {
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.local_invocation_id_x);
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.local_invocation_id_y);
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.local_invocation_id_z);
      }
      break;
   case MESA_SHADER_VERTEX:
      /* NGG is handled by the GS case */
      assert(!info->is_ngg);

      declare_vs_specific_input_sgprs(info, args);

      declare_global_input_sgprs(gfx_level, info, user_sgpr_info, args);

      if (info->uses_view_index) {
         add_ud_arg(args, 1, AC_ARG_INT, &args->ac.view_index, AC_UD_VIEW_INDEX);
      }

      if (info->force_vrs_per_vertex) {
         add_ud_arg(args, 1, AC_ARG_INT, &args->ac.force_vrs_rates, AC_UD_FORCE_VRS_RATES);
      }

      if (info->vs.as_es) {
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.es2gs_offset);
      } else if (info->vs.as_ls) {
         /* no extra parameters */
      } else {
         declare_streamout_sgprs(info, args, stage);
      }

      if (args->explicit_scratch_args) {
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
      }

      declare_vs_input_vgprs(gfx_level, info, args, false);
      break;
   case MESA_SHADER_TESS_CTRL:
      if (previous_stage != MESA_SHADER_NONE) {
         // First 6 system regs
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.merged_wave_info);
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tcs_factor_offset);

         if (gfx_level >= GFX11) {
            ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tcs_wave_id);
         } else {
            ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
         }

         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown

         if (info->merged_shader_compiled_separately) {
            declare_unmerged_vs_tcs_args(gfx_level, info, user_sgpr_info, args);
         } else {
            declare_vs_specific_input_sgprs(info, args);

            declare_global_input_sgprs(gfx_level, info, user_sgpr_info, args);

            if (info->uses_view_index) {
               add_ud_arg(args, 1, AC_ARG_INT, &args->ac.view_index, AC_UD_VIEW_INDEX);
            }

            if (radv_tcs_needs_state_sgpr(info, gfx_state)) {
               add_ud_arg(args, 1, AC_ARG_INT, &args->tcs_offchip_layout, AC_UD_TCS_OFFCHIP_LAYOUT);
            }

            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_patch_id);
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_rel_ids);

            declare_vs_input_vgprs(gfx_level, info, args, true);
         }
      } else {
         declare_global_input_sgprs(gfx_level, info, user_sgpr_info, args);

         if (info->uses_view_index) {
            add_ud_arg(args, 1, AC_ARG_INT, &args->ac.view_index, AC_UD_VIEW_INDEX);
         }

         if (radv_tcs_needs_state_sgpr(info, gfx_state)) {
            add_ud_arg(args, 1, AC_ARG_INT, &args->tcs_offchip_layout, AC_UD_TCS_OFFCHIP_LAYOUT);
         }

         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tcs_factor_offset);
         if (args->explicit_scratch_args) {
            ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
         }
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_patch_id);
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_rel_ids);
      }
      break;
   case MESA_SHADER_TESS_EVAL:
      /* NGG is handled by the GS case */
      assert(!info->is_ngg);

      declare_global_input_sgprs(gfx_level, info, user_sgpr_info, args);

      if (info->uses_view_index)
         add_ud_arg(args, 1, AC_ARG_INT, &args->ac.view_index, AC_UD_VIEW_INDEX);

      if (radv_tes_needs_state_sgpr(info))
         add_ud_arg(args, 1, AC_ARG_INT, &args->tcs_offchip_layout, AC_UD_TCS_OFFCHIP_LAYOUT);

      if (info->tes.as_es) {
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.es2gs_offset);
      } else {
         declare_streamout_sgprs(info, args, stage);
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
      }
      if (args->explicit_scratch_args) {
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
      }
      declare_tes_input_vgprs(args);
      break;
   case MESA_SHADER_GEOMETRY:
      if (previous_stage != MESA_SHADER_NONE) {
         // First 6 system regs
         if (info->is_ngg) {
            ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs_tg_info);
         } else {
            ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs2vs_offset);
         }

         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.merged_wave_info);
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);

         if (gfx_level >= GFX11) {
            ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs_attr_offset);
         } else {
            ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
         }

         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown

         if (info->merged_shader_compiled_separately) {
            declare_unmerged_vs_tes_gs_args(gfx_level, info, user_sgpr_info, args);
         } else {
            if (previous_stage == MESA_SHADER_VERTEX) {
               declare_vs_specific_input_sgprs(info, args);
            } else if (previous_stage == MESA_SHADER_MESH) {
               declare_ms_input_sgprs(info, args);
            }

            declare_global_input_sgprs(gfx_level, info, user_sgpr_info, args);

            if (info->uses_view_index) {
               add_ud_arg(args, 1, AC_ARG_INT, &args->ac.view_index, AC_UD_VIEW_INDEX);
            }

            if (previous_stage == MESA_SHADER_TESS_EVAL && radv_tes_needs_state_sgpr(info))
               add_ud_arg(args, 1, AC_ARG_INT, &args->tcs_offchip_layout, AC_UD_TCS_OFFCHIP_LAYOUT);

            /* Legacy GS force vrs is handled by GS copy shader. */
            if (info->force_vrs_per_vertex && info->is_ngg) {
               add_ud_arg(args, 1, AC_ARG_INT, &args->ac.force_vrs_rates, AC_UD_FORCE_VRS_RATES);
            }

            if (info->is_ngg) {
               const bool ngg_needs_state_sgpr =
                  has_ngg_provoking_vtx || has_shader_query ||
                  (previous_stage == MESA_SHADER_VERTEX && info->vs.dynamic_num_verts_per_prim);

               declare_ngg_sgprs(info, args, ngg_needs_state_sgpr);

               if (pdev->info.gfx_level >= GFX12 && has_shader_query)
                  add_ud_arg(args, 1, AC_ARG_INT, &args->ngg_query_buf_va, AC_UD_NGG_QUERY_BUF_VA);
            }

            if (previous_stage != MESA_SHADER_MESH || !pdev->mesh_fast_launch_2) {
               if (gfx_level >= GFX12) {
                  ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[0]);
                  ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_prim_id);
                  ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[1]);
               } else {
                  ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[0]);
                  ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[1]);
                  ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_prim_id);
                  ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_invocation_id);
                  ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[2]);
               }
            }
         }

         if (previous_stage == MESA_SHADER_VERTEX) {
            declare_vs_input_vgprs(gfx_level, info, args, false);
         } else if (previous_stage == MESA_SHADER_TESS_EVAL) {
            declare_tes_input_vgprs(args);
         } else if (previous_stage == MESA_SHADER_MESH) {
            declare_ms_input_vgprs(device, args);
         }
      } else {
         declare_global_input_sgprs(gfx_level, info, user_sgpr_info, args);

         if (info->uses_view_index) {
            add_ud_arg(args, 1, AC_ARG_INT, &args->ac.view_index, AC_UD_VIEW_INDEX);
         }

         if (info->force_vrs_per_vertex) {
            add_ud_arg(args, 1, AC_ARG_INT, &args->ac.force_vrs_rates, AC_UD_FORCE_VRS_RATES);
         }

         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs2vs_offset);
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs_wave_id);
         if (args->explicit_scratch_args) {
            ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
         }
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[0]);
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[1]);
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_prim_id);
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[2]);
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[3]);
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[4]);
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[5]);
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_invocation_id);
      }
      break;
   case MESA_SHADER_FRAGMENT:
      declare_global_input_sgprs(gfx_level, info, user_sgpr_info, args);

      if (info->ps.has_epilog) {
         add_ud_arg(args, 1, AC_ARG_INT, &args->epilog_pc, AC_UD_EPILOG_PC);
      }

      if (radv_ps_needs_state_sgpr(info, gfx_state))
         add_ud_arg(args, 1, AC_ARG_INT, &args->ps_state, AC_UD_PS_STATE);

      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.prim_mask);

      if (info->ps.pops && gfx_level < GFX11) {
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.pops_collision_wave_id);
      }

      if (info->ps.load_provoking_vtx) {
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.load_provoking_vtx);
      }

      if (args->explicit_scratch_args && gfx_level < GFX11) {
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
      }

      declare_ps_input_vgprs(info, args);
      break;
   default:
      unreachable("Shader stage not implemented");
   }
}

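/* Compute how many user SGPRs remain for descriptor sets and inline push constants, then declare
 * the final argument layout for the given stage.
 */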
void
radv_declare_shader_args(const struct radv_device *device, const struct radv_graphics_state_key *gfx_state,
                         const struct radv_shader_info *info, gl_shader_stage stage, gl_shader_stage previous_stage,
                         struct radv_shader_args *args)
{
   declare_shader_args(device, gfx_state, info, stage, previous_stage, args, NULL);

   if (gl_shader_stage_is_rt(stage))
      return;

   uint32_t num_user_sgprs = args->num_user_sgprs;
   if (info->loads_push_constants)
      num_user_sgprs++;

   const struct radv_physical_device *pdev = radv_device_physical(device);
   const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
   uint32_t available_sgprs = gfx_level >= GFX9 && stage != MESA_SHADER_COMPUTE && stage != MESA_SHADER_TASK ? 32 : 16;
   uint32_t remaining_sgprs = available_sgprs - num_user_sgprs;

   struct user_sgpr_info user_sgpr_info = {
      .remaining_sgprs = remaining_sgprs,
   };

   uint32_t num_desc_set = util_bitcount(info->desc_set_used_mask);

   if (info->force_indirect_desc_sets || remaining_sgprs < num_desc_set) {
      user_sgpr_info.indirect_all_descriptor_sets = true;
      user_sgpr_info.remaining_sgprs--;
   } else {
      user_sgpr_info.remaining_sgprs -= num_desc_set;
   }

   if (!info->merged_shader_compiled_separately)
      allocate_inline_push_consts(info, &user_sgpr_info);

   declare_shader_args(device, gfx_state, info, stage, previous_stage, args, &user_sgpr_info);
}

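/* Declare the VGPR inputs of a PS epilog: depth/stencil/sample mask exports and one vec4 per
 * color attachment (unused attachments still reserve a vec4 slot).
 */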
void
radv_declare_ps_epilog_args(const struct radv_device *device, const struct radv_ps_epilog_key *key,
                            struct radv_shader_args *args)
{
   radv_init_shader_args(device, MESA_SHADER_FRAGMENT, args);

   /* Declare VGPR arguments for depth/stencil/sample exports. */
   if (key->export_depth)
      ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->depth);
   if (key->export_stencil)
      ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->stencil);
   if (key->export_sample_mask)
      ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->sample_mask);

   /* Declare VGPR arguments for color exports. */
   for (unsigned i = 0; i < MAX_RTS; i++) {
      const uint8_t color = (key->colors_written >> (i * 4) & 0xf);

      if (!color) {
         ac_add_arg(&args->ac, AC_ARG_VGPR, 4, AC_ARG_FLOAT, NULL);
         continue;
      }

      ac_add_arg(&args->ac, AC_ARG_VGPR, 4, AC_ARG_FLOAT, &args->colors[i]);
   }
}
940