1 /*
2 * Copyright © 2019 Valve Corporation.
3 * Copyright © 2016 Red Hat.
4 * Copyright © 2016 Bas Nieuwenhuizen
5 *
6 * based in part on anv driver which is:
7 * Copyright © 2015 Intel Corporation
8 *
9 * SPDX-License-Identifier: MIT
10 */
11
12 #include "radv_shader_args.h"
13 #include "radv_device.h"
14 #include "radv_physical_device.h"
15 #include "radv_shader.h"
16
/* Bookkeeping used while assigning user SGPRs during argument declaration. */
struct user_sgpr_info {
   /* Bitmask of push constant dwords that are passed inline as SGPR args. */
   uint64_t inline_push_constant_mask;
   /* True when every push constant is inlined, so the pointer-based push
    * constant path can be skipped entirely.
    */
   bool inlined_all_push_consts;
   /* True when descriptor sets are reached through a single indirect pointer
    * instead of one SGPR per set.
    */
   bool indirect_all_descriptor_sets;
   /* Number of user SGPRs still available for allocation. */
   uint8_t remaining_sgprs;
};
23
/* Decide which push constants are inlined into user SGPRs.
 *
 * Updates user_sgpr_info with the final inline mask, whether the default
 * (pointer-based) push constant path can be disabled, and the remaining
 * user SGPR budget after the inlined constants are accounted for.
 */
static void
allocate_inline_push_consts(const struct radv_shader_info *info, struct user_sgpr_info *user_sgpr_info)
{
   uint8_t remaining_sgprs = user_sgpr_info->remaining_sgprs;

   if (!info->inline_push_constant_mask)
      return;

   uint64_t mask = info->inline_push_constant_mask;
   uint8_t num_push_consts = util_bitcount64(mask);

   /* Disable the default push constants path if all constants can be inlined and if shaders don't
    * use dynamic descriptors.
    */
   if (num_push_consts <= MIN2(remaining_sgprs + 1, AC_MAX_INLINE_PUSH_CONSTS) && info->can_inline_all_push_constants &&
       !info->loads_dynamic_offsets) {
      user_sgpr_info->inlined_all_push_consts = true;
      /* The push constant pointer SGPR is no longer needed, reclaim it
       * (this is the "+ 1" accounted for in the condition above).
       */
      remaining_sgprs++;
   } else {
      /* Clamp to the maximum number of allowed inlined push constants. */
      while (num_push_consts > MIN2(remaining_sgprs, AC_MAX_INLINE_PUSH_CONSTS_WITH_INDIRECT)) {
         num_push_consts--;
         /* Drop the highest set bit so the lowest-indexed constants are kept. */
         mask &= ~BITFIELD64_BIT(util_last_bit64(mask) - 1);
      }
   }

   user_sgpr_info->remaining_sgprs = remaining_sgprs - util_bitcount64(mask);
   user_sgpr_info->inline_push_constant_mask = mask;
}
53
54 static void
add_ud_arg(struct radv_shader_args * args,unsigned size,enum ac_arg_type type,struct ac_arg * arg,enum radv_ud_index ud)55 add_ud_arg(struct radv_shader_args *args, unsigned size, enum ac_arg_type type, struct ac_arg *arg,
56 enum radv_ud_index ud)
57 {
58 ac_add_arg(&args->ac, AC_ARG_SGPR, size, type, arg);
59
60 struct radv_userdata_info *ud_info = &args->user_sgprs_locs.shader_data[ud];
61
62 if (ud_info->sgpr_idx == -1)
63 ud_info->sgpr_idx = args->num_user_sgprs;
64
65 ud_info->num_sgprs += size;
66
67 args->num_user_sgprs += size;
68 }
69
70 static void
add_descriptor_set(struct radv_shader_args * args,enum ac_arg_type type,struct ac_arg * arg,uint32_t set)71 add_descriptor_set(struct radv_shader_args *args, enum ac_arg_type type, struct ac_arg *arg, uint32_t set)
72 {
73 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, type, arg);
74
75 struct radv_userdata_info *ud_info = &args->user_sgprs_locs.descriptor_sets[set];
76 ud_info->sgpr_idx = args->num_user_sgprs;
77 ud_info->num_sgprs = 1;
78
79 args->user_sgprs_locs.descriptor_sets_enabled |= 1u << set;
80 args->num_user_sgprs++;
81 }
82
83 static void
declare_global_input_sgprs(const enum amd_gfx_level gfx_level,const struct radv_shader_info * info,const struct user_sgpr_info * user_sgpr_info,struct radv_shader_args * args)84 declare_global_input_sgprs(const enum amd_gfx_level gfx_level, const struct radv_shader_info *info,
85 const struct user_sgpr_info *user_sgpr_info, struct radv_shader_args *args)
86 {
87 if (user_sgpr_info) {
88 /* 1 for each descriptor set */
89 if (!user_sgpr_info->indirect_all_descriptor_sets) {
90 uint32_t mask = info->desc_set_used_mask;
91
92 while (mask) {
93 int i = u_bit_scan(&mask);
94
95 add_descriptor_set(args, AC_ARG_CONST_PTR, &args->descriptor_sets[i], i);
96 }
97 } else {
98 add_ud_arg(args, 1, AC_ARG_CONST_PTR_PTR, &args->descriptor_sets[0], AC_UD_INDIRECT_DESCRIPTOR_SETS);
99 }
100
101 if (info->merged_shader_compiled_separately ||
102 (info->loads_push_constants && !user_sgpr_info->inlined_all_push_consts)) {
103 /* 1 for push constants and dynamic descriptors */
104 add_ud_arg(args, 1, AC_ARG_CONST_PTR, &args->ac.push_constants, AC_UD_PUSH_CONSTANTS);
105 }
106
107 for (unsigned i = 0; i < util_bitcount64(user_sgpr_info->inline_push_constant_mask); i++) {
108 add_ud_arg(args, 1, AC_ARG_INT, &args->ac.inline_push_consts[i], AC_UD_INLINE_PUSH_CONSTANTS);
109 }
110 args->ac.inline_push_const_mask = user_sgpr_info->inline_push_constant_mask;
111 }
112
113 const bool needs_streamout_buffers =
114 info->so.num_outputs ||
115 (info->merged_shader_compiled_separately &&
116 ((info->stage == MESA_SHADER_VERTEX && info->vs.as_es) ||
117 (info->stage == MESA_SHADER_TESS_EVAL && info->tes.as_es) || info->stage == MESA_SHADER_GEOMETRY));
118
119 if (needs_streamout_buffers) {
120 add_ud_arg(args, 1, AC_ARG_CONST_DESC_PTR, &args->streamout_buffers, AC_UD_STREAMOUT_BUFFERS);
121
122 if (gfx_level >= GFX12)
123 add_ud_arg(args, 1, AC_ARG_CONST_DESC_PTR, &args->streamout_state, AC_UD_STREAMOUT_STATE);
124 }
125 }
126
127 static void
declare_vs_specific_input_sgprs(const struct radv_shader_info * info,struct radv_shader_args * args)128 declare_vs_specific_input_sgprs(const struct radv_shader_info *info, struct radv_shader_args *args)
129 {
130 if (info->vs.has_prolog)
131 add_ud_arg(args, 2, AC_ARG_INT, &args->prolog_inputs, AC_UD_VS_PROLOG_INPUTS);
132
133 if (info->type != RADV_SHADER_TYPE_GS_COPY) {
134 if (info->vs.vb_desc_usage_mask) {
135 add_ud_arg(args, 1, AC_ARG_CONST_DESC_PTR, &args->ac.vertex_buffers, AC_UD_VS_VERTEX_BUFFERS);
136 }
137
138 add_ud_arg(args, 1, AC_ARG_INT, &args->ac.base_vertex, AC_UD_VS_BASE_VERTEX_START_INSTANCE);
139 if (info->vs.needs_draw_id) {
140 add_ud_arg(args, 1, AC_ARG_INT, &args->ac.draw_id, AC_UD_VS_BASE_VERTEX_START_INSTANCE);
141 }
142 if (info->vs.needs_base_instance) {
143 add_ud_arg(args, 1, AC_ARG_INT, &args->ac.start_instance, AC_UD_VS_BASE_VERTEX_START_INSTANCE);
144 }
145 }
146 }
147
/* Declare the VS input VGPRs. The VGPR slot order is fixed by the hardware and
 * differs per GFX level and per hardware stage the VS runs as (merged LS/HS,
 * ES/GS, NGG or legacy), so each configuration declares its exact layout.
 */
static void
declare_vs_input_vgprs(enum amd_gfx_level gfx_level, const struct radv_shader_info *info, struct radv_shader_args *args,
                       bool merged_vs_tcs)
{
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vertex_id);
   if (info->type != RADV_SHADER_TYPE_GS_COPY) {
      if (gfx_level >= GFX12) {
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
      } else if (info->vs.as_ls || merged_vs_tcs) {
         /* VS feeding the tessellation control shader (LS/HS). */
         if (gfx_level >= GFX11) {
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user VGPR */
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user VGPR */
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
         } else if (gfx_level >= GFX10) {
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_rel_patch_id);
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
         } else {
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_rel_patch_id);
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */
         }
      } else {
         if (gfx_level >= GFX10) {
            if (info->is_ngg) {
               ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
               ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
               ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
            } else {
               ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */
               ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_prim_id);
               ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
            }
         } else {
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_prim_id);
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */
         }
      }
   }

   if (info->vs.dynamic_inputs) {
      assert(info->vs.use_per_attribute_vb_descs);
      unsigned num_attributes = util_last_bit(info->vs.input_slot_usage_mask);
      /* One 4-dword VGPR argument per vertex attribute.
       * NOTE(review): pending_vmem presumably marks the attribute loads (done
       * by the VS prolog) as still in flight — confirm against ac_shader_args.
       */
      for (unsigned i = 0; i < num_attributes; i++) {
         ac_add_arg(&args->ac, AC_ARG_VGPR, 4, AC_ARG_INT, &args->vs_inputs[i]);
         args->ac.args[args->vs_inputs[i].arg_index].pending_vmem = true;
      }
   }
}
198
199 static void
declare_streamout_sgprs(const struct radv_shader_info * info,struct radv_shader_args * args,gl_shader_stage stage)200 declare_streamout_sgprs(const struct radv_shader_info *info, struct radv_shader_args *args, gl_shader_stage stage)
201 {
202 int i;
203
204 /* Streamout SGPRs. */
205 if (info->so.num_outputs) {
206 assert(stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_TESS_EVAL);
207
208 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.streamout_config);
209 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.streamout_write_index);
210 } else if (stage == MESA_SHADER_TESS_EVAL) {
211 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
212 }
213
214 /* A streamout buffer offset is loaded if the stride is non-zero. */
215 for (i = 0; i < 4; i++) {
216 if (!info->so.strides[i])
217 continue;
218
219 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.streamout_offset[i]);
220 }
221 }
222
223 static void
declare_tes_input_vgprs(struct radv_shader_args * args)224 declare_tes_input_vgprs(struct radv_shader_args *args)
225 {
226 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.tes_u);
227 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.tes_v);
228 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tes_rel_patch_id);
229 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tes_patch_id);
230 }
231
232 static void
declare_ms_input_sgprs(const struct radv_shader_info * info,struct radv_shader_args * args)233 declare_ms_input_sgprs(const struct radv_shader_info *info, struct radv_shader_args *args)
234 {
235 if (info->cs.uses_grid_size) {
236 add_ud_arg(args, 3, AC_ARG_INT, &args->ac.num_work_groups, AC_UD_VS_BASE_VERTEX_START_INSTANCE);
237 }
238 if (info->vs.needs_draw_id) {
239 add_ud_arg(args, 1, AC_ARG_INT, &args->ac.draw_id, AC_UD_VS_BASE_VERTEX_START_INSTANCE);
240 }
241 if (info->ms.has_task) {
242 add_ud_arg(args, 1, AC_ARG_INT, &args->ac.task_ring_entry, AC_UD_TASK_RING_ENTRY);
243 }
244 }
245
246 static void
declare_ms_input_vgprs(const struct radv_device * device,struct radv_shader_args * args)247 declare_ms_input_vgprs(const struct radv_device *device, struct radv_shader_args *args)
248 {
249 const struct radv_physical_device *pdev = radv_device_physical(device);
250
251 if (pdev->mesh_fast_launch_2) {
252 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.local_invocation_ids_packed);
253 } else {
254 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vertex_id);
255 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
256 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
257 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* instance_id */
258 }
259 }
260
261 static void
declare_ps_input_vgprs(const struct radv_shader_info * info,struct radv_shader_args * args)262 declare_ps_input_vgprs(const struct radv_shader_info *info, struct radv_shader_args *args)
263 {
264 ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_sample);
265 ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_center);
266 ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_centroid);
267 ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_INT, &args->ac.pull_model);
268 ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_sample);
269 ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_center);
270 ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_centroid);
271 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, NULL); /* line stipple tex */
272 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[0]);
273 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[1]);
274 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[2]);
275 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[3]);
276 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.front_face);
277 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.ancillary);
278 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.sample_coverage);
279 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.pos_fixed_pt);
280
281 if (args->remap_spi_ps_input)
282 ac_compact_ps_vgpr_args(&args->ac, info->ps.spi_ps_input_ena);
283 }
284
285 static void
declare_ngg_sgprs(const struct radv_shader_info * info,struct radv_shader_args * args,bool ngg_needs_state_sgpr)286 declare_ngg_sgprs(const struct radv_shader_info *info, struct radv_shader_args *args, bool ngg_needs_state_sgpr)
287 {
288 if (ngg_needs_state_sgpr)
289 add_ud_arg(args, 1, AC_ARG_INT, &args->ngg_state, AC_UD_NGG_STATE);
290
291 if (info->has_ngg_culling) {
292 add_ud_arg(args, 1, AC_ARG_INT, &args->ngg_culling_settings, AC_UD_NGG_CULLING_SETTINGS);
293 add_ud_arg(args, 1, AC_ARG_INT, &args->ngg_viewport_scale[0], AC_UD_NGG_VIEWPORT);
294 add_ud_arg(args, 1, AC_ARG_INT, &args->ngg_viewport_scale[1], AC_UD_NGG_VIEWPORT);
295 add_ud_arg(args, 1, AC_ARG_INT, &args->ngg_viewport_translate[0], AC_UD_NGG_VIEWPORT);
296 add_ud_arg(args, 1, AC_ARG_INT, &args->ngg_viewport_translate[1], AC_UD_NGG_VIEWPORT);
297 }
298 }
299
300 static void
radv_init_shader_args(const struct radv_device * device,gl_shader_stage stage,struct radv_shader_args * args)301 radv_init_shader_args(const struct radv_device *device, gl_shader_stage stage, struct radv_shader_args *args)
302 {
303 const struct radv_physical_device *pdev = radv_device_physical(device);
304 memset(args, 0, sizeof(*args));
305
306 args->explicit_scratch_args = !radv_use_llvm_for_stage(pdev, stage);
307 args->remap_spi_ps_input = !radv_use_llvm_for_stage(pdev, stage);
308 args->load_grid_size_from_user_sgpr = device->load_grid_size_from_user_sgpr;
309
310 for (int i = 0; i < MAX_SETS; i++)
311 args->user_sgprs_locs.descriptor_sets[i].sgpr_idx = -1;
312 for (int i = 0; i < AC_UD_MAX_UD; i++)
313 args->user_sgprs_locs.shader_data[i].sgpr_idx = -1;
314 }
315
/* Declare the ray tracing shader calling convention. The order below is the
 * RT ABI shared by all RT stages (raygen, traversal, hit/miss/callable):
 * uniform state in SGPRs, per-ray state in VGPRs.
 */
void
radv_declare_rt_shader_args(enum amd_gfx_level gfx_level, struct radv_shader_args *args)
{
   /* Uniform SGPRs. */
   add_ud_arg(args, 2, AC_ARG_CONST_PTR, &args->ac.rt.uniform_shader_addr, AC_UD_SCRATCH_RING_OFFSETS);
   add_ud_arg(args, 1, AC_ARG_CONST_PTR_PTR, &args->descriptor_sets[0], AC_UD_INDIRECT_DESCRIPTOR_SETS);
   ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR, &args->ac.push_constants);
   ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_DESC_PTR, &args->ac.rt.sbt_descriptors);
   ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_PTR, &args->ac.rt.traversal_shader_addr);

   for (uint32_t i = 0; i < ARRAY_SIZE(args->ac.rt.launch_sizes); i++)
      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.rt.launch_sizes[i]);

   /* Scratch setup is only explicit before GFX9. */
   if (gfx_level < GFX9) {
      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
      ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_DESC_PTR, &args->ac.ring_offsets);
   }

   /* Per-ray VGPRs. */
   for (uint32_t i = 0; i < ARRAY_SIZE(args->ac.rt.launch_ids); i++)
      ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.rt.launch_ids[i]);

   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.rt.dynamic_callable_stack_base);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_CONST_PTR, &args->ac.rt.shader_addr);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_CONST_PTR, &args->ac.rt.shader_record);

   /* Ray definition. */
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.rt.payload_offset);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_FLOAT, &args->ac.rt.ray_origin);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_FLOAT, &args->ac.rt.ray_direction);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.rt.ray_tmin);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.rt.ray_tmax);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.rt.cull_mask_and_flags);

   /* traceRay() parameters. */
   ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_CONST_PTR, &args->ac.rt.accel_struct);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.rt.sbt_offset);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.rt.sbt_stride);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.rt.miss_index);

   /* Intersection result. */
   ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_CONST_PTR, &args->ac.rt.instance_addr);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.rt.primitive_id);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.rt.geometry_id_and_flags);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.rt.hit_kind);
}
357
358 static bool
radv_tcs_needs_state_sgpr(const struct radv_shader_info * info,const struct radv_graphics_state_key * gfx_state)359 radv_tcs_needs_state_sgpr(const struct radv_shader_info *info, const struct radv_graphics_state_key *gfx_state)
360 {
361 /* Some values are loaded from a SGPR when dynamic states are used or when the shader is unlinked. */
362 return !gfx_state->ts.patch_control_points || !info->num_tess_patches || !info->inputs_linked;
363 }
364
365 static bool
radv_tes_needs_state_sgpr(const struct radv_shader_info * info)366 radv_tes_needs_state_sgpr(const struct radv_shader_info *info)
367 {
368 /* Some values are loaded from a SGPR when dynamic states are used or when the shader is unlinked. */
369 return !info->num_tess_patches || !info->tes.tcs_vertices_out || !info->inputs_linked;
370 }
371
372 static bool
radv_ps_needs_state_sgpr(const struct radv_shader_info * info,const struct radv_graphics_state_key * gfx_state)373 radv_ps_needs_state_sgpr(const struct radv_shader_info *info, const struct radv_graphics_state_key *gfx_state)
374 {
375 if (info->ps.needs_sample_positions && gfx_state->dynamic_rasterization_samples)
376 return true;
377
378 if (gfx_state->dynamic_line_rast_mode)
379 return true;
380
381 if (info->ps.reads_sample_mask_in && (info->ps.uses_sample_shading || gfx_state->ms.sample_shading_enable))
382 return true;
383
384 /* For computing barycentrics when the primitive topology is unknown at compile time (GPL). */
385 if (info->ps.load_rasterization_prim && gfx_state->unknown_rast_prim)
386 return true;
387
388 return false;
389 }
390
/* Declare the arguments for a VS+TCS pair compiled separately (shader object /
 * GPL) and merged at runtime. The full superset of SGPRs/VGPRs is declared so
 * both halves agree on the layout, and the registers the VS half must keep
 * intact for the TCS half are marked as preserved.
 */
static void
declare_unmerged_vs_tcs_args(const enum amd_gfx_level gfx_level, const struct radv_shader_info *info,
                             const struct user_sgpr_info *user_sgpr_info, struct radv_shader_args *args)
{
   /* SGPRs */
   add_ud_arg(args, 2, AC_ARG_INT, &args->prolog_inputs, AC_UD_VS_PROLOG_INPUTS);
   add_ud_arg(args, 1, AC_ARG_CONST_DESC_PTR, &args->ac.vertex_buffers, AC_UD_VS_VERTEX_BUFFERS);
   add_ud_arg(args, 1, AC_ARG_INT, &args->ac.base_vertex, AC_UD_VS_BASE_VERTEX_START_INSTANCE);
   add_ud_arg(args, 1, AC_ARG_INT, &args->ac.draw_id, AC_UD_VS_BASE_VERTEX_START_INSTANCE);
   add_ud_arg(args, 1, AC_ARG_INT, &args->ac.start_instance, AC_UD_VS_BASE_VERTEX_START_INSTANCE);

   declare_global_input_sgprs(gfx_level, info, user_sgpr_info, args);

   add_ud_arg(args, 1, AC_ARG_INT, &args->ac.view_index, AC_UD_VIEW_INDEX);
   add_ud_arg(args, 1, AC_ARG_INT, &args->tcs_offchip_layout, AC_UD_TCS_OFFCHIP_LAYOUT);
   add_ud_arg(args, 1, AC_ARG_INT, &args->epilog_pc, AC_UD_EPILOG_PC);
   add_ud_arg(args, 1, AC_ARG_INT, &args->next_stage_pc, AC_UD_NEXT_STAGE_PC);

   /* VGPRs (TCS first, then VS) */
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_patch_id);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_rel_ids);

   declare_vs_input_vgprs(gfx_level, info, args, true);

   /* Preserved SGPRs */
   ac_add_preserved(&args->ac, &args->ac.ring_offsets);
   ac_add_preserved(&args->ac, &args->ac.tess_offchip_offset);
   ac_add_preserved(&args->ac, &args->ac.merged_wave_info);
   ac_add_preserved(&args->ac, &args->ac.tcs_factor_offset);

   /* The wave id SGPR replaces the scratch offset on GFX11+. */
   if (gfx_level >= GFX11) {
      ac_add_preserved(&args->ac, &args->ac.tcs_wave_id);
   } else {
      ac_add_preserved(&args->ac, &args->ac.scratch_offset);
   }

   ac_add_preserved(&args->ac, &args->descriptor_sets[0]);
   ac_add_preserved(&args->ac, &args->ac.push_constants);
   ac_add_preserved(&args->ac, &args->ac.view_index);
   ac_add_preserved(&args->ac, &args->tcs_offchip_layout);
   ac_add_preserved(&args->ac, &args->epilog_pc);

   /* Preserved VGPRs */
   ac_add_preserved(&args->ac, &args->ac.tcs_patch_id);
   ac_add_preserved(&args->ac, &args->ac.tcs_rel_ids);
}
437
/* Declare the arguments for a VS/TES+GS pair compiled separately (shader
 * object / GPL) and merged at runtime. Like the VS+TCS variant: the full
 * superset of SGPRs/VGPRs is declared so both halves agree on the layout, and
 * the registers the ES half must keep intact for the GS half are preserved.
 */
static void
declare_unmerged_vs_tes_gs_args(const enum amd_gfx_level gfx_level, const struct radv_shader_info *info,
                                const struct user_sgpr_info *user_sgpr_info, struct radv_shader_args *args)
{
   /* SGPRs */
   add_ud_arg(args, 2, AC_ARG_INT, &args->prolog_inputs, AC_UD_VS_PROLOG_INPUTS);
   add_ud_arg(args, 1, AC_ARG_CONST_DESC_PTR, &args->ac.vertex_buffers, AC_UD_VS_VERTEX_BUFFERS);
   add_ud_arg(args, 1, AC_ARG_INT, &args->ac.base_vertex, AC_UD_VS_BASE_VERTEX_START_INSTANCE);
   add_ud_arg(args, 1, AC_ARG_INT, &args->ac.draw_id, AC_UD_VS_BASE_VERTEX_START_INSTANCE);
   add_ud_arg(args, 1, AC_ARG_INT, &args->ac.start_instance, AC_UD_VS_BASE_VERTEX_START_INSTANCE);

   declare_global_input_sgprs(gfx_level, info, user_sgpr_info, args);

   add_ud_arg(args, 1, AC_ARG_INT, &args->ac.view_index, AC_UD_VIEW_INDEX);
   add_ud_arg(args, 1, AC_ARG_INT, &args->tcs_offchip_layout, AC_UD_TCS_OFFCHIP_LAYOUT);

   if (info->is_ngg) {
      add_ud_arg(args, 1, AC_ARG_INT, &args->ngg_state, AC_UD_NGG_STATE);
      if (gfx_level >= GFX12)
         add_ud_arg(args, 1, AC_ARG_INT, &args->ngg_query_buf_va, AC_UD_NGG_QUERY_BUF_VA);
   }
   add_ud_arg(args, 1, AC_ARG_INT, &args->vgt_esgs_ring_itemsize, AC_UD_VGT_ESGS_RING_ITEMSIZE);
   add_ud_arg(args, 1, AC_ARG_INT, &args->ngg_lds_layout, AC_UD_NGG_LDS_LAYOUT);
   add_ud_arg(args, 1, AC_ARG_INT, &args->next_stage_pc, AC_UD_NEXT_STAGE_PC);

   /* VGPRs (GS) — the hardware VGPR order changed on GFX12. */
   if (gfx_level >= GFX12) {
      ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[0]);
      ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_prim_id);
      ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[1]);
   } else {
      ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[0]);
      ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[1]);
      ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_prim_id);
      ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_invocation_id);
      ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[2]);
   }

   /* Preserved SGPRs */
   ac_add_preserved(&args->ac, &args->ac.ring_offsets);
   if (info->is_ngg) {
      ac_add_preserved(&args->ac, &args->ac.gs_tg_info);
   } else {
      ac_add_preserved(&args->ac, &args->ac.gs2vs_offset);
   }
   ac_add_preserved(&args->ac, &args->ac.merged_wave_info);
   ac_add_preserved(&args->ac, &args->ac.tess_offchip_offset);

   /* The attribute ring offset replaces the scratch offset on GFX11+. */
   if (gfx_level >= GFX11) {
      ac_add_preserved(&args->ac, &args->ac.gs_attr_offset);
   } else {
      ac_add_preserved(&args->ac, &args->ac.scratch_offset);
   }

   ac_add_preserved(&args->ac, &args->descriptor_sets[0]);
   ac_add_preserved(&args->ac, &args->ac.push_constants);
   ac_add_preserved(&args->ac, &args->streamout_buffers);
   if (gfx_level >= GFX12)
      ac_add_preserved(&args->ac, &args->streamout_state);
   ac_add_preserved(&args->ac, &args->ac.view_index);
   ac_add_preserved(&args->ac, &args->tcs_offchip_layout);
   if (info->is_ngg) {
      ac_add_preserved(&args->ac, &args->ngg_state);
      if (gfx_level >= GFX12)
         ac_add_preserved(&args->ac, &args->ngg_query_buf_va);
   }
   ac_add_preserved(&args->ac, &args->vgt_esgs_ring_itemsize);
   ac_add_preserved(&args->ac, &args->ngg_lds_layout);

   /* Preserved VGPRs */
   ac_add_preserved(&args->ac, &args->ac.gs_vtx_offset[0]);
   ac_add_preserved(&args->ac, &args->ac.gs_vtx_offset[1]);
   ac_add_preserved(&args->ac, &args->ac.gs_prim_id);

   if (gfx_level < GFX12) {
      ac_add_preserved(&args->ac, &args->ac.gs_invocation_id);
      ac_add_preserved(&args->ac, &args->ac.gs_vtx_offset[2]);
   }
}
517
518 static void
declare_shader_args(const struct radv_device * device,const struct radv_graphics_state_key * gfx_state,const struct radv_shader_info * info,gl_shader_stage stage,gl_shader_stage previous_stage,struct radv_shader_args * args,struct user_sgpr_info * user_sgpr_info)519 declare_shader_args(const struct radv_device *device, const struct radv_graphics_state_key *gfx_state,
520 const struct radv_shader_info *info, gl_shader_stage stage, gl_shader_stage previous_stage,
521 struct radv_shader_args *args, struct user_sgpr_info *user_sgpr_info)
522 {
523 const struct radv_physical_device *pdev = radv_device_physical(device);
524 const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
525 bool has_shader_query = info->has_prim_query || info->has_xfb_query ||
526 (stage == MESA_SHADER_GEOMETRY && info->gs.has_pipeline_stat_query) ||
527 (stage == MESA_SHADER_MESH && info->ms.has_query) ||
528 (stage == MESA_SHADER_TASK && info->cs.has_query);
529 bool has_ngg_provoking_vtx =
530 (stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_GEOMETRY) && gfx_state->dynamic_provoking_vtx_mode;
531
532 if (gfx_level >= GFX10 && info->is_ngg && stage != MESA_SHADER_GEOMETRY) {
533 /* Handle all NGG shaders as GS to simplify the code here. */
534 previous_stage = stage;
535 stage = MESA_SHADER_GEOMETRY;
536 }
537
538 if (info->merged_shader_compiled_separately) {
539 /* Update the stage for merged shaders compiled separately with ESO on GFX9+. */
540 if (stage == MESA_SHADER_VERTEX && info->vs.as_ls) {
541 previous_stage = MESA_SHADER_VERTEX;
542 stage = MESA_SHADER_TESS_CTRL;
543 } else if (stage == MESA_SHADER_VERTEX && info->vs.as_es) {
544 previous_stage = MESA_SHADER_VERTEX;
545 stage = MESA_SHADER_GEOMETRY;
546 } else if (stage == MESA_SHADER_TESS_EVAL && info->tes.as_es) {
547 previous_stage = MESA_SHADER_TESS_EVAL;
548 stage = MESA_SHADER_GEOMETRY;
549 }
550 }
551
552 radv_init_shader_args(device, stage, args);
553
554 if (gl_shader_stage_is_rt(stage)) {
555 radv_declare_rt_shader_args(gfx_level, args);
556 return;
557 }
558
559 add_ud_arg(args, 2, AC_ARG_CONST_DESC_PTR, &args->ac.ring_offsets, AC_UD_SCRATCH_RING_OFFSETS);
560 if (stage == MESA_SHADER_TASK) {
561 add_ud_arg(args, 2, AC_ARG_CONST_DESC_PTR, &args->task_ring_offsets, AC_UD_CS_TASK_RING_OFFSETS);
562 }
563
564 /* For merged shaders the user SGPRs start at 8, with 8 system SGPRs in front (including
565 * the rw_buffers at s0/s1. With user SGPR0 = s8, lets restart the count from 0.
566 */
567 if (previous_stage != MESA_SHADER_NONE)
568 args->num_user_sgprs = 0;
569
570 /* To ensure prologs match the main VS, VS specific input SGPRs have to be placed before other
571 * sgprs.
572 */
573
574 switch (stage) {
575 case MESA_SHADER_COMPUTE:
576 case MESA_SHADER_TASK:
577 declare_global_input_sgprs(gfx_level, info, user_sgpr_info, args);
578
579 if (info->cs.uses_grid_size) {
580 if (args->load_grid_size_from_user_sgpr)
581 add_ud_arg(args, 3, AC_ARG_INT, &args->ac.num_work_groups, AC_UD_CS_GRID_SIZE);
582 else
583 add_ud_arg(args, 2, AC_ARG_CONST_PTR, &args->ac.num_work_groups, AC_UD_CS_GRID_SIZE);
584 }
585
586 if (info->type == RADV_SHADER_TYPE_RT_PROLOG) {
587 add_ud_arg(args, 2, AC_ARG_CONST_DESC_PTR, &args->ac.rt.sbt_descriptors, AC_UD_CS_SBT_DESCRIPTORS);
588 add_ud_arg(args, 2, AC_ARG_CONST_PTR, &args->ac.rt.traversal_shader_addr, AC_UD_CS_TRAVERSAL_SHADER_ADDR);
589 add_ud_arg(args, 2, AC_ARG_CONST_PTR, &args->ac.rt.launch_size_addr, AC_UD_CS_RAY_LAUNCH_SIZE_ADDR);
590 add_ud_arg(args, 1, AC_ARG_INT, &args->ac.rt.dynamic_callable_stack_base,
591 AC_UD_CS_RAY_DYNAMIC_CALLABLE_STACK_BASE);
592 }
593
594 if (info->vs.needs_draw_id) {
595 add_ud_arg(args, 1, AC_ARG_INT, &args->ac.draw_id, AC_UD_CS_TASK_DRAW_ID);
596 }
597
598 if (stage == MESA_SHADER_TASK) {
599 add_ud_arg(args, 1, AC_ARG_INT, &args->ac.task_ring_entry, AC_UD_TASK_RING_ENTRY);
600
601 if (has_shader_query) {
602 add_ud_arg(args, 1, AC_ARG_INT, &args->task_state, AC_UD_TASK_STATE);
603 }
604 }
605
606 for (int i = 0; i < 3; i++) {
607 if (info->cs.uses_block_id[i]) {
608 if (gfx_level >= GFX12)
609 args->ac.workgroup_ids[i].used = true;
610 else
611 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.workgroup_ids[i]);
612 }
613 }
614
615 if (info->cs.uses_local_invocation_idx) {
616 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tg_size);
617 }
618
619 if (args->explicit_scratch_args && gfx_level < GFX11) {
620 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
621 }
622
623 if (gfx_level >= GFX11) {
624 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.local_invocation_ids_packed);
625 } else {
626 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.local_invocation_id_x);
627 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.local_invocation_id_y);
628 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.local_invocation_id_z);
629 }
630 break;
631 case MESA_SHADER_VERTEX:
632 /* NGG is handled by the GS case */
633 assert(!info->is_ngg);
634
635 declare_vs_specific_input_sgprs(info, args);
636
637 declare_global_input_sgprs(gfx_level, info, user_sgpr_info, args);
638
639 if (info->uses_view_index) {
640 add_ud_arg(args, 1, AC_ARG_INT, &args->ac.view_index, AC_UD_VIEW_INDEX);
641 }
642
643 if (info->force_vrs_per_vertex) {
644 add_ud_arg(args, 1, AC_ARG_INT, &args->ac.force_vrs_rates, AC_UD_FORCE_VRS_RATES);
645 }
646
647 if (info->vs.as_es) {
648 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.es2gs_offset);
649 } else if (info->vs.as_ls) {
650 /* no extra parameters */
651 } else {
652 declare_streamout_sgprs(info, args, stage);
653 }
654
655 if (args->explicit_scratch_args) {
656 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
657 }
658
659 declare_vs_input_vgprs(gfx_level, info, args, false);
660 break;
661 case MESA_SHADER_TESS_CTRL:
662 if (previous_stage != MESA_SHADER_NONE) {
663 // First 6 system regs
664 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
665 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.merged_wave_info);
666 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tcs_factor_offset);
667
668 if (gfx_level >= GFX11) {
669 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tcs_wave_id);
670 } else {
671 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
672 }
673
674 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
675 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
676
677 if (info->merged_shader_compiled_separately) {
678 declare_unmerged_vs_tcs_args(gfx_level, info, user_sgpr_info, args);
679 } else {
680 declare_vs_specific_input_sgprs(info, args);
681
682 declare_global_input_sgprs(gfx_level, info, user_sgpr_info, args);
683
684 if (info->uses_view_index) {
685 add_ud_arg(args, 1, AC_ARG_INT, &args->ac.view_index, AC_UD_VIEW_INDEX);
686 }
687
688 if (radv_tcs_needs_state_sgpr(info, gfx_state)) {
689 add_ud_arg(args, 1, AC_ARG_INT, &args->tcs_offchip_layout, AC_UD_TCS_OFFCHIP_LAYOUT);
690 }
691
692 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_patch_id);
693 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_rel_ids);
694
695 declare_vs_input_vgprs(gfx_level, info, args, true);
696 }
697 } else {
698 declare_global_input_sgprs(gfx_level, info, user_sgpr_info, args);
699
700 if (info->uses_view_index) {
701 add_ud_arg(args, 1, AC_ARG_INT, &args->ac.view_index, AC_UD_VIEW_INDEX);
702 }
703
704 if (radv_tcs_needs_state_sgpr(info, gfx_state)) {
705 add_ud_arg(args, 1, AC_ARG_INT, &args->tcs_offchip_layout, AC_UD_TCS_OFFCHIP_LAYOUT);
706 }
707
708 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
709 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tcs_factor_offset);
710 if (args->explicit_scratch_args) {
711 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
712 }
713 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_patch_id);
714 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_rel_ids);
715 }
716 break;
717 case MESA_SHADER_TESS_EVAL:
718 /* NGG is handled by the GS case */
719 assert(!info->is_ngg);
720
721 declare_global_input_sgprs(gfx_level, info, user_sgpr_info, args);
722
723 if (info->uses_view_index)
724 add_ud_arg(args, 1, AC_ARG_INT, &args->ac.view_index, AC_UD_VIEW_INDEX);
725
726 if (radv_tes_needs_state_sgpr(info))
727 add_ud_arg(args, 1, AC_ARG_INT, &args->tcs_offchip_layout, AC_UD_TCS_OFFCHIP_LAYOUT);
728
729 if (info->tes.as_es) {
730 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
731 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
732 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.es2gs_offset);
733 } else {
734 declare_streamout_sgprs(info, args, stage);
735 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
736 }
737 if (args->explicit_scratch_args) {
738 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
739 }
740 declare_tes_input_vgprs(args);
741 break;
742 case MESA_SHADER_GEOMETRY:
743 if (previous_stage != MESA_SHADER_NONE) {
744 // First 6 system regs
745 if (info->is_ngg) {
746 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs_tg_info);
747 } else {
748 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs2vs_offset);
749 }
750
751 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.merged_wave_info);
752 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
753
754 if (gfx_level >= GFX11) {
755 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs_attr_offset);
756 } else {
757 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
758 }
759
760 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
761 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
762
763 if (info->merged_shader_compiled_separately) {
764 declare_unmerged_vs_tes_gs_args(gfx_level, info, user_sgpr_info, args);
765 } else {
766 if (previous_stage == MESA_SHADER_VERTEX) {
767 declare_vs_specific_input_sgprs(info, args);
768 } else if (previous_stage == MESA_SHADER_MESH) {
769 declare_ms_input_sgprs(info, args);
770 }
771
772 declare_global_input_sgprs(gfx_level, info, user_sgpr_info, args);
773
774 if (info->uses_view_index) {
775 add_ud_arg(args, 1, AC_ARG_INT, &args->ac.view_index, AC_UD_VIEW_INDEX);
776 }
777
778 if (previous_stage == MESA_SHADER_TESS_EVAL && radv_tes_needs_state_sgpr(info))
779 add_ud_arg(args, 1, AC_ARG_INT, &args->tcs_offchip_layout, AC_UD_TCS_OFFCHIP_LAYOUT);
780
781 /* Legacy GS force vrs is handled by GS copy shader. */
782 if (info->force_vrs_per_vertex && info->is_ngg) {
783 add_ud_arg(args, 1, AC_ARG_INT, &args->ac.force_vrs_rates, AC_UD_FORCE_VRS_RATES);
784 }
785
786 if (info->is_ngg) {
787 const bool ngg_needs_state_sgpr =
788 has_ngg_provoking_vtx || has_shader_query ||
789 (previous_stage == MESA_SHADER_VERTEX && info->vs.dynamic_num_verts_per_prim);
790
791 declare_ngg_sgprs(info, args, ngg_needs_state_sgpr);
792
793 if (pdev->info.gfx_level >= GFX12 && has_shader_query)
794 add_ud_arg(args, 1, AC_ARG_INT, &args->ngg_query_buf_va, AC_UD_NGG_QUERY_BUF_VA);
795 }
796
797 if (previous_stage != MESA_SHADER_MESH || !pdev->mesh_fast_launch_2) {
798 if (gfx_level >= GFX12) {
799 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[0]);
800 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_prim_id);
801 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[1]);
802 } else {
803 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[0]);
804 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[1]);
805 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_prim_id);
806 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_invocation_id);
807 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[2]);
808 }
809 }
810 }
811
812 if (previous_stage == MESA_SHADER_VERTEX) {
813 declare_vs_input_vgprs(gfx_level, info, args, false);
814 } else if (previous_stage == MESA_SHADER_TESS_EVAL) {
815 declare_tes_input_vgprs(args);
816 } else if (previous_stage == MESA_SHADER_MESH) {
817 declare_ms_input_vgprs(device, args);
818 }
819 } else {
820 declare_global_input_sgprs(gfx_level, info, user_sgpr_info, args);
821
822 if (info->uses_view_index) {
823 add_ud_arg(args, 1, AC_ARG_INT, &args->ac.view_index, AC_UD_VIEW_INDEX);
824 }
825
826 if (info->force_vrs_per_vertex) {
827 add_ud_arg(args, 1, AC_ARG_INT, &args->ac.force_vrs_rates, AC_UD_FORCE_VRS_RATES);
828 }
829
830 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs2vs_offset);
831 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs_wave_id);
832 if (args->explicit_scratch_args) {
833 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
834 }
835 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[0]);
836 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[1]);
837 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_prim_id);
838 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[2]);
839 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[3]);
840 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[4]);
841 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[5]);
842 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_invocation_id);
843 }
844 break;
845 case MESA_SHADER_FRAGMENT:
846 declare_global_input_sgprs(gfx_level, info, user_sgpr_info, args);
847
848 if (info->ps.has_epilog) {
849 add_ud_arg(args, 1, AC_ARG_INT, &args->epilog_pc, AC_UD_EPILOG_PC);
850 }
851
852 if (radv_ps_needs_state_sgpr(info, gfx_state))
853 add_ud_arg(args, 1, AC_ARG_INT, &args->ps_state, AC_UD_PS_STATE);
854
855 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.prim_mask);
856
857 if (info->ps.pops && gfx_level < GFX11) {
858 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.pops_collision_wave_id);
859 }
860
861 if (info->ps.load_provoking_vtx) {
862 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.load_provoking_vtx);
863 }
864
865 if (args->explicit_scratch_args && gfx_level < GFX11) {
866 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
867 }
868
869 declare_ps_input_vgprs(info, args);
870 break;
871 default:
872 unreachable("Shader stage not implemented");
873 }
874 }
875
876 void
radv_declare_shader_args(const struct radv_device * device,const struct radv_graphics_state_key * gfx_state,const struct radv_shader_info * info,gl_shader_stage stage,gl_shader_stage previous_stage,struct radv_shader_args * args)877 radv_declare_shader_args(const struct radv_device *device, const struct radv_graphics_state_key *gfx_state,
878 const struct radv_shader_info *info, gl_shader_stage stage, gl_shader_stage previous_stage,
879 struct radv_shader_args *args)
880 {
881 declare_shader_args(device, gfx_state, info, stage, previous_stage, args, NULL);
882
883 if (gl_shader_stage_is_rt(stage))
884 return;
885
886 uint32_t num_user_sgprs = args->num_user_sgprs;
887 if (info->loads_push_constants)
888 num_user_sgprs++;
889
890 const struct radv_physical_device *pdev = radv_device_physical(device);
891 const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
892 uint32_t available_sgprs = gfx_level >= GFX9 && stage != MESA_SHADER_COMPUTE && stage != MESA_SHADER_TASK ? 32 : 16;
893 uint32_t remaining_sgprs = available_sgprs - num_user_sgprs;
894
895 struct user_sgpr_info user_sgpr_info = {
896 .remaining_sgprs = remaining_sgprs,
897 };
898
899 uint32_t num_desc_set = util_bitcount(info->desc_set_used_mask);
900
901 if (info->force_indirect_desc_sets || remaining_sgprs < num_desc_set) {
902 user_sgpr_info.indirect_all_descriptor_sets = true;
903 user_sgpr_info.remaining_sgprs--;
904 } else {
905 user_sgpr_info.remaining_sgprs -= num_desc_set;
906 }
907
908 if (!info->merged_shader_compiled_separately)
909 allocate_inline_push_consts(info, &user_sgpr_info);
910
911 declare_shader_args(device, gfx_state, info, stage, previous_stage, args, &user_sgpr_info);
912 }
913
914 void
radv_declare_ps_epilog_args(const struct radv_device * device,const struct radv_ps_epilog_key * key,struct radv_shader_args * args)915 radv_declare_ps_epilog_args(const struct radv_device *device, const struct radv_ps_epilog_key *key,
916 struct radv_shader_args *args)
917 {
918 radv_init_shader_args(device, MESA_SHADER_FRAGMENT, args);
919
920 /* Declare VGPR arguments for depth/stencil/sample exports. */
921 if (key->export_depth)
922 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->depth);
923 if (key->export_stencil)
924 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->stencil);
925 if (key->export_sample_mask)
926 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->sample_mask);
927
928 /* Declare VGPR arguments for color exports. */
929 for (unsigned i = 0; i < MAX_RTS; i++) {
930 const uint8_t color = (key->colors_written >> (i * 4) & 0xf);
931
932 if (!color) {
933 ac_add_arg(&args->ac, AC_ARG_VGPR, 4, AC_ARG_FLOAT, NULL);
934 continue;
935 }
936
937 ac_add_arg(&args->ac, AC_ARG_VGPR, 4, AC_ARG_FLOAT, &args->colors[i]);
938 }
939 }
940