1 /*
2 * Copyright © 2019 Valve Corporation.
3 * Copyright © 2016 Red Hat.
4 * Copyright © 2016 Bas Nieuwenhuizen
5 *
6 * based in part on anv driver which is:
7 * Copyright © 2015 Intel Corporation
8 *
9 * Permission is hereby granted, free of charge, to any person obtaining a
10 * copy of this software and associated documentation files (the "Software"),
11 * to deal in the Software without restriction, including without limitation
12 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
13 * and/or sell copies of the Software, and to permit persons to whom the
14 * Software is furnished to do so, subject to the following conditions:
15 *
16 * The above copyright notice and this permission notice (including the next
17 * paragraph) shall be included in all copies or substantial portions of the
18 * Software.
19 *
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
25 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
26 * IN THE SOFTWARE.
27 */
28
29 #include "radv_shader_args.h"
30 #include "radv_private.h"
31 #include "radv_shader.h"
32
/* Bookkeeping used while assigning user SGPRs to shader arguments. */
struct user_sgpr_info {
   /* Bitmask of push constant dwords that are passed as inlined user SGPRs. */
   uint64_t inline_push_constant_mask;
   /* True when every push constant is inlined so the push constant pointer SGPR
    * can be omitted (see allocate_inline_push_consts()).
    */
   bool inlined_all_push_consts;
   /* True when descriptor sets are reached through one indirect pointer instead
    * of one SGPR per set.
    */
   bool indirect_all_descriptor_sets;
   /* Number of user SGPRs still available for allocation. */
   uint8_t remaining_sgprs;
};
39
40 static void
allocate_inline_push_consts(const struct radv_shader_info * info,struct user_sgpr_info * user_sgpr_info)41 allocate_inline_push_consts(const struct radv_shader_info *info, struct user_sgpr_info *user_sgpr_info)
42 {
43 uint8_t remaining_sgprs = user_sgpr_info->remaining_sgprs;
44
45 if (!info->inline_push_constant_mask)
46 return;
47
48 uint64_t mask = info->inline_push_constant_mask;
49 uint8_t num_push_consts = util_bitcount64(mask);
50
51 /* Disable the default push constants path if all constants can be inlined and if shaders don't
52 * use dynamic descriptors.
53 */
54 if (num_push_consts <= MIN2(remaining_sgprs + 1, AC_MAX_INLINE_PUSH_CONSTS) && info->can_inline_all_push_constants &&
55 !info->loads_dynamic_offsets) {
56 user_sgpr_info->inlined_all_push_consts = true;
57 remaining_sgprs++;
58 } else {
59 /* Clamp to the maximum number of allowed inlined push constants. */
60 while (num_push_consts > MIN2(remaining_sgprs, AC_MAX_INLINE_PUSH_CONSTS_WITH_INDIRECT)) {
61 num_push_consts--;
62 mask &= ~BITFIELD64_BIT(util_last_bit64(mask) - 1);
63 }
64 }
65
66 user_sgpr_info->remaining_sgprs = remaining_sgprs - util_bitcount64(mask);
67 user_sgpr_info->inline_push_constant_mask = mask;
68 }
69
70 static void
add_ud_arg(struct radv_shader_args * args,unsigned size,enum ac_arg_type type,struct ac_arg * arg,enum radv_ud_index ud)71 add_ud_arg(struct radv_shader_args *args, unsigned size, enum ac_arg_type type, struct ac_arg *arg,
72 enum radv_ud_index ud)
73 {
74 ac_add_arg(&args->ac, AC_ARG_SGPR, size, type, arg);
75
76 struct radv_userdata_info *ud_info = &args->user_sgprs_locs.shader_data[ud];
77
78 if (ud_info->sgpr_idx == -1)
79 ud_info->sgpr_idx = args->num_user_sgprs;
80
81 ud_info->num_sgprs += size;
82
83 args->num_user_sgprs += size;
84 }
85
86 static void
add_descriptor_set(struct radv_shader_args * args,enum ac_arg_type type,struct ac_arg * arg,uint32_t set)87 add_descriptor_set(struct radv_shader_args *args, enum ac_arg_type type, struct ac_arg *arg, uint32_t set)
88 {
89 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, type, arg);
90
91 struct radv_userdata_info *ud_info = &args->user_sgprs_locs.descriptor_sets[set];
92 ud_info->sgpr_idx = args->num_user_sgprs;
93 ud_info->num_sgprs = 1;
94
95 args->user_sgprs_locs.descriptor_sets_enabled |= 1u << set;
96 args->num_user_sgprs++;
97 }
98
/* Declare the user SGPRs common to all stages: descriptor sets, push constants
 * (pointer and/or inlined dwords) and the streamout buffers descriptor.
 *
 * user_sgpr_info may be NULL; in that case only the streamout buffers argument
 * is considered.
 */
static void
declare_global_input_sgprs(const struct radv_shader_info *info, const struct user_sgpr_info *user_sgpr_info,
                           struct radv_shader_args *args)
{
   if (user_sgpr_info) {
      /* 1 for each descriptor set */
      if (!user_sgpr_info->indirect_all_descriptor_sets) {
         uint32_t mask = info->desc_set_used_mask;

         while (mask) {
            int i = u_bit_scan(&mask);

            add_descriptor_set(args, AC_ARG_CONST_PTR, &args->descriptor_sets[i], i);
         }
      } else {
         /* All sets are reached through a single pointer-to-pointers argument. */
         add_ud_arg(args, 1, AC_ARG_CONST_PTR_PTR, &args->descriptor_sets[0], AC_UD_INDIRECT_DESCRIPTOR_SETS);
      }

      if (info->merged_shader_compiled_separately ||
          (info->loads_push_constants && !user_sgpr_info->inlined_all_push_consts)) {
         /* 1 for push constants and dynamic descriptors */
         add_ud_arg(args, 1, AC_ARG_CONST_PTR, &args->ac.push_constants, AC_UD_PUSH_CONSTANTS);
      }

      /* One SGPR per inlined push constant dword. */
      for (unsigned i = 0; i < util_bitcount64(user_sgpr_info->inline_push_constant_mask); i++) {
         add_ud_arg(args, 1, AC_ARG_INT, &args->ac.inline_push_consts[i], AC_UD_INLINE_PUSH_CONSTANTS);
      }
      args->ac.inline_push_const_mask = user_sgpr_info->inline_push_constant_mask;
   }

   /* Streamout buffers are needed when streamout is used, and also for separately
    * compiled merged shaders on the ES/GS path.
    */
   const bool needs_streamout_buffers =
      info->so.num_outputs ||
      (info->merged_shader_compiled_separately &&
       ((info->stage == MESA_SHADER_VERTEX && info->vs.as_es) ||
        (info->stage == MESA_SHADER_TESS_EVAL && info->tes.as_es) || info->stage == MESA_SHADER_GEOMETRY));

   if (needs_streamout_buffers) {
      add_ud_arg(args, 1, AC_ARG_CONST_DESC_PTR, &args->streamout_buffers, AC_UD_STREAMOUT_BUFFERS);
   }
}
139
/* Declare the VS-specific input SGPRs: prolog inputs, vertex buffers, base vertex,
 * draw id and start instance. For the GS copy shader only the prolog inputs (if any)
 * are declared.
 */
static void
declare_vs_specific_input_sgprs(const struct radv_shader_info *info, struct radv_shader_args *args)
{
   if (info->vs.has_prolog)
      add_ud_arg(args, 2, AC_ARG_INT, &args->prolog_inputs, AC_UD_VS_PROLOG_INPUTS);

   if (info->type != RADV_SHADER_TYPE_GS_COPY) {
      if (info->vs.vb_desc_usage_mask) {
         add_ud_arg(args, 1, AC_ARG_CONST_DESC_PTR, &args->ac.vertex_buffers, AC_UD_VS_VERTEX_BUFFERS);
      }

      /* base_vertex, draw_id and start_instance share one user-data slot. */
      add_ud_arg(args, 1, AC_ARG_INT, &args->ac.base_vertex, AC_UD_VS_BASE_VERTEX_START_INSTANCE);
      if (info->vs.needs_draw_id) {
         add_ud_arg(args, 1, AC_ARG_INT, &args->ac.draw_id, AC_UD_VS_BASE_VERTEX_START_INSTANCE);
      }
      if (info->vs.needs_base_instance) {
         add_ud_arg(args, 1, AC_ARG_INT, &args->ac.start_instance, AC_UD_VS_BASE_VERTEX_START_INSTANCE);
      }
   }
}
160
/* Declare the VS input VGPRs. The exact layout depends on the hardware generation
 * and on whether the VS runs merged with TCS (LS), as ES, as NGG, or standalone.
 * The declaration order must match what the hardware provides.
 */
static void
declare_vs_input_vgprs(enum amd_gfx_level gfx_level, const struct radv_shader_info *info, struct radv_shader_args *args,
                       bool merged_vs_tcs)
{
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vertex_id);
   if (info->type != RADV_SHADER_TYPE_GS_COPY) {
      if (info->vs.as_ls || merged_vs_tcs) {
         /* LS / merged VS+TCS: instance_id position differs per generation. */
         if (gfx_level >= GFX11) {
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user VGPR */
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user VGPR */
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
         } else if (gfx_level >= GFX10) {
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_rel_patch_id);
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
         } else {
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_rel_patch_id);
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */
         }
      } else {
         if (gfx_level >= GFX10) {
            if (info->is_ngg) {
               ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
               ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
               ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
            } else {
               ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */
               ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_prim_id);
               ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
            }
         } else {
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_prim_id);
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */
         }
      }
   }

   if (info->vs.dynamic_inputs) {
      assert(info->vs.use_per_attribute_vb_descs);
      /* One 4-dword VGPR argument per used vertex attribute, filled by the VS prolog. */
      unsigned num_attributes = util_last_bit(info->vs.input_slot_usage_mask);
      for (unsigned i = 0; i < num_attributes; i++) {
         ac_add_arg(&args->ac, AC_ARG_VGPR, 4, AC_ARG_INT, &args->vs_inputs[i]);
         args->ac.args[args->vs_inputs[i].arg_index].pending_vmem = true;
      }
      /* Ensure the main shader doesn't use less vgprs than the prolog. The prolog requires one
       * VGPR more than the number of shader arguments in the case of non-trivial divisors on GFX8.
       */
      ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL);
   }
}
214
215 static void
declare_streamout_sgprs(const struct radv_shader_info * info,struct radv_shader_args * args,gl_shader_stage stage)216 declare_streamout_sgprs(const struct radv_shader_info *info, struct radv_shader_args *args, gl_shader_stage stage)
217 {
218 int i;
219
220 /* Streamout SGPRs. */
221 if (info->so.num_outputs) {
222 assert(stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_TESS_EVAL);
223
224 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.streamout_config);
225 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.streamout_write_index);
226 } else if (stage == MESA_SHADER_TESS_EVAL) {
227 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
228 }
229
230 /* A streamout buffer offset is loaded if the stride is non-zero. */
231 for (i = 0; i < 4; i++) {
232 if (!info->so.strides[i])
233 continue;
234
235 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.streamout_offset[i]);
236 }
237 }
238
239 static void
declare_tes_input_vgprs(struct radv_shader_args * args)240 declare_tes_input_vgprs(struct radv_shader_args *args)
241 {
242 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.tes_u);
243 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.tes_v);
244 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tes_rel_patch_id);
245 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tes_patch_id);
246 }
247
/* Declare the mesh shader input SGPRs. Grid size and draw id share the
 * AC_UD_VS_BASE_VERTEX_START_INSTANCE user-data slot.
 */
static void
declare_ms_input_sgprs(const struct radv_shader_info *info, struct radv_shader_args *args)
{
   if (info->cs.uses_grid_size) {
      add_ud_arg(args, 3, AC_ARG_INT, &args->ac.num_work_groups, AC_UD_VS_BASE_VERTEX_START_INSTANCE);
   }
   if (info->vs.needs_draw_id) {
      add_ud_arg(args, 1, AC_ARG_INT, &args->ac.draw_id, AC_UD_VS_BASE_VERTEX_START_INSTANCE);
   }
   if (info->ms.has_task) {
      /* Only declared when a task shader feeds this mesh shader. */
      add_ud_arg(args, 1, AC_ARG_INT, &args->ac.task_ring_entry, AC_UD_TASK_RING_ENTRY);
   }
}
261
262 static void
declare_ms_input_vgprs(const struct radv_device * device,struct radv_shader_args * args)263 declare_ms_input_vgprs(const struct radv_device *device, struct radv_shader_args *args)
264 {
265 if (device->physical_device->mesh_fast_launch_2) {
266 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.local_invocation_ids);
267 } else {
268 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vertex_id);
269 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
270 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
271 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* instance_id */
272 }
273 }
274
275 static void
declare_ps_input_vgprs(const struct radv_shader_info * info,struct radv_shader_args * args)276 declare_ps_input_vgprs(const struct radv_shader_info *info, struct radv_shader_args *args)
277 {
278 ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_sample);
279 ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_center);
280 ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_centroid);
281 ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_INT, &args->ac.pull_model);
282 ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_sample);
283 ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_center);
284 ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_centroid);
285 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, NULL); /* line stipple tex */
286 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[0]);
287 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[1]);
288 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[2]);
289 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[3]);
290 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.front_face);
291 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.ancillary);
292 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.sample_coverage);
293 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* fixed pt */
294
295 if (args->remap_spi_ps_input)
296 ac_compact_ps_vgpr_args(&args->ac, info->ps.spi_ps_input);
297 }
298
299 static void
declare_ngg_sgprs(const struct radv_shader_info * info,struct radv_shader_args * args,bool has_ngg_provoking_vtx)300 declare_ngg_sgprs(const struct radv_shader_info *info, struct radv_shader_args *args, bool has_ngg_provoking_vtx)
301 {
302 if (has_ngg_provoking_vtx)
303 add_ud_arg(args, 1, AC_ARG_INT, &args->ngg_provoking_vtx, AC_UD_NGG_PROVOKING_VTX);
304
305 if (info->has_ngg_culling) {
306 add_ud_arg(args, 1, AC_ARG_INT, &args->ngg_culling_settings, AC_UD_NGG_CULLING_SETTINGS);
307 add_ud_arg(args, 1, AC_ARG_INT, &args->ngg_viewport_scale[0], AC_UD_NGG_VIEWPORT);
308 add_ud_arg(args, 1, AC_ARG_INT, &args->ngg_viewport_scale[1], AC_UD_NGG_VIEWPORT);
309 add_ud_arg(args, 1, AC_ARG_INT, &args->ngg_viewport_translate[0], AC_UD_NGG_VIEWPORT);
310 add_ud_arg(args, 1, AC_ARG_INT, &args->ngg_viewport_translate[1], AC_UD_NGG_VIEWPORT);
311 }
312 }
313
314 static void
radv_init_shader_args(const struct radv_device * device,gl_shader_stage stage,struct radv_shader_args * args)315 radv_init_shader_args(const struct radv_device *device, gl_shader_stage stage, struct radv_shader_args *args)
316 {
317 memset(args, 0, sizeof(*args));
318
319 args->explicit_scratch_args = !radv_use_llvm_for_stage(device, stage);
320 args->remap_spi_ps_input = !radv_use_llvm_for_stage(device, stage);
321 args->load_grid_size_from_user_sgpr = device->load_grid_size_from_user_sgpr;
322
323 for (int i = 0; i < MAX_SETS; i++)
324 args->user_sgprs_locs.descriptor_sets[i].sgpr_idx = -1;
325 for (int i = 0; i < AC_UD_MAX_UD; i++)
326 args->user_sgprs_locs.shader_data[i].sgpr_idx = -1;
327 }
328
/* Declare the argument layout used by ray tracing shaders. SGPRs come first,
 * then the VGPRs (launch id, stack base, shader/record pointers and the
 * traversal state). The declaration order defines the RT calling convention.
 */
void
radv_declare_rt_shader_args(enum amd_gfx_level gfx_level, struct radv_shader_args *args)
{
   add_ud_arg(args, 2, AC_ARG_CONST_PTR, &args->ac.rt.uniform_shader_addr, AC_UD_SCRATCH_RING_OFFSETS);
   add_ud_arg(args, 1, AC_ARG_CONST_PTR_PTR, &args->descriptor_sets[0], AC_UD_INDIRECT_DESCRIPTOR_SETS);
   ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR, &args->ac.push_constants);
   ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_DESC_PTR, &args->ac.rt.sbt_descriptors);
   ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_PTR, &args->ac.rt.traversal_shader_addr);
   ac_add_arg(&args->ac, AC_ARG_SGPR, 3, AC_ARG_INT, &args->ac.rt.launch_size);
   if (gfx_level < GFX9) {
      /* Scratch offset and ring offsets are explicit SGPR arguments before GFX9. */
      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
      ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_DESC_PTR, &args->ac.ring_offsets);
   }

   ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_INT, &args->ac.rt.launch_id);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.rt.dynamic_callable_stack_base);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_CONST_PTR, &args->ac.rt.shader_addr);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_CONST_PTR, &args->ac.rt.shader_record);

   /* Ray payload and traversal inputs. */
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.rt.payload_offset);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_FLOAT, &args->ac.rt.ray_origin);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_FLOAT, &args->ac.rt.ray_direction);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.rt.ray_tmin);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.rt.ray_tmax);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.rt.cull_mask_and_flags);

   ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_CONST_PTR, &args->ac.rt.accel_struct);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.rt.sbt_offset);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.rt.sbt_stride);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.rt.miss_index);

   /* Hit attributes. */
   ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_CONST_PTR, &args->ac.rt.instance_addr);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.rt.primitive_id);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.rt.geometry_id_and_flags);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.rt.hit_kind);
}
365
366 static bool
radv_tcs_needs_state_sgpr(const struct radv_shader_info * info,const struct radv_graphics_state_key * gfx_state)367 radv_tcs_needs_state_sgpr(const struct radv_shader_info *info, const struct radv_graphics_state_key *gfx_state)
368 {
369 /* Some values are loaded from a SGPR when dynamic states are used or when the shader is unlinked. */
370 return !gfx_state->ts.patch_control_points || !info->num_tess_patches || !info->inputs_linked;
371 }
372
373 static bool
radv_tes_needs_state_sgpr(const struct radv_shader_info * info)374 radv_tes_needs_state_sgpr(const struct radv_shader_info *info)
375 {
376 /* Some values are loaded from a SGPR when dynamic states are used or when the shader is unlinked. */
377 return !info->num_tess_patches || !info->tes.tcs_vertices_out || !info->inputs_linked;
378 }
379
380 static bool
radv_ps_needs_state_sgpr(const struct radv_shader_info * info,const struct radv_graphics_state_key * gfx_state)381 radv_ps_needs_state_sgpr(const struct radv_shader_info *info, const struct radv_graphics_state_key *gfx_state)
382 {
383 if (info->ps.needs_sample_positions && gfx_state->dynamic_rasterization_samples)
384 return true;
385
386 if (gfx_state->dynamic_line_rast_mode)
387 return true;
388
389 if (info->ps.reads_sample_mask_in && (info->ps.uses_sample_shading || gfx_state->ms.sample_shading_enable))
390 return true;
391
392 /* For computing barycentrics when the primitive topology is unknown at compile time (GPL). */
393 if (info->ps.load_rasterization_prim && gfx_state->unknown_rast_prim)
394 return true;
395
396 return false;
397 }
398
/* Declare the arguments for a VS+TCS pair compiled separately (shader objects on
 * GFX9+). Unlike the merged path, the VS-specific SGPRs are declared
 * unconditionally, and the arguments consumed after the first stage are marked
 * preserved so they survive across the stage boundary.
 */
static void
declare_unmerged_vs_tcs_args(const enum amd_gfx_level gfx_level, const struct radv_shader_info *info,
                             const struct user_sgpr_info *user_sgpr_info, struct radv_shader_args *args)
{
   /* SGPRs */
   add_ud_arg(args, 2, AC_ARG_INT, &args->prolog_inputs, AC_UD_VS_PROLOG_INPUTS);
   add_ud_arg(args, 1, AC_ARG_CONST_DESC_PTR, &args->ac.vertex_buffers, AC_UD_VS_VERTEX_BUFFERS);
   add_ud_arg(args, 1, AC_ARG_INT, &args->ac.base_vertex, AC_UD_VS_BASE_VERTEX_START_INSTANCE);
   add_ud_arg(args, 1, AC_ARG_INT, &args->ac.draw_id, AC_UD_VS_BASE_VERTEX_START_INSTANCE);
   add_ud_arg(args, 1, AC_ARG_INT, &args->ac.start_instance, AC_UD_VS_BASE_VERTEX_START_INSTANCE);

   declare_global_input_sgprs(info, user_sgpr_info, args);

   add_ud_arg(args, 1, AC_ARG_INT, &args->ac.view_index, AC_UD_VIEW_INDEX);
   add_ud_arg(args, 1, AC_ARG_INT, &args->tcs_offchip_layout, AC_UD_TCS_OFFCHIP_LAYOUT);
   add_ud_arg(args, 1, AC_ARG_INT, &args->tcs_epilog_pc, AC_UD_TCS_EPILOG_PC);
   add_ud_arg(args, 1, AC_ARG_INT, &args->next_stage_pc, AC_UD_NEXT_STAGE_PC);

   /* VGPRs (TCS first, then VS) */
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_patch_id);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_rel_ids);

   declare_vs_input_vgprs(gfx_level, info, args, true);

   /* Preserved SGPRs */
   ac_add_preserved(&args->ac, &args->ac.ring_offsets);
   ac_add_preserved(&args->ac, &args->ac.tess_offchip_offset);
   ac_add_preserved(&args->ac, &args->ac.merged_wave_info);
   ac_add_preserved(&args->ac, &args->ac.tcs_factor_offset);

   /* The scratch offset is replaced by the TCS wave id on GFX11+. */
   if (gfx_level >= GFX11) {
      ac_add_preserved(&args->ac, &args->ac.tcs_wave_id);
   } else {
      ac_add_preserved(&args->ac, &args->ac.scratch_offset);
   }

   ac_add_preserved(&args->ac, &args->descriptor_sets[0]);
   ac_add_preserved(&args->ac, &args->ac.push_constants);
   ac_add_preserved(&args->ac, &args->ac.view_index);
   ac_add_preserved(&args->ac, &args->tcs_offchip_layout);
   ac_add_preserved(&args->ac, &args->tcs_epilog_pc);

   /* Preserved VGPRs */
   ac_add_preserved(&args->ac, &args->ac.tcs_patch_id);
   ac_add_preserved(&args->ac, &args->ac.tcs_rel_ids);
}
445
/* Declare the arguments for a VS+GS or TES+GS pair compiled separately (shader
 * objects on GFX9+). VS-specific SGPRs are declared unconditionally, and the
 * arguments consumed after the first stage are marked preserved so they survive
 * across the stage boundary.
 */
static void
declare_unmerged_vs_tes_gs_args(const enum amd_gfx_level gfx_level, const struct radv_shader_info *info,
                                const struct user_sgpr_info *user_sgpr_info, struct radv_shader_args *args)
{
   /* SGPRs */
   add_ud_arg(args, 2, AC_ARG_INT, &args->prolog_inputs, AC_UD_VS_PROLOG_INPUTS);
   add_ud_arg(args, 1, AC_ARG_CONST_DESC_PTR, &args->ac.vertex_buffers, AC_UD_VS_VERTEX_BUFFERS);
   add_ud_arg(args, 1, AC_ARG_INT, &args->ac.base_vertex, AC_UD_VS_BASE_VERTEX_START_INSTANCE);
   add_ud_arg(args, 1, AC_ARG_INT, &args->ac.draw_id, AC_UD_VS_BASE_VERTEX_START_INSTANCE);
   add_ud_arg(args, 1, AC_ARG_INT, &args->ac.start_instance, AC_UD_VS_BASE_VERTEX_START_INSTANCE);

   declare_global_input_sgprs(info, user_sgpr_info, args);

   add_ud_arg(args, 1, AC_ARG_INT, &args->ac.view_index, AC_UD_VIEW_INDEX);
   add_ud_arg(args, 1, AC_ARG_INT, &args->tes_state, AC_UD_TES_STATE);

   add_ud_arg(args, 1, AC_ARG_INT, &args->shader_query_state, AC_UD_SHADER_QUERY_STATE);
   if (info->is_ngg) {
      add_ud_arg(args, 1, AC_ARG_INT, &args->ngg_provoking_vtx, AC_UD_NGG_PROVOKING_VTX);
   }
   add_ud_arg(args, 1, AC_ARG_INT, &args->vgt_esgs_ring_itemsize, AC_UD_VGT_ESGS_RING_ITEMSIZE);
   add_ud_arg(args, 1, AC_ARG_INT, &args->next_stage_pc, AC_UD_NEXT_STAGE_PC);

   /* VGPRs (GS) */
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[0]);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[1]);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_prim_id);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_invocation_id);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[2]);

   /* Preserved SGPRs */
   ac_add_preserved(&args->ac, &args->ac.ring_offsets);
   /* NGG uses gs_tg_info where the legacy GS path uses gs2vs_offset. */
   if (info->is_ngg) {
      ac_add_preserved(&args->ac, &args->ac.gs_tg_info);
   } else {
      ac_add_preserved(&args->ac, &args->ac.gs2vs_offset);
   }
   ac_add_preserved(&args->ac, &args->ac.merged_wave_info);
   ac_add_preserved(&args->ac, &args->ac.tess_offchip_offset);

   /* The scratch offset is replaced by the GS attribute offset on GFX11+. */
   if (gfx_level >= GFX11) {
      ac_add_preserved(&args->ac, &args->ac.gs_attr_offset);
   } else {
      ac_add_preserved(&args->ac, &args->ac.scratch_offset);
   }

   ac_add_preserved(&args->ac, &args->descriptor_sets[0]);
   ac_add_preserved(&args->ac, &args->ac.push_constants);
   ac_add_preserved(&args->ac, &args->ac.view_index);
   ac_add_preserved(&args->ac, &args->tes_state);
   ac_add_preserved(&args->ac, &args->shader_query_state);
   if (info->is_ngg)
      ac_add_preserved(&args->ac, &args->ngg_provoking_vtx);
   ac_add_preserved(&args->ac, &args->vgt_esgs_ring_itemsize);

   /* Preserved VGPRs */
   ac_add_preserved(&args->ac, &args->ac.gs_vtx_offset[0]);
   ac_add_preserved(&args->ac, &args->ac.gs_vtx_offset[1]);
   ac_add_preserved(&args->ac, &args->ac.gs_prim_id);
   ac_add_preserved(&args->ac, &args->ac.gs_invocation_id);
   ac_add_preserved(&args->ac, &args->ac.gs_vtx_offset[2]);
}
508
509 static void
declare_shader_args(const struct radv_device * device,const struct radv_graphics_state_key * gfx_state,const struct radv_shader_info * info,gl_shader_stage stage,gl_shader_stage previous_stage,struct radv_shader_args * args,struct user_sgpr_info * user_sgpr_info)510 declare_shader_args(const struct radv_device *device, const struct radv_graphics_state_key *gfx_state,
511 const struct radv_shader_info *info, gl_shader_stage stage, gl_shader_stage previous_stage,
512 struct radv_shader_args *args, struct user_sgpr_info *user_sgpr_info)
513 {
514 const enum amd_gfx_level gfx_level = device->physical_device->rad_info.gfx_level;
515 bool has_shader_query = info->has_prim_query || info->has_xfb_query ||
516 (stage == MESA_SHADER_GEOMETRY && info->gs.has_pipeline_stat_query) ||
517 (stage == MESA_SHADER_MESH && info->ms.has_query) ||
518 (stage == MESA_SHADER_TASK && info->cs.has_query);
519 bool has_ngg_provoking_vtx =
520 (stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_GEOMETRY) && gfx_state->dynamic_provoking_vtx_mode;
521
522 if (gfx_level >= GFX10 && info->is_ngg && stage != MESA_SHADER_GEOMETRY) {
523 /* Handle all NGG shaders as GS to simplify the code here. */
524 previous_stage = stage;
525 stage = MESA_SHADER_GEOMETRY;
526 }
527
528 if (info->merged_shader_compiled_separately) {
529 /* Update the stage for merged shaders compiled separately with ESO on GFX9+. */
530 if (stage == MESA_SHADER_VERTEX && info->vs.as_ls) {
531 previous_stage = MESA_SHADER_VERTEX;
532 stage = MESA_SHADER_TESS_CTRL;
533 } else if (stage == MESA_SHADER_VERTEX && info->vs.as_es) {
534 previous_stage = MESA_SHADER_VERTEX;
535 stage = MESA_SHADER_GEOMETRY;
536 } else if (stage == MESA_SHADER_TESS_EVAL && info->tes.as_es) {
537 previous_stage = MESA_SHADER_TESS_EVAL;
538 stage = MESA_SHADER_GEOMETRY;
539 }
540 }
541
542 radv_init_shader_args(device, stage, args);
543
544 if (gl_shader_stage_is_rt(stage)) {
545 radv_declare_rt_shader_args(gfx_level, args);
546 return;
547 }
548
549 add_ud_arg(args, 2, AC_ARG_CONST_DESC_PTR, &args->ac.ring_offsets, AC_UD_SCRATCH_RING_OFFSETS);
550 if (stage == MESA_SHADER_TASK) {
551 add_ud_arg(args, 2, AC_ARG_CONST_DESC_PTR, &args->task_ring_offsets, AC_UD_CS_TASK_RING_OFFSETS);
552 }
553
554 /* For merged shaders the user SGPRs start at 8, with 8 system SGPRs in front (including
555 * the rw_buffers at s0/s1. With user SGPR0 = s8, lets restart the count from 0.
556 */
557 if (previous_stage != MESA_SHADER_NONE)
558 args->num_user_sgprs = 0;
559
560 /* To ensure prologs match the main VS, VS specific input SGPRs have to be placed before other
561 * sgprs.
562 */
563
564 switch (stage) {
565 case MESA_SHADER_COMPUTE:
566 case MESA_SHADER_TASK:
567 declare_global_input_sgprs(info, user_sgpr_info, args);
568
569 if (info->cs.uses_grid_size) {
570 if (args->load_grid_size_from_user_sgpr)
571 add_ud_arg(args, 3, AC_ARG_INT, &args->ac.num_work_groups, AC_UD_CS_GRID_SIZE);
572 else
573 add_ud_arg(args, 2, AC_ARG_CONST_PTR, &args->ac.num_work_groups, AC_UD_CS_GRID_SIZE);
574 }
575
576 if (info->cs.is_rt_shader) {
577 add_ud_arg(args, 2, AC_ARG_CONST_DESC_PTR, &args->ac.rt.sbt_descriptors, AC_UD_CS_SBT_DESCRIPTORS);
578 add_ud_arg(args, 2, AC_ARG_CONST_PTR, &args->ac.rt.traversal_shader_addr, AC_UD_CS_TRAVERSAL_SHADER_ADDR);
579 add_ud_arg(args, 2, AC_ARG_CONST_PTR, &args->ac.rt.launch_size_addr, AC_UD_CS_RAY_LAUNCH_SIZE_ADDR);
580 add_ud_arg(args, 1, AC_ARG_INT, &args->ac.rt.dynamic_callable_stack_base,
581 AC_UD_CS_RAY_DYNAMIC_CALLABLE_STACK_BASE);
582 }
583
584 if (info->vs.needs_draw_id) {
585 add_ud_arg(args, 1, AC_ARG_INT, &args->ac.draw_id, AC_UD_CS_TASK_DRAW_ID);
586 }
587
588 if (stage == MESA_SHADER_TASK) {
589 add_ud_arg(args, 1, AC_ARG_INT, &args->ac.task_ring_entry, AC_UD_TASK_RING_ENTRY);
590
591 if (has_shader_query) {
592 add_ud_arg(args, 1, AC_ARG_INT, &args->shader_query_state, AC_UD_SHADER_QUERY_STATE);
593 }
594 }
595
596 for (int i = 0; i < 3; i++) {
597 if (info->cs.uses_block_id[i]) {
598 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.workgroup_ids[i]);
599 }
600 }
601
602 if (info->cs.uses_local_invocation_idx) {
603 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tg_size);
604 }
605
606 if (args->explicit_scratch_args && gfx_level < GFX11) {
607 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
608 }
609
610 if (gfx_level >= GFX11)
611 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.local_invocation_ids);
612 else
613 ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_INT, &args->ac.local_invocation_ids);
614 break;
615 case MESA_SHADER_VERTEX:
616 /* NGG is handled by the GS case */
617 assert(!info->is_ngg);
618
619 declare_vs_specific_input_sgprs(info, args);
620
621 declare_global_input_sgprs(info, user_sgpr_info, args);
622
623 if (info->uses_view_index) {
624 add_ud_arg(args, 1, AC_ARG_INT, &args->ac.view_index, AC_UD_VIEW_INDEX);
625 }
626
627 if (info->force_vrs_per_vertex) {
628 add_ud_arg(args, 1, AC_ARG_INT, &args->ac.force_vrs_rates, AC_UD_FORCE_VRS_RATES);
629 }
630
631 if (info->vs.as_es) {
632 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.es2gs_offset);
633 } else if (info->vs.as_ls) {
634 /* no extra parameters */
635 } else {
636 declare_streamout_sgprs(info, args, stage);
637 }
638
639 if (args->explicit_scratch_args) {
640 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
641 }
642
643 declare_vs_input_vgprs(gfx_level, info, args, false);
644 break;
645 case MESA_SHADER_TESS_CTRL:
646 if (previous_stage != MESA_SHADER_NONE) {
647 // First 6 system regs
648 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
649 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.merged_wave_info);
650 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tcs_factor_offset);
651
652 if (gfx_level >= GFX11) {
653 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tcs_wave_id);
654 } else {
655 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
656 }
657
658 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
659 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
660
661 if (info->merged_shader_compiled_separately) {
662 declare_unmerged_vs_tcs_args(gfx_level, info, user_sgpr_info, args);
663 } else {
664 declare_vs_specific_input_sgprs(info, args);
665
666 declare_global_input_sgprs(info, user_sgpr_info, args);
667
668 if (info->uses_view_index) {
669 add_ud_arg(args, 1, AC_ARG_INT, &args->ac.view_index, AC_UD_VIEW_INDEX);
670 }
671
672 if (radv_tcs_needs_state_sgpr(info, gfx_state)) {
673 add_ud_arg(args, 1, AC_ARG_INT, &args->tcs_offchip_layout, AC_UD_TCS_OFFCHIP_LAYOUT);
674 }
675
676 if (info->has_epilog) {
677 add_ud_arg(args, 1, AC_ARG_INT, &args->tcs_epilog_pc, AC_UD_TCS_EPILOG_PC);
678 }
679
680 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_patch_id);
681 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_rel_ids);
682
683 declare_vs_input_vgprs(gfx_level, info, args, true);
684 }
685 } else {
686 declare_global_input_sgprs(info, user_sgpr_info, args);
687
688 if (info->uses_view_index) {
689 add_ud_arg(args, 1, AC_ARG_INT, &args->ac.view_index, AC_UD_VIEW_INDEX);
690 }
691
692 if (radv_tcs_needs_state_sgpr(info, gfx_state)) {
693 add_ud_arg(args, 1, AC_ARG_INT, &args->tcs_offchip_layout, AC_UD_TCS_OFFCHIP_LAYOUT);
694 }
695
696 if (info->has_epilog) {
697 add_ud_arg(args, 1, AC_ARG_INT, &args->tcs_epilog_pc, AC_UD_TCS_EPILOG_PC);
698 }
699
700 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
701 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tcs_factor_offset);
702 if (args->explicit_scratch_args) {
703 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
704 }
705 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_patch_id);
706 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_rel_ids);
707 }
708 break;
709 case MESA_SHADER_TESS_EVAL:
710 /* NGG is handled by the GS case */
711 assert(!info->is_ngg);
712
713 declare_global_input_sgprs(info, user_sgpr_info, args);
714
715 if (info->uses_view_index)
716 add_ud_arg(args, 1, AC_ARG_INT, &args->ac.view_index, AC_UD_VIEW_INDEX);
717
718 if (radv_tes_needs_state_sgpr(info))
719 add_ud_arg(args, 1, AC_ARG_INT, &args->tes_state, AC_UD_TES_STATE);
720
721 if (info->tes.as_es) {
722 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
723 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
724 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.es2gs_offset);
725 } else {
726 declare_streamout_sgprs(info, args, stage);
727 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
728 }
729 if (args->explicit_scratch_args) {
730 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
731 }
732 declare_tes_input_vgprs(args);
733 break;
734 case MESA_SHADER_GEOMETRY:
735 if (previous_stage != MESA_SHADER_NONE) {
736 // First 6 system regs
737 if (info->is_ngg) {
738 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs_tg_info);
739 } else {
740 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs2vs_offset);
741 }
742
743 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.merged_wave_info);
744 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
745
746 if (gfx_level >= GFX11) {
747 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs_attr_offset);
748 } else {
749 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
750 }
751
752 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
753 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
754
755 if (info->merged_shader_compiled_separately) {
756 declare_unmerged_vs_tes_gs_args(gfx_level, info, user_sgpr_info, args);
757 } else {
758 if (previous_stage == MESA_SHADER_VERTEX) {
759 declare_vs_specific_input_sgprs(info, args);
760 } else if (previous_stage == MESA_SHADER_MESH) {
761 declare_ms_input_sgprs(info, args);
762 }
763
764 declare_global_input_sgprs(info, user_sgpr_info, args);
765
766 if (info->uses_view_index) {
767 add_ud_arg(args, 1, AC_ARG_INT, &args->ac.view_index, AC_UD_VIEW_INDEX);
768 }
769
770 if (previous_stage == MESA_SHADER_TESS_EVAL && radv_tes_needs_state_sgpr(info))
771 add_ud_arg(args, 1, AC_ARG_INT, &args->tes_state, AC_UD_TES_STATE);
772
773 if (previous_stage == MESA_SHADER_VERTEX && info->vs.dynamic_num_verts_per_prim)
774 add_ud_arg(args, 1, AC_ARG_INT, &args->num_verts_per_prim, AC_UD_NUM_VERTS_PER_PRIM);
775
776 /* Legacy GS force vrs is handled by GS copy shader. */
777 if (info->force_vrs_per_vertex && info->is_ngg) {
778 add_ud_arg(args, 1, AC_ARG_INT, &args->ac.force_vrs_rates, AC_UD_FORCE_VRS_RATES);
779 }
780
781 if (has_shader_query)
782 add_ud_arg(args, 1, AC_ARG_INT, &args->shader_query_state, AC_UD_SHADER_QUERY_STATE);
783
784 if (info->is_ngg) {
785 declare_ngg_sgprs(info, args, has_ngg_provoking_vtx);
786 }
787
788 if (previous_stage != MESA_SHADER_MESH || !device->physical_device->mesh_fast_launch_2) {
789 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[0]);
790 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[1]);
791 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_prim_id);
792 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_invocation_id);
793 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[2]);
794 }
795 }
796
797 if (previous_stage == MESA_SHADER_VERTEX) {
798 declare_vs_input_vgprs(gfx_level, info, args, false);
799 } else if (previous_stage == MESA_SHADER_TESS_EVAL) {
800 declare_tes_input_vgprs(args);
801 } else if (previous_stage == MESA_SHADER_MESH) {
802 declare_ms_input_vgprs(device, args);
803 }
804 } else {
805 declare_global_input_sgprs(info, user_sgpr_info, args);
806
807 if (info->uses_view_index) {
808 add_ud_arg(args, 1, AC_ARG_INT, &args->ac.view_index, AC_UD_VIEW_INDEX);
809 }
810
811 if (info->force_vrs_per_vertex) {
812 add_ud_arg(args, 1, AC_ARG_INT, &args->ac.force_vrs_rates, AC_UD_FORCE_VRS_RATES);
813 }
814
815 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs2vs_offset);
816 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs_wave_id);
817 if (args->explicit_scratch_args) {
818 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
819 }
820 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[0]);
821 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[1]);
822 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_prim_id);
823 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[2]);
824 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[3]);
825 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[4]);
826 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[5]);
827 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_invocation_id);
828 }
829 break;
830 case MESA_SHADER_FRAGMENT:
831 declare_global_input_sgprs(info, user_sgpr_info, args);
832
833 if (info->has_epilog) {
834 add_ud_arg(args, 1, AC_ARG_INT, &args->ps_epilog_pc, AC_UD_PS_EPILOG_PC);
835 }
836
837 if (radv_ps_needs_state_sgpr(info, gfx_state))
838 add_ud_arg(args, 1, AC_ARG_INT, &args->ps_state, AC_UD_PS_STATE);
839
840 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.prim_mask);
841
842 if (info->ps.pops && gfx_level < GFX11) {
843 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.pops_collision_wave_id);
844 }
845
846 if (info->ps.load_provoking_vtx) {
847 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.load_provoking_vtx);
848 }
849
850 if (args->explicit_scratch_args && gfx_level < GFX11) {
851 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
852 }
853
854 declare_ps_input_vgprs(info, args);
855 break;
856 default:
857 unreachable("Shader stage not implemented");
858 }
859 }
860
861 void
radv_declare_shader_args(const struct radv_device * device,const struct radv_graphics_state_key * gfx_state,const struct radv_shader_info * info,gl_shader_stage stage,gl_shader_stage previous_stage,struct radv_shader_args * args)862 radv_declare_shader_args(const struct radv_device *device, const struct radv_graphics_state_key *gfx_state,
863 const struct radv_shader_info *info, gl_shader_stage stage, gl_shader_stage previous_stage,
864 struct radv_shader_args *args)
865 {
866 declare_shader_args(device, gfx_state, info, stage, previous_stage, args, NULL);
867
868 if (gl_shader_stage_is_rt(stage))
869 return;
870
871 uint32_t num_user_sgprs = args->num_user_sgprs;
872 if (info->loads_push_constants)
873 num_user_sgprs++;
874
875 const enum amd_gfx_level gfx_level = device->physical_device->rad_info.gfx_level;
876 uint32_t available_sgprs = gfx_level >= GFX9 && stage != MESA_SHADER_COMPUTE && stage != MESA_SHADER_TASK ? 32 : 16;
877 uint32_t remaining_sgprs = available_sgprs - num_user_sgprs;
878
879 struct user_sgpr_info user_sgpr_info = {
880 .remaining_sgprs = remaining_sgprs,
881 };
882
883 uint32_t num_desc_set = util_bitcount(info->desc_set_used_mask);
884
885 if (info->merged_shader_compiled_separately || remaining_sgprs < num_desc_set) {
886 user_sgpr_info.indirect_all_descriptor_sets = true;
887 user_sgpr_info.remaining_sgprs--;
888 } else {
889 user_sgpr_info.remaining_sgprs -= num_desc_set;
890 }
891
892 if (!info->merged_shader_compiled_separately)
893 allocate_inline_push_consts(info, &user_sgpr_info);
894
895 declare_shader_args(device, gfx_state, info, stage, previous_stage, args, &user_sgpr_info);
896 }
897
898 void
radv_declare_ps_epilog_args(const struct radv_device * device,const struct radv_ps_epilog_key * key,struct radv_shader_args * args)899 radv_declare_ps_epilog_args(const struct radv_device *device, const struct radv_ps_epilog_key *key,
900 struct radv_shader_args *args)
901 {
902 radv_init_shader_args(device, MESA_SHADER_FRAGMENT, args);
903
904 /* Declare VGPR arguments for depth/stencil/sample exports. */
905 if (key->export_depth)
906 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->depth);
907 if (key->export_stencil)
908 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->stencil);
909 if (key->export_sample_mask)
910 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->sample_mask);
911
912 /* Declare VGPR arguments for color exports. */
913 for (unsigned i = 0; i < MAX_RTS; i++) {
914 const uint8_t color = (key->colors_written >> (i * 4) & 0xf);
915
916 if (!color) {
917 ac_add_arg(&args->ac, AC_ARG_VGPR, 4, AC_ARG_FLOAT, NULL);
918 continue;
919 }
920
921 ac_add_arg(&args->ac, AC_ARG_VGPR, 4, AC_ARG_FLOAT, &args->colors[i]);
922 }
923 }
924
/* Declares the fixed argument layout for a separately-compiled TCS epilog.
 *
 * The main TCS jumps to this epilog (via tcs_epilog_pc), so the order of
 * the arguments below is an ABI between the two shader parts and must not
 * be reordered. The key parameter is currently unused here.
 */
void
radv_declare_tcs_epilog_args(const struct radv_device *device, const struct radv_tcs_epilog_key *key,
                             struct radv_shader_args *args)
{
   radv_init_shader_args(device, MESA_SHADER_TESS_CTRL, args);

   /* Descriptor pointer to the ring buffers. */
   ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_DESC_PTR, &args->ac.ring_offsets);

   /* SGPRs: offchip/tess-factor memory offsets, plus RADV-specific layout
    * and patch base state.
    */
   ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
   ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tcs_factor_offset);
   ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->tcs_offchip_layout);
   ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->patch_base);

   /* VGPRs: per-thread tess control state forwarded from the main shader. */
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->tcs_out_current_patch_data_offset);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->invocation_id);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->rel_patch_id);
}
942