1 /*
2 * Copyright © 2019 Valve Corporation.
3 * Copyright © 2016 Red Hat.
4 * Copyright © 2016 Bas Nieuwenhuizen
5 *
6 * based in part on anv driver which is:
7 * Copyright © 2015 Intel Corporation
8 *
9 * Permission is hereby granted, free of charge, to any person obtaining a
10 * copy of this software and associated documentation files (the "Software"),
11 * to deal in the Software without restriction, including without limitation
12 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
13 * and/or sell copies of the Software, and to permit persons to whom the
14 * Software is furnished to do so, subject to the following conditions:
15 *
16 * The above copyright notice and this permission notice (including the next
17 * paragraph) shall be included in all copies or substantial portions of the
18 * Software.
19 *
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
25 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
26 * IN THE SOFTWARE.
27 */
28
29 #include "radv_shader_args.h"
30 #include "radv_private.h"
31 #include "radv_shader.h"
32
33 static void
set_loc(struct radv_userdata_info * ud_info,uint8_t * sgpr_idx,uint8_t num_sgprs)34 set_loc(struct radv_userdata_info *ud_info, uint8_t *sgpr_idx, uint8_t num_sgprs)
35 {
36 ud_info->sgpr_idx = *sgpr_idx;
37 ud_info->num_sgprs = num_sgprs;
38 *sgpr_idx += num_sgprs;
39 }
40
41 static void
set_loc_shader(struct radv_shader_args * args,int idx,uint8_t * sgpr_idx,uint8_t num_sgprs)42 set_loc_shader(struct radv_shader_args *args, int idx, uint8_t *sgpr_idx, uint8_t num_sgprs)
43 {
44 struct radv_userdata_info *ud_info = &args->user_sgprs_locs.shader_data[idx];
45 assert(ud_info);
46
47 set_loc(ud_info, sgpr_idx, num_sgprs);
48 }
49
50 static void
set_loc_shader_ptr(struct radv_shader_args * args,int idx,uint8_t * sgpr_idx)51 set_loc_shader_ptr(struct radv_shader_args *args, int idx, uint8_t *sgpr_idx)
52 {
53 bool use_32bit_pointers = idx != AC_UD_SCRATCH_RING_OFFSETS &&
54 idx != AC_UD_CS_TASK_RING_OFFSETS && idx != AC_UD_CS_SBT_DESCRIPTORS &&
55 idx != AC_UD_CS_RAY_LAUNCH_SIZE_ADDR;
56
57 set_loc_shader(args, idx, sgpr_idx, use_32bit_pointers ? 1 : 2);
58 }
59
60 static void
set_loc_desc(struct radv_shader_args * args,int idx,uint8_t * sgpr_idx)61 set_loc_desc(struct radv_shader_args *args, int idx, uint8_t *sgpr_idx)
62 {
63 struct radv_userdata_locations *locs = &args->user_sgprs_locs;
64 struct radv_userdata_info *ud_info = &locs->descriptor_sets[idx];
65 assert(ud_info);
66
67 set_loc(ud_info, sgpr_idx, 1);
68
69 locs->descriptor_sets_enabled |= 1u << idx;
70 }
71
/* Bookkeeping for how the user SGPR budget is spent while laying out
 * shader arguments.
 */
struct user_sgpr_info {
   /* Bitmask of push constants that are passed inline in user SGPRs. */
   uint64_t inline_push_constant_mask;
   /* True when all push constants are inlined, so the push constant
    * pointer SGPR is not needed. */
   bool inlined_all_push_consts;
   /* True when the descriptor sets don't fit in user SGPRs and are reached
    * through a single indirect pointer instead. */
   bool indirect_all_descriptor_sets;
   /* User SGPRs still unallocated after the fixed allocations. */
   uint8_t remaining_sgprs;
};
78
79 static uint8_t
count_vs_user_sgprs(const struct radv_shader_info * info)80 count_vs_user_sgprs(const struct radv_shader_info *info)
81 {
82 uint8_t count = 1; /* vertex offset */
83
84 if (info->vs.vb_desc_usage_mask)
85 count++;
86 if (info->vs.needs_draw_id)
87 count++;
88 if (info->vs.needs_base_instance)
89 count++;
90
91 return count;
92 }
93
94 static uint8_t
count_ms_user_sgprs(const struct radv_shader_info * info)95 count_ms_user_sgprs(const struct radv_shader_info *info)
96 {
97 uint8_t count = 1 + 3; /* firstTask + num_work_groups[3] */
98
99 if (info->vs.needs_draw_id)
100 count++;
101 if (info->cs.uses_task_rings)
102 count++;
103
104 return count;
105 }
106
107 static unsigned
count_ngg_sgprs(const struct radv_shader_info * info,bool has_ngg_query)108 count_ngg_sgprs(const struct radv_shader_info *info, bool has_ngg_query)
109 {
110 unsigned count = 0;
111
112 if (has_ngg_query)
113 count += 1; /* ngg_query_state */
114 if (info->has_ngg_culling)
115 count += 5; /* ngg_culling_settings + 4x ngg_viewport_* */
116
117 return count;
118 }
119
/* Decide which push constants are passed inline in user SGPRs instead of
 * being loaded through the push constant pointer, and charge them against
 * the remaining user SGPR budget.
 */
static void
allocate_inline_push_consts(const struct radv_shader_info *info,
                            struct user_sgpr_info *user_sgpr_info)
{
   uint8_t remaining_sgprs = user_sgpr_info->remaining_sgprs;

   /* Nothing to inline. */
   if (!info->inline_push_constant_mask)
      return;

   uint64_t mask = info->inline_push_constant_mask;
   uint8_t num_push_consts = util_bitcount64(mask);

   /* Disable the default push constants path if all constants can be inlined and if shaders don't
    * use dynamic descriptors.
    */
   if (num_push_consts <= MIN2(remaining_sgprs + 1, AC_MAX_INLINE_PUSH_CONSTS) &&
       info->can_inline_all_push_constants && !info->loads_dynamic_offsets) {
      user_sgpr_info->inlined_all_push_consts = true;
      /* The SGPR that would have held the push constant pointer is freed,
       * which is why one extra inline constant was allowed above
       * (remaining_sgprs + 1). */
      remaining_sgprs++;
   } else {
      /* Clamp to the maximum number of allowed inlined push constants. */
      while (num_push_consts > MIN2(remaining_sgprs, AC_MAX_INLINE_PUSH_CONSTS_WITH_INDIRECT)) {
         /* Drop the highest set bit (the last constant) from the mask. */
         num_push_consts--;
         mask &= ~BITFIELD64_BIT(util_last_bit64(mask) - 1);
      }
   }

   user_sgpr_info->remaining_sgprs = remaining_sgprs - util_bitcount64(mask);
   user_sgpr_info->inline_push_constant_mask = mask;
}
150
/* Count how many user SGPRs each feature of this stage needs, decide whether
 * the descriptor sets fit directly in user SGPRs or must go through one
 * indirect pointer, and compute the budget left for inline push constants.
 */
static void
allocate_user_sgprs(enum amd_gfx_level gfx_level, const struct radv_shader_info *info,
                    struct radv_shader_args *args, gl_shader_stage stage, bool has_previous_stage,
                    gl_shader_stage previous_stage, bool needs_view_index, bool has_ngg_query,
                    struct user_sgpr_info *user_sgpr_info)
{
   uint8_t user_sgpr_count = 0;

   memset(user_sgpr_info, 0, sizeof(struct user_sgpr_info));

   /* 2 user sgprs will always be allocated for scratch/rings */
   user_sgpr_count += 2;

   if (stage == MESA_SHADER_TASK)
      user_sgpr_count += 2; /* task descriptors */

   /* prolog inputs */
   if (info->vs.has_prolog)
      user_sgpr_count += 2;

   /* Stage-specific system values. The counts here must mirror the
    * ac_add_arg calls made in radv_declare_shader_args for the same
    * conditions. */
   switch (stage) {
   case MESA_SHADER_COMPUTE:
   case MESA_SHADER_TASK:
      if (info->cs.uses_sbt)
         user_sgpr_count += 2;
      if (info->cs.uses_grid_size)
         user_sgpr_count += args->load_grid_size_from_user_sgpr ? 3 : 2;
      if (info->cs.uses_ray_launch_size)
         user_sgpr_count += 2;
      if (info->vs.needs_draw_id)
         user_sgpr_count += 1;
      if (info->cs.uses_task_rings)
         user_sgpr_count += 4; /* ring_entry, 2x ib_addr, ib_stride */
      break;
   case MESA_SHADER_FRAGMENT:
      /* epilog continue PC */
      if (info->ps.has_epilog)
         user_sgpr_count += 1;
      break;
   case MESA_SHADER_VERTEX:
      if (!args->is_gs_copy_shader)
         user_sgpr_count += count_vs_user_sgprs(info);
      break;
   case MESA_SHADER_TESS_CTRL:
      if (has_previous_stage) {
         /* Merged VS+TCS carries the VS user SGPRs. */
         if (previous_stage == MESA_SHADER_VERTEX)
            user_sgpr_count += count_vs_user_sgprs(info);
      }
      break;
   case MESA_SHADER_TESS_EVAL:
      break;
   case MESA_SHADER_GEOMETRY:
      if (has_previous_stage) {
         /* NGG shaders are all handled as merged GS. */
         if (info->is_ngg)
            user_sgpr_count += count_ngg_sgprs(info, has_ngg_query);

         if (previous_stage == MESA_SHADER_VERTEX) {
            user_sgpr_count += count_vs_user_sgprs(info);
         } else if (previous_stage == MESA_SHADER_MESH) {
            user_sgpr_count += count_ms_user_sgprs(info);
         }
      }
      break;
   default:
      break;
   }

   if (needs_view_index)
      user_sgpr_count++;

   if (info->force_vrs_per_vertex)
      user_sgpr_count++;

   if (info->loads_push_constants)
      user_sgpr_count++;

   if (info->so.num_outputs)
      user_sgpr_count++;

   /* On GFX9+ merged graphics stages have 32 user SGPRs available;
    * everything else has 16. */
   uint32_t available_sgprs =
      gfx_level >= GFX9 && stage != MESA_SHADER_COMPUTE && stage != MESA_SHADER_TASK ? 32 : 16;
   uint32_t remaining_sgprs = available_sgprs - user_sgpr_count;
   uint32_t num_desc_set = util_bitcount(info->desc_set_used_mask);

   if (remaining_sgprs < num_desc_set) {
      /* Not enough room for one SGPR per set: spend a single SGPR on an
       * indirect pointer to all descriptor sets instead. */
      user_sgpr_info->indirect_all_descriptor_sets = true;
      user_sgpr_info->remaining_sgprs = remaining_sgprs - 1;
   } else {
      user_sgpr_info->remaining_sgprs = remaining_sgprs - num_desc_set;
   }

   allocate_inline_push_consts(info, user_sgpr_info);
}
244
/* Declare the user SGPRs shared by all stages: descriptor sets (direct or
 * indirect), the push constant pointer, inlined push constants and the
 * streamout buffer descriptor. The declaration order here must match the
 * location assignment in set_global_input_locs.
 */
static void
declare_global_input_sgprs(const struct radv_shader_info *info,
                           const struct user_sgpr_info *user_sgpr_info,
                           struct radv_shader_args *args)
{
   /* 1 for each descriptor set */
   if (!user_sgpr_info->indirect_all_descriptor_sets) {
      uint32_t mask = info->desc_set_used_mask;

      while (mask) {
         int i = u_bit_scan(&mask);

         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR, &args->descriptor_sets[i]);
      }
   } else {
      /* Sets don't fit: one pointer to an array of set pointers. */
      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR_PTR, &args->descriptor_sets[0]);
   }

   if (info->loads_push_constants && !user_sgpr_info->inlined_all_push_consts) {
      /* 1 for push constants and dynamic descriptors */
      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR, &args->ac.push_constants);
   }

   /* One SGPR per inlined push constant. */
   for (unsigned i = 0; i < util_bitcount64(user_sgpr_info->inline_push_constant_mask); i++) {
      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.inline_push_consts[i]);
   }
   args->ac.inline_push_const_mask = user_sgpr_info->inline_push_constant_mask;

   if (info->so.num_outputs) {
      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR, &args->streamout_buffers);
   }
}
277
/* Declare the VS-specific user SGPRs (prolog inputs, vertex buffer table,
 * base vertex / draw id / base instance) for a VS or for a merged stage
 * whose first half is a VS. The GS copy shader takes none of these.
 */
static void
declare_vs_specific_input_sgprs(const struct radv_shader_info *info, struct radv_shader_args *args,
                                gl_shader_stage stage, bool has_previous_stage,
                                gl_shader_stage previous_stage)
{
   if (info->vs.has_prolog)
      ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_INT, &args->prolog_inputs);

   if (!args->is_gs_copy_shader && (stage == MESA_SHADER_VERTEX ||
                                    (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) {
      if (info->vs.vb_desc_usage_mask) {
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR, &args->ac.vertex_buffers);
      }
      /* base_vertex is always declared; draw id and start instance only when used. */
      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.base_vertex);
      if (info->vs.needs_draw_id) {
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.draw_id);
      }
      if (info->vs.needs_base_instance) {
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.start_instance);
      }
   }
}
300
/* Declare the VS input VGPRs. The layout is fixed by the hardware and
 * differs per generation and per configuration (LS for tessellation, NGG,
 * legacy), so the declaration order here must not change.
 */
static void
declare_vs_input_vgprs(enum amd_gfx_level gfx_level, const struct radv_shader_info *info,
                       struct radv_shader_args *args)
{
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vertex_id);
   if (!args->is_gs_copy_shader) {
      if (info->vs.as_ls) {

         if (gfx_level >= GFX11) {
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user VGPR */
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user VGPR */
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
         } else if (gfx_level >= GFX10) {
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_rel_patch_id);
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
         } else {
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_rel_patch_id);
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */
         }
      } else {
         if (gfx_level >= GFX10) {
            if (info->is_ngg) {
               ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
               ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
               ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
            } else {
               ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */
               ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_prim_id);
               ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
            }
         } else {
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_prim_id);
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */
         }
      }
   }

   if (info->vs.dynamic_inputs) {
      /* With dynamic vertex input state the prolog fetches attributes and
       * passes each one as a 4-dword VGPR argument. */
      assert(info->vs.use_per_attribute_vb_descs);
      unsigned num_attributes = util_last_bit(info->vs.vb_desc_usage_mask);
      for (unsigned i = 0; i < num_attributes; i++)
         ac_add_arg(&args->ac, AC_ARG_VGPR, 4, AC_ARG_INT, &args->vs_inputs[i]);
      /* Ensure the main shader doesn't use less vgprs than the prolog. The prolog requires one
       * VGPR more than the number of shader arguments in the case of non-trivial divisors on GFX8.
       */
      ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL);
   }
}
352
/* Declare the streamout (transform feedback) SGPRs: config, write index and
 * one buffer offset per streamout buffer with a non-zero stride.
 */
static void
declare_streamout_sgprs(const struct radv_shader_info *info, struct radv_shader_args *args,
                        gl_shader_stage stage)
{
   int i;

   /* Streamout SGPRs. */
   if (info->so.num_outputs) {
      assert(stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_TESS_EVAL);

      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.streamout_config);
      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.streamout_write_index);
   } else if (stage == MESA_SHADER_TESS_EVAL) {
      /* TES keeps the SGPR slot reserved even without streamout. */
      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
   }

   /* A streamout buffer offset is loaded if the stride is non-zero. */
   for (i = 0; i < 4; i++) {
      if (!info->so.strides[i])
         continue;

      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.streamout_offset[i]);
   }
}
377
/* Declare the fixed TES input VGPRs: tess coords (u, v), relative patch id
 * and patch id. The order is mandated by the hardware.
 */
static void
declare_tes_input_vgprs(struct radv_shader_args *args)
{
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.tes_u);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.tes_v);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tes_rel_patch_id);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tes_patch_id);
}
386
/* Declare the mesh shader input SGPRs; the count must agree with
 * count_ms_user_sgprs.
 */
static void
declare_ms_input_sgprs(const struct radv_shader_info *info, struct radv_shader_args *args)
{
   /* base_vertex carries firstTask for mesh shaders. */
   ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.base_vertex);
   ac_add_arg(&args->ac, AC_ARG_SGPR, 3, AC_ARG_INT, &args->ac.num_work_groups);
   if (info->vs.needs_draw_id) {
      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.draw_id);
   }
   if (info->cs.uses_task_rings) {
      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.task_ring_entry);
   }
}
399
/* Declare the mesh shader input VGPRs. Only vertex_id (the thread id) is
 * consumed; the remaining slots are reserved to keep the hardware layout.
 */
static void
declare_ms_input_vgprs(struct radv_shader_args *args)
{
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vertex_id);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* instance_id */
}
408
409 static void
declare_ps_input_vgprs(const struct radv_shader_info * info,struct radv_shader_args * args)410 declare_ps_input_vgprs(const struct radv_shader_info *info, struct radv_shader_args *args)
411 {
412 unsigned spi_ps_input = info->ps.spi_ps_input;
413
414 ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_sample);
415 ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_center);
416 ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_centroid);
417 ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_INT, &args->ac.pull_model);
418 ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_sample);
419 ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_center);
420 ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_centroid);
421 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, NULL); /* line stipple tex */
422 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[0]);
423 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[1]);
424 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[2]);
425 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[3]);
426 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.front_face);
427 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.ancillary);
428 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.sample_coverage);
429 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* fixed pt */
430
431 if (args->remap_spi_ps_input) {
432 /* LLVM optimizes away unused FS inputs and computes spi_ps_input_addr itself and then
433 * communicates the results back via the ELF binary. Mirror what LLVM does by re-mapping the
434 * VGPR arguments here.
435 */
436 unsigned arg_count = 0;
437 for (unsigned i = 0, vgpr_arg = 0, vgpr_reg = 0; i < args->ac.arg_count; i++) {
438 if (args->ac.args[i].file != AC_ARG_VGPR) {
439 arg_count++;
440 continue;
441 }
442
443 if (!(spi_ps_input & (1 << vgpr_arg))) {
444 args->ac.args[i].skip = true;
445 } else {
446 args->ac.args[i].offset = vgpr_reg;
447 vgpr_reg += args->ac.args[i].size;
448 arg_count++;
449 }
450 vgpr_arg++;
451 }
452 }
453
454 if (info->ps.has_epilog) {
455 /* FIXME: Ensure the main shader doesn't have less VGPRs than the epilog */
456 for (unsigned i = 0; i < MAX_RTS; i++)
457 ac_add_arg(&args->ac, AC_ARG_VGPR, 4, AC_ARG_INT, NULL);
458 }
459 }
460
/* Declare the NGG-specific user SGPRs; the count must agree with
 * count_ngg_sgprs.
 */
static void
declare_ngg_sgprs(const struct radv_shader_info *info, struct radv_shader_args *args,
                  bool has_ngg_query)
{
   if (has_ngg_query)
      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_query_state);

   if (info->has_ngg_culling) {
      /* Culling settings plus 2D viewport scale/translate for small-prim culling. */
      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_culling_settings);
      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_viewport_scale[0]);
      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_viewport_scale[1]);
      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_viewport_translate[0]);
      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_viewport_translate[1]);
   }
}
476
/* Assign user SGPR locations for the globally shared arguments, in the same
 * order they were declared by declare_global_input_sgprs.
 */
static void
set_global_input_locs(struct radv_shader_args *args, const struct user_sgpr_info *user_sgpr_info,
                      uint8_t *user_sgpr_idx)
{
   if (!user_sgpr_info->indirect_all_descriptor_sets) {
      /* One SGPR per descriptor set that was actually declared. */
      for (unsigned i = 0; i < ARRAY_SIZE(args->descriptor_sets); i++) {
         if (args->descriptor_sets[i].used)
            set_loc_desc(args, i, user_sgpr_idx);
      }
   } else {
      /* Single pointer to the array of descriptor set pointers. */
      set_loc_shader_ptr(args, AC_UD_INDIRECT_DESCRIPTOR_SETS, user_sgpr_idx);
   }

   if (args->ac.push_constants.used) {
      set_loc_shader_ptr(args, AC_UD_PUSH_CONSTANTS, user_sgpr_idx);
   }

   if (user_sgpr_info->inline_push_constant_mask) {
      /* One SGPR per inlined push constant. */
      set_loc_shader(args, AC_UD_INLINE_PUSH_CONSTANTS, user_sgpr_idx,
                     util_bitcount64(user_sgpr_info->inline_push_constant_mask));
   }

   if (args->streamout_buffers.used) {
      set_loc_shader_ptr(args, AC_UD_STREAMOUT_BUFFERS, user_sgpr_idx);
   }
}
503
/* Assign user SGPR locations for the VS-specific arguments, in the same
 * order they were declared by declare_vs_specific_input_sgprs.
 */
static void
set_vs_specific_input_locs(struct radv_shader_args *args, gl_shader_stage stage,
                           bool has_previous_stage, gl_shader_stage previous_stage,
                           uint8_t *user_sgpr_idx)
{
   if (args->prolog_inputs.used)
      set_loc_shader(args, AC_UD_VS_PROLOG_INPUTS, user_sgpr_idx, 2);

   if (!args->is_gs_copy_shader && (stage == MESA_SHADER_VERTEX ||
                                    (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) {
      if (args->ac.vertex_buffers.used) {
         set_loc_shader_ptr(args, AC_UD_VS_VERTEX_BUFFERS, user_sgpr_idx);
      }

      /* base_vertex/draw_id/start_instance share one contiguous location;
       * '.used' is 0/1 so the sum is the number of declared SGPRs. */
      unsigned vs_num = args->ac.base_vertex.used + args->ac.draw_id.used +
                        args->ac.start_instance.used;
      set_loc_shader(args, AC_UD_VS_BASE_VERTEX_START_INSTANCE, user_sgpr_idx, vs_num);
   }
}
523
/* Assign user SGPR locations for the mesh shader arguments, in the same
 * order they were declared by declare_ms_input_sgprs.
 */
static void
set_ms_input_locs(struct radv_shader_args *args, uint8_t *user_sgpr_idx)
{
   /* firstTask (base_vertex), the 3-dword num_work_groups and draw id share
    * one contiguous location. */
   unsigned vs_num =
      args->ac.base_vertex.used + 3 * args->ac.num_work_groups.used + args->ac.draw_id.used;
   set_loc_shader(args, AC_UD_VS_BASE_VERTEX_START_INSTANCE, user_sgpr_idx, vs_num);

   if (args->ac.task_ring_entry.used)
      set_loc_shader(args, AC_UD_TASK_RING_ENTRY, user_sgpr_idx, 1);
}
534
535 void
radv_declare_shader_args(enum amd_gfx_level gfx_level,const struct radv_pipeline_key * key,const struct radv_shader_info * info,gl_shader_stage stage,bool has_previous_stage,gl_shader_stage previous_stage,struct radv_shader_args * args)536 radv_declare_shader_args(enum amd_gfx_level gfx_level, const struct radv_pipeline_key *key,
537 const struct radv_shader_info *info, gl_shader_stage stage,
538 bool has_previous_stage, gl_shader_stage previous_stage,
539 struct radv_shader_args *args)
540 {
541 struct user_sgpr_info user_sgpr_info;
542 bool needs_view_index = info->uses_view_index;
543 bool has_ngg_query = stage == MESA_SHADER_GEOMETRY || key->primitives_generated_query;
544
545 if (gfx_level >= GFX10 && info->is_ngg && stage != MESA_SHADER_GEOMETRY) {
546 /* Handle all NGG shaders as GS to simplify the code here. */
547 previous_stage = stage;
548 stage = MESA_SHADER_GEOMETRY;
549 has_previous_stage = true;
550 }
551
552 for (int i = 0; i < MAX_SETS; i++)
553 args->user_sgprs_locs.descriptor_sets[i].sgpr_idx = -1;
554 for (int i = 0; i < AC_UD_MAX_UD; i++)
555 args->user_sgprs_locs.shader_data[i].sgpr_idx = -1;
556
557 allocate_user_sgprs(gfx_level, info, args, stage, has_previous_stage, previous_stage,
558 needs_view_index, has_ngg_query, &user_sgpr_info);
559
560 if (args->explicit_scratch_args) {
561 ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_DESC_PTR, &args->ring_offsets);
562 }
563 if (stage == MESA_SHADER_TASK) {
564 ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_DESC_PTR, &args->task_ring_offsets);
565 }
566
567 /* To ensure prologs match the main VS, VS specific input SGPRs have to be placed before other
568 * sgprs.
569 */
570
571 switch (stage) {
572 case MESA_SHADER_COMPUTE:
573 case MESA_SHADER_TASK:
574 declare_global_input_sgprs(info, &user_sgpr_info, args);
575
576 if (info->cs.uses_sbt) {
577 ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_PTR, &args->ac.sbt_descriptors);
578 }
579
580 if (info->cs.uses_grid_size) {
581 if (args->load_grid_size_from_user_sgpr)
582 ac_add_arg(&args->ac, AC_ARG_SGPR, 3, AC_ARG_INT, &args->ac.num_work_groups);
583 else
584 ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_PTR, &args->ac.num_work_groups);
585 }
586
587 if (info->cs.uses_ray_launch_size) {
588 ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_PTR, &args->ac.ray_launch_size_addr);
589 }
590
591 if (info->vs.needs_draw_id) {
592 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.draw_id);
593 }
594
595 if (info->cs.uses_task_rings) {
596 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.task_ring_entry);
597 ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_INT, &args->task_ib_addr);
598 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->task_ib_stride);
599 }
600
601 for (int i = 0; i < 3; i++) {
602 if (info->cs.uses_block_id[i]) {
603 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.workgroup_ids[i]);
604 }
605 }
606
607 if (info->cs.uses_local_invocation_idx) {
608 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tg_size);
609 }
610
611 if (args->explicit_scratch_args && gfx_level < GFX11) {
612 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
613 }
614
615 if (gfx_level >= GFX11)
616 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.local_invocation_ids);
617 else
618 ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_INT, &args->ac.local_invocation_ids);
619 break;
620 case MESA_SHADER_VERTEX:
621 /* NGG is handled by the GS case */
622 assert(!info->is_ngg);
623
624 declare_vs_specific_input_sgprs(info, args, stage, has_previous_stage, previous_stage);
625
626 declare_global_input_sgprs(info, &user_sgpr_info, args);
627
628 if (needs_view_index) {
629 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index);
630 }
631
632 if (info->force_vrs_per_vertex) {
633 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.force_vrs_rates);
634 }
635
636 if (info->vs.as_es) {
637 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.es2gs_offset);
638 } else if (info->vs.as_ls) {
639 /* no extra parameters */
640 } else {
641 declare_streamout_sgprs(info, args, stage);
642 }
643
644 if (args->explicit_scratch_args) {
645 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
646 }
647
648 declare_vs_input_vgprs(gfx_level, info, args);
649 break;
650 case MESA_SHADER_TESS_CTRL:
651 if (has_previous_stage) {
652 // First 6 system regs
653 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
654 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.merged_wave_info);
655 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tcs_factor_offset);
656
657 if (gfx_level >= GFX11) {
658 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tcs_wave_id);
659 } else {
660 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
661 }
662
663 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
664 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
665
666 declare_vs_specific_input_sgprs(info, args, stage, has_previous_stage, previous_stage);
667
668 declare_global_input_sgprs(info, &user_sgpr_info, args);
669
670 if (needs_view_index) {
671 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index);
672 }
673
674 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_patch_id);
675 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_rel_ids);
676
677 declare_vs_input_vgprs(gfx_level, info, args);
678 } else {
679 declare_global_input_sgprs(info, &user_sgpr_info, args);
680
681 if (needs_view_index) {
682 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index);
683 }
684
685 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
686 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tcs_factor_offset);
687 if (args->explicit_scratch_args) {
688 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
689 }
690 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_patch_id);
691 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_rel_ids);
692 }
693 break;
694 case MESA_SHADER_TESS_EVAL:
695 /* NGG is handled by the GS case */
696 assert(!info->is_ngg);
697
698 declare_global_input_sgprs(info, &user_sgpr_info, args);
699
700 if (needs_view_index)
701 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index);
702
703 if (info->tes.as_es) {
704 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
705 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
706 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.es2gs_offset);
707 } else {
708 declare_streamout_sgprs(info, args, stage);
709 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
710 }
711 if (args->explicit_scratch_args) {
712 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
713 }
714 declare_tes_input_vgprs(args);
715 break;
716 case MESA_SHADER_GEOMETRY:
717 if (has_previous_stage) {
718 // First 6 system regs
719 if (info->is_ngg) {
720 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs_tg_info);
721 } else {
722 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs2vs_offset);
723 }
724
725 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.merged_wave_info);
726 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
727
728 if (gfx_level < GFX11) {
729 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
730 }
731
732 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
733 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
734
735 if (previous_stage == MESA_SHADER_VERTEX) {
736 declare_vs_specific_input_sgprs(info, args, stage, has_previous_stage, previous_stage);
737 } else if (previous_stage == MESA_SHADER_MESH) {
738 declare_ms_input_sgprs(info, args);
739 }
740
741 declare_global_input_sgprs(info, &user_sgpr_info, args);
742
743 if (needs_view_index) {
744 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index);
745 }
746
747 if (info->force_vrs_per_vertex) {
748 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.force_vrs_rates);
749 }
750
751 if (info->is_ngg) {
752 declare_ngg_sgprs(info, args, has_ngg_query);
753 }
754
755 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[0]);
756 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[1]);
757 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_prim_id);
758 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_invocation_id);
759 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[2]);
760
761 if (previous_stage == MESA_SHADER_VERTEX) {
762 declare_vs_input_vgprs(gfx_level, info, args);
763 } else if (previous_stage == MESA_SHADER_TESS_EVAL) {
764 declare_tes_input_vgprs(args);
765 } else if (previous_stage == MESA_SHADER_MESH) {
766 declare_ms_input_vgprs(args);
767 }
768 } else {
769 declare_global_input_sgprs(info, &user_sgpr_info, args);
770
771 if (needs_view_index) {
772 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index);
773 }
774
775 if (info->force_vrs_per_vertex) {
776 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.force_vrs_rates);
777 }
778
779 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs2vs_offset);
780 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs_wave_id);
781 if (args->explicit_scratch_args) {
782 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
783 }
784 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[0]);
785 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[1]);
786 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_prim_id);
787 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[2]);
788 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[3]);
789 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[4]);
790 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[5]);
791 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_invocation_id);
792 }
793 break;
794 case MESA_SHADER_FRAGMENT:
795 declare_global_input_sgprs(info, &user_sgpr_info, args);
796
797 if (info->ps.has_epilog) {
798 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ps_epilog_pc);
799 }
800
801 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.prim_mask);
802 if (args->explicit_scratch_args && gfx_level < GFX11) {
803 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
804 }
805
806 declare_ps_input_vgprs(info, args);
807 break;
808 default:
809 unreachable("Shader stage not implemented");
810 }
811
812 uint8_t user_sgpr_idx = 0;
813
814 set_loc_shader_ptr(args, AC_UD_SCRATCH_RING_OFFSETS, &user_sgpr_idx);
815 if (stage == MESA_SHADER_TASK) {
816 set_loc_shader_ptr(args, AC_UD_CS_TASK_RING_OFFSETS, &user_sgpr_idx);
817 }
818
819 /* For merged shaders the user SGPRs start at 8, with 8 system SGPRs in front (including
820 * the rw_buffers at s0/s1. With user SGPR0 = s8, lets restart the count from 0 */
821 if (has_previous_stage)
822 user_sgpr_idx = 0;
823
824 if (stage == MESA_SHADER_VERTEX || (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))
825 set_vs_specific_input_locs(args, stage, has_previous_stage, previous_stage, &user_sgpr_idx);
826 else if (has_previous_stage && previous_stage == MESA_SHADER_MESH)
827 set_ms_input_locs(args, &user_sgpr_idx);
828
829 set_global_input_locs(args, &user_sgpr_info, &user_sgpr_idx);
830
831 switch (stage) {
832 case MESA_SHADER_COMPUTE:
833 case MESA_SHADER_TASK:
834 if (args->ac.sbt_descriptors.used) {
835 set_loc_shader_ptr(args, AC_UD_CS_SBT_DESCRIPTORS, &user_sgpr_idx);
836 }
837 if (args->ac.num_work_groups.used) {
838 set_loc_shader(args, AC_UD_CS_GRID_SIZE, &user_sgpr_idx,
839 args->load_grid_size_from_user_sgpr ? 3 : 2);
840 }
841 if (args->ac.ray_launch_size_addr.used) {
842 set_loc_shader_ptr(args, AC_UD_CS_RAY_LAUNCH_SIZE_ADDR, &user_sgpr_idx);
843 }
844 if (args->ac.draw_id.used) {
845 set_loc_shader(args, AC_UD_CS_TASK_DRAW_ID, &user_sgpr_idx, 1);
846 }
847 if (args->ac.task_ring_entry.used) {
848 set_loc_shader(args, AC_UD_TASK_RING_ENTRY, &user_sgpr_idx, 1);
849 }
850 if (args->task_ib_addr.used) {
851 assert(args->task_ib_stride.used);
852 set_loc_shader(args, AC_UD_CS_TASK_IB, &user_sgpr_idx, 3);
853 }
854 break;
855 case MESA_SHADER_VERTEX:
856 if (args->ac.view_index.used)
857 set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
858 if (args->ac.force_vrs_rates.used)
859 set_loc_shader(args, AC_UD_FORCE_VRS_RATES, &user_sgpr_idx, 1);
860 break;
861 case MESA_SHADER_TESS_CTRL:
862 if (args->ac.view_index.used)
863 set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
864 break;
865 case MESA_SHADER_TESS_EVAL:
866 if (args->ac.view_index.used)
867 set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
868 break;
869 case MESA_SHADER_GEOMETRY:
870 if (args->ac.view_index.used)
871 set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
872
873 if (args->ac.force_vrs_rates.used)
874 set_loc_shader(args, AC_UD_FORCE_VRS_RATES, &user_sgpr_idx, 1);
875
876 if (args->ngg_query_state.used) {
877 set_loc_shader(args, AC_UD_NGG_QUERY_STATE, &user_sgpr_idx, 1);
878 }
879
880 if (args->ngg_culling_settings.used) {
881 set_loc_shader(args, AC_UD_NGG_CULLING_SETTINGS, &user_sgpr_idx, 1);
882 }
883
884 if (args->ngg_viewport_scale[0].used) {
885 assert(args->ngg_viewport_scale[1].used &&
886 args->ngg_viewport_translate[0].used &&
887 args->ngg_viewport_translate[1].used);
888 set_loc_shader(args, AC_UD_NGG_VIEWPORT, &user_sgpr_idx, 4);
889 }
890 break;
891 case MESA_SHADER_FRAGMENT:
892 if (args->ps_epilog_pc.used)
893 set_loc_shader(args, AC_UD_PS_EPILOG_PC, &user_sgpr_idx, 1);
894 break;
895 default:
896 unreachable("Shader stage not implemented");
897 }
898
899 args->num_user_sgprs = user_sgpr_idx;
900 }
901
902 void
radv_declare_ps_epilog_args(enum amd_gfx_level gfx_level,const struct radv_ps_epilog_key * key,struct radv_shader_args * args)903 radv_declare_ps_epilog_args(enum amd_gfx_level gfx_level, const struct radv_ps_epilog_key *key,
904 struct radv_shader_args *args)
905 {
906 unsigned num_inputs = 0;
907
908 ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_DESC_PTR, &args->ring_offsets);
909 if (gfx_level < GFX11)
910 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
911
912 /* Declare VGPR arguments for color exports. */
913 for (unsigned i = 0; i < MAX_RTS; i++) {
914 unsigned col_format = (key->spi_shader_col_format >> (i * 4)) & 0xf;
915
916 if (col_format == V_028714_SPI_SHADER_ZERO)
917 continue;
918
919 ac_add_arg(&args->ac, AC_ARG_VGPR, 4, AC_ARG_FLOAT, &args->ps_epilog_inputs[num_inputs]);
920 num_inputs++;
921 }
922 }
923