1 /*
2 * Copyright © 2019 Valve Corporation.
3 * Copyright © 2016 Red Hat.
4 * Copyright © 2016 Bas Nieuwenhuizen
5 *
6 * based in part on anv driver which is:
7 * Copyright © 2015 Intel Corporation
8 *
9 * Permission is hereby granted, free of charge, to any person obtaining a
10 * copy of this software and associated documentation files (the "Software"),
11 * to deal in the Software without restriction, including without limitation
12 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
13 * and/or sell copies of the Software, and to permit persons to whom the
14 * Software is furnished to do so, subject to the following conditions:
15 *
16 * The above copyright notice and this permission notice (including the next
17 * paragraph) shall be included in all copies or substantial portions of the
18 * Software.
19 *
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
25 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
26 * IN THE SOFTWARE.
27 */
28
29 #include "radv_shader_args.h"
30 #include "radv_private.h"
31 #include "radv_shader.h"
32
33 static void
set_loc(struct radv_userdata_info * ud_info,uint8_t * sgpr_idx,uint8_t num_sgprs)34 set_loc(struct radv_userdata_info *ud_info, uint8_t *sgpr_idx, uint8_t num_sgprs)
35 {
36 ud_info->sgpr_idx = *sgpr_idx;
37 ud_info->num_sgprs = num_sgprs;
38 *sgpr_idx += num_sgprs;
39 }
40
41 static void
set_loc_shader(struct radv_shader_args * args,int idx,uint8_t * sgpr_idx,uint8_t num_sgprs)42 set_loc_shader(struct radv_shader_args *args, int idx, uint8_t *sgpr_idx, uint8_t num_sgprs)
43 {
44 struct radv_userdata_info *ud_info = &args->shader_info->user_sgprs_locs.shader_data[idx];
45 assert(ud_info);
46
47 set_loc(ud_info, sgpr_idx, num_sgprs);
48 }
49
50 static void
set_loc_shader_ptr(struct radv_shader_args * args,int idx,uint8_t * sgpr_idx)51 set_loc_shader_ptr(struct radv_shader_args *args, int idx, uint8_t *sgpr_idx)
52 {
53 bool use_32bit_pointers = idx != AC_UD_SCRATCH_RING_OFFSETS;
54
55 set_loc_shader(args, idx, sgpr_idx, use_32bit_pointers ? 1 : 2);
56 }
57
58 static void
set_loc_desc(struct radv_shader_args * args,int idx,uint8_t * sgpr_idx)59 set_loc_desc(struct radv_shader_args *args, int idx, uint8_t *sgpr_idx)
60 {
61 struct radv_userdata_locations *locs = &args->shader_info->user_sgprs_locs;
62 struct radv_userdata_info *ud_info = &locs->descriptor_sets[idx];
63 assert(ud_info);
64
65 set_loc(ud_info, sgpr_idx, 1);
66
67 locs->descriptor_sets_enabled |= 1u << idx;
68 }
69
/* Result of budgeting the user SGPRs for one shader stage. */
struct user_sgpr_info {
   /* All descriptor sets are reached through one indirect pointer because
    * they don't fit into the available user SGPRs. */
   bool indirect_all_descriptor_sets;
   /* User SGPRs still free after descriptor sets have been assigned. */
   uint8_t remaining_sgprs;
   /* Number of 32-bit push constants passed inline in user SGPRs. */
   unsigned num_inline_push_consts;
   /* True when every used push constant is inlined, allowing the push
    * constant pointer to be omitted. */
   bool inlined_all_push_consts;
};
76
77 static bool
needs_view_index_sgpr(struct radv_shader_args * args,gl_shader_stage stage)78 needs_view_index_sgpr(struct radv_shader_args *args, gl_shader_stage stage)
79 {
80 switch (stage) {
81 case MESA_SHADER_VERTEX:
82 if (args->shader_info->uses_view_index ||
83 (!args->shader_info->vs.as_es && !args->shader_info->vs.as_ls &&
84 args->options->key.has_multiview_view_index))
85 return true;
86 break;
87 case MESA_SHADER_TESS_EVAL:
88 if (args->shader_info->uses_view_index ||
89 (!args->shader_info->tes.as_es && args->options->key.has_multiview_view_index))
90 return true;
91 break;
92 case MESA_SHADER_TESS_CTRL:
93 if (args->shader_info->uses_view_index)
94 return true;
95 break;
96 case MESA_SHADER_GEOMETRY:
97 if (args->shader_info->uses_view_index ||
98 (args->shader_info->is_ngg && args->options->key.has_multiview_view_index))
99 return true;
100 break;
101 default:
102 break;
103 }
104 return false;
105 }
106
107 static uint8_t
count_vs_user_sgprs(struct radv_shader_args * args)108 count_vs_user_sgprs(struct radv_shader_args *args)
109 {
110 uint8_t count = 1; /* vertex offset */
111
112 if (args->shader_info->vs.vb_desc_usage_mask)
113 count++;
114 if (args->shader_info->vs.needs_draw_id)
115 count++;
116 if (args->shader_info->vs.needs_base_instance)
117 count++;
118
119 return count;
120 }
121
122 static unsigned
count_ngg_sgprs(struct radv_shader_args * args,bool has_api_gs)123 count_ngg_sgprs(struct radv_shader_args *args, bool has_api_gs)
124 {
125 unsigned count = 0;
126
127 if (has_api_gs)
128 count += 1; /* ngg_gs_state */
129 if (args->shader_info->has_ngg_culling)
130 count += 5; /* ngg_culling_settings + 4x ngg_viewport_* */
131
132 return count;
133 }
134
135 static void
allocate_inline_push_consts(struct radv_shader_args * args,struct user_sgpr_info * user_sgpr_info)136 allocate_inline_push_consts(struct radv_shader_args *args, struct user_sgpr_info *user_sgpr_info)
137 {
138 uint8_t remaining_sgprs = user_sgpr_info->remaining_sgprs;
139
140 /* Only supported if shaders use push constants. */
141 if (args->shader_info->min_push_constant_used == UINT8_MAX)
142 return;
143
144 /* Only supported if shaders don't have indirect push constants. */
145 if (args->shader_info->has_indirect_push_constants)
146 return;
147
148 /* Only supported for 32-bit push constants. */
149 if (!args->shader_info->has_only_32bit_push_constants)
150 return;
151
152 uint8_t num_push_consts =
153 (args->shader_info->max_push_constant_used - args->shader_info->min_push_constant_used) / 4;
154
155 /* Check if the number of user SGPRs is large enough. */
156 if (num_push_consts < remaining_sgprs) {
157 user_sgpr_info->num_inline_push_consts = num_push_consts;
158 } else {
159 user_sgpr_info->num_inline_push_consts = remaining_sgprs;
160 }
161
162 /* Clamp to the maximum number of allowed inlined push constants. */
163 if (user_sgpr_info->num_inline_push_consts > AC_MAX_INLINE_PUSH_CONSTS)
164 user_sgpr_info->num_inline_push_consts = AC_MAX_INLINE_PUSH_CONSTS;
165
166 if (user_sgpr_info->num_inline_push_consts == num_push_consts &&
167 !args->shader_info->loads_dynamic_offsets) {
168 /* Disable the default push constants path if all constants are
169 * inlined and if shaders don't use dynamic descriptors.
170 */
171 user_sgpr_info->inlined_all_push_consts = true;
172 }
173 }
174
175 static void
allocate_user_sgprs(struct radv_shader_args * args,gl_shader_stage stage,bool has_previous_stage,gl_shader_stage previous_stage,bool needs_view_index,bool has_api_gs,struct user_sgpr_info * user_sgpr_info)176 allocate_user_sgprs(struct radv_shader_args *args, gl_shader_stage stage, bool has_previous_stage,
177 gl_shader_stage previous_stage, bool needs_view_index, bool has_api_gs,
178 struct user_sgpr_info *user_sgpr_info)
179 {
180 uint8_t user_sgpr_count = 0;
181
182 memset(user_sgpr_info, 0, sizeof(struct user_sgpr_info));
183
184 /* 2 user sgprs will always be allocated for scratch/rings */
185 user_sgpr_count += 2;
186
187 /* prolog inputs */
188 if (args->shader_info->vs.has_prolog)
189 user_sgpr_count += 2;
190
191 switch (stage) {
192 case MESA_SHADER_COMPUTE:
193 if (args->shader_info->cs.uses_sbt)
194 user_sgpr_count += 1;
195 if (args->shader_info->cs.uses_grid_size)
196 user_sgpr_count += 3;
197 if (args->shader_info->cs.uses_ray_launch_size)
198 user_sgpr_count += 3;
199 break;
200 case MESA_SHADER_FRAGMENT:
201 break;
202 case MESA_SHADER_VERTEX:
203 if (!args->is_gs_copy_shader)
204 user_sgpr_count += count_vs_user_sgprs(args);
205 break;
206 case MESA_SHADER_TESS_CTRL:
207 if (has_previous_stage) {
208 if (previous_stage == MESA_SHADER_VERTEX)
209 user_sgpr_count += count_vs_user_sgprs(args);
210 }
211 break;
212 case MESA_SHADER_TESS_EVAL:
213 break;
214 case MESA_SHADER_GEOMETRY:
215 if (has_previous_stage) {
216 if (args->shader_info->is_ngg)
217 user_sgpr_count += count_ngg_sgprs(args, has_api_gs);
218
219 if (previous_stage == MESA_SHADER_VERTEX) {
220 user_sgpr_count += count_vs_user_sgprs(args);
221 }
222 }
223 break;
224 default:
225 break;
226 }
227
228 if (needs_view_index)
229 user_sgpr_count++;
230
231 if (args->shader_info->loads_push_constants)
232 user_sgpr_count++;
233
234 if (args->shader_info->so.num_outputs)
235 user_sgpr_count++;
236
237 uint32_t available_sgprs =
238 args->options->chip_class >= GFX9 && stage != MESA_SHADER_COMPUTE ? 32 : 16;
239 uint32_t remaining_sgprs = available_sgprs - user_sgpr_count;
240 uint32_t num_desc_set = util_bitcount(args->shader_info->desc_set_used_mask);
241
242 if (remaining_sgprs < num_desc_set) {
243 user_sgpr_info->indirect_all_descriptor_sets = true;
244 user_sgpr_info->remaining_sgprs = remaining_sgprs - 1;
245 } else {
246 user_sgpr_info->remaining_sgprs = remaining_sgprs - num_desc_set;
247 }
248
249 allocate_inline_push_consts(args, user_sgpr_info);
250 }
251
252 static void
declare_global_input_sgprs(struct radv_shader_args * args,const struct user_sgpr_info * user_sgpr_info)253 declare_global_input_sgprs(struct radv_shader_args *args,
254 const struct user_sgpr_info *user_sgpr_info)
255 {
256 /* 1 for each descriptor set */
257 if (!user_sgpr_info->indirect_all_descriptor_sets) {
258 uint32_t mask = args->shader_info->desc_set_used_mask;
259
260 while (mask) {
261 int i = u_bit_scan(&mask);
262
263 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR, &args->descriptor_sets[i]);
264 }
265 } else {
266 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR_PTR, &args->descriptor_sets[0]);
267 }
268
269 if (args->shader_info->loads_push_constants && !user_sgpr_info->inlined_all_push_consts) {
270 /* 1 for push constants and dynamic descriptors */
271 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR, &args->ac.push_constants);
272 }
273
274 for (unsigned i = 0; i < user_sgpr_info->num_inline_push_consts; i++) {
275 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.inline_push_consts[i]);
276 }
277 args->ac.base_inline_push_consts = args->shader_info->min_push_constant_used / 4;
278
279 if (args->shader_info->so.num_outputs) {
280 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR, &args->streamout_buffers);
281 }
282 }
283
284 static void
declare_vs_specific_input_sgprs(struct radv_shader_args * args,gl_shader_stage stage,bool has_previous_stage,gl_shader_stage previous_stage)285 declare_vs_specific_input_sgprs(struct radv_shader_args *args, gl_shader_stage stage,
286 bool has_previous_stage, gl_shader_stage previous_stage)
287 {
288 if (args->shader_info->vs.has_prolog)
289 ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_INT, &args->prolog_inputs);
290
291 if (!args->is_gs_copy_shader && (stage == MESA_SHADER_VERTEX ||
292 (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) {
293 if (args->shader_info->vs.vb_desc_usage_mask) {
294 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR, &args->ac.vertex_buffers);
295 }
296 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.base_vertex);
297 if (args->shader_info->vs.needs_draw_id) {
298 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.draw_id);
299 }
300 if (args->shader_info->vs.needs_base_instance) {
301 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.start_instance);
302 }
303 }
304 }
305
306 static void
declare_vs_input_vgprs(struct radv_shader_args * args)307 declare_vs_input_vgprs(struct radv_shader_args *args)
308 {
309 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vertex_id);
310 if (!args->is_gs_copy_shader) {
311 if (args->shader_info->vs.as_ls) {
312 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_rel_patch_id);
313 if (args->options->chip_class >= GFX10) {
314 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
315 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
316 } else {
317 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
318 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */
319 }
320 } else {
321 if (args->options->chip_class >= GFX10) {
322 if (args->shader_info->is_ngg) {
323 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
324 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
325 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
326 } else {
327 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */
328 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_prim_id);
329 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
330 }
331 } else {
332 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
333 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_prim_id);
334 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */
335 }
336 }
337 }
338
339 if (args->shader_info->vs.dynamic_inputs) {
340 assert(args->shader_info->vs.use_per_attribute_vb_descs);
341 unsigned num_attributes = util_last_bit(args->shader_info->vs.vb_desc_usage_mask);
342 for (unsigned i = 0; i < num_attributes; i++)
343 ac_add_arg(&args->ac, AC_ARG_VGPR, 4, AC_ARG_INT, &args->vs_inputs[i]);
344 /* Ensure the main shader doesn't use less vgprs than the prolog. The prolog requires one
345 * VGPR more than the number of shader arguments in the case of non-trivial divisors on GFX8.
346 */
347 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL);
348 }
349 }
350
351 static void
declare_streamout_sgprs(struct radv_shader_args * args,gl_shader_stage stage)352 declare_streamout_sgprs(struct radv_shader_args *args, gl_shader_stage stage)
353 {
354 int i;
355
356 /* Streamout SGPRs. */
357 if (args->shader_info->so.num_outputs) {
358 assert(stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_TESS_EVAL);
359
360 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.streamout_config);
361 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.streamout_write_index);
362 } else if (stage == MESA_SHADER_TESS_EVAL) {
363 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
364 }
365
366 /* A streamout buffer offset is loaded if the stride is non-zero. */
367 for (i = 0; i < 4; i++) {
368 if (!args->shader_info->so.strides[i])
369 continue;
370
371 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.streamout_offset[i]);
372 }
373 }
374
375 static void
declare_tes_input_vgprs(struct radv_shader_args * args)376 declare_tes_input_vgprs(struct radv_shader_args *args)
377 {
378 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.tes_u);
379 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.tes_v);
380 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tes_rel_patch_id);
381 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tes_patch_id);
382 }
383
384 static void
declare_ps_input_vgprs(struct radv_shader_args * args)385 declare_ps_input_vgprs(struct radv_shader_args *args)
386 {
387 unsigned spi_ps_input = args->shader_info->ps.spi_ps_input;
388
389 ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_sample);
390 ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_center);
391 ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_centroid);
392 ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_INT, &args->ac.pull_model);
393 ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_sample);
394 ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_center);
395 ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_centroid);
396 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, NULL); /* line stipple tex */
397 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[0]);
398 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[1]);
399 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[2]);
400 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[3]);
401 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.front_face);
402 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.ancillary);
403 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.sample_coverage);
404 ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* fixed pt */
405
406 if (args->options->remap_spi_ps_input) {
407 /* LLVM optimizes away unused FS inputs and computes spi_ps_input_addr itself and then
408 * communicates the results back via the ELF binary. Mirror what LLVM does by re-mapping the
409 * VGPR arguments here.
410 */
411 unsigned arg_count = 0;
412 for (unsigned i = 0, vgpr_arg = 0, vgpr_reg = 0; i < args->ac.arg_count; i++) {
413 if (args->ac.args[i].file != AC_ARG_VGPR) {
414 arg_count++;
415 continue;
416 }
417
418 if (!(spi_ps_input & (1 << vgpr_arg))) {
419 args->ac.args[i].skip = true;
420 } else {
421 args->ac.args[i].offset = vgpr_reg;
422 vgpr_reg += args->ac.args[i].size;
423 arg_count++;
424 }
425 vgpr_arg++;
426 }
427 }
428 }
429
430 static void
declare_ngg_sgprs(struct radv_shader_args * args,bool has_api_gs)431 declare_ngg_sgprs(struct radv_shader_args *args, bool has_api_gs)
432 {
433 if (has_api_gs) {
434 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_gs_state);
435 }
436
437 if (args->shader_info->has_ngg_culling) {
438 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_culling_settings);
439 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_viewport_scale[0]);
440 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_viewport_scale[1]);
441 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_viewport_translate[0]);
442 ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_viewport_translate[1]);
443 }
444 }
445
446 static void
set_global_input_locs(struct radv_shader_args * args,const struct user_sgpr_info * user_sgpr_info,uint8_t * user_sgpr_idx)447 set_global_input_locs(struct radv_shader_args *args, const struct user_sgpr_info *user_sgpr_info,
448 uint8_t *user_sgpr_idx)
449 {
450 unsigned num_inline_push_consts = 0;
451
452 if (!user_sgpr_info->indirect_all_descriptor_sets) {
453 for (unsigned i = 0; i < ARRAY_SIZE(args->descriptor_sets); i++) {
454 if (args->descriptor_sets[i].used)
455 set_loc_desc(args, i, user_sgpr_idx);
456 }
457 } else {
458 set_loc_shader_ptr(args, AC_UD_INDIRECT_DESCRIPTOR_SETS, user_sgpr_idx);
459 }
460
461 if (args->ac.push_constants.used) {
462 set_loc_shader_ptr(args, AC_UD_PUSH_CONSTANTS, user_sgpr_idx);
463 }
464
465 for (unsigned i = 0; i < ARRAY_SIZE(args->ac.inline_push_consts); i++) {
466 if (args->ac.inline_push_consts[i].used)
467 num_inline_push_consts++;
468 }
469
470 if (num_inline_push_consts) {
471 set_loc_shader(args, AC_UD_INLINE_PUSH_CONSTANTS, user_sgpr_idx, num_inline_push_consts);
472 }
473
474 if (args->streamout_buffers.used) {
475 set_loc_shader_ptr(args, AC_UD_STREAMOUT_BUFFERS, user_sgpr_idx);
476 }
477 }
478
479 static void
set_vs_specific_input_locs(struct radv_shader_args * args,gl_shader_stage stage,bool has_previous_stage,gl_shader_stage previous_stage,uint8_t * user_sgpr_idx)480 set_vs_specific_input_locs(struct radv_shader_args *args, gl_shader_stage stage,
481 bool has_previous_stage, gl_shader_stage previous_stage,
482 uint8_t *user_sgpr_idx)
483 {
484 if (args->prolog_inputs.used)
485 set_loc_shader(args, AC_UD_VS_PROLOG_INPUTS, user_sgpr_idx, 2);
486
487 if (!args->is_gs_copy_shader && (stage == MESA_SHADER_VERTEX ||
488 (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) {
489 if (args->ac.vertex_buffers.used) {
490 set_loc_shader_ptr(args, AC_UD_VS_VERTEX_BUFFERS, user_sgpr_idx);
491 }
492
493 unsigned vs_num = args->ac.base_vertex.used + args->ac.draw_id.used +
494 args->ac.start_instance.used;
495 set_loc_shader(args, AC_UD_VS_BASE_VERTEX_START_INSTANCE, user_sgpr_idx, vs_num);
496 }
497 }
498
499 /* Returns whether the stage is a stage that can be directly before the GS */
500 static bool
is_pre_gs_stage(gl_shader_stage stage)501 is_pre_gs_stage(gl_shader_stage stage)
502 {
503 return stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_TESS_EVAL;
504 }
505
/* Declare all arguments (SGPRs and VGPRs) for a shader stage and record the
 * user SGPR locations in shader_info. The per-stage declaration order below
 * is the hardware/ABI argument layout and must not be changed.
 */
void
radv_declare_shader_args(struct radv_shader_args *args, gl_shader_stage stage,
                         bool has_previous_stage, gl_shader_stage previous_stage)
{
   struct user_sgpr_info user_sgpr_info;
   bool needs_view_index = needs_view_index_sgpr(args, stage);
   bool has_api_gs = stage == MESA_SHADER_GEOMETRY;

   if (args->options->chip_class >= GFX10) {
      if (is_pre_gs_stage(stage) && args->shader_info->is_ngg) {
         /* On GFX10, VS is merged into GS for NGG. */
         previous_stage = stage;
         stage = MESA_SHADER_GEOMETRY;
         has_previous_stage = true;
      }
   }

   /* Initialize every location to "not assigned" (-1). */
   for (int i = 0; i < MAX_SETS; i++)
      args->shader_info->user_sgprs_locs.descriptor_sets[i].sgpr_idx = -1;
   for (int i = 0; i < AC_UD_MAX_UD; i++)
      args->shader_info->user_sgprs_locs.shader_data[i].sgpr_idx = -1;

   allocate_user_sgprs(args, stage, has_previous_stage, previous_stage, needs_view_index,
                       has_api_gs, &user_sgpr_info);

   if (args->options->explicit_scratch_args) {
      ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_DESC_PTR, &args->ring_offsets);
   }

   /* To ensure prologs match the main VS, VS specific input SGPRs have to be placed before other
    * sgprs.
    */

   switch (stage) {
   case MESA_SHADER_COMPUTE:
      declare_global_input_sgprs(args, &user_sgpr_info);

      if (args->shader_info->cs.uses_sbt) {
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR, &args->ac.sbt_descriptors);
      }

      if (args->shader_info->cs.uses_grid_size) {
         ac_add_arg(&args->ac, AC_ARG_SGPR, 3, AC_ARG_INT, &args->ac.num_work_groups);
      }

      if (args->shader_info->cs.uses_ray_launch_size) {
         ac_add_arg(&args->ac, AC_ARG_SGPR, 3, AC_ARG_INT, &args->ac.ray_launch_size);
      }

      for (int i = 0; i < 3; i++) {
         if (args->shader_info->cs.uses_block_id[i]) {
            ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.workgroup_ids[i]);
         }
      }

      if (args->shader_info->cs.uses_local_invocation_idx) {
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tg_size);
      }

      if (args->options->explicit_scratch_args) {
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
      }

      ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_INT, &args->ac.local_invocation_ids);
      break;
   case MESA_SHADER_VERTEX:
      /* NGG is handled by the GS case */
      assert(!args->shader_info->is_ngg);

      declare_vs_specific_input_sgprs(args, stage, has_previous_stage, previous_stage);

      declare_global_input_sgprs(args, &user_sgpr_info);

      if (needs_view_index) {
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index);
      }

      if (args->shader_info->vs.as_es) {
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.es2gs_offset);
      } else if (args->shader_info->vs.as_ls) {
         /* no extra parameters */
      } else {
         declare_streamout_sgprs(args, stage);
      }

      if (args->options->explicit_scratch_args) {
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
      }

      declare_vs_input_vgprs(args);
      break;
   case MESA_SHADER_TESS_CTRL:
      if (has_previous_stage) {
         /* Merged LS+HS: the fixed system SGPR block comes first. */
         // First 6 system regs
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.merged_wave_info);
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tcs_factor_offset);

         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown

         declare_vs_specific_input_sgprs(args, stage, has_previous_stage, previous_stage);

         declare_global_input_sgprs(args, &user_sgpr_info);

         if (needs_view_index) {
            ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index);
         }

         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_patch_id);
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_rel_ids);

         declare_vs_input_vgprs(args);
      } else {
         declare_global_input_sgprs(args, &user_sgpr_info);

         if (needs_view_index) {
            ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index);
         }

         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tcs_factor_offset);
         if (args->options->explicit_scratch_args) {
            ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
         }
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_patch_id);
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_rel_ids);
      }
      break;
   case MESA_SHADER_TESS_EVAL:
      /* NGG is handled by the GS case */
      assert(!args->shader_info->is_ngg);

      declare_global_input_sgprs(args, &user_sgpr_info);

      if (needs_view_index)
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index);

      if (args->shader_info->tes.as_es) {
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.es2gs_offset);
      } else {
         declare_streamout_sgprs(args, stage);
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
      }
      if (args->options->explicit_scratch_args) {
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
      }
      declare_tes_input_vgprs(args);
      break;
   case MESA_SHADER_GEOMETRY:
      if (has_previous_stage) {
         /* Merged ES+GS (or NGG): the fixed system SGPR block comes first. */
         // First 6 system regs
         if (args->shader_info->is_ngg) {
            ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs_tg_info);
         } else {
            ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs2vs_offset);
         }

         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.merged_wave_info);
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);

         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown

         if (previous_stage != MESA_SHADER_TESS_EVAL) {
            declare_vs_specific_input_sgprs(args, stage, has_previous_stage, previous_stage);
         }

         declare_global_input_sgprs(args, &user_sgpr_info);

         if (needs_view_index) {
            ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index);
         }

         if (args->shader_info->is_ngg) {
            declare_ngg_sgprs(args, has_api_gs);
         }

         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[0]);
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[1]);
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_prim_id);
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_invocation_id);
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[2]);

         if (previous_stage == MESA_SHADER_VERTEX) {
            declare_vs_input_vgprs(args);
         } else {
            declare_tes_input_vgprs(args);
         }
      } else {
         declare_global_input_sgprs(args, &user_sgpr_info);

         if (needs_view_index) {
            ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index);
         }

         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs2vs_offset);
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs_wave_id);
         if (args->options->explicit_scratch_args) {
            ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
         }
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[0]);
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[1]);
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_prim_id);
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[2]);
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[3]);
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[4]);
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[5]);
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_invocation_id);
      }
      break;
   case MESA_SHADER_FRAGMENT:
      declare_global_input_sgprs(args, &user_sgpr_info);

      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.prim_mask);
      if (args->options->explicit_scratch_args) {
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
      }

      declare_ps_input_vgprs(args);
      break;
   default:
      unreachable("Shader stage not implemented");
   }

   /* +2 input SGPRs for the implicit scratch/ring offsets. */
   args->shader_info->num_input_vgprs = 0;
   args->shader_info->num_input_sgprs = 2;
   args->shader_info->num_input_sgprs += args->ac.num_sgprs_used;
   args->shader_info->num_input_vgprs = args->ac.num_vgprs_used;

   uint8_t user_sgpr_idx = 0;

   set_loc_shader_ptr(args, AC_UD_SCRATCH_RING_OFFSETS, &user_sgpr_idx);

   /* For merged shaders the user SGPRs start at 8, with 8 system SGPRs in front (including
    * the rw_buffers at s0/s1. With user SGPR0 = s8, lets restart the count from 0 */
   if (has_previous_stage)
      user_sgpr_idx = 0;

   if (stage == MESA_SHADER_VERTEX || (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))
      set_vs_specific_input_locs(args, stage, has_previous_stage, previous_stage, &user_sgpr_idx);

   set_global_input_locs(args, &user_sgpr_info, &user_sgpr_idx);

   /* Assign locations for the stage-specific user SGPRs, matching the
    * declaration order above. */
   switch (stage) {
   case MESA_SHADER_COMPUTE:
      if (args->ac.sbt_descriptors.used) {
         set_loc_shader_ptr(args, AC_UD_CS_SBT_DESCRIPTORS, &user_sgpr_idx);
      }
      if (args->ac.num_work_groups.used) {
         set_loc_shader(args, AC_UD_CS_GRID_SIZE, &user_sgpr_idx, 3);
      }
      if (args->ac.ray_launch_size.used) {
         set_loc_shader(args, AC_UD_CS_RAY_LAUNCH_SIZE, &user_sgpr_idx, 3);
      }
      break;
   case MESA_SHADER_VERTEX:
      if (args->ac.view_index.used)
         set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
      break;
   case MESA_SHADER_TESS_CTRL:
      if (args->ac.view_index.used)
         set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
      break;
   case MESA_SHADER_TESS_EVAL:
      if (args->ac.view_index.used)
         set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
      break;
   case MESA_SHADER_GEOMETRY:
      if (args->ac.view_index.used)
         set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);

      if (args->ngg_gs_state.used) {
         set_loc_shader(args, AC_UD_NGG_GS_STATE, &user_sgpr_idx, 1);
      }

      if (args->ngg_culling_settings.used) {
         set_loc_shader(args, AC_UD_NGG_CULLING_SETTINGS, &user_sgpr_idx, 1);
      }

      if (args->ngg_viewport_scale[0].used) {
         assert(args->ngg_viewport_scale[1].used &&
                args->ngg_viewport_translate[0].used &&
                args->ngg_viewport_translate[1].used);
         set_loc_shader(args, AC_UD_NGG_VIEWPORT, &user_sgpr_idx, 4);
      }
      break;
   case MESA_SHADER_FRAGMENT:
      break;
   default:
      unreachable("Shader stage not implemented");
   }

   args->shader_info->num_user_sgprs = user_sgpr_idx;
}
805