1 /*
2 * Copyright © 2017 Red Hat
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23 #include "nir/nir.h"
24 #include "nir/nir_xfb_info.h"
25 #include "radv_private.h"
26 #include "radv_shader.h"
27
28 #include "ac_nir.h"
29
30 static void
mark_sampler_desc(const nir_variable * var,struct radv_shader_info * info)31 mark_sampler_desc(const nir_variable *var, struct radv_shader_info *info)
32 {
33 info->desc_set_used_mask |= (1u << var->data.descriptor_set);
34 }
35
36 static void
gather_intrinsic_load_input_info(const nir_shader * nir,const nir_intrinsic_instr * instr,struct radv_shader_info * info)37 gather_intrinsic_load_input_info(const nir_shader *nir, const nir_intrinsic_instr *instr,
38 struct radv_shader_info *info)
39 {
40 switch (nir->info.stage) {
41 case MESA_SHADER_VERTEX: {
42 unsigned idx = nir_intrinsic_io_semantics(instr).location;
43 unsigned component = nir_intrinsic_component(instr);
44 unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa);
45
46 info->vs.input_usage_mask[idx] |= mask << component;
47 break;
48 }
49 default:
50 break;
51 }
52 }
53
54 static void
gather_intrinsic_store_output_info(const nir_shader * nir,const nir_intrinsic_instr * instr,struct radv_shader_info * info)55 gather_intrinsic_store_output_info(const nir_shader *nir, const nir_intrinsic_instr *instr,
56 struct radv_shader_info *info)
57 {
58 unsigned idx = nir_intrinsic_base(instr);
59 unsigned num_slots = nir_intrinsic_io_semantics(instr).num_slots;
60 unsigned component = nir_intrinsic_component(instr);
61 unsigned write_mask = nir_intrinsic_write_mask(instr);
62 uint8_t *output_usage_mask = NULL;
63
64 if (instr->src[0].ssa->bit_size == 64)
65 write_mask = util_widen_mask(write_mask, 2);
66
67 switch (nir->info.stage) {
68 case MESA_SHADER_VERTEX:
69 output_usage_mask = info->vs.output_usage_mask;
70 break;
71 case MESA_SHADER_TESS_EVAL:
72 output_usage_mask = info->tes.output_usage_mask;
73 break;
74 case MESA_SHADER_GEOMETRY:
75 output_usage_mask = info->gs.output_usage_mask;
76 break;
77 default:
78 break;
79 }
80
81 if (output_usage_mask) {
82 for (unsigned i = 0; i < num_slots; i++) {
83 output_usage_mask[idx + i] |= ((write_mask >> (i * 4)) & 0xf) << component;
84 }
85 }
86 }
87
88 static void
gather_push_constant_info(const nir_shader * nir,const nir_intrinsic_instr * instr,struct radv_shader_info * info)89 gather_push_constant_info(const nir_shader *nir, const nir_intrinsic_instr *instr,
90 struct radv_shader_info *info)
91 {
92 info->loads_push_constants = true;
93
94 if (nir_src_is_const(instr->src[0]) && instr->dest.ssa.bit_size >= 32) {
95 uint32_t start = (nir_intrinsic_base(instr) + nir_src_as_uint(instr->src[0])) / 4u;
96 uint32_t size = instr->num_components * (instr->dest.ssa.bit_size / 32u);
97
98 if (start + size <= (MAX_PUSH_CONSTANTS_SIZE / 4u)) {
99 info->inline_push_constant_mask |= u_bit_consecutive64(start, size);
100 return;
101 }
102 }
103
104 info->can_inline_all_push_constants = false;
105 }
106
/* Per-intrinsic gathering: records barycentric interpolation modes (PS),
 * system-value component usage (CS), frag-coord/sample-position reads,
 * push-constant loads, descriptor-set usage for resources and images, and
 * dispatches to the input/output usage helpers.
 */
static void
gather_intrinsic_info(const nir_shader *nir, const nir_intrinsic_instr *instr,
                      struct radv_shader_info *info)
{
   switch (instr->intrinsic) {
   case nir_intrinsic_load_barycentric_sample:
   case nir_intrinsic_load_barycentric_pixel:
   case nir_intrinsic_load_barycentric_centroid:
   case nir_intrinsic_load_barycentric_at_sample:
   case nir_intrinsic_load_barycentric_at_offset: {
      enum glsl_interp_mode mode = nir_intrinsic_interp_mode(instr);
      switch (mode) {
      case INTERP_MODE_SMOOTH:
      case INTERP_MODE_NONE:
         /* Perspective-correct interpolation: pixel/at_sample/at_offset all
          * use the per-pixel (center) barycentrics.
          */
         if (instr->intrinsic == nir_intrinsic_load_barycentric_pixel ||
             instr->intrinsic == nir_intrinsic_load_barycentric_at_sample ||
             instr->intrinsic == nir_intrinsic_load_barycentric_at_offset)
            info->ps.reads_persp_center = true;
         else if (instr->intrinsic == nir_intrinsic_load_barycentric_centroid)
            info->ps.reads_persp_centroid = true;
         else if (instr->intrinsic == nir_intrinsic_load_barycentric_sample)
            info->ps.reads_persp_sample = true;
         break;
      case INTERP_MODE_NOPERSPECTIVE:
         /* Linear (non-perspective) interpolation, same grouping as above. */
         if (instr->intrinsic == nir_intrinsic_load_barycentric_pixel ||
             instr->intrinsic == nir_intrinsic_load_barycentric_at_sample ||
             instr->intrinsic == nir_intrinsic_load_barycentric_at_offset)
            info->ps.reads_linear_center = true;
         else if (instr->intrinsic == nir_intrinsic_load_barycentric_centroid)
            info->ps.reads_linear_centroid = true;
         else if (instr->intrinsic == nir_intrinsic_load_barycentric_sample)
            info->ps.reads_linear_sample = true;
         break;
      default:
         break;
      }
      if (instr->intrinsic == nir_intrinsic_load_barycentric_at_sample)
         info->ps.needs_sample_positions = true;
      break;
   }
   case nir_intrinsic_load_local_invocation_id:
   case nir_intrinsic_load_workgroup_id: {
      /* Track which of the x/y/z components are actually read. */
      unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa);
      while (mask) {
         unsigned i = u_bit_scan(&mask);

         if (instr->intrinsic == nir_intrinsic_load_workgroup_id)
            info->cs.uses_block_id[i] = true;
         else
            info->cs.uses_thread_id[i] = true;
      }
      break;
   }
   case nir_intrinsic_load_frag_coord:
      info->ps.reads_frag_coord_mask |= nir_ssa_def_components_read(&instr->dest.ssa);
      break;
   case nir_intrinsic_load_sample_pos:
      info->ps.reads_sample_pos_mask |= nir_ssa_def_components_read(&instr->dest.ssa);
      break;
   case nir_intrinsic_load_push_constant:
      gather_push_constant_info(nir, instr, info);
      break;
   case nir_intrinsic_vulkan_resource_index:
      info->desc_set_used_mask |= (1u << nir_intrinsic_desc_set(instr));
      break;
   case nir_intrinsic_image_deref_load:
   case nir_intrinsic_image_deref_sparse_load:
   case nir_intrinsic_image_deref_store:
   case nir_intrinsic_image_deref_atomic_add:
   case nir_intrinsic_image_deref_atomic_imin:
   case nir_intrinsic_image_deref_atomic_umin:
   case nir_intrinsic_image_deref_atomic_imax:
   case nir_intrinsic_image_deref_atomic_umax:
   case nir_intrinsic_image_deref_atomic_and:
   case nir_intrinsic_image_deref_atomic_or:
   case nir_intrinsic_image_deref_atomic_xor:
   case nir_intrinsic_image_deref_atomic_exchange:
   case nir_intrinsic_image_deref_atomic_comp_swap:
   case nir_intrinsic_image_deref_atomic_fmin:
   case nir_intrinsic_image_deref_atomic_fmax:
   case nir_intrinsic_image_deref_size:
   case nir_intrinsic_image_deref_samples: {
      /* Any image access marks the descriptor set of the image variable. */
      nir_variable *var =
         nir_deref_instr_get_variable(nir_instr_as_deref(instr->src[0].ssa->parent_instr));
      mark_sampler_desc(var, info);
      break;
   }
   case nir_intrinsic_load_input:
      gather_intrinsic_load_input_info(nir, instr, info);
      break;
   case nir_intrinsic_store_output:
      gather_intrinsic_store_output_info(nir, instr, info);
      break;
   case nir_intrinsic_load_sbt_base_amd:
      /* Ray tracing: shader binding table is accessed. */
      info->cs.uses_sbt = true;
      break;
   case nir_intrinsic_load_force_vrs_rates_amd:
      info->force_vrs_per_vertex = true;
      break;
   default:
      break;
   }
}
210
211 static void
gather_tex_info(const nir_shader * nir,const nir_tex_instr * instr,struct radv_shader_info * info)212 gather_tex_info(const nir_shader *nir, const nir_tex_instr *instr, struct radv_shader_info *info)
213 {
214 for (unsigned i = 0; i < instr->num_srcs; i++) {
215 switch (instr->src[i].src_type) {
216 case nir_tex_src_texture_deref:
217 mark_sampler_desc(nir_deref_instr_get_variable(nir_src_as_deref(instr->src[i].src)), info);
218 break;
219 case nir_tex_src_sampler_deref:
220 mark_sampler_desc(nir_deref_instr_get_variable(nir_src_as_deref(instr->src[i].src)), info);
221 break;
222 default:
223 break;
224 }
225 }
226 }
227
228 static void
gather_info_block(const nir_shader * nir,const nir_block * block,struct radv_shader_info * info)229 gather_info_block(const nir_shader *nir, const nir_block *block, struct radv_shader_info *info)
230 {
231 nir_foreach_instr (instr, block) {
232 switch (instr->type) {
233 case nir_instr_type_intrinsic:
234 gather_intrinsic_info(nir, nir_instr_as_intrinsic(instr), info);
235 break;
236 case nir_instr_type_tex:
237 gather_tex_info(nir, nir_instr_as_tex(instr), info);
238 break;
239 default:
240 break;
241 }
242 }
243 }
244
245 static void
gather_info_input_decl_vs(const nir_shader * nir,const nir_variable * var,const struct radv_pipeline_key * key,struct radv_shader_info * info)246 gather_info_input_decl_vs(const nir_shader *nir, const nir_variable *var,
247 const struct radv_pipeline_key *key, struct radv_shader_info *info)
248 {
249 unsigned attrib_count = glsl_count_attribute_slots(var->type, true);
250
251 for (unsigned i = 0; i < attrib_count; ++i) {
252 unsigned attrib_index = var->data.location + i - VERT_ATTRIB_GENERIC0;
253
254 if (key->vs.instance_rate_inputs & (1u << attrib_index)) {
255 info->vs.needs_instance_id = true;
256 info->vs.needs_base_instance = true;
257 }
258
259 if (info->vs.use_per_attribute_vb_descs)
260 info->vs.vb_desc_usage_mask |= 1u << attrib_index;
261 else
262 info->vs.vb_desc_usage_mask |= 1u << key->vs.vertex_attribute_bindings[attrib_index];
263 }
264 }
265
266 static void
mark_16bit_ps_input(struct radv_shader_info * info,const struct glsl_type * type,int location)267 mark_16bit_ps_input(struct radv_shader_info *info, const struct glsl_type *type, int location)
268 {
269 if (glsl_type_is_scalar(type) || glsl_type_is_vector(type) || glsl_type_is_matrix(type)) {
270 unsigned attrib_count = glsl_count_attribute_slots(type, false);
271 if (glsl_type_is_16bit(type)) {
272 info->ps.float16_shaded_mask |= ((1ull << attrib_count) - 1) << location;
273 }
274 } else if (glsl_type_is_array(type)) {
275 unsigned stride = glsl_count_attribute_slots(glsl_get_array_element(type), false);
276 for (unsigned i = 0; i < glsl_get_length(type); ++i) {
277 mark_16bit_ps_input(info, glsl_get_array_element(type), location + i * stride);
278 }
279 } else {
280 assert(glsl_type_is_struct_or_ifc(type));
281 for (unsigned i = 0; i < glsl_get_length(type); i++) {
282 mark_16bit_ps_input(info, glsl_get_struct_field(type, i), location);
283 location += glsl_count_attribute_slots(glsl_get_struct_field(type, i), false);
284 }
285 }
286 }
/* Gather fragment-shader input information from an input variable
 * declaration: clip/cull distance counts, 16-bit input slots, flat/explicit
 * interpolation masks and the per-vertex / per-primitive input masks.
 */
static void
gather_info_input_decl_ps(const nir_shader *nir, const nir_variable *var,
                          struct radv_shader_info *info)
{
   unsigned attrib_count = glsl_count_attribute_slots(var->type, false);
   int idx = var->data.location;

   switch (idx) {
   case VARYING_SLOT_CLIP_DIST0:
   case VARYING_SLOT_CLIP_DIST1:
      info->ps.num_input_clips_culls += attrib_count;
      break;
   default:
      break;
   }

   if (var->data.compact) {
      /* Compact arrays (clip/cull distances) pack up to 4 scalar components
       * per attribute slot.
       */
      unsigned component_count = var->data.location_frac + glsl_get_length(var->type);
      attrib_count = (component_count + 3) / 4;
   } else {
      mark_16bit_ps_input(info, var->type, var->data.driver_location);
   }

   uint64_t mask = ((1ull << attrib_count) - 1);

   /* Per-primitive inputs don't contribute to the interpolation masks. */
   if (!var->data.per_primitive) {
      if (var->data.interpolation == INTERP_MODE_FLAT)
         info->ps.flat_shaded_mask |= mask << var->data.driver_location;
      else if (var->data.interpolation == INTERP_MODE_EXPLICIT)
         info->ps.explicit_shaded_mask |= mask << var->data.driver_location;
   }

   /* Only generic varyings are tracked in the input masks. */
   if (var->data.location >= VARYING_SLOT_VAR0) {
      if (var->data.per_primitive)
         info->ps.input_per_primitive_mask |= mask << (var->data.location - VARYING_SLOT_VAR0);
      else
         info->ps.input_mask |= mask << (var->data.location - VARYING_SLOT_VAR0);
   }
}
326
327 static void
gather_info_input_decl(const nir_shader * nir,const nir_variable * var,const struct radv_pipeline_key * key,struct radv_shader_info * info)328 gather_info_input_decl(const nir_shader *nir, const nir_variable *var,
329 const struct radv_pipeline_key *key, struct radv_shader_info *info)
330 {
331 switch (nir->info.stage) {
332 case MESA_SHADER_VERTEX:
333 gather_info_input_decl_vs(nir, var, key, info);
334 break;
335 case MESA_SHADER_FRAGMENT:
336 gather_info_input_decl_ps(nir, var, info);
337 break;
338 default:
339 break;
340 }
341 }
342
343 static void
gather_info_output_decl_gs(const nir_shader * nir,const nir_variable * var,struct radv_shader_info * info)344 gather_info_output_decl_gs(const nir_shader *nir, const nir_variable *var,
345 struct radv_shader_info *info)
346 {
347 unsigned num_components = glsl_get_component_slots(var->type);
348 unsigned stream = var->data.stream;
349 unsigned idx = var->data.location;
350
351 assert(stream < 4);
352
353 info->gs.num_stream_output_components[stream] += num_components;
354 info->gs.output_streams[idx] = stream;
355 }
356
357 static struct radv_vs_output_info *
get_vs_output_info(const nir_shader * nir,struct radv_shader_info * info)358 get_vs_output_info(const nir_shader *nir, struct radv_shader_info *info)
359 {
360
361 switch (nir->info.stage) {
362 case MESA_SHADER_VERTEX:
363 if (!info->vs.as_ls && !info->vs.as_es)
364 return &info->vs.outinfo;
365 break;
366 case MESA_SHADER_GEOMETRY:
367 return &info->vs.outinfo;
368 break;
369 case MESA_SHADER_TESS_EVAL:
370 if (!info->tes.as_es)
371 return &info->tes.outinfo;
372 break;
373 case MESA_SHADER_MESH:
374 return &info->ms.outinfo;
375 default:
376 break;
377 }
378
379 return NULL;
380 }
381
382 static void
gather_info_output_decl(const nir_shader * nir,const nir_variable * var,struct radv_shader_info * info)383 gather_info_output_decl(const nir_shader *nir, const nir_variable *var,
384 struct radv_shader_info *info)
385 {
386 switch (nir->info.stage) {
387 case MESA_SHADER_VERTEX:
388 break;
389 case MESA_SHADER_GEOMETRY:
390 gather_info_output_decl_gs(nir, var, info);
391 break;
392 case MESA_SHADER_TESS_EVAL:
393 break;
394 default:
395 break;
396 }
397 }
398
399 static void
gather_xfb_info(const nir_shader * nir,struct radv_shader_info * info)400 gather_xfb_info(const nir_shader *nir, struct radv_shader_info *info)
401 {
402 struct radv_streamout_info *so = &info->so;
403
404 if (!nir->xfb_info)
405 return;
406
407 const nir_xfb_info *xfb = nir->xfb_info;
408 assert(xfb->output_count <= MAX_SO_OUTPUTS);
409 so->num_outputs = xfb->output_count;
410
411 for (unsigned i = 0; i < xfb->output_count; i++) {
412 struct radv_stream_output *output = &so->outputs[i];
413
414 output->buffer = xfb->outputs[i].buffer;
415 output->stream = xfb->buffer_to_stream[xfb->outputs[i].buffer];
416 output->offset = xfb->outputs[i].offset;
417 output->location = xfb->outputs[i].location;
418 output->component_mask = xfb->outputs[i].component_mask;
419
420 so->enabled_stream_buffers_mask |= (1 << output->buffer) << (output->stream * 4);
421 }
422
423 for (unsigned i = 0; i < NIR_MAX_XFB_BUFFERS; i++) {
424 so->strides[i] = xfb->buffers[i].stride / 4;
425 }
426 }
427
428 static void
assign_outinfo_param(struct radv_vs_output_info * outinfo,gl_varying_slot idx,unsigned * total_param_exports)429 assign_outinfo_param(struct radv_vs_output_info *outinfo, gl_varying_slot idx,
430 unsigned *total_param_exports)
431 {
432 if (outinfo->vs_output_param_offset[idx] == AC_EXP_PARAM_UNDEFINED)
433 outinfo->vs_output_param_offset[idx] = (*total_param_exports)++;
434 }
435
436 static void
assign_outinfo_params(struct radv_vs_output_info * outinfo,uint64_t mask,unsigned * total_param_exports)437 assign_outinfo_params(struct radv_vs_output_info *outinfo, uint64_t mask,
438 unsigned *total_param_exports)
439 {
440 u_foreach_bit64(idx, mask) {
441 if (idx >= VARYING_SLOT_VAR0 || idx == VARYING_SLOT_LAYER ||
442 idx == VARYING_SLOT_PRIMITIVE_ID || idx == VARYING_SLOT_VIEWPORT ||
443 ((idx == VARYING_SLOT_CLIP_DIST0 || idx == VARYING_SLOT_CLIP_DIST1) &&
444 outinfo->export_clip_dists))
445 assign_outinfo_param(outinfo, idx, total_param_exports);
446 }
447 }
448
449 void
radv_nir_shader_info_init(struct radv_shader_info * info)450 radv_nir_shader_info_init(struct radv_shader_info *info)
451 {
452 /* Assume that shaders can inline all push constants by default. */
453 info->can_inline_all_push_constants = true;
454 }
455
/* Walk the NIR shader and fill radv_shader_info with everything the backend
 * needs before compilation: resource/descriptor usage, IO masks, system-value
 * usage, streamout setup, export parameter assignment and per-stage state.
 */
void
radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *nir,
                          const struct radv_pipeline_layout *layout,
                          const struct radv_pipeline_key *pipeline_key,
                          struct radv_shader_info *info)
{
   struct nir_function *func = (struct nir_function *)exec_list_get_head_const(&nir->functions);

   /* Dynamic descriptor offsets are sourced from push constants. */
   if (layout && layout->dynamic_offset_count &&
       (layout->dynamic_shader_stages & mesa_to_vk_shader_stage(nir->info.stage))) {
      info->loads_push_constants = true;
      info->loads_dynamic_offsets = true;
   }

   if (nir->info.stage == MESA_SHADER_VERTEX) {
      /* Dynamic vertex input state requires a VS prolog to fetch inputs. */
      if (pipeline_key->vs.dynamic_input_state && nir->info.inputs_read) {
         info->vs.has_prolog = true;
         info->vs.dynamic_inputs = true;
      }

      /* Use per-attribute vertex descriptors to prevent faults and
       * for correct bounds checking.
       */
      info->vs.use_per_attribute_vb_descs = device->robust_buffer_access || info->vs.dynamic_inputs;
   }

   if (nir->info.stage == MESA_SHADER_FRAGMENT) {
      if (pipeline_key->ps.has_epilog) {
         info->ps.has_epilog = true;
      }
   }

   /* We have to ensure consistent input register assignments between the main shader and the
    * prolog. */
   info->vs.needs_instance_id |= info->vs.has_prolog;
   info->vs.needs_base_instance |= info->vs.has_prolog;
   info->vs.needs_draw_id |= info->vs.has_prolog;

   nir_foreach_shader_in_variable (variable, nir)
      gather_info_input_decl(nir, variable, pipeline_key, info);

   /* Scan every instruction of the entry point. */
   nir_foreach_block (block, func->impl) {
      gather_info_block(nir, block, info);
   }

   nir_foreach_shader_out_variable(variable, nir) gather_info_output_decl(nir, variable, info);

   if (nir->info.stage == MESA_SHADER_VERTEX || nir->info.stage == MESA_SHADER_TESS_EVAL ||
       nir->info.stage == MESA_SHADER_GEOMETRY)
      gather_xfb_info(nir, info);

   struct radv_vs_output_info *outinfo = get_vs_output_info(nir, info);
   if (outinfo) {
      /* These are compiled into neither output params nor position exports. */
      uint64_t special_mask = BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_COUNT) |
                              BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_INDICES) |
                              BITFIELD64_BIT(VARYING_SLOT_CULL_PRIMITIVE);
      uint64_t per_prim_mask =
         nir->info.outputs_written & nir->info.per_primitive_outputs & ~special_mask;
      uint64_t per_vtx_mask =
         nir->info.outputs_written & ~nir->info.per_primitive_outputs & ~special_mask;

      /* Mesh multiview is only lowered in ac_nir_lower_ngg, so we have to fake it here. */
      if (nir->info.stage == MESA_SHADER_MESH && pipeline_key->has_multiview_view_index) {
         per_prim_mask |= VARYING_BIT_LAYER;
         info->uses_view_index = true;
      }

      /* Per vertex outputs. */
      outinfo->writes_pointsize = per_vtx_mask & VARYING_BIT_PSIZ;
      outinfo->writes_viewport_index = per_vtx_mask & VARYING_BIT_VIEWPORT;
      outinfo->writes_layer = per_vtx_mask & VARYING_BIT_LAYER;
      outinfo->writes_primitive_shading_rate = per_vtx_mask & VARYING_BIT_PRIMITIVE_SHADING_RATE;

      /* Per primitive outputs. */
      outinfo->writes_viewport_index_per_primitive = per_prim_mask & VARYING_BIT_VIEWPORT;
      outinfo->writes_layer_per_primitive = per_prim_mask & VARYING_BIT_LAYER;
      outinfo->writes_primitive_shading_rate_per_primitive = per_prim_mask & VARYING_BIT_PRIMITIVE_SHADING_RATE;

      /* Clip/cull distances: cull distances follow clip distances in the
       * combined mask.
       */
      outinfo->clip_dist_mask = (1 << nir->info.clip_distance_array_size) - 1;
      outinfo->cull_dist_mask = (1 << nir->info.cull_distance_array_size) - 1;
      outinfo->cull_dist_mask <<= nir->info.clip_distance_array_size;

      /* Position export is always present (bit 0); extra bits for misc vec4
       * (psize/layer/viewport/vrs) and up to two clip/cull distance vec4s.
       */
      int pos_written = 0x1;

      if (outinfo->writes_pointsize || outinfo->writes_viewport_index || outinfo->writes_layer ||
          outinfo->writes_primitive_shading_rate)
         pos_written |= 1 << 1;

      unsigned num_clip_distances = util_bitcount(outinfo->clip_dist_mask);
      unsigned num_cull_distances = util_bitcount(outinfo->cull_dist_mask);

      if (num_clip_distances + num_cull_distances > 0)
         pos_written |= 1 << 2;
      if (num_clip_distances + num_cull_distances > 4)
         pos_written |= 1 << 3;

      outinfo->pos_exports = util_bitcount(pos_written);

      memset(outinfo->vs_output_param_offset, AC_EXP_PARAM_UNDEFINED,
             sizeof(outinfo->vs_output_param_offset));

      unsigned total_param_exports = 0;

      /* Per-vertex outputs */
      assign_outinfo_params(outinfo, per_vtx_mask, &total_param_exports);
      if (outinfo->writes_layer)
         assign_outinfo_param(outinfo, VARYING_SLOT_LAYER, &total_param_exports);
      if (outinfo->export_prim_id)
         assign_outinfo_param(outinfo, VARYING_SLOT_PRIMITIVE_ID, &total_param_exports);

      outinfo->param_exports = total_param_exports;

      /* Per-primitive outputs: the HW needs these to be last. */
      assign_outinfo_params(outinfo, per_prim_mask, &total_param_exports);
      if (outinfo->writes_layer_per_primitive)
         assign_outinfo_param(outinfo, VARYING_SLOT_LAYER, &total_param_exports);
      if (outinfo->writes_viewport_index_per_primitive)
         assign_outinfo_param(outinfo, VARYING_SLOT_VIEWPORT, &total_param_exports);

      outinfo->prim_param_exports = total_param_exports - outinfo->param_exports;
   }

   if (nir->info.stage == MESA_SHADER_FRAGMENT) {
      /* Split the input count into interpolated and per-primitive (flat)
       * parts; per-primitive inputs are counted separately.
       */
      uint64_t per_primitive_input_mask = nir->info.inputs_read & nir->info.per_primitive_inputs;
      unsigned num_per_primitive_inputs = util_bitcount64(per_primitive_input_mask);
      assert(num_per_primitive_inputs <= nir->num_inputs);

      info->ps.num_interp = nir->num_inputs - num_per_primitive_inputs;
      info->ps.num_prim_interp = num_per_primitive_inputs;
   }

   info->vs.needs_draw_id |= BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_DRAW_ID);
   info->vs.needs_base_instance |= BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BASE_INSTANCE);
   info->vs.needs_instance_id |= BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_INSTANCE_ID);
   info->uses_view_index |= BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_VIEW_INDEX);
   info->uses_invocation_id |= BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_INVOCATION_ID);
   info->uses_prim_id |= BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID);

   /* Used by compute and mesh shaders. */
   info->cs.uses_grid_size = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_NUM_WORKGROUPS);
   info->cs.uses_local_invocation_idx = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_LOCAL_INVOCATION_INDEX) |
                                        BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SUBGROUP_ID) |
                                        BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_NUM_SUBGROUPS);

   /* Stage-specific info copied straight from shader_info. */
   switch (nir->info.stage) {
   case MESA_SHADER_COMPUTE:
   case MESA_SHADER_TASK:
      for (int i = 0; i < 3; ++i)
         info->cs.block_size[i] = nir->info.workgroup_size[i];
      info->cs.uses_ray_launch_size = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_RAY_LAUNCH_SIZE_ADDR_AMD);

      /* Task shaders always need these for the I/O lowering even if
       * the API shader doesn't actually use them.
       */
      if (nir->info.stage == MESA_SHADER_TASK) {
         /* Needed to address the IB to read firstTask. */
         info->vs.needs_draw_id |=
            BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_WORKGROUP_ID);

         /* Needed to address the task draw/payload rings. */
         info->cs.uses_block_id[0] = true;
         info->cs.uses_block_id[1] = true;
         info->cs.uses_block_id[2] = true;
         info->cs.uses_grid_size = true;

         /* Needed for storing draw ready only on the 1st thread. */
         info->cs.uses_local_invocation_idx = true;
      }
      break;
   case MESA_SHADER_FRAGMENT:
      info->ps.can_discard = nir->info.fs.uses_discard;
      info->ps.early_fragment_test = nir->info.fs.early_fragment_tests;
      info->ps.post_depth_coverage = nir->info.fs.post_depth_coverage;
      info->ps.depth_layout = nir->info.fs.depth_layout;
      info->ps.uses_sample_shading = nir->info.fs.uses_sample_shading;
      info->ps.writes_memory = nir->info.writes_memory;
      info->ps.has_pcoord = nir->info.inputs_read & VARYING_BIT_PNTC;
      info->ps.prim_id_input = nir->info.inputs_read & VARYING_BIT_PRIMITIVE_ID;
      info->ps.layer_input = nir->info.inputs_read & VARYING_BIT_LAYER;
      info->ps.viewport_index_input = nir->info.inputs_read & VARYING_BIT_VIEWPORT;
      info->ps.writes_z = nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH);
      info->ps.writes_stencil = nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL);
      info->ps.writes_sample_mask = nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK);
      info->ps.reads_sample_mask_in = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN);
      info->ps.reads_sample_id = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID);
      info->ps.reads_frag_shading_rate = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRAG_SHADING_RATE);
      info->ps.reads_front_face = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRONT_FACE);
      info->ps.reads_barycentric_model = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BARYCENTRIC_PULL_MODEL);
      break;
   case MESA_SHADER_GEOMETRY:
      info->gs.vertices_in = nir->info.gs.vertices_in;
      info->gs.vertices_out = nir->info.gs.vertices_out;
      info->gs.output_prim = nir->info.gs.output_primitive;
      info->gs.invocations = nir->info.gs.invocations;
      info->gs.max_stream =
         nir->info.gs.active_stream_mask ? util_last_bit(nir->info.gs.active_stream_mask) - 1 : 0;
      break;
   case MESA_SHADER_TESS_EVAL:
      info->tes._primitive_mode = nir->info.tess._primitive_mode;
      info->tes.spacing = nir->info.tess.spacing;
      info->tes.ccw = nir->info.tess.ccw;
      info->tes.point_mode = nir->info.tess.point_mode;
      break;
   case MESA_SHADER_TESS_CTRL:
      info->tcs.tcs_vertices_out = nir->info.tess.tcs_vertices_out;
      break;
   case MESA_SHADER_VERTEX:
      break;
   case MESA_SHADER_MESH:
      info->ms.output_prim = nir->info.mesh.primitive_type;
      break;
   default:
      break;
   }

   if (nir->info.stage == MESA_SHADER_GEOMETRY) {
      /* GSVS ring layout: 16 bytes (one vec4) per output, with one extra
       * vec4 when clip+cull distances spill into a second slot.
       */
      unsigned add_clip =
         nir->info.clip_distance_array_size + nir->info.cull_distance_array_size > 4;
      info->gs.gsvs_vertex_size = (util_bitcount64(nir->info.outputs_written) + add_clip) * 16;
      info->gs.max_gsvs_emit_size = info->gs.gsvs_vertex_size * nir->info.gs.vertices_out;
   }

   /* Compute the ESGS item size for VS or TES as ES. */
   if ((nir->info.stage == MESA_SHADER_VERTEX && info->vs.as_es) ||
       (nir->info.stage == MESA_SHADER_TESS_EVAL && info->tes.as_es)) {
      struct radv_es_output_info *es_info =
         nir->info.stage == MESA_SHADER_VERTEX ? &info->vs.es_info : &info->tes.es_info;
      uint32_t num_outputs_written = nir->info.stage == MESA_SHADER_VERTEX
                                        ? info->vs.num_linked_outputs
                                        : info->tes.num_linked_outputs;
      es_info->esgs_itemsize = num_outputs_written * 16;
   }

   if (nir->info.stage == MESA_SHADER_FRAGMENT) {
      bool uses_persp_or_linear_interp = info->ps.reads_persp_center ||
                                         info->ps.reads_persp_centroid ||
                                         info->ps.reads_persp_sample ||
                                         info->ps.reads_linear_center ||
                                         info->ps.reads_linear_centroid ||
                                         info->ps.reads_linear_sample;

      /* Flat shading is only allowed when nothing in the shader depends on
       * per-fragment data (interpolation, sample info, helper invocations,
       * memory writes, ...).
       */
      info->ps.allow_flat_shading =
         !(uses_persp_or_linear_interp || info->ps.needs_sample_positions ||
           info->ps.writes_memory || nir->info.fs.needs_quad_helper_invocations ||
           BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRAG_COORD) ||
           BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_POINT_COORD) ||
           BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID) ||
           BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_POS) ||
           BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN) ||
           BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_HELPER_INVOCATION));

      info->ps.spi_ps_input = radv_compute_spi_ps_input(pipeline_key, info);
   }
}
711