1 /*
2 * Copyright 2021 Advanced Micro Devices, Inc.
3 *
4 * SPDX-License-Identifier: MIT
5 */
6
7 #include "si_pipe.h"
8 #include "si_shader_internal.h"
9 #include "util/mesa-sha1.h"
10 #include "sid.h"
11 #include "nir.h"
12 #include "nir_xfb_info.h"
13 #include "aco_interface.h"
14 #include "ac_nir.h"
15
/* Per-application shader profiles. Each entry pairs the BLAKE3 hash of a shader's
 * source with driver options to apply to that shader; si_nir_scan_shader() matches
 * nir->info.source_blake3 against these hashes and copies "options" into
 * si_shader_info::options on a hit.
 */
struct si_shader_profile si_shader_profiles[] =
{
   {
      /* Plot3D */
      {0x38c94662, 0x7b634109, 0x50f8254a, 0x0f4986a9, 0x11e59716, 0x3081e1a2, 0xbb2a0c59, 0xc29e853a},
      SI_PROFILE_VS_NO_BINNING,
   },
   {
      /* Viewperf/Energy */
      {0x3279654e, 0xf51c358d, 0xc526e175, 0xd198eb26, 0x75c36c86, 0xd796398b, 0xc99b5e92, 0xddc31503},
      SI_PROFILE_NO_OPT_UNIFORM_VARYINGS, /* Uniform propagation regresses performance. */
   },
   {
      /* Viewperf/Medical */
      {0x4a041ad8, 0xe105a058, 0x2e9f7a38, 0xef4d1c2f, 0xb8aee798, 0x821f166b, 0x17b42668, 0xa4d1cc0a},
      SI_PROFILE_GFX9_GFX10_PS_NO_BINNING,
   },
   {
      /* Viewperf/Medical, a shader with a divergent loop doesn't benefit from Wave32,
       * probably due to interpolation performance.
       */
      {0xa9c7e2c2, 0x3e01de01, 0x886cab63, 0x24327678, 0xe247c394, 0x2ecc4bf9, 0xc196d978, 0x2ba7a89c},
      SI_PROFILE_GFX10_WAVE64,
   },
   {
      /* Viewperf/Creo */
      {0x182bd6b3, 0x5e8fba11, 0xa7b74071, 0xc69f6153, 0xc57aef8c, 0x9076492a, 0x53dc83ee, 0x921fb114},
      SI_PROFILE_CLAMP_DIV_BY_ZERO,
   },
};
46
si_get_num_shader_profiles(void)47 unsigned si_get_num_shader_profiles(void)
48 {
49 return ARRAY_SIZE(si_shader_profiles);
50 }
51
get_texture_src(nir_tex_instr * instr,nir_tex_src_type type)52 static const nir_src *get_texture_src(nir_tex_instr *instr, nir_tex_src_type type)
53 {
54 for (unsigned i = 0; i < instr->num_srcs; i++) {
55 if (instr->src[i].src_type == type)
56 return &instr->src[i].src;
57 }
58 return NULL;
59 }
60
/* Gather per-slot IO information from one input/output load/store intrinsic into
 * "info": usage masks, semantics, streams, xfb, and stage-specific bitfields.
 *
 * is_input:       true for input intrinsics, false for output loads/stores.
 * colors_lowered: true if PS color inputs were already lowered to
 *                 load_color0/1 (monolithic shaders), so COL/BFC slots here are
 *                 treated as ordinary inputs rather than accumulated into
 *                 colors_read.
 */
static void scan_io_usage(const nir_shader *nir, struct si_shader_info *info,
                          nir_intrinsic_instr *intr, bool is_input, bool colors_lowered)
{
   unsigned mask, bit_size;
   bool is_output_load;

   /* Stores carry a write mask; loads use the set of components actually read. */
   if (nir_intrinsic_has_write_mask(intr)) {
      mask = nir_intrinsic_write_mask(intr); /* store */
      bit_size = nir_src_bit_size(intr->src[0]);
      is_output_load = false;
   } else {
      mask = nir_def_components_read(&intr->def); /* load */
      bit_size = intr->def.bit_size;
      is_output_load = !is_input;
   }
   assert(bit_size != 64 && !(mask & ~0xf) && "64-bit IO should have been lowered");

   /* Convert the 16-bit component mask to a 32-bit component mask except for VS inputs
    * where the mask is untyped.
    */
   if (bit_size == 16 && !is_input) {
      unsigned new_mask = 0;
      for (unsigned i = 0; i < 4; i++) {
         if (mask & (1 << i))
            new_mask |= 0x1 << (i / 2); /* two 16-bit components share one 32-bit slot */
      }
      mask = new_mask;
   }

   /* Shift the mask by the intrinsic's starting component. */
   mask <<= nir_intrinsic_component(intr);

   nir_src offset = *nir_get_io_offset_src(intr);
   bool indirect = !nir_src_is_const(offset);
   if (!indirect)
      assert(nir_src_as_uint(offset) == 0);

   unsigned semantic = 0;
   /* VS doesn't have semantics. */
   if (nir->info.stage != MESA_SHADER_VERTEX || !is_input)
      semantic = nir_intrinsic_io_semantics(intr).location;

   if (nir->info.stage == MESA_SHADER_FRAGMENT && is_input) {
      /* Gather color PS inputs. We can only get here after lowering colors in monolithic
       * shaders. This must match what we do for nir_intrinsic_load_color0/1.
       */
      if (!colors_lowered &&
          (semantic == VARYING_SLOT_COL0 || semantic == VARYING_SLOT_COL1 ||
           semantic == VARYING_SLOT_BFC0 || semantic == VARYING_SLOT_BFC1)) {
         /* index is 0 for COL0/BFC0 and 1 for COL1/BFC1. */
         unsigned index = semantic == VARYING_SLOT_COL1 || semantic == VARYING_SLOT_BFC1;
         info->colors_read |= mask << (index * 4);
         return;
      }
   }

   if (nir->info.stage == MESA_SHADER_FRAGMENT && !is_input) {
      /* Never use FRAG_RESULT_COLOR directly. */
      if (semantic == FRAG_RESULT_COLOR)
         semantic = FRAG_RESULT_DATA0;
      semantic += nir_intrinsic_io_semantics(intr).dual_source_blend_index;
   }

   unsigned driver_location = nir_intrinsic_base(intr);
   /* For indirect access, attribute the usage to all slots the array may touch. */
   unsigned num_slots = indirect ? nir_intrinsic_io_semantics(intr).num_slots : 1;

   if (is_input) {
      assert(driver_location + num_slots <= ARRAY_SIZE(info->input));

      for (unsigned i = 0; i < num_slots; i++) {
         unsigned loc = driver_location + i;

         info->input[loc].semantic = semantic + i;

         if (mask) {
            info->input[loc].usage_mask |= mask;
            info->num_inputs = MAX2(info->num_inputs, loc + 1);
         }
      }
   } else {
      /* Outputs. */
      assert(driver_location + num_slots <= ARRAY_SIZE(info->output_usagemask));

      for (unsigned i = 0; i < num_slots; i++) {
         unsigned loc = driver_location + i;
         unsigned slot_semantic = semantic + i;

         /* Call the translation functions to validate the semantic (call assertions in them). */
         if (nir->info.stage != MESA_SHADER_FRAGMENT &&
             semantic != VARYING_SLOT_EDGE) {
            if (semantic == VARYING_SLOT_TESS_LEVEL_INNER ||
                semantic == VARYING_SLOT_TESS_LEVEL_OUTER ||
                (semantic >= VARYING_SLOT_PATCH0 && semantic <= VARYING_SLOT_PATCH31)) {
               ac_shader_io_get_unique_index_patch(semantic);
               ac_shader_io_get_unique_index_patch(slot_semantic);
            } else {
               si_shader_io_get_unique_index(semantic);
               si_shader_io_get_unique_index(slot_semantic);
            }
         }

         info->output_semantic[loc] = slot_semantic;

         if (!is_output_load && mask) {
            /* Output stores. */
            /* gs_streams packs 2 bits per component; align it with the starting component. */
            unsigned gs_streams = (uint32_t)nir_intrinsic_io_semantics(intr).gs_streams <<
                                  (nir_intrinsic_component(intr) * 2);
            unsigned new_mask = mask & ~info->output_usagemask[loc];

            /* Iterate over all components. */
            for (unsigned i = 0; i < 4; i++) {
               unsigned stream = (gs_streams >> (i * 2)) & 0x3;

               if (new_mask & (1 << i)) {
                  info->output_streams[loc] |= stream << (i * 2);
                  info->num_stream_output_components[stream]++;
               }

               if (nir_intrinsic_has_io_xfb(intr)) {
                  /* io_xfb covers components 0-1, io_xfb2 covers components 2-3. */
                  nir_io_xfb xfb = i < 2 ? nir_intrinsic_io_xfb(intr) :
                                           nir_intrinsic_io_xfb2(intr);
                  if (xfb.out[i % 2].num_components) {
                     unsigned stream = (gs_streams >> (i * 2)) & 0x3;
                     info->enabled_streamout_buffer_mask |=
                        BITFIELD_BIT(stream * 4 + xfb.out[i % 2].buffer);
                  }

                  info->output_xfb_writemask[loc] |= nir_instr_xfb_write_mask(intr);
               }
            }

            /* Record the output type; default to float32 when the intrinsic has none. */
            if (nir_intrinsic_has_src_type(intr))
               info->output_type[loc] = nir_intrinsic_src_type(intr);
            else if (nir_intrinsic_has_dest_type(intr))
               info->output_type[loc] = nir_intrinsic_dest_type(intr);
            else
               info->output_type[loc] = nir_type_float32;

            info->output_usagemask[loc] |= mask;
            info->num_outputs = MAX2(info->num_outputs, loc + 1);

            if (nir->info.stage == MESA_SHADER_VERTEX ||
                nir->info.stage == MESA_SHADER_TESS_CTRL ||
                nir->info.stage == MESA_SHADER_TESS_EVAL ||
                nir->info.stage == MESA_SHADER_GEOMETRY) {
               if (slot_semantic == VARYING_SLOT_TESS_LEVEL_INNER ||
                   slot_semantic == VARYING_SLOT_TESS_LEVEL_OUTER) {
                  if (!nir_intrinsic_io_semantics(intr).no_varying) {
                     info->tess_levels_written_for_tes |=
                        BITFIELD_BIT(ac_shader_io_get_unique_index_patch(slot_semantic));
                  }
               } else if (slot_semantic >= VARYING_SLOT_PATCH0 &&
                          slot_semantic < VARYING_SLOT_TESS_MAX) {
                  if (!nir_intrinsic_io_semantics(intr).no_varying) {
                     info->patch_outputs_written_for_tes |=
                        BITFIELD_BIT(ac_shader_io_get_unique_index_patch(slot_semantic));
                  }
               } else if ((slot_semantic <= VARYING_SLOT_VAR31 ||
                           slot_semantic >= VARYING_SLOT_VAR0_16BIT) &&
                          slot_semantic != VARYING_SLOT_EDGE) {
                  uint64_t bit = BITFIELD64_BIT(si_shader_io_get_unique_index(slot_semantic));

                  /* Ignore outputs that are not passed from VS to PS. */
                  if (slot_semantic != VARYING_SLOT_POS &&
                      slot_semantic != VARYING_SLOT_PSIZ &&
                      slot_semantic != VARYING_SLOT_CLIP_VERTEX &&
                      slot_semantic != VARYING_SLOT_LAYER)
                     info->outputs_written_before_ps |= bit;

                  /* LAYER and VIEWPORT have no effect if they don't feed the rasterizer. */
                  if (slot_semantic != VARYING_SLOT_LAYER &&
                      slot_semantic != VARYING_SLOT_VIEWPORT) {
                     info->ls_es_outputs_written |= bit;

                     if (!nir_intrinsic_io_semantics(intr).no_varying)
                        info->tcs_outputs_written_for_tes |= bit;
                  }
               }
            }

            /* Track 16-bit color output types per render target (2 bits each). */
            if (nir->info.stage == MESA_SHADER_FRAGMENT &&
                semantic >= FRAG_RESULT_DATA0 && semantic <= FRAG_RESULT_DATA7) {
               unsigned index = semantic - FRAG_RESULT_DATA0;

               if (nir_intrinsic_src_type(intr) == nir_type_float16)
                  info->output_color_types |= SI_TYPE_FLOAT16 << (index * 2);
               else if (nir_intrinsic_src_type(intr) == nir_type_int16)
                  info->output_color_types |= SI_TYPE_INT16 << (index * 2);
               else if (nir_intrinsic_src_type(intr) == nir_type_uint16)
                  info->output_color_types |= SI_TYPE_UINT16 << (index * 2);
            }
         }
      }
   }

   /* Track whether gl_FragDepth is just a copy of gl_FragCoord.z. */
   if (nir->info.stage == MESA_SHADER_FRAGMENT && !is_input && semantic == FRAG_RESULT_DEPTH) {
      if (nir_def_is_frag_coord_z(intr->src[0].ssa))
         info->output_z_equals_input_z = true;
      else
         info->output_z_is_not_input_z = true;
   }
}
261
is_bindless_handle_indirect(nir_instr * src)262 static bool is_bindless_handle_indirect(nir_instr *src)
263 {
264 /* Check if the bindless handle comes from indirect load_ubo. */
265 if (src->type == nir_instr_type_intrinsic &&
266 nir_instr_as_intrinsic(src)->intrinsic == nir_intrinsic_load_ubo) {
267 if (!nir_src_is_const(nir_instr_as_intrinsic(src)->src[0]))
268 return true;
269 } else {
270 /* Some other instruction. Return the worst-case result. */
271 return true;
272 }
273 return false;
274 }
275
276 /* TODO: convert to nir_shader_instructions_pass */
/* TODO: convert to nir_shader_instructions_pass */
/* Scan a single NIR instruction and accumulate usage flags into "info":
 * VMEM usage classes, bindless/indirect descriptor usage, system-value-like
 * reads (thread/block IDs, colors, barycentrics), and IO usage via
 * scan_io_usage(). "colors_lowered" is forwarded to scan_io_usage().
 */
static void scan_instruction(const struct nir_shader *nir, struct si_shader_info *info,
                             nir_instr *instr, bool colors_lowered)
{
   if (instr->type == nir_instr_type_tex) {
      nir_tex_instr *tex = nir_instr_as_tex(instr);
      const nir_src *handle = get_texture_src(tex, nir_tex_src_texture_handle);

      /* Gather the types of used VMEM instructions that return something. */
      switch (tex->op) {
      case nir_texop_tex:
      case nir_texop_txb:
      case nir_texop_txl:
      case nir_texop_txd:
      case nir_texop_lod:
      case nir_texop_tg4:
         info->uses_vmem_sampler_or_bvh = true;
         break;
      default:
         info->uses_vmem_load_other = true;
         break;
      }

      if (handle) {
         /* A texture handle source means a bindless sampler. */
         info->uses_bindless_samplers = true;

         if (is_bindless_handle_indirect(handle->ssa->parent_instr))
            info->uses_indirect_descriptor = true;
      } else {
         const nir_src *deref = get_texture_src(tex, nir_tex_src_texture_deref);

         if (nir_deref_instr_has_indirect(nir_src_as_deref(*deref)))
            info->uses_indirect_descriptor = true;
      }

      info->has_non_uniform_tex_access |=
         tex->texture_non_uniform || tex->sampler_non_uniform;

      info->has_shadow_comparison |= tex->is_shadow;
   } else if (instr->type == nir_instr_type_intrinsic) {
      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
      /* Classify the intrinsic by name substrings to avoid listing every opcode. */
      const char *intr_name = nir_intrinsic_infos[intr->intrinsic].name;
      bool is_ssbo = strstr(intr_name, "ssbo");
      bool is_image = strstr(intr_name, "image") == intr_name;
      bool is_bindless_image = strstr(intr_name, "bindless_image") == intr_name;

      /* Gather the types of used VMEM instructions that return something. */
      if (nir_intrinsic_infos[intr->intrinsic].has_dest) {
         switch (intr->intrinsic) {
         case nir_intrinsic_load_ubo:
            /* Only non-constant UBO loads go through VMEM here. */
            if (!nir_src_is_const(intr->src[1]))
               info->uses_vmem_load_other = true;
            break;

         case nir_intrinsic_load_input:
         case nir_intrinsic_load_input_vertex:
         case nir_intrinsic_load_per_vertex_input:
            /* VS/TES inputs are fetched from memory. */
            if (nir->info.stage == MESA_SHADER_VERTEX ||
                nir->info.stage == MESA_SHADER_TESS_EVAL)
               info->uses_vmem_load_other = true;
            break;

         case nir_intrinsic_load_constant:
         case nir_intrinsic_load_barycentric_at_sample: /* This loads sample positions. */
         case nir_intrinsic_load_buffer_amd:
            info->uses_vmem_load_other = true;
            break;

         default:
            if (is_image ||
                is_bindless_image ||
                is_ssbo ||
                (strstr(intr_name, "global") == intr_name ||
                 intr->intrinsic == nir_intrinsic_load_global ||
                 intr->intrinsic == nir_intrinsic_store_global) ||
                strstr(intr_name, "scratch"))
               info->uses_vmem_load_other = true;
            break;
         }
      }

      if (is_bindless_image)
         info->uses_bindless_images = true;

      if (is_image && nir_deref_instr_has_indirect(nir_src_as_deref(intr->src[0])))
         info->uses_indirect_descriptor = true;

      if (is_bindless_image && is_bindless_handle_indirect(intr->src[0].ssa->parent_instr))
         info->uses_indirect_descriptor = true;

      /* Non-constant SSBO index (src[0] for everything except store_ssbo). */
      if (intr->intrinsic != nir_intrinsic_store_ssbo && is_ssbo &&
          !nir_src_is_const(intr->src[0]))
         info->uses_indirect_descriptor = true;

      if (nir_intrinsic_has_atomic_op(intr)) {
         if (nir_intrinsic_atomic_op(intr) == nir_atomic_op_ordered_add_gfx12_amd)
            info->uses_atomic_ordered_add = true;
      }

      switch (intr->intrinsic) {
      case nir_intrinsic_store_ssbo:
         /* store_ssbo has the buffer index in src[1]. */
         if (!nir_src_is_const(intr->src[1]))
            info->uses_indirect_descriptor = true;
         break;
      case nir_intrinsic_load_ubo:
         if (!nir_src_is_const(intr->src[0]))
            info->uses_indirect_descriptor = true;
         break;
      case nir_intrinsic_load_local_invocation_id:
      case nir_intrinsic_load_workgroup_id: {
         /* Record which of the x/y/z components are actually read. */
         unsigned mask = nir_def_components_read(&intr->def);
         while (mask) {
            unsigned i = u_bit_scan(&mask);

            if (intr->intrinsic == nir_intrinsic_load_workgroup_id)
               info->uses_block_id[i] = true;
            else
               info->uses_thread_id[i] = true;
         }
         break;
      }
      case nir_intrinsic_load_color0:
      case nir_intrinsic_load_color1: {
         unsigned index = intr->intrinsic == nir_intrinsic_load_color1;
         uint8_t mask = nir_def_components_read(&intr->def);
         info->colors_read |= mask << (index * 4);

         /* Derive which barycentric sets the color interpolation needs. */
         switch (info->color_interpolate[index]) {
         case INTERP_MODE_SMOOTH:
            if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_SAMPLE)
               info->uses_persp_sample = true;
            else if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_CENTROID)
               info->uses_persp_centroid = true;
            else if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_CENTER)
               info->uses_persp_center = true;
            break;
         case INTERP_MODE_NOPERSPECTIVE:
            if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_SAMPLE)
               info->uses_linear_sample = true;
            else if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_CENTROID)
               info->uses_linear_centroid = true;
            else if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_CENTER)
               info->uses_linear_center = true;
            break;
         case INTERP_MODE_COLOR:
            /* We don't know the final value. This will be FLAT if flatshading is enabled
             * in the rasterizer state, otherwise it will be SMOOTH.
             */
            info->uses_interp_color = true;
            if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_SAMPLE)
               info->uses_persp_sample_color = true;
            else if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_CENTROID)
               info->uses_persp_centroid_color = true;
            else if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_CENTER)
               info->uses_persp_center_color = true;
            break;
         }
         break;
      }
      case nir_intrinsic_load_barycentric_at_offset: /* uses center */
      case nir_intrinsic_load_barycentric_at_sample: /* uses center */
         if (nir_intrinsic_interp_mode(intr) == INTERP_MODE_FLAT)
            break;

         if (nir_intrinsic_interp_mode(intr) == INTERP_MODE_NOPERSPECTIVE) {
            info->uses_linear_center = true;
         } else {
            info->uses_persp_center = true;
         }
         if (intr->intrinsic == nir_intrinsic_load_barycentric_at_offset)
            info->uses_interp_at_offset = true;
         if (intr->intrinsic == nir_intrinsic_load_barycentric_at_sample)
            info->uses_interp_at_sample = true;
         break;
      case nir_intrinsic_load_frag_coord:
         info->reads_frag_coord_mask |= nir_def_components_read(&intr->def);
         break;
      case nir_intrinsic_load_input:
      case nir_intrinsic_load_per_vertex_input:
      case nir_intrinsic_load_input_vertex:
      case nir_intrinsic_load_interpolated_input:
         scan_io_usage(nir, info, intr, true, colors_lowered);
         break;
      case nir_intrinsic_load_output:
      case nir_intrinsic_load_per_vertex_output:
      case nir_intrinsic_store_output:
      case nir_intrinsic_store_per_vertex_output:
         scan_io_usage(nir, info, intr, false, colors_lowered);
         break;
      case nir_intrinsic_load_deref:
      case nir_intrinsic_store_deref:
         /* These can only occur if there is indirect temp indexing. */
         break;
      case nir_intrinsic_interp_deref_at_centroid:
      case nir_intrinsic_interp_deref_at_sample:
      case nir_intrinsic_interp_deref_at_offset:
         unreachable("these opcodes should have been lowered");
         break;
      case nir_intrinsic_ordered_add_loop_gfx12_amd:
         info->uses_atomic_ordered_add = true;
         break;
      default:
         break;
      }
   }
}
482
/* Scan a NIR shader and fill *info with everything the driver needs to know
 * about it: IO tables, system-value usage, VMEM usage flags, streamout info,
 * and per-stage derived fields. Also decides whether ACO compiles this shader
 * (written into nir->info.use_aco_amd) and applies per-app shader profiles.
 *
 * "colors_lowered" is forwarded to the per-instruction scan and controls how
 * PS color inputs are gathered (see scan_io_usage).
 */
void si_nir_scan_shader(struct si_screen *sscreen, struct nir_shader *nir,
                        struct si_shader_info *info, bool colors_lowered)
{
   /* Debug option: force ACO for one shader selected by its BLAKE3 hash. */
   bool force_use_aco = false;
   if (sscreen->force_shader_use_aco) {
      if (!memcmp(sscreen->use_aco_shader_blake, nir->info.source_blake3,
                  sizeof(sscreen->use_aco_shader_blake))) {
         force_use_aco = true;
      }
   }

   nir->info.use_aco_amd = aco_is_gpu_supported(&sscreen->info) &&
                           sscreen->info.has_image_opcodes &&
                           (sscreen->use_aco || nir->info.use_aco_amd || force_use_aco ||
                            /* Use ACO for streamout on gfx12 because it's faster. */
                            (sscreen->info.gfx_level >= GFX12 && nir->xfb_info &&
                             nir->xfb_info->output_count));

   if (nir->info.stage == MESA_SHADER_FRAGMENT) {
      /* post_depth_coverage implies early_fragment_tests */
      nir->info.fs.early_fragment_tests |= nir->info.fs.post_depth_coverage;
   }

   memset(info, 0, sizeof(*info));
   info->base = nir->info;

   /* Get options from shader profiles. */
   for (unsigned i = 0; i < ARRAY_SIZE(si_shader_profiles); i++) {
      if (_mesa_printed_blake3_equal(nir->info.source_blake3, si_shader_profiles[i].blake3)) {
         info->options = si_shader_profiles[i].options;
         break;
      }
   }

   if (nir->info.stage == MESA_SHADER_FRAGMENT) {
      info->color_interpolate[0] = nir->info.fs.color0_interp;
      info->color_interpolate[1] = nir->info.fs.color1_interp;
      for (unsigned i = 0; i < 2; i++) {
         /* INTERP_MODE_COLOR means "decided by the flatshade rasterizer state". */
         if (info->color_interpolate[i] == INTERP_MODE_NONE)
            info->color_interpolate[i] = INTERP_MODE_COLOR;
      }

      info->color_interpolate_loc[0] = nir->info.fs.color0_sample ? TGSI_INTERPOLATE_LOC_SAMPLE :
                                       nir->info.fs.color0_centroid ? TGSI_INTERPOLATE_LOC_CENTROID :
                                       TGSI_INTERPOLATE_LOC_CENTER;
      info->color_interpolate_loc[1] = nir->info.fs.color1_sample ? TGSI_INTERPOLATE_LOC_SAMPLE :
                                       nir->info.fs.color1_centroid ? TGSI_INTERPOLATE_LOC_CENTROID :
                                       TGSI_INTERPOLATE_LOC_CENTER;
      /* Set an invalid value. Will be determined at draw time if needed when the expected
       * conditions are met.
       */
      info->writes_1_if_tex_is_1 = nir->info.writes_memory ? 0 : 0xff;
   }

   info->constbuf0_num_slots = nir->num_uniforms;

   if (nir->info.stage == MESA_SHADER_TESS_CTRL) {
      nir_tcs_info tcs_info;
      nir_gather_tcs_info(nir, &tcs_info, nir->info.tess._primitive_mode,
                          nir->info.tess.spacing);

      info->tessfactors_are_def_in_all_invocs = tcs_info.all_invocations_define_tess_levels;
   }

   /* tess factors are loaded as input instead of system value */
   info->reads_tess_factors = nir->info.inputs_read &
                              (BITFIELD64_BIT(VARYING_SLOT_TESS_LEVEL_INNER) |
                               BITFIELD64_BIT(VARYING_SLOT_TESS_LEVEL_OUTER));

   /* Translate the system-value bitset into individual driver flags. */
   info->uses_frontface = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRONT_FACE) |
                          BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRONT_FACE_FSIGN);
   info->uses_instanceid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_INSTANCE_ID);
   info->uses_base_vertex = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX);
   info->uses_base_instance = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BASE_INSTANCE);
   info->uses_invocationid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_INVOCATION_ID);
   info->uses_grid_size = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_NUM_WORKGROUPS);
   info->uses_tg_size = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_NUM_SUBGROUPS);
   if (sscreen->info.gfx_level < GFX12) {
      info->uses_tg_size |= BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_LOCAL_INVOCATION_INDEX) ||
                            BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SUBGROUP_ID) ||
                            si_should_clear_lds(sscreen, nir);
   }
   info->uses_variable_block_size = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_WORKGROUP_SIZE);
   info->uses_drawid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_DRAW_ID);
   info->uses_primid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID) ||
                       nir->info.inputs_read & VARYING_BIT_PRIMITIVE_ID;
   info->reads_samplemask = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN);
   info->uses_linear_sample = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BARYCENTRIC_LINEAR_SAMPLE);
   info->uses_linear_centroid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BARYCENTRIC_LINEAR_CENTROID);
   info->uses_linear_center = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BARYCENTRIC_LINEAR_PIXEL);
   info->uses_persp_sample = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BARYCENTRIC_PERSP_SAMPLE);
   info->uses_persp_centroid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BARYCENTRIC_PERSP_CENTROID);
   info->uses_persp_center = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL);

   if (nir->info.stage == MESA_SHADER_FRAGMENT) {
      info->writes_z = nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH);
      info->writes_stencil = nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL);
      info->writes_samplemask = nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK);

      info->colors_written = nir->info.outputs_written >> FRAG_RESULT_DATA0;
      if (nir->info.fs.color_is_dual_source)
         info->colors_written |= 0x2;
      if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_COLOR)) {
         info->colors_written |= 0x1;
         /* FRAG_RESULT_COLOR broadcasts to all cbufs only if no other color is written. */
         info->color0_writes_all_cbufs = info->colors_written == 0x1;

      }
   } else {
      info->writes_primid = nir->info.outputs_written & VARYING_BIT_PRIMITIVE_ID;
      info->writes_viewport_index = nir->info.outputs_written & VARYING_BIT_VIEWPORT;
      info->writes_layer = nir->info.outputs_written & VARYING_BIT_LAYER;
      info->writes_psize = nir->info.outputs_written & VARYING_BIT_PSIZ;
      info->writes_clipvertex = nir->info.outputs_written & VARYING_BIT_CLIP_VERTEX;
      info->writes_edgeflag = nir->info.outputs_written & VARYING_BIT_EDGE;
      info->writes_position = nir->info.outputs_written & VARYING_BIT_POS;
   }

   /* Walk every instruction of the entrypoint and accumulate usage info. */
   nir_function_impl *impl = nir_shader_get_entrypoint((nir_shader*)nir);
   nir_foreach_block (block, impl) {
      nir_foreach_instr (instr, block)
         scan_instruction(nir, info, instr, colors_lowered);
   }

   if (nir->info.stage == MESA_SHADER_VERTEX || nir->info.stage == MESA_SHADER_TESS_EVAL ||
       nir->info.stage == MESA_SHADER_GEOMETRY) {
      /* Count streamout components from the per-output xfb write masks. */
      info->num_streamout_components = 0;
      for (unsigned i = 0; i < info->num_outputs; i++)
         info->num_streamout_components += util_bitcount(info->output_xfb_writemask[i]);
   }

   if (nir->info.stage == MESA_SHADER_VERTEX || nir->info.stage == MESA_SHADER_TESS_EVAL) {
      /* Add the PrimitiveID output, but don't increment num_outputs.
       * The driver inserts PrimitiveID only when it's used by the pixel shader,
       * and si_emit_spi_map uses this unconditionally when such a pixel shader is used.
       */
      info->output_semantic[info->num_outputs] = VARYING_SLOT_PRIMITIVE_ID;
      info->output_type[info->num_outputs] = nir_type_uint32;
      info->output_usagemask[info->num_outputs] = 0x1;
   }

   if (nir->info.stage == MESA_SHADER_FRAGMENT) {
      /* output_z_equals_input_z only holds if no depth store contradicted it. */
      info->output_z_equals_input_z &= !info->output_z_is_not_input_z;
      info->allow_flat_shading = !(info->uses_persp_center || info->uses_persp_centroid ||
                                   info->uses_persp_sample || info->uses_linear_center ||
                                   info->uses_linear_centroid || info->uses_linear_sample ||
                                   info->uses_interp_at_sample || nir->info.writes_memory ||
                                   nir->info.fs.uses_fbfetch_output ||
                                   nir->info.fs.needs_quad_helper_invocations ||
                                   BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRAG_COORD) ||
                                   BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_POINT_COORD) ||
                                   BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID) ||
                                   BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_POS) ||
                                   BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN) ||
                                   BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_HELPER_INVOCATION));

      info->uses_vmem_load_other |= nir->info.fs.uses_fbfetch_output;

      /* Add both front and back color inputs. */
      unsigned num_inputs_with_colors = info->num_inputs;
      for (unsigned back = 0; back < 2; back++) {
         for (unsigned i = 0; i < 2; i++) {
            if ((info->colors_read >> (i * 4)) & 0xf) {
               unsigned index = num_inputs_with_colors;

               info->input[index].semantic = (back ? VARYING_SLOT_BFC0 : VARYING_SLOT_COL0) + i;
               info->input[index].usage_mask = info->colors_read >> (i * 4);
               num_inputs_with_colors++;

               /* Back-face color don't increment num_inputs. si_emit_spi_map will use
                * back-face colors conditionally only when they are needed.
                */
               if (!back)
                  info->num_inputs = num_inputs_with_colors;
            }
         }
      }
   }

   info->uses_vmem_load_other |= info->uses_indirect_descriptor;
   info->has_divergent_loop = nir_has_divergent_loop((nir_shader*)nir);

   if (nir->info.stage == MESA_SHADER_VERTEX) {
      info->num_vs_inputs =
         nir->info.stage == MESA_SHADER_VERTEX && !nir->info.vs.blit_sgprs_amd ? info->num_inputs : 0;
      unsigned num_vbos_in_sgprs = si_num_vbos_in_user_sgprs_inline(sscreen->info.gfx_level);
      info->num_vbos_in_user_sgprs = MIN2(info->num_vs_inputs, num_vbos_in_sgprs);
   }

   if (nir->info.stage == MESA_SHADER_VERTEX ||
       nir->info.stage == MESA_SHADER_TESS_CTRL ||
       nir->info.stage == MESA_SHADER_TESS_EVAL) {
      /* 16 bytes (one vec4) per written LS/ES output. */
      info->esgs_vertex_stride =
         util_last_bit64(info->ls_es_outputs_written) * 16;

      /* For the ESGS ring in LDS, add 1 dword to reduce LDS bank
       * conflicts, i.e. each vertex will start on a different bank.
       */
      if (sscreen->info.gfx_level >= GFX9) {
         if (info->esgs_vertex_stride)
            info->esgs_vertex_stride += 4;
      } else {
         assert(((info->esgs_vertex_stride / 4) & C_028AAC_ITEMSIZE) == 0);
      }

      info->tcs_inputs_via_temp = nir->info.tess.tcs_same_invocation_inputs_read;
      info->tcs_inputs_via_lds = nir->info.tess.tcs_cross_invocation_inputs_read |
                                 (nir->info.tess.tcs_same_invocation_inputs_read &
                                  nir->info.inputs_read_indirectly);
   }

   if (nir->info.stage == MESA_SHADER_GEOMETRY) {
      /* 16 bytes (one vec4) per output per emitted vertex. */
      info->gsvs_vertex_size = info->num_outputs * 16;
      info->max_gsvs_emit_size = info->gsvs_vertex_size * nir->info.gs.vertices_out;
      info->gs_input_verts_per_prim =
         mesa_vertices_per_prim(nir->info.gs.input_primitive);
   }

   /* With a CLIP_VERTEX output the driver must assume all user clip planes. */
   info->clipdist_mask = info->writes_clipvertex ? SI_USER_CLIP_PLANE_MASK :
                         u_bit_consecutive(0, nir->info.clip_distance_array_size);
   info->culldist_mask = u_bit_consecutive(0, nir->info.cull_distance_array_size) <<
                         nir->info.clip_distance_array_size;

   if (nir->info.stage == MESA_SHADER_FRAGMENT) {
      for (unsigned i = 0; i < info->num_inputs; i++) {
         unsigned semantic = info->input[i].semantic;

         if ((semantic <= VARYING_SLOT_VAR31 || semantic >= VARYING_SLOT_VAR0_16BIT) &&
             semantic != VARYING_SLOT_PNTC) {
            info->inputs_read |= 1ull << si_shader_io_get_unique_index(semantic);
         }
      }

      /* Expand each written color bit into a 4-bit component mask. */
      for (unsigned i = 0; i < 8; i++)
         if (info->colors_written & (1 << i))
            info->colors_written_4bit |= 0xf << (4 * i);

      for (unsigned i = 0; i < info->num_inputs; i++) {
         if (info->input[i].semantic == VARYING_SLOT_COL0)
            info->color_attr_index[0] = i;
         else if (info->input[i].semantic == VARYING_SLOT_COL1)
            info->color_attr_index[1] = i;
      }
   }
}
727
728 enum ac_hw_stage
si_select_hw_stage(const gl_shader_stage stage,const union si_shader_key * const key,const enum amd_gfx_level gfx_level)729 si_select_hw_stage(const gl_shader_stage stage, const union si_shader_key *const key,
730 const enum amd_gfx_level gfx_level)
731 {
732 switch (stage) {
733 case MESA_SHADER_VERTEX:
734 case MESA_SHADER_TESS_EVAL:
735 if (key->ge.as_ngg)
736 return AC_HW_NEXT_GEN_GEOMETRY_SHADER;
737 else if (key->ge.as_es)
738 return gfx_level >= GFX9 ? AC_HW_LEGACY_GEOMETRY_SHADER : AC_HW_EXPORT_SHADER;
739 else if (key->ge.as_ls)
740 return gfx_level >= GFX9 ? AC_HW_HULL_SHADER : AC_HW_LOCAL_SHADER;
741 else
742 return AC_HW_VERTEX_SHADER;
743 case MESA_SHADER_TESS_CTRL:
744 return AC_HW_HULL_SHADER;
745 case MESA_SHADER_GEOMETRY:
746 if (key->ge.as_ngg)
747 return AC_HW_NEXT_GEN_GEOMETRY_SHADER;
748 else
749 return AC_HW_LEGACY_GEOMETRY_SHADER;
750 case MESA_SHADER_FRAGMENT:
751 return AC_HW_PIXEL_SHADER;
752 case MESA_SHADER_COMPUTE:
753 case MESA_SHADER_KERNEL:
754 return AC_HW_COMPUTE_SHADER;
755 default:
756 unreachable("Unsupported HW stage");
757 }
758 }
759