/*
 * Copyright 2022 Advanced Micro Devices, Inc.
 *
 * SPDX-License-Identifier: MIT
 */

#include "nir_builder.h"

#include "ac_nir.h"
#include "si_pipe.h"
#include "si_query.h"
#include "si_state.h"
#include "si_shader_internal.h"

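/* Per-shader state carried through the lowering pass. The ring descriptors are
 * built once at the top of the entrypoint (see preload_reusable_variables) so
 * that every lowered intrinsic can reuse the same SSA defs.
 */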
struct lower_abi_state {
   struct si_shader *shader;
   struct si_shader_args *args;

   nir_def *esgs_ring;
   nir_def *tess_offchip_ring;
   nir_def *gsvs_ring[4];
};

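/* Unpack one bitfield from the vs_state_bits SGPR; "field" must have matching
 * field##__SHIFT and field##__MASK definitions (the VS_STATE_* and GS_STATE_*
 * fields).
 */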
#define GET_FIELD_NIR(field) \
   ac_nir_unpack_arg(b, &args->ac, args->vs_state_bits, \
                     field##__SHIFT, util_bitcount(field##__MASK))

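/* Load "num_components" dwords from the internal bindings buffer. Each slot is a
 * 16-byte entry, so data is addressed at slot * 16 bytes from the
 * internal_bindings base pointer.
 */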
nir_def *si_nir_load_internal_binding(nir_builder *b, struct si_shader_args *args,
                                      unsigned slot, unsigned num_components)
{
   nir_def *addr = ac_nir_load_arg(b, &args->ac, args->internal_bindings);
   return nir_load_smem_amd(b, num_components, addr, nir_imm_int(b, slot * 16));
}

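/* Build the attribute ring descriptor. The base address comes from a shader
 * argument at runtime (the last blit SGPR for vertex blit shaders), while the
 * remaining dwords are compile-time constants from ac_build_attr_ring_descriptor.
 */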
static nir_def *build_attr_ring_desc(nir_builder *b, struct si_shader *shader,
                                     struct si_shader_args *args)
{
   struct si_shader_selector *sel = shader->selector;

   nir_def *attr_address =
      b->shader->info.stage == MESA_SHADER_VERTEX && b->shader->info.vs.blit_sgprs_amd ?
         ac_nir_load_arg_at_offset(b, &args->ac, args->vs_blit_inputs,
                                   b->shader->info.vs.blit_sgprs_amd - 1) :
         ac_nir_load_arg(b, &args->ac, args->gs_attr_address);

   unsigned stride = 16 * si_shader_num_alloc_param_exports(shader);
   uint32_t desc[4];

   ac_build_attr_ring_descriptor(sel->screen->info.gfx_level,
                                 (uint64_t)sel->screen->info.address32_hi << 32,
                                 0xffffffff, stride, desc);

   nir_def *comp[] = {
      attr_address,
      nir_imm_int(b, desc[1]),
      nir_imm_int(b, desc[2]),
      nir_imm_int(b, desc[3]),
   };

   return nir_vec(b, comp, 4);
}

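/* Lower FBFETCH (reading the current render target) to a bindless image load from
 * the color buffer descriptor stored in the internal bindings. For MSAA color
 * buffers before GFX11, the sample index is first remapped through FMASK unless
 * FMASK is disabled.
 */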
static nir_def *
fetch_framebuffer(nir_builder *b, struct si_shader_args *args,
                  struct si_shader_selector *sel, union si_shader_key *key)
{
   /* Load the image descriptor. */
   STATIC_ASSERT(SI_PS_IMAGE_COLORBUF0 % 2 == 0);
   STATIC_ASSERT(SI_PS_IMAGE_COLORBUF0_FMASK % 2 == 0);

   nir_def *zero = nir_imm_zero(b, 1, 32);
   nir_def *undef = nir_undef(b, 1, 32);

   unsigned chan = 0;
   nir_def *vec[4] = {undef, undef, undef, undef};

   vec[chan++] = ac_nir_unpack_arg(b, &args->ac, args->ac.pos_fixed_pt, 0, 16);

   if (!key->ps.mono.fbfetch_is_1D)
      vec[chan++] = ac_nir_unpack_arg(b, &args->ac, args->ac.pos_fixed_pt, 16, 16);

   /* Get the current render target layer index. */
   if (key->ps.mono.fbfetch_layered)
      vec[chan++] = ac_nir_unpack_arg(b, &args->ac, args->ac.ancillary, 16, 11);

   nir_def *coords = nir_vec(b, vec, 4);

   enum glsl_sampler_dim dim;
   if (key->ps.mono.fbfetch_msaa)
      dim = GLSL_SAMPLER_DIM_MS;
   else if (key->ps.mono.fbfetch_is_1D)
      dim = GLSL_SAMPLER_DIM_1D;
   else
      dim = GLSL_SAMPLER_DIM_2D;

   nir_def *sample_id;
   if (key->ps.mono.fbfetch_msaa) {
      sample_id = ac_nir_unpack_arg(b, &args->ac, args->ac.ancillary, 8, 4);

      if (sel->screen->info.gfx_level < GFX11 &&
          !(sel->screen->debug_flags & DBG(NO_FMASK))) {
         nir_def *desc =
            si_nir_load_internal_binding(b, args, SI_PS_IMAGE_COLORBUF0_FMASK, 8);

         nir_def *fmask =
            nir_bindless_image_fragment_mask_load_amd(
               b, desc, coords,
               .image_dim = dim,
               .image_array = key->ps.mono.fbfetch_layered,
               .access = ACCESS_CAN_REORDER);

         nir_def *offset = nir_ishl_imm(b, sample_id, 2);
         /* 3 for EQAA handling, see lower_image_to_fragment_mask_load() */
         nir_def *width = nir_imm_int(b, 3);
         sample_id = nir_ubfe(b, fmask, offset, width);
      }
   } else {
      sample_id = zero;
   }

   nir_def *desc = si_nir_load_internal_binding(b, args, SI_PS_IMAGE_COLORBUF0, 8);

   return nir_bindless_image_load(b, 4, 32, desc, coords, sample_id, zero,
                                  .image_dim = dim,
                                  .image_array = key->ps.mono.fbfetch_layered,
                                  .access = ACCESS_CAN_REORDER);
}

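/* Build the tess offchip ring descriptor: the base address is taken from the
 * tes_offchip_addr argument and combined with the constant dwords of a raw
 * buffer descriptor for the current gfx level.
 */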
static nir_def *build_tess_ring_desc(nir_builder *b, struct si_screen *screen,
                                     struct si_shader_args *args)
{
   nir_def *addr = ac_nir_load_arg(b, &args->ac, args->tes_offchip_addr);
   uint32_t desc[4];

   ac_build_raw_buffer_descriptor(screen->info.gfx_level,
                                  (uint64_t)screen->info.address32_hi << 32,
                                  0xffffffff, desc);

   nir_def *comp[4] = {
      addr,
      nir_imm_int(b, desc[1]),
      nir_imm_int(b, desc[2]),
      nir_imm_int(b, desc[3]),
   };

   return nir_vec(b, comp, 4);
}

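/* Return the ESGS ring descriptor. GS uses the descriptor from the internal
 * bindings as-is; ES additionally enables swizzling (element size, index stride,
 * ADD_TID) so that its ring stores are swizzled per thread.
 */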
static nir_def *build_esgs_ring_desc(nir_builder *b, enum amd_gfx_level gfx_level,
                                     struct si_shader_args *args)
{
   nir_def *desc = si_nir_load_internal_binding(b, args, SI_RING_ESGS, 4);

   if (b->shader->info.stage == MESA_SHADER_GEOMETRY)
      return desc;

   nir_def *vec[4];
   for (int i = 0; i < 4; i++)
      vec[i] = nir_channel(b, desc, i);

   vec[1] = nir_ior_imm(b, vec[1], S_008F04_SWIZZLE_ENABLE_GFX6(1));
   vec[3] = nir_ior_imm(b, vec[3],
                        S_008F0C_ELEMENT_SIZE(1) |
                        S_008F0C_INDEX_STRIDE(3) |
                        S_008F0C_ADD_TID_ENABLE(1));

   /* If MUBUF && ADD_TID_ENABLE, DATA_FORMAT means STRIDE[14:17] on gfx8-9, so set 0. */
   if (gfx_level == GFX8)
      vec[3] = nir_iand_imm(b, vec[3], C_008F0C_DATA_FORMAT);

   return nir_vec(b, vec, 4);
}

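/* Build the GSVS ring descriptor(s). The GS copy shader reads the ring through
 * the internal binding descriptor as-is; legacy (non-NGG) GS builds one swizzled
 * descriptor per vertex stream, with each stream's ring placed right after the
 * previous one.
 */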
static void build_gsvs_ring_desc(nir_builder *b, struct lower_abi_state *s)
{
   const struct si_shader_selector *sel = s->shader->selector;
   const union si_shader_key *key = &s->shader->key;

   if (s->shader->is_gs_copy_shader) {
      s->gsvs_ring[0] = si_nir_load_internal_binding(b, s->args, SI_RING_GSVS, 4);
   } else if (b->shader->info.stage == MESA_SHADER_GEOMETRY && !key->ge.as_ngg) {
      nir_def *base_addr = si_nir_load_internal_binding(b, s->args, SI_RING_GSVS, 2);
      base_addr = nir_pack_64_2x32(b, base_addr);

      /* The conceptual layout of the GSVS ring is
       *   v0c0 .. vLc0 v0c1 .. vLc1 ..
       * but the real memory layout is swizzled across
       * threads:
       *   t0v0c0 .. t15v0c0 t0v1c0 .. t15v1c0 ... t15vLcL
       *   t16v0c0 ..
       * Override the buffer descriptor accordingly.
       */

      for (unsigned stream = 0; stream < 4; stream++) {
         unsigned num_components = sel->info.num_stream_output_components[stream];
         if (!num_components)
            continue;

         unsigned stride = 4 * num_components * b->shader->info.gs.vertices_out;
         /* Limit on the stride field for <= GFX7. */
         assert(stride < (1 << 14));

         unsigned num_records = s->shader->wave_size;

         const struct ac_buffer_state buffer_state = {
            .size = num_records,
            .format = PIPE_FORMAT_R32_FLOAT,
            .swizzle = {
               PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W,
            },
            .stride = stride,
            .swizzle_enable = true,
            .element_size = 1,
            .index_stride = 1,
            .add_tid = true,
            .gfx10_oob_select = V_008F0C_OOB_SELECT_DISABLED,
         };
         uint32_t tmp_desc[4];

         ac_build_buffer_descriptor(sel->screen->info.gfx_level, &buffer_state, tmp_desc);

         nir_def *desc[4];
         desc[0] = nir_unpack_64_2x32_split_x(b, base_addr);
         desc[1] = nir_ior_imm(b, nir_unpack_64_2x32_split_y(b, base_addr), tmp_desc[1]);
         desc[2] = nir_imm_int(b, tmp_desc[2]);
         desc[3] = nir_imm_int(b, tmp_desc[3]);

         s->gsvs_ring[stream] = nir_vec(b, desc, 4);

         /* next stream's desc addr */
         base_addr = nir_iadd_imm(b, base_addr, stride * num_records);
      }
   }
}

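/* Materialize ring descriptors that may be needed by several intrinsics. They are
 * emitted at the top of the entrypoint so the defs dominate all later uses.
 */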
static void preload_reusable_variables(nir_builder *b, struct lower_abi_state *s)
{
   const struct si_shader_selector *sel = s->shader->selector;
   const union si_shader_key *key = &s->shader->key;

   b->cursor = nir_before_impl(b->impl);

   if (sel->screen->info.gfx_level <= GFX8 && b->shader->info.stage <= MESA_SHADER_GEOMETRY &&
       (key->ge.as_es || b->shader->info.stage == MESA_SHADER_GEOMETRY)) {
      s->esgs_ring = build_esgs_ring_desc(b, sel->screen->info.gfx_level, s->args);
   }

   if (b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
       b->shader->info.stage == MESA_SHADER_TESS_EVAL)
      s->tess_offchip_ring = build_tess_ring_desc(b, sel->screen, s->args);

   build_gsvs_ring_desc(b, s);
}

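/* Number of vertices per output primitive: a compile-time constant when the
 * output topology is known, otherwise derived at runtime from GS_STATE_OUTPRIM
 * (the field value plus one).
 */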
static nir_def *get_num_vertices_per_prim(nir_builder *b, struct lower_abi_state *s)
{
   struct si_shader_args *args = s->args;
   unsigned num_vertices = si_get_num_vertices_per_output_prim(s->shader);

   if (num_vertices)
      return nir_imm_int(b, num_vertices);
   else
      return nir_iadd_imm(b, GET_FIELD_NIR(GS_STATE_OUTPRIM), 1);
}

static nir_def *get_small_prim_precision(nir_builder *b, struct lower_abi_state *s, bool lines)
{
   /* Compute FP32 value "num_samples / quant_mode" using integer ops.
    * See si_shader.h for how this works.
    */
   struct si_shader_args *args = s->args;
   nir_def *precision = GET_FIELD_NIR(GS_STATE_SMALL_PRIM_PRECISION);
   nir_def *log_samples = GET_FIELD_NIR(GS_STATE_SMALL_PRIM_PRECISION_LOG_SAMPLES);

   if (lines)
      precision = nir_iadd(b, precision, log_samples);

   /* The final FP32 value is: 1/2^(15 - precision) */
   return nir_ishl_imm(b, nir_ior_imm(b, precision, 0x70), 23);
}

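/* Lower a single ABI intrinsic. Returns true if the instruction was lowered
 * (removed and, for intrinsics with a destination, replaced); false if it was
 * left untouched.
 */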
static bool lower_intrinsic(nir_builder *b, nir_instr *instr, struct lower_abi_state *s)
{
   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

   struct si_shader *shader = s->shader;
   struct si_shader_args *args = s->args;
   struct si_shader_selector *sel = shader->selector;
   union si_shader_key *key = &shader->key;
   gl_shader_stage stage = b->shader->info.stage;

   b->cursor = nir_before_instr(instr);

   nir_def *replacement = NULL;

   switch (intrin->intrinsic) {
   case nir_intrinsic_load_base_vertex: {
      nir_def *indexed = GET_FIELD_NIR(VS_STATE_INDEXED);
      indexed = nir_i2b(b, indexed);

      nir_def *base_vertex = ac_nir_load_arg(b, &args->ac, args->ac.base_vertex);
      replacement = nir_bcsel(b, indexed, base_vertex, nir_imm_int(b, 0));
      break;
   }
   case nir_intrinsic_load_workgroup_size: {
      assert(b->shader->info.workgroup_size_variable && sel->info.uses_variable_block_size);

      nir_def *block_size = ac_nir_load_arg(b, &args->ac, args->block_size);
      nir_def *comp[] = {
         nir_ubfe_imm(b, block_size, 0, 10),
         nir_ubfe_imm(b, block_size, 10, 10),
         nir_ubfe_imm(b, block_size, 20, 10),
      };
      replacement = nir_vec(b, comp, 3);
      break;
   }
   case nir_intrinsic_load_tess_level_outer_default:
   case nir_intrinsic_load_tess_level_inner_default: {
      nir_def *buf = si_nir_load_internal_binding(b, args, SI_HS_CONST_DEFAULT_TESS_LEVELS, 4);
      unsigned num_components = intrin->def.num_components;
      unsigned offset =
         intrin->intrinsic == nir_intrinsic_load_tess_level_inner_default ? 16 : 0;
      replacement = nir_load_ubo(b, num_components, 32, buf, nir_imm_int(b, offset),
                                 .range = ~0);
      break;
   }
   case nir_intrinsic_load_patch_vertices_in:
      if (stage == MESA_SHADER_TESS_CTRL)
         replacement = ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 12, 5);
      else if (stage == MESA_SHADER_TESS_EVAL) {
         replacement = ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 7, 5);
      } else
         unreachable("no nir_load_patch_vertices_in");
      replacement = nir_iadd_imm(b, replacement, 1);
      break;
   case nir_intrinsic_load_sample_mask_in:
      replacement = ac_nir_load_arg(b, &args->ac, args->ac.sample_coverage);
      break;
   case nir_intrinsic_load_lshs_vertex_stride_amd:
      if (stage == MESA_SHADER_VERTEX) {
         replacement = nir_imm_int(b, si_shader_lshs_vertex_stride(shader));
      } else if (stage == MESA_SHADER_TESS_CTRL) {
         if (sel->screen->info.gfx_level >= GFX9 && shader->is_monolithic) {
            replacement = nir_imm_int(b, si_shader_lshs_vertex_stride(shader));
         } else {
            nir_def *num_ls_out = ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 17, 6);
            nir_def *extra_dw = nir_bcsel(b, nir_ieq_imm(b, num_ls_out, 0), nir_imm_int(b, 0), nir_imm_int(b, 4));
            replacement = nir_iadd_nuw(b, nir_ishl_imm(b, num_ls_out, 4), extra_dw);
         }
      } else {
         unreachable("no nir_load_lshs_vertex_stride_amd");
      }
      break;
   case nir_intrinsic_load_esgs_vertex_stride_amd:
      assert(sel->screen->info.gfx_level >= GFX9);
      if (shader->is_monolithic) {
         replacement = nir_imm_int(b, key->ge.part.gs.es->info.esgs_vertex_stride / 4);
      } else {
         nir_def *num_es_outputs = GET_FIELD_NIR(GS_STATE_NUM_ES_OUTPUTS);
         replacement = nir_iadd_imm(b, nir_imul_imm(b, num_es_outputs, 4), 1);
      }
      break;
   case nir_intrinsic_load_tcs_num_patches_amd: {
      nir_def *tmp = ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 0, 7);
      replacement = nir_iadd_imm(b, tmp, 1);
      break;
   }
   case nir_intrinsic_load_hs_out_patch_data_offset_amd: {
      nir_def *per_vtx_out_patch_size = NULL;

      if (stage == MESA_SHADER_TESS_CTRL) {
         const unsigned num_hs_out = util_last_bit64(sel->info.tcs_outputs_written_for_tes);
         const unsigned out_vtx_size = num_hs_out * 16;
         const unsigned out_vtx_per_patch = b->shader->info.tess.tcs_vertices_out;
         per_vtx_out_patch_size = nir_imm_int(b, out_vtx_size * out_vtx_per_patch);
      } else {
         nir_def *num_hs_out = ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 23, 6);
         nir_def *out_vtx_size = nir_ishl_imm(b, num_hs_out, 4);
         nir_def *o = ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 7, 5);
         nir_def *out_vtx_per_patch = nir_iadd_imm_nuw(b, o, 1);
         per_vtx_out_patch_size = nir_imul(b, out_vtx_per_patch, out_vtx_size);
      }

      nir_def *p = ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 0, 7);
      nir_def *num_patches = nir_iadd_imm_nuw(b, p, 1);
      replacement = nir_imul(b, per_vtx_out_patch_size, num_patches);
      break;
   }
   case nir_intrinsic_load_clip_half_line_width_amd: {
      nir_def *addr = ac_nir_load_arg(b, &args->ac, args->small_prim_cull_info);
      replacement = nir_load_smem_amd(b, 2, addr, nir_imm_int(b, 32));
      break;
   }
   case nir_intrinsic_load_cull_triangle_viewport_xy_scale_and_offset_amd: {
      nir_def *addr = ac_nir_load_arg(b, &args->ac, args->small_prim_cull_info);
      replacement = nir_load_smem_amd(b, 4, addr, nir_imm_int(b, 0));
      break;
   }
   case nir_intrinsic_load_cull_line_viewport_xy_scale_and_offset_amd: {
      nir_def *addr = ac_nir_load_arg(b, &args->ac, args->small_prim_cull_info);
      replacement = nir_load_smem_amd(b, 4, addr, nir_imm_int(b, 16));
      break;
   }
   case nir_intrinsic_load_num_vertices_per_primitive_amd:
      replacement = get_num_vertices_per_prim(b, s);
      break;
   case nir_intrinsic_load_cull_ccw_amd:
      /* radeonsi embeds the cw/ccw info into the front/back face enabled bits */
      replacement = nir_imm_false(b);
      break;
   case nir_intrinsic_load_cull_any_enabled_amd:
      /* If culling is enabled at compile time, it's always enabled at runtime. */
      assert(si_shader_culling_enabled(shader));
      replacement = nir_imm_true(b);
      break;
   case nir_intrinsic_load_cull_back_face_enabled_amd:
      replacement = nir_i2b(b, GET_FIELD_NIR(GS_STATE_CULL_FACE_BACK));
      break;
   case nir_intrinsic_load_cull_front_face_enabled_amd:
      replacement = nir_i2b(b, GET_FIELD_NIR(GS_STATE_CULL_FACE_FRONT));
      break;
   case nir_intrinsic_load_cull_small_triangle_precision_amd:
      replacement = get_small_prim_precision(b, s, false);
      break;
   case nir_intrinsic_load_cull_small_line_precision_amd:
      replacement = get_small_prim_precision(b, s, true);
      break;
   case nir_intrinsic_load_cull_small_triangles_enabled_amd:
      /* Triangles always have small primitive culling enabled. */
      replacement = nir_imm_bool(b, true);
      break;
   case nir_intrinsic_load_cull_small_lines_enabled_amd:
      replacement =
         nir_imm_bool(b, key->ge.opt.ngg_culling & SI_NGG_CULL_SMALL_LINES_DIAMOND_EXIT);
      break;
   case nir_intrinsic_load_provoking_vtx_in_prim_amd:
      replacement = nir_bcsel(b, nir_i2b(b, GET_FIELD_NIR(GS_STATE_PROVOKING_VTX_FIRST)),
                              nir_imm_int(b, 0),
                              nir_iadd_imm(b, get_num_vertices_per_prim(b, s), -1));
      break;
   case nir_intrinsic_load_pipeline_stat_query_enabled_amd:
      replacement = nir_i2b(b, GET_FIELD_NIR(GS_STATE_PIPELINE_STATS_EMU));
      break;
   case nir_intrinsic_load_prim_gen_query_enabled_amd:
   case nir_intrinsic_load_prim_xfb_query_enabled_amd:
      replacement = nir_i2b(b, GET_FIELD_NIR(GS_STATE_STREAMOUT_QUERY_ENABLED));
      break;
   case nir_intrinsic_load_clamp_vertex_color_amd:
      replacement = nir_i2b(b, GET_FIELD_NIR(VS_STATE_CLAMP_VERTEX_COLOR));
      break;
   case nir_intrinsic_load_user_clip_plane: {
      nir_def *buf = si_nir_load_internal_binding(b, args, SI_VS_CONST_CLIP_PLANES, 4);
      unsigned offset = nir_intrinsic_ucp_id(intrin) * 16;
      replacement = nir_load_ubo(b, 4, 32, buf, nir_imm_int(b, offset),
                                 .range = ~0);
      break;
   }
   case nir_intrinsic_load_streamout_buffer_amd: {
      unsigned slot = SI_VS_STREAMOUT_BUF0 + nir_intrinsic_base(intrin);
      replacement = si_nir_load_internal_binding(b, args, slot, 4);
      break;
   }
   case nir_intrinsic_load_xfb_state_address_gfx12_amd: {
      nir_def *address = si_nir_load_internal_binding(b, args, SI_STREAMOUT_STATE_BUF, 1);
      nir_def *address32_hi = nir_imm_int(b, s->shader->selector->screen->info.address32_hi);
      replacement = nir_pack_64_2x32_split(b, address, address32_hi);
      break;
   }
   case nir_intrinsic_atomic_add_gs_emit_prim_count_amd:
   case nir_intrinsic_atomic_add_shader_invocation_count_amd: {
      enum pipe_statistics_query_index index =
         intrin->intrinsic == nir_intrinsic_atomic_add_gs_emit_prim_count_amd ?
            PIPE_STAT_QUERY_GS_PRIMITIVES : PIPE_STAT_QUERY_GS_INVOCATIONS;

      /* GFX11 only needs to emulate PIPE_STAT_QUERY_GS_PRIMITIVES because GS culls,
       * which makes the pipeline statistic incorrect.
       */
      assert(sel->screen->info.gfx_level < GFX11 || index == PIPE_STAT_QUERY_GS_PRIMITIVES);

      nir_def *buf =
         si_nir_load_internal_binding(b, args, SI_GS_QUERY_EMULATED_COUNTERS_BUF, 4);
      unsigned offset = si_query_pipestat_end_dw_offset(sel->screen, index) * 4;

      nir_def *count = intrin->src[0].ssa;
      nir_ssbo_atomic(b, 32, buf, nir_imm_int(b, offset), count,
                      .atomic_op = nir_atomic_op_iadd);
      break;
   }
   case nir_intrinsic_atomic_add_gen_prim_count_amd:
   case nir_intrinsic_atomic_add_xfb_prim_count_amd: {
      nir_def *buf = si_nir_load_internal_binding(b, args, SI_GS_QUERY_BUF, 4);

      unsigned stream = nir_intrinsic_stream_id(intrin);
      unsigned offset = intrin->intrinsic == nir_intrinsic_atomic_add_gen_prim_count_amd ?
         offsetof(struct gfx11_sh_query_buffer_mem, stream[stream].generated_primitives) :
         offsetof(struct gfx11_sh_query_buffer_mem, stream[stream].emitted_primitives);

      nir_def *prim_count = intrin->src[0].ssa;
      nir_ssbo_atomic(b, 32, buf, nir_imm_int(b, offset), prim_count,
                      .atomic_op = nir_atomic_op_iadd);
      break;
   }
   case nir_intrinsic_load_debug_log_desc_amd:
      replacement = si_nir_load_internal_binding(b, args, SI_RING_SHADER_LOG, 4);
      break;
   case nir_intrinsic_load_ring_attr_amd:
      replacement = build_attr_ring_desc(b, shader, args);
      break;
   case nir_intrinsic_load_force_vrs_rates_amd:
      if (sel->screen->info.gfx_level >= GFX11) {
         /* Bits [2:5] = VRS rate
          *
          * The range is [0, 15].
          *
          * If the hw doesn't support VRS 4x4, it will silently use 2x2 instead.
          */
         replacement = nir_imm_int(b, V_0283D0_VRS_SHADING_RATE_4X4 << 2);
      } else {
         /* Bits [2:3] = VRS rate X
          * Bits [4:5] = VRS rate Y
          *
          * The range is [-2, 1]. Values:
          *   1: 2x coarser shading rate in that direction.
          *   0: normal shading rate
          *   -1: 2x finer shading rate (sample shading, not directional)
          *   -2: 4x finer shading rate (sample shading, not directional)
          *
          * Sample shading can't go above 8 samples, so both numbers can't be -2
          * at the same time.
          */
         replacement = nir_imm_int(b, (1 << 2) | (1 << 4));
      }
      break;
   case nir_intrinsic_load_barycentric_at_sample: {
      unsigned mode = nir_intrinsic_interp_mode(intrin);

      if (key->ps.mono.interpolate_at_sample_force_center) {
         replacement = nir_load_barycentric_pixel(b, 32, .interp_mode = mode);
      } else {
         nir_def *sample_id = intrin->src[0].ssa;
         /* offset = sample_id * 8 (8 = 2 floats containing samplepos.xy) */
         nir_def *offset = nir_ishl_imm(b, sample_id, 3);

         nir_def *buf = si_nir_load_internal_binding(b, args, SI_PS_CONST_SAMPLE_POSITIONS, 4);
         nir_def *sample_pos = nir_load_ubo(b, 2, 32, buf, offset, .range = ~0);

         sample_pos = nir_fadd_imm(b, sample_pos, -0.5);

         replacement = nir_load_barycentric_at_offset(b, 32, sample_pos, .interp_mode = mode);
      }
      break;
   }
   case nir_intrinsic_load_output: {
      nir_io_semantics sem = nir_intrinsic_io_semantics(intrin);

      /* not fbfetch */
      if (!(stage == MESA_SHADER_FRAGMENT && sem.fb_fetch_output))
         return false;

      /* Ignore src0, because KHR_blend_func_extended disallows multiple render targets. */

      replacement = fetch_framebuffer(b, args, sel, key);
      break;
   }
   case nir_intrinsic_load_ring_tess_factors_amd: {
      assert(s->tess_offchip_ring);
      nir_def *addr = nir_channel(b, s->tess_offchip_ring, 0);
      addr = nir_iadd_imm(b, addr, sel->screen->hs.tess_offchip_ring_size);
      replacement = nir_vector_insert_imm(b, s->tess_offchip_ring, addr, 0);
      break;
   }
   case nir_intrinsic_load_alpha_reference_amd:
      replacement = ac_nir_load_arg(b, &args->ac, args->alpha_reference);
      break;
   case nir_intrinsic_load_front_face:
   case nir_intrinsic_load_front_face_fsign:
      if (!key->ps.opt.force_front_face_input)
         return false;
      if (intrin->intrinsic == nir_intrinsic_load_front_face)
         replacement = nir_imm_bool(b, key->ps.opt.force_front_face_input == 1);
      else
         replacement = nir_imm_float(b, key->ps.opt.force_front_face_input == 1 ? 1.0 : -1.0);
      break;
   case nir_intrinsic_load_color0:
   case nir_intrinsic_load_color1: {
      uint32_t colors_read = sel->info.colors_read;

      int start, offset;
      if (intrin->intrinsic == nir_intrinsic_load_color0) {
         start = 0;
         offset = 0;
      } else {
         start = 4;
         offset = util_bitcount(colors_read & 0xf);
      }

      nir_def *color[4];
      for (int i = 0; i < 4; i++) {
         if (colors_read & BITFIELD_BIT(start + i)) {
            color[i] = ac_nir_load_arg_at_offset(b, &args->ac, args->color_start, offset++);

            nir_intrinsic_set_flags(nir_instr_as_intrinsic(color[i]->parent_instr),
                                    AC_VECTOR_ARG_FLAG(AC_VECTOR_ARG_IS_COLOR, start + i));
         } else {
            color[i] = nir_undef(b, 1, 32);
         }
      }

      replacement = nir_vec(b, color, 4);
      break;
   }
   case nir_intrinsic_load_point_coord_maybe_flipped: {
      nir_def *interp_param =
         nir_load_barycentric_pixel(b, 32, .interp_mode = INTERP_MODE_NONE);

      /* Load point coordinates (x, y) which are written by the hw after the interpolated inputs */
      replacement = nir_load_interpolated_input(b, 2, 32, interp_param, nir_imm_int(b, 0),
                                                .base = si_get_ps_num_interp(shader),
                                                .component = 2,
                                                /* This tells si_nir_scan_shader that it's PARAM_GEN */
                                                .io_semantics.no_varying = 1);
      break;
   }
   case nir_intrinsic_load_poly_line_smooth_enabled:
      replacement = nir_imm_bool(b, key->ps.mono.poly_line_smoothing);
      break;
   case nir_intrinsic_load_initial_edgeflags_amd: {
      unsigned output_prim = si_get_output_prim_simplified(sel, &shader->key);

      /* Points, lines, and rectangles don't need edge flags. */
      if (output_prim == MESA_PRIM_POINTS || output_prim == MESA_PRIM_LINES ||
          output_prim == SI_PRIM_RECTANGLE_LIST) {
         replacement = nir_imm_int(b, 0);
      } else if (stage == MESA_SHADER_VERTEX) {
         if (sel->screen->info.gfx_level >= GFX12) {
            replacement = nir_iand_imm(b, ac_nir_load_arg(b, &args->ac, args->ac.gs_vtx_offset[0]),
                                       ac_get_all_edge_flag_bits(sel->screen->info.gfx_level));
         } else {
            /* Use the following trick to extract the edge flags:
             *   extracted = v_and_b32 gs_invocation_id, 0x700 ; get edge flags at bits 8, 9, 10
             *   shifted = v_mul_u32_u24 extracted, 0x80402u   ; shift the bits: 8->9, 9->19, 10->29
             *   result = v_and_b32 shifted, 0x20080200        ; remove garbage
             */
            nir_def *tmp = ac_nir_load_arg(b, &args->ac, args->ac.gs_invocation_id);
            tmp = nir_iand_imm(b, tmp, 0x700);
            tmp = nir_imul_imm(b, tmp, 0x80402);
            replacement = nir_iand_imm(b, tmp, 0x20080200);
         }
      } else {
         /* TES and GS: Edge flags are always enabled by the rasterizer state when polygon mode is
          * enabled, so set all edge flags to 1 for triangles.
          */
         replacement = nir_imm_int(b, ac_get_all_edge_flag_bits(sel->screen->info.gfx_level));
      }
      break;
   }
   case nir_intrinsic_load_ring_esgs_amd:
      assert(s->esgs_ring);
      replacement = s->esgs_ring;
      break;
   case nir_intrinsic_load_ring_tess_offchip_amd:
      assert(s->tess_offchip_ring);
      replacement = s->tess_offchip_ring;
      break;
   case nir_intrinsic_load_tcs_tess_levels_to_tes_amd:
      if (shader->is_monolithic) {
         replacement = nir_imm_bool(b, key->ge.opt.tes_reads_tess_factors);
      } else {
         replacement = nir_ine_imm(b, ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 31, 1), 0);
      }
      break;
   case nir_intrinsic_load_tcs_primitive_mode_amd:
      if (shader->is_monolithic) {
         replacement = nir_imm_int(b, key->ge.opt.tes_prim_mode);
      } else {
         if (b->shader->info.tess._primitive_mode != TESS_PRIMITIVE_UNSPECIFIED)
            replacement = nir_imm_int(b, b->shader->info.tess._primitive_mode);
         else
            replacement = ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 29, 2);
      }
      break;
   case nir_intrinsic_load_ring_gsvs_amd: {
      unsigned stream_id = nir_intrinsic_stream_id(intrin);
      /* Unused nir_load_ring_gsvs_amd may not be eliminated yet. */
      replacement = s->gsvs_ring[stream_id] ?
         s->gsvs_ring[stream_id] : nir_undef(b, 4, 32);
      break;
   }
   case nir_intrinsic_load_user_data_amd: {
      nir_def *low_vec4 = ac_nir_load_arg(b, &args->ac, args->cs_user_data[0]);
      replacement = nir_pad_vector(b, low_vec4, 8);

      if (args->cs_user_data[1].used && intrin->def.num_components > 4) {
         nir_def *high_vec4 = ac_nir_load_arg(b, &args->ac, args->cs_user_data[1]);
         for (unsigned i = 0; i < high_vec4->num_components; i++)
            replacement = nir_vector_insert_imm(b, replacement, nir_channel(b, high_vec4, i), 4 + i);
      }
      break;
   }
   default:
      return false;
   }

   if (replacement)
      nir_def_rewrite_uses(&intrin->def, replacement);

   nir_instr_remove(instr);
   nir_instr_free(instr);

   return true;
}

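/* Entry point: lower radeonsi-specific ABI intrinsics in the shader's entrypoint
 * to values derived from shader arguments, internal bindings, and the preloaded
 * ring descriptors.
 */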
bool si_nir_lower_abi(nir_shader *nir, struct si_shader *shader, struct si_shader_args *args)
{
   struct lower_abi_state state = {
      .shader = shader,
      .args = args,
   };

   nir_function_impl *impl = nir_shader_get_entrypoint(nir);

   nir_builder b = nir_builder_create(impl);

   preload_reusable_variables(&b, &state);

   bool progress = false;
   nir_foreach_block_safe(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type == nir_instr_type_intrinsic)
            progress |= lower_intrinsic(&b, instr, &state);
      }
   }

   nir_metadata preserved = progress ?
      nir_metadata_control_flow :
      nir_metadata_all;
   nir_metadata_preserve(impl, preserved);

   return progress;
}