/*
 * Copyright 2022 Advanced Micro Devices, Inc.
 *
 * SPDX-License-Identifier: MIT
 */

#include "nir_builder.h"

#include "ac_nir.h"
#include "si_pipe.h"
#include "si_query.h"
#include "si_state.h"
#include "si_shader_internal.h"

struct lower_abi_state {
   struct si_shader *shader;
   struct si_shader_args *args;

   nir_def *esgs_ring;
   nir_def *tess_offchip_ring;
   nir_def *gsvs_ring[4];
};

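/* Unpack a bitfield from the vs_state_bits user SGPR; the field's __SHIFT and
 * __MASK companion macros describe where it lives.
 */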
#define GET_FIELD_NIR(field) \
   ac_nir_unpack_arg(b, &args->ac, args->vs_state_bits, \
                     field##__SHIFT, util_bitcount(field##__MASK))

nir_def *si_nir_load_internal_binding(nir_builder *b, struct si_shader_args *args,
                                      unsigned slot, unsigned num_components)
{
   nir_def *addr = ac_nir_load_arg(b, &args->ac, args->internal_bindings);
   return nir_load_smem_amd(b, num_components, addr, nir_imm_int(b, slot * 16));
}

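/* Build the attribute ring descriptor. The ring address comes from
 * gs_attr_address, except for vertex blit shaders, which pass it in the
 * last blit SGPR.
 */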
static nir_def *build_attr_ring_desc(nir_builder *b, struct si_shader *shader,
                                     struct si_shader_args *args)
{
   struct si_shader_selector *sel = shader->selector;

   nir_def *attr_address =
      b->shader->info.stage == MESA_SHADER_VERTEX && b->shader->info.vs.blit_sgprs_amd ?
      ac_nir_load_arg_at_offset(b, &args->ac, args->vs_blit_inputs,
                                b->shader->info.vs.blit_sgprs_amd - 1) :
      ac_nir_load_arg(b, &args->ac, args->gs_attr_address);

   unsigned stride = 16 * si_shader_num_alloc_param_exports(shader);
   uint32_t desc[4];

   ac_build_attr_ring_descriptor(sel->screen->info.gfx_level,
                                 (uint64_t)sel->screen->info.address32_hi << 32,
                                 0xffffffff, stride, desc);

   nir_def *comp[] = {
      attr_address,
      nir_imm_int(b, desc[1]),
      nir_imm_int(b, desc[2]),
      nir_imm_int(b, desc[3]),
   };

   return nir_vec(b, comp, 4);
}

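/* Emulate FBFETCH (load_output reading the framebuffer) with a bindless image
 * load from the color buffer descriptor. For MSAA before GFX11, the real
 * sample index is first looked up through the FMASK image.
 */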
static nir_def *
fetch_framebuffer(nir_builder *b, struct si_shader_args *args,
                  struct si_shader_selector *sel, union si_shader_key *key)
{
   /* Load the image descriptor. */
   STATIC_ASSERT(SI_PS_IMAGE_COLORBUF0 % 2 == 0);
   STATIC_ASSERT(SI_PS_IMAGE_COLORBUF0_FMASK % 2 == 0);

   nir_def *zero = nir_imm_zero(b, 1, 32);
   nir_def *undef = nir_undef(b, 1, 32);

   unsigned chan = 0;
   nir_def *vec[4] = {undef, undef, undef, undef};

   vec[chan++] = ac_nir_unpack_arg(b, &args->ac, args->ac.pos_fixed_pt, 0, 16);

   if (!key->ps.mono.fbfetch_is_1D)
      vec[chan++] = ac_nir_unpack_arg(b, &args->ac, args->ac.pos_fixed_pt, 16, 16);

   /* Get the current render target layer index. */
   if (key->ps.mono.fbfetch_layered)
      vec[chan++] = ac_nir_unpack_arg(b, &args->ac, args->ac.ancillary, 16, 11);

   nir_def *coords = nir_vec(b, vec, 4);

   enum glsl_sampler_dim dim;
   if (key->ps.mono.fbfetch_msaa)
      dim = GLSL_SAMPLER_DIM_MS;
   else if (key->ps.mono.fbfetch_is_1D)
      dim = GLSL_SAMPLER_DIM_1D;
   else
      dim = GLSL_SAMPLER_DIM_2D;

   nir_def *sample_id;
   if (key->ps.mono.fbfetch_msaa) {
      sample_id = ac_nir_unpack_arg(b, &args->ac, args->ac.ancillary, 8, 4);

      if (sel->screen->info.gfx_level < GFX11 &&
          !(sel->screen->debug_flags & DBG(NO_FMASK))) {
         nir_def *desc =
            si_nir_load_internal_binding(b, args, SI_PS_IMAGE_COLORBUF0_FMASK, 8);

         nir_def *fmask =
            nir_bindless_image_fragment_mask_load_amd(
               b, desc, coords,
               .image_dim = dim,
               .image_array = key->ps.mono.fbfetch_layered,
               .access = ACCESS_CAN_REORDER);

         nir_def *offset = nir_ishl_imm(b, sample_id, 2);
         /* 3 for EQAA handling, see lower_image_to_fragment_mask_load() */
         nir_def *width = nir_imm_int(b, 3);
         sample_id = nir_ubfe(b, fmask, offset, width);
      }
   } else {
      sample_id = zero;
   }

   nir_def *desc = si_nir_load_internal_binding(b, args, SI_PS_IMAGE_COLORBUF0, 8);

   return nir_bindless_image_load(b, 4, 32, desc, coords, sample_id, zero,
                                  .image_dim = dim,
                                  .image_array = key->ps.mono.fbfetch_layered,
                                  .access = ACCESS_CAN_REORDER);
}

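/* Build the tess offchip ring descriptor from the 32-bit ring address in
 * tes_offchip_addr plus the screen's high address bits.
 */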
static nir_def *build_tess_ring_desc(nir_builder *b, struct si_screen *screen,
                                     struct si_shader_args *args)
{
   nir_def *addr = ac_nir_load_arg(b, &args->ac, args->tes_offchip_addr);
   uint32_t desc[4];

   ac_build_raw_buffer_descriptor(screen->info.gfx_level,
                                  (uint64_t)screen->info.address32_hi << 32,
                                  0xffffffff, desc);

   nir_def *comp[4] = {
      addr,
      nir_imm_int(b, desc[1]),
      nir_imm_int(b, desc[2]),
      nir_imm_int(b, desc[3]),
   };

   return nir_vec(b, comp, 4);
}

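/* GS reads the ESGS ring descriptor as-is. For ES stages the descriptor is
 * patched with swizzle/stride bits because ES writes are swizzled per thread.
 */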
static nir_def *build_esgs_ring_desc(nir_builder *b, enum amd_gfx_level gfx_level,
                                     struct si_shader_args *args)
{
   nir_def *desc = si_nir_load_internal_binding(b, args, SI_RING_ESGS, 4);

   if (b->shader->info.stage == MESA_SHADER_GEOMETRY)
      return desc;

   nir_def *vec[4];
   for (int i = 0; i < 4; i++)
      vec[i] = nir_channel(b, desc, i);

   vec[1] = nir_ior_imm(b, vec[1], S_008F04_SWIZZLE_ENABLE_GFX6(1));
   vec[3] = nir_ior_imm(b, vec[3],
                        S_008F0C_ELEMENT_SIZE(1) |
                        S_008F0C_INDEX_STRIDE(3) |
                        S_008F0C_ADD_TID_ENABLE(1));

   /* If MUBUF && ADD_TID_ENABLE, DATA_FORMAT means STRIDE[14:17] on gfx8-9, so set 0. */
   if (gfx_level == GFX8)
      vec[3] = nir_iand_imm(b, vec[3], C_008F0C_DATA_FORMAT);

   return nir_vec(b, vec, 4);
}

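/* Precompute a GSVS ring descriptor per vertex stream. Only legacy (non-NGG)
 * GS and the GS copy shader use the GSVS ring.
 */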
static void build_gsvs_ring_desc(nir_builder *b, struct lower_abi_state *s)
{
   const struct si_shader_selector *sel = s->shader->selector;
   const union si_shader_key *key = &s->shader->key;

   if (s->shader->is_gs_copy_shader) {
      s->gsvs_ring[0] = si_nir_load_internal_binding(b, s->args, SI_RING_GSVS, 4);
   } else if (b->shader->info.stage == MESA_SHADER_GEOMETRY && !key->ge.as_ngg) {
      nir_def *base_addr = si_nir_load_internal_binding(b, s->args, SI_RING_GSVS, 2);
      base_addr = nir_pack_64_2x32(b, base_addr);

      /* The conceptual layout of the GSVS ring is
       *   v0c0 .. vLc0 v0c1 .. vLc1 ..
       * but the real memory layout is swizzled across
       * threads:
       *   t0v0c0 .. t15v0c0 t0v1c0 .. t15v1c0 ... t15vLcL
       *   t16v0c0 ..
       * Override the buffer descriptor accordingly.
       */

      for (unsigned stream = 0; stream < 4; stream++) {
         unsigned num_components = sel->info.num_stream_output_components[stream];
         if (!num_components)
            continue;

         unsigned stride = 4 * num_components * b->shader->info.gs.vertices_out;
         /* Limit on the stride field for <= GFX7. */
         assert(stride < (1 << 14));

         unsigned num_records = s->shader->wave_size;

         const struct ac_buffer_state buffer_state = {
            .size = num_records,
            .format = PIPE_FORMAT_R32_FLOAT,
            .swizzle = {
               PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W,
            },
            .stride = stride,
            .swizzle_enable = true,
            .element_size = 1,
            .index_stride = 1,
            .add_tid = true,
            .gfx10_oob_select = V_008F0C_OOB_SELECT_DISABLED,
         };
         uint32_t tmp_desc[4];

         ac_build_buffer_descriptor(sel->screen->info.gfx_level, &buffer_state, tmp_desc);

         nir_def *desc[4];
         desc[0] = nir_unpack_64_2x32_split_x(b, base_addr);
         desc[1] = nir_ior_imm(b, nir_unpack_64_2x32_split_y(b, base_addr), tmp_desc[1]);
         desc[2] = nir_imm_int(b, tmp_desc[2]);
         desc[3] = nir_imm_int(b, tmp_desc[3]);

         s->gsvs_ring[stream] = nir_vec(b, desc, 4);

         /* next stream's desc addr */
         base_addr = nir_iadd_imm(b, base_addr, stride * num_records);
      }
   }
}

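/* Build the ring descriptors once at the top of the entry block so that every
 * lowered intrinsic can reuse the same SSA defs.
 */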
static void preload_reusable_variables(nir_builder *b, struct lower_abi_state *s)
{
   const struct si_shader_selector *sel = s->shader->selector;
   const union si_shader_key *key = &s->shader->key;

   b->cursor = nir_before_impl(b->impl);

   if (sel->screen->info.gfx_level <= GFX8 && b->shader->info.stage <= MESA_SHADER_GEOMETRY &&
       (key->ge.as_es || b->shader->info.stage == MESA_SHADER_GEOMETRY)) {
      s->esgs_ring = build_esgs_ring_desc(b, sel->screen->info.gfx_level, s->args);
   }

   if (b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
       b->shader->info.stage == MESA_SHADER_TESS_EVAL)
      s->tess_offchip_ring = build_tess_ring_desc(b, sel->screen, s->args);

   build_gsvs_ring_desc(b, s);
}

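/* Number of vertices per output primitive: a compile-time constant when the
 * output topology is known, otherwise GS_STATE_OUTPRIM + 1 at runtime.
 */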
static nir_def *get_num_vertices_per_prim(nir_builder *b, struct lower_abi_state *s)
{
   struct si_shader_args *args = s->args;
   unsigned num_vertices = si_get_num_vertices_per_output_prim(s->shader);

   if (num_vertices)
      return nir_imm_int(b, num_vertices);
   else
      return nir_iadd_imm(b, GET_FIELD_NIR(GS_STATE_OUTPRIM), 1);
}

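/* Small-primitive culling precision as an FP32 value, assembled with integer
 * ops from the precision and log2(num_samples) state fields.
 */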
static nir_def *get_small_prim_precision(nir_builder *b, struct lower_abi_state *s, bool lines)
{
   /* Compute the FP32 value "num_samples / quant_mode" using integer ops.
    * See si_shader.h for how this works.
    */
   struct si_shader_args *args = s->args;
   nir_def *precision = GET_FIELD_NIR(GS_STATE_SMALL_PRIM_PRECISION);
   nir_def *log_samples = GET_FIELD_NIR(GS_STATE_SMALL_PRIM_PRECISION_LOG_SAMPLES);

   if (lines)
      precision = nir_iadd(b, precision, log_samples);

   /* The final FP32 value is 1/2^(15 - precision): ORing in 0x70 and shifting
    * left by 23 puts (0x70 | precision) = 112 + precision in the FP32 exponent
    * field, which with a zero mantissa yields
    * 2^(112 + precision - 127) = 2^(precision - 15).
    */
   return nir_ishl_imm(b, nir_ior_imm(b, precision, 0x70), 23);
}

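/* Lower one ABI intrinsic to argument loads, descriptor builds, or constants.
 * Returns true (and removes the instruction) if it was handled.
 */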
static bool lower_intrinsic(nir_builder *b, nir_instr *instr, struct lower_abi_state *s)
{
   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

   struct si_shader *shader = s->shader;
   struct si_shader_args *args = s->args;
   struct si_shader_selector *sel = shader->selector;
   union si_shader_key *key = &shader->key;
   gl_shader_stage stage = b->shader->info.stage;

   b->cursor = nir_before_instr(instr);

   nir_def *replacement = NULL;

   switch (intrin->intrinsic) {
   case nir_intrinsic_load_base_vertex: {
      nir_def *indexed = GET_FIELD_NIR(VS_STATE_INDEXED);
      indexed = nir_i2b(b, indexed);

      nir_def *base_vertex = ac_nir_load_arg(b, &args->ac, args->ac.base_vertex);
      replacement = nir_bcsel(b, indexed, base_vertex, nir_imm_int(b, 0));
      break;
   }
   case nir_intrinsic_load_workgroup_size: {
      assert(b->shader->info.workgroup_size_variable && sel->info.uses_variable_block_size);

      nir_def *block_size = ac_nir_load_arg(b, &args->ac, args->block_size);
      nir_def *comp[] = {
         nir_ubfe_imm(b, block_size, 0, 10),
         nir_ubfe_imm(b, block_size, 10, 10),
         nir_ubfe_imm(b, block_size, 20, 10),
      };
      replacement = nir_vec(b, comp, 3);
      break;
   }
   case nir_intrinsic_load_tess_level_outer_default:
   case nir_intrinsic_load_tess_level_inner_default: {
      nir_def *buf = si_nir_load_internal_binding(b, args, SI_HS_CONST_DEFAULT_TESS_LEVELS, 4);
      unsigned num_components = intrin->def.num_components;
      unsigned offset =
         intrin->intrinsic == nir_intrinsic_load_tess_level_inner_default ? 16 : 0;
      replacement = nir_load_ubo(b, num_components, 32, buf, nir_imm_int(b, offset),
                                 .range = ~0);
      break;
   }
   case nir_intrinsic_load_patch_vertices_in:
      if (stage == MESA_SHADER_TESS_CTRL)
         replacement = ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 12, 5);
      else if (stage == MESA_SHADER_TESS_EVAL)
         replacement = ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 7, 5);
      else
         unreachable("no nir_load_patch_vertices_in");
      replacement = nir_iadd_imm(b, replacement, 1);
      break;
   case nir_intrinsic_load_sample_mask_in:
      replacement = ac_nir_load_arg(b, &args->ac, args->ac.sample_coverage);
      break;
   case nir_intrinsic_load_lshs_vertex_stride_amd:
      if (stage == MESA_SHADER_VERTEX) {
         replacement = nir_imm_int(b, si_shader_lshs_vertex_stride(shader));
      } else if (stage == MESA_SHADER_TESS_CTRL) {
         if (sel->screen->info.gfx_level >= GFX9 && shader->is_monolithic) {
            replacement = nir_imm_int(b, si_shader_lshs_vertex_stride(shader));
         } else {
            nir_def *num_ls_out = ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 17, 6);
            nir_def *extra_dw = nir_bcsel(b, nir_ieq_imm(b, num_ls_out, 0), nir_imm_int(b, 0), nir_imm_int(b, 4));
            replacement = nir_iadd_nuw(b, nir_ishl_imm(b, num_ls_out, 4), extra_dw);
         }
      } else {
         unreachable("no nir_load_lshs_vertex_stride_amd");
      }
      break;
   case nir_intrinsic_load_esgs_vertex_stride_amd:
      assert(sel->screen->info.gfx_level >= GFX9);
      if (shader->is_monolithic) {
         replacement = nir_imm_int(b, key->ge.part.gs.es->info.esgs_vertex_stride / 4);
      } else {
         nir_def *num_es_outputs = GET_FIELD_NIR(GS_STATE_NUM_ES_OUTPUTS);
         replacement = nir_iadd_imm(b, nir_imul_imm(b, num_es_outputs, 4), 1);
      }
      break;
   case nir_intrinsic_load_tcs_num_patches_amd: {
      nir_def *tmp = ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 0, 7);
      replacement = nir_iadd_imm(b, tmp, 1);
      break;
   }
   case nir_intrinsic_load_hs_out_patch_data_offset_amd: {
      nir_def *per_vtx_out_patch_size = NULL;

      if (stage == MESA_SHADER_TESS_CTRL) {
         const unsigned num_hs_out = util_last_bit64(sel->info.tcs_outputs_written_for_tes);
         const unsigned out_vtx_size = num_hs_out * 16;
         const unsigned out_vtx_per_patch = b->shader->info.tess.tcs_vertices_out;
         per_vtx_out_patch_size = nir_imm_int(b, out_vtx_size * out_vtx_per_patch);
      } else {
         nir_def *num_hs_out = ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 23, 6);
         nir_def *out_vtx_size = nir_ishl_imm(b, num_hs_out, 4);
         nir_def *o = ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 7, 5);
         nir_def *out_vtx_per_patch = nir_iadd_imm_nuw(b, o, 1);
         per_vtx_out_patch_size = nir_imul(b, out_vtx_per_patch, out_vtx_size);
      }

      nir_def *p = ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 0, 7);
      nir_def *num_patches = nir_iadd_imm_nuw(b, p, 1);
      replacement = nir_imul(b, per_vtx_out_patch_size, num_patches);
      break;
   }
   case nir_intrinsic_load_clip_half_line_width_amd: {
      nir_def *addr = ac_nir_load_arg(b, &args->ac, args->small_prim_cull_info);
      replacement = nir_load_smem_amd(b, 2, addr, nir_imm_int(b, 32));
      break;
   }
   case nir_intrinsic_load_cull_triangle_viewport_xy_scale_and_offset_amd: {
      nir_def *addr = ac_nir_load_arg(b, &args->ac, args->small_prim_cull_info);
      replacement = nir_load_smem_amd(b, 4, addr, nir_imm_int(b, 0));
      break;
   }
   case nir_intrinsic_load_cull_line_viewport_xy_scale_and_offset_amd: {
      nir_def *addr = ac_nir_load_arg(b, &args->ac, args->small_prim_cull_info);
      replacement = nir_load_smem_amd(b, 4, addr, nir_imm_int(b, 16));
      break;
   }
   case nir_intrinsic_load_num_vertices_per_primitive_amd:
      replacement = get_num_vertices_per_prim(b, s);
      break;
   case nir_intrinsic_load_cull_ccw_amd:
      /* radeonsi embeds the cw/ccw info in the front/back-face culling state */
      replacement = nir_imm_false(b);
      break;
   case nir_intrinsic_load_cull_any_enabled_amd:
      /* If culling is enabled at compile time, it's always enabled at runtime. */
      assert(si_shader_culling_enabled(shader));
      replacement = nir_imm_true(b);
      break;
   case nir_intrinsic_load_cull_back_face_enabled_amd:
      replacement = nir_i2b(b, GET_FIELD_NIR(GS_STATE_CULL_FACE_BACK));
      break;
   case nir_intrinsic_load_cull_front_face_enabled_amd:
      replacement = nir_i2b(b, GET_FIELD_NIR(GS_STATE_CULL_FACE_FRONT));
      break;
   case nir_intrinsic_load_cull_small_triangle_precision_amd:
      replacement = get_small_prim_precision(b, s, false);
      break;
   case nir_intrinsic_load_cull_small_line_precision_amd:
      replacement = get_small_prim_precision(b, s, true);
      break;
   case nir_intrinsic_load_cull_small_triangles_enabled_amd:
      /* Triangles always have small primitive culling enabled. */
      replacement = nir_imm_bool(b, true);
      break;
   case nir_intrinsic_load_cull_small_lines_enabled_amd:
      replacement =
         nir_imm_bool(b, key->ge.opt.ngg_culling & SI_NGG_CULL_SMALL_LINES_DIAMOND_EXIT);
      break;
   case nir_intrinsic_load_provoking_vtx_in_prim_amd:
      replacement = nir_bcsel(b, nir_i2b(b, GET_FIELD_NIR(GS_STATE_PROVOKING_VTX_FIRST)),
                              nir_imm_int(b, 0),
                              nir_iadd_imm(b, get_num_vertices_per_prim(b, s), -1));
      break;
   case nir_intrinsic_load_pipeline_stat_query_enabled_amd:
      replacement = nir_i2b(b, GET_FIELD_NIR(GS_STATE_PIPELINE_STATS_EMU));
      break;
   case nir_intrinsic_load_prim_gen_query_enabled_amd:
   case nir_intrinsic_load_prim_xfb_query_enabled_amd:
      replacement = nir_i2b(b, GET_FIELD_NIR(GS_STATE_STREAMOUT_QUERY_ENABLED));
      break;
   case nir_intrinsic_load_clamp_vertex_color_amd:
      replacement = nir_i2b(b, GET_FIELD_NIR(VS_STATE_CLAMP_VERTEX_COLOR));
      break;
   case nir_intrinsic_load_user_clip_plane: {
      nir_def *buf = si_nir_load_internal_binding(b, args, SI_VS_CONST_CLIP_PLANES, 4);
      unsigned offset = nir_intrinsic_ucp_id(intrin) * 16;
      replacement = nir_load_ubo(b, 4, 32, buf, nir_imm_int(b, offset),
                                 .range = ~0);
      break;
   }
   case nir_intrinsic_load_streamout_buffer_amd: {
      unsigned slot = SI_VS_STREAMOUT_BUF0 + nir_intrinsic_base(intrin);
      replacement = si_nir_load_internal_binding(b, args, slot, 4);
      break;
   }
   case nir_intrinsic_load_xfb_state_address_gfx12_amd: {
      nir_def *address = si_nir_load_internal_binding(b, args, SI_STREAMOUT_STATE_BUF, 1);
      nir_def *address32_hi = nir_imm_int(b, s->shader->selector->screen->info.address32_hi);
      replacement = nir_pack_64_2x32_split(b, address, address32_hi);
      break;
   }
   case nir_intrinsic_atomic_add_gs_emit_prim_count_amd:
   case nir_intrinsic_atomic_add_shader_invocation_count_amd: {
      enum pipe_statistics_query_index index =
         intrin->intrinsic == nir_intrinsic_atomic_add_gs_emit_prim_count_amd ?
         PIPE_STAT_QUERY_GS_PRIMITIVES : PIPE_STAT_QUERY_GS_INVOCATIONS;

      /* GFX11 only needs to emulate PIPE_STAT_QUERY_GS_PRIMITIVES because GS culls,
       * which makes the pipeline statistic incorrect.
       */
      assert(sel->screen->info.gfx_level < GFX11 || index == PIPE_STAT_QUERY_GS_PRIMITIVES);

      nir_def *buf =
         si_nir_load_internal_binding(b, args, SI_GS_QUERY_EMULATED_COUNTERS_BUF, 4);
      unsigned offset = si_query_pipestat_end_dw_offset(sel->screen, index) * 4;

      nir_def *count = intrin->src[0].ssa;
      nir_ssbo_atomic(b, 32, buf, nir_imm_int(b, offset), count,
                      .atomic_op = nir_atomic_op_iadd);
      break;
   }
   case nir_intrinsic_atomic_add_gen_prim_count_amd:
   case nir_intrinsic_atomic_add_xfb_prim_count_amd: {
      nir_def *buf = si_nir_load_internal_binding(b, args, SI_GS_QUERY_BUF, 4);

      unsigned stream = nir_intrinsic_stream_id(intrin);
      unsigned offset = intrin->intrinsic == nir_intrinsic_atomic_add_gen_prim_count_amd ?
         offsetof(struct gfx11_sh_query_buffer_mem, stream[stream].generated_primitives) :
         offsetof(struct gfx11_sh_query_buffer_mem, stream[stream].emitted_primitives);

      nir_def *prim_count = intrin->src[0].ssa;
      nir_ssbo_atomic(b, 32, buf, nir_imm_int(b, offset), prim_count,
                      .atomic_op = nir_atomic_op_iadd);
      break;
   }
   case nir_intrinsic_load_debug_log_desc_amd:
      replacement = si_nir_load_internal_binding(b, args, SI_RING_SHADER_LOG, 4);
      break;
   case nir_intrinsic_load_ring_attr_amd:
      replacement = build_attr_ring_desc(b, shader, args);
      break;
   case nir_intrinsic_load_force_vrs_rates_amd:
      if (sel->screen->info.gfx_level >= GFX11) {
         /* Bits [2:5] = VRS rate
          *
          * The range is [0, 15].
          *
          * If the hw doesn't support VRS 4x4, it will silently use 2x2 instead.
          */
         replacement = nir_imm_int(b, V_0283D0_VRS_SHADING_RATE_4X4 << 2);
      } else {
         /* Bits [2:3] = VRS rate X
          * Bits [4:5] = VRS rate Y
          *
          * The range is [-2, 1]. Values:
          *   1: 2x coarser shading rate in that direction.
          *   0: normal shading rate
          *  -1: 2x finer shading rate (sample shading, not directional)
          *  -2: 4x finer shading rate (sample shading, not directional)
          *
          * Sample shading can't go above 8 samples, so both numbers can't be -2
          * at the same time.
          */
         replacement = nir_imm_int(b, (1 << 2) | (1 << 4));
      }
      break;
   case nir_intrinsic_load_barycentric_at_sample: {
      unsigned mode = nir_intrinsic_interp_mode(intrin);

      if (key->ps.mono.interpolate_at_sample_force_center) {
         replacement = nir_load_barycentric_pixel(b, 32, .interp_mode = mode);
      } else {
         nir_def *sample_id = intrin->src[0].ssa;
         /* offset = sample_id * 8  (8 = 2 floats containing samplepos.xy) */
         nir_def *offset = nir_ishl_imm(b, sample_id, 3);

         nir_def *buf = si_nir_load_internal_binding(b, args, SI_PS_CONST_SAMPLE_POSITIONS, 4);
         nir_def *sample_pos = nir_load_ubo(b, 2, 32, buf, offset, .range = ~0);

         sample_pos = nir_fadd_imm(b, sample_pos, -0.5);

         replacement = nir_load_barycentric_at_offset(b, 32, sample_pos, .interp_mode = mode);
      }
      break;
   }
   case nir_intrinsic_load_output: {
      nir_io_semantics sem = nir_intrinsic_io_semantics(intrin);

      /* not fbfetch */
      if (!(stage == MESA_SHADER_FRAGMENT && sem.fb_fetch_output))
         return false;

      /* Ignore src0, because KHR_blend_func_extended disallows multiple render targets. */

      replacement = fetch_framebuffer(b, args, sel, key);
      break;
   }
   case nir_intrinsic_load_ring_tess_factors_amd: {
      assert(s->tess_offchip_ring);
      nir_def *addr = nir_channel(b, s->tess_offchip_ring, 0);
      addr = nir_iadd_imm(b, addr, sel->screen->hs.tess_offchip_ring_size);
      replacement = nir_vector_insert_imm(b, s->tess_offchip_ring, addr, 0);
      break;
   }
   case nir_intrinsic_load_alpha_reference_amd:
      replacement = ac_nir_load_arg(b, &args->ac, args->alpha_reference);
      break;
   case nir_intrinsic_load_front_face:
   case nir_intrinsic_load_front_face_fsign:
      if (!key->ps.opt.force_front_face_input)
         return false;
      if (intrin->intrinsic == nir_intrinsic_load_front_face)
         replacement = nir_imm_bool(b, key->ps.opt.force_front_face_input == 1);
      else
         replacement = nir_imm_float(b, key->ps.opt.force_front_face_input == 1 ? 1.0 : -1.0);
      break;
   case nir_intrinsic_load_color0:
   case nir_intrinsic_load_color1: {
      uint32_t colors_read = sel->info.colors_read;

      int start, offset;
      if (intrin->intrinsic == nir_intrinsic_load_color0) {
         start = 0;
         offset = 0;
      } else {
         start = 4;
         offset = util_bitcount(colors_read & 0xf);
      }

      nir_def *color[4];
      for (int i = 0; i < 4; i++) {
         if (colors_read & BITFIELD_BIT(start + i)) {
            color[i] = ac_nir_load_arg_at_offset(b, &args->ac, args->color_start, offset++);

            nir_intrinsic_set_flags(nir_instr_as_intrinsic(color[i]->parent_instr),
                                    AC_VECTOR_ARG_FLAG(AC_VECTOR_ARG_IS_COLOR, start + i));
         } else {
            color[i] = nir_undef(b, 1, 32);
         }
      }

      replacement = nir_vec(b, color, 4);
      break;
   }
   case nir_intrinsic_load_point_coord_maybe_flipped: {
      nir_def *interp_param =
         nir_load_barycentric_pixel(b, 32, .interp_mode = INTERP_MODE_NONE);

      /* Load the point coordinates (x, y), which the hw writes after the interpolated inputs. */
      replacement = nir_load_interpolated_input(b, 2, 32, interp_param, nir_imm_int(b, 0),
                                                .base = si_get_ps_num_interp(shader),
                                                .component = 2,
                                                /* This tells si_nir_scan_shader that it's PARAM_GEN. */
                                                .io_semantics.no_varying = 1);
      break;
   }
   case nir_intrinsic_load_poly_line_smooth_enabled:
      replacement = nir_imm_bool(b, key->ps.mono.poly_line_smoothing);
      break;
   case nir_intrinsic_load_initial_edgeflags_amd: {
      unsigned output_prim = si_get_output_prim_simplified(sel, &shader->key);

      /* Points, lines, and rectangles don't need edge flags. */
      if (output_prim == MESA_PRIM_POINTS || output_prim == MESA_PRIM_LINES ||
          output_prim == SI_PRIM_RECTANGLE_LIST) {
         replacement = nir_imm_int(b, 0);
      } else if (stage == MESA_SHADER_VERTEX) {
         if (sel->screen->info.gfx_level >= GFX12) {
            replacement = nir_iand_imm(b, ac_nir_load_arg(b, &args->ac, args->ac.gs_vtx_offset[0]),
                                       ac_get_all_edge_flag_bits(sel->screen->info.gfx_level));
         } else {
            /* Use the following trick to extract the edge flags:
             *   extracted = v_and_b32 gs_invocation_id, 0x700 ; get edge flags at bits 8, 9, 10
             *   shifted = v_mul_u32_u24 extracted, 0x80402u   ; shift the bits: 8->9, 9->19, 10->29
             *   result = v_and_b32 shifted, 0x20080200        ; remove garbage
             */
            nir_def *tmp = ac_nir_load_arg(b, &args->ac, args->ac.gs_invocation_id);
            tmp = nir_iand_imm(b, tmp, 0x700);
            tmp = nir_imul_imm(b, tmp, 0x80402);
            replacement = nir_iand_imm(b, tmp, 0x20080200);
         }
      } else {
         /* TES and GS: Edge flags are always enabled by the rasterizer state when polygon mode is
          * enabled, so set all edge flags to 1 for triangles.
          */
         replacement = nir_imm_int(b, ac_get_all_edge_flag_bits(sel->screen->info.gfx_level));
      }
      break;
   }
   case nir_intrinsic_load_ring_esgs_amd:
      assert(s->esgs_ring);
      replacement = s->esgs_ring;
      break;
   case nir_intrinsic_load_ring_tess_offchip_amd:
      assert(s->tess_offchip_ring);
      replacement = s->tess_offchip_ring;
      break;
   case nir_intrinsic_load_tcs_tess_levels_to_tes_amd:
      if (shader->is_monolithic) {
         replacement = nir_imm_bool(b, key->ge.opt.tes_reads_tess_factors);
      } else {
         replacement = nir_ine_imm(b, ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 31, 1), 0);
      }
      break;
   case nir_intrinsic_load_tcs_primitive_mode_amd:
      if (shader->is_monolithic) {
         replacement = nir_imm_int(b, key->ge.opt.tes_prim_mode);
      } else {
         if (b->shader->info.tess._primitive_mode != TESS_PRIMITIVE_UNSPECIFIED)
            replacement = nir_imm_int(b, b->shader->info.tess._primitive_mode);
         else
            replacement = ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 29, 2);
      }
      break;
   case nir_intrinsic_load_ring_gsvs_amd: {
      unsigned stream_id = nir_intrinsic_stream_id(intrin);
      /* Unused nir_load_ring_gsvs_amd intrinsics may not have been eliminated yet. */
      replacement = s->gsvs_ring[stream_id] ?
         s->gsvs_ring[stream_id] : nir_undef(b, 4, 32);
      break;
   }
   case nir_intrinsic_load_user_data_amd: {
      nir_def *low_vec4 = ac_nir_load_arg(b, &args->ac, args->cs_user_data[0]);
      replacement = nir_pad_vector(b, low_vec4, 8);

      if (args->cs_user_data[1].used && intrin->def.num_components > 4) {
         nir_def *high_vec4 = ac_nir_load_arg(b, &args->ac, args->cs_user_data[1]);
         for (unsigned i = 0; i < high_vec4->num_components; i++)
            replacement = nir_vector_insert_imm(b, replacement, nir_channel(b, high_vec4, i), 4 + i);
      }
      break;
   }
   default:
      return false;
   }

   if (replacement)
      nir_def_rewrite_uses(&intrin->def, replacement);

   nir_instr_remove(instr);
   nir_instr_free(instr);

   return true;
}

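/* Entry point: lower all radeonsi ABI intrinsics in the shader's entrypoint
 * function. Returns true if any instruction was changed.
 */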
bool si_nir_lower_abi(nir_shader *nir, struct si_shader *shader, struct si_shader_args *args)
{
   struct lower_abi_state state = {
      .shader = shader,
      .args = args,
   };

   nir_function_impl *impl = nir_shader_get_entrypoint(nir);

   nir_builder b = nir_builder_create(impl);

   preload_reusable_variables(&b, &state);

   bool progress = false;
   nir_foreach_block_safe(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type == nir_instr_type_intrinsic)
            progress |= lower_intrinsic(&b, instr, &state);
      }
   }

   nir_metadata preserved = progress ?
      nir_metadata_control_flow :
      nir_metadata_all;
   nir_metadata_preserve(impl, preserved);

   return progress;
}