/*
 * Copyright 2022 Advanced Micro Devices, Inc.
 *
 * SPDX-License-Identifier: MIT
 */

#include "nir_builder.h"

#include "ac_nir.h"
#include "si_pipe.h"
#include "si_query.h"
#include "si_state.h"
#include "si_shader_internal.h"

struct lower_abi_state {
   struct si_shader *shader;
   struct si_shader_args *args;

   nir_def *esgs_ring;
   nir_def *tess_offchip_ring;
   nir_def *gsvs_ring[4];
};

#define GET_FIELD_NIR(field) \
   ac_nir_unpack_arg(b, &args->ac, args->vs_state_bits, \
                     field##__SHIFT, util_bitcount(field##__MASK))

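/* Load a descriptor or constant from the driver's internal bindings list via SMEM.
 * Each slot is 16 bytes; the base address comes from the internal_bindings user SGPR.
 */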
nir_def *si_nir_load_internal_binding(nir_builder *b, struct si_shader_args *args,
                                      unsigned slot, unsigned num_components)
{
   nir_def *addr = ac_nir_load_arg(b, &args->ac, args->internal_bindings);
   return nir_load_smem_amd(b, num_components, addr, nir_imm_int(b, slot * 16));
}

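/* Build the attribute ring buffer descriptor. Only the base address (dword 0) is dynamic;
 * the remaining dwords are compile-time constants from ac_build_attr_ring_descriptor().
 */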
static nir_def *build_attr_ring_desc(nir_builder *b, struct si_shader *shader,
                                     struct si_shader_args *args)
{
   struct si_shader_selector *sel = shader->selector;

   nir_def *attr_address =
      b->shader->info.stage == MESA_SHADER_VERTEX && b->shader->info.vs.blit_sgprs_amd ?
      ac_nir_load_arg_at_offset(b, &args->ac, args->vs_blit_inputs,
                                b->shader->info.vs.blit_sgprs_amd - 1) :
      ac_nir_load_arg(b, &args->ac, args->gs_attr_address);

   unsigned stride = 16 * si_shader_num_alloc_param_exports(shader);
   uint32_t desc[4];

   ac_build_attr_ring_descriptor(sel->screen->info.gfx_level,
                                 (uint64_t)sel->screen->info.address32_hi << 32,
                                 0xffffffff, stride, desc);

   nir_def *comp[] = {
      attr_address,
      nir_imm_int(b, desc[1]),
      nir_imm_int(b, desc[2]),
      nir_imm_int(b, desc[3]),
   };

   return nir_vec(b, comp, 4);
}

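/* Build the tess offchip ring descriptor. Like the attribute ring, only the base address
 * (tes_offchip_addr) is dynamic; the other descriptor dwords are constants.
 */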
static nir_def *build_tess_ring_desc(nir_builder *b, struct si_screen *screen,
                                     struct si_shader_args *args)
{
   nir_def *addr = ac_nir_load_arg(b, &args->ac, args->tes_offchip_addr);
   uint32_t desc[4];

   ac_build_raw_buffer_descriptor(screen->info.gfx_level,
                                  (uint64_t)screen->info.address32_hi << 32,
                                  0xffffffff, desc);

   nir_def *comp[4] = {
      addr,
      nir_imm_int(b, desc[1]),
      nir_imm_int(b, desc[2]),
      nir_imm_int(b, desc[3]),
   };

   return nir_vec(b, comp, 4);
}

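/* Load the ESGS ring descriptor from the internal bindings. GS can use it as-is;
 * the ES stage additionally enables swizzling and per-thread (TID) addressing for its writes.
 */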
static nir_def *build_esgs_ring_desc(nir_builder *b, enum amd_gfx_level gfx_level,
                                     struct si_shader_args *args)
{
   nir_def *desc = si_nir_load_internal_binding(b, args, SI_RING_ESGS, 4);

   if (b->shader->info.stage == MESA_SHADER_GEOMETRY)
      return desc;

   nir_def *vec[4];
   for (int i = 0; i < 4; i++)
      vec[i] = nir_channel(b, desc, i);

   vec[1] = nir_ior_imm(b, vec[1], S_008F04_SWIZZLE_ENABLE_GFX6(1));
   vec[3] = nir_ior_imm(b, vec[3],
                        S_008F0C_ELEMENT_SIZE(1) |
                        S_008F0C_INDEX_STRIDE(3) |
                        S_008F0C_ADD_TID_ENABLE(1));

   /* If MUBUF && ADD_TID_ENABLE, DATA_FORMAT means STRIDE[14:17] on gfx8-9, so set 0. */
   if (gfx_level == GFX8)
      vec[3] = nir_iand_imm(b, vec[3], C_008F0C_DATA_FORMAT);

   return nir_vec(b, vec, 4);
}

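/* Build the GSVS ring descriptors. The GS copy shader loads a prebuilt descriptor;
 * a legacy (non-NGG) GS builds one swizzled descriptor per vertex stream, each stream's
 * base offset by stride * num_records from the previous one.
 */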
static void build_gsvs_ring_desc(nir_builder *b, struct lower_abi_state *s)
{
   const struct si_shader_selector *sel = s->shader->selector;
   const union si_shader_key *key = &s->shader->key;

   if (s->shader->is_gs_copy_shader) {
      s->gsvs_ring[0] = si_nir_load_internal_binding(b, s->args, SI_RING_GSVS, 4);
   } else if (b->shader->info.stage == MESA_SHADER_GEOMETRY && !key->ge.as_ngg) {
      nir_def *base_addr = si_nir_load_internal_binding(b, s->args, SI_RING_GSVS, 2);
      base_addr = nir_pack_64_2x32(b, base_addr);

      /* The conceptual layout of the GSVS ring is
       *   v0c0 .. vLc0 v0c1 .. vLc1 ..
       * but the real memory layout is swizzled across
       * threads:
       *   t0v0c0 .. t15v0c0 t0v1c0 .. t15v1c0 ... t15vLcL
       *   t16v0c0 ..
       * Override the buffer descriptor accordingly.
       */

      for (unsigned stream = 0; stream < 4; stream++) {
         unsigned num_components = sel->info.num_stream_output_components[stream];
         if (!num_components)
            continue;

         unsigned stride = 4 * num_components * b->shader->info.gs.vertices_out;
         /* Limit on the stride field for <= GFX7. */
         assert(stride < (1 << 14));

         unsigned num_records = s->shader->wave_size;

         const struct ac_buffer_state buffer_state = {
            .size = num_records,
            .format = PIPE_FORMAT_R32_FLOAT,
            .swizzle = {
               PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W,
            },
            .stride = stride,
            .swizzle_enable = true,
            .element_size = 1,
            .index_stride = 1,
            .add_tid = true,
            .gfx10_oob_select = V_008F0C_OOB_SELECT_DISABLED,
         };
         uint32_t tmp_desc[4];

         ac_build_buffer_descriptor(sel->screen->info.gfx_level, &buffer_state, tmp_desc);

         nir_def *desc[4];
         desc[0] = nir_unpack_64_2x32_split_x(b, base_addr);
         desc[1] = nir_ior_imm(b, nir_unpack_64_2x32_split_y(b, base_addr), tmp_desc[1]);
         desc[2] = nir_imm_int(b, tmp_desc[2]);
         desc[3] = nir_imm_int(b, tmp_desc[3]);

         s->gsvs_ring[stream] = nir_vec(b, desc, 4);

         /* next stream's desc addr */
         base_addr = nir_iadd_imm(b, base_addr, stride * num_records);
      }
   }
}

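/* Build the ring descriptors once at the top of the shader so the lowered intrinsics
 * below can reuse them instead of rebuilding them at every use.
 */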
static void preload_reusable_variables(nir_builder *b, struct lower_abi_state *s)
{
   const struct si_shader_selector *sel = s->shader->selector;
   const union si_shader_key *key = &s->shader->key;

   b->cursor = nir_before_impl(b->impl);

   if (sel->screen->info.gfx_level <= GFX8 && b->shader->info.stage <= MESA_SHADER_GEOMETRY &&
       (key->ge.as_es || b->shader->info.stage == MESA_SHADER_GEOMETRY)) {
      s->esgs_ring = build_esgs_ring_desc(b, sel->screen->info.gfx_level, s->args);
   }

   if (b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
       b->shader->info.stage == MESA_SHADER_TESS_EVAL)
      s->tess_offchip_ring = build_tess_ring_desc(b, sel->screen, s->args);

   build_gsvs_ring_desc(b, s);
}

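/* Return the number of vertices per output primitive, either as a compile-time constant
 * or, when it is only known at draw time, from the GS_STATE_OUTPRIM shader state bits.
 */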
static nir_def *get_num_vertices_per_prim(nir_builder *b, struct lower_abi_state *s)
{
   struct si_shader_args *args = s->args;
   unsigned num_vertices = si_get_num_vertices_per_output_prim(s->shader);

   if (num_vertices)
      return nir_imm_int(b, num_vertices);
   else
      return nir_iadd_imm(b, GET_FIELD_NIR(GS_STATE_OUTPRIM), 1);
}

static nir_def *get_small_prim_precision(nir_builder *b, struct lower_abi_state *s, bool lines)
{
   /* Compute FP32 value "num_samples / quant_mode" using integer ops.
    * See si_shader.h for how this works.
    */
   struct si_shader_args *args = s->args;
   nir_def *precision = GET_FIELD_NIR(GS_STATE_SMALL_PRIM_PRECISION);
   nir_def *log_samples = GET_FIELD_NIR(GS_STATE_SMALL_PRIM_PRECISION_LOG_SAMPLES);

   if (lines)
      precision = nir_iadd(b, precision, log_samples);

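   /* (0x70 | precision) << 23 builds the float bit pattern directly: it places
    * 112 + precision in the FP32 exponent field, i.e. the value 2^(precision - 15).
    */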
   /* The final FP32 value is: 1/2^(15 - precision) */
   return nir_ishl_imm(b, nir_ior_imm(b, precision, 0x70), 23);
}

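/* Lower a single ABI intrinsic to loads from user SGPRs, internal bindings, or immediate
 * values. Returns true if the instruction was lowered and removed.
 */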
static bool lower_intrinsic(nir_builder *b, nir_instr *instr, struct lower_abi_state *s)
{
   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

   struct si_shader *shader = s->shader;
   struct si_shader_args *args = s->args;
   struct si_shader_selector *sel = shader->selector;
   union si_shader_key *key = &shader->key;
   gl_shader_stage stage = b->shader->info.stage;

   b->cursor = nir_before_instr(instr);

   nir_def *replacement = NULL;

   switch (intrin->intrinsic) {
   case nir_intrinsic_load_base_vertex: {
      nir_def *indexed = GET_FIELD_NIR(VS_STATE_INDEXED);
      indexed = nir_i2b(b, indexed);

      nir_def *base_vertex = ac_nir_load_arg(b, &args->ac, args->ac.base_vertex);
      replacement = nir_bcsel(b, indexed, base_vertex, nir_imm_int(b, 0));
      break;
   }
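   /* The variable workgroup size is passed in one SGPR with the X/Y/Z sizes packed as
    * three 10-bit fields.
    */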
   case nir_intrinsic_load_workgroup_size: {
      assert(b->shader->info.workgroup_size_variable && sel->info.uses_variable_block_size);

      nir_def *block_size = ac_nir_load_arg(b, &args->ac, args->block_size);
      nir_def *comp[] = {
         nir_ubfe_imm(b, block_size, 0, 10),
         nir_ubfe_imm(b, block_size, 10, 10),
         nir_ubfe_imm(b, block_size, 20, 10),
      };
      replacement = nir_vec(b, comp, 3);
      break;
   }
   case nir_intrinsic_load_tess_level_outer_default:
   case nir_intrinsic_load_tess_level_inner_default: {
      nir_def *buf = si_nir_load_internal_binding(b, args, SI_HS_CONST_DEFAULT_TESS_LEVELS, 4);
      unsigned num_components = intrin->def.num_components;
      unsigned offset =
         intrin->intrinsic == nir_intrinsic_load_tess_level_inner_default ? 16 : 0;
      replacement = nir_load_ubo(b, num_components, 32, buf, nir_imm_int(b, offset),
                                 .range = ~0);
      break;
   }
   case nir_intrinsic_load_patch_vertices_in:
      if (stage == MESA_SHADER_TESS_CTRL)
         replacement = ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 12, 5);
      else if (stage == MESA_SHADER_TESS_EVAL) {
         replacement = ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 7, 5);
      } else
         unreachable("no nir_load_patch_vertices_in");
      replacement = nir_iadd_imm(b, replacement, 1);
      break;
   case nir_intrinsic_load_sample_mask_in:
      replacement = ac_nir_load_arg(b, &args->ac, args->ac.sample_coverage);
      break;
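   /* LS/HS vertex stride: a compile-time constant for monolithic shaders, otherwise
    * reconstructed from the LS output count packed in tcs_offchip_layout.
    */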
   case nir_intrinsic_load_lshs_vertex_stride_amd:
      if (stage == MESA_SHADER_VERTEX) {
         replacement = nir_imm_int(b, si_shader_lshs_vertex_stride(shader));
      } else if (stage == MESA_SHADER_TESS_CTRL) {
         if (sel->screen->info.gfx_level >= GFX9 && shader->is_monolithic) {
            replacement = nir_imm_int(b, si_shader_lshs_vertex_stride(shader));
         } else {
            nir_def *num_ls_out = ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 17, 6);
            nir_def *extra_dw = nir_bcsel(b, nir_ieq_imm(b, num_ls_out, 0), nir_imm_int(b, 0), nir_imm_int(b, 4));
            replacement = nir_iadd_nuw(b, nir_ishl_imm(b, num_ls_out, 4), extra_dw);
         }
      } else {
         unreachable("no nir_load_lshs_vertex_stride_amd");
      }
      break;
   case nir_intrinsic_load_esgs_vertex_stride_amd:
      assert(sel->screen->info.gfx_level >= GFX9);
      if (shader->is_monolithic) {
         replacement = nir_imm_int(b, key->ge.part.gs.es->info.esgs_vertex_stride / 4);
      } else {
         nir_def *num_es_outputs = GET_FIELD_NIR(GS_STATE_NUM_ES_OUTPUTS);
         replacement = nir_iadd_imm(b, nir_imul_imm(b, num_es_outputs, 4), 1);
      }
      break;
   case nir_intrinsic_load_tcs_num_patches_amd: {
      nir_def *tmp = ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 0, 7);
      replacement = nir_iadd_imm(b, tmp, 1);
      break;
   }
   case nir_intrinsic_load_hs_out_patch_data_offset_amd: {
      nir_def *per_vtx_out_patch_size = NULL;

      if (stage == MESA_SHADER_TESS_CTRL) {
         const unsigned num_hs_out = util_last_bit64(sel->info.tcs_outputs_written_for_tes);
         const unsigned out_vtx_size = num_hs_out * 16;
         const unsigned out_vtx_per_patch = b->shader->info.tess.tcs_vertices_out;
         per_vtx_out_patch_size = nir_imm_int(b, out_vtx_size * out_vtx_per_patch);
      } else {
         nir_def *num_hs_out = ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 23, 6);
         nir_def *out_vtx_size = nir_ishl_imm(b, num_hs_out, 4);
         nir_def *o = ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 7, 5);
         nir_def *out_vtx_per_patch = nir_iadd_imm_nuw(b, o, 1);
         per_vtx_out_patch_size = nir_imul(b, out_vtx_per_patch, out_vtx_size);
      }

      nir_def *p = ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 0, 7);
      nir_def *num_patches = nir_iadd_imm_nuw(b, p, 1);
      replacement = nir_imul(b, per_vtx_out_patch_size, num_patches);
      break;
   }
   case nir_intrinsic_load_clip_half_line_width_amd: {
      nir_def *addr = ac_nir_load_arg(b, &args->ac, args->small_prim_cull_info);
      replacement = nir_load_smem_amd(b, 2, addr, nir_imm_int(b, 32));
      break;
   }
   case nir_intrinsic_load_cull_triangle_viewport_xy_scale_and_offset_amd: {
      nir_def *addr = ac_nir_load_arg(b, &args->ac, args->small_prim_cull_info);
      replacement = nir_load_smem_amd(b, 4, addr, nir_imm_int(b, 0));
      break;
   }
   case nir_intrinsic_load_cull_line_viewport_xy_scale_and_offset_amd: {
      nir_def *addr = ac_nir_load_arg(b, &args->ac, args->small_prim_cull_info);
      replacement = nir_load_smem_amd(b, 4, addr, nir_imm_int(b, 16));
      break;
   }
   case nir_intrinsic_load_num_vertices_per_primitive_amd:
      replacement = get_num_vertices_per_prim(b, s);
      break;
   case nir_intrinsic_load_cull_ccw_amd:
      /* radeonsi embeds the cw/ccw info into the front/back face enable bits. */
      replacement = nir_imm_false(b);
      break;
   case nir_intrinsic_load_cull_any_enabled_amd:
      /* If culling is enabled at compile time, it's always enabled at runtime. */
      assert(si_shader_culling_enabled(shader));
      replacement = nir_imm_true(b);
      break;
   case nir_intrinsic_load_cull_back_face_enabled_amd:
      replacement = nir_i2b(b, GET_FIELD_NIR(GS_STATE_CULL_FACE_BACK));
      break;
   case nir_intrinsic_load_cull_front_face_enabled_amd:
      replacement = nir_i2b(b, GET_FIELD_NIR(GS_STATE_CULL_FACE_FRONT));
      break;
   case nir_intrinsic_load_cull_small_triangle_precision_amd:
      replacement = get_small_prim_precision(b, s, false);
      break;
   case nir_intrinsic_load_cull_small_line_precision_amd:
      replacement = get_small_prim_precision(b, s, true);
      break;
   case nir_intrinsic_load_cull_small_triangles_enabled_amd:
      /* Triangles always have small primitive culling enabled. */
      replacement = nir_imm_bool(b, true);
      break;
   case nir_intrinsic_load_cull_small_lines_enabled_amd:
      replacement =
         nir_imm_bool(b, key->ge.opt.ngg_culling & SI_NGG_CULL_SMALL_LINES_DIAMOND_EXIT);
      break;
   case nir_intrinsic_load_provoking_vtx_in_prim_amd:
      replacement = nir_bcsel(b, nir_i2b(b, GET_FIELD_NIR(GS_STATE_PROVOKING_VTX_FIRST)),
                              nir_imm_int(b, 0),
                              nir_iadd_imm(b, get_num_vertices_per_prim(b, s), -1));
      break;
   case nir_intrinsic_load_pipeline_stat_query_enabled_amd:
      replacement = nir_i2b(b, GET_FIELD_NIR(GS_STATE_PIPELINE_STATS_EMU));
      break;
   case nir_intrinsic_load_prim_gen_query_enabled_amd:
   case nir_intrinsic_load_prim_xfb_query_enabled_amd:
      replacement = nir_i2b(b, GET_FIELD_NIR(GS_STATE_STREAMOUT_QUERY_ENABLED));
      break;
   case nir_intrinsic_load_clamp_vertex_color_amd:
      replacement = nir_i2b(b, GET_FIELD_NIR(VS_STATE_CLAMP_VERTEX_COLOR));
      break;
   case nir_intrinsic_load_user_clip_plane: {
      nir_def *buf = si_nir_load_internal_binding(b, args, SI_VS_CONST_CLIP_PLANES, 4);
      unsigned offset = nir_intrinsic_ucp_id(intrin) * 16;
      replacement = nir_load_ubo(b, 4, 32, buf, nir_imm_int(b, offset),
                                 .range = ~0);
      break;
   }
   case nir_intrinsic_load_streamout_buffer_amd: {
      unsigned slot = SI_VS_STREAMOUT_BUF0 + nir_intrinsic_base(intrin);
      replacement = si_nir_load_internal_binding(b, args, slot, 4);
      break;
   }
   case nir_intrinsic_load_xfb_state_address_gfx12_amd: {
      nir_def *address = si_nir_load_internal_binding(b, args, SI_STREAMOUT_STATE_BUF, 1);
      nir_def *address32_hi = nir_imm_int(b, s->shader->selector->screen->info.address32_hi);
      replacement = nir_pack_64_2x32_split(b, address, address32_hi);
      break;
   }
   case nir_intrinsic_atomic_add_gs_emit_prim_count_amd:
   case nir_intrinsic_atomic_add_shader_invocation_count_amd: {
      enum pipe_statistics_query_index index =
         intrin->intrinsic == nir_intrinsic_atomic_add_gs_emit_prim_count_amd ?
         PIPE_STAT_QUERY_GS_PRIMITIVES : PIPE_STAT_QUERY_GS_INVOCATIONS;

      /* GFX11 only needs to emulate PIPE_STAT_QUERY_GS_PRIMITIVES because GS culls,
       * which makes the pipeline statistic incorrect.
       */
      assert(sel->screen->info.gfx_level < GFX11 || index == PIPE_STAT_QUERY_GS_PRIMITIVES);

      nir_def *buf =
         si_nir_load_internal_binding(b, args, SI_GS_QUERY_EMULATED_COUNTERS_BUF, 4);
      unsigned offset = si_query_pipestat_end_dw_offset(sel->screen, index) * 4;

      nir_def *count = intrin->src[0].ssa;
      nir_ssbo_atomic(b, 32, buf, nir_imm_int(b, offset), count,
                      .atomic_op = nir_atomic_op_iadd);
      break;
   }
   case nir_intrinsic_atomic_add_gen_prim_count_amd:
   case nir_intrinsic_atomic_add_xfb_prim_count_amd: {
      nir_def *buf = si_nir_load_internal_binding(b, args, SI_GS_QUERY_BUF, 4);

      unsigned stream = nir_intrinsic_stream_id(intrin);
      unsigned offset = intrin->intrinsic == nir_intrinsic_atomic_add_gen_prim_count_amd ?
         offsetof(struct gfx11_sh_query_buffer_mem, stream[stream].generated_primitives) :
         offsetof(struct gfx11_sh_query_buffer_mem, stream[stream].emitted_primitives);

      nir_def *prim_count = intrin->src[0].ssa;
      nir_ssbo_atomic(b, 32, buf, nir_imm_int(b, offset), prim_count,
                      .atomic_op = nir_atomic_op_iadd);
      break;
   }
   case nir_intrinsic_load_debug_log_desc_amd:
      replacement = si_nir_load_internal_binding(b, args, SI_RING_SHADER_LOG, 4);
      break;
   case nir_intrinsic_load_ring_attr_amd:
      replacement = build_attr_ring_desc(b, shader, args);
      break;
   case nir_intrinsic_load_force_vrs_rates_amd:
      if (sel->screen->info.gfx_level >= GFX11) {
         /* Bits [2:5] = VRS rate
          *
          * The range is [0, 15].
          *
          * If the hw doesn't support VRS 4x4, it will silently use 2x2 instead.
          */
         replacement = nir_imm_int(b, V_0283D0_VRS_SHADING_RATE_4X4 << 2);
      } else {
         /* Bits [2:3] = VRS rate X
          * Bits [4:5] = VRS rate Y
          *
          * The range is [-2, 1]. Values:
          *   1: 2x coarser shading rate in that direction.
          *   0: normal shading rate
          *  -1: 2x finer shading rate (sample shading, not directional)
          *  -2: 4x finer shading rate (sample shading, not directional)
          *
          * Sample shading can't go above 8 samples, so both numbers can't be -2
          * at the same time.
          */
         replacement = nir_imm_int(b, (1 << 2) | (1 << 4));
      }
      break;
   case nir_intrinsic_load_sample_positions_amd: {
      /* Sample locations are packed in 2 user SGPRs, 4 bits per component. */
      nir_def *sample_id = intrin->src[0].ssa;
      nir_def *sample_locs =
         nir_pack_64_2x32_split(b, ac_nir_load_arg(b, &s->args->ac, s->args->sample_locs[0]),
                                ac_nir_load_arg(b, &s->args->ac, s->args->sample_locs[1]));
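      /* Select this sample's byte, split it into 4-bit x/y, and scale to [0, 1). */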
      sample_locs = nir_ushr(b, sample_locs, nir_imul_imm(b, sample_id, 8));
      sample_locs = nir_u2u32(b, sample_locs);
      nir_def *sample_pos = nir_vec2(b, nir_iand_imm(b, sample_locs, 0xf),
                                     nir_ubfe_imm(b, sample_locs, 4, 4));
      replacement = nir_fmul_imm(b, nir_u2f32(b, sample_pos), 1.0 / 16);
      break;
   }
   case nir_intrinsic_load_ring_tess_factors_amd: {
      assert(s->tess_offchip_ring);
      nir_def *addr = nir_channel(b, s->tess_offchip_ring, 0);
      addr = nir_iadd_imm(b, addr, sel->screen->hs.tess_offchip_ring_size);
      replacement = nir_vector_insert_imm(b, s->tess_offchip_ring, addr, 0);
      break;
   }
   case nir_intrinsic_load_alpha_reference_amd:
      replacement = ac_nir_load_arg(b, &args->ac, args->alpha_reference);
      break;
   case nir_intrinsic_load_color0:
   case nir_intrinsic_load_color1: {
      uint32_t colors_read = sel->info.colors_read;

      int start, offset;
      if (intrin->intrinsic == nir_intrinsic_load_color0) {
         start = 0;
         offset = 0;
      } else {
         start = 4;
         offset = util_bitcount(colors_read & 0xf);
      }

      nir_def *color[4];
      for (int i = 0; i < 4; i++) {
         if (colors_read & BITFIELD_BIT(start + i))
            color[i] = ac_nir_load_arg_at_offset(b, &args->ac, args->color_start, offset++);
         else
            color[i] = nir_undef(b, 1, 32);
      }

      replacement = nir_vec(b, color, 4);
      break;
   }
   case nir_intrinsic_load_point_coord_maybe_flipped: {
      /* Load point coordinates (x, y) which are written by the hw after the interpolated inputs */
      nir_def *baryc = intrin->src[0].ssa;
      replacement = nir_load_interpolated_input(b, 2, 32, baryc, nir_imm_int(b, 0),
                                                .base = si_get_ps_num_interp(shader),
                                                .component = 2);
      break;
   }
   case nir_intrinsic_load_poly_line_smooth_enabled:
      replacement = nir_imm_bool(b, key->ps.mono.poly_line_smoothing);
      break;
   case nir_intrinsic_load_initial_edgeflags_amd: {
      unsigned output_prim = si_get_output_prim_simplified(sel, &shader->key);

      /* Points, lines, and rectangles don't need edge flags. */
      if (output_prim == MESA_PRIM_POINTS || output_prim == MESA_PRIM_LINES ||
          output_prim == SI_PRIM_RECTANGLE_LIST) {
         replacement = nir_imm_int(b, 0);
      } else if (stage == MESA_SHADER_VERTEX) {
         if (sel->screen->info.gfx_level >= GFX12) {
            replacement = nir_iand_imm(b, ac_nir_load_arg(b, &args->ac, args->ac.gs_vtx_offset[0]),
                                       ac_get_all_edge_flag_bits(sel->screen->info.gfx_level));
         } else {
            /* Use the following trick to extract the edge flags:
             *   extracted = v_and_b32 gs_invocation_id, 0x700 ; get edge flags at bits 8, 9, 10
             *   shifted = v_mul_u32_u24 extracted, 0x80402u   ; shift the bits: 8->9, 9->19, 10->29
             *   result = v_and_b32 shifted, 0x20080200        ; remove garbage
             */
            nir_def *tmp = ac_nir_load_arg(b, &args->ac, args->ac.gs_invocation_id);
            tmp = nir_iand_imm(b, tmp, 0x700);
            tmp = nir_imul_imm(b, tmp, 0x80402);
            replacement = nir_iand_imm(b, tmp, 0x20080200);
         }
      } else {
         /* TES and GS: Edge flags are always enabled by the rasterizer state when polygon mode is
          * enabled, so set all edge flags to 1 for triangles.
          */
         replacement = nir_imm_int(b, ac_get_all_edge_flag_bits(sel->screen->info.gfx_level));
      }
      break;
   }
   case nir_intrinsic_load_ring_esgs_amd:
      assert(s->esgs_ring);
      replacement = s->esgs_ring;
      break;
   case nir_intrinsic_load_ring_tess_offchip_amd:
      assert(s->tess_offchip_ring);
      replacement = s->tess_offchip_ring;
      break;
   case nir_intrinsic_load_tcs_tess_levels_to_tes_amd:
      if (shader->is_monolithic) {
         replacement = nir_imm_bool(b, key->ge.opt.tes_reads_tess_factors);
      } else {
         replacement = nir_ine_imm(b, ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 31, 1), 0);
      }
      break;
   case nir_intrinsic_load_tcs_primitive_mode_amd:
      if (shader->is_monolithic) {
         replacement = nir_imm_int(b, key->ge.opt.tes_prim_mode);
      } else {
         if (b->shader->info.tess._primitive_mode != TESS_PRIMITIVE_UNSPECIFIED)
            replacement = nir_imm_int(b, b->shader->info.tess._primitive_mode);
         else
            replacement = ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 29, 2);
      }
      break;
   case nir_intrinsic_load_ring_gsvs_amd: {
      unsigned stream_id = nir_intrinsic_stream_id(intrin);
      /* Unused nir_load_ring_gsvs_amd may not be eliminated yet. */
      replacement = s->gsvs_ring[stream_id] ?
         s->gsvs_ring[stream_id] : nir_undef(b, 4, 32);
      break;
   }
   case nir_intrinsic_load_user_data_amd: {
      nir_def *low_vec4 = ac_nir_load_arg(b, &args->ac, args->cs_user_data[0]);
      replacement = nir_pad_vector(b, low_vec4, 8);

      if (args->cs_user_data[1].used && intrin->def.num_components > 4) {
         nir_def *high_vec4 = ac_nir_load_arg(b, &args->ac, args->cs_user_data[1]);
         for (unsigned i = 0; i < high_vec4->num_components; i++)
            replacement = nir_vector_insert_imm(b, replacement, nir_channel(b, high_vec4, i), 4 + i);
      }
      break;
   }
   case nir_intrinsic_load_fbfetch_image_fmask_desc_amd:
      STATIC_ASSERT(SI_PS_IMAGE_COLORBUF0_FMASK % 2 == 0);
      replacement = si_nir_load_internal_binding(b, args, SI_PS_IMAGE_COLORBUF0_FMASK, 8);
      break;
   case nir_intrinsic_load_fbfetch_image_desc_amd:
      STATIC_ASSERT(SI_PS_IMAGE_COLORBUF0 % 2 == 0);
      replacement = si_nir_load_internal_binding(b, args, SI_PS_IMAGE_COLORBUF0, 8);
      break;
   case nir_intrinsic_load_polygon_stipple_buffer_amd:
      replacement = si_nir_load_internal_binding(b, args, SI_PS_CONST_POLY_STIPPLE, 4);
      break;
   default:
      return false;
   }

   if (replacement)
      nir_def_rewrite_uses(&intrin->def, replacement);

   nir_instr_remove(instr);
   nir_instr_free(instr);

   return true;
}

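/* Lower radeonsi ABI intrinsics: preload the reusable ring descriptors at the top of the
 * entrypoint, then rewrite ABI intrinsics in every block of the shader.
 */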
bool si_nir_lower_abi(nir_shader *nir, struct si_shader *shader, struct si_shader_args *args)
{
   struct lower_abi_state state = {
      .shader = shader,
      .args = args,
   };

   nir_function_impl *impl = nir_shader_get_entrypoint(nir);

   nir_builder b = nir_builder_create(impl);

   preload_reusable_variables(&b, &state);

   bool progress = false;
   nir_foreach_block_safe(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type == nir_instr_type_intrinsic)
            progress |= lower_intrinsic(&b, instr, &state);
      }
   }

   nir_metadata preserved = progress ?
      nir_metadata_control_flow :
      nir_metadata_all;
   nir_metadata_preserve(impl, preserved);

   return progress;
}