/*
 * Copyright © 2022 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "ac_nir.h"
#include "nir.h"
#include "nir_builder.h"
#include "radv_constants.h"
#include "radv_nir.h"
#include "radv_private.h"
#include "radv_shader.h"
#include "radv_shader_args.h"

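/* Unpack the bitfield "field" from the user SGPR argument "arg".
 * Relies on RADV's FIELD__SHIFT / FIELD__MASK naming convention and on the
 * "b" and "s" variables being in scope at the point of use.
 */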
#define GET_SGPR_FIELD_NIR(arg, field)                                                                                 \
   ac_nir_unpack_arg(b, &s->args->ac, arg, field##__SHIFT, util_bitcount(field##__MASK))

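/* State threaded through the lowering callback: compile-time shader info, the
 * shader argument layout and the pre-loaded GSVS ring descriptors (legacy GS).
 */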
typedef struct {
   enum amd_gfx_level gfx_level;
   const struct radv_shader_args *args;
   const struct radv_shader_info *info;
   const struct radv_graphics_state_key *gfx_state;
   uint32_t address32_hi;
   nir_def *gsvs_ring[4];
} lower_abi_state;

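/* Load the 4-dword buffer descriptor of the given ring from the ring_offsets
 * (or task_ring_offsets) buffer, at an offset of 16 bytes per ring slot.
 */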
static nir_def *
load_ring(nir_builder *b, unsigned ring, lower_abi_state *s)
{
   struct ac_arg arg =
      b->shader->info.stage == MESA_SHADER_TASK ? s->args->task_ring_offsets : s->args->ac.ring_offsets;

   nir_def *ring_offsets = ac_nir_load_arg(b, &s->args->ac, arg);
   ring_offsets = nir_pack_64_2x32_split(b, nir_channel(b, ring_offsets, 0), nir_channel(b, ring_offsets, 1));
   return nir_load_smem_amd(b, 4, ring_offsets, nir_imm_int(b, ring * 16u), .align_mul = 4u);
}

static nir_def *
nggc_bool_setting(nir_builder *b, unsigned mask, lower_abi_state *s)
{
   nir_def *settings = ac_nir_load_arg(b, &s->args->ac, s->args->ngg_culling_settings);
   return nir_test_mask(b, settings, mask);
}

static nir_def *
shader_query_bool_setting(nir_builder *b, unsigned mask, lower_abi_state *s)
{
   nir_def *settings = ac_nir_load_arg(b, &s->args->ac, s->args->shader_query_state);
   return nir_test_mask(b, settings, mask);
}

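/* Lower one RADV ABI intrinsic to loads of shader arguments, ring descriptors
 * or compile-time constants. Returns true if the intrinsic was handled.
 */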
static bool
lower_abi_instr(nir_builder *b, nir_intrinsic_instr *intrin, void *state)
{
   lower_abi_state *s = (lower_abi_state *)state;
   gl_shader_stage stage = b->shader->info.stage;

   b->cursor = nir_before_instr(&intrin->instr);

   nir_def *replacement = NULL;
   bool progress = true;

   switch (intrin->intrinsic) {
   case nir_intrinsic_load_ring_tess_factors_amd:
      replacement = load_ring(b, RING_HS_TESS_FACTOR, s);
      break;
   case nir_intrinsic_load_ring_tess_factors_offset_amd:
      replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.tcs_factor_offset);
      break;
   case nir_intrinsic_load_ring_tess_offchip_amd:
      replacement = load_ring(b, RING_HS_TESS_OFFCHIP, s);
      break;
   case nir_intrinsic_load_ring_tess_offchip_offset_amd:
      replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.tess_offchip_offset);
      break;
   case nir_intrinsic_load_tcs_num_patches_amd:
      if (s->info->num_tess_patches) {
         replacement = nir_imm_int(b, s->info->num_tess_patches);
      } else {
         if (stage == MESA_SHADER_TESS_CTRL) {
            replacement = GET_SGPR_FIELD_NIR(s->args->tcs_offchip_layout, TCS_OFFCHIP_LAYOUT_NUM_PATCHES);
         } else {
            replacement = GET_SGPR_FIELD_NIR(s->args->tes_state, TES_STATE_NUM_PATCHES);
         }
      }
      break;
   case nir_intrinsic_load_ring_esgs_amd:
      replacement = load_ring(b, stage == MESA_SHADER_GEOMETRY ? RING_ESGS_GS : RING_ESGS_VS, s);
      break;
   case nir_intrinsic_load_ring_gsvs_amd:
      if (stage == MESA_SHADER_VERTEX)
         replacement = load_ring(b, RING_GSVS_VS, s);
      else
         replacement = s->gsvs_ring[nir_intrinsic_stream_id(intrin)];
      break;
   case nir_intrinsic_load_ring_gs2vs_offset_amd:
      replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs2vs_offset);
      break;
   case nir_intrinsic_load_ring_es2gs_offset_amd:
      replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.es2gs_offset);
      break;

   case nir_intrinsic_load_ring_attr_amd:
      replacement = load_ring(b, RING_PS_ATTR, s);

      /* Note, the HW always assumes there is at least 1 per-vertex param. */
      const unsigned total_num_params = MAX2(1, s->info->outinfo.param_exports) + s->info->outinfo.prim_param_exports;

      nir_def *dword1 = nir_channel(b, replacement, 1);
      dword1 = nir_ior_imm(b, dword1, S_008F04_STRIDE(16 * total_num_params));
      replacement = nir_vector_insert_imm(b, replacement, dword1, 1);
      break;

   case nir_intrinsic_load_ring_attr_offset_amd: {
      nir_def *ring_attr_offset = ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_attr_offset);
      replacement = nir_ishl_imm(b, nir_ubfe_imm(b, ring_attr_offset, 0, 15), 9); /* 512b increments. */
      break;
   }

   case nir_intrinsic_load_tess_rel_patch_id_amd:
      if (stage == MESA_SHADER_TESS_CTRL) {
         replacement = nir_extract_u8(b, ac_nir_load_arg(b, &s->args->ac, s->args->ac.tcs_rel_ids), nir_imm_int(b, 0));
      } else if (stage == MESA_SHADER_TESS_EVAL) {
         /* Setting an upper bound like this makes it possible to optimize some
          * multiplications (in address calculations) so that constant additions
          * can be folded into the constant offset of memory load instructions.
          */
         nir_def *arg = ac_nir_load_arg(b, &s->args->ac, s->args->ac.tes_rel_patch_id);

         if (s->info->tes.tcs_vertices_out) {
            nir_intrinsic_instr *load_arg = nir_instr_as_intrinsic(arg->parent_instr);
            nir_intrinsic_set_arg_upper_bound_u32_amd(load_arg, 2048 / MAX2(s->info->tes.tcs_vertices_out, 1));
         }

         replacement = arg;
      } else {
         unreachable("invalid tessellation shader stage");
      }
      break;
   case nir_intrinsic_load_patch_vertices_in:
      if (stage == MESA_SHADER_TESS_CTRL) {
         if (s->gfx_state->ts.patch_control_points) {
            replacement = nir_imm_int(b, s->gfx_state->ts.patch_control_points);
         } else {
            replacement = GET_SGPR_FIELD_NIR(s->args->tcs_offchip_layout, TCS_OFFCHIP_LAYOUT_PATCH_CONTROL_POINTS);
         }
      } else if (stage == MESA_SHADER_TESS_EVAL) {
         if (s->info->tes.tcs_vertices_out) {
            replacement = nir_imm_int(b, s->info->tes.tcs_vertices_out);
         } else {
            replacement = GET_SGPR_FIELD_NIR(s->args->tes_state, TES_STATE_TCS_VERTICES_OUT);
         }
      } else
         unreachable("invalid tessellation shader stage");
      break;
   case nir_intrinsic_load_gs_vertex_offset_amd:
      replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_vtx_offset[nir_intrinsic_base(intrin)]);
      break;
   case nir_intrinsic_load_workgroup_num_input_vertices_amd:
      replacement = nir_ubfe_imm(b, ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_tg_info), 12, 9);
      break;
   case nir_intrinsic_load_workgroup_num_input_primitives_amd:
      replacement = nir_ubfe_imm(b, ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_tg_info), 22, 9);
      break;
   case nir_intrinsic_load_packed_passthrough_primitive_amd:
      /* NGG passthrough mode: the HW already packs the primitive export value to a single register.
       */
      replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_vtx_offset[0]);
      break;
   case nir_intrinsic_load_pipeline_stat_query_enabled_amd:
      replacement = shader_query_bool_setting(b, radv_shader_query_pipeline_stat, s);
      break;
   case nir_intrinsic_load_prim_gen_query_enabled_amd:
      replacement = shader_query_bool_setting(b, radv_shader_query_prim_gen, s);
      break;
   case nir_intrinsic_load_prim_xfb_query_enabled_amd:
      replacement = shader_query_bool_setting(b, radv_shader_query_prim_xfb, s);
      break;
   case nir_intrinsic_load_merged_wave_info_amd:
      replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.merged_wave_info);
      break;
   case nir_intrinsic_load_cull_any_enabled_amd: {
      nir_def *gs_tg_info = ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_tg_info);

      /* Consider a workgroup small if it contains fewer than 16 triangles.
       *
       * gs_tg_info[30:22] is the number of primitives, which we know is non-zero.
       * BITFIELD_RANGE(26, 5) covers the top 5 of those 9 bits, so testing that they
       * are all zero is equivalent to "ult(ubfe(gs_tg_info, 22, 9), 16)", but lets
       * ACO optimize out the comparison to zero (see try_optimize_scc_nocompare).
       */
      nir_def *small_workgroup = nir_ieq_imm(b, nir_iand_imm(b, gs_tg_info, BITFIELD_RANGE(22 + 4, 9 - 4)), 0);

      nir_def *mask =
         nir_bcsel(b, small_workgroup, nir_imm_int(b, radv_nggc_none),
                   nir_imm_int(b, radv_nggc_front_face | radv_nggc_back_face | radv_nggc_small_primitives));
      nir_def *settings = ac_nir_load_arg(b, &s->args->ac, s->args->ngg_culling_settings);
      replacement = nir_ine_imm(b, nir_iand(b, settings, mask), 0);
      break;
   }
   case nir_intrinsic_load_cull_front_face_enabled_amd:
      replacement = nggc_bool_setting(b, radv_nggc_front_face, s);
      break;
   case nir_intrinsic_load_cull_back_face_enabled_amd:
      replacement = nggc_bool_setting(b, radv_nggc_back_face, s);
      break;
   case nir_intrinsic_load_cull_ccw_amd:
      replacement = nggc_bool_setting(b, radv_nggc_face_is_ccw, s);
      break;
   case nir_intrinsic_load_cull_small_primitives_enabled_amd:
      replacement = nggc_bool_setting(b, radv_nggc_small_primitives, s);
      break;
   case nir_intrinsic_load_cull_small_prim_precision_amd: {
      /* To save space, only the exponent is stored in the high 8 bits.
       * We calculate the precision from those 8 bits:
       * exponent = nggc_settings >> 24
       * precision = 1.0 * 2 ^ exponent
       */
      nir_def *settings = ac_nir_load_arg(b, &s->args->ac, s->args->ngg_culling_settings);
      nir_def *exponent = nir_ishr_imm(b, settings, 24u);
      replacement = nir_ldexp(b, nir_imm_float(b, 1.0f), exponent);
      break;
   }

   case nir_intrinsic_load_viewport_xy_scale_and_offset: {
      nir_def *comps[] = {
         ac_nir_load_arg(b, &s->args->ac, s->args->ngg_viewport_scale[0]),
         ac_nir_load_arg(b, &s->args->ac, s->args->ngg_viewport_scale[1]),
         ac_nir_load_arg(b, &s->args->ac, s->args->ngg_viewport_translate[0]),
         ac_nir_load_arg(b, &s->args->ac, s->args->ngg_viewport_translate[1]),
      };
      replacement = nir_vec(b, comps, 4);
      break;
   }

   case nir_intrinsic_load_ring_task_draw_amd:
      replacement = load_ring(b, RING_TS_DRAW, s);
      break;
   case nir_intrinsic_load_ring_task_payload_amd:
      replacement = load_ring(b, RING_TS_PAYLOAD, s);
      break;
   case nir_intrinsic_load_ring_mesh_scratch_amd:
      replacement = load_ring(b, RING_MS_SCRATCH, s);
      break;
   case nir_intrinsic_load_ring_mesh_scratch_offset_amd:
      /* gs_tg_info[0:11] is ordered_wave_id. Multiply by the ring entry size. */
      replacement = nir_imul_imm(b, nir_iand_imm(b, ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_tg_info), 0xfff),
                                 RADV_MESH_SCRATCH_ENTRY_BYTES);
      break;
   case nir_intrinsic_load_task_ring_entry_amd:
      replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.task_ring_entry);
      break;
   case nir_intrinsic_load_lshs_vertex_stride_amd: {
      if (stage == MESA_SHADER_VERTEX) {
         replacement = nir_imm_int(b, get_tcs_input_vertex_stride(s->info->vs.num_linked_outputs));
      } else {
         assert(stage == MESA_SHADER_TESS_CTRL);
         if (s->info->inputs_linked) {
            replacement = nir_imm_int(b, get_tcs_input_vertex_stride(s->info->tcs.num_linked_inputs));
         } else {
            nir_def *lshs_vertex_stride =
               GET_SGPR_FIELD_NIR(s->args->tcs_offchip_layout, TCS_OFFCHIP_LAYOUT_LSHS_VERTEX_STRIDE);
            replacement = nir_ishl_imm(b, lshs_vertex_stride, 2);
         }
      }
      break;
   }
   case nir_intrinsic_load_esgs_vertex_stride_amd: {
      /* Emulate VGT_ESGS_RING_ITEMSIZE on GFX9+ to reduce context register writes. */
      assert(s->gfx_level >= GFX9);
      if (s->info->merged_shader_compiled_separately) {
         replacement = ac_nir_load_arg(b, &s->args->ac, s->args->vgt_esgs_ring_itemsize);
      } else {
         const unsigned stride =
            s->info->is_ngg ? s->info->ngg_info.vgt_esgs_ring_itemsize : s->info->gs_ring_info.vgt_esgs_ring_itemsize;
         replacement = nir_imm_int(b, stride);
      }
      break;
   }
   case nir_intrinsic_load_hs_out_patch_data_offset_amd: {
      nir_def *num_tcs_outputs, *out_vertices_per_patch;

      if (stage == MESA_SHADER_TESS_CTRL) {
         num_tcs_outputs = nir_imm_int(b, s->info->tcs.num_linked_outputs);
         out_vertices_per_patch = nir_imm_int(b, s->info->tcs.tcs_vertices_out);
      } else {
         if (s->info->inputs_linked) {
            num_tcs_outputs = nir_imm_int(b, s->info->tes.num_linked_inputs);
         } else {
            num_tcs_outputs = GET_SGPR_FIELD_NIR(s->args->tes_state, TES_STATE_NUM_TCS_OUTPUTS);
         }

         if (s->info->tes.tcs_vertices_out) {
            out_vertices_per_patch = nir_imm_int(b, s->info->tes.tcs_vertices_out);
         } else {
            out_vertices_per_patch = GET_SGPR_FIELD_NIR(s->args->tes_state, TES_STATE_TCS_VERTICES_OUT);
         }
      }

      nir_def *per_vertex_output_patch_size =
         nir_imul(b, out_vertices_per_patch, nir_imul_imm(b, num_tcs_outputs, 16u));

      if (s->info->num_tess_patches) {
         unsigned num_patches = s->info->num_tess_patches;
         replacement = nir_imul_imm(b, per_vertex_output_patch_size, num_patches);
      } else {
         nir_def *num_patches;

         if (stage == MESA_SHADER_TESS_CTRL) {
            num_patches = GET_SGPR_FIELD_NIR(s->args->tcs_offchip_layout, TCS_OFFCHIP_LAYOUT_NUM_PATCHES);
         } else {
            num_patches = GET_SGPR_FIELD_NIR(s->args->tes_state, TES_STATE_NUM_PATCHES);
         }
         replacement = nir_imul(b, per_vertex_output_patch_size, num_patches);
      }
      break;
   }
   case nir_intrinsic_load_sample_positions_amd: {
      uint32_t sample_pos_offset = (RING_PS_SAMPLE_POSITIONS * 16) - 8;

      nir_def *ring_offsets = ac_nir_load_arg(b, &s->args->ac, s->args->ac.ring_offsets);
      nir_def *addr = nir_pack_64_2x32(b, ring_offsets);
      nir_def *sample_id = nir_umin(b, intrin->src[0].ssa, nir_imm_int(b, 7));
      nir_def *offset = nir_ishl_imm(b, sample_id, 3); /* 2 floats containing samplepos.xy */

      nir_const_value *const_num_samples = nir_src_as_const_value(intrin->src[1]);
      if (const_num_samples) {
         sample_pos_offset += (const_num_samples->u32 << 3);
      } else {
         offset = nir_iadd(b, offset, nir_ishl_imm(b, intrin->src[1].ssa, 3));
      }

      replacement =
         nir_load_global_amd(b, 2, 32, addr, offset, .base = sample_pos_offset, .access = ACCESS_NON_WRITEABLE);
      break;
   }
   case nir_intrinsic_load_rasterization_samples_amd:
      if (s->gfx_state->dynamic_rasterization_samples) {
         replacement = GET_SGPR_FIELD_NIR(s->args->ps_state, PS_STATE_NUM_SAMPLES);
      } else {
         replacement = nir_imm_int(b, s->gfx_state->ms.rasterization_samples);
      }
      break;
   case nir_intrinsic_load_provoking_vtx_in_prim_amd: {
      if (s->gfx_state->dynamic_provoking_vtx_mode) {
         replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ngg_provoking_vtx);
      } else {
         unsigned provoking_vertex = 0;
         if (s->gfx_state->rs.provoking_vtx_last) {
            if (stage == MESA_SHADER_VERTEX) {
               provoking_vertex = radv_get_num_vertices_per_prim(s->gfx_state) - 1;
            } else if (stage == MESA_SHADER_GEOMETRY) {
               provoking_vertex = b->shader->info.gs.vertices_in - 1;
            } else {
               /* TES doesn't use this intrinsic because it can read the primitive ID
                * directly instead of passing it through LDS with this intrinsic.
                */
               unreachable("load_provoking_vtx_in_prim_amd is only supported in VS and GS");
            }
         }

         replacement = nir_imm_int(b, provoking_vertex);
      }
      break;
   }
   case nir_intrinsic_atomic_add_gs_emit_prim_count_amd:
      nir_gds_atomic_add_amd(b, 32, intrin->src[0].ssa, nir_imm_int(b, RADV_SHADER_QUERY_GS_PRIM_EMIT_OFFSET),
                             nir_imm_int(b, 0x100));
      break;
   case nir_intrinsic_atomic_add_gen_prim_count_amd: {
      uint32_t offset = stage == MESA_SHADER_MESH ? RADV_SHADER_QUERY_MS_PRIM_GEN_OFFSET
                                                  : RADV_SHADER_QUERY_PRIM_GEN_OFFSET(nir_intrinsic_stream_id(intrin));

      nir_gds_atomic_add_amd(b, 32, intrin->src[0].ssa, nir_imm_int(b, offset), nir_imm_int(b, 0x100));
      break;
   }
   case nir_intrinsic_atomic_add_xfb_prim_count_amd:
      nir_gds_atomic_add_amd(b, 32, intrin->src[0].ssa,
                             nir_imm_int(b, RADV_SHADER_QUERY_PRIM_XFB_OFFSET(nir_intrinsic_stream_id(intrin))),
                             nir_imm_int(b, 0x100));
      break;
   case nir_intrinsic_atomic_add_shader_invocation_count_amd: {
      uint32_t offset;

      if (stage == MESA_SHADER_MESH) {
         offset = RADV_SHADER_QUERY_MS_INVOCATION_OFFSET;
      } else if (stage == MESA_SHADER_TASK) {
         offset = RADV_SHADER_QUERY_TS_INVOCATION_OFFSET;
      } else {
         offset = RADV_SHADER_QUERY_GS_INVOCATION_OFFSET;
      }

      nir_gds_atomic_add_amd(b, 32, intrin->src[0].ssa, nir_imm_int(b, offset), nir_imm_int(b, 0x100));
      break;
   }
   case nir_intrinsic_load_streamout_config_amd:
      replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.streamout_config);
      break;
   case nir_intrinsic_load_streamout_write_index_amd:
      replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.streamout_write_index);
      break;
   case nir_intrinsic_load_streamout_buffer_amd: {
      nir_def *ptr = nir_pack_64_2x32_split(b, ac_nir_load_arg(b, &s->args->ac, s->args->streamout_buffers),
                                            nir_imm_int(b, s->address32_hi));
      replacement = nir_load_smem_amd(b, 4, ptr, nir_imm_int(b, nir_intrinsic_base(intrin) * 16));
      break;
   }
   case nir_intrinsic_load_streamout_offset_amd:
      replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.streamout_offset[nir_intrinsic_base(intrin)]);
      break;

   case nir_intrinsic_load_lds_ngg_gs_out_vertex_base_amd:
      replacement = nir_imm_int(b, s->info->ngg_info.esgs_ring_size);
      break;
   case nir_intrinsic_load_lds_ngg_scratch_base_amd:
      replacement = nir_imm_int(b, s->info->ngg_info.scratch_lds_base);
      break;
   case nir_intrinsic_load_num_vertices_per_primitive_amd: {
      unsigned num_vertices;

      if (stage == MESA_SHADER_VERTEX) {
         /* For dynamic primitive topology with streamout. */
         if (s->info->vs.dynamic_num_verts_per_prim) {
            replacement = ac_nir_load_arg(b, &s->args->ac, s->args->num_verts_per_prim);
         } else {
            replacement = nir_imm_int(b, radv_get_num_vertices_per_prim(s->gfx_state));
         }
      } else if (stage == MESA_SHADER_TESS_EVAL) {
         if (s->info->tes.point_mode) {
            num_vertices = 1;
         } else if (s->info->tes._primitive_mode == TESS_PRIMITIVE_ISOLINES) {
            num_vertices = 2;
         } else {
            num_vertices = 3;
         }
         replacement = nir_imm_int(b, num_vertices);
      } else {
         assert(stage == MESA_SHADER_GEOMETRY);
         switch (s->info->gs.output_prim) {
         case MESA_PRIM_POINTS:
            num_vertices = 1;
            break;
         case MESA_PRIM_LINE_STRIP:
            num_vertices = 2;
            break;
         case MESA_PRIM_TRIANGLE_STRIP:
            num_vertices = 3;
            break;
         default:
            unreachable("invalid GS output primitive");
            break;
         }
         replacement = nir_imm_int(b, num_vertices);
      }
      break;
   }
   case nir_intrinsic_load_ordered_id_amd:
      replacement = ac_nir_unpack_arg(b, &s->args->ac, s->args->ac.gs_tg_info, 0, 12);
      break;
   case nir_intrinsic_load_force_vrs_rates_amd:
      replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.force_vrs_rates);
      break;
   case nir_intrinsic_load_fully_covered: {
      nir_def *sample_coverage = ac_nir_load_arg(b, &s->args->ac, s->args->ac.sample_coverage);
      replacement = nir_ine_imm(b, sample_coverage, 0);
      break;
   }
   case nir_intrinsic_load_barycentric_optimize_amd: {
      nir_def *prim_mask = ac_nir_load_arg(b, &s->args->ac, s->args->ac.prim_mask);
      /* enabled when bit 31 is set */
      replacement = nir_ilt_imm(b, prim_mask, 0);
      break;
   }
   case nir_intrinsic_load_poly_line_smooth_enabled:
      if (s->gfx_state->dynamic_line_rast_mode) {
         nir_def *line_rast_mode = GET_SGPR_FIELD_NIR(s->args->ps_state, PS_STATE_LINE_RAST_MODE);
         replacement = nir_ieq_imm(b, line_rast_mode, VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_KHR);
      } else {
         replacement = nir_imm_bool(b, s->gfx_state->rs.line_smooth_enabled);
      }
      break;
   case nir_intrinsic_load_initial_edgeflags_amd:
      replacement = nir_imm_int(b, 0);
      break;
   case nir_intrinsic_load_provoking_vtx_amd:
      replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.load_provoking_vtx);
      break;
   case nir_intrinsic_load_rasterization_primitive_amd:
      assert(s->gfx_state->unknown_rast_prim);
      /* Load the primitive topology from a user SGPR when it's unknown at compile time (GPL). */
      replacement = GET_SGPR_FIELD_NIR(s->args->ps_state, PS_STATE_RAST_PRIM);
      break;
   default:
      progress = false;
      break;
   }

   if (!progress)
      return false;

   if (replacement)
      nir_def_rewrite_uses(&intrin->def, replacement);

   nir_instr_remove(&intrin->instr);
   nir_instr_free(&intrin->instr);

   return true;
}

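/* Build the GSVS ring descriptor for a given GS output stream: offset the base
 * address past the data of the previous streams, then patch the per-stream
 * stride and the wave size into the descriptor.
 */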
static nir_def *
load_gsvs_ring(nir_builder *b, lower_abi_state *s, unsigned stream_id)
{
   nir_def *ring = load_ring(b, RING_GSVS_GS, s);
   unsigned stream_offset = 0;
   unsigned stride = 0;
   for (unsigned i = 0; i <= stream_id; i++) {
      stride = 4 * s->info->gs.num_stream_output_components[i] * s->info->gs.vertices_out;
      if (i < stream_id)
         stream_offset += stride * s->info->wave_size;
   }

   /* Limit on the stride field for <= GFX7. */
   assert(stride < (1 << 14));

   if (stream_offset) {
      nir_def *addr = nir_pack_64_2x32_split(b, nir_channel(b, ring, 0), nir_channel(b, ring, 1));
      addr = nir_iadd_imm(b, addr, stream_offset);
      ring = nir_vector_insert_imm(b, ring, nir_unpack_64_2x32_split_x(b, addr), 0);
      ring = nir_vector_insert_imm(b, ring, nir_unpack_64_2x32_split_y(b, addr), 1);
   }

   ring = nir_vector_insert_imm(b, ring, nir_ior_imm(b, nir_channel(b, ring, 1), S_008F04_STRIDE(stride)), 1);
   return nir_vector_insert_imm(b, ring, nir_imm_int(b, s->info->wave_size), 2);
}

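/* Lower RADV-specific ABI intrinsics to loads of shader arguments and constants.
 * For legacy (non-NGG) GS, the per-stream GSVS ring descriptors are loaded once
 * at the top of the entrypoint so they can replace load_ring_gsvs_amd later.
 */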
void
radv_nir_lower_abi(nir_shader *shader, enum amd_gfx_level gfx_level, const struct radv_shader_stage *stage,
                   const struct radv_graphics_state_key *gfx_state, uint32_t address32_hi)
{
   lower_abi_state state = {
      .gfx_level = gfx_level,
      .info = &stage->info,
      .args = &stage->args,
      .gfx_state = gfx_state,
      .address32_hi = address32_hi,
   };

   if (shader->info.stage == MESA_SHADER_GEOMETRY && !stage->info.is_ngg) {
      nir_function_impl *impl = nir_shader_get_entrypoint(shader);

      nir_builder b = nir_builder_at(nir_before_impl(impl));

      u_foreach_bit (i, shader->info.gs.active_stream_mask)
         state.gsvs_ring[i] = load_gsvs_ring(&b, &state, i);
   }

   nir_shader_intrinsics_pass(shader, lower_abi_instr, nir_metadata_dominance | nir_metadata_block_index, &state);
}