/*
 * Copyright © 2022 Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 */

#include "ac_nir.h"
#include "nir.h"
#include "nir_builder.h"
#include "radv_constants.h"
#include "radv_nir.h"
#include "radv_pipeline_graphics.h"
#include "radv_shader.h"
#include "radv_shader_args.h"
#include "sid.h"

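/* Unpack a named bitfield from a user SGPR argument. Relies on the field's
 * FIELD##__SHIFT / FIELD##__MASK definitions and on `b` and `s` being in scope
 * at the call site.
 */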
#define GET_SGPR_FIELD_NIR(arg, field)                                                                                 \
   ac_nir_unpack_arg(b, &s->args->ac, arg, field##__SHIFT, util_bitcount(field##__MASK))

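/* Shared state for the ABI lowering pass. gsvs_ring[] holds the per-stream
 * GSVS ring descriptors, pre-built only for legacy (non-NGG) GS.
 */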
typedef struct {
   enum amd_gfx_level gfx_level;
   const struct radv_shader_args *args;
   const struct radv_shader_info *info;
   const struct radv_graphics_state_key *gfx_state;
   uint32_t address32_hi;
   nir_def *gsvs_ring[4];
} lower_abi_state;

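/* Load the 4-dword buffer descriptor of the given ring with a scalar load from
 * the ring_offsets descriptor list (task shaders use their own ring pointer).
 */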
static nir_def *
load_ring(nir_builder *b, unsigned ring, lower_abi_state *s)
{
   struct ac_arg arg =
      b->shader->info.stage == MESA_SHADER_TASK ? s->args->task_ring_offsets : s->args->ac.ring_offsets;

   nir_def *ring_offsets = ac_nir_load_arg(b, &s->args->ac, arg);
   ring_offsets = nir_pack_64_2x32_split(b, nir_channel(b, ring_offsets, 0), nir_channel(b, ring_offsets, 1));
   return nir_load_smem_amd(b, 4, ring_offsets, nir_imm_int(b, ring * 16u), .align_mul = 4u);
}

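/* Test a single flag in the NGG culling settings SGPR. */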
static nir_def *
nggc_bool_setting(nir_builder *b, unsigned mask, lower_abi_state *s)
{
   nir_def *settings = ac_nir_load_arg(b, &s->args->ac, s->args->ngg_culling_settings);
   return nir_test_mask(b, settings, mask);
}

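/* Test a shader query enable flag: task shaders read it from the task_state
 * SGPR, other stages from the NGG state SGPR field.
 */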
static nir_def *
shader_query_bool_setting(nir_builder *b, unsigned mask, lower_abi_state *s)
{
   nir_def *settings;

   if (b->shader->info.stage == MESA_SHADER_TASK) {
      settings = ac_nir_load_arg(b, &s->args->ac, s->args->task_state);
   } else {
      settings = GET_SGPR_FIELD_NIR(s->args->ngg_state, NGG_STATE_QUERY);
   }

   return nir_test_mask(b, settings, mask);
}

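/* Lower a single RADV ABI intrinsic to user SGPR loads, ring descriptors or
 * compile-time constants. Returns true if the intrinsic was lowered and removed.
 */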
static bool
lower_abi_instr(nir_builder *b, nir_intrinsic_instr *intrin, void *state)
{
   lower_abi_state *s = (lower_abi_state *)state;
   gl_shader_stage stage = b->shader->info.stage;

   b->cursor = nir_before_instr(&intrin->instr);

   nir_def *replacement = NULL;
   bool progress = true;

   switch (intrin->intrinsic) {
   case nir_intrinsic_load_ring_tess_factors_amd:
      replacement = load_ring(b, RING_HS_TESS_FACTOR, s);
      break;
   case nir_intrinsic_load_ring_tess_offchip_amd:
      replacement = load_ring(b, RING_HS_TESS_OFFCHIP, s);
      break;
   case nir_intrinsic_load_tcs_num_patches_amd:
      if (s->info->num_tess_patches) {
         replacement = nir_imm_int(b, s->info->num_tess_patches);
      } else {
         nir_def *n = GET_SGPR_FIELD_NIR(s->args->tcs_offchip_layout, TCS_OFFCHIP_LAYOUT_NUM_PATCHES);
         replacement = nir_iadd_imm_nuw(b, n, 1);
      }
      break;
   case nir_intrinsic_load_tcs_tess_levels_to_tes_amd:
      if (s->info->outputs_linked) {
         replacement = nir_imm_bool(b, s->info->tcs.tes_reads_tess_factors);
      } else {
         replacement =
            nir_ine_imm(b, GET_SGPR_FIELD_NIR(s->args->tcs_offchip_layout, TCS_OFFCHIP_LAYOUT_TES_READS_TF), 0);
      }
      break;
   case nir_intrinsic_load_tcs_primitive_mode_amd:
      if (s->info->outputs_linked) {
         replacement = nir_imm_int(b, s->info->tes._primitive_mode);
      } else {
         replacement = GET_SGPR_FIELD_NIR(s->args->tcs_offchip_layout, TCS_OFFCHIP_LAYOUT_PRIMITIVE_MODE);
      }
      break;
   case nir_intrinsic_load_ring_esgs_amd:
      replacement = load_ring(b, stage == MESA_SHADER_GEOMETRY ? RING_ESGS_GS : RING_ESGS_VS, s);
      break;
   case nir_intrinsic_load_ring_gsvs_amd:
      if (stage == MESA_SHADER_VERTEX)
         replacement = load_ring(b, RING_GSVS_VS, s);
      else
         replacement = s->gsvs_ring[nir_intrinsic_stream_id(intrin)];
      break;
   case nir_intrinsic_load_ring_attr_amd:
      replacement = load_ring(b, RING_PS_ATTR, s);

      /* Note, the HW always assumes there is at least 1 per-vertex param. */
      const unsigned total_num_params = MAX2(1, s->info->outinfo.param_exports) + s->info->outinfo.prim_param_exports;

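      /* Patch the attribute stride (16 bytes per exported parameter) into dword 1 of the descriptor. */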
      nir_def *dword1 = nir_channel(b, replacement, 1);
      dword1 = nir_ior_imm(b, dword1, S_008F04_STRIDE(16 * total_num_params));
      replacement = nir_vector_insert_imm(b, replacement, dword1, 1);
      break;

   case nir_intrinsic_load_patch_vertices_in:
      if (stage == MESA_SHADER_TESS_CTRL) {
         if (s->gfx_state->ts.patch_control_points) {
            replacement = nir_imm_int(b, s->gfx_state->ts.patch_control_points);
         } else {
            nir_def *n = GET_SGPR_FIELD_NIR(s->args->tcs_offchip_layout, TCS_OFFCHIP_LAYOUT_PATCH_CONTROL_POINTS);
            replacement = nir_iadd_imm_nuw(b, n, 1);
         }
      } else if (stage == MESA_SHADER_TESS_EVAL) {
         if (s->info->tes.tcs_vertices_out) {
            replacement = nir_imm_int(b, s->info->tes.tcs_vertices_out);
         } else {
            nir_def *n = GET_SGPR_FIELD_NIR(s->args->tcs_offchip_layout, TCS_OFFCHIP_LAYOUT_OUT_PATCH_CP);
            replacement = nir_iadd_imm_nuw(b, n, 1);
         }
      } else
         unreachable("invalid tessellation shader stage");
      break;
   case nir_intrinsic_load_pipeline_stat_query_enabled_amd:
      replacement = shader_query_bool_setting(b, radv_shader_query_pipeline_stat, s);
      break;
   case nir_intrinsic_load_prim_gen_query_enabled_amd:
      replacement = shader_query_bool_setting(b, radv_shader_query_prim_gen, s);
      break;
   case nir_intrinsic_load_prim_xfb_query_enabled_amd:
      replacement = shader_query_bool_setting(b, radv_shader_query_prim_xfb, s);
      break;
   case nir_intrinsic_load_cull_any_enabled_amd: {
      nir_def *gs_tg_info = ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_tg_info);

      /* Consider a workgroup small if it contains fewer than 16 triangles.
       *
       * gs_tg_info[30:22] is the number of primitives, which we know is non-zero,
       * so the below is equivalent to: "ult(ubfe(gs_tg_info, 22, 9), 16)", but
       * ACO can optimize out the comparison to zero (see try_optimize_scc_nocompare).
       */
      nir_def *small_workgroup = nir_ieq_imm(b, nir_iand_imm(b, gs_tg_info, BITFIELD_RANGE(22 + 4, 9 - 4)), 0);

      nir_def *mask =
         nir_bcsel(b, small_workgroup, nir_imm_int(b, radv_nggc_none),
                   nir_imm_int(b, radv_nggc_front_face | radv_nggc_back_face | radv_nggc_small_primitives));
      nir_def *settings = ac_nir_load_arg(b, &s->args->ac, s->args->ngg_culling_settings);
      replacement = nir_ine_imm(b, nir_iand(b, settings, mask), 0);
      break;
   }
   case nir_intrinsic_load_cull_front_face_enabled_amd:
      replacement = nggc_bool_setting(b, radv_nggc_front_face, s);
      break;
   case nir_intrinsic_load_cull_back_face_enabled_amd:
      replacement = nggc_bool_setting(b, radv_nggc_back_face, s);
      break;
   case nir_intrinsic_load_cull_ccw_amd:
      replacement = nggc_bool_setting(b, radv_nggc_face_is_ccw, s);
      break;
   case nir_intrinsic_load_cull_small_triangles_enabled_amd:
      replacement = nggc_bool_setting(b, radv_nggc_small_primitives, s);
      break;
   case nir_intrinsic_load_cull_small_triangle_precision_amd: {
      /* To save space, only the exponent is stored in the high 8 bits.
       * We calculate the precision from those 8 bits:
       * exponent = nggc_settings >> 24
       * precision = 1.0 * 2 ^ exponent
       */
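      /* e.g. settings = 0xf8000000 -> exponent = -8 -> precision = 2^-8 = 1/256 */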
      nir_def *settings = ac_nir_load_arg(b, &s->args->ac, s->args->ngg_culling_settings);
      nir_def *exponent = nir_ishr_imm(b, settings, 24u);
      replacement = nir_ldexp(b, nir_imm_float(b, 1.0f), exponent);
      break;
   }

   case nir_intrinsic_load_cull_triangle_viewport_xy_scale_and_offset_amd: {
      nir_def *comps[] = {
         ac_nir_load_arg(b, &s->args->ac, s->args->ngg_viewport_scale[0]),
         ac_nir_load_arg(b, &s->args->ac, s->args->ngg_viewport_scale[1]),
         ac_nir_load_arg(b, &s->args->ac, s->args->ngg_viewport_translate[0]),
         ac_nir_load_arg(b, &s->args->ac, s->args->ngg_viewport_translate[1]),
      };
      replacement = nir_vec(b, comps, 4);
      break;
   }

   case nir_intrinsic_load_ring_task_draw_amd:
      replacement = load_ring(b, RING_TS_DRAW, s);
      break;
   case nir_intrinsic_load_ring_task_payload_amd:
      replacement = load_ring(b, RING_TS_PAYLOAD, s);
      break;
   case nir_intrinsic_load_ring_mesh_scratch_amd:
      replacement = load_ring(b, RING_MS_SCRATCH, s);
      break;
   case nir_intrinsic_load_ring_mesh_scratch_offset_amd:
      /* gs_tg_info[0:11] is ordered_wave_id. Multiply by the ring entry size. */
      replacement = nir_imul_imm(b, nir_iand_imm(b, ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_tg_info), 0xfff),
                                 RADV_MESH_SCRATCH_ENTRY_BYTES);
      break;
   case nir_intrinsic_load_task_ring_entry_amd:
      replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.task_ring_entry);
      break;
   case nir_intrinsic_load_lshs_vertex_stride_amd: {
      if (stage == MESA_SHADER_VERTEX) {
         replacement = nir_imm_int(b, get_tcs_input_vertex_stride(s->info->vs.num_linked_outputs));
      } else {
         assert(stage == MESA_SHADER_TESS_CTRL);
         if (s->info->inputs_linked) {
            replacement = nir_imm_int(b, get_tcs_input_vertex_stride(s->info->tcs.num_linked_inputs));
         } else {
            nir_def *num_ls_out = GET_SGPR_FIELD_NIR(s->args->tcs_offchip_layout, TCS_OFFCHIP_LAYOUT_NUM_LS_OUTPUTS);
            nir_def *extra_dw = nir_bcsel(b, nir_ieq_imm(b, num_ls_out, 0), nir_imm_int(b, 0), nir_imm_int(b, 4));
            replacement = nir_iadd_nuw(b, nir_ishl_imm(b, num_ls_out, 4), extra_dw);
         }
      }
      break;
   }
   case nir_intrinsic_load_esgs_vertex_stride_amd: {
      /* Emulate VGT_ESGS_RING_ITEMSIZE on GFX9+ to reduce context register writes. */
      assert(s->gfx_level >= GFX9);
      if (s->info->merged_shader_compiled_separately) {
         replacement = ac_nir_load_arg(b, &s->args->ac, s->args->vgt_esgs_ring_itemsize);
      } else {
         const unsigned stride =
            s->info->is_ngg ? s->info->ngg_info.vgt_esgs_ring_itemsize : s->info->gs_ring_info.esgs_itemsize;
         replacement = nir_imm_int(b, stride);
      }
      break;
   }
   case nir_intrinsic_load_hs_out_patch_data_offset_amd: {
      nir_def *num_tcs_outputs, *out_vertices_per_patch;

      if (stage == MESA_SHADER_TESS_CTRL) {
         num_tcs_outputs = nir_imm_int(b, s->info->tcs.num_linked_outputs);
         out_vertices_per_patch = nir_imm_int(b, s->info->tcs.tcs_vertices_out);
      } else {
         if (s->info->inputs_linked) {
            out_vertices_per_patch = nir_imm_int(b, s->info->tes.tcs_vertices_out);
            num_tcs_outputs = nir_imm_int(b, s->info->tes.num_linked_inputs);
         } else {
            nir_def *n = GET_SGPR_FIELD_NIR(s->args->tcs_offchip_layout, TCS_OFFCHIP_LAYOUT_OUT_PATCH_CP);
            out_vertices_per_patch = nir_iadd_imm_nuw(b, n, 1);
            num_tcs_outputs = GET_SGPR_FIELD_NIR(s->args->tcs_offchip_layout, TCS_OFFCHIP_LAYOUT_NUM_HS_OUTPUTS);
         }
      }

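      /* Per-patch outputs live after the per-vertex outputs of all patches, so the
       * offset is num_patches * out_vertices_per_patch * num_tcs_outputs * 16 bytes.
       */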
      nir_def *per_vertex_output_patch_size =
         nir_imul(b, out_vertices_per_patch, nir_imul_imm(b, num_tcs_outputs, 16u));

      if (s->info->num_tess_patches) {
         unsigned num_patches = s->info->num_tess_patches;
         replacement = nir_imul_imm(b, per_vertex_output_patch_size, num_patches);
      } else {
         nir_def *n = GET_SGPR_FIELD_NIR(s->args->tcs_offchip_layout, TCS_OFFCHIP_LAYOUT_NUM_PATCHES);
         nir_def *num_patches = nir_iadd_imm_nuw(b, n, 1);
         replacement = nir_imul(b, per_vertex_output_patch_size, num_patches);
      }
      break;
   }
   case nir_intrinsic_load_sample_positions_amd: {
      uint32_t sample_pos_offset = (RING_PS_SAMPLE_POSITIONS * 16) - 8;

      nir_def *ring_offsets = ac_nir_load_arg(b, &s->args->ac, s->args->ac.ring_offsets);
      nir_def *addr = nir_pack_64_2x32(b, ring_offsets);
      nir_def *sample_id = nir_umin(b, intrin->src[0].ssa, nir_imm_int(b, 7));
      nir_def *offset = nir_ishl_imm(b, sample_id, 3); /* 2 floats containing samplepos.xy */

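      /* Fold the sample-count term of the table index into the constant base offset
       * when it is known at compile time; otherwise add it dynamically.
       */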
      nir_const_value *const_num_samples = nir_src_as_const_value(intrin->src[1]);
      if (const_num_samples) {
         sample_pos_offset += (const_num_samples->u32 << 3);
      } else {
         offset = nir_iadd(b, offset, nir_ishl_imm(b, intrin->src[1].ssa, 3));
      }

      replacement =
         nir_load_global_amd(b, 2, 32, addr, offset, .base = sample_pos_offset, .access = ACCESS_NON_WRITEABLE);
      break;
   }
   case nir_intrinsic_load_rasterization_samples_amd:
      if (s->gfx_state->dynamic_rasterization_samples) {
         replacement = GET_SGPR_FIELD_NIR(s->args->ps_state, PS_STATE_NUM_SAMPLES);
      } else {
         replacement = nir_imm_int(b, s->gfx_state->ms.rasterization_samples);
      }
      break;
   case nir_intrinsic_load_layer_id:
      replacement = ac_nir_unpack_arg(b, &s->args->ac, s->args->ac.ancillary, 16, s->gfx_level >= GFX12 ? 14 : 13);
      break;
   case nir_intrinsic_load_provoking_vtx_in_prim_amd: {
      if (s->gfx_state->dynamic_provoking_vtx_mode) {
         replacement = GET_SGPR_FIELD_NIR(s->args->ngg_state, NGG_STATE_PROVOKING_VTX);
      } else {
         unsigned provoking_vertex = 0;
         if (s->gfx_state->rs.provoking_vtx_last) {
            if (stage == MESA_SHADER_VERTEX) {
               provoking_vertex = radv_get_num_vertices_per_prim(s->gfx_state) - 1;
            } else if (stage == MESA_SHADER_GEOMETRY) {
               provoking_vertex = b->shader->info.gs.vertices_in - 1;
            } else {
               /* TES won't use this intrinsic, because it can get primitive id directly
                * instead of using this intrinsic to pass primitive id by LDS.
                */
               unreachable("load_provoking_vtx_in_prim_amd is only supported in VS and GS");
            }
         }

         replacement = nir_imm_int(b, provoking_vertex);
      }
      break;
   }
   case nir_intrinsic_atomic_add_gs_emit_prim_count_amd:
   case nir_intrinsic_atomic_add_gen_prim_count_amd:
   case nir_intrinsic_atomic_add_xfb_prim_count_amd:
   case nir_intrinsic_atomic_add_shader_invocation_count_amd: {
      uint32_t offset;

      if (intrin->intrinsic == nir_intrinsic_atomic_add_gs_emit_prim_count_amd) {
         offset = RADV_SHADER_QUERY_GS_PRIM_EMIT_OFFSET;
      } else if (intrin->intrinsic == nir_intrinsic_atomic_add_gen_prim_count_amd) {
         offset = stage == MESA_SHADER_MESH ? RADV_SHADER_QUERY_MS_PRIM_GEN_OFFSET
                                            : RADV_SHADER_QUERY_PRIM_GEN_OFFSET(nir_intrinsic_stream_id(intrin));
      } else if (intrin->intrinsic == nir_intrinsic_atomic_add_xfb_prim_count_amd) {
         offset = RADV_SHADER_QUERY_PRIM_XFB_OFFSET(nir_intrinsic_stream_id(intrin));
      } else {
         assert(intrin->intrinsic == nir_intrinsic_atomic_add_shader_invocation_count_amd);

         if (stage == MESA_SHADER_MESH) {
            offset = RADV_SHADER_QUERY_MS_INVOCATION_OFFSET;
         } else if (stage == MESA_SHADER_TASK) {
            offset = RADV_SHADER_QUERY_TS_INVOCATION_OFFSET;
         } else {
            offset = RADV_SHADER_QUERY_GS_INVOCATION_OFFSET;
         }
      }

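      /* GFX12+ emulates these queries with global atomics on a buffer whose VA comes
       * from a user SGPR; older generations accumulate the counts in GDS.
       */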
      if (s->gfx_level >= GFX12) {
         nir_def *va = nir_pack_64_2x32_split(b, ac_nir_load_arg(b, &s->args->ac, s->args->ngg_query_buf_va),
                                              nir_imm_int(b, s->address32_hi));

         /* Only generated/written primitives queries are emulated on GFX12+. */
         offset -= RADV_SHADER_QUERY_PRIM_GEN_OFFSET(0);
         assert(offset <= RADV_SHADER_QUERY_PRIM_XFB_OFFSET(3));

         nir_global_atomic_amd(b, 32, va, intrin->src[0].ssa, nir_imm_int(b, offset), .atomic_op = nir_atomic_op_iadd);
      } else {
         nir_gds_atomic_add_amd(b, 32, intrin->src[0].ssa, nir_imm_int(b, offset), nir_imm_int(b, 0x100));
      }
      break;
   }
   case nir_intrinsic_load_streamout_buffer_amd: {
      nir_def *ptr = nir_pack_64_2x32_split(b, ac_nir_load_arg(b, &s->args->ac, s->args->streamout_buffers),
                                            nir_imm_int(b, s->address32_hi));
      replacement = nir_load_smem_amd(b, 4, ptr, nir_imm_int(b, nir_intrinsic_base(intrin) * 16));
      break;
   }
   case nir_intrinsic_load_xfb_state_address_gfx12_amd:
      replacement = nir_pack_64_2x32_split(b, ac_nir_load_arg(b, &s->args->ac, s->args->streamout_state),
                                           nir_imm_int(b, s->address32_hi));
      break;
   case nir_intrinsic_load_lds_ngg_gs_out_vertex_base_amd:
      if (s->info->merged_shader_compiled_separately) {
         replacement = GET_SGPR_FIELD_NIR(s->args->ngg_lds_layout, NGG_LDS_LAYOUT_GS_OUT_VERTEX_BASE);
      } else {
         replacement = nir_imm_int(b, s->info->ngg_info.esgs_ring_size);
      }
      break;
   case nir_intrinsic_load_lds_ngg_scratch_base_amd:
      if (s->info->merged_shader_compiled_separately) {
         replacement = GET_SGPR_FIELD_NIR(s->args->ngg_lds_layout, NGG_LDS_LAYOUT_SCRATCH_BASE);
      } else {
         replacement = nir_imm_int(b, s->info->ngg_info.scratch_lds_base);
      }
      break;
   case nir_intrinsic_load_num_vertices_per_primitive_amd: {
      unsigned num_vertices;

      if (stage == MESA_SHADER_VERTEX) {
         /* For dynamic primitive topology with streamout. */
         if (s->info->vs.dynamic_num_verts_per_prim) {
            replacement = GET_SGPR_FIELD_NIR(s->args->ngg_state, NGG_STATE_NUM_VERTS_PER_PRIM);
         } else {
            replacement = nir_imm_int(b, radv_get_num_vertices_per_prim(s->gfx_state));
         }
      } else if (stage == MESA_SHADER_TESS_EVAL) {
         if (s->info->tes.point_mode) {
            num_vertices = 1;
         } else if (s->info->tes._primitive_mode == TESS_PRIMITIVE_ISOLINES) {
            num_vertices = 2;
         } else {
            num_vertices = 3;
         }
         replacement = nir_imm_int(b, num_vertices);
      } else {
         assert(stage == MESA_SHADER_GEOMETRY);
         switch (s->info->gs.output_prim) {
         case MESA_PRIM_POINTS:
            num_vertices = 1;
            break;
         case MESA_PRIM_LINE_STRIP:
            num_vertices = 2;
            break;
         case MESA_PRIM_TRIANGLE_STRIP:
            num_vertices = 3;
            break;
         default:
            unreachable("invalid GS output primitive");
            break;
         }
         replacement = nir_imm_int(b, num_vertices);
      }
      break;
   }
   case nir_intrinsic_load_force_vrs_rates_amd:
      replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.force_vrs_rates);
      break;
   case nir_intrinsic_load_fully_covered: {
      nir_def *sample_coverage = ac_nir_load_arg(b, &s->args->ac, s->args->ac.sample_coverage);
      replacement = nir_ine_imm(b, sample_coverage, 0);
      break;
   }
   case nir_intrinsic_load_poly_line_smooth_enabled: {
      nir_def *line_rast_mode = GET_SGPR_FIELD_NIR(s->args->ps_state, PS_STATE_LINE_RAST_MODE);
      replacement = nir_ieq_imm(b, line_rast_mode, VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH);
      break;
   }
   case nir_intrinsic_load_initial_edgeflags_amd:
      replacement = nir_imm_int(b, 0);
      break;
   case nir_intrinsic_load_provoking_vtx_amd:
      replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.load_provoking_vtx);
      break;
   case nir_intrinsic_load_rasterization_primitive_amd:
      assert(s->gfx_state->unknown_rast_prim);
      /* Load the primitive topology from a user SGPR when it's unknown at compile time (GPL). */
      replacement = GET_SGPR_FIELD_NIR(s->args->ps_state, PS_STATE_RAST_PRIM);
      break;
   default:
      progress = false;
      break;
   }

   if (!progress)
      return false;

   if (replacement)
      nir_def_rewrite_uses(&intrin->def, replacement);

   nir_instr_remove(&intrin->instr);
   nir_instr_free(&intrin->instr);

   return true;
}

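/* Build the GSVS ring descriptor for one GS vertex stream: offset the base address
 * past the previous streams' data, patch the stream's stride into dword 1 and set
 * dword 2 (num_records) to the wave size.
 */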
static nir_def *
load_gsvs_ring(nir_builder *b, lower_abi_state *s, unsigned stream_id)
{
   nir_def *ring = load_ring(b, RING_GSVS_GS, s);
   unsigned stream_offset = 0;
   unsigned stride = 0;
   for (unsigned i = 0; i <= stream_id; i++) {
      stride = 4 * s->info->gs.num_stream_output_components[i] * s->info->gs.vertices_out;
      if (i < stream_id)
         stream_offset += stride * s->info->wave_size;
   }

   /* Limit on the stride field for <= GFX7. */
   assert(stride < (1 << 14));

   if (stream_offset) {
      nir_def *addr = nir_pack_64_2x32_split(b, nir_channel(b, ring, 0), nir_channel(b, ring, 1));
      addr = nir_iadd_imm(b, addr, stream_offset);
      ring = nir_vector_insert_imm(b, ring, nir_unpack_64_2x32_split_x(b, addr), 0);
      ring = nir_vector_insert_imm(b, ring, nir_unpack_64_2x32_split_y(b, addr), 1);
   }

   ring = nir_vector_insert_imm(b, ring, nir_ior_imm(b, nir_channel(b, ring, 1), S_008F04_STRIDE(stride)), 1);
   return nir_vector_insert_imm(b, ring, nir_imm_int(b, s->info->wave_size), 2);
}

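/* Lower all RADV-specific ABI intrinsics in the shader. For legacy (non-NGG) GS,
 * the per-stream GSVS ring descriptors are built once at the top of the entry point.
 */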
void
radv_nir_lower_abi(nir_shader *shader, enum amd_gfx_level gfx_level, const struct radv_shader_stage *stage,
                   const struct radv_graphics_state_key *gfx_state, uint32_t address32_hi)
{
   lower_abi_state state = {
      .gfx_level = gfx_level,
      .info = &stage->info,
      .args = &stage->args,
      .gfx_state = gfx_state,
      .address32_hi = address32_hi,
   };

   if (shader->info.stage == MESA_SHADER_GEOMETRY && !stage->info.is_ngg) {
      nir_function_impl *impl = nir_shader_get_entrypoint(shader);

      nir_builder b = nir_builder_at(nir_before_impl(impl));

      u_foreach_bit (i, shader->info.gs.active_stream_mask)
         state.gsvs_ring[i] = load_gsvs_ring(&b, &state, i);
   }

   nir_shader_intrinsics_pass(shader, lower_abi_instr, nir_metadata_control_flow, &state);
}