• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2021 Valve Corporation
3  *
4  * SPDX-License-Identifier: MIT
5  */
6 
7 
8 #ifndef AC_NIR_H
9 #define AC_NIR_H
10 
11 #include "ac_hw_stage.h"
12 #include "ac_shader_args.h"
13 #include "ac_shader_util.h"
14 #include "nir.h"
15 
16 #ifdef __cplusplus
17 extern "C" {
18 #endif
19 
20 enum
21 {
22    /* SPI_PS_INPUT_CNTL_i.OFFSET[0:4] */
23    AC_EXP_PARAM_OFFSET_0 = 0,
24    AC_EXP_PARAM_OFFSET_31 = 31,
25    /* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL[0:1] */
26    AC_EXP_PARAM_DEFAULT_VAL_0000 = 64,
27    AC_EXP_PARAM_DEFAULT_VAL_0001,
28    AC_EXP_PARAM_DEFAULT_VAL_1110,
29    AC_EXP_PARAM_DEFAULT_VAL_1111,
30    AC_EXP_PARAM_UNDEFINED = 255, /* deprecated, use AC_EXP_PARAM_DEFAULT_VAL_0000 instead */
31 };
32 
33 enum {
34    AC_EXP_FLAG_COMPRESSED = (1 << 0),
35    AC_EXP_FLAG_DONE       = (1 << 1),
36    AC_EXP_FLAG_VALID_MASK = (1 << 2),
37 };
38 
39 struct ac_nir_config {
40    enum amd_gfx_level gfx_level;
41    bool uses_aco;
42 };
43 
44 /* Maps I/O semantics to the actual location used by the lowering pass. */
45 typedef unsigned (*ac_nir_map_io_driver_location)(unsigned semantic);
46 
47 /* Forward declaration of nir_builder so we don't have to include nir_builder.h here */
48 struct nir_builder;
49 typedef struct nir_builder nir_builder;
50 
51 struct nir_xfb_info;
52 typedef struct nir_xfb_info nir_xfb_info;
53 
54 /* Executed by ac_nir_cull when the current primitive is accepted. */
55 typedef void (*ac_nir_cull_accepted)(nir_builder *b, void *state);
56 
57 void
58 ac_nir_set_options(struct radeon_info *info, bool use_llvm,
59                    nir_shader_compiler_options *options);
60 
61 nir_def *
62 ac_nir_load_arg_at_offset(nir_builder *b, const struct ac_shader_args *ac_args,
63                           struct ac_arg arg, unsigned relative_index);
64 
65 nir_def *
66 ac_nir_load_arg(nir_builder *b, const struct ac_shader_args *ac_args, struct ac_arg arg);
67 
68 nir_def *
69 ac_nir_load_arg_upper_bound(nir_builder *b, const struct ac_shader_args *ac_args, struct ac_arg arg,
70                             unsigned upper_bound);
71 
72 void ac_nir_store_arg(nir_builder *b, const struct ac_shader_args *ac_args, struct ac_arg arg,
73                       nir_def *val);
74 
75 nir_def *
76 ac_nir_unpack_arg(nir_builder *b, const struct ac_shader_args *ac_args, struct ac_arg arg,
77                   unsigned rshift, unsigned bitwidth);
78 
79 bool ac_nir_lower_sin_cos(nir_shader *shader);
80 
81 bool ac_nir_lower_intrinsics_to_args(nir_shader *shader, const enum amd_gfx_level gfx_level,
82                                      bool has_ls_vgpr_init_bug, const enum ac_hw_stage hw_stage,
83                                      unsigned wave_size, unsigned workgroup_size,
84                                      const struct ac_shader_args *ac_args);
85 
86 nir_xfb_info *ac_nir_get_sorted_xfb_info(const nir_shader *nir);
87 
88 bool ac_nir_optimize_outputs(nir_shader *nir, bool sprite_tex_disallowed,
89                              int8_t slot_remap[NUM_TOTAL_VARYING_SLOTS],
90                              uint8_t param_export_index[NUM_TOTAL_VARYING_SLOTS]);
91 
92 void
93 ac_nir_lower_ls_outputs_to_mem(nir_shader *ls,
94                                ac_nir_map_io_driver_location map,
95                                enum amd_gfx_level gfx_level,
96                                bool tcs_in_out_eq,
97                                uint64_t tcs_inputs_via_temp,
98                                uint64_t tcs_inputs_via_lds);
99 
100 void
101 ac_nir_lower_hs_inputs_to_mem(nir_shader *shader,
102                               ac_nir_map_io_driver_location map,
103                               enum amd_gfx_level gfx_level,
104                               bool tcs_in_out_eq,
105                               uint64_t tcs_inputs_via_temp,
106                               uint64_t tcs_inputs_via_lds);
107 
108 void
109 ac_nir_lower_hs_outputs_to_mem(nir_shader *shader, const nir_tcs_info *info,
110                                ac_nir_map_io_driver_location map,
111                                enum amd_gfx_level gfx_level,
112                                uint64_t tes_inputs_read,
113                                uint32_t tes_patch_inputs_read,
114                                unsigned wave_size);
115 
116 void
117 ac_nir_lower_tes_inputs_to_mem(nir_shader *shader,
118                                ac_nir_map_io_driver_location map);
119 
120 void
121 ac_nir_compute_tess_wg_info(const struct radeon_info *info, const struct shader_info *tcs_info,
122                             unsigned wave_size, bool tess_uses_primid, bool all_invocations_define_tess_levels,
123                             unsigned num_tcs_input_cp, unsigned lds_input_vertex_size,
124                             unsigned num_mem_tcs_outputs, unsigned num_mem_tcs_patch_outputs,
125                             unsigned *num_patches_per_wg, unsigned *hw_lds_size);
126 
127 void
128 ac_nir_lower_es_outputs_to_mem(nir_shader *shader,
129                                ac_nir_map_io_driver_location map,
130                                enum amd_gfx_level gfx_level,
131                                unsigned esgs_itemsize,
132                                uint64_t gs_inputs_read);
133 
134 void
135 ac_nir_lower_gs_inputs_to_mem(nir_shader *shader,
136                               ac_nir_map_io_driver_location map,
137                               enum amd_gfx_level gfx_level,
138                               bool triangle_strip_adjacency_fix);
139 
140 bool
141 ac_nir_lower_indirect_derefs(nir_shader *shader,
142                              enum amd_gfx_level gfx_level);
143 
144 typedef struct {
145    const struct radeon_info *hw_info;
146 
147    unsigned max_workgroup_size;
148    unsigned wave_size;
149    uint8_t clip_cull_dist_mask;
150    const uint8_t *vs_output_param_offset; /* GFX11+ */
151    bool has_param_exports;
152    bool can_cull;
153    bool disable_streamout;
154    bool has_gen_prim_query;
155    bool has_xfb_prim_query;
156    bool use_gfx12_xfb_intrinsic;
157    bool has_gs_invocations_query;
158    bool has_gs_primitives_query;
159    bool kill_pointsize;
160    bool kill_layer;
161    bool force_vrs;
162    bool compact_primitives;
163 
164    /* VS */
165    unsigned num_vertices_per_primitive;
166    bool early_prim_export;
167    bool passthrough;
168    bool use_edgeflags;
169    bool export_primitive_id;
170    bool export_primitive_id_per_prim;
171    uint32_t instance_rate_inputs;
172    uint32_t user_clip_plane_enable_mask;
173 
174    /* GS */
175    unsigned gs_out_vtx_bytes;
176 } ac_nir_lower_ngg_options;
177 
178 void
179 ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *options);
180 
181 void
182 ac_nir_lower_ngg_gs(nir_shader *shader, const ac_nir_lower_ngg_options *options);
183 
184 void
185 ac_nir_lower_ngg_mesh(nir_shader *shader,
186                       const struct radeon_info *hw_info,
187                       uint32_t clipdist_enable_mask,
188                       const uint8_t *vs_output_param_offset,
189                       bool has_param_exports,
190                       bool *out_needs_scratch_ring,
191                       unsigned wave_size,
192                       unsigned workgroup_size,
193                       bool multiview,
194                       bool has_query,
195                       bool fast_launch_2);
196 
197 void
198 ac_nir_lower_task_outputs_to_mem(nir_shader *shader,
199                                  unsigned task_payload_entry_bytes,
200                                  unsigned task_num_entries,
201                                  bool has_query);
202 
203 void
204 ac_nir_lower_mesh_inputs_to_mem(nir_shader *shader,
205                                 unsigned task_payload_entry_bytes,
206                                 unsigned task_num_entries);
207 
208 bool
209 ac_nir_lower_global_access(nir_shader *shader);
210 
211 bool ac_nir_lower_resinfo(nir_shader *nir, enum amd_gfx_level gfx_level);
212 bool ac_nir_lower_image_opcodes(nir_shader *nir);
213 
214 typedef struct ac_nir_gs_output_info {
215    const uint8_t *streams;
216    const uint8_t *streams_16bit_lo;
217    const uint8_t *streams_16bit_hi;
218 
219    const uint8_t *varying_mask;
220    const uint8_t *varying_mask_16bit_lo;
221    const uint8_t *varying_mask_16bit_hi;
222 
223    const uint8_t *sysval_mask;
224 
225    /* type for each 16bit slot component */
226    nir_alu_type (*types_16bit_lo)[4];
227    nir_alu_type (*types_16bit_hi)[4];
228 } ac_nir_gs_output_info;
229 
230 nir_shader *
231 ac_nir_create_gs_copy_shader(const nir_shader *gs_nir,
232                              enum amd_gfx_level gfx_level,
233                              uint32_t clip_cull_mask,
234                              const uint8_t *param_offsets,
235                              bool has_param_exports,
236                              bool disable_streamout,
237                              bool kill_pointsize,
238                              bool kill_layer,
239                              bool force_vrs,
240                              ac_nir_gs_output_info *output_info);
241 
242 void
243 ac_nir_lower_legacy_vs(nir_shader *nir,
244                        enum amd_gfx_level gfx_level,
245                        uint32_t clip_cull_mask,
246                        const uint8_t *param_offsets,
247                        bool has_param_exports,
248                        bool export_primitive_id,
249                        bool disable_streamout,
250                        bool kill_pointsize,
251                        bool kill_layer,
252                        bool force_vrs);
253 
254 void
255 ac_nir_lower_legacy_gs(nir_shader *nir,
256                        bool has_gen_prim_query,
257                        bool has_pipeline_stats_query,
258                        ac_nir_gs_output_info *output_info);
259 
260 /* This is a pre-link pass. It should only eliminate code and do lowering that mostly doesn't
261  * generate AMD-specific intrinsics.
262  */
263 typedef struct {
264    /* System values. */
265    bool force_center_interp_no_msaa; /* true if MSAA is disabled, false may mean that the state is unknown */
266    bool uses_vrs_coarse_shading;
267    bool load_sample_positions_always_loads_current_ones;
268    bool dynamic_rasterization_samples;
269    int force_front_face; /* 0 -> keep, 1 -> set to true, -1 -> set to false */
270    bool optimize_frag_coord; /* TODO: remove this after RADV can handle it */
271    bool frag_coord_is_center; /* GL requirement for sample shading */
272 
273    /* frag_coord/pixel_coord:
274     *    allow_pixel_coord && (frag_coord_is_center || ps_iter_samples == 1 ||
275     *                          force_center_interp_no_msaa ||
276     *                          the fractional part of frag_coord.xy isn't used):
277     *       * frag_coord.xy is replaced by u2f(pixel_coord) + 0.5.
278     *    else:
279     *       * pixel_coord is replaced by f2u16(frag_coord.xy)
280     *       * ps_iter_samples == 0 means the state is unknown.
281     *
282     * barycentrics:
283     *    force_center_interp_no_msaa:
284     *       * All barycentrics including at_sample but excluding at_offset are changed to
285     *         barycentric_pixel
286     *    ps_iter_samples >= 2:
287     *       * All barycentrics are changed to per-sample interpolation except at_offset/at_sample.
288     *       * barycentric_at_sample(sample_id) is replaced by barycentric_sample.
289     *
290     * sample_mask_in:
291     *    force_center_interp_no_msaa && !uses_vrs_coarse_shading:
292     *       * sample_mask_in is replaced by b2i32(!helper_invocation)
293     *    ps_iter_samples == 2, 4:
294     *       * sample_mask_in is changed to (sample_mask_in & (ps_iter_mask << sample_id))
295     *    ps_iter_samples == 8:
296     *       * sample_mask_in is replaced by 1 << sample_id.
297     *
298     * When ps_iter_samples is equal to rasterization samples, set ps_iter_samples = 8 for this pass.
299     */
300    unsigned ps_iter_samples;
301 
302    /* fbfetch_output */
303    bool fbfetch_is_1D;
304    bool fbfetch_layered;
305    bool fbfetch_msaa;
306    bool fbfetch_apply_fmask;
307 
308    /* Outputs. */
309    bool clamp_color;                /* GL only */
310    bool alpha_test_alpha_to_one;    /* GL only, this only affects alpha test */
311    enum compare_func alpha_func;    /* GL only */
312    bool keep_alpha_for_mrtz;        /* this prevents killing alpha based on spi_shader_col_format_hint */
313    unsigned spi_shader_col_format_hint; /* this only shrinks and eliminates output stores */
314    bool kill_z;
315    bool kill_stencil;
316    bool kill_samplemask;
317 } ac_nir_lower_ps_early_options;
318 
319 bool
320 ac_nir_lower_ps_early(nir_shader *nir, const ac_nir_lower_ps_early_options *options);
321 
322 /* This is a post-link pass. It shouldn't eliminate any code and it shouldn't affect shader_info
323  * (those should be done in the early pass).
324  */
325 typedef struct {
326    enum amd_gfx_level gfx_level;
327    enum radeon_family family;
328    bool use_aco;
329 
330    /* System values. */
331    bool bc_optimize_for_persp;
332    bool bc_optimize_for_linear;
333 
334    /* Exports. */
335    bool uses_discard;
336    bool alpha_to_coverage_via_mrtz;
337    bool dual_src_blend_swizzle;
338    unsigned spi_shader_col_format;
339    unsigned color_is_int8;
340    unsigned color_is_int10;
341    bool alpha_to_one;
342 
343    /* Vulkan only */
344    unsigned enable_mrt_output_nan_fixup;
345    bool no_color_export;
346    bool no_depth_export;
347 } ac_nir_lower_ps_late_options;
348 
349 bool
350 ac_nir_lower_ps_late(nir_shader *nir, const ac_nir_lower_ps_late_options *options);
351 
352 typedef struct {
353    enum amd_gfx_level gfx_level;
354 
355    /* If true, round the layer component of the coordinates source to the nearest
356     * integer for all array ops. This is always done for cube array ops.
357     */
358    bool lower_array_layer_round_even;
359 
360    /* Fix derivatives of constants and FS inputs in control flow.
361     *
362     * Ignores interpolateAtSample()/interpolateAtOffset(), dynamically indexed input loads,
363     * pervertexEXT input loads, textureGather() with implicit LOD and 16-bit derivatives and
364     * texture samples with nir_tex_src_min_lod.
365     *
366     * The layer must also be a constant or FS input.
367     */
368    bool fix_derivs_in_divergent_cf;
369    unsigned max_wqm_vgprs;
370 } ac_nir_lower_tex_options;
371 
372 bool
373 ac_nir_lower_tex(nir_shader *nir, const ac_nir_lower_tex_options *options);
374 
375 void
376 ac_nir_store_debug_log_amd(nir_builder *b, nir_def *uvec4);
377 
378 bool
379 ac_nir_opt_pack_half(nir_shader *shader, enum amd_gfx_level gfx_level);
380 
381 unsigned
382 ac_nir_varying_expression_max_cost(nir_shader *producer, nir_shader *consumer);
383 
384 bool
385 ac_nir_opt_shared_append(nir_shader *shader);
386 
387 bool
388 ac_nir_flag_smem_for_loads(nir_shader *shader, enum amd_gfx_level gfx_level, bool use_llvm, bool after_lowering);
389 
390 bool
391 ac_nir_lower_mem_access_bit_sizes(nir_shader *shader, enum amd_gfx_level gfx_level, bool use_llvm);
392 
393 bool
394 ac_nir_optimize_uniform_atomics(nir_shader *nir);
395 
396 unsigned
397 ac_nir_lower_bit_size_callback(const nir_instr *instr, void *data);
398 
399 bool
400 ac_nir_mem_vectorize_callback(unsigned align_mul, unsigned align_offset, unsigned bit_size,
401                               unsigned num_components, int64_t hole_size,
402                               nir_intrinsic_instr *low, nir_intrinsic_instr *high, void *data);
403 
404 bool
405 ac_nir_scalarize_overfetching_loads_callback(const nir_instr *instr, const void *data);
406 
407 enum gl_access_qualifier
408 ac_nir_get_mem_access_flags(const nir_intrinsic_instr *instr);
409 
410 #ifdef __cplusplus
411 }
412 #endif
413 
414 #endif /* AC_NIR_H */
415