• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2021 Valve Corporation
3  *
4  * SPDX-License-Identifier: MIT
5  */
6 
7 
8 #ifndef AC_NIR_H
9 #define AC_NIR_H
10 
11 #include "ac_hw_stage.h"
12 #include "ac_shader_args.h"
13 #include "ac_shader_util.h"
14 #include "nir.h"
15 
16 #ifdef __cplusplus
17 extern "C" {
18 #endif
19 
20 enum
21 {
22    /* SPI_PS_INPUT_CNTL_i.OFFSET[0:4] */
23    AC_EXP_PARAM_OFFSET_0 = 0,
24    AC_EXP_PARAM_OFFSET_31 = 31,
25    /* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL[0:1] */
26    AC_EXP_PARAM_DEFAULT_VAL_0000 = 64,
27    AC_EXP_PARAM_DEFAULT_VAL_0001,
28    AC_EXP_PARAM_DEFAULT_VAL_1110,
29    AC_EXP_PARAM_DEFAULT_VAL_1111,
30    AC_EXP_PARAM_UNDEFINED = 255, /* deprecated, use AC_EXP_PARAM_DEFAULT_VAL_0000 instead */
31 };
32 
33 enum {
34    AC_EXP_FLAG_COMPRESSED = (1 << 0),
35    AC_EXP_FLAG_DONE       = (1 << 1),
36    AC_EXP_FLAG_VALID_MASK = (1 << 2),
37 };
38 
39 struct ac_nir_config {
40    enum amd_gfx_level gfx_level;
41    bool uses_aco;
42 };
43 
44 /* TODO: Remove these once radeonsi gathers shader_info before lowering. */
45 #define AC_VECTOR_ARG_FLAG(name, value)      (((name) & 0xf) | ((value) << 4))
46 #define     AC_VECTOR_ARG_UNSET              0
47 #define     AC_VECTOR_ARG_INTERP_MODE        1
48 #define     AC_VECTOR_ARG_IS_COLOR           2
49 #define AC_VECTOR_ARG_FLAG_GET_NAME(intr)    (nir_intrinsic_flags(intr) & 0xf)
50 #define AC_VECTOR_ARG_FLAG_GET_VALUE(intr)   (nir_intrinsic_flags(intr) >> 4)
51 
52 /* Maps I/O semantics to the actual location used by the lowering pass. */
53 typedef unsigned (*ac_nir_map_io_driver_location)(unsigned semantic);
54 
55 /* Forward declaration of nir_builder so we don't have to include nir_builder.h here */
56 struct nir_builder;
57 typedef struct nir_builder nir_builder;
58 
59 struct nir_xfb_info;
60 typedef struct nir_xfb_info nir_xfb_info;
61 
62 /* Executed by ac_nir_cull when the current primitive is accepted. */
63 typedef void (*ac_nir_cull_accepted)(nir_builder *b, void *state);
64 
65 void
66 ac_nir_set_options(struct radeon_info *info, bool use_llvm,
67                    nir_shader_compiler_options *options);
68 
69 nir_def *
70 ac_nir_load_arg_at_offset(nir_builder *b, const struct ac_shader_args *ac_args,
71                           struct ac_arg arg, unsigned relative_index);
72 
73 nir_def *
74 ac_nir_load_arg(nir_builder *b, const struct ac_shader_args *ac_args, struct ac_arg arg);
75 
76 nir_def *
77 ac_nir_load_arg_upper_bound(nir_builder *b, const struct ac_shader_args *ac_args, struct ac_arg arg,
78                             unsigned upper_bound);
79 
80 void ac_nir_store_arg(nir_builder *b, const struct ac_shader_args *ac_args, struct ac_arg arg,
81                       nir_def *val);
82 
83 nir_def *
84 ac_nir_unpack_arg(nir_builder *b, const struct ac_shader_args *ac_args, struct ac_arg arg,
85                   unsigned rshift, unsigned bitwidth);
86 
87 bool ac_nir_lower_sin_cos(nir_shader *shader);
88 
89 bool ac_nir_lower_intrinsics_to_args(nir_shader *shader, const enum amd_gfx_level gfx_level,
90                                      bool has_ls_vgpr_init_bug, const enum ac_hw_stage hw_stage,
91                                      unsigned wave_size, unsigned workgroup_size,
92                                      const struct ac_shader_args *ac_args);
93 
94 nir_xfb_info *ac_nir_get_sorted_xfb_info(const nir_shader *nir);
95 
96 bool ac_nir_optimize_outputs(nir_shader *nir, bool sprite_tex_disallowed,
97                              int8_t slot_remap[NUM_TOTAL_VARYING_SLOTS],
98                              uint8_t param_export_index[NUM_TOTAL_VARYING_SLOTS]);
99 
100 void
101 ac_nir_lower_ls_outputs_to_mem(nir_shader *ls,
102                                ac_nir_map_io_driver_location map,
103                                enum amd_gfx_level gfx_level,
104                                bool tcs_in_out_eq,
105                                uint64_t tcs_inputs_via_temp,
106                                uint64_t tcs_inputs_via_lds);
107 
108 void
109 ac_nir_lower_hs_inputs_to_mem(nir_shader *shader,
110                               ac_nir_map_io_driver_location map,
111                               enum amd_gfx_level gfx_level,
112                               bool tcs_in_out_eq,
113                               uint64_t tcs_inputs_via_temp,
114                               uint64_t tcs_inputs_via_lds);
115 
116 void
117 ac_nir_lower_hs_outputs_to_mem(nir_shader *shader, const nir_tcs_info *info,
118                                ac_nir_map_io_driver_location map,
119                                enum amd_gfx_level gfx_level,
120                                uint64_t tes_inputs_read,
121                                uint32_t tes_patch_inputs_read,
122                                unsigned wave_size);
123 
124 void
125 ac_nir_lower_tes_inputs_to_mem(nir_shader *shader,
126                                ac_nir_map_io_driver_location map);
127 
128 void
129 ac_nir_compute_tess_wg_info(const struct radeon_info *info, const struct shader_info *tcs_info,
130                             unsigned wave_size, bool tess_uses_primid, bool all_invocations_define_tess_levels,
131                             unsigned num_tcs_input_cp, unsigned lds_input_vertex_size,
132                             unsigned num_mem_tcs_outputs, unsigned num_mem_tcs_patch_outputs,
133                             unsigned *num_patches_per_wg, unsigned *hw_lds_size);
134 
135 void
136 ac_nir_lower_es_outputs_to_mem(nir_shader *shader,
137                                ac_nir_map_io_driver_location map,
138                                enum amd_gfx_level gfx_level,
139                                unsigned esgs_itemsize,
140                                uint64_t gs_inputs_read);
141 
142 void
143 ac_nir_lower_gs_inputs_to_mem(nir_shader *shader,
144                               ac_nir_map_io_driver_location map,
145                               enum amd_gfx_level gfx_level,
146                               bool triangle_strip_adjacency_fix);
147 
148 bool
149 ac_nir_lower_indirect_derefs(nir_shader *shader,
150                              enum amd_gfx_level gfx_level);
151 
152 typedef struct {
153    enum radeon_family family;
154    enum amd_gfx_level gfx_level;
155 
156    unsigned max_workgroup_size;
157    unsigned wave_size;
158    uint8_t clip_cull_dist_mask;
159    const uint8_t *vs_output_param_offset; /* GFX11+ */
160    bool has_param_exports;
161    bool can_cull;
162    bool disable_streamout;
163    bool has_gen_prim_query;
164    bool has_xfb_prim_query;
165    bool use_gfx12_xfb_intrinsic;
166    bool has_gs_invocations_query;
167    bool has_gs_primitives_query;
168    bool kill_pointsize;
169    bool kill_layer;
170    bool force_vrs;
171    bool compact_primitives;
172 
173    /* VS */
174    unsigned num_vertices_per_primitive;
175    bool early_prim_export;
176    bool passthrough;
177    bool use_edgeflags;
178    bool export_primitive_id;
179    bool export_primitive_id_per_prim;
180    uint32_t instance_rate_inputs;
181    uint32_t user_clip_plane_enable_mask;
182 
183    /* GS */
184    unsigned gs_out_vtx_bytes;
185 } ac_nir_lower_ngg_options;
186 
187 void
188 ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *options);
189 
190 void
191 ac_nir_lower_ngg_gs(nir_shader *shader, const ac_nir_lower_ngg_options *options);
192 
193 void
194 ac_nir_lower_ngg_mesh(nir_shader *shader,
195                     enum amd_gfx_level gfx_level,
196                     uint32_t clipdist_enable_mask,
197                     const uint8_t *vs_output_param_offset,
198                     bool has_param_exports,
199                     bool *out_needs_scratch_ring,
200                     unsigned wave_size,
201                     unsigned workgroup_size,
202                     bool multiview,
203                     bool has_query,
204                     bool fast_launch_2);
205 
206 void
207 ac_nir_lower_task_outputs_to_mem(nir_shader *shader,
208                                  unsigned task_payload_entry_bytes,
209                                  unsigned task_num_entries,
210                                  bool has_query);
211 
212 void
213 ac_nir_lower_mesh_inputs_to_mem(nir_shader *shader,
214                                 unsigned task_payload_entry_bytes,
215                                 unsigned task_num_entries);
216 
217 bool
218 ac_nir_lower_global_access(nir_shader *shader);
219 
220 bool ac_nir_lower_resinfo(nir_shader *nir, enum amd_gfx_level gfx_level);
221 bool ac_nir_lower_image_opcodes(nir_shader *nir);
222 
223 typedef struct ac_nir_gs_output_info {
224    const uint8_t *streams;
225    const uint8_t *streams_16bit_lo;
226    const uint8_t *streams_16bit_hi;
227 
228    const uint8_t *varying_mask;
229    const uint8_t *varying_mask_16bit_lo;
230    const uint8_t *varying_mask_16bit_hi;
231 
232    const uint8_t *sysval_mask;
233 
234    /* type for each 16bit slot component */
235    nir_alu_type (*types_16bit_lo)[4];
236    nir_alu_type (*types_16bit_hi)[4];
237 } ac_nir_gs_output_info;
238 
239 nir_shader *
240 ac_nir_create_gs_copy_shader(const nir_shader *gs_nir,
241                              enum amd_gfx_level gfx_level,
242                              uint32_t clip_cull_mask,
243                              const uint8_t *param_offsets,
244                              bool has_param_exports,
245                              bool disable_streamout,
246                              bool kill_pointsize,
247                              bool kill_layer,
248                              bool force_vrs,
249                              ac_nir_gs_output_info *output_info);
250 
251 void
252 ac_nir_lower_legacy_vs(nir_shader *nir,
253                        enum amd_gfx_level gfx_level,
254                        uint32_t clip_cull_mask,
255                        const uint8_t *param_offsets,
256                        bool has_param_exports,
257                        bool export_primitive_id,
258                        bool disable_streamout,
259                        bool kill_pointsize,
260                        bool kill_layer,
261                        bool force_vrs);
262 
263 void
264 ac_nir_lower_legacy_gs(nir_shader *nir,
265                        bool has_gen_prim_query,
266                        bool has_pipeline_stats_query,
267                        ac_nir_gs_output_info *output_info);
268 
269 typedef struct {
270    /* This is a pre-link pass. It should only eliminate code and do lowering that mostly doesn't
271     * generate AMD-specific intrinsics.
272     */
273    /* System values. */
274    bool force_persp_sample_interp;
275    bool force_linear_sample_interp;
276    bool force_persp_center_interp;
277    bool force_linear_center_interp;
278    unsigned ps_iter_samples;
279 
280    /* Outputs. */
281    bool clamp_color;                /* GL only */
282    bool alpha_test_alpha_to_one;    /* GL only, this only affects alpha test */
283    enum compare_func alpha_func;    /* GL only */
284    bool keep_alpha_for_mrtz;        /* this prevents killing alpha based on spi_shader_col_format_hint */
285    unsigned spi_shader_col_format_hint; /* this only shrinks and eliminates output stores */
286    bool kill_z;
287    bool kill_stencil;
288    bool kill_samplemask;
289 } ac_nir_lower_ps_early_options;
290 
291 void
292 ac_nir_lower_ps_early(nir_shader *nir, const ac_nir_lower_ps_early_options *options);
293 
294 typedef struct {
295    /* This is a post-link pass. It shouldn't eliminate any code and it shouldn't affect shader_info
296     * (those should be done in the early pass).
297     */
298    enum amd_gfx_level gfx_level;
299    enum radeon_family family;
300    bool use_aco;
301 
302    /* System values. */
303    bool bc_optimize_for_persp;
304    bool bc_optimize_for_linear;
305 
306    /* Exports. */
307    bool uses_discard;
308    bool alpha_to_coverage_via_mrtz;
309    bool dual_src_blend_swizzle;
310    unsigned spi_shader_col_format;
311    unsigned color_is_int8;
312    unsigned color_is_int10;
313    bool alpha_to_one;
314 
315    /* Vulkan only */
316    unsigned enable_mrt_output_nan_fixup;
317    bool no_color_export;
318    bool no_depth_export;
319 } ac_nir_lower_ps_late_options;
320 
321 void
322 ac_nir_lower_ps_late(nir_shader *nir, const ac_nir_lower_ps_late_options *options);
323 
324 typedef struct {
325    enum amd_gfx_level gfx_level;
326 
327    /* If true, round the layer component of the coordinates source to the nearest
328     * integer for all array ops. This is always done for cube array ops.
329     */
330    bool lower_array_layer_round_even;
331 
332    /* Fix derivatives of constants and FS inputs in control flow.
333     *
334     * Ignores interpolateAtSample()/interpolateAtOffset(), dynamically indexed input loads,
335     * pervertexEXT input loads, textureGather() with implicit LOD and 16-bit derivatives and
336     * texture samples with nir_tex_src_min_lod.
337     *
338     * The layer must also be a constant or FS input.
339     */
340    bool fix_derivs_in_divergent_cf;
341    unsigned max_wqm_vgprs;
342 } ac_nir_lower_tex_options;
343 
344 bool
345 ac_nir_lower_tex(nir_shader *nir, const ac_nir_lower_tex_options *options);
346 
347 void
348 ac_nir_store_debug_log_amd(nir_builder *b, nir_def *uvec4);
349 
350 bool
351 ac_nir_opt_pack_half(nir_shader *shader, enum amd_gfx_level gfx_level);
352 
353 unsigned
354 ac_nir_varying_expression_max_cost(nir_shader *producer, nir_shader *consumer);
355 
356 bool
357 ac_nir_opt_shared_append(nir_shader *shader);
358 
359 bool
360 ac_nir_flag_smem_for_loads(nir_shader *shader, enum amd_gfx_level gfx_level, bool use_llvm, bool after_lowering);
361 
362 bool
363 ac_nir_lower_mem_access_bit_sizes(nir_shader *shader, enum amd_gfx_level gfx_level, bool use_llvm);
364 
365 bool
366 ac_nir_optimize_uniform_atomics(nir_shader *nir);
367 
368 unsigned
369 ac_nir_lower_bit_size_callback(const nir_instr *instr, void *data);
370 
371 bool
372 ac_nir_mem_vectorize_callback(unsigned align_mul, unsigned align_offset, unsigned bit_size,
373                               unsigned num_components, int64_t hole_size,
374                               nir_intrinsic_instr *low, nir_intrinsic_instr *high, void *data);
375 
376 bool
377 ac_nir_scalarize_overfetching_loads_callback(const nir_instr *instr, const void *data);
378 
379 enum gl_access_qualifier
380 ac_nir_get_mem_access_flags(const nir_intrinsic_instr *instr);
381 
382 #ifdef __cplusplus
383 }
384 #endif
385 
386 #endif /* AC_NIR_H */
387