• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2021 Valve Corporation
3  *
4  * SPDX-License-Identifier: MIT
5  */
6 
7 
8 #ifndef AC_NIR_H
9 #define AC_NIR_H
10 
11 #include "ac_hw_stage.h"
12 #include "ac_shader_args.h"
13 #include "ac_shader_util.h"
14 #include "nir.h"
15 #include "nir_builder.h"
16 
17 #ifdef __cplusplus
18 extern "C" {
19 #endif
20 
/* Encodings for a parameter-export slot value.
 *
 * 0..31 are real offsets (the OFFSET field is 5 bits wide), 64..67 select one
 * of the DEFAULT_VAL encodings, and 255 is the deprecated "undefined" marker.
 * NOTE(review): presumably these are the values stored in the param_offsets /
 * vs_output_param_offset arrays declared in this header - confirm.
 */
enum
{
   /* SPI_PS_INPUT_CNTL_i.OFFSET[0:4] */
   AC_EXP_PARAM_OFFSET_0 = 0,
   AC_EXP_PARAM_OFFSET_31 = 31,
   /* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL[0:1] */
   AC_EXP_PARAM_DEFAULT_VAL_0000 = 64,
   AC_EXP_PARAM_DEFAULT_VAL_0001, /* 65 (implicit: previous + 1) */
   AC_EXP_PARAM_DEFAULT_VAL_1110, /* 66 */
   AC_EXP_PARAM_DEFAULT_VAL_1111, /* 67 */
   AC_EXP_PARAM_UNDEFINED = 255, /* deprecated, use AC_EXP_PARAM_DEFAULT_VAL_0000 instead */
};
33 
/* Bit flags: every enumerator occupies its own bit, so they can be OR'ed
 * together into a single mask.
 * NOTE(review): presumably modifiers for export instructions - confirm
 * against the users of these flags.
 */
enum {
   AC_EXP_FLAG_COMPRESSED = (1 << 0),
   AC_EXP_FLAG_DONE       = (1 << 1),
   AC_EXP_FLAG_VALID_MASK = (1 << 2),
};
39 
/* Maps I/O semantics to the actual location used by the lowering pass.
 *
 * Callback type: receives the I/O semantic as an unsigned value and returns
 * the driver location as an unsigned value.
 */
typedef unsigned (*ac_nir_map_io_driver_location)(unsigned semantic);
42 
/* Forward declaration of nir_builder, so the declarations below only need the
 * type name.
 * NOTE(review): this header also includes nir_builder.h at the top, so the
 * forward declaration is currently redundant with that include - confirm
 * which of the two is meant to stay.
 */
struct nir_builder;
typedef struct nir_builder nir_builder;
46 
47 /* Executed by ac_nir_cull when the current primitive is accepted. */
48 typedef void (*ac_nir_cull_accepted)(nir_builder *b, void *state);
49 
50 nir_def *
51 ac_nir_load_arg_at_offset(nir_builder *b, const struct ac_shader_args *ac_args,
52                           struct ac_arg arg, unsigned relative_index);
53 
54 static inline nir_def *
ac_nir_load_arg(nir_builder * b,const struct ac_shader_args * ac_args,struct ac_arg arg)55 ac_nir_load_arg(nir_builder *b, const struct ac_shader_args *ac_args, struct ac_arg arg)
56 {
57    return ac_nir_load_arg_at_offset(b, ac_args, arg, 0);
58 }
59 
60 void ac_nir_store_arg(nir_builder *b, const struct ac_shader_args *ac_args, struct ac_arg arg,
61                       nir_def *val);
62 
63 nir_def *
64 ac_nir_unpack_arg(nir_builder *b, const struct ac_shader_args *ac_args, struct ac_arg arg,
65                   unsigned rshift, unsigned bitwidth);
66 
67 bool ac_nir_lower_sin_cos(nir_shader *shader);
68 
69 bool ac_nir_lower_intrinsics_to_args(nir_shader *shader, const enum amd_gfx_level gfx_level,
70                                      const enum ac_hw_stage hw_stage,
71                                      const struct ac_shader_args *ac_args);
72 
73 void
74 ac_nir_store_var_components(nir_builder *b, nir_variable *var, nir_def *value,
75                             unsigned component, unsigned writemask);
76 
77 void
78 ac_nir_export_primitive(nir_builder *b, nir_def *prim, nir_def *row);
79 
80 void
81 ac_nir_export_position(nir_builder *b,
82                        enum amd_gfx_level gfx_level,
83                        uint32_t clip_cull_mask,
84                        bool no_param_export,
85                        bool force_vrs,
86                        bool done,
87                        uint64_t outputs_written,
88                        nir_def *(*outputs)[4],
89                        nir_def *row);
90 
91 void
92 ac_nir_export_parameters(nir_builder *b,
93                          const uint8_t *param_offsets,
94                          uint64_t outputs_written,
95                          uint16_t outputs_written_16bit,
96                          nir_def *(*outputs)[4],
97                          nir_def *(*outputs_16bit_lo)[4],
98                          nir_def *(*outputs_16bit_hi)[4]);
99 
100 nir_def *
101 ac_nir_calc_io_offset(nir_builder *b,
102                       nir_intrinsic_instr *intrin,
103                       nir_def *base_stride,
104                       unsigned component_stride,
105                       ac_nir_map_io_driver_location map_io);
106 
107 bool ac_nir_optimize_outputs(nir_shader *nir, bool sprite_tex_disallowed,
108                              int8_t slot_remap[NUM_TOTAL_VARYING_SLOTS],
109                              uint8_t param_export_index[NUM_TOTAL_VARYING_SLOTS]);
110 
111 void
112 ac_nir_lower_ls_outputs_to_mem(nir_shader *ls,
113                                ac_nir_map_io_driver_location map,
114                                bool tcs_in_out_eq,
115                                uint64_t tcs_temp_only_inputs);
116 
117 void
118 ac_nir_lower_hs_inputs_to_mem(nir_shader *shader,
119                               ac_nir_map_io_driver_location map,
120                               bool tcs_in_out_eq);
121 
122 void
123 ac_nir_lower_hs_outputs_to_mem(nir_shader *shader,
124                                ac_nir_map_io_driver_location map,
125                                enum amd_gfx_level gfx_level,
126                                bool tes_reads_tessfactors,
127                                uint64_t tes_inputs_read,
128                                uint64_t tes_patch_inputs_read,
129                                unsigned num_reserved_tcs_outputs,
130                                unsigned num_reserved_tcs_patch_outputs,
131                                unsigned wave_size,
132                                bool no_inputs_in_lds,
133                                bool pass_tessfactors_by_reg,
134                                bool emit_tess_factor_write);
135 
136 void
137 ac_nir_lower_tes_inputs_to_mem(nir_shader *shader,
138                                ac_nir_map_io_driver_location map);
139 
140 void
141 ac_nir_lower_es_outputs_to_mem(nir_shader *shader,
142                                ac_nir_map_io_driver_location map,
143                                enum amd_gfx_level gfx_level,
144                                unsigned esgs_itemsize);
145 
146 void
147 ac_nir_lower_gs_inputs_to_mem(nir_shader *shader,
148                               ac_nir_map_io_driver_location map,
149                               enum amd_gfx_level gfx_level,
150                               bool triangle_strip_adjacency_fix);
151 
152 bool
153 ac_nir_lower_indirect_derefs(nir_shader *shader,
154                              enum amd_gfx_level gfx_level);
155 
156 typedef struct {
157    enum radeon_family family;
158    enum amd_gfx_level gfx_level;
159 
160    unsigned max_workgroup_size;
161    unsigned wave_size;
162    uint8_t clip_cull_dist_mask;
163    const uint8_t *vs_output_param_offset; /* GFX11+ */
164    bool has_param_exports;
165    bool can_cull;
166    bool disable_streamout;
167    bool has_gen_prim_query;
168    bool has_xfb_prim_query;
169    bool has_gs_invocations_query;
170    bool has_gs_primitives_query;
171    bool kill_pointsize;
172    bool kill_layer;
173    bool force_vrs;
174 
175    /* VS */
176    unsigned num_vertices_per_primitive;
177    bool early_prim_export;
178    bool passthrough;
179    bool use_edgeflags;
180    bool export_primitive_id;
181    uint32_t instance_rate_inputs;
182    uint32_t user_clip_plane_enable_mask;
183 
184    /* GS */
185    unsigned gs_out_vtx_bytes;
186 } ac_nir_lower_ngg_options;
187 
188 void
189 ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *options);
190 
191 void
192 ac_nir_lower_ngg_gs(nir_shader *shader, const ac_nir_lower_ngg_options *options);
193 
194 void
195 ac_nir_lower_ngg_ms(nir_shader *shader,
196                     enum amd_gfx_level gfx_level,
197                     uint32_t clipdist_enable_mask,
198                     const uint8_t *vs_output_param_offset,
199                     bool has_param_exports,
200                     bool *out_needs_scratch_ring,
201                     unsigned wave_size,
202                     unsigned workgroup_size,
203                     bool multiview,
204                     bool has_query,
205                     bool fast_launch_2);
206 
207 void
208 ac_nir_lower_task_outputs_to_mem(nir_shader *shader,
209                                  unsigned task_payload_entry_bytes,
210                                  unsigned task_num_entries,
211                                  bool has_query);
212 
213 void
214 ac_nir_lower_mesh_inputs_to_mem(nir_shader *shader,
215                                 unsigned task_payload_entry_bytes,
216                                 unsigned task_num_entries);
217 
218 nir_def *
219 ac_nir_cull_primitive(nir_builder *b,
220                       nir_def *initially_accepted,
221                       nir_def *pos[3][4],
222                       unsigned num_vertices,
223                       ac_nir_cull_accepted accept_func,
224                       void *state);
225 
226 bool
227 ac_nir_lower_global_access(nir_shader *shader);
228 
229 bool ac_nir_lower_resinfo(nir_shader *nir, enum amd_gfx_level gfx_level);
230 bool ac_nir_lower_image_opcodes(nir_shader *nir);
231 
232 typedef struct ac_nir_gs_output_info {
233    const uint8_t *streams;
234    const uint8_t *streams_16bit_lo;
235    const uint8_t *streams_16bit_hi;
236 
237    const uint8_t *usage_mask;
238    const uint8_t *usage_mask_16bit_lo;
239    const uint8_t *usage_mask_16bit_hi;
240 
241    /* type for each 16bit slot component */
242    nir_alu_type (*types_16bit_lo)[4];
243    nir_alu_type (*types_16bit_hi)[4];
244 } ac_nir_gs_output_info;
245 
246 nir_shader *
247 ac_nir_create_gs_copy_shader(const nir_shader *gs_nir,
248                              enum amd_gfx_level gfx_level,
249                              uint32_t clip_cull_mask,
250                              const uint8_t *param_offsets,
251                              bool has_param_exports,
252                              bool disable_streamout,
253                              bool kill_pointsize,
254                              bool kill_layer,
255                              bool force_vrs,
256                              ac_nir_gs_output_info *output_info);
257 
258 void
259 ac_nir_lower_legacy_vs(nir_shader *nir,
260                        enum amd_gfx_level gfx_level,
261                        uint32_t clip_cull_mask,
262                        const uint8_t *param_offsets,
263                        bool has_param_exports,
264                        bool export_primitive_id,
265                        bool disable_streamout,
266                        bool kill_pointsize,
267                        bool kill_layer,
268                        bool force_vrs);
269 
270 bool
271 ac_nir_gs_shader_query(nir_builder *b,
272                        bool has_gen_prim_query,
273                        bool has_gs_invocations_query,
274                        bool has_gs_primitives_query,
275                        unsigned num_vertices_per_primitive,
276                        unsigned wave_size,
277                        nir_def *vertex_count[4],
278                        nir_def *primitive_count[4]);
279 
280 void
281 ac_nir_lower_legacy_gs(nir_shader *nir,
282                        bool has_gen_prim_query,
283                        bool has_pipeline_stats_query,
284                        ac_nir_gs_output_info *output_info);
285 
286 typedef struct {
287    /* Which load instructions to lower depending on whether the number of
288     * components being loaded is 1 or more than 1.
289     */
290    nir_variable_mode modes_1_comp;  /* lower 1-component loads for these */
291    nir_variable_mode modes_N_comps; /* lower multi-component loads for these */
292 } ac_nir_lower_subdword_options;
293 
294 bool ac_nir_lower_subdword_loads(nir_shader *nir, ac_nir_lower_subdword_options options);
295 
296 typedef struct {
297    enum radeon_family family;
298    enum amd_gfx_level gfx_level;
299 
300    bool use_aco;
301    bool uses_discard;
302    bool alpha_to_coverage_via_mrtz;
303    bool dual_src_blend_swizzle;
304    unsigned spi_shader_col_format;
305    unsigned color_is_int8;
306    unsigned color_is_int10;
307 
308    bool bc_optimize_for_persp;
309    bool bc_optimize_for_linear;
310    bool force_persp_sample_interp;
311    bool force_linear_sample_interp;
312    bool force_persp_center_interp;
313    bool force_linear_center_interp;
314    unsigned ps_iter_samples;
315 
316    /* OpenGL only */
317    bool clamp_color;
318    bool alpha_to_one;
319    bool kill_samplemask;
320    enum compare_func alpha_func;
321    unsigned broadcast_last_cbuf;
322 
323    /* Vulkan only */
324    unsigned enable_mrt_output_nan_fixup;
325    bool no_color_export;
326    bool no_depth_export;
327 } ac_nir_lower_ps_options;
328 
329 void
330 ac_nir_lower_ps(nir_shader *nir, const ac_nir_lower_ps_options *options);
331 
332 typedef struct {
333    enum amd_gfx_level gfx_level;
334 
335    /* If true, round the layer component of the coordinates source to the nearest
336     * integer for all array ops. This is always done for cube array ops.
337     */
338    bool lower_array_layer_round_even;
339 
340    /* Fix derivatives of constants and FS inputs in control flow.
341     *
342     * Ignores interpolateAtSample()/interpolateAtOffset(), dynamically indexed input loads,
343     * pervertexEXT input loads, textureGather() with implicit LOD and 16-bit derivatives and
344     * texture samples with nir_tex_src_min_lod.
345     *
346     * The layer must also be a constant or FS input.
347     */
348    bool fix_derivs_in_divergent_cf;
349    unsigned max_wqm_vgprs;
350 } ac_nir_lower_tex_options;
351 
352 bool
353 ac_nir_lower_tex(nir_shader *nir, const ac_nir_lower_tex_options *options);
354 
355 #ifdef __cplusplus
356 }
357 #endif
358 
359 #endif /* AC_NIR_H */
360