1 /*
2 * Copyright © 2021 Valve Corporation
3 *
4 * SPDX-License-Identifier: MIT
5 */
6
7
8 #ifndef AC_NIR_H
9 #define AC_NIR_H
10
11 #include "ac_hw_stage.h"
12 #include "ac_shader_args.h"
13 #include "ac_shader_util.h"
14 #include "nir.h"
15 #include "nir_builder.h"
16
17 #ifdef __cplusplus
18 extern "C" {
19 #endif
20
/**
 * Parameter-export slot encodings.
 *
 * Per the field comments below, the first group mirrors the OFFSET field and
 * the second group the DEFAULT_VAL field of SPI_PS_INPUT_CNTL_i.
 * The enumerator values are spelled out explicitly so the numbering does not
 * silently shift if an entry is inserted.
 */
enum
{
   /* SPI_PS_INPUT_CNTL_i.OFFSET[0:4] */
   AC_EXP_PARAM_OFFSET_0 = 0,
   AC_EXP_PARAM_OFFSET_31 = 31,
   /* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL[0:1] */
   AC_EXP_PARAM_DEFAULT_VAL_0000 = 64,
   AC_EXP_PARAM_DEFAULT_VAL_0001 = 65,
   AC_EXP_PARAM_DEFAULT_VAL_1110 = 66,
   AC_EXP_PARAM_DEFAULT_VAL_1111 = 67,
   AC_EXP_PARAM_UNDEFINED = 255, /* deprecated, use AC_EXP_PARAM_DEFAULT_VAL_0000 instead */
};
33
/**
 * Export flag bits. Each enumerator occupies its own bit, so the flags can be
 * OR-ed together into a single mask.
 */
enum {
   AC_EXP_FLAG_COMPRESSED = (1 << 0),
   AC_EXP_FLAG_DONE = (1 << 1),
   AC_EXP_FLAG_VALID_MASK = (1 << 2),
};
39
/**
 * Callback type: maps I/O semantics to the actual location used by the
 * lowering pass.
 *
 * \param semantic  The I/O semantic to translate.
 * \return          The driver location for that semantic.
 */
typedef unsigned (*ac_nir_map_io_driver_location)(unsigned semantic);
42
/* Forward declaration of nir_builder.
 *
 * This header already includes nir_builder.h, so the declaration is not
 * needed to avoid that include. It is kept because repeating a typedef for
 * the same type is valid in C11 and C++.
 * NOTE(review): presumably nir_builder.h declares the identical typedef;
 * either drop this or drop the include once that is confirmed.
 */
struct nir_builder;
typedef struct nir_builder nir_builder;
46
47 /* Executed by ac_nir_cull when the current primitive is accepted. */
48 typedef void (*ac_nir_cull_accepted)(nir_builder *b, void *state);
49
50 nir_def *
51 ac_nir_load_arg_at_offset(nir_builder *b, const struct ac_shader_args *ac_args,
52 struct ac_arg arg, unsigned relative_index);
53
54 static inline nir_def *
ac_nir_load_arg(nir_builder * b,const struct ac_shader_args * ac_args,struct ac_arg arg)55 ac_nir_load_arg(nir_builder *b, const struct ac_shader_args *ac_args, struct ac_arg arg)
56 {
57 return ac_nir_load_arg_at_offset(b, ac_args, arg, 0);
58 }
59
60 void ac_nir_store_arg(nir_builder *b, const struct ac_shader_args *ac_args, struct ac_arg arg,
61 nir_def *val);
62
63 nir_def *
64 ac_nir_unpack_arg(nir_builder *b, const struct ac_shader_args *ac_args, struct ac_arg arg,
65 unsigned rshift, unsigned bitwidth);
66
67 bool ac_nir_lower_sin_cos(nir_shader *shader);
68
69 bool ac_nir_lower_intrinsics_to_args(nir_shader *shader, const enum amd_gfx_level gfx_level,
70 const enum ac_hw_stage hw_stage,
71 const struct ac_shader_args *ac_args);
72
73 void
74 ac_nir_store_var_components(nir_builder *b, nir_variable *var, nir_def *value,
75 unsigned component, unsigned writemask);
76
77 void
78 ac_nir_export_primitive(nir_builder *b, nir_def *prim, nir_def *row);
79
80 void
81 ac_nir_export_position(nir_builder *b,
82 enum amd_gfx_level gfx_level,
83 uint32_t clip_cull_mask,
84 bool no_param_export,
85 bool force_vrs,
86 bool done,
87 uint64_t outputs_written,
88 nir_def *(*outputs)[4],
89 nir_def *row);
90
91 void
92 ac_nir_export_parameters(nir_builder *b,
93 const uint8_t *param_offsets,
94 uint64_t outputs_written,
95 uint16_t outputs_written_16bit,
96 nir_def *(*outputs)[4],
97 nir_def *(*outputs_16bit_lo)[4],
98 nir_def *(*outputs_16bit_hi)[4]);
99
100 nir_def *
101 ac_nir_calc_io_offset(nir_builder *b,
102 nir_intrinsic_instr *intrin,
103 nir_def *base_stride,
104 unsigned component_stride,
105 ac_nir_map_io_driver_location map_io);
106
107 bool ac_nir_optimize_outputs(nir_shader *nir, bool sprite_tex_disallowed,
108 int8_t slot_remap[NUM_TOTAL_VARYING_SLOTS],
109 uint8_t param_export_index[NUM_TOTAL_VARYING_SLOTS]);
110
111 void
112 ac_nir_lower_ls_outputs_to_mem(nir_shader *ls,
113 ac_nir_map_io_driver_location map,
114 bool tcs_in_out_eq,
115 uint64_t tcs_temp_only_inputs);
116
117 void
118 ac_nir_lower_hs_inputs_to_mem(nir_shader *shader,
119 ac_nir_map_io_driver_location map,
120 bool tcs_in_out_eq);
121
122 void
123 ac_nir_lower_hs_outputs_to_mem(nir_shader *shader,
124 ac_nir_map_io_driver_location map,
125 enum amd_gfx_level gfx_level,
126 bool tes_reads_tessfactors,
127 uint64_t tes_inputs_read,
128 uint64_t tes_patch_inputs_read,
129 unsigned num_reserved_tcs_outputs,
130 unsigned num_reserved_tcs_patch_outputs,
131 unsigned wave_size,
132 bool no_inputs_in_lds,
133 bool pass_tessfactors_by_reg,
134 bool emit_tess_factor_write);
135
136 void
137 ac_nir_lower_tes_inputs_to_mem(nir_shader *shader,
138 ac_nir_map_io_driver_location map);
139
140 void
141 ac_nir_lower_es_outputs_to_mem(nir_shader *shader,
142 ac_nir_map_io_driver_location map,
143 enum amd_gfx_level gfx_level,
144 unsigned esgs_itemsize);
145
146 void
147 ac_nir_lower_gs_inputs_to_mem(nir_shader *shader,
148 ac_nir_map_io_driver_location map,
149 enum amd_gfx_level gfx_level,
150 bool triangle_strip_adjacency_fix);
151
152 bool
153 ac_nir_lower_indirect_derefs(nir_shader *shader,
154 enum amd_gfx_level gfx_level);
155
156 typedef struct {
157 enum radeon_family family;
158 enum amd_gfx_level gfx_level;
159
160 unsigned max_workgroup_size;
161 unsigned wave_size;
162 uint8_t clip_cull_dist_mask;
163 const uint8_t *vs_output_param_offset; /* GFX11+ */
164 bool has_param_exports;
165 bool can_cull;
166 bool disable_streamout;
167 bool has_gen_prim_query;
168 bool has_xfb_prim_query;
169 bool has_gs_invocations_query;
170 bool has_gs_primitives_query;
171 bool kill_pointsize;
172 bool kill_layer;
173 bool force_vrs;
174
175 /* VS */
176 unsigned num_vertices_per_primitive;
177 bool early_prim_export;
178 bool passthrough;
179 bool use_edgeflags;
180 bool export_primitive_id;
181 uint32_t instance_rate_inputs;
182 uint32_t user_clip_plane_enable_mask;
183
184 /* GS */
185 unsigned gs_out_vtx_bytes;
186 } ac_nir_lower_ngg_options;
187
188 void
189 ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *options);
190
191 void
192 ac_nir_lower_ngg_gs(nir_shader *shader, const ac_nir_lower_ngg_options *options);
193
194 void
195 ac_nir_lower_ngg_ms(nir_shader *shader,
196 enum amd_gfx_level gfx_level,
197 uint32_t clipdist_enable_mask,
198 const uint8_t *vs_output_param_offset,
199 bool has_param_exports,
200 bool *out_needs_scratch_ring,
201 unsigned wave_size,
202 unsigned workgroup_size,
203 bool multiview,
204 bool has_query,
205 bool fast_launch_2);
206
207 void
208 ac_nir_lower_task_outputs_to_mem(nir_shader *shader,
209 unsigned task_payload_entry_bytes,
210 unsigned task_num_entries,
211 bool has_query);
212
213 void
214 ac_nir_lower_mesh_inputs_to_mem(nir_shader *shader,
215 unsigned task_payload_entry_bytes,
216 unsigned task_num_entries);
217
218 nir_def *
219 ac_nir_cull_primitive(nir_builder *b,
220 nir_def *initially_accepted,
221 nir_def *pos[3][4],
222 unsigned num_vertices,
223 ac_nir_cull_accepted accept_func,
224 void *state);
225
226 bool
227 ac_nir_lower_global_access(nir_shader *shader);
228
229 bool ac_nir_lower_resinfo(nir_shader *nir, enum amd_gfx_level gfx_level);
230 bool ac_nir_lower_image_opcodes(nir_shader *nir);
231
232 typedef struct ac_nir_gs_output_info {
233 const uint8_t *streams;
234 const uint8_t *streams_16bit_lo;
235 const uint8_t *streams_16bit_hi;
236
237 const uint8_t *usage_mask;
238 const uint8_t *usage_mask_16bit_lo;
239 const uint8_t *usage_mask_16bit_hi;
240
241 /* type for each 16bit slot component */
242 nir_alu_type (*types_16bit_lo)[4];
243 nir_alu_type (*types_16bit_hi)[4];
244 } ac_nir_gs_output_info;
245
246 nir_shader *
247 ac_nir_create_gs_copy_shader(const nir_shader *gs_nir,
248 enum amd_gfx_level gfx_level,
249 uint32_t clip_cull_mask,
250 const uint8_t *param_offsets,
251 bool has_param_exports,
252 bool disable_streamout,
253 bool kill_pointsize,
254 bool kill_layer,
255 bool force_vrs,
256 ac_nir_gs_output_info *output_info);
257
258 void
259 ac_nir_lower_legacy_vs(nir_shader *nir,
260 enum amd_gfx_level gfx_level,
261 uint32_t clip_cull_mask,
262 const uint8_t *param_offsets,
263 bool has_param_exports,
264 bool export_primitive_id,
265 bool disable_streamout,
266 bool kill_pointsize,
267 bool kill_layer,
268 bool force_vrs);
269
270 bool
271 ac_nir_gs_shader_query(nir_builder *b,
272 bool has_gen_prim_query,
273 bool has_gs_invocations_query,
274 bool has_gs_primitives_query,
275 unsigned num_vertices_per_primitive,
276 unsigned wave_size,
277 nir_def *vertex_count[4],
278 nir_def *primitive_count[4]);
279
280 void
281 ac_nir_lower_legacy_gs(nir_shader *nir,
282 bool has_gen_prim_query,
283 bool has_pipeline_stats_query,
284 ac_nir_gs_output_info *output_info);
285
286 typedef struct {
287 /* Which load instructions to lower depending on whether the number of
288 * components being loaded is 1 or more than 1.
289 */
290 nir_variable_mode modes_1_comp; /* lower 1-component loads for these */
291 nir_variable_mode modes_N_comps; /* lower multi-component loads for these */
292 } ac_nir_lower_subdword_options;
293
294 bool ac_nir_lower_subdword_loads(nir_shader *nir, ac_nir_lower_subdword_options options);
295
296 typedef struct {
297 enum radeon_family family;
298 enum amd_gfx_level gfx_level;
299
300 bool use_aco;
301 bool uses_discard;
302 bool alpha_to_coverage_via_mrtz;
303 bool dual_src_blend_swizzle;
304 unsigned spi_shader_col_format;
305 unsigned color_is_int8;
306 unsigned color_is_int10;
307
308 bool bc_optimize_for_persp;
309 bool bc_optimize_for_linear;
310 bool force_persp_sample_interp;
311 bool force_linear_sample_interp;
312 bool force_persp_center_interp;
313 bool force_linear_center_interp;
314 unsigned ps_iter_samples;
315
316 /* OpenGL only */
317 bool clamp_color;
318 bool alpha_to_one;
319 bool kill_samplemask;
320 enum compare_func alpha_func;
321 unsigned broadcast_last_cbuf;
322
323 /* Vulkan only */
324 unsigned enable_mrt_output_nan_fixup;
325 bool no_color_export;
326 bool no_depth_export;
327 } ac_nir_lower_ps_options;
328
329 void
330 ac_nir_lower_ps(nir_shader *nir, const ac_nir_lower_ps_options *options);
331
332 typedef struct {
333 enum amd_gfx_level gfx_level;
334
335 /* If true, round the layer component of the coordinates source to the nearest
336 * integer for all array ops. This is always done for cube array ops.
337 */
338 bool lower_array_layer_round_even;
339
340 /* Fix derivatives of constants and FS inputs in control flow.
341 *
342 * Ignores interpolateAtSample()/interpolateAtOffset(), dynamically indexed input loads,
343 * pervertexEXT input loads, textureGather() with implicit LOD and 16-bit derivatives and
344 * texture samples with nir_tex_src_min_lod.
345 *
346 * The layer must also be a constant or FS input.
347 */
348 bool fix_derivs_in_divergent_cf;
349 unsigned max_wqm_vgprs;
350 } ac_nir_lower_tex_options;
351
352 bool
353 ac_nir_lower_tex(nir_shader *nir, const ac_nir_lower_tex_options *options);
354
355 #ifdef __cplusplus
356 }
357 #endif
358
359 #endif /* AC_NIR_H */
360