1 /* 2 * Copyright © 2021 Valve Corporation 3 * 4 * SPDX-License-Identifier: MIT 5 */ 6 7 8 #ifndef AC_NIR_H 9 #define AC_NIR_H 10 11 #include "ac_hw_stage.h" 12 #include "ac_shader_args.h" 13 #include "ac_shader_util.h" 14 #include "nir.h" 15 16 #ifdef __cplusplus 17 extern "C" { 18 #endif 19 20 enum 21 { 22 /* SPI_PS_INPUT_CNTL_i.OFFSET[0:4] */ 23 AC_EXP_PARAM_OFFSET_0 = 0, 24 AC_EXP_PARAM_OFFSET_31 = 31, 25 /* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL[0:1] */ 26 AC_EXP_PARAM_DEFAULT_VAL_0000 = 64, 27 AC_EXP_PARAM_DEFAULT_VAL_0001, 28 AC_EXP_PARAM_DEFAULT_VAL_1110, 29 AC_EXP_PARAM_DEFAULT_VAL_1111, 30 AC_EXP_PARAM_UNDEFINED = 255, /* deprecated, use AC_EXP_PARAM_DEFAULT_VAL_0000 instead */ 31 }; 32 33 enum { 34 AC_EXP_FLAG_COMPRESSED = (1 << 0), 35 AC_EXP_FLAG_DONE = (1 << 1), 36 AC_EXP_FLAG_VALID_MASK = (1 << 2), 37 }; 38 39 struct ac_nir_config { 40 enum amd_gfx_level gfx_level; 41 bool uses_aco; 42 }; 43 44 /* Maps I/O semantics to the actual location used by the lowering pass. */ 45 typedef unsigned (*ac_nir_map_io_driver_location)(unsigned semantic); 46 47 /* Forward declaration of nir_builder so we don't have to include nir_builder.h here */ 48 struct nir_builder; 49 typedef struct nir_builder nir_builder; 50 51 struct nir_xfb_info; 52 typedef struct nir_xfb_info nir_xfb_info; 53 54 /* Executed by ac_nir_cull when the current primitive is accepted. */ 55 typedef void (*ac_nir_cull_accepted)(nir_builder *b, void *state); 56 57 void 58 ac_nir_set_options(struct radeon_info *info, bool use_llvm, 59 nir_shader_compiler_options *options); 60 61 nir_def * 62 ac_nir_load_arg_at_offset(nir_builder *b, const struct ac_shader_args *ac_args, 63 struct ac_arg arg, unsigned relative_index); 64 65 nir_def * 66 ac_nir_load_arg(nir_builder *b, const struct ac_shader_args *ac_args, struct ac_arg arg); 67 68 nir_def * 69 ac_nir_load_arg_upper_bound(nir_builder *b, const struct ac_shader_args *ac_args, struct ac_arg arg, 70 unsigned upper_bound); 71 72 void ac_nir_store_arg(nir_builder *b, const struct ac_shader_args *ac_args, struct ac_arg arg, 73 nir_def *val); 74 75 nir_def * 76 ac_nir_unpack_arg(nir_builder *b, const struct ac_shader_args *ac_args, struct ac_arg arg, 77 unsigned rshift, unsigned bitwidth); 78 79 bool ac_nir_lower_sin_cos(nir_shader *shader); 80 81 bool ac_nir_lower_intrinsics_to_args(nir_shader *shader, const enum amd_gfx_level gfx_level, 82 bool has_ls_vgpr_init_bug, const enum ac_hw_stage hw_stage, 83 unsigned wave_size, unsigned workgroup_size, 84 const struct ac_shader_args *ac_args); 85 86 nir_xfb_info *ac_nir_get_sorted_xfb_info(const nir_shader *nir); 87 88 bool ac_nir_optimize_outputs(nir_shader *nir, bool sprite_tex_disallowed, 89 int8_t slot_remap[NUM_TOTAL_VARYING_SLOTS], 90 uint8_t param_export_index[NUM_TOTAL_VARYING_SLOTS]); 91 92 void 93 ac_nir_lower_ls_outputs_to_mem(nir_shader *ls, 94 ac_nir_map_io_driver_location map, 95 enum amd_gfx_level gfx_level, 96 bool tcs_in_out_eq, 97 uint64_t tcs_inputs_via_temp, 98 uint64_t tcs_inputs_via_lds); 99 100 void 101 ac_nir_lower_hs_inputs_to_mem(nir_shader *shader, 102 ac_nir_map_io_driver_location map, 103 enum amd_gfx_level gfx_level, 104 bool tcs_in_out_eq, 105 uint64_t tcs_inputs_via_temp, 106 uint64_t tcs_inputs_via_lds); 107 108 void 109 ac_nir_lower_hs_outputs_to_mem(nir_shader *shader, const nir_tcs_info *info, 110 ac_nir_map_io_driver_location map, 111 enum amd_gfx_level gfx_level, 112 uint64_t tes_inputs_read, 113 uint32_t tes_patch_inputs_read, 114 unsigned wave_size); 115 116 void 117 ac_nir_lower_tes_inputs_to_mem(nir_shader *shader, 118 ac_nir_map_io_driver_location map); 119 120 void 121 ac_nir_compute_tess_wg_info(const struct radeon_info *info, const struct shader_info *tcs_info, 122 unsigned wave_size, bool tess_uses_primid, bool all_invocations_define_tess_levels, 123 unsigned num_tcs_input_cp, unsigned lds_input_vertex_size, 124 unsigned num_mem_tcs_outputs, unsigned num_mem_tcs_patch_outputs, 125 unsigned *num_patches_per_wg, unsigned *hw_lds_size); 126 127 void 128 ac_nir_lower_es_outputs_to_mem(nir_shader *shader, 129 ac_nir_map_io_driver_location map, 130 enum amd_gfx_level gfx_level, 131 unsigned esgs_itemsize, 132 uint64_t gs_inputs_read); 133 134 void 135 ac_nir_lower_gs_inputs_to_mem(nir_shader *shader, 136 ac_nir_map_io_driver_location map, 137 enum amd_gfx_level gfx_level, 138 bool triangle_strip_adjacency_fix); 139 140 bool 141 ac_nir_lower_indirect_derefs(nir_shader *shader, 142 enum amd_gfx_level gfx_level); 143 144 typedef struct { 145 const struct radeon_info *hw_info; 146 147 unsigned max_workgroup_size; 148 unsigned wave_size; 149 uint8_t clip_cull_dist_mask; 150 const uint8_t *vs_output_param_offset; /* GFX11+ */ 151 bool has_param_exports; 152 bool can_cull; 153 bool disable_streamout; 154 bool has_gen_prim_query; 155 bool has_xfb_prim_query; 156 bool use_gfx12_xfb_intrinsic; 157 bool has_gs_invocations_query; 158 bool has_gs_primitives_query; 159 bool kill_pointsize; 160 bool kill_layer; 161 bool force_vrs; 162 bool compact_primitives; 163 164 /* VS */ 165 unsigned num_vertices_per_primitive; 166 bool early_prim_export; 167 bool passthrough; 168 bool use_edgeflags; 169 bool export_primitive_id; 170 bool export_primitive_id_per_prim; 171 uint32_t instance_rate_inputs; 172 uint32_t user_clip_plane_enable_mask; 173 174 /* GS */ 175 unsigned gs_out_vtx_bytes; 176 } ac_nir_lower_ngg_options; 177 178 void 179 ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *options); 180 181 void 182 ac_nir_lower_ngg_gs(nir_shader *shader, const ac_nir_lower_ngg_options *options); 183 184 void 185 ac_nir_lower_ngg_mesh(nir_shader *shader, 186 const struct radeon_info *hw_info, 187 uint32_t clipdist_enable_mask, 188 const uint8_t *vs_output_param_offset, 189 bool has_param_exports, 190 bool *out_needs_scratch_ring, 191 unsigned wave_size, 192 unsigned workgroup_size, 193 bool multiview, 194 bool has_query, 195 bool fast_launch_2); 196 197 void 198 ac_nir_lower_task_outputs_to_mem(nir_shader *shader, 199 unsigned task_payload_entry_bytes, 200 unsigned task_num_entries, 201 bool has_query); 202 203 void 204 ac_nir_lower_mesh_inputs_to_mem(nir_shader *shader, 205 unsigned task_payload_entry_bytes, 206 unsigned task_num_entries); 207 208 bool 209 ac_nir_lower_global_access(nir_shader *shader); 210 211 bool ac_nir_lower_resinfo(nir_shader *nir, enum amd_gfx_level gfx_level); 212 bool ac_nir_lower_image_opcodes(nir_shader *nir); 213 214 typedef struct ac_nir_gs_output_info { 215 const uint8_t *streams; 216 const uint8_t *streams_16bit_lo; 217 const uint8_t *streams_16bit_hi; 218 219 const uint8_t *varying_mask; 220 const uint8_t *varying_mask_16bit_lo; 221 const uint8_t *varying_mask_16bit_hi; 222 223 const uint8_t *sysval_mask; 224 225 /* type for each 16bit slot component */ 226 nir_alu_type (*types_16bit_lo)[4]; 227 nir_alu_type (*types_16bit_hi)[4]; 228 } ac_nir_gs_output_info; 229 230 nir_shader * 231 ac_nir_create_gs_copy_shader(const nir_shader *gs_nir, 232 enum amd_gfx_level gfx_level, 233 uint32_t clip_cull_mask, 234 const uint8_t *param_offsets, 235 bool has_param_exports, 236 bool disable_streamout, 237 bool kill_pointsize, 238 bool kill_layer, 239 bool force_vrs, 240 ac_nir_gs_output_info *output_info); 241 242 void 243 ac_nir_lower_legacy_vs(nir_shader *nir, 244 enum amd_gfx_level gfx_level, 245 uint32_t clip_cull_mask, 246 const uint8_t *param_offsets, 247 bool has_param_exports, 248 bool export_primitive_id, 249 bool disable_streamout, 250 bool kill_pointsize, 251 bool kill_layer, 252 bool force_vrs); 253 254 void 255 ac_nir_lower_legacy_gs(nir_shader *nir, 256 bool has_gen_prim_query, 257 bool has_pipeline_stats_query, 258 ac_nir_gs_output_info *output_info); 259 260 /* This is a pre-link pass. It should only eliminate code and do lowering that mostly doesn't 261 * generate AMD-specific intrinsics. 262 */ 263 typedef struct { 264 /* System values. */ 265 bool force_center_interp_no_msaa; /* true if MSAA is disabled, false may mean that the state is unknown */ 266 bool uses_vrs_coarse_shading; 267 bool load_sample_positions_always_loads_current_ones; 268 bool dynamic_rasterization_samples; 269 int force_front_face; /* 0 -> keep, 1 -> set to true, -1 -> set to false */ 270 bool optimize_frag_coord; /* TODO: remove this after RADV can handle it */ 271 bool frag_coord_is_center; /* GL requirement for sample shading */ 272 273 /* frag_coord/pixel_coord: 274 * allow_pixel_coord && (frag_coord_is_center || ps_iter_samples == 1 || 275 * force_center_interp_no_msaa || 276 * the fractional part of frag_coord.xy isn't used): 277 * * frag_coord.xy is replaced by u2f(pixel_coord) + 0.5. 278 * else: 279 * * pixel_coord is replaced by f2u16(frag_coord.xy) 280 * * ps_iter_samples == 0 means the state is unknown. 281 * 282 * barycentrics: 283 * force_center_interp_no_msaa: 284 * * All barycentrics including at_sample but excluding at_offset are changed to 285 * barycentric_pixel 286 * ps_iter_samples >= 2: 287 * * All barycentrics are changed to per-sample interpolation except at_offset/at_sample. 288 * * barycentric_at_sample(sample_id) is replaced by barycentric_sample. 289 * 290 * sample_mask_in: 291 * force_center_interp_no_msaa && !uses_vrs_coarse_shading: 292 * * sample_mask_in is replaced by b2i32(!helper_invocation) 293 * ps_iter_samples == 2, 4: 294 * * sample_mask_in is changed to (sample_mask_in & (ps_iter_mask << sample_id)) 295 * ps_iter_samples == 8: 296 * * sample_mask_in is replaced by 1 << sample_id. 297 * 298 * When ps_iter_samples is equal to rasterization samples, set ps_iter_samples = 8 for this pass. 299 */ 300 unsigned ps_iter_samples; 301 302 /* fbfetch_output */ 303 bool fbfetch_is_1D; 304 bool fbfetch_layered; 305 bool fbfetch_msaa; 306 bool fbfetch_apply_fmask; 307 308 /* Outputs. */ 309 bool clamp_color; /* GL only */ 310 bool alpha_test_alpha_to_one; /* GL only, this only affects alpha test */ 311 enum compare_func alpha_func; /* GL only */ 312 bool keep_alpha_for_mrtz; /* this prevents killing alpha based on spi_shader_col_format_hint */ 313 unsigned spi_shader_col_format_hint; /* this only shrinks and eliminates output stores */ 314 bool kill_z; 315 bool kill_stencil; 316 bool kill_samplemask; 317 } ac_nir_lower_ps_early_options; 318 319 bool 320 ac_nir_lower_ps_early(nir_shader *nir, const ac_nir_lower_ps_early_options *options); 321 322 /* This is a post-link pass. It shouldn't eliminate any code and it shouldn't affect shader_info 323 * (those should be done in the early pass). 324 */ 325 typedef struct { 326 enum amd_gfx_level gfx_level; 327 enum radeon_family family; 328 bool use_aco; 329 330 /* System values. */ 331 bool bc_optimize_for_persp; 332 bool bc_optimize_for_linear; 333 334 /* Exports. */ 335 bool uses_discard; 336 bool alpha_to_coverage_via_mrtz; 337 bool dual_src_blend_swizzle; 338 unsigned spi_shader_col_format; 339 unsigned color_is_int8; 340 unsigned color_is_int10; 341 bool alpha_to_one; 342 343 /* Vulkan only */ 344 unsigned enable_mrt_output_nan_fixup; 345 bool no_color_export; 346 bool no_depth_export; 347 } ac_nir_lower_ps_late_options; 348 349 bool 350 ac_nir_lower_ps_late(nir_shader *nir, const ac_nir_lower_ps_late_options *options); 351 352 typedef struct { 353 enum amd_gfx_level gfx_level; 354 355 /* If true, round the layer component of the coordinates source to the nearest 356 * integer for all array ops. This is always done for cube array ops. 357 */ 358 bool lower_array_layer_round_even; 359 360 /* Fix derivatives of constants and FS inputs in control flow. 361 * 362 * Ignores interpolateAtSample()/interpolateAtOffset(), dynamically indexed input loads, 363 * pervertexEXT input loads, textureGather() with implicit LOD and 16-bit derivatives and 364 * texture samples with nir_tex_src_min_lod. 365 * 366 * The layer must also be a constant or FS input. 367 */ 368 bool fix_derivs_in_divergent_cf; 369 unsigned max_wqm_vgprs; 370 } ac_nir_lower_tex_options; 371 372 bool 373 ac_nir_lower_tex(nir_shader *nir, const ac_nir_lower_tex_options *options); 374 375 void 376 ac_nir_store_debug_log_amd(nir_builder *b, nir_def *uvec4); 377 378 bool 379 ac_nir_opt_pack_half(nir_shader *shader, enum amd_gfx_level gfx_level); 380 381 unsigned 382 ac_nir_varying_expression_max_cost(nir_shader *producer, nir_shader *consumer); 383 384 bool 385 ac_nir_opt_shared_append(nir_shader *shader); 386 387 bool 388 ac_nir_flag_smem_for_loads(nir_shader *shader, enum amd_gfx_level gfx_level, bool use_llvm, bool after_lowering); 389 390 bool 391 ac_nir_lower_mem_access_bit_sizes(nir_shader *shader, enum amd_gfx_level gfx_level, bool use_llvm); 392 393 bool 394 ac_nir_optimize_uniform_atomics(nir_shader *nir); 395 396 unsigned 397 ac_nir_lower_bit_size_callback(const nir_instr *instr, void *data); 398 399 bool 400 ac_nir_mem_vectorize_callback(unsigned align_mul, unsigned align_offset, unsigned bit_size, 401 unsigned num_components, int64_t hole_size, 402 nir_intrinsic_instr *low, nir_intrinsic_instr *high, void *data); 403 404 bool 405 ac_nir_scalarize_overfetching_loads_callback(const nir_instr *instr, const void *data); 406 407 enum gl_access_qualifier 408 ac_nir_get_mem_access_flags(const nir_intrinsic_instr *instr); 409 410 #ifdef __cplusplus 411 } 412 #endif 413 414 #endif /* AC_NIR_H */ 415