1 /* 2 * Copyright © 2021 Valve Corporation 3 * 4 * SPDX-License-Identifier: MIT 5 */ 6 7 8 #ifndef AC_NIR_H 9 #define AC_NIR_H 10 11 #include "ac_hw_stage.h" 12 #include "ac_shader_args.h" 13 #include "ac_shader_util.h" 14 #include "nir.h" 15 16 #ifdef __cplusplus 17 extern "C" { 18 #endif 19 20 enum 21 { 22 /* SPI_PS_INPUT_CNTL_i.OFFSET[0:4] */ 23 AC_EXP_PARAM_OFFSET_0 = 0, 24 AC_EXP_PARAM_OFFSET_31 = 31, 25 /* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL[0:1] */ 26 AC_EXP_PARAM_DEFAULT_VAL_0000 = 64, 27 AC_EXP_PARAM_DEFAULT_VAL_0001, 28 AC_EXP_PARAM_DEFAULT_VAL_1110, 29 AC_EXP_PARAM_DEFAULT_VAL_1111, 30 AC_EXP_PARAM_UNDEFINED = 255, /* deprecated, use AC_EXP_PARAM_DEFAULT_VAL_0000 instead */ 31 }; 32 33 enum { 34 AC_EXP_FLAG_COMPRESSED = (1 << 0), 35 AC_EXP_FLAG_DONE = (1 << 1), 36 AC_EXP_FLAG_VALID_MASK = (1 << 2), 37 }; 38 39 struct ac_nir_config { 40 enum amd_gfx_level gfx_level; 41 bool uses_aco; 42 }; 43 44 /* TODO: Remove these once radeonsi gathers shader_info before lowering. */ 45 #define AC_VECTOR_ARG_FLAG(name, value) (((name) & 0xf) | ((value) << 4)) 46 #define AC_VECTOR_ARG_UNSET 0 47 #define AC_VECTOR_ARG_INTERP_MODE 1 48 #define AC_VECTOR_ARG_IS_COLOR 2 49 #define AC_VECTOR_ARG_FLAG_GET_NAME(intr) (nir_intrinsic_flags(intr) & 0xf) 50 #define AC_VECTOR_ARG_FLAG_GET_VALUE(intr) (nir_intrinsic_flags(intr) >> 4) 51 52 /* Maps I/O semantics to the actual location used by the lowering pass. */ 53 typedef unsigned (*ac_nir_map_io_driver_location)(unsigned semantic); 54 55 /* Forward declaration of nir_builder so we don't have to include nir_builder.h here */ 56 struct nir_builder; 57 typedef struct nir_builder nir_builder; 58 59 struct nir_xfb_info; 60 typedef struct nir_xfb_info nir_xfb_info; 61 62 /* Executed by ac_nir_cull when the current primitive is accepted. */ 63 typedef void (*ac_nir_cull_accepted)(nir_builder *b, void *state); 64 65 void 66 ac_nir_set_options(struct radeon_info *info, bool use_llvm, 67 nir_shader_compiler_options *options); 68 69 nir_def * 70 ac_nir_load_arg_at_offset(nir_builder *b, const struct ac_shader_args *ac_args, 71 struct ac_arg arg, unsigned relative_index); 72 73 nir_def * 74 ac_nir_load_arg(nir_builder *b, const struct ac_shader_args *ac_args, struct ac_arg arg); 75 76 nir_def * 77 ac_nir_load_arg_upper_bound(nir_builder *b, const struct ac_shader_args *ac_args, struct ac_arg arg, 78 unsigned upper_bound); 79 80 void ac_nir_store_arg(nir_builder *b, const struct ac_shader_args *ac_args, struct ac_arg arg, 81 nir_def *val); 82 83 nir_def * 84 ac_nir_unpack_arg(nir_builder *b, const struct ac_shader_args *ac_args, struct ac_arg arg, 85 unsigned rshift, unsigned bitwidth); 86 87 bool ac_nir_lower_sin_cos(nir_shader *shader); 88 89 bool ac_nir_lower_intrinsics_to_args(nir_shader *shader, const enum amd_gfx_level gfx_level, 90 bool has_ls_vgpr_init_bug, const enum ac_hw_stage hw_stage, 91 unsigned wave_size, unsigned workgroup_size, 92 const struct ac_shader_args *ac_args); 93 94 nir_xfb_info *ac_nir_get_sorted_xfb_info(const nir_shader *nir); 95 96 bool ac_nir_optimize_outputs(nir_shader *nir, bool sprite_tex_disallowed, 97 int8_t slot_remap[NUM_TOTAL_VARYING_SLOTS], 98 uint8_t param_export_index[NUM_TOTAL_VARYING_SLOTS]); 99 100 void 101 ac_nir_lower_ls_outputs_to_mem(nir_shader *ls, 102 ac_nir_map_io_driver_location map, 103 enum amd_gfx_level gfx_level, 104 bool tcs_in_out_eq, 105 uint64_t tcs_inputs_via_temp, 106 uint64_t tcs_inputs_via_lds); 107 108 void 109 ac_nir_lower_hs_inputs_to_mem(nir_shader *shader, 110 ac_nir_map_io_driver_location map, 111 enum amd_gfx_level gfx_level, 112 bool tcs_in_out_eq, 113 uint64_t tcs_inputs_via_temp, 114 uint64_t tcs_inputs_via_lds); 115 116 void 117 ac_nir_lower_hs_outputs_to_mem(nir_shader *shader, const nir_tcs_info *info, 118 ac_nir_map_io_driver_location map, 119 enum amd_gfx_level gfx_level, 120 uint64_t tes_inputs_read, 121 uint32_t tes_patch_inputs_read, 122 unsigned wave_size); 123 124 void 125 ac_nir_lower_tes_inputs_to_mem(nir_shader *shader, 126 ac_nir_map_io_driver_location map); 127 128 void 129 ac_nir_compute_tess_wg_info(const struct radeon_info *info, const struct shader_info *tcs_info, 130 unsigned wave_size, bool tess_uses_primid, bool all_invocations_define_tess_levels, 131 unsigned num_tcs_input_cp, unsigned lds_input_vertex_size, 132 unsigned num_mem_tcs_outputs, unsigned num_mem_tcs_patch_outputs, 133 unsigned *num_patches_per_wg, unsigned *hw_lds_size); 134 135 void 136 ac_nir_lower_es_outputs_to_mem(nir_shader *shader, 137 ac_nir_map_io_driver_location map, 138 enum amd_gfx_level gfx_level, 139 unsigned esgs_itemsize, 140 uint64_t gs_inputs_read); 141 142 void 143 ac_nir_lower_gs_inputs_to_mem(nir_shader *shader, 144 ac_nir_map_io_driver_location map, 145 enum amd_gfx_level gfx_level, 146 bool triangle_strip_adjacency_fix); 147 148 bool 149 ac_nir_lower_indirect_derefs(nir_shader *shader, 150 enum amd_gfx_level gfx_level); 151 152 typedef struct { 153 enum radeon_family family; 154 enum amd_gfx_level gfx_level; 155 156 unsigned max_workgroup_size; 157 unsigned wave_size; 158 uint8_t clip_cull_dist_mask; 159 const uint8_t *vs_output_param_offset; /* GFX11+ */ 160 bool has_param_exports; 161 bool can_cull; 162 bool disable_streamout; 163 bool has_gen_prim_query; 164 bool has_xfb_prim_query; 165 bool use_gfx12_xfb_intrinsic; 166 bool has_gs_invocations_query; 167 bool has_gs_primitives_query; 168 bool kill_pointsize; 169 bool kill_layer; 170 bool force_vrs; 171 bool compact_primitives; 172 173 /* VS */ 174 unsigned num_vertices_per_primitive; 175 bool early_prim_export; 176 bool passthrough; 177 bool use_edgeflags; 178 bool export_primitive_id; 179 bool export_primitive_id_per_prim; 180 uint32_t instance_rate_inputs; 181 uint32_t user_clip_plane_enable_mask; 182 183 /* GS */ 184 unsigned gs_out_vtx_bytes; 185 } ac_nir_lower_ngg_options; 186 187 void 188 ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *options); 189 190 void 191 ac_nir_lower_ngg_gs(nir_shader *shader, const ac_nir_lower_ngg_options *options); 192 193 void 194 ac_nir_lower_ngg_mesh(nir_shader *shader, 195 enum amd_gfx_level gfx_level, 196 uint32_t clipdist_enable_mask, 197 const uint8_t *vs_output_param_offset, 198 bool has_param_exports, 199 bool *out_needs_scratch_ring, 200 unsigned wave_size, 201 unsigned workgroup_size, 202 bool multiview, 203 bool has_query, 204 bool fast_launch_2); 205 206 void 207 ac_nir_lower_task_outputs_to_mem(nir_shader *shader, 208 unsigned task_payload_entry_bytes, 209 unsigned task_num_entries, 210 bool has_query); 211 212 void 213 ac_nir_lower_mesh_inputs_to_mem(nir_shader *shader, 214 unsigned task_payload_entry_bytes, 215 unsigned task_num_entries); 216 217 bool 218 ac_nir_lower_global_access(nir_shader *shader); 219 220 bool ac_nir_lower_resinfo(nir_shader *nir, enum amd_gfx_level gfx_level); 221 bool ac_nir_lower_image_opcodes(nir_shader *nir); 222 223 typedef struct ac_nir_gs_output_info { 224 const uint8_t *streams; 225 const uint8_t *streams_16bit_lo; 226 const uint8_t *streams_16bit_hi; 227 228 const uint8_t *varying_mask; 229 const uint8_t *varying_mask_16bit_lo; 230 const uint8_t *varying_mask_16bit_hi; 231 232 const uint8_t *sysval_mask; 233 234 /* type for each 16bit slot component */ 235 nir_alu_type (*types_16bit_lo)[4]; 236 nir_alu_type (*types_16bit_hi)[4]; 237 } ac_nir_gs_output_info; 238 239 nir_shader * 240 ac_nir_create_gs_copy_shader(const nir_shader *gs_nir, 241 enum amd_gfx_level gfx_level, 242 uint32_t clip_cull_mask, 243 const uint8_t *param_offsets, 244 bool has_param_exports, 245 bool disable_streamout, 246 bool kill_pointsize, 247 bool kill_layer, 248 bool force_vrs, 249 ac_nir_gs_output_info *output_info); 250 251 void 252 ac_nir_lower_legacy_vs(nir_shader *nir, 253 enum amd_gfx_level gfx_level, 254 uint32_t clip_cull_mask, 255 const uint8_t *param_offsets, 256 bool has_param_exports, 257 bool export_primitive_id, 258 bool disable_streamout, 259 bool kill_pointsize, 260 bool kill_layer, 261 bool force_vrs); 262 263 void 264 ac_nir_lower_legacy_gs(nir_shader *nir, 265 bool has_gen_prim_query, 266 bool has_pipeline_stats_query, 267 ac_nir_gs_output_info *output_info); 268 269 typedef struct { 270 /* This is a pre-link pass. It should only eliminate code and do lowering that mostly doesn't 271 * generate AMD-specific intrinsics. 272 */ 273 /* System values. */ 274 bool force_persp_sample_interp; 275 bool force_linear_sample_interp; 276 bool force_persp_center_interp; 277 bool force_linear_center_interp; 278 unsigned ps_iter_samples; 279 280 /* Outputs. */ 281 bool clamp_color; /* GL only */ 282 bool alpha_test_alpha_to_one; /* GL only, this only affects alpha test */ 283 enum compare_func alpha_func; /* GL only */ 284 bool keep_alpha_for_mrtz; /* this prevents killing alpha based on spi_shader_col_format_hint */ 285 unsigned spi_shader_col_format_hint; /* this only shrinks and eliminates output stores */ 286 bool kill_z; 287 bool kill_stencil; 288 bool kill_samplemask; 289 } ac_nir_lower_ps_early_options; 290 291 void 292 ac_nir_lower_ps_early(nir_shader *nir, const ac_nir_lower_ps_early_options *options); 293 294 typedef struct { 295 /* This is a post-link pass. It shouldn't eliminate any code and it shouldn't affect shader_info 296 * (those should be done in the early pass). 297 */ 298 enum amd_gfx_level gfx_level; 299 enum radeon_family family; 300 bool use_aco; 301 302 /* System values. */ 303 bool bc_optimize_for_persp; 304 bool bc_optimize_for_linear; 305 306 /* Exports. */ 307 bool uses_discard; 308 bool alpha_to_coverage_via_mrtz; 309 bool dual_src_blend_swizzle; 310 unsigned spi_shader_col_format; 311 unsigned color_is_int8; 312 unsigned color_is_int10; 313 bool alpha_to_one; 314 315 /* Vulkan only */ 316 unsigned enable_mrt_output_nan_fixup; 317 bool no_color_export; 318 bool no_depth_export; 319 } ac_nir_lower_ps_late_options; 320 321 void 322 ac_nir_lower_ps_late(nir_shader *nir, const ac_nir_lower_ps_late_options *options); 323 324 typedef struct { 325 enum amd_gfx_level gfx_level; 326 327 /* If true, round the layer component of the coordinates source to the nearest 328 * integer for all array ops. This is always done for cube array ops. 329 */ 330 bool lower_array_layer_round_even; 331 332 /* Fix derivatives of constants and FS inputs in control flow. 333 * 334 * Ignores interpolateAtSample()/interpolateAtOffset(), dynamically indexed input loads, 335 * pervertexEXT input loads, textureGather() with implicit LOD and 16-bit derivatives and 336 * texture samples with nir_tex_src_min_lod. 337 * 338 * The layer must also be a constant or FS input. 339 */ 340 bool fix_derivs_in_divergent_cf; 341 unsigned max_wqm_vgprs; 342 } ac_nir_lower_tex_options; 343 344 bool 345 ac_nir_lower_tex(nir_shader *nir, const ac_nir_lower_tex_options *options); 346 347 void 348 ac_nir_store_debug_log_amd(nir_builder *b, nir_def *uvec4); 349 350 bool 351 ac_nir_opt_pack_half(nir_shader *shader, enum amd_gfx_level gfx_level); 352 353 unsigned 354 ac_nir_varying_expression_max_cost(nir_shader *producer, nir_shader *consumer); 355 356 bool 357 ac_nir_opt_shared_append(nir_shader *shader); 358 359 bool 360 ac_nir_flag_smem_for_loads(nir_shader *shader, enum amd_gfx_level gfx_level, bool use_llvm, bool after_lowering); 361 362 bool 363 ac_nir_lower_mem_access_bit_sizes(nir_shader *shader, enum amd_gfx_level gfx_level, bool use_llvm); 364 365 bool 366 ac_nir_optimize_uniform_atomics(nir_shader *nir); 367 368 unsigned 369 ac_nir_lower_bit_size_callback(const nir_instr *instr, void *data); 370 371 bool 372 ac_nir_mem_vectorize_callback(unsigned align_mul, unsigned align_offset, unsigned bit_size, 373 unsigned num_components, int64_t hole_size, 374 nir_intrinsic_instr *low, nir_intrinsic_instr *high, void *data); 375 376 bool 377 ac_nir_scalarize_overfetching_loads_callback(const nir_instr *instr, const void *data); 378 379 enum gl_access_qualifier 380 ac_nir_get_mem_access_flags(const nir_intrinsic_instr *instr); 381 382 #ifdef __cplusplus 383 } 384 #endif 385 386 #endif /* AC_NIR_H */ 387