/* * Copyright © 2021 Valve Corporation * * SPDX-License-Identifier: MIT */ #ifndef AC_NIR_H #define AC_NIR_H #include "ac_hw_stage.h" #include "ac_shader_args.h" #include "ac_shader_util.h" #include "nir.h" #ifdef __cplusplus extern "C" { #endif enum { /* SPI_PS_INPUT_CNTL_i.OFFSET[0:4] */ AC_EXP_PARAM_OFFSET_0 = 0, AC_EXP_PARAM_OFFSET_31 = 31, /* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL[0:1] */ AC_EXP_PARAM_DEFAULT_VAL_0000 = 64, AC_EXP_PARAM_DEFAULT_VAL_0001, AC_EXP_PARAM_DEFAULT_VAL_1110, AC_EXP_PARAM_DEFAULT_VAL_1111, AC_EXP_PARAM_UNDEFINED = 255, /* deprecated, use AC_EXP_PARAM_DEFAULT_VAL_0000 instead */ }; enum { AC_EXP_FLAG_COMPRESSED = (1 << 0), AC_EXP_FLAG_DONE = (1 << 1), AC_EXP_FLAG_VALID_MASK = (1 << 2), }; struct ac_nir_config { enum amd_gfx_level gfx_level; bool uses_aco; }; /* TODO: Remove these once radeonsi gathers shader_info before lowering. */ #define AC_VECTOR_ARG_FLAG(name, value) (((name) & 0xf) | ((value) << 4)) #define AC_VECTOR_ARG_UNSET 0 #define AC_VECTOR_ARG_INTERP_MODE 1 #define AC_VECTOR_ARG_IS_COLOR 2 #define AC_VECTOR_ARG_FLAG_GET_NAME(intr) (nir_intrinsic_flags(intr) & 0xf) #define AC_VECTOR_ARG_FLAG_GET_VALUE(intr) (nir_intrinsic_flags(intr) >> 4) /* Maps I/O semantics to the actual location used by the lowering pass. */ typedef unsigned (*ac_nir_map_io_driver_location)(unsigned semantic); /* Forward declaration of nir_builder so we don't have to include nir_builder.h here */ struct nir_builder; typedef struct nir_builder nir_builder; struct nir_xfb_info; typedef struct nir_xfb_info nir_xfb_info; /* Executed by ac_nir_cull when the current primitive is accepted. */ typedef void (*ac_nir_cull_accepted)(nir_builder *b, void *state); void ac_nir_set_options(struct radeon_info *info, bool use_llvm, nir_shader_compiler_options *options); nir_def * ac_nir_load_arg_at_offset(nir_builder *b, const struct ac_shader_args *ac_args, struct ac_arg arg, unsigned relative_index); nir_def * ac_nir_load_arg(nir_builder *b, const struct ac_shader_args *ac_args, struct ac_arg arg); nir_def * ac_nir_load_arg_upper_bound(nir_builder *b, const struct ac_shader_args *ac_args, struct ac_arg arg, unsigned upper_bound); void ac_nir_store_arg(nir_builder *b, const struct ac_shader_args *ac_args, struct ac_arg arg, nir_def *val); nir_def * ac_nir_unpack_arg(nir_builder *b, const struct ac_shader_args *ac_args, struct ac_arg arg, unsigned rshift, unsigned bitwidth); bool ac_nir_lower_sin_cos(nir_shader *shader); bool ac_nir_lower_intrinsics_to_args(nir_shader *shader, const enum amd_gfx_level gfx_level, bool has_ls_vgpr_init_bug, const enum ac_hw_stage hw_stage, unsigned wave_size, unsigned workgroup_size, const struct ac_shader_args *ac_args); nir_xfb_info *ac_nir_get_sorted_xfb_info(const nir_shader *nir); bool ac_nir_optimize_outputs(nir_shader *nir, bool sprite_tex_disallowed, int8_t slot_remap[NUM_TOTAL_VARYING_SLOTS], uint8_t param_export_index[NUM_TOTAL_VARYING_SLOTS]); void ac_nir_lower_ls_outputs_to_mem(nir_shader *ls, ac_nir_map_io_driver_location map, enum amd_gfx_level gfx_level, bool tcs_in_out_eq, uint64_t tcs_inputs_via_temp, uint64_t tcs_inputs_via_lds); void ac_nir_lower_hs_inputs_to_mem(nir_shader *shader, ac_nir_map_io_driver_location map, enum amd_gfx_level gfx_level, bool tcs_in_out_eq, uint64_t tcs_inputs_via_temp, uint64_t tcs_inputs_via_lds); void ac_nir_lower_hs_outputs_to_mem(nir_shader *shader, const nir_tcs_info *info, ac_nir_map_io_driver_location map, enum amd_gfx_level gfx_level, uint64_t tes_inputs_read, uint32_t tes_patch_inputs_read, unsigned wave_size); void ac_nir_lower_tes_inputs_to_mem(nir_shader *shader, ac_nir_map_io_driver_location map); void ac_nir_compute_tess_wg_info(const struct radeon_info *info, const struct shader_info *tcs_info, unsigned wave_size, bool tess_uses_primid, bool all_invocations_define_tess_levels, unsigned num_tcs_input_cp, unsigned lds_input_vertex_size, unsigned num_mem_tcs_outputs, unsigned num_mem_tcs_patch_outputs, unsigned *num_patches_per_wg, unsigned *hw_lds_size); void ac_nir_lower_es_outputs_to_mem(nir_shader *shader, ac_nir_map_io_driver_location map, enum amd_gfx_level gfx_level, unsigned esgs_itemsize, uint64_t gs_inputs_read); void ac_nir_lower_gs_inputs_to_mem(nir_shader *shader, ac_nir_map_io_driver_location map, enum amd_gfx_level gfx_level, bool triangle_strip_adjacency_fix); bool ac_nir_lower_indirect_derefs(nir_shader *shader, enum amd_gfx_level gfx_level); typedef struct { enum radeon_family family; enum amd_gfx_level gfx_level; unsigned max_workgroup_size; unsigned wave_size; uint8_t clip_cull_dist_mask; const uint8_t *vs_output_param_offset; /* GFX11+ */ bool has_param_exports; bool can_cull; bool disable_streamout; bool has_gen_prim_query; bool has_xfb_prim_query; bool use_gfx12_xfb_intrinsic; bool has_gs_invocations_query; bool has_gs_primitives_query; bool kill_pointsize; bool kill_layer; bool force_vrs; bool compact_primitives; /* VS */ unsigned num_vertices_per_primitive; bool early_prim_export; bool passthrough; bool use_edgeflags; bool export_primitive_id; bool export_primitive_id_per_prim; uint32_t instance_rate_inputs; uint32_t user_clip_plane_enable_mask; /* GS */ unsigned gs_out_vtx_bytes; } ac_nir_lower_ngg_options; void ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *options); void ac_nir_lower_ngg_gs(nir_shader *shader, const ac_nir_lower_ngg_options *options); void ac_nir_lower_ngg_mesh(nir_shader *shader, enum amd_gfx_level gfx_level, uint32_t clipdist_enable_mask, const uint8_t *vs_output_param_offset, bool has_param_exports, bool *out_needs_scratch_ring, unsigned wave_size, unsigned workgroup_size, bool multiview, bool has_query, bool fast_launch_2); void ac_nir_lower_task_outputs_to_mem(nir_shader *shader, unsigned task_payload_entry_bytes, unsigned task_num_entries, bool has_query); void ac_nir_lower_mesh_inputs_to_mem(nir_shader *shader, unsigned task_payload_entry_bytes, unsigned task_num_entries); bool ac_nir_lower_global_access(nir_shader *shader); bool ac_nir_lower_resinfo(nir_shader *nir, enum amd_gfx_level gfx_level); bool ac_nir_lower_image_opcodes(nir_shader *nir); typedef struct ac_nir_gs_output_info { const uint8_t *streams; const uint8_t *streams_16bit_lo; const uint8_t *streams_16bit_hi; const uint8_t *varying_mask; const uint8_t *varying_mask_16bit_lo; const uint8_t *varying_mask_16bit_hi; const uint8_t *sysval_mask; /* type for each 16bit slot component */ nir_alu_type (*types_16bit_lo)[4]; nir_alu_type (*types_16bit_hi)[4]; } ac_nir_gs_output_info; nir_shader * ac_nir_create_gs_copy_shader(const nir_shader *gs_nir, enum amd_gfx_level gfx_level, uint32_t clip_cull_mask, const uint8_t *param_offsets, bool has_param_exports, bool disable_streamout, bool kill_pointsize, bool kill_layer, bool force_vrs, ac_nir_gs_output_info *output_info); void ac_nir_lower_legacy_vs(nir_shader *nir, enum amd_gfx_level gfx_level, uint32_t clip_cull_mask, const uint8_t *param_offsets, bool has_param_exports, bool export_primitive_id, bool disable_streamout, bool kill_pointsize, bool kill_layer, bool force_vrs); void ac_nir_lower_legacy_gs(nir_shader *nir, bool has_gen_prim_query, bool has_pipeline_stats_query, ac_nir_gs_output_info *output_info); typedef struct { /* This is a pre-link pass. It should only eliminate code and do lowering that mostly doesn't * generate AMD-specific intrinsics. */ /* System values. */ bool force_persp_sample_interp; bool force_linear_sample_interp; bool force_persp_center_interp; bool force_linear_center_interp; unsigned ps_iter_samples; /* Outputs. */ bool clamp_color; /* GL only */ bool alpha_test_alpha_to_one; /* GL only, this only affects alpha test */ enum compare_func alpha_func; /* GL only */ bool keep_alpha_for_mrtz; /* this prevents killing alpha based on spi_shader_col_format_hint */ unsigned spi_shader_col_format_hint; /* this only shrinks and eliminates output stores */ bool kill_z; bool kill_stencil; bool kill_samplemask; } ac_nir_lower_ps_early_options; void ac_nir_lower_ps_early(nir_shader *nir, const ac_nir_lower_ps_early_options *options); typedef struct { /* This is a post-link pass. It shouldn't eliminate any code and it shouldn't affect shader_info * (those should be done in the early pass). */ enum amd_gfx_level gfx_level; enum radeon_family family; bool use_aco; /* System values. */ bool bc_optimize_for_persp; bool bc_optimize_for_linear; /* Exports. */ bool uses_discard; bool alpha_to_coverage_via_mrtz; bool dual_src_blend_swizzle; unsigned spi_shader_col_format; unsigned color_is_int8; unsigned color_is_int10; bool alpha_to_one; /* Vulkan only */ unsigned enable_mrt_output_nan_fixup; bool no_color_export; bool no_depth_export; } ac_nir_lower_ps_late_options; void ac_nir_lower_ps_late(nir_shader *nir, const ac_nir_lower_ps_late_options *options); typedef struct { enum amd_gfx_level gfx_level; /* If true, round the layer component of the coordinates source to the nearest * integer for all array ops. This is always done for cube array ops. */ bool lower_array_layer_round_even; /* Fix derivatives of constants and FS inputs in control flow. * * Ignores interpolateAtSample()/interpolateAtOffset(), dynamically indexed input loads, * pervertexEXT input loads, textureGather() with implicit LOD and 16-bit derivatives and * texture samples with nir_tex_src_min_lod. * * The layer must also be a constant or FS input. */ bool fix_derivs_in_divergent_cf; unsigned max_wqm_vgprs; } ac_nir_lower_tex_options; bool ac_nir_lower_tex(nir_shader *nir, const ac_nir_lower_tex_options *options); void ac_nir_store_debug_log_amd(nir_builder *b, nir_def *uvec4); bool ac_nir_opt_pack_half(nir_shader *shader, enum amd_gfx_level gfx_level); unsigned ac_nir_varying_expression_max_cost(nir_shader *producer, nir_shader *consumer); bool ac_nir_opt_shared_append(nir_shader *shader); bool ac_nir_flag_smem_for_loads(nir_shader *shader, enum amd_gfx_level gfx_level, bool use_llvm, bool after_lowering); bool ac_nir_lower_mem_access_bit_sizes(nir_shader *shader, enum amd_gfx_level gfx_level, bool use_llvm); bool ac_nir_optimize_uniform_atomics(nir_shader *nir); unsigned ac_nir_lower_bit_size_callback(const nir_instr *instr, void *data); bool ac_nir_mem_vectorize_callback(unsigned align_mul, unsigned align_offset, unsigned bit_size, unsigned num_components, int64_t hole_size, nir_intrinsic_instr *low, nir_intrinsic_instr *high, void *data); bool ac_nir_scalarize_overfetching_loads_callback(const nir_instr *instr, const void *data); enum gl_access_qualifier ac_nir_get_mem_access_flags(const nir_intrinsic_instr *instr); #ifdef __cplusplus } #endif #endif /* AC_NIR_H */