1 /* 2 * Copyright © 2016 Red Hat. 3 * Copyright © 2016 Bas Nieuwenhuizen 4 * 5 * based in part on anv driver which is: 6 * Copyright © 2015 Intel Corporation 7 * 8 * SPDX-License-Identifier: MIT 9 */ 10 11 #ifndef RADV_SHADER_INFO_H 12 #define RADV_SHADER_INFO_H 13 14 #include <inttypes.h> 15 #include <stdbool.h> 16 17 #include "nir.h" 18 #include "radv_constants.h" 19 #include "radv_shader_args.h" 20 21 struct radv_device; 22 struct nir_shader; 23 struct radv_shader_layout; 24 struct radv_shader_stage_key; 25 enum radv_pipeline_type; 26 struct radv_shader_stage; 27 28 enum radv_shader_type { 29 RADV_SHADER_TYPE_DEFAULT = 0, 30 RADV_SHADER_TYPE_GS_COPY, 31 RADV_SHADER_TYPE_TRAP_HANDLER, 32 RADV_SHADER_TYPE_RT_PROLOG, 33 }; 34 35 struct radv_vs_output_info { 36 uint8_t vs_output_param_offset[VARYING_SLOT_MAX]; 37 uint8_t clip_dist_mask; 38 uint8_t cull_dist_mask; 39 uint8_t param_exports; 40 uint8_t prim_param_exports; 41 bool writes_pointsize; 42 bool writes_layer; 43 bool writes_layer_per_primitive; 44 bool writes_viewport_index; 45 bool writes_viewport_index_per_primitive; 46 bool writes_primitive_shading_rate; 47 bool writes_primitive_shading_rate_per_primitive; 48 bool export_prim_id; 49 bool export_prim_id_per_primitive; 50 unsigned pos_exports; 51 }; 52 53 struct radv_streamout_info { 54 uint16_t num_outputs; 55 uint16_t strides[MAX_SO_BUFFERS]; 56 uint32_t enabled_stream_buffers_mask; 57 }; 58 59 struct radv_legacy_gs_info { 60 uint32_t gs_inst_prims_in_subgroup; 61 uint32_t es_verts_per_subgroup; 62 uint32_t gs_prims_per_subgroup; 63 uint32_t esgs_itemsize; 64 uint32_t lds_size; 65 uint32_t esgs_ring_size; 66 uint32_t gsvs_ring_size; 67 }; 68 69 struct gfx10_ngg_info { 70 uint16_t ngg_emit_size; /* in dwords */ 71 uint32_t hw_max_esverts; 72 uint32_t max_gsprims; 73 uint32_t max_out_verts; 74 uint32_t prim_amp_factor; 75 uint32_t vgt_esgs_ring_itemsize; 76 uint32_t esgs_ring_size; 77 uint32_t scratch_lds_base; 78 uint32_t lds_size; 79 bool max_vert_out_per_gs_instance; 80 }; 81 82 struct radv_shader_info { 83 uint64_t inline_push_constant_mask; 84 bool can_inline_all_push_constants; 85 bool loads_push_constants; 86 bool loads_dynamic_offsets; 87 uint32_t desc_set_used_mask; 88 bool uses_view_index; 89 bool uses_invocation_id; 90 bool uses_prim_id; 91 uint8_t wave_size; 92 uint8_t ballot_bit_size; 93 struct radv_userdata_locations user_sgprs_locs; 94 bool is_ngg; 95 bool is_ngg_passthrough; 96 bool has_ngg_culling; 97 bool has_ngg_early_prim_export; 98 bool has_prim_query; 99 bool has_xfb_query; 100 uint32_t num_tess_patches; 101 uint32_t esgs_itemsize; /* Only for VS or TES as ES */ 102 struct radv_vs_output_info outinfo; 103 unsigned workgroup_size; 104 bool force_vrs_per_vertex; 105 gl_shader_stage stage; 106 gl_shader_stage next_stage; 107 enum radv_shader_type type; 108 uint32_t user_data_0; 109 bool inputs_linked; 110 bool outputs_linked; 111 bool merged_shader_compiled_separately; /* GFX9+ */ 112 bool force_indirect_desc_sets; 113 uint64_t gs_inputs_read; /* Mask of GS inputs read (only used by linked ES) */ 114 115 struct { 116 uint8_t output_usage_mask[VARYING_SLOT_VAR31 + 1]; 117 bool needs_draw_id; 118 bool needs_instance_id; 119 bool as_es; 120 bool as_ls; 121 bool tcs_in_out_eq; 122 uint64_t tcs_inputs_via_temp; 123 uint64_t tcs_inputs_via_lds; 124 uint8_t num_linked_outputs; 125 bool needs_base_instance; 126 bool use_per_attribute_vb_descs; 127 uint32_t vb_desc_usage_mask; 128 uint32_t input_slot_usage_mask; 129 bool has_prolog; 130 bool dynamic_inputs; 131 bool dynamic_num_verts_per_prim; 132 uint32_t num_outputs; /* For NGG streamout only */ 133 } vs; 134 struct { 135 uint8_t output_usage_mask[VARYING_SLOT_VAR31 + 1]; 136 uint8_t num_stream_output_components[4]; 137 uint8_t output_streams[VARYING_SLOT_VAR31 + 1]; 138 uint8_t max_stream; 139 unsigned gsvs_vertex_size; 140 unsigned max_gsvs_emit_size; 141 unsigned vertices_in; 142 unsigned vertices_out; 143 unsigned input_prim; 144 unsigned output_prim; 145 unsigned invocations; 146 unsigned es_type; /* GFX9: VS or TES */ 147 uint8_t num_linked_inputs; 148 bool has_pipeline_stat_query; 149 } gs; 150 struct { 151 uint8_t output_usage_mask[VARYING_SLOT_VAR31 + 1]; 152 bool as_es; 153 enum tess_primitive_mode _primitive_mode; 154 enum gl_tess_spacing spacing; 155 bool ccw; 156 bool point_mode; 157 bool reads_tess_factors; 158 unsigned tcs_vertices_out; 159 uint8_t num_linked_inputs; /* Number of reserved per-vertex input slots in VRAM. */ 160 uint8_t num_linked_patch_inputs; /* Number of reserved per-patch input slots in VRAM. */ 161 uint8_t num_linked_outputs; 162 uint32_t num_outputs; /* For NGG streamout only */ 163 } tes; 164 struct { 165 bool uses_sample_shading; 166 bool needs_sample_positions; 167 bool needs_poly_line_smooth; 168 bool writes_memory; 169 bool writes_z; 170 bool writes_stencil; 171 bool writes_sample_mask; 172 bool writes_mrt0_alpha; 173 bool exports_mrtz_via_epilog; 174 bool has_pcoord; 175 bool prim_id_input; 176 bool viewport_index_input; 177 uint8_t input_clips_culls_mask; 178 uint32_t input_mask; 179 uint32_t input_per_primitive_mask; 180 uint32_t float32_shaded_mask; 181 uint32_t explicit_shaded_mask; 182 uint32_t explicit_strict_shaded_mask; 183 uint32_t float16_shaded_mask; 184 uint32_t float16_hi_shaded_mask; 185 uint32_t num_inputs; 186 bool can_discard; 187 bool early_fragment_test; 188 bool post_depth_coverage; 189 bool reads_sample_mask_in; 190 bool reads_front_face; 191 bool reads_sample_id; 192 bool reads_frag_shading_rate; 193 bool reads_barycentric_model; 194 bool reads_persp_sample; 195 bool reads_persp_center; 196 bool reads_persp_centroid; 197 bool reads_linear_sample; 198 bool reads_linear_center; 199 bool reads_linear_centroid; 200 bool reads_fully_covered; 201 bool reads_pixel_coord; 202 bool reads_layer; 203 uint8_t reads_frag_coord_mask; 204 uint8_t reads_sample_pos_mask; 205 uint8_t depth_layout; 206 bool allow_flat_shading; 207 bool pops; /* Uses Primitive Ordered Pixel Shading (fragment shader interlock) */ 208 bool pops_is_per_sample; 209 bool mrt0_is_dual_src; 210 uint32_t spi_ps_input_ena; 211 uint32_t spi_ps_input_addr; 212 uint32_t colors_written; /* Mask of outputs written */ 213 uint32_t spi_shader_col_format; 214 uint32_t cb_shader_mask; 215 uint8_t color0_written; 216 bool load_provoking_vtx; 217 bool load_rasterization_prim; 218 bool force_sample_iter_shading_rate; 219 bool uses_fbfetch_output; 220 bool has_epilog; 221 } ps; 222 struct { 223 bool uses_grid_size; 224 bool uses_block_id[3]; 225 bool uses_thread_id[3]; 226 bool uses_local_invocation_idx; 227 unsigned block_size[3]; 228 229 bool uses_rt; 230 bool uses_full_subgroups; 231 bool linear_taskmesh_dispatch; 232 bool has_query; /* Task shader only */ 233 234 bool regalloc_hang_bug; 235 } cs; 236 struct { 237 uint64_t tes_inputs_read; 238 uint64_t tes_patch_inputs_read; 239 uint64_t tcs_outputs_read; 240 uint64_t tcs_outputs_written; 241 uint32_t tcs_patch_outputs_read; 242 uint32_t tcs_patch_outputs_written; 243 unsigned tcs_vertices_out; 244 uint32_t num_lds_blocks; 245 uint8_t num_linked_inputs; /* Number of reserved per-vertex input slots in LDS. */ 246 uint8_t num_linked_outputs; /* Number of reserved per-vertex output slots in VRAM. */ 247 uint8_t num_linked_patch_outputs; /* Number of reserved per-patch output slots in VRAM. */ 248 bool tes_reads_tess_factors : 1; 249 nir_tcs_info info; 250 } tcs; 251 struct { 252 enum mesa_prim output_prim; 253 bool needs_ms_scratch_ring; 254 bool has_task; /* If mesh shader is used together with a task shader. */ 255 bool has_query; 256 } ms; 257 258 struct radv_streamout_info so; 259 260 struct radv_legacy_gs_info gs_ring_info; 261 struct gfx10_ngg_info ngg_info; 262 263 /* Precomputed register values. */ 264 struct { 265 uint32_t pgm_lo; 266 uint32_t pgm_rsrc1; 267 uint32_t pgm_rsrc2; 268 uint32_t pgm_rsrc3; 269 270 struct { 271 uint32_t spi_shader_late_alloc_vs; 272 uint32_t spi_shader_pgm_rsrc3_vs; 273 uint32_t vgt_reuse_off; 274 } vs; 275 276 struct { 277 uint32_t vgt_esgs_ring_itemsize; 278 uint32_t vgt_gs_instance_cnt; 279 uint32_t vgt_gs_max_prims_per_subgroup; 280 uint32_t vgt_gs_vert_itemsize[4]; 281 uint32_t vgt_gsvs_ring_itemsize; 282 uint32_t vgt_gsvs_ring_offset[3]; 283 } gs; 284 285 struct { 286 uint32_t ge_cntl; /* Not fully precomputed. */ 287 uint32_t ge_max_output_per_subgroup; 288 uint32_t ge_ngg_subgrp_cntl; 289 uint32_t spi_shader_idx_format; 290 uint32_t vgt_primitiveid_en; 291 } ngg; 292 293 struct { 294 uint32_t spi_shader_gs_meshlet_dim; 295 uint32_t spi_shader_gs_meshlet_exp_alloc; 296 } ms; 297 298 struct { 299 uint32_t db_shader_control; 300 uint32_t pa_sc_shader_control; 301 uint32_t spi_ps_in_control; 302 uint32_t spi_shader_z_format; 303 uint32_t spi_gs_out_config_ps; 304 uint32_t pa_sc_hisz_control; 305 } ps; 306 307 struct { 308 uint32_t compute_num_thread_x; 309 uint32_t compute_num_thread_y; 310 uint32_t compute_num_thread_z; 311 uint32_t compute_resource_limits; 312 } cs; 313 314 /* Common registers between stages. */ 315 uint32_t vgt_gs_max_vert_out; 316 uint32_t vgt_gs_onchip_cntl; 317 uint32_t spi_shader_pgm_rsrc3_gs; 318 uint32_t spi_shader_pgm_rsrc4_gs; 319 uint32_t ge_pc_alloc; 320 uint32_t pa_cl_vs_out_cntl; 321 uint32_t spi_vs_out_config; 322 uint32_t spi_shader_pos_format; 323 uint32_t vgt_gs_instance_cnt; 324 } regs; 325 }; 326 327 void radv_nir_shader_info_init(gl_shader_stage stage, gl_shader_stage next_stage, struct radv_shader_info *info); 328 329 void radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *nir, 330 const struct radv_shader_layout *layout, const struct radv_shader_stage_key *stage_key, 331 const struct radv_graphics_state_key *gfx_state, 332 const enum radv_pipeline_type pipeline_type, bool consider_force_vrs, 333 struct radv_shader_info *info); 334 335 void gfx10_get_ngg_info(const struct radv_device *device, struct radv_shader_info *es_info, 336 struct radv_shader_info *gs_info, struct gfx10_ngg_info *out); 337 338 void radv_nir_shader_info_link(struct radv_device *device, const struct radv_graphics_state_key *gfx_state, 339 struct radv_shader_stage *stages); 340 341 enum ac_hw_stage radv_select_hw_stage(const struct radv_shader_info *const info, const enum amd_gfx_level gfx_level); 342 343 uint64_t radv_gather_unlinked_io_mask(const uint64_t nir_mask); 344 345 uint64_t radv_gather_unlinked_patch_io_mask(const uint64_t nir_io_mask, const uint32_t nir_patch_io_mask); 346 347 #endif /* RADV_SHADER_INFO_H */ 348