1 /* 2 * Copyright © 2016 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 */ 24 25 #ifndef SHADER_INFO_H 26 #define SHADER_INFO_H 27 28 #include "util/bitset.h" 29 #include "util/mesa-blake3.h" 30 #include "shader_enums.h" 31 #include <stdint.h> 32 33 #ifdef __cplusplus 34 extern "C" { 35 #endif 36 37 #define MAX_XFB_BUFFERS 4 38 #define MAX_INLINABLE_UNIFORMS 4 39 40 typedef struct shader_info { 41 const char *name; 42 43 /* Descriptive name provided by the client; may be NULL */ 44 const char *label; 45 46 /* Shader is internal, and should be ignored by things like NIR_DEBUG=print */ 47 bool internal; 48 49 /* BLAKE3 of the original source, used by shader detection in drivers. */ 50 blake3_hash source_blake3; 51 52 /** The shader stage, such as MESA_SHADER_VERTEX. */ 53 gl_shader_stage stage:8; 54 55 /** The shader stage in a non SSO linked program that follows this stage, 56 * such as MESA_SHADER_FRAGMENT. 57 */ 58 gl_shader_stage next_stage:8; 59 60 /* Number of textures used by this shader */ 61 uint8_t num_textures; 62 /* Number of uniform buffers used by this shader */ 63 uint8_t num_ubos; 64 /* Number of atomic buffers used by this shader */ 65 uint8_t num_abos; 66 /* Number of shader storage buffers (max .driver_location + 1) used by this 67 * shader. In the case of nir_lower_atomics_to_ssbo being used, this will 68 * be the number of actual SSBOs in gl_program->info, and the lowered SSBOs 69 * and atomic counters in nir_shader->info. 70 */ 71 uint8_t num_ssbos; 72 /* Number of images used by this shader */ 73 uint8_t num_images; 74 75 /* Which inputs are actually read */ 76 uint64_t inputs_read; 77 /* Which inputs occupy 2 slots. */ 78 uint64_t dual_slot_inputs; 79 /* Which outputs are actually written */ 80 uint64_t outputs_written; 81 /* Which outputs are actually read */ 82 uint64_t outputs_read; 83 /* Which system values are actually read */ 84 BITSET_DECLARE(system_values_read, SYSTEM_VALUE_MAX); 85 86 /* Which I/O is per-primitive, for read/written information combine with 87 * the fields above. 88 */ 89 uint64_t per_primitive_inputs; 90 uint64_t per_primitive_outputs; 91 92 /* Which I/O is per-view */ 93 uint64_t per_view_outputs; 94 /* Enabled view mask, for per-view outputs */ 95 uint32_t view_mask; 96 97 /* Which 16-bit inputs and outputs are used corresponding to 98 * VARYING_SLOT_VARn_16BIT. 99 */ 100 uint16_t inputs_read_16bit; 101 uint16_t outputs_written_16bit; 102 uint16_t outputs_read_16bit; 103 uint16_t inputs_read_indirectly_16bit; 104 uint16_t outputs_accessed_indirectly_16bit; 105 106 /* Which patch inputs are actually read */ 107 uint32_t patch_inputs_read; 108 /* Which patch outputs are actually written */ 109 uint32_t patch_outputs_written; 110 /* Which patch outputs are read */ 111 uint32_t patch_outputs_read; 112 113 /* Which inputs are read indirectly (subset of inputs_read) */ 114 uint64_t inputs_read_indirectly; 115 /* Which outputs are read or written indirectly */ 116 uint64_t outputs_accessed_indirectly; 117 /* Which patch inputs are read indirectly (subset of patch_inputs_read) */ 118 uint64_t patch_inputs_read_indirectly; 119 /* Which patch outputs are read or written indirectly */ 120 uint64_t patch_outputs_accessed_indirectly; 121 122 /** Bitfield of which textures are used */ 123 BITSET_DECLARE(textures_used, 128); 124 125 /** Bitfield of which textures are used by texelFetch() */ 126 BITSET_DECLARE(textures_used_by_txf, 128); 127 128 /** Bitfield of which samplers are used */ 129 BITSET_DECLARE(samplers_used, 32); 130 131 /** Bitfield of which images are used */ 132 BITSET_DECLARE(images_used, 64); 133 /** Bitfield of which images are buffers. */ 134 BITSET_DECLARE(image_buffers, 64); 135 /** Bitfield of which images are MSAA. */ 136 BITSET_DECLARE(msaa_images, 64); 137 138 /* SPV_KHR_float_controls: execution mode for floating point ops */ 139 uint32_t float_controls_execution_mode; 140 141 /** 142 * Size of shared variables accessed by compute/task/mesh shaders. 143 */ 144 unsigned shared_size; 145 146 /** 147 * Size of task payload variables accessed by task/mesh shaders. 148 */ 149 unsigned task_payload_size; 150 151 /** 152 * Number of ray tracing queries in the shader (counts all elements of all 153 * variables). 154 */ 155 unsigned ray_queries; 156 157 /** 158 * Local workgroup size used by compute/task/mesh shaders. 159 */ 160 uint16_t workgroup_size[3]; 161 162 enum gl_subgroup_size subgroup_size; 163 uint8_t num_subgroups; 164 165 /** 166 * Uses subgroup intrinsics which can communicate across a quad. 167 */ 168 bool uses_wide_subgroup_intrinsics; 169 170 /* Transform feedback buffer strides in dwords, max. 1K - 4. */ 171 uint8_t xfb_stride[MAX_XFB_BUFFERS]; 172 173 uint16_t inlinable_uniform_dw_offsets[MAX_INLINABLE_UNIFORMS]; 174 uint8_t num_inlinable_uniforms:4; 175 176 /* The size of the gl_ClipDistance[] array, if declared. */ 177 uint8_t clip_distance_array_size:4; 178 179 /* The size of the gl_CullDistance[] array, if declared. */ 180 uint8_t cull_distance_array_size:4; 181 182 /* Whether or not this shader ever uses textureGather() */ 183 bool uses_texture_gather:1; 184 185 /* Whether texture size, levels, or samples is queried. */ 186 bool uses_resource_info_query:1; 187 188 /** Has divergence analysis ever been run? */ 189 bool divergence_analysis_run:1; 190 191 /* Bitmask of bit-sizes used with ALU instructions. */ 192 uint8_t bit_sizes_float; 193 uint8_t bit_sizes_int; 194 195 /* Whether the first UBO is the default uniform buffer, i.e. uniforms. */ 196 bool first_ubo_is_default_ubo:1; 197 198 /* Whether or not separate shader objects were used */ 199 bool separate_shader:1; 200 201 /** Was this shader linked with any transform feedback varyings? */ 202 bool has_transform_feedback_varyings:1; 203 204 /* Whether flrp has been lowered. */ 205 bool flrp_lowered:1; 206 207 /* Whether nir_lower_io has been called to lower derefs. 208 * nir_variables for inputs and outputs might not be present in the IR. 209 */ 210 bool io_lowered:1; 211 212 /** Has nir_lower_var_copies called. To avoid calling any 213 * lowering/optimization that would introduce any copy_deref later. 214 */ 215 bool var_copies_lowered:1; 216 217 /* Whether the shader writes memory, including transform feedback. */ 218 bool writes_memory:1; 219 220 /* Whether gl_Layer is viewport-relative */ 221 bool layer_viewport_relative:1; 222 223 /* Whether explicit barriers are used */ 224 bool uses_control_barrier : 1; 225 bool uses_memory_barrier : 1; 226 227 /* Whether ARB_bindless_texture ops or variables are used */ 228 bool uses_bindless : 1; 229 230 /** 231 * Shared memory types have explicit layout set. Used for 232 * SPV_KHR_workgroup_storage_explicit_layout. 233 */ 234 bool shared_memory_explicit_layout:1; 235 236 /** 237 * Used for VK_KHR_zero_initialize_workgroup_memory. 238 */ 239 bool zero_initialize_shared_memory:1; 240 241 /** 242 * Used for ARB_compute_variable_group_size. 243 */ 244 bool workgroup_size_variable:1; 245 246 /** 247 * Whether the shader uses printf instructions. 248 */ 249 bool uses_printf:1; 250 251 /** 252 * VK_KHR_shader_maximal_reconvergence 253 */ 254 bool maximally_reconverges:1; 255 256 /* Use ACO instead of LLVM on AMD. */ 257 bool use_aco_amd:1; 258 259 /** 260 * Set if this shader uses legacy (DX9 or ARB assembly) math rules. 261 * 262 * From the ARB_fragment_program specification: 263 * 264 * "The following rules apply to multiplication: 265 * 266 * 1. <x> * <y> == <y> * <x>, for all <x> and <y>. 267 * 2. +/-0.0 * <x> = +/-0.0, at least for all <x> that correspond to 268 * *representable numbers (IEEE "not a number" and "infinity" 269 * *encodings may be exceptions). 270 * 3. +1.0 * <x> = <x>, for all <x>."" 271 * 272 * However, in effect this was due to DX9 semantics implying that 0*x=0 even 273 * for inf/nan if the hardware generated them instead of float_min/max. So, 274 * you should not have an exception for inf/nan to rule 2 above. 275 * 276 * One implementation of this behavior would be to flush all generated NaNs 277 * to zero, at which point 0*Inf=Nan=0. Most DX9/ARB-asm hardware did not 278 * generate NaNs, and the only way the GPU saw one was to possibly feed it 279 * in as a uniform. 280 */ 281 bool use_legacy_math_rules; 282 283 /* 284 * Arrangement of invocations used to calculate derivatives in 285 * compute/task/mesh shaders. From KHR_compute_shader_derivatives. 286 */ 287 enum gl_derivative_group derivative_group:2; 288 289 union { 290 struct { 291 /* Which inputs are doubles */ 292 uint64_t double_inputs; 293 294 /* For AMD-specific driver-internal shaders. It replaces vertex 295 * buffer loads with code generating VS inputs from scalar registers. 296 * 297 * Valid values: SI_VS_BLIT_SGPRS_POS_* 298 */ 299 uint8_t blit_sgprs_amd:4; 300 301 /* Software TES executing as HW VS */ 302 bool tes_agx:1; 303 304 /* True if the shader writes position in window space coordinates pre-transform */ 305 bool window_space_position:1; 306 307 /** Is an edge flag input needed? */ 308 bool needs_edge_flag:1; 309 } vs; 310 311 struct { 312 /** The output primitive type */ 313 enum mesa_prim output_primitive; 314 315 /** The input primitive type */ 316 enum mesa_prim input_primitive; 317 318 /** The maximum number of vertices the geometry shader might write. */ 319 uint16_t vertices_out; 320 321 /** 1 .. MAX_GEOMETRY_SHADER_INVOCATIONS */ 322 uint8_t invocations; 323 324 /** The number of vertices received per input primitive (max. 6) */ 325 uint8_t vertices_in:3; 326 327 /** Whether or not this shader uses EndPrimitive */ 328 bool uses_end_primitive:1; 329 330 /** The streams used in this shaders (max. 4) */ 331 uint8_t active_stream_mask:4; 332 } gs; 333 334 struct { 335 bool uses_discard:1; 336 bool uses_fbfetch_output:1; 337 bool fbfetch_coherent:1; 338 bool color_is_dual_source:1; 339 340 /** 341 * True if this fragment shader requires full quad invocations. 342 */ 343 bool require_full_quads:1; 344 345 /** 346 * Whether the derivative group must be equivalent to the quad group. 347 */ 348 bool quad_derivatives:1; 349 350 /** 351 * True if this fragment shader requires helper invocations. This 352 * can be caused by the use of ALU derivative ops, texture 353 * instructions which do implicit derivatives, the use of quad 354 * subgroup operations or if the shader requires full quads. 355 */ 356 bool needs_quad_helper_invocations:1; 357 358 /** 359 * Whether any inputs are declared with the "sample" qualifier. 360 */ 361 bool uses_sample_qualifier:1; 362 363 /** 364 * Whether sample shading is used. 365 */ 366 bool uses_sample_shading:1; 367 368 /** 369 * Whether early fragment tests are enabled as defined by 370 * ARB_shader_image_load_store. 371 */ 372 bool early_fragment_tests:1; 373 374 /** 375 * Defined by INTEL_conservative_rasterization. 376 */ 377 bool inner_coverage:1; 378 379 bool post_depth_coverage:1; 380 381 /** 382 * \name ARB_fragment_coord_conventions 383 * @{ 384 */ 385 bool pixel_center_integer:1; 386 bool origin_upper_left:1; 387 /*@}*/ 388 389 bool pixel_interlock_ordered:1; 390 bool pixel_interlock_unordered:1; 391 bool sample_interlock_ordered:1; 392 bool sample_interlock_unordered:1; 393 394 /** 395 * Flags whether NIR's base types on the FS color outputs should be 396 * ignored. 397 * 398 * GLSL requires that fragment shader output base types match the 399 * render target's base types for the behavior to be defined. From 400 * the GL 4.6 spec: 401 * 402 * "If the values written by the fragment shader do not match the 403 * format(s) of the corresponding color buffer(s), the result is 404 * undefined." 405 * 406 * However, for NIR shaders translated from TGSI, we don't have the 407 * output types any more, so the driver will need to do whatever 408 * fixups are necessary to handle effectively untyped data being 409 * output from the FS. 410 */ 411 bool untyped_color_outputs:1; 412 413 /** gl_FragDepth layout for ARB_conservative_depth. */ 414 enum gl_frag_depth_layout depth_layout:3; 415 416 /** 417 * Interpolation qualifiers for drivers that lowers color inputs 418 * to system values. 419 */ 420 unsigned color0_interp:3; /* glsl_interp_mode */ 421 bool color0_sample:1; 422 bool color0_centroid:1; 423 unsigned color1_interp:3; /* glsl_interp_mode */ 424 bool color1_sample:1; 425 bool color1_centroid:1; 426 427 /* Bitmask of gl_advanced_blend_mode values that may be used with this 428 * shader. 429 */ 430 unsigned advanced_blend_modes; 431 432 /** 433 * Defined by AMD_shader_early_and_late_fragment_tests. 434 */ 435 bool early_and_late_fragment_tests:1; 436 enum gl_frag_stencil_layout stencil_front_layout:3; 437 enum gl_frag_stencil_layout stencil_back_layout:3; 438 } fs; 439 440 struct { 441 uint16_t workgroup_size_hint[3]; 442 443 uint8_t user_data_components_amd:4; 444 445 /* 446 * If the shader might run with shared mem on top of `shared_size`. 447 */ 448 bool has_variable_shared_mem:1; 449 450 /** 451 * If the shader has any use of a cooperative matrix. From 452 * SPV_KHR_cooperative_matrix. 453 */ 454 bool has_cooperative_matrix:1; 455 456 /** 457 * Number of bytes of shared imageblock memory per thread. Currently, 458 * this requires that the workgroup size is 32x32x1 and that 459 * shared_size = 0. These requirements could be lifted in the future. 460 * However, there is no current OpenGL/Vulkan API support for 461 * imageblocks. This is only used internally to accelerate blit/copy. 462 */ 463 uint8_t image_block_size_per_thread_agx; 464 465 /** 466 * pointer size is: 467 * AddressingModelLogical: 0 (default) 468 * AddressingModelPhysical32: 32 469 * AddressingModelPhysical64: 64 470 */ 471 unsigned ptr_size; 472 473 /** Index provided by VkPipelineShaderStageNodeCreateInfoAMDX or ShaderIndexAMDX */ 474 uint32_t shader_index; 475 476 /** Maximum size required by any output node payload array */ 477 uint32_t node_payloads_size; 478 479 /** Static workgroup count for overwriting the enqueued workgroup count. (0 if dynamic) */ 480 uint32_t workgroup_count[3]; 481 } cs; 482 483 /* Applies to both TCS and TES. */ 484 struct { 485 enum tess_primitive_mode _primitive_mode; 486 487 /** The number of vertices in the TCS output patch. */ 488 uint8_t tcs_vertices_out; 489 unsigned spacing:2; /*gl_tess_spacing*/ 490 491 /** Is the vertex order counterclockwise? */ 492 bool ccw:1; 493 bool point_mode:1; 494 495 /* Bit mask of TCS per-vertex inputs (VS outputs) that are used 496 * with a vertex index that is equal to the invocation id. 497 * 498 * Not mutually exclusive with tcs_cross_invocation_inputs_read, i.e. 499 * both input[0] and input[invocation_id] can be present. 500 */ 501 uint64_t tcs_same_invocation_inputs_read; 502 503 /* Bit mask of TCS per-vertex inputs (VS outputs) that are used 504 * with a vertex index that is NOT the invocation id 505 */ 506 uint64_t tcs_cross_invocation_inputs_read; 507 508 /* Bit mask of TCS per-vertex outputs that are used 509 * with a vertex index that is NOT the invocation id 510 */ 511 uint64_t tcs_cross_invocation_outputs_read; 512 } tess; 513 514 /* Applies to MESH and TASK. */ 515 struct { 516 /* Bit mask of MS outputs that are used 517 * with an index that is NOT the local invocation index. 518 */ 519 uint64_t ms_cross_invocation_output_access; 520 521 /* Dimensions of task->mesh dispatch (EmitMeshTasksEXT) 522 * when they are known compile-time constants. 523 * 0 means they are not known. 524 */ 525 uint32_t ts_mesh_dispatch_dimensions[3]; 526 527 uint16_t max_vertices_out; 528 uint16_t max_primitives_out; 529 enum mesa_prim primitive_type; /* POINTS, LINES or TRIANGLES. */ 530 531 /* TODO: remove this when we stop supporting NV_mesh_shader. */ 532 bool nv; 533 } mesh; 534 }; 535 } shader_info; 536 537 #ifdef __cplusplus 538 } 539 #endif 540 541 #endif /* SHADER_INFO_H */ 542