1 /* 2 * Copyright © 2016 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 */ 24 25 #ifndef SHADER_INFO_H 26 #define SHADER_INFO_H 27 28 #include "util/bitset.h" 29 #include "util/sha1/sha1.h" 30 #include "shader_enums.h" 31 #include <stdint.h> 32 33 #ifdef __cplusplus 34 extern "C" { 35 #endif 36 37 #define MAX_XFB_BUFFERS 4 38 #define MAX_INLINABLE_UNIFORMS 4 39 40 struct spirv_supported_capabilities { 41 bool address; 42 bool amd_fragment_mask; 43 bool amd_gcn_shader; 44 bool amd_image_gather_bias_lod; 45 bool amd_image_read_write_lod; 46 bool amd_shader_ballot; 47 bool amd_shader_explicit_vertex_parameter; 48 bool amd_trinary_minmax; 49 bool atomic_storage; 50 bool cooperative_matrix; 51 bool demote_to_helper_invocation; 52 bool derivative_group; 53 bool descriptor_array_dynamic_indexing; 54 bool descriptor_array_non_uniform_indexing; 55 bool descriptor_indexing; 56 bool device_group; 57 bool draw_parameters; 58 bool float_controls; 59 bool float16_atomic_add; 60 bool float16_atomic_min_max; 61 bool float16; 62 bool float32_atomic_add; 63 bool float32_atomic_min_max; 64 bool float64_atomic_add; 65 bool float64_atomic_min_max; 66 bool float64; 67 bool fragment_barycentric; 68 bool fragment_density; 69 bool fragment_fully_covered; 70 bool fragment_shader_pixel_interlock; 71 bool fragment_shader_sample_interlock; 72 bool fragment_shading_rate; 73 bool generic_pointers; 74 bool geometry_streams; 75 bool groups; 76 bool image_atomic_int64; 77 bool image_ms_array; 78 bool image_read_without_format; 79 bool image_write_without_format; 80 bool int16; 81 bool int64_atomics; 82 bool int64; 83 bool int8; 84 bool integer_functions2; 85 bool kernel_image_read_write; 86 bool kernel_image; 87 bool kernel; 88 bool linkage; 89 bool literal_sampler; 90 bool mesh_shading_nv; 91 bool mesh_shading; 92 bool min_lod; 93 bool multiview; 94 bool per_view_attributes_nv; 95 bool physical_storage_buffer_address; 96 bool post_depth_coverage; 97 bool printf; 98 bool quad_control; 99 bool ray_cull_mask; 100 bool ray_query; 101 bool ray_tracing; 102 bool ray_traversal_primitive_culling; 103 bool ray_tracing_position_fetch; 104 bool runtime_descriptor_array; 105 bool shader_clock; 106 bool shader_enqueue; 107 bool shader_sm_builtins_nv; 108 bool shader_viewport_index_layer; 109 bool shader_viewport_mask_nv; 110 bool sparse_residency; 111 bool stencil_export; 112 bool storage_16bit; 113 bool storage_8bit; 114 bool storage_image_ms; 115 bool subgroup_arithmetic; 116 bool subgroup_ballot; 117 bool subgroup_basic; 118 bool subgroup_dispatch; 119 bool subgroup_quad; 120 bool subgroup_rotate; 121 bool subgroup_shuffle; 122 bool subgroup_uniform_control_flow; 123 bool subgroup_vote; 124 bool tessellation; 125 bool transform_feedback; 126 bool variable_pointers; 127 bool vk_memory_model_device_scope; 128 bool vk_memory_model; 129 bool workgroup_memory_explicit_layout; 130 131 bool intel_subgroup_shuffle; 132 bool intel_subgroup_buffer_block_io; 133 }; 134 135 typedef struct shader_info { 136 const char *name; 137 138 /* Descriptive name provided by the client; may be NULL */ 139 const char *label; 140 141 /* Shader is internal, and should be ignored by things like NIR_DEBUG=print */ 142 bool internal; 143 144 /* SHA1 of the original source, used by shader detection in drivers. */ 145 uint8_t source_sha1[SHA1_DIGEST_LENGTH]; 146 147 /** The shader stage, such as MESA_SHADER_VERTEX. */ 148 gl_shader_stage stage:8; 149 150 /** The shader stage in a non SSO linked program that follows this stage, 151 * such as MESA_SHADER_FRAGMENT. 152 */ 153 gl_shader_stage next_stage:8; 154 155 /* Number of textures used by this shader */ 156 uint8_t num_textures; 157 /* Number of uniform buffers used by this shader */ 158 uint8_t num_ubos; 159 /* Number of atomic buffers used by this shader */ 160 uint8_t num_abos; 161 /* Number of shader storage buffers (max .driver_location + 1) used by this 162 * shader. In the case of nir_lower_atomics_to_ssbo being used, this will 163 * be the number of actual SSBOs in gl_program->info, and the lowered SSBOs 164 * and atomic counters in nir_shader->info. 165 */ 166 uint8_t num_ssbos; 167 /* Number of images used by this shader */ 168 uint8_t num_images; 169 170 /* Which inputs are actually read */ 171 uint64_t inputs_read; 172 /* Which inputs occupy 2 slots. */ 173 uint64_t dual_slot_inputs; 174 /* Which outputs are actually written */ 175 uint64_t outputs_written; 176 /* Which outputs are actually read */ 177 uint64_t outputs_read; 178 /* Which system values are actually read */ 179 BITSET_DECLARE(system_values_read, SYSTEM_VALUE_MAX); 180 181 /* Which I/O is per-primitive, for read/written information combine with 182 * the fields above. 183 */ 184 uint64_t per_primitive_inputs; 185 uint64_t per_primitive_outputs; 186 187 /* Which I/O is per-view */ 188 uint64_t per_view_outputs; 189 190 /* Which 16-bit inputs and outputs are used corresponding to 191 * VARYING_SLOT_VARn_16BIT. 192 */ 193 uint16_t inputs_read_16bit; 194 uint16_t outputs_written_16bit; 195 uint16_t outputs_read_16bit; 196 uint16_t inputs_read_indirectly_16bit; 197 uint16_t outputs_accessed_indirectly_16bit; 198 199 /* Which patch inputs are actually read */ 200 uint32_t patch_inputs_read; 201 /* Which patch outputs are actually written */ 202 uint32_t patch_outputs_written; 203 /* Which patch outputs are read */ 204 uint32_t patch_outputs_read; 205 206 /* Which inputs are read indirectly (subset of inputs_read) */ 207 uint64_t inputs_read_indirectly; 208 /* Which outputs are read or written indirectly */ 209 uint64_t outputs_accessed_indirectly; 210 /* Which patch inputs are read indirectly (subset of patch_inputs_read) */ 211 uint64_t patch_inputs_read_indirectly; 212 /* Which patch outputs are read or written indirectly */ 213 uint64_t patch_outputs_accessed_indirectly; 214 215 /** Bitfield of which textures are used */ 216 BITSET_DECLARE(textures_used, 128); 217 218 /** Bitfield of which textures are used by texelFetch() */ 219 BITSET_DECLARE(textures_used_by_txf, 128); 220 221 /** Bitfield of which samplers are used */ 222 BITSET_DECLARE(samplers_used, 32); 223 224 /** Bitfield of which images are used */ 225 BITSET_DECLARE(images_used, 64); 226 /** Bitfield of which images are buffers. */ 227 BITSET_DECLARE(image_buffers, 64); 228 /** Bitfield of which images are MSAA. */ 229 BITSET_DECLARE(msaa_images, 64); 230 231 /* SPV_KHR_float_controls: execution mode for floating point ops */ 232 uint32_t float_controls_execution_mode; 233 234 /** 235 * Size of shared variables accessed by compute/task/mesh shaders. 236 */ 237 unsigned shared_size; 238 239 /** 240 * Size of task payload variables accessed by task/mesh shaders. 241 */ 242 unsigned task_payload_size; 243 244 /** 245 * Number of ray tracing queries in the shader (counts all elements of all 246 * variables). 247 */ 248 unsigned ray_queries; 249 250 /** 251 * Local workgroup size used by compute/task/mesh shaders. 252 */ 253 uint16_t workgroup_size[3]; 254 255 enum gl_subgroup_size subgroup_size; 256 uint8_t num_subgroups; 257 258 /** 259 * Uses subgroup intrinsics which can communicate across a quad. 260 */ 261 bool uses_wide_subgroup_intrinsics; 262 263 /* Transform feedback buffer strides in dwords, max. 1K - 4. */ 264 uint8_t xfb_stride[MAX_XFB_BUFFERS]; 265 266 uint16_t inlinable_uniform_dw_offsets[MAX_INLINABLE_UNIFORMS]; 267 uint8_t num_inlinable_uniforms:4; 268 269 /* The size of the gl_ClipDistance[] array, if declared. */ 270 uint8_t clip_distance_array_size:4; 271 272 /* The size of the gl_CullDistance[] array, if declared. */ 273 uint8_t cull_distance_array_size:4; 274 275 /* Whether or not this shader ever uses textureGather() */ 276 bool uses_texture_gather:1; 277 278 /* Whether texture size, levels, or samples is queried. */ 279 bool uses_resource_info_query:1; 280 281 /** 282 * True if this shader uses the fddx/fddy opcodes. 283 * 284 * Note that this does not include the "fine" and "coarse" variants. 285 */ 286 bool uses_fddx_fddy:1; 287 288 /** Has divergence analysis ever been run? */ 289 bool divergence_analysis_run:1; 290 291 /* Bitmask of bit-sizes used with ALU instructions. */ 292 uint8_t bit_sizes_float; 293 uint8_t bit_sizes_int; 294 295 /* Whether the first UBO is the default uniform buffer, i.e. uniforms. */ 296 bool first_ubo_is_default_ubo:1; 297 298 /* Whether or not separate shader objects were used */ 299 bool separate_shader:1; 300 301 /** Was this shader linked with any transform feedback varyings? */ 302 bool has_transform_feedback_varyings:1; 303 304 /* Whether flrp has been lowered. */ 305 bool flrp_lowered:1; 306 307 /* Whether nir_lower_io has been called to lower derefs. 308 * nir_variables for inputs and outputs might not be present in the IR. 309 */ 310 bool io_lowered:1; 311 312 /** Has nir_lower_var_copies called. To avoid calling any 313 * lowering/optimization that would introduce any copy_deref later. 314 */ 315 bool var_copies_lowered:1; 316 317 /* Whether the shader writes memory, including transform feedback. */ 318 bool writes_memory:1; 319 320 /* Whether gl_Layer is viewport-relative */ 321 bool layer_viewport_relative:1; 322 323 /* Whether explicit barriers are used */ 324 bool uses_control_barrier : 1; 325 bool uses_memory_barrier : 1; 326 327 /* Whether ARB_bindless_texture ops or variables are used */ 328 bool uses_bindless : 1; 329 330 /** 331 * Shared memory types have explicit layout set. Used for 332 * SPV_KHR_workgroup_storage_explicit_layout. 333 */ 334 bool shared_memory_explicit_layout:1; 335 336 /** 337 * Used for VK_KHR_zero_initialize_workgroup_memory. 338 */ 339 bool zero_initialize_shared_memory:1; 340 341 /** 342 * Used for ARB_compute_variable_group_size. 343 */ 344 bool workgroup_size_variable:1; 345 346 /** 347 * Whether the shader uses printf instructions. 348 */ 349 bool uses_printf:1; 350 351 /** 352 * VK_KHR_shader_maximal_reconvergence 353 */ 354 bool maximally_reconverges:1; 355 356 /** 357 * Set if this shader uses legacy (DX9 or ARB assembly) math rules. 358 * 359 * From the ARB_fragment_program specification: 360 * 361 * "The following rules apply to multiplication: 362 * 363 * 1. <x> * <y> == <y> * <x>, for all <x> and <y>. 364 * 2. +/-0.0 * <x> = +/-0.0, at least for all <x> that correspond to 365 * *representable numbers (IEEE "not a number" and "infinity" 366 * *encodings may be exceptions). 367 * 3. +1.0 * <x> = <x>, for all <x>."" 368 * 369 * However, in effect this was due to DX9 semantics implying that 0*x=0 even 370 * for inf/nan if the hardware generated them instead of float_min/max. So, 371 * you should not have an exception for inf/nan to rule 2 above. 372 * 373 * One implementation of this behavior would be to flush all generated NaNs 374 * to zero, at which point 0*Inf=Nan=0. Most DX9/ARB-asm hardware did not 375 * generate NaNs, and the only way the GPU saw one was to possibly feed it 376 * in as a uniform. 377 */ 378 bool use_legacy_math_rules; 379 380 union { 381 struct { 382 /* Which inputs are doubles */ 383 uint64_t double_inputs; 384 385 /* For AMD-specific driver-internal shaders. It replaces vertex 386 * buffer loads with code generating VS inputs from scalar registers. 387 * 388 * Valid values: SI_VS_BLIT_SGPRS_POS_* 389 */ 390 uint8_t blit_sgprs_amd:4; 391 392 /* Software TES executing as HW VS */ 393 bool tes_agx:1; 394 395 /* True if the shader writes position in window space coordinates pre-transform */ 396 bool window_space_position:1; 397 398 /** Is an edge flag input needed? */ 399 bool needs_edge_flag:1; 400 } vs; 401 402 struct { 403 /** The output primitive type */ 404 enum mesa_prim output_primitive; 405 406 /** The input primitive type */ 407 enum mesa_prim input_primitive; 408 409 /** The maximum number of vertices the geometry shader might write. */ 410 uint16_t vertices_out; 411 412 /** 1 .. MAX_GEOMETRY_SHADER_INVOCATIONS */ 413 uint8_t invocations; 414 415 /** The number of vertices received per input primitive (max. 6) */ 416 uint8_t vertices_in:3; 417 418 /** Whether or not this shader uses EndPrimitive */ 419 bool uses_end_primitive:1; 420 421 /** The streams used in this shaders (max. 4) */ 422 uint8_t active_stream_mask:4; 423 } gs; 424 425 struct { 426 bool uses_discard:1; 427 bool uses_demote:1; 428 bool uses_fbfetch_output:1; 429 bool fbfetch_coherent:1; 430 bool color_is_dual_source:1; 431 432 /** 433 * True if this fragment shader requires full quad invocations. 434 */ 435 bool require_full_quads:1; 436 437 /** 438 * Whether the derivative group must be equivalent to the quad group. 439 */ 440 bool quad_derivatives:1; 441 442 /** 443 * True if this fragment shader requires helper invocations. This 444 * can be caused by the use of ALU derivative ops, texture 445 * instructions which do implicit derivatives, the use of quad 446 * subgroup operations or if the shader requires full quads. 447 */ 448 bool needs_quad_helper_invocations:1; 449 450 /** 451 * Whether any inputs are declared with the "sample" qualifier. 452 */ 453 bool uses_sample_qualifier:1; 454 455 /** 456 * Whether sample shading is used. 457 */ 458 bool uses_sample_shading:1; 459 460 /** 461 * Whether early fragment tests are enabled as defined by 462 * ARB_shader_image_load_store. 463 */ 464 bool early_fragment_tests:1; 465 466 /** 467 * Defined by INTEL_conservative_rasterization. 468 */ 469 bool inner_coverage:1; 470 471 bool post_depth_coverage:1; 472 473 /** 474 * \name ARB_fragment_coord_conventions 475 * @{ 476 */ 477 bool pixel_center_integer:1; 478 bool origin_upper_left:1; 479 /*@}*/ 480 481 bool pixel_interlock_ordered:1; 482 bool pixel_interlock_unordered:1; 483 bool sample_interlock_ordered:1; 484 bool sample_interlock_unordered:1; 485 486 /** 487 * Flags whether NIR's base types on the FS color outputs should be 488 * ignored. 489 * 490 * GLSL requires that fragment shader output base types match the 491 * render target's base types for the behavior to be defined. From 492 * the GL 4.6 spec: 493 * 494 * "If the values written by the fragment shader do not match the 495 * format(s) of the corresponding color buffer(s), the result is 496 * undefined." 497 * 498 * However, for NIR shaders translated from TGSI, we don't have the 499 * output types any more, so the driver will need to do whatever 500 * fixups are necessary to handle effectively untyped data being 501 * output from the FS. 502 */ 503 bool untyped_color_outputs:1; 504 505 /** gl_FragDepth layout for ARB_conservative_depth. */ 506 enum gl_frag_depth_layout depth_layout:3; 507 508 /** 509 * Interpolation qualifiers for drivers that lowers color inputs 510 * to system values. 511 */ 512 unsigned color0_interp:3; /* glsl_interp_mode */ 513 bool color0_sample:1; 514 bool color0_centroid:1; 515 unsigned color1_interp:3; /* glsl_interp_mode */ 516 bool color1_sample:1; 517 bool color1_centroid:1; 518 519 /* Bitmask of gl_advanced_blend_mode values that may be used with this 520 * shader. 521 */ 522 unsigned advanced_blend_modes; 523 524 /** 525 * Defined by AMD_shader_early_and_late_fragment_tests. 526 */ 527 bool early_and_late_fragment_tests:1; 528 enum gl_frag_stencil_layout stencil_front_layout:3; 529 enum gl_frag_stencil_layout stencil_back_layout:3; 530 } fs; 531 532 struct { 533 uint16_t workgroup_size_hint[3]; 534 535 uint8_t user_data_components_amd:3; 536 537 /* 538 * Arrangement of invocations used to calculate derivatives in a compute 539 * shader. From NV_compute_shader_derivatives. 540 */ 541 enum gl_derivative_group derivative_group:2; 542 543 /* 544 * If the shader might run with shared mem on top of `shared_size`. 545 */ 546 bool has_variable_shared_mem:1; 547 548 /** 549 * If the shader has any use of a cooperative matrix. From 550 * SPV_KHR_cooperative_matrix. 551 */ 552 bool has_cooperative_matrix:1; 553 554 /** 555 * pointer size is: 556 * AddressingModelLogical: 0 (default) 557 * AddressingModelPhysical32: 32 558 * AddressingModelPhysical64: 64 559 */ 560 unsigned ptr_size; 561 562 /** Index provided by VkPipelineShaderStageNodeCreateInfoAMDX or ShaderIndexAMDX */ 563 uint32_t shader_index; 564 565 /** Maximum size required by any output node payload array */ 566 uint32_t node_payloads_size; 567 568 /** Static workgroup count for overwriting the enqueued workgroup count. (0 if dynamic) */ 569 uint32_t workgroup_count[3]; 570 } cs; 571 572 /* Applies to both TCS and TES. */ 573 struct { 574 enum tess_primitive_mode _primitive_mode; 575 576 /** The number of vertices in the TCS output patch. */ 577 uint8_t tcs_vertices_out; 578 unsigned spacing:2; /*gl_tess_spacing*/ 579 580 /** Is the vertex order counterclockwise? */ 581 bool ccw:1; 582 bool point_mode:1; 583 584 /* Bit mask of TCS per-vertex inputs (VS outputs) that are used 585 * with a vertex index that is NOT the invocation id 586 */ 587 uint64_t tcs_cross_invocation_inputs_read; 588 589 /* Bit mask of TCS per-vertex outputs that are used 590 * with a vertex index that is NOT the invocation id 591 */ 592 uint64_t tcs_cross_invocation_outputs_read; 593 } tess; 594 595 /* Applies to MESH and TASK. */ 596 struct { 597 /* Bit mask of MS outputs that are used 598 * with an index that is NOT the local invocation index. 599 */ 600 uint64_t ms_cross_invocation_output_access; 601 602 /* Dimensions of task->mesh dispatch (EmitMeshTasksEXT) 603 * when they are known compile-time constants. 604 * 0 means they are not known. 605 */ 606 uint32_t ts_mesh_dispatch_dimensions[3]; 607 608 uint16_t max_vertices_out; 609 uint16_t max_primitives_out; 610 enum mesa_prim primitive_type; /* POINTS, LINES or TRIANGLES. */ 611 612 /* TODO: remove this when we stop supporting NV_mesh_shader. */ 613 bool nv; 614 } mesh; 615 }; 616 } shader_info; 617 618 #ifdef __cplusplus 619 } 620 #endif 621 622 #endif /* SHADER_INFO_H */ 623