1 /* 2 * Copyright © 2016 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 */ 24 25 #ifndef SHADER_INFO_H 26 #define SHADER_INFO_H 27 28 #include "util/bitset.h" 29 #include "util/sha1/sha1.h" 30 #include "shader_enums.h" 31 #include <stdint.h> 32 33 #ifdef __cplusplus 34 extern "C" { 35 #endif 36 37 #define MAX_XFB_BUFFERS 4 38 #define MAX_INLINABLE_UNIFORMS 4 39 40 struct spirv_supported_capabilities { 41 bool address; 42 bool atomic_storage; 43 bool demote_to_helper_invocation; 44 bool derivative_group; 45 bool descriptor_array_dynamic_indexing; 46 bool descriptor_array_non_uniform_indexing; 47 bool descriptor_indexing; 48 bool device_group; 49 bool draw_parameters; 50 bool float16_atomic_add; 51 bool float16_atomic_min_max; 52 bool float32_atomic_add; 53 bool float32_atomic_min_max; 54 bool float64; 55 bool float64_atomic_add; 56 bool float64_atomic_min_max; 57 bool fragment_shader_sample_interlock; 58 bool fragment_shader_pixel_interlock; 59 bool fragment_shading_rate; 60 bool generic_pointers; 61 bool geometry_streams; 62 bool groups; 63 bool image_ms_array; 64 bool image_read_without_format; 65 bool image_write_without_format; 66 bool image_atomic_int64; 67 bool int8; 68 bool int16; 69 bool int64; 70 bool int64_atomics; 71 bool integer_functions2; 72 bool kernel; 73 bool kernel_image; 74 bool kernel_image_read_write; 75 bool linkage; 76 bool literal_sampler; 77 bool mesh_shading_nv; 78 bool min_lod; 79 bool multiview; 80 bool per_view_attributes_nv; 81 bool physical_storage_buffer_address; 82 bool post_depth_coverage; 83 bool printf; 84 bool ray_cull_mask; 85 bool ray_tracing; 86 bool ray_query; 87 bool ray_traversal_primitive_culling; 88 bool runtime_descriptor_array; 89 bool float_controls; 90 bool shader_clock; 91 bool shader_viewport_index_layer; 92 bool shader_viewport_mask_nv; 93 bool sparse_residency; 94 bool stencil_export; 95 bool storage_8bit; 96 bool storage_16bit; 97 bool storage_image_ms; 98 bool subgroup_arithmetic; 99 bool subgroup_ballot; 100 bool subgroup_basic; 101 bool subgroup_dispatch; 102 bool subgroup_quad; 103 bool subgroup_shuffle; 104 bool subgroup_uniform_control_flow; 105 bool subgroup_vote; 106 bool tessellation; 107 bool transform_feedback; 108 bool variable_pointers; 109 bool vk_memory_model; 110 bool vk_memory_model_device_scope; 111 bool workgroup_memory_explicit_layout; 112 bool float16; 113 bool amd_fragment_mask; 114 bool amd_gcn_shader; 115 bool amd_shader_ballot; 116 bool amd_trinary_minmax; 117 bool amd_image_read_write_lod; 118 bool amd_shader_explicit_vertex_parameter; 119 bool amd_image_gather_bias_lod; 120 121 bool intel_subgroup_shuffle; 122 bool intel_subgroup_buffer_block_io; 123 }; 124 125 typedef struct shader_info { 126 const char *name; 127 128 /* Descriptive name provided by the client; may be NULL */ 129 const char *label; 130 131 /* Shader is internal, and should be ignored by things like NIR_DEBUG=print */ 132 bool internal; 133 134 /* SHA1 of the original source, used by shader detection in drivers. */ 135 uint8_t source_sha1[SHA1_DIGEST_LENGTH]; 136 137 /** The shader stage, such as MESA_SHADER_VERTEX. */ 138 gl_shader_stage stage:8; 139 140 /** The shader stage in a non SSO linked program that follows this stage, 141 * such as MESA_SHADER_FRAGMENT. 142 */ 143 gl_shader_stage next_stage:8; 144 145 /* Number of textures used by this shader */ 146 uint8_t num_textures; 147 /* Number of uniform buffers used by this shader */ 148 uint8_t num_ubos; 149 /* Number of atomic buffers used by this shader */ 150 uint8_t num_abos; 151 /* Number of shader storage buffers (max .driver_location + 1) used by this 152 * shader. In the case of nir_lower_atomics_to_ssbo being used, this will 153 * be the number of actual SSBOs in gl_program->info, and the lowered SSBOs 154 * and atomic counters in nir_shader->info. 155 */ 156 uint8_t num_ssbos; 157 /* Number of images used by this shader */ 158 uint8_t num_images; 159 160 /* Which inputs are actually read */ 161 uint64_t inputs_read; 162 /* Which outputs are actually written */ 163 uint64_t outputs_written; 164 /* Which outputs are actually read */ 165 uint64_t outputs_read; 166 /* Which system values are actually read */ 167 BITSET_DECLARE(system_values_read, SYSTEM_VALUE_MAX); 168 169 /* Which I/O is per-primitive, for read/written information combine with 170 * the fields above. 171 */ 172 uint64_t per_primitive_inputs; 173 uint64_t per_primitive_outputs; 174 175 /* Which 16-bit inputs and outputs are used corresponding to 176 * VARYING_SLOT_VARn_16BIT. 177 */ 178 uint16_t inputs_read_16bit; 179 uint16_t outputs_written_16bit; 180 uint16_t outputs_read_16bit; 181 uint16_t inputs_read_indirectly_16bit; 182 uint16_t outputs_accessed_indirectly_16bit; 183 184 /* Which patch inputs are actually read */ 185 uint32_t patch_inputs_read; 186 /* Which patch outputs are actually written */ 187 uint32_t patch_outputs_written; 188 /* Which patch outputs are read */ 189 uint32_t patch_outputs_read; 190 191 /* Which inputs are read indirectly (subset of inputs_read) */ 192 uint64_t inputs_read_indirectly; 193 /* Which outputs are read or written indirectly */ 194 uint64_t outputs_accessed_indirectly; 195 /* Which patch inputs are read indirectly (subset of patch_inputs_read) */ 196 uint64_t patch_inputs_read_indirectly; 197 /* Which patch outputs are read or written indirectly */ 198 uint64_t patch_outputs_accessed_indirectly; 199 200 /** Bitfield of which textures are used */ 201 BITSET_DECLARE(textures_used, 128); 202 203 /** Bitfield of which textures are used by texelFetch() */ 204 BITSET_DECLARE(textures_used_by_txf, 128); 205 206 /** Bitfield of which samplers are used */ 207 BITSET_DECLARE(samplers_used, 32); 208 209 /** Bitfield of which images are used */ 210 BITSET_DECLARE(images_used, 64); 211 /** Bitfield of which images are buffers. */ 212 BITSET_DECLARE(image_buffers, 64); 213 /** Bitfield of which images are MSAA. */ 214 BITSET_DECLARE(msaa_images, 64); 215 216 /* SPV_KHR_float_controls: execution mode for floating point ops */ 217 uint16_t float_controls_execution_mode; 218 219 /** 220 * Size of shared variables accessed by compute/task/mesh shaders. 221 */ 222 unsigned shared_size; 223 224 /** 225 * Size of task payload variables accessed by task/mesh shaders. 226 */ 227 unsigned task_payload_size; 228 229 /** 230 * Number of ray tracing queries in the shader (counts all elements of all 231 * variables). 232 */ 233 unsigned ray_queries; 234 235 /** 236 * Local workgroup size used by compute/task/mesh shaders. 237 */ 238 uint16_t workgroup_size[3]; 239 240 enum gl_subgroup_size subgroup_size; 241 242 /* Transform feedback buffer strides in dwords, max. 1K - 4. */ 243 uint8_t xfb_stride[MAX_XFB_BUFFERS]; 244 245 uint16_t inlinable_uniform_dw_offsets[MAX_INLINABLE_UNIFORMS]; 246 uint8_t num_inlinable_uniforms:4; 247 248 /* The size of the gl_ClipDistance[] array, if declared. */ 249 uint8_t clip_distance_array_size:4; 250 251 /* The size of the gl_CullDistance[] array, if declared. */ 252 uint8_t cull_distance_array_size:4; 253 254 /* Whether or not this shader ever uses textureGather() */ 255 bool uses_texture_gather:1; 256 257 /** 258 * True if this shader uses the fddx/fddy opcodes. 259 * 260 * Note that this does not include the "fine" and "coarse" variants. 261 */ 262 bool uses_fddx_fddy:1; 263 264 /** Has divergence analysis ever been run? */ 265 bool divergence_analysis_run:1; 266 267 /* Bitmask of bit-sizes used with ALU instructions. */ 268 uint8_t bit_sizes_float; 269 uint8_t bit_sizes_int; 270 271 /* Whether the first UBO is the default uniform buffer, i.e. uniforms. */ 272 bool first_ubo_is_default_ubo:1; 273 274 /* Whether or not separate shader objects were used */ 275 bool separate_shader:1; 276 277 /** Was this shader linked with any transform feedback varyings? */ 278 bool has_transform_feedback_varyings:1; 279 280 /* Whether flrp has been lowered. */ 281 bool flrp_lowered:1; 282 283 /* Whether nir_lower_io has been called to lower derefs. 284 * nir_variables for inputs and outputs might not be present in the IR. 285 */ 286 bool io_lowered:1; 287 288 /* Whether the shader writes memory, including transform feedback. */ 289 bool writes_memory:1; 290 291 /* Whether gl_Layer is viewport-relative */ 292 bool layer_viewport_relative:1; 293 294 /* Whether explicit barriers are used */ 295 bool uses_control_barrier : 1; 296 bool uses_memory_barrier : 1; 297 298 /** 299 * Shared memory types have explicit layout set. Used for 300 * SPV_KHR_workgroup_storage_explicit_layout. 301 */ 302 bool shared_memory_explicit_layout:1; 303 304 /** 305 * Used for VK_KHR_zero_initialize_workgroup_memory. 306 */ 307 bool zero_initialize_shared_memory:1; 308 309 /** 310 * Used for ARB_compute_variable_group_size. 311 */ 312 bool workgroup_size_variable:1; 313 314 /** 315 * Set if this shader uses legacy (DX9 or ARB assembly) math rules. 316 * 317 * From the ARB_fragment_program specification: 318 * 319 * "The following rules apply to multiplication: 320 * 321 * 1. <x> * <y> == <y> * <x>, for all <x> and <y>. 322 * 2. +/-0.0 * <x> = +/-0.0, at least for all <x> that correspond to 323 * *representable numbers (IEEE "not a number" and "infinity" 324 * *encodings may be exceptions). 325 * 3. +1.0 * <x> = <x>, for all <x>."" 326 * 327 * However, in effect this was due to DX9 semantics implying that 0*x=0 even 328 * for inf/nan if the hardware generated them instead of float_min/max. So, 329 * you should not have an exception for inf/nan to rule 2 above. 330 * 331 * One implementation of this behavior would be to flush all generated NaNs 332 * to zero, at which point 0*Inf=Nan=0. Most DX9/ARB-asm hardware did not 333 * generate NaNs, and the only way the GPU saw one was to possibly feed it 334 * in as a uniform. 335 */ 336 bool use_legacy_math_rules; 337 338 union { 339 struct { 340 /* Which inputs are doubles */ 341 uint64_t double_inputs; 342 343 /* For AMD-specific driver-internal shaders. It replaces vertex 344 * buffer loads with code generating VS inputs from scalar registers. 345 * 346 * Valid values: SI_VS_BLIT_SGPRS_POS_* 347 */ 348 uint8_t blit_sgprs_amd:4; 349 350 /* True if the shader writes position in window space coordinates pre-transform */ 351 bool window_space_position:1; 352 353 /** Is an edge flag input needed? */ 354 bool needs_edge_flag:1; 355 } vs; 356 357 struct { 358 /** The output primitive type */ 359 uint16_t output_primitive; 360 361 /** The input primitive type */ 362 uint16_t input_primitive; 363 364 /** The maximum number of vertices the geometry shader might write. */ 365 uint16_t vertices_out; 366 367 /** 1 .. MAX_GEOMETRY_SHADER_INVOCATIONS */ 368 uint8_t invocations; 369 370 /** The number of vertices received per input primitive (max. 6) */ 371 uint8_t vertices_in:3; 372 373 /** Whether or not this shader uses EndPrimitive */ 374 bool uses_end_primitive:1; 375 376 /** The streams used in this shaders (max. 4) */ 377 uint8_t active_stream_mask:4; 378 } gs; 379 380 struct { 381 bool uses_discard:1; 382 bool uses_demote:1; 383 bool uses_fbfetch_output:1; 384 bool color_is_dual_source:1; 385 386 /** 387 * True if this fragment shader requires helper invocations. This 388 * can be caused by the use of ALU derivative ops, texture 389 * instructions which do implicit derivatives, and the use of quad 390 * subgroup operations. 391 */ 392 bool needs_quad_helper_invocations:1; 393 394 /** 395 * True if this fragment shader requires helper invocations for 396 * all subgroup operations, not just quad ops and derivatives. 397 */ 398 bool needs_all_helper_invocations:1; 399 400 /** 401 * Whether any inputs are declared with the "sample" qualifier. 402 */ 403 bool uses_sample_qualifier:1; 404 405 /** 406 * Whether sample shading is used. 407 */ 408 bool uses_sample_shading:1; 409 410 /** 411 * Whether early fragment tests are enabled as defined by 412 * ARB_shader_image_load_store. 413 */ 414 bool early_fragment_tests:1; 415 416 /** 417 * Defined by INTEL_conservative_rasterization. 418 */ 419 bool inner_coverage:1; 420 421 bool post_depth_coverage:1; 422 423 /** 424 * \name ARB_fragment_coord_conventions 425 * @{ 426 */ 427 bool pixel_center_integer:1; 428 bool origin_upper_left:1; 429 /*@}*/ 430 431 bool pixel_interlock_ordered:1; 432 bool pixel_interlock_unordered:1; 433 bool sample_interlock_ordered:1; 434 bool sample_interlock_unordered:1; 435 436 /** 437 * Flags whether NIR's base types on the FS color outputs should be 438 * ignored. 439 * 440 * GLSL requires that fragment shader output base types match the 441 * render target's base types for the behavior to be defined. From 442 * the GL 4.6 spec: 443 * 444 * "If the values written by the fragment shader do not match the 445 * format(s) of the corresponding color buffer(s), the result is 446 * undefined." 447 * 448 * However, for NIR shaders translated from TGSI, we don't have the 449 * output types any more, so the driver will need to do whatever 450 * fixups are necessary to handle effectively untyped data being 451 * output from the FS. 452 */ 453 bool untyped_color_outputs:1; 454 455 /** gl_FragDepth layout for ARB_conservative_depth. */ 456 enum gl_frag_depth_layout depth_layout:3; 457 458 /** 459 * Interpolation qualifiers for drivers that lowers color inputs 460 * to system values. 461 */ 462 unsigned color0_interp:3; /* glsl_interp_mode */ 463 bool color0_sample:1; 464 bool color0_centroid:1; 465 unsigned color1_interp:3; /* glsl_interp_mode */ 466 bool color1_sample:1; 467 bool color1_centroid:1; 468 469 /* Bitmask of gl_advanced_blend_mode values that may be used with this 470 * shader. 471 */ 472 unsigned advanced_blend_modes; 473 } fs; 474 475 struct { 476 uint16_t workgroup_size_hint[3]; 477 478 uint8_t user_data_components_amd:3; 479 480 /* 481 * Arrangement of invocations used to calculate derivatives in a compute 482 * shader. From NV_compute_shader_derivatives. 483 */ 484 enum gl_derivative_group derivative_group:2; 485 486 /** 487 * pointer size is: 488 * AddressingModelLogical: 0 (default) 489 * AddressingModelPhysical32: 32 490 * AddressingModelPhysical64: 64 491 */ 492 unsigned ptr_size; 493 494 /** 495 * Uses subgroup intrinsics which can communicate across a quad. 496 */ 497 bool uses_wide_subgroup_intrinsics; 498 } cs; 499 500 /* Applies to both TCS and TES. */ 501 struct { 502 enum tess_primitive_mode _primitive_mode; 503 504 /** The number of vertices in the TCS output patch. */ 505 uint8_t tcs_vertices_out; 506 unsigned spacing:2; /*gl_tess_spacing*/ 507 508 /** Is the vertex order counterclockwise? */ 509 bool ccw:1; 510 bool point_mode:1; 511 512 /* Bit mask of TCS per-vertex inputs (VS outputs) that are used 513 * with a vertex index that is NOT the invocation id 514 */ 515 uint64_t tcs_cross_invocation_inputs_read; 516 517 /* Bit mask of TCS per-vertex outputs that are used 518 * with a vertex index that is NOT the invocation id 519 */ 520 uint64_t tcs_cross_invocation_outputs_read; 521 } tess; 522 523 /* Applies to MESH. */ 524 struct { 525 /* Bit mask of MS outputs that are used 526 * with an index that is NOT the local invocation index. 527 */ 528 uint64_t ms_cross_invocation_output_access; 529 530 uint16_t max_vertices_out; 531 uint16_t max_primitives_out; 532 uint16_t primitive_type; /* GL_POINTS, GL_LINES or GL_TRIANGLES. */ 533 } mesh; 534 }; 535 } shader_info; 536 537 #ifdef __cplusplus 538 } 539 #endif 540 541 #endif /* SHADER_INFO_H */ 542