• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2016 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  */
24 
25 #ifndef SHADER_INFO_H
26 #define SHADER_INFO_H
27 
28 #include "util/bitset.h"
29 #include "util/sha1/sha1.h"
30 #include "shader_enums.h"
31 #include <stdint.h>
32 
33 #ifdef __cplusplus
34 extern "C" {
35 #endif
36 
/* Number of entries in the shader_info::xfb_stride array. */
37 #define MAX_XFB_BUFFERS        4
/* Number of entries in the shader_info::inlinable_uniform_dw_offsets array. */
38 #define MAX_INLINABLE_UNIFORMS 4
39 
/*
 * Collection of boolean flags, each named after a SPIR-V capability or
 * extension feature.
 *
 * NOTE(review): presumably filled in by the driver/API layer and consulted by
 * the SPIR-V front end to decide which capabilities a module may declare; the
 * producers and consumers are not visible in this header -- confirm.
 */
40 struct spirv_supported_capabilities {
41    bool address;
42    bool amd_fragment_mask;
43    bool amd_gcn_shader;
44    bool amd_image_gather_bias_lod;
45    bool amd_image_read_write_lod;
46    bool amd_shader_ballot;
47    bool amd_shader_explicit_vertex_parameter;
48    bool amd_trinary_minmax;
49    bool atomic_storage;
50    bool cooperative_matrix;
51    bool demote_to_helper_invocation;
52    bool derivative_group;
53    bool descriptor_array_dynamic_indexing;
54    bool descriptor_array_non_uniform_indexing;
55    bool descriptor_indexing;
56    bool device_group;
57    bool draw_parameters;
58    bool float_controls;
59    bool float16_atomic_add;
60    bool float16_atomic_min_max;
61    bool float16;
62    bool float32_atomic_add;
63    bool float32_atomic_min_max;
64    bool float64_atomic_add;
65    bool float64_atomic_min_max;
66    bool float64;
67    bool fragment_barycentric;
68    bool fragment_density;
69    bool fragment_fully_covered;
70    bool fragment_shader_pixel_interlock;
71    bool fragment_shader_sample_interlock;
72    bool fragment_shading_rate;
73    bool generic_pointers;
74    bool geometry_streams;
75    bool groups;
76    bool image_atomic_int64;
77    bool image_ms_array;
78    bool image_read_without_format;
79    bool image_write_without_format;
80    bool int16;
81    bool int64_atomics;
82    bool int64;
83    bool int8;
84    bool integer_functions2;
85    bool kernel_image_read_write;
86    bool kernel_image;
87    bool kernel;
88    bool linkage;
89    bool literal_sampler;
90    bool mesh_shading_nv;
91    bool mesh_shading;
92    bool min_lod;
93    bool multiview;
94    bool per_view_attributes_nv;
95    bool physical_storage_buffer_address;
96    bool post_depth_coverage;
97    bool printf;
98    bool quad_control;
99    bool ray_cull_mask;
100    bool ray_query;
101    bool ray_tracing;
102    bool ray_traversal_primitive_culling;
103    bool ray_tracing_position_fetch;
104    bool runtime_descriptor_array;
105    bool shader_clock;
106    bool shader_enqueue;
107    bool shader_sm_builtins_nv;
108    bool shader_viewport_index_layer;
109    bool shader_viewport_mask_nv;
110    bool sparse_residency;
111    bool stencil_export;
112    bool storage_16bit;
113    bool storage_8bit;
114    bool storage_image_ms;
115    bool subgroup_arithmetic;
116    bool subgroup_ballot;
117    bool subgroup_basic;
118    bool subgroup_dispatch;
119    bool subgroup_quad;
120    bool subgroup_rotate;
121    bool subgroup_shuffle;
122    bool subgroup_uniform_control_flow;
123    bool subgroup_vote;
124    bool tessellation;
125    bool transform_feedback;
126    bool variable_pointers;
127    bool vk_memory_model_device_scope;
128    bool vk_memory_model;
129    bool workgroup_memory_explicit_layout;
130 
       /* Flags carrying an intel_ prefix, grouped separately from the list above. */
131    bool intel_subgroup_shuffle;
132    bool intel_subgroup_buffer_block_io;
133 };
134 
/*
 * Per-shader metadata record.
 *
 * Holds identification (name, label, source SHA1, stage), resource counts,
 * bitmasks describing which inputs/outputs/system values/textures/images are
 * used, assorted boolean properties packed as bitfields, and an anonymous
 * union of stage-specific sub-structures (vs, gs, fs, cs, tess, mesh).
 *
 * Member order and bitfield widths determine the struct layout; keep them
 * stable unless every user is rebuilt.
 */
135 typedef struct shader_info {
       /* Shader name string.
        * NOTE(review): ownership/lifetime of the pointed-to string is not
        * visible in this header -- confirm against the code that sets it.
        */
136    const char *name;
137 
138    /* Descriptive name provided by the client; may be NULL */
139    const char *label;
140 
141    /* Shader is internal, and should be ignored by things like NIR_DEBUG=print */
142    bool internal;
143 
144    /* SHA1 of the original source, used by shader detection in drivers. */
145    uint8_t source_sha1[SHA1_DIGEST_LENGTH];
146 
147    /** The shader stage, such as MESA_SHADER_VERTEX. */
148    gl_shader_stage stage:8;
149 
150    /** The shader stage in a non SSO linked program that follows this stage,
151      * such as MESA_SHADER_FRAGMENT.
152      */
153    gl_shader_stage next_stage:8;
154 
155    /* Number of textures used by this shader */
156    uint8_t num_textures;
157    /* Number of uniform buffers used by this shader */
158    uint8_t num_ubos;
159    /* Number of atomic buffers used by this shader */
160    uint8_t num_abos;
161    /* Number of shader storage buffers (max .driver_location + 1) used by this
162     * shader.  In the case of nir_lower_atomics_to_ssbo being used, this will
163     * be the number of actual SSBOs in gl_program->info, and the lowered SSBOs
164     * and atomic counters in nir_shader->info.
165     */
166    uint8_t num_ssbos;
167    /* Number of images used by this shader */
168    uint8_t num_images;
169 
170    /* Which inputs are actually read */
171    uint64_t inputs_read;
172    /* Which inputs occupy 2 slots. */
173    uint64_t dual_slot_inputs;
174    /* Which outputs are actually written */
175    uint64_t outputs_written;
176    /* Which outputs are actually read */
177    uint64_t outputs_read;
178    /* Which system values are actually read */
179    BITSET_DECLARE(system_values_read, SYSTEM_VALUE_MAX);
180 
181    /* Which I/O is per-primitive, for read/written information combine with
182     * the fields above.
183     */
184    uint64_t per_primitive_inputs;
185    uint64_t per_primitive_outputs;
186 
187    /* Which I/O is per-view */
188    uint64_t per_view_outputs;
189 
190    /* Which 16-bit inputs and outputs are used corresponding to
191     * VARYING_SLOT_VARn_16BIT.
192     */
193    uint16_t inputs_read_16bit;
194    uint16_t outputs_written_16bit;
195    uint16_t outputs_read_16bit;
196    uint16_t inputs_read_indirectly_16bit;
197    uint16_t outputs_accessed_indirectly_16bit;
198 
199    /* Which patch inputs are actually read */
200    uint32_t patch_inputs_read;
201    /* Which patch outputs are actually written */
202    uint32_t patch_outputs_written;
203    /* Which patch outputs are read */
204    uint32_t patch_outputs_read;
205 
206    /* Which inputs are read indirectly (subset of inputs_read) */
207    uint64_t inputs_read_indirectly;
208    /* Which outputs are read or written indirectly */
209    uint64_t outputs_accessed_indirectly;
210    /* Which patch inputs are read indirectly (subset of patch_inputs_read) */
211    uint64_t patch_inputs_read_indirectly;
212    /* Which patch outputs are read or written indirectly */
213    uint64_t patch_outputs_accessed_indirectly;
214 
215    /** Bitfield of which textures are used */
216    BITSET_DECLARE(textures_used, 128);
217 
218    /** Bitfield of which textures are used by texelFetch() */
219    BITSET_DECLARE(textures_used_by_txf, 128);
220 
221    /** Bitfield of which samplers are used */
222    BITSET_DECLARE(samplers_used, 32);
223 
224    /** Bitfield of which images are used */
225    BITSET_DECLARE(images_used, 64);
226    /** Bitfield of which images are buffers. */
227    BITSET_DECLARE(image_buffers, 64);
228    /** Bitfield of which images are MSAA. */
229    BITSET_DECLARE(msaa_images, 64);
230 
231    /* SPV_KHR_float_controls: execution mode for floating point ops */
232    uint32_t float_controls_execution_mode;
233 
234    /**
235     * Size of shared variables accessed by compute/task/mesh shaders.
236     */
237    unsigned shared_size;
238 
239    /**
240     * Size of task payload variables accessed by task/mesh shaders.
241     */
242    unsigned task_payload_size;
243 
244    /**
245     * Number of ray tracing queries in the shader (counts all elements of all
246     * variables).
247     */
248    unsigned ray_queries;
249 
250    /**
251     * Local workgroup size used by compute/task/mesh shaders.
252     */
253    uint16_t workgroup_size[3];
254 
       /* Subgroup size selection (see enum gl_subgroup_size) and a subgroup
        * count.
        * NOTE(review): the exact meaning of num_subgroups is not documented
        * in this header -- confirm against the code that sets it.
        */
255    enum gl_subgroup_size subgroup_size;
256    uint8_t num_subgroups;
257 
258    /**
259     * Uses subgroup intrinsics which can communicate across a quad.
260     */
261    bool uses_wide_subgroup_intrinsics;
262 
263    /* Transform feedback buffer strides in dwords, max. 1K - 4. */
264    uint8_t xfb_stride[MAX_XFB_BUFFERS];
265 
       /* NOTE(review): presumably dword offsets of the uniforms that may be
        * inlined, with the first num_inlinable_uniforms entries being valid
        * -- confirm against the passes that fill these in.
        */
266    uint16_t inlinable_uniform_dw_offsets[MAX_INLINABLE_UNIFORMS];
267    uint8_t num_inlinable_uniforms:4;
268 
269    /* The size of the gl_ClipDistance[] array, if declared. */
270    uint8_t clip_distance_array_size:4;
271 
272    /* The size of the gl_CullDistance[] array, if declared. */
273    uint8_t cull_distance_array_size:4;
274 
275    /* Whether or not this shader ever uses textureGather() */
276    bool uses_texture_gather:1;
277 
278    /* Whether texture size, levels, or samples is queried. */
279    bool uses_resource_info_query:1;
280 
281    /**
282     * True if this shader uses the fddx/fddy opcodes.
283     *
284     * Note that this does not include the "fine" and "coarse" variants.
285     */
286    bool uses_fddx_fddy:1;
287 
288    /** Has divergence analysis ever been run? */
289    bool divergence_analysis_run:1;
290 
291    /* Bitmask of bit-sizes used with ALU instructions. */
292    uint8_t bit_sizes_float;
293    uint8_t bit_sizes_int;
294 
295    /* Whether the first UBO is the default uniform buffer, i.e. uniforms. */
296    bool first_ubo_is_default_ubo:1;
297 
298    /* Whether or not separate shader objects were used */
299    bool separate_shader:1;
300 
301    /** Was this shader linked with any transform feedback varyings? */
302    bool has_transform_feedback_varyings:1;
303 
304    /* Whether flrp has been lowered. */
305    bool flrp_lowered:1;
306 
307    /* Whether nir_lower_io has been called to lower derefs.
308     * nir_variables for inputs and outputs might not be present in the IR.
309     */
310    bool io_lowered:1;
311 
312    /** Has nir_lower_var_copies been called?  Used to avoid calling any
313     * lowering/optimization that would introduce any copy_deref later.
314     */
315    bool var_copies_lowered:1;
316 
317    /* Whether the shader writes memory, including transform feedback. */
318    bool writes_memory:1;
319 
320    /* Whether gl_Layer is viewport-relative */
321    bool layer_viewport_relative:1;
322 
323    /* Whether explicit barriers are used */
324    bool uses_control_barrier : 1;
325    bool uses_memory_barrier : 1;
326 
327    /* Whether ARB_bindless_texture ops or variables are used */
328    bool uses_bindless : 1;
329 
330    /**
331     * Shared memory types have explicit layout set.  Used for
332     * SPV_KHR_workgroup_storage_explicit_layout.
333     */
334    bool shared_memory_explicit_layout:1;
335 
336    /**
337     * Used for VK_KHR_zero_initialize_workgroup_memory.
338     */
339    bool zero_initialize_shared_memory:1;
340 
341    /**
342     * Used for ARB_compute_variable_group_size.
343     */
344    bool workgroup_size_variable:1;
345 
346    /**
347     * Whether the shader uses printf instructions.
348     */
349    bool uses_printf:1;
350 
351    /**
352     * VK_KHR_shader_maximal_reconvergence
353     */
354    bool maximally_reconverges:1;
355 
356    /**
357      * Set if this shader uses legacy (DX9 or ARB assembly) math rules.
358      *
359      * From the ARB_fragment_program specification:
360      *
361      *    "The following rules apply to multiplication:
362      *
363      *      1. <x> * <y> == <y> * <x>, for all <x> and <y>.
364      *      2. +/-0.0 * <x> = +/-0.0, at least for all <x> that correspond to
365      *         representable numbers (IEEE "not a number" and "infinity"
366      *         encodings may be exceptions).
367      *      3. +1.0 * <x> = <x>, for all <x>."
368      *
369      * However, in effect this was due to DX9 semantics implying that 0*x=0 even
370      * for inf/nan if the hardware generated them instead of float_min/max.  So,
371      * you should not have an exception for inf/nan to rule 2 above.
372      *
373      * One implementation of this behavior would be to flush all generated NaNs
374      * to zero, at which point 0*Inf=Nan=0.  Most DX9/ARB-asm hardware did not
375      * generate NaNs, and the only way the GPU saw one was to possibly feed it
376      * in as a uniform.
377      */
378    bool use_legacy_math_rules;
379 
       /* Stage-specific information, one sub-structure per stage family.
        * NOTE(review): presumably only the member matching `stage` is
        * meaningful at any time, since they share storage -- confirm.
        */
380    union {
381       struct {
382          /* Which inputs are doubles */
383          uint64_t double_inputs;
384 
385          /* For AMD-specific driver-internal shaders. It replaces vertex
386           * buffer loads with code generating VS inputs from scalar registers.
387           *
388           * Valid values: SI_VS_BLIT_SGPRS_POS_*
389           */
390          uint8_t blit_sgprs_amd:4;
391 
392          /* Software TES executing as HW VS */
393          bool tes_agx:1;
394 
395          /* True if the shader writes position in window space coordinates pre-transform */
396          bool window_space_position:1;
397 
398          /** Is an edge flag input needed? */
399          bool needs_edge_flag:1;
400       } vs;
401 
402       struct {
403          /** The output primitive type */
404          enum mesa_prim output_primitive;
405 
406          /** The input primitive type */
407          enum mesa_prim input_primitive;
408 
409          /** The maximum number of vertices the geometry shader might write. */
410          uint16_t vertices_out;
411 
412          /** 1 .. MAX_GEOMETRY_SHADER_INVOCATIONS */
413          uint8_t invocations;
414 
415          /** The number of vertices received per input primitive (max. 6) */
416          uint8_t vertices_in:3;
417 
418          /** Whether or not this shader uses EndPrimitive */
419          bool uses_end_primitive:1;
420 
421          /** The streams used in this shader (max. 4) */
422          uint8_t active_stream_mask:4;
423       } gs;
424 
425       struct {
426          bool uses_discard:1;
427          bool uses_demote:1;
428          bool uses_fbfetch_output:1;
429          bool fbfetch_coherent:1;
430          bool color_is_dual_source:1;
431 
432          /**
433           * True if this fragment shader requires full quad invocations.
434           */
435          bool require_full_quads:1;
436 
437          /**
438           * Whether the derivative group must be equivalent to the quad group.
439           */
440          bool quad_derivatives:1;
441 
442          /**
443           * True if this fragment shader requires helper invocations.  This
444           * can be caused by the use of ALU derivative ops, texture
445           * instructions which do implicit derivatives, the use of quad
446           * subgroup operations or if the shader requires full quads.
447           */
448          bool needs_quad_helper_invocations:1;
449 
450          /**
451           * Whether any inputs are declared with the "sample" qualifier.
452           */
453          bool uses_sample_qualifier:1;
454 
455          /**
456           * Whether sample shading is used.
457           */
458          bool uses_sample_shading:1;
459 
460          /**
461           * Whether early fragment tests are enabled as defined by
462           * ARB_shader_image_load_store.
463           */
464          bool early_fragment_tests:1;
465 
466          /**
467           * Defined by INTEL_conservative_rasterization.
468           */
469          bool inner_coverage:1;
470 
471          bool post_depth_coverage:1;
472 
473          /**
474           * \name ARB_fragment_coord_conventions
475           * @{
476           */
477          bool pixel_center_integer:1;
478          bool origin_upper_left:1;
479          /*@}*/
480 
481          bool pixel_interlock_ordered:1;
482          bool pixel_interlock_unordered:1;
483          bool sample_interlock_ordered:1;
484          bool sample_interlock_unordered:1;
485 
486          /**
487           * Flags whether NIR's base types on the FS color outputs should be
488           * ignored.
489           *
490           * GLSL requires that fragment shader output base types match the
491           * render target's base types for the behavior to be defined.  From
492           * the GL 4.6 spec:
493           *
494           *     "If the values written by the fragment shader do not match the
495           *      format(s) of the corresponding color buffer(s), the result is
496           *      undefined."
497           *
498           * However, for NIR shaders translated from TGSI, we don't have the
499           * output types any more, so the driver will need to do whatever
500           * fixups are necessary to handle effectively untyped data being
501           * output from the FS.
502           */
503          bool untyped_color_outputs:1;
504 
505          /** gl_FragDepth layout for ARB_conservative_depth. */
506          enum gl_frag_depth_layout depth_layout:3;
507 
508          /**
509           * Interpolation qualifiers for drivers that lower color inputs
510           * to system values.
511           */
512          unsigned color0_interp:3; /* glsl_interp_mode */
513          bool color0_sample:1;
514          bool color0_centroid:1;
515          unsigned color1_interp:3; /* glsl_interp_mode */
516          bool color1_sample:1;
517          bool color1_centroid:1;
518 
519          /* Bitmask of gl_advanced_blend_mode values that may be used with this
520           * shader.
521           */
522          unsigned advanced_blend_modes;
523 
524          /**
525           * Defined by AMD_shader_early_and_late_fragment_tests.
526           */
527          bool early_and_late_fragment_tests:1;
528          enum gl_frag_stencil_layout stencil_front_layout:3;
529          enum gl_frag_stencil_layout stencil_back_layout:3;
530       } fs;
531 
532       struct {
533          uint16_t workgroup_size_hint[3];
534 
535          uint8_t user_data_components_amd:3;
536 
537          /*
538           * Arrangement of invocations used to calculate derivatives in a compute
539           * shader.  From NV_compute_shader_derivatives.
540           */
541          enum gl_derivative_group derivative_group:2;
542 
543          /*
544           * If the shader might run with shared mem on top of `shared_size`.
545           */
546          bool has_variable_shared_mem:1;
547 
548          /**
549           * If the shader has any use of a cooperative matrix. From
550           * SPV_KHR_cooperative_matrix.
551           */
552          bool has_cooperative_matrix:1;
553 
554          /**
555           * pointer size is:
556           *   AddressingModelLogical:    0    (default)
557           *   AddressingModelPhysical32: 32
558           *   AddressingModelPhysical64: 64
559           */
560          unsigned ptr_size;
561 
562          /** Index provided by VkPipelineShaderStageNodeCreateInfoAMDX or ShaderIndexAMDX */
563          uint32_t shader_index;
564 
565          /** Maximum size required by any output node payload array */
566          uint32_t node_payloads_size;
567 
568          /** Static workgroup count for overwriting the enqueued workgroup count. (0 if dynamic) */
569          uint32_t workgroup_count[3];
570       } cs;
571 
572       /* Applies to both TCS and TES. */
573       struct {
574          enum tess_primitive_mode _primitive_mode;
575 
576          /** The number of vertices in the TCS output patch. */
577          uint8_t tcs_vertices_out;
578          unsigned spacing:2; /*gl_tess_spacing*/
579 
580          /** Is the vertex order counterclockwise? */
581          bool ccw:1;
582          bool point_mode:1;
583 
584          /* Bit mask of TCS per-vertex inputs (VS outputs) that are used
585           * with a vertex index that is NOT the invocation id
586           */
587          uint64_t tcs_cross_invocation_inputs_read;
588 
589          /* Bit mask of TCS per-vertex outputs that are used
590           * with a vertex index that is NOT the invocation id
591           */
592          uint64_t tcs_cross_invocation_outputs_read;
593       } tess;
594 
595       /* Applies to MESH and TASK. */
596       struct {
597          /* Bit mask of MS outputs that are used
598           * with an index that is NOT the local invocation index.
599           */
600          uint64_t ms_cross_invocation_output_access;
601 
602          /* Dimensions of task->mesh dispatch (EmitMeshTasksEXT)
603           * when they are known compile-time constants.
604           * 0 means they are not known.
605           */
606          uint32_t ts_mesh_dispatch_dimensions[3];
607 
608          uint16_t max_vertices_out;
609          uint16_t max_primitives_out;
610          enum mesa_prim primitive_type; /* POINTS, LINES or TRIANGLES. */
611 
612          /* TODO: remove this when we stop supporting NV_mesh_shader. */
613          bool nv;
614       } mesh;
615    };
616 } shader_info;
617 
618 #ifdef __cplusplus
619 }
620 #endif
621 
622 #endif /* SHADER_INFO_H */
623