• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2016 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  */
24 
25 #ifndef SHADER_INFO_H
26 #define SHADER_INFO_H
27 
28 #include "util/bitset.h"
29 #include "util/sha1/sha1.h"
30 #include "shader_enums.h"
31 #include <stdint.h>
32 
33 #ifdef __cplusplus
34 extern "C" {
35 #endif
36 
37 #define MAX_XFB_BUFFERS        4 /* Length of shader_info.xfb_stride[] */
38 #define MAX_INLINABLE_UNIFORMS 4 /* Length of shader_info.inlinable_uniform_dw_offsets[] */
39 
/**
 * Table of boolean flags, one member per SPIR-V capability or extension.
 *
 * NOTE(review): judging by the struct name, a set flag presumably means the
 * SPIR-V consumer accepts modules using that capability; the code that reads
 * these flags is not part of this header -- confirm against the SPIR-V
 * front end.
 *
 * Generic entries come first (roughly alphabetical); vendor-prefixed entries
 * (amd_*, intel_*) are grouped at the end.
 */
40 struct spirv_supported_capabilities {
41    bool address;
42    bool atomic_storage;
43    bool demote_to_helper_invocation;
44    bool derivative_group;
45    bool descriptor_array_dynamic_indexing;
46    bool descriptor_array_non_uniform_indexing;
47    bool descriptor_indexing;
48    bool device_group;
49    bool draw_parameters;
50    bool float16_atomic_add;
51    bool float16_atomic_min_max;
52    bool float32_atomic_add;
53    bool float32_atomic_min_max;
54    bool float64;
55    bool float64_atomic_add;
56    bool float64_atomic_min_max;
57    bool fragment_shader_sample_interlock;
58    bool fragment_shader_pixel_interlock;
59    bool fragment_shading_rate;
60    bool generic_pointers;
61    bool geometry_streams;
62    bool groups;
63    bool image_ms_array;
64    bool image_read_without_format;
65    bool image_write_without_format;
66    bool image_atomic_int64;
67    bool int8;
68    bool int16;
69    bool int64;
70    bool int64_atomics;
71    bool integer_functions2;
72    bool kernel;
73    bool kernel_image;
74    bool kernel_image_read_write;
75    bool linkage;
76    bool literal_sampler;
77    bool mesh_shading_nv;
78    bool min_lod;
79    bool multiview;
80    bool per_view_attributes_nv;
81    bool physical_storage_buffer_address;
82    bool post_depth_coverage;
83    bool printf;
84    bool ray_cull_mask;
85    bool ray_tracing;
86    bool ray_query;
87    bool ray_traversal_primitive_culling;
88    bool runtime_descriptor_array;
89    bool float_controls;
90    bool shader_clock;
91    bool shader_viewport_index_layer;
92    bool shader_viewport_mask_nv;
93    bool sparse_residency;
94    bool stencil_export;
95    bool storage_8bit;
96    bool storage_16bit;
97    bool storage_image_ms;
98    bool subgroup_arithmetic;
99    bool subgroup_ballot;
100    bool subgroup_basic;
101    bool subgroup_dispatch;
102    bool subgroup_quad;
103    bool subgroup_shuffle;
104    bool subgroup_uniform_control_flow;
105    bool subgroup_vote;
106    bool tessellation;
107    bool transform_feedback;
108    bool variable_pointers;
109    bool vk_memory_model;
110    bool vk_memory_model_device_scope;
111    bool workgroup_memory_explicit_layout;
112    bool float16;
113    bool amd_fragment_mask; /* AMD-prefixed entries start here */
114    bool amd_gcn_shader;
115    bool amd_shader_ballot;
116    bool amd_trinary_minmax;
117    bool amd_image_read_write_lod;
118    bool amd_shader_explicit_vertex_parameter;
119    bool amd_image_gather_bias_lod;
120    /* Intel-prefixed entries */
121    bool intel_subgroup_shuffle;
122    bool intel_subgroup_buffer_block_io;
123 };
124 
/**
 * Per-shader metadata record.
 *
 * Holds the shader's identity (name, label, source SHA1, stage), resource
 * counts, bitmasks describing which inputs, outputs, system values, textures,
 * samplers and images are used, a run of one-bit feature flags, and finally
 * an anonymous union of stage-specific sub-structs (vs, gs, fs, cs, tess,
 * mesh).
 *
 * Many members are bit-fields, so declaration order determines how they are
 * packed; keep that in mind before reordering or inserting members.
 */
125 typedef struct shader_info {
126    const char *name; /* Name string for this shader; see also label below */
127 
128    /* Descriptive name provided by the client; may be NULL */
129    const char *label;
130 
131    /* Shader is internal, and should be ignored by things like NIR_DEBUG=print */
132    bool internal;
133 
134    /* SHA1 of the original source, used by shader detection in drivers. */
135    uint8_t source_sha1[SHA1_DIGEST_LENGTH];
136 
137    /** The shader stage, such as MESA_SHADER_VERTEX. */
138    gl_shader_stage stage:8;
139 
140    /** The shader stage in a non SSO linked program that follows this stage,
141      * such as MESA_SHADER_FRAGMENT.
142      */
143    gl_shader_stage next_stage:8;
144 
145    /* Number of textures used by this shader */
146    uint8_t num_textures;
147    /* Number of uniform buffers used by this shader */
148    uint8_t num_ubos;
149    /* Number of atomic buffers used by this shader */
150    uint8_t num_abos;
151    /* Number of shader storage buffers (max .driver_location + 1) used by this
152     * shader.  In the case of nir_lower_atomics_to_ssbo being used, this will
153     * be the number of actual SSBOs in gl_program->info, and the lowered SSBOs
154     * and atomic counters in nir_shader->info.
155     */
156    uint8_t num_ssbos;
157    /* Number of images used by this shader */
158    uint8_t num_images;
159 
160    /* Which inputs are actually read */
161    uint64_t inputs_read;
162    /* Which outputs are actually written */
163    uint64_t outputs_written;
164    /* Which outputs are actually read */
165    uint64_t outputs_read;
166    /* Which system values are actually read */
167    BITSET_DECLARE(system_values_read, SYSTEM_VALUE_MAX);
168 
169    /* Which I/O is per-primitive, for read/written information combine with
170     * the fields above.
171     */
172    uint64_t per_primitive_inputs;
173    uint64_t per_primitive_outputs;
174 
175    /* Which 16-bit inputs and outputs are used corresponding to
176     * VARYING_SLOT_VARn_16BIT.
177     */
178    uint16_t inputs_read_16bit;
179    uint16_t outputs_written_16bit;
180    uint16_t outputs_read_16bit;
181    uint16_t inputs_read_indirectly_16bit;
182    uint16_t outputs_accessed_indirectly_16bit;
183 
184    /* Which patch inputs are actually read */
185    uint32_t patch_inputs_read;
186    /* Which patch outputs are actually written */
187    uint32_t patch_outputs_written;
188    /* Which patch outputs are read */
189    uint32_t patch_outputs_read;
190 
191    /* Which inputs are read indirectly (subset of inputs_read) */
192    uint64_t inputs_read_indirectly;
193    /* Which outputs are read or written indirectly */
194    uint64_t outputs_accessed_indirectly;
195    /* Which patch inputs are read indirectly (subset of patch_inputs_read) */
196    uint64_t patch_inputs_read_indirectly;
197    /* Which patch outputs are read or written indirectly */
198    uint64_t patch_outputs_accessed_indirectly;
199 
200    /** Bitfield of which textures are used */
201    BITSET_DECLARE(textures_used, 128);
202 
203    /** Bitfield of which textures are used by texelFetch() */
204    BITSET_DECLARE(textures_used_by_txf, 128);
205 
206    /** Bitfield of which samplers are used */
207    BITSET_DECLARE(samplers_used, 32);
208 
209    /** Bitfield of which images are used */
210    BITSET_DECLARE(images_used, 64);
211    /** Bitfield of which images are buffers. */
212    BITSET_DECLARE(image_buffers, 64);
213    /** Bitfield of which images are MSAA. */
214    BITSET_DECLARE(msaa_images, 64);
215 
216    /* SPV_KHR_float_controls: execution mode for floating point ops */
217    uint16_t float_controls_execution_mode;
218 
219    /**
220     * Size of shared variables accessed by compute/task/mesh shaders.
221     */
222    unsigned shared_size;
223 
224    /**
225     * Size of task payload variables accessed by task/mesh shaders.
226     */
227    unsigned task_payload_size;
228 
229    /**
230     * Number of ray tracing queries in the shader (counts all elements of all
231     * variables).
232     */
233    unsigned ray_queries;
234 
235    /**
236     * Local workgroup size used by compute/task/mesh shaders.
237     */
238    uint16_t workgroup_size[3];
239 
240    enum gl_subgroup_size subgroup_size; /* enum gl_subgroup_size is not declared in this header */
241 
242    /* Transform feedback buffer strides in dwords.  A uint8_t tops out at 255
    * dwords, i.e. 1020 bytes with 4-byte dwords (1K - 4). */
243    uint8_t xfb_stride[MAX_XFB_BUFFERS];
244 
245    uint16_t inlinable_uniform_dw_offsets[MAX_INLINABLE_UNIFORMS];
246    uint8_t num_inlinable_uniforms:4; /* NOTE(review): presumably the count of valid offsets above -- confirm */
247 
248    /* The size of the gl_ClipDistance[] array, if declared. */
249    uint8_t clip_distance_array_size:4;
250 
251    /* The size of the gl_CullDistance[] array, if declared. */
252    uint8_t cull_distance_array_size:4;
253 
254    /* Whether or not this shader ever uses textureGather() */
255    bool uses_texture_gather:1;
256 
257    /**
258     * True if this shader uses the fddx/fddy opcodes.
259     *
260     * Note that this does not include the "fine" and "coarse" variants.
261     */
262    bool uses_fddx_fddy:1;
263 
264    /** Has divergence analysis ever been run? */
265    bool divergence_analysis_run:1;
266 
267    /* Bitmask of bit-sizes used with ALU instructions. */
268    uint8_t bit_sizes_float;
269    uint8_t bit_sizes_int;
270 
271    /* Whether the first UBO is the default uniform buffer, i.e. uniforms. */
272    bool first_ubo_is_default_ubo:1;
273 
274    /* Whether or not separate shader objects were used */
275    bool separate_shader:1;
276 
277    /** Was this shader linked with any transform feedback varyings? */
278    bool has_transform_feedback_varyings:1;
279 
280    /* Whether flrp has been lowered. */
281    bool flrp_lowered:1;
282 
283    /* Whether nir_lower_io has been called to lower derefs.
284     * nir_variables for inputs and outputs might not be present in the IR.
285     */
286    bool io_lowered:1;
287 
288    /* Whether the shader writes memory, including transform feedback. */
289    bool writes_memory:1;
290 
291    /* Whether gl_Layer is viewport-relative */
292    bool layer_viewport_relative:1;
293 
294    /* Whether explicit barriers are used */
295    bool uses_control_barrier : 1;
296    bool uses_memory_barrier : 1;
297 
298    /**
299     * Shared memory types have explicit layout set.  Used for
300     * SPV_KHR_workgroup_storage_explicit_layout.
301     */
302    bool shared_memory_explicit_layout:1;
303 
304    /**
305     * Used for VK_KHR_zero_initialize_workgroup_memory.
306     */
307    bool zero_initialize_shared_memory:1;
308 
309    /**
310     * Used for ARB_compute_variable_group_size.
311     */
312    bool workgroup_size_variable:1;
313 
314    /**
315      * Set if this shader uses legacy (DX9 or ARB assembly) math rules.
316      *
317      * From the ARB_fragment_program specification:
318      *
319      *    "The following rules apply to multiplication:
320      *
321      *      1. <x> * <y> == <y> * <x>, for all <x> and <y>.
322      *      2. +/-0.0 * <x> = +/-0.0, at least for all <x> that correspond to
323      *         *representable numbers (IEEE "not a number" and "infinity"
324      *         *encodings may be exceptions).
325      *      3. +1.0 * <x> = <x>, for all <x>."
326      *
327      * However, in effect this was due to DX9 semantics implying that 0*x=0 even
328      * for inf/nan if the hardware generated them instead of float_min/max.  So,
329      * you should not have an exception for inf/nan to rule 2 above.
330      *
331      * One implementation of this behavior would be to flush all generated NaNs
332      * to zero, at which point 0*Inf=Nan=0.  Most DX9/ARB-asm hardware did not
333      * generate NaNs, and the only way the GPU saw one was to possibly feed it
334      * in as a uniform.
335      */
336    bool use_legacy_math_rules; /* plain bool, unlike the 1-bit flags above */
337 
338    union { /* Anonymous union of per-stage data. NOTE(review): presumably selected by `stage` -- confirm */
339       struct {
340          /* Which inputs are doubles */
341          uint64_t double_inputs;
342 
343          /* For AMD-specific driver-internal shaders. It replaces vertex
344           * buffer loads with code generating VS inputs from scalar registers.
345           *
346           * Valid values: SI_VS_BLIT_SGPRS_POS_*
347           */
348          uint8_t blit_sgprs_amd:4;
349 
350          /* True if the shader writes position in window space coordinates pre-transform */
351          bool window_space_position:1;
352 
353          /** Is an edge flag input needed? */
354          bool needs_edge_flag:1;
355       } vs; /* vertex shader */
356 
357       struct {
358          /** The output primitive type */
359          uint16_t output_primitive;
360 
361          /** The input primitive type */
362          uint16_t input_primitive;
363 
364          /** The maximum number of vertices the geometry shader might write. */
365          uint16_t vertices_out;
366 
367          /** 1 .. MAX_GEOMETRY_SHADER_INVOCATIONS */
368          uint8_t invocations;
369 
370          /** The number of vertices received per input primitive (max. 6) */
371          uint8_t vertices_in:3;
372 
373          /** Whether or not this shader uses EndPrimitive */
374          bool uses_end_primitive:1;
375 
376          /** The streams used in this shader (max. 4) */
377          uint8_t active_stream_mask:4;
378       } gs; /* geometry shader */
379 
380       struct {
381          bool uses_discard:1;
382          bool uses_demote:1;
383          bool uses_fbfetch_output:1;
384          bool color_is_dual_source:1;
385 
386          /**
387           * True if this fragment shader requires helper invocations.  This
388           * can be caused by the use of ALU derivative ops, texture
389           * instructions which do implicit derivatives, and the use of quad
390           * subgroup operations.
391           */
392          bool needs_quad_helper_invocations:1;
393 
394          /**
395           * True if this fragment shader requires helper invocations for
396           * all subgroup operations, not just quad ops and derivatives.
397           */
398          bool needs_all_helper_invocations:1;
399 
400          /**
401           * Whether any inputs are declared with the "sample" qualifier.
402           */
403          bool uses_sample_qualifier:1;
404 
405          /**
406           * Whether sample shading is used.
407           */
408          bool uses_sample_shading:1;
409 
410          /**
411           * Whether early fragment tests are enabled as defined by
412           * ARB_shader_image_load_store.
413           */
414          bool early_fragment_tests:1;
415 
416          /**
417           * Defined by INTEL_conservative_rasterization.
418           */
419          bool inner_coverage:1;
420 
421          bool post_depth_coverage:1;
422 
423          /**
424           * \name ARB_fragment_coord_conventions
425           * @{
426           */
427          bool pixel_center_integer:1;
428          bool origin_upper_left:1;
429          /*@}*/
430 
431          bool pixel_interlock_ordered:1;
432          bool pixel_interlock_unordered:1;
433          bool sample_interlock_ordered:1;
434          bool sample_interlock_unordered:1;
435 
436          /**
437           * Flags whether NIR's base types on the FS color outputs should be
438           * ignored.
439           *
440           * GLSL requires that fragment shader output base types match the
441           * render target's base types for the behavior to be defined.  From
442           * the GL 4.6 spec:
443           *
444           *     "If the values written by the fragment shader do not match the
445           *      format(s) of the corresponding color buffer(s), the result is
446           *      undefined."
447           *
448           * However, for NIR shaders translated from TGSI, we don't have the
449           * output types any more, so the driver will need to do whatever
450           * fixups are necessary to handle effectively untyped data being
451           * output from the FS.
452           */
453          bool untyped_color_outputs:1;
454 
455          /** gl_FragDepth layout for ARB_conservative_depth. */
456          enum gl_frag_depth_layout depth_layout:3;
457 
458          /**
459           * Interpolation qualifiers for drivers that lower color inputs
460           * to system values.
461           */
462          unsigned color0_interp:3; /* glsl_interp_mode */
463          bool color0_sample:1;
464          bool color0_centroid:1;
465          unsigned color1_interp:3; /* glsl_interp_mode */
466          bool color1_sample:1;
467          bool color1_centroid:1;
468 
469          /* Bitmask of gl_advanced_blend_mode values that may be used with this
470           * shader.
471           */
472          unsigned advanced_blend_modes;
473       } fs; /* fragment shader */
474 
475       struct {
476          uint16_t workgroup_size_hint[3];
477 
478          uint8_t user_data_components_amd:3;
479 
480          /*
481           * Arrangement of invocations used to calculate derivatives in a compute
482           * shader.  From NV_compute_shader_derivatives.
483           */
484          enum gl_derivative_group derivative_group:2;
485 
486          /**
487           * pointer size is:
488           *   AddressingModelLogical:    0    (default)
489           *   AddressingModelPhysical32: 32
490           *   AddressingModelPhysical64: 64
491           */
492          unsigned ptr_size;
493 
494          /**
495           * Uses subgroup intrinsics which can communicate across a quad.
496           */
497          bool uses_wide_subgroup_intrinsics;
498       } cs; /* compute shader */
499 
500       /* Applies to both TCS and TES. */
501       struct {
502 	 enum tess_primitive_mode _primitive_mode;
503 
504          /** The number of vertices in the TCS output patch. */
505          uint8_t tcs_vertices_out;
506          unsigned spacing:2; /* gl_tess_spacing */
507 
508          /** Is the vertex order counterclockwise? */
509          bool ccw:1;
510          bool point_mode:1;
511 
512          /* Bit mask of TCS per-vertex inputs (VS outputs) that are used
513           * with a vertex index that is NOT the invocation id
514           */
515          uint64_t tcs_cross_invocation_inputs_read;
516 
517          /* Bit mask of TCS per-vertex outputs that are used
518           * with a vertex index that is NOT the invocation id
519           */
520          uint64_t tcs_cross_invocation_outputs_read;
521       } tess;
522 
523       /* Applies to MESH. */
524       struct {
525          /* Bit mask of MS outputs that are used
526           * with an index that is NOT the local invocation index.
527           */
528          uint64_t ms_cross_invocation_output_access;
529 
530          uint16_t max_vertices_out;
531          uint16_t max_primitives_out;
532          uint16_t primitive_type;  /* GL_POINTS, GL_LINES or GL_TRIANGLES. */
533       } mesh;
534    };
535 } shader_info;
536 
537 #ifdef __cplusplus
538 }
539 #endif
540 
541 #endif /* SHADER_INFO_H */
542