• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2016 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  */
24 
25 #ifndef SHADER_INFO_H
26 #define SHADER_INFO_H
27 
28 #include "util/bitset.h"
29 #include "util/mesa-blake3.h"
30 #include "shader_enums.h"
31 #include <stdint.h>
32 
33 #ifdef __cplusplus
34 extern "C" {
35 #endif
36 
37 #define MAX_XFB_BUFFERS        4
38 #define MAX_INLINABLE_UNIFORMS 4
39 
/**
 * Per-shader metadata gathered by the compiler: resource counts, input/output
 * usage bitmasks, execution-mode flags, and (in the trailing union) state that
 * is only meaningful for a particular shader stage.
 */
typedef struct shader_info {
   /* Shader name; ownership/lifetime managed by the creator of this struct. */
   const char *name;

   /* Descriptive name provided by the client; may be NULL */
   const char *label;

   /* Shader is internal, and should be ignored by things like NIR_DEBUG=print */
   bool internal;

   /* BLAKE3 of the original source, used by shader detection in drivers. */
   blake3_hash source_blake3;

   /** The shader stage, such as MESA_SHADER_VERTEX. */
   gl_shader_stage stage:8;

   /** The shader stage in a non SSO linked program that follows this stage,
    * such as MESA_SHADER_FRAGMENT.
    */
   gl_shader_stage next_stage:8;

   /* Number of textures used by this shader */
   uint8_t num_textures;
   /* Number of uniform buffers used by this shader */
   uint8_t num_ubos;
   /* Number of atomic buffers used by this shader */
   uint8_t num_abos;
   /* Number of shader storage buffers (max .driver_location + 1) used by this
    * shader.  In the case of nir_lower_atomics_to_ssbo being used, this will
    * be the number of actual SSBOs in gl_program->info, and the lowered SSBOs
    * and atomic counters in nir_shader->info.
    */
   uint8_t num_ssbos;
   /* Number of images used by this shader */
   uint8_t num_images;

   /* Which inputs are actually read (one bit per varying slot) */
   uint64_t inputs_read;
   /* Which inputs occupy 2 slots. */
   uint64_t dual_slot_inputs;
   /* Which outputs are actually written (one bit per varying slot) */
   uint64_t outputs_written;
   /* Which outputs are actually read */
   uint64_t outputs_read;
   /* Which system values are actually read */
   BITSET_DECLARE(system_values_read, SYSTEM_VALUE_MAX);

   /* Which I/O is per-primitive, for read/written information combine with
    * the fields above.
    */
   uint64_t per_primitive_inputs;
   uint64_t per_primitive_outputs;

   /* Which I/O is per-view */
   uint64_t per_view_outputs;
   /* Enabled view mask, for per-view outputs */
   uint32_t view_mask;

   /* Which 16-bit inputs and outputs are used corresponding to
    * VARYING_SLOT_VARn_16BIT.
    */
   uint16_t inputs_read_16bit;
   uint16_t outputs_written_16bit;
   uint16_t outputs_read_16bit;
   uint16_t inputs_read_indirectly_16bit;
   uint16_t outputs_accessed_indirectly_16bit;

   /* Which patch inputs are actually read */
   uint32_t patch_inputs_read;
   /* Which patch outputs are actually written */
   uint32_t patch_outputs_written;
   /* Which patch outputs are read */
   uint32_t patch_outputs_read;

   /* Which inputs are read indirectly (subset of inputs_read) */
   uint64_t inputs_read_indirectly;
   /* Which outputs are read or written indirectly */
   uint64_t outputs_accessed_indirectly;
   /* Which patch inputs are read indirectly (subset of patch_inputs_read) */
   uint64_t patch_inputs_read_indirectly;
   /* Which patch outputs are read or written indirectly */
   uint64_t patch_outputs_accessed_indirectly;

   /** Bitfield of which textures are used */
   BITSET_DECLARE(textures_used, 128);

   /** Bitfield of which textures are used by texelFetch() */
   BITSET_DECLARE(textures_used_by_txf, 128);

   /** Bitfield of which samplers are used */
   BITSET_DECLARE(samplers_used, 32);

   /** Bitfield of which images are used */
   BITSET_DECLARE(images_used, 64);
   /** Bitfield of which images are buffers. */
   BITSET_DECLARE(image_buffers, 64);
   /** Bitfield of which images are MSAA. */
   BITSET_DECLARE(msaa_images, 64);

   /* SPV_KHR_float_controls: execution mode for floating point ops */
   uint32_t float_controls_execution_mode;

   /**
    * Size of shared variables accessed by compute/task/mesh shaders.
    */
   unsigned shared_size;

   /**
    * Size of task payload variables accessed by task/mesh shaders.
    */
   unsigned task_payload_size;

   /**
    * Number of ray tracing queries in the shader (counts all elements of all
    * variables).
    */
   unsigned ray_queries;

   /**
    * Local workgroup size used by compute/task/mesh shaders.
    */
   uint16_t workgroup_size[3];

   enum gl_subgroup_size subgroup_size;
   uint8_t num_subgroups;

   /**
    * Uses subgroup intrinsics which can communicate across a quad.
    */
   bool uses_wide_subgroup_intrinsics;

   /* Transform feedback buffer strides in dwords, max. 1K - 4. */
   uint8_t xfb_stride[MAX_XFB_BUFFERS];

   /* Dword offsets of uniforms that may be inlined as literals. */
   uint16_t inlinable_uniform_dw_offsets[MAX_INLINABLE_UNIFORMS];
   /* Number of valid entries in inlinable_uniform_dw_offsets. */
   uint8_t num_inlinable_uniforms:4;

   /* The size of the gl_ClipDistance[] array, if declared. */
   uint8_t clip_distance_array_size:4;

   /* The size of the gl_CullDistance[] array, if declared. */
   uint8_t cull_distance_array_size:4;

   /* Whether or not this shader ever uses textureGather() */
   bool uses_texture_gather:1;

   /* Whether texture size, levels, or samples is queried. */
   bool uses_resource_info_query:1;

   /** Has divergence analysis ever been run? */
   bool divergence_analysis_run:1;

   /* Bitmask of bit-sizes used with ALU instructions. */
   uint8_t bit_sizes_float;
   uint8_t bit_sizes_int;

   /* Whether the first UBO is the default uniform buffer, i.e. uniforms. */
   bool first_ubo_is_default_ubo:1;

   /* Whether or not separate shader objects were used */
   bool separate_shader:1;

   /** Was this shader linked with any transform feedback varyings? */
   bool has_transform_feedback_varyings:1;

   /* Whether flrp has been lowered. */
   bool flrp_lowered:1;

   /* Whether nir_lower_io has been called to lower derefs.
    * nir_variables for inputs and outputs might not be present in the IR.
    */
   bool io_lowered:1;

   /** Has nir_lower_var_copies called. To avoid calling any
    * lowering/optimization that would introduce any copy_deref later.
    */
   bool var_copies_lowered:1;

   /* Whether the shader writes memory, including transform feedback. */
   bool writes_memory:1;

   /* Whether gl_Layer is viewport-relative */
   bool layer_viewport_relative:1;

   /* Whether explicit barriers are used */
   bool uses_control_barrier : 1;
   bool uses_memory_barrier : 1;

   /* Whether ARB_bindless_texture ops or variables are used */
   bool uses_bindless : 1;

   /**
    * Shared memory types have explicit layout set.  Used for
    * SPV_KHR_workgroup_storage_explicit_layout.
    */
   bool shared_memory_explicit_layout:1;

   /**
    * Used for VK_KHR_zero_initialize_workgroup_memory.
    */
   bool zero_initialize_shared_memory:1;

   /**
    * Used for ARB_compute_variable_group_size.
    */
   bool workgroup_size_variable:1;

   /**
    * Whether the shader uses printf instructions.
    */
   bool uses_printf:1;

   /**
    * VK_KHR_shader_maximal_reconvergence
    */
   bool maximally_reconverges:1;

   /* Use ACO instead of LLVM on AMD. */
   bool use_aco_amd:1;

   /**
    * Set if this shader uses legacy (DX9 or ARB assembly) math rules.
    *
    * From the ARB_fragment_program specification:
    *
    *    "The following rules apply to multiplication:
    *
    *      1. <x> * <y> == <y> * <x>, for all <x> and <y>.
    *      2. +/-0.0 * <x> = +/-0.0, at least for all <x> that correspond to
    *         *representable numbers (IEEE "not a number" and "infinity"
    *         *encodings may be exceptions).
    *      3. +1.0 * <x> = <x>, for all <x>.""
    *
    * However, in effect this was due to DX9 semantics implying that 0*x=0 even
    * for inf/nan if the hardware generated them instead of float_min/max.  So,
    * you should not have an exception for inf/nan to rule 2 above.
    *
    * One implementation of this behavior would be to flush all generated NaNs
    * to zero, at which point 0*Inf=Nan=0.  Most DX9/ARB-asm hardware did not
    * generate NaNs, and the only way the GPU saw one was to possibly feed it
    * in as a uniform.
    */
   bool use_legacy_math_rules;

   /*
    * Arrangement of invocations used to calculate derivatives in
    * compute/task/mesh shaders.  From KHR_compute_shader_derivatives.
    */
   enum gl_derivative_group derivative_group:2;

   /* Stage-specific info; which member is valid depends on `stage` above. */
   union {
      struct {
         /* Which inputs are doubles */
         uint64_t double_inputs;

         /* For AMD-specific driver-internal shaders. It replaces vertex
          * buffer loads with code generating VS inputs from scalar registers.
          *
          * Valid values: SI_VS_BLIT_SGPRS_POS_*
          */
         uint8_t blit_sgprs_amd:4;

         /* Software TES executing as HW VS */
         bool tes_agx:1;

         /* True if the shader writes position in window space coordinates pre-transform */
         bool window_space_position:1;

         /** Is an edge flag input needed? */
         bool needs_edge_flag:1;
      } vs;

      struct {
         /** The output primitive type */
         enum mesa_prim output_primitive;

         /** The input primitive type */
         enum mesa_prim input_primitive;

         /** The maximum number of vertices the geometry shader might write. */
         uint16_t vertices_out;

         /** 1 .. MAX_GEOMETRY_SHADER_INVOCATIONS */
         uint8_t invocations;

         /** The number of vertices received per input primitive (max. 6) */
         uint8_t vertices_in:3;

         /** Whether or not this shader uses EndPrimitive */
         bool uses_end_primitive:1;

         /** The streams used in this shaders (max. 4) */
         uint8_t active_stream_mask:4;
      } gs;

      struct {
         bool uses_discard:1;
         bool uses_fbfetch_output:1;
         bool fbfetch_coherent:1;
         bool color_is_dual_source:1;

         /**
          * True if this fragment shader requires full quad invocations.
          */
         bool require_full_quads:1;

         /**
          * Whether the derivative group must be equivalent to the quad group.
          */
         bool quad_derivatives:1;

         /**
          * True if this fragment shader requires helper invocations.  This
          * can be caused by the use of ALU derivative ops, texture
          * instructions which do implicit derivatives, the use of quad
          * subgroup operations or if the shader requires full quads.
          */
         bool needs_quad_helper_invocations:1;

         /**
          * Whether any inputs are declared with the "sample" qualifier.
          */
         bool uses_sample_qualifier:1;

         /**
          * Whether sample shading is used.
          */
         bool uses_sample_shading:1;

         /**
          * Whether early fragment tests are enabled as defined by
          * ARB_shader_image_load_store.
          */
         bool early_fragment_tests:1;

         /**
          * Defined by INTEL_conservative_rasterization.
          */
         bool inner_coverage:1;

         bool post_depth_coverage:1;

         /**
          * \name ARB_fragment_coord_conventions
          * @{
          */
         bool pixel_center_integer:1;
         bool origin_upper_left:1;
         /*@}*/

         bool pixel_interlock_ordered:1;
         bool pixel_interlock_unordered:1;
         bool sample_interlock_ordered:1;
         bool sample_interlock_unordered:1;

         /**
          * Flags whether NIR's base types on the FS color outputs should be
          * ignored.
          *
          * GLSL requires that fragment shader output base types match the
          * render target's base types for the behavior to be defined.  From
          * the GL 4.6 spec:
          *
          *     "If the values written by the fragment shader do not match the
          *      format(s) of the corresponding color buffer(s), the result is
          *      undefined."
          *
          * However, for NIR shaders translated from TGSI, we don't have the
          * output types any more, so the driver will need to do whatever
          * fixups are necessary to handle effectively untyped data being
          * output from the FS.
          */
         bool untyped_color_outputs:1;

         /** gl_FragDepth layout for ARB_conservative_depth. */
         enum gl_frag_depth_layout depth_layout:3;

         /**
          * Interpolation qualifiers for drivers that lowers color inputs
          * to system values.
          */
         unsigned color0_interp:3; /* glsl_interp_mode */
         bool color0_sample:1;
         bool color0_centroid:1;
         unsigned color1_interp:3; /* glsl_interp_mode */
         bool color1_sample:1;
         bool color1_centroid:1;

         /* Bitmask of gl_advanced_blend_mode values that may be used with this
          * shader.
          */
         unsigned advanced_blend_modes;

         /**
          * Defined by AMD_shader_early_and_late_fragment_tests.
          */
         bool early_and_late_fragment_tests:1;
         enum gl_frag_stencil_layout stencil_front_layout:3;
         enum gl_frag_stencil_layout stencil_back_layout:3;
      } fs;

      struct {
         /* Workgroup size hint (cl_khr: __attribute__((work_group_size_hint))) */
         uint16_t workgroup_size_hint[3];

         uint8_t user_data_components_amd:4;

         /*
          * If the shader might run with shared mem on top of `shared_size`.
          */
         bool has_variable_shared_mem:1;

         /**
          * If the shader has any use of a cooperative matrix. From
          * SPV_KHR_cooperative_matrix.
          */
         bool has_cooperative_matrix:1;

         /**
          * Number of bytes of shared imageblock memory per thread. Currently,
          * this requires that the workgroup size is 32x32x1 and that
          * shared_size = 0. These requirements could be lifted in the future.
          * However, there is no current OpenGL/Vulkan API support for
          * imageblocks. This is only used internally to accelerate blit/copy.
          */
         uint8_t image_block_size_per_thread_agx;

         /**
          * pointer size is:
          *   AddressingModelLogical:    0    (default)
          *   AddressingModelPhysical32: 32
          *   AddressingModelPhysical64: 64
          */
         unsigned ptr_size;

         /** Index provided by VkPipelineShaderStageNodeCreateInfoAMDX or ShaderIndexAMDX */
         uint32_t shader_index;

         /** Maximum size required by any output node payload array */
         uint32_t node_payloads_size;

         /** Static workgroup count for overwriting the enqueued workgroup count. (0 if dynamic) */
         uint32_t workgroup_count[3];
      } cs;

      /* Applies to both TCS and TES. */
      struct {
         enum tess_primitive_mode _primitive_mode;

         /** The number of vertices in the TCS output patch. */
         uint8_t tcs_vertices_out;
         unsigned spacing:2; /*gl_tess_spacing*/

         /** Is the vertex order counterclockwise? */
         bool ccw:1;
         bool point_mode:1;

         /* Bit mask of TCS per-vertex inputs (VS outputs) that are used
          * with a vertex index that is equal to the invocation id.
          *
          * Not mutually exclusive with tcs_cross_invocation_inputs_read, i.e.
          * both input[0] and input[invocation_id] can be present.
          */
         uint64_t tcs_same_invocation_inputs_read;

         /* Bit mask of TCS per-vertex inputs (VS outputs) that are used
          * with a vertex index that is NOT the invocation id
          */
         uint64_t tcs_cross_invocation_inputs_read;

         /* Bit mask of TCS per-vertex outputs that are used
          * with a vertex index that is NOT the invocation id
          */
         uint64_t tcs_cross_invocation_outputs_read;
      } tess;

      /* Applies to MESH and TASK. */
      struct {
         /* Bit mask of MS outputs that are used
          * with an index that is NOT the local invocation index.
          */
         uint64_t ms_cross_invocation_output_access;

         /* Dimensions of task->mesh dispatch (EmitMeshTasksEXT)
          * when they are known compile-time constants.
          * 0 means they are not known.
          */
         uint32_t ts_mesh_dispatch_dimensions[3];

         uint16_t max_vertices_out;
         uint16_t max_primitives_out;
         enum mesa_prim primitive_type; /* POINTS, LINES or TRIANGLES. */

         /* TODO: remove this when we stop supporting NV_mesh_shader. */
         bool nv;
      } mesh;
   };
} shader_info;
536 
537 #ifdef __cplusplus
538 }
539 #endif
540 
541 #endif /* SHADER_INFO_H */
542