/*
 * Copyright 2024 Intel Corporation
 * SPDX-License-Identifier: MIT
 */

#pragma once

#include <stdint.h>

#include "compiler/shader_enums.h"
#include "util/enum_operators.h"

#ifdef __cplusplus
extern "C" {
#endif

/** A tri-state value to track states that are potentially dynamic */
enum intel_sometimes {
   INTEL_NEVER = 0,
   INTEL_SOMETIMES,
   INTEL_ALWAYS
};

static inline enum intel_sometimes
intel_sometimes_invert(enum intel_sometimes x)
{
   return (enum intel_sometimes)((int)INTEL_ALWAYS - (int)x);
}
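
/* Illustrative sketch (not part of the original header): because the enum
 * values are 0, 1 and 2, inversion maps NEVER to ALWAYS and back while
 * leaving SOMETIMES unchanged.  The _example helper below is hypothetical
 * and exists only to demonstrate the expected results.
 */
static inline void
intel_sometimes_invert_example(void)
{
   assert(intel_sometimes_invert(INTEL_NEVER) == INTEL_ALWAYS);
   assert(intel_sometimes_invert(INTEL_SOMETIMES) == INTEL_SOMETIMES);
   assert(intel_sometimes_invert(INTEL_ALWAYS) == INTEL_NEVER);
}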

enum intel_msaa_flags {
   /** Must be set whenever any dynamic MSAA is used
    *
    * This flag mostly exists to let us assert that the driver understands
    * dynamic MSAA so we don't run into trouble with drivers that don't.
    */
   INTEL_MSAA_FLAG_ENABLE_DYNAMIC = (1 << 0),

   /** True if the framebuffer is multisampled */
   INTEL_MSAA_FLAG_MULTISAMPLE_FBO = (1 << 1),

   /** True if this shader has been dispatched per-sample */
   INTEL_MSAA_FLAG_PERSAMPLE_DISPATCH = (1 << 2),

   /** True if inputs should be interpolated per-sample by default */
   INTEL_MSAA_FLAG_PERSAMPLE_INTERP = (1 << 3),

   /** True if this shader has been dispatched with alpha-to-coverage */
   INTEL_MSAA_FLAG_ALPHA_TO_COVERAGE = (1 << 4),

   /** True if this shader has been dispatched coarse
    *
    * This is intentionally chosen to be bit 15 to correspond to the coarse
    * bit in the pixel interpolator messages.
    */
   INTEL_MSAA_FLAG_COARSE_PI_MSG = (1 << 15),

   /** True if this shader has been dispatched coarse
    *
    * This is intentionally chosen to be bit 18 to correspond to the coarse
    * bit in the render target messages.
    */
   INTEL_MSAA_FLAG_COARSE_RT_WRITES = (1 << 18),
};
MESA_DEFINE_CPP_ENUM_BITFIELD_OPERATORS(intel_msaa_flags)
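
/* Illustrative sketch (not the actual driver logic): one plausible way a
 * driver could assemble the MSAA flags it pushes at draw time for a shader
 * compiled with dynamic (INTEL_SOMETIMES) state.  The parameter names are
 * hypothetical; the only hard requirement from this header is that
 * INTEL_MSAA_FLAG_ENABLE_DYNAMIC be set whenever dynamic MSAA is used.
 */
static inline enum intel_msaa_flags
intel_msaa_flags_example(bool multisample_fbo,
                         bool per_sample_shading,
                         bool alpha_to_coverage)
{
   enum intel_msaa_flags flags = INTEL_MSAA_FLAG_ENABLE_DYNAMIC;

   if (multisample_fbo)
      flags |= INTEL_MSAA_FLAG_MULTISAMPLE_FBO;

   if (multisample_fbo && per_sample_shading)
      flags |= INTEL_MSAA_FLAG_PERSAMPLE_DISPATCH |
               INTEL_MSAA_FLAG_PERSAMPLE_INTERP;

   if (alpha_to_coverage)
      flags |= INTEL_MSAA_FLAG_ALPHA_TO_COVERAGE;

   return flags;
}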

/**
 * @defgroup Tessellator parameter enumerations.
 *
 * These correspond to the hardware values in 3DSTATE_TE, and are provided
 * as part of the tessellation evaluation shader.
 *
 * @{
 */
enum intel_tess_partitioning {
   INTEL_TESS_PARTITIONING_INTEGER         = 0,
   INTEL_TESS_PARTITIONING_ODD_FRACTIONAL  = 1,
   INTEL_TESS_PARTITIONING_EVEN_FRACTIONAL = 2,
};

enum intel_tess_output_topology {
   INTEL_TESS_OUTPUT_TOPOLOGY_POINT   = 0,
   INTEL_TESS_OUTPUT_TOPOLOGY_LINE    = 1,
   INTEL_TESS_OUTPUT_TOPOLOGY_TRI_CW  = 2,
   INTEL_TESS_OUTPUT_TOPOLOGY_TRI_CCW = 3,
};

enum intel_tess_domain {
   INTEL_TESS_DOMAIN_QUAD    = 0,
   INTEL_TESS_DOMAIN_TRI     = 1,
   INTEL_TESS_DOMAIN_ISOLINE = 2,
};
/** @} */

enum intel_shader_dispatch_mode {
   INTEL_DISPATCH_MODE_4X1_SINGLE = 0,
   INTEL_DISPATCH_MODE_4X2_DUAL_INSTANCE = 1,
   INTEL_DISPATCH_MODE_4X2_DUAL_OBJECT = 2,
   INTEL_DISPATCH_MODE_SIMD8 = 3,

   INTEL_DISPATCH_MODE_TCS_SINGLE_PATCH = 0,
   INTEL_DISPATCH_MODE_TCS_MULTI_PATCH = 2,
};

enum intel_barycentric_mode {
   INTEL_BARYCENTRIC_PERSPECTIVE_PIXEL       = 0,
   INTEL_BARYCENTRIC_PERSPECTIVE_CENTROID    = 1,
   INTEL_BARYCENTRIC_PERSPECTIVE_SAMPLE      = 2,
   INTEL_BARYCENTRIC_NONPERSPECTIVE_PIXEL    = 3,
   INTEL_BARYCENTRIC_NONPERSPECTIVE_CENTROID = 4,
   INTEL_BARYCENTRIC_NONPERSPECTIVE_SAMPLE   = 5,
   INTEL_BARYCENTRIC_MODE_COUNT              = 6
};
#define INTEL_BARYCENTRIC_PERSPECTIVE_BITS \
   ((1 << INTEL_BARYCENTRIC_PERSPECTIVE_PIXEL) | \
    (1 << INTEL_BARYCENTRIC_PERSPECTIVE_CENTROID) | \
    (1 << INTEL_BARYCENTRIC_PERSPECTIVE_SAMPLE))
#define INTEL_BARYCENTRIC_NONPERSPECTIVE_BITS \
   ((1 << INTEL_BARYCENTRIC_NONPERSPECTIVE_PIXEL) | \
    (1 << INTEL_BARYCENTRIC_NONPERSPECTIVE_CENTROID) | \
    (1 << INTEL_BARYCENTRIC_NONPERSPECTIVE_SAMPLE))
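
/* Illustrative sketch (hypothetical helper, not part of the original header):
 * the masks above are bitfields of (1 << intel_barycentric_mode), so testing
 * whether a shader's barycentric modes request any non-perspective
 * interpolation is a simple mask check.
 */
static inline bool
intel_barycentric_modes_use_noperspective_example(uint32_t barycentric_modes)
{
   return (barycentric_modes & INTEL_BARYCENTRIC_NONPERSPECTIVE_BITS) != 0;
}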

/**
 * Data structure recording the relationship between the gl_varying_slot enum
 * and "slots" within the vertex URB entry (VUE).  A "slot" is defined as a
 * single octaword within the VUE (128 bits).
 *
 * Note that each BRW register contains 256 bits (2 octawords), so when
 * accessing the VUE in URB_NOSWIZZLE mode, each register corresponds to two
 * consecutive VUE slots.  When accessing the VUE in URB_INTERLEAVED mode (as
 * in a vertex shader), each register corresponds to a single VUE slot, since
 * it contains data for two separate vertices.
 */
struct intel_vue_map {
   /**
    * Bitfield representing all varying slots that are (a) stored in this VUE
    * map, and (b) actually written by the shader.  Does not include any of
    * the additional varying slots defined in brw_varying_slot.
    */
   uint64_t slots_valid;

   /**
    * Is this VUE map for a separate shader pipeline?
    *
    * Separable programs (GL_ARB_separate_shader_objects) can be mixed and
    * matched without the linker having a chance to dead-code-eliminate unused
    * varyings.
    *
    * This means that we have to use a fixed slot layout, based on the output's
    * location field, rather than assigning slots in a compact contiguous block.
    */
   bool separate;

   /**
    * Map from gl_varying_slot value to VUE slot.  For gl_varying_slots that
    * are not stored in a slot (because they are not written, or because
    * additional processing is applied before storing them in the VUE), the
    * value is -1.
    */
   signed char varying_to_slot[VARYING_SLOT_TESS_MAX];

   /**
    * Map from VUE slot to gl_varying_slot value.  For slots that do not
    * directly correspond to a gl_varying_slot, the value comes from
    * brw_varying_slot.
    *
    * For slots that are not in use, the value is BRW_VARYING_SLOT_PAD.
    */
   signed char slot_to_varying[VARYING_SLOT_TESS_MAX];

   /**
    * Total number of VUE slots in use
    */
   int num_slots;

   /**
    * Number of position VUE slots.  If num_pos_slots > 1, primitive
    * replication is being used.
    */
   int num_pos_slots;

   /**
    * Number of per-patch VUE slots. Only valid for tessellation control
    * shader outputs and tessellation evaluation shader inputs.
    */
   int num_per_patch_slots;

   /**
    * Number of per-vertex VUE slots. Only valid for tessellation control
    * shader outputs and tessellation evaluation shader inputs.
    */
   int num_per_vertex_slots;
};
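
/* Illustrative sketch (hypothetical helper): because each VUE slot is a
 * single 128-bit octaword, a varying's byte offset within the vertex URB
 * entry follows directly from varying_to_slot.  Returns -1 for varyings not
 * stored in any slot.
 */
static inline int
intel_vue_map_varying_offset_example(const struct intel_vue_map *vue_map,
                                     int varying)
{
   int slot = vue_map->varying_to_slot[varying];
   return slot < 0 ? -1 : slot * 16 /* 16 bytes per 128-bit slot */;
}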

struct intel_cs_dispatch_info {
   uint32_t group_size;
   uint32_t simd_size;
   uint32_t threads;

   /* RightExecutionMask field used in GPGPU_WALKER. */
   uint32_t right_mask;
};
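
/* Illustrative sketch (not the compiler's actual code) of how these fields
 * relate to each other: the thread count rounds the workgroup size up to the
 * SIMD width, and right_mask is assumed here to enable only the channels
 * that are live in the last, possibly partial, thread.
 */
static inline struct intel_cs_dispatch_info
intel_cs_dispatch_info_example(uint32_t group_size, uint32_t simd_size)
{
   struct intel_cs_dispatch_info info;
   info.group_size = group_size;
   info.simd_size = simd_size;
   info.threads = (group_size + simd_size - 1) / simd_size;

   /* Channels used by the final thread (simd_size assumed to be a power of
    * two no larger than 32).
    */
   const uint32_t remainder = group_size & (simd_size - 1);
   info.right_mask = ~0u >> (32 - (remainder ? remainder : simd_size));
   return info;
}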

enum PACKED intel_compute_walk_order {
   INTEL_WALK_ORDER_XYZ = 0,
   INTEL_WALK_ORDER_XZY = 1,
   INTEL_WALK_ORDER_YXZ = 2,
   INTEL_WALK_ORDER_YZX = 3,
   INTEL_WALK_ORDER_ZXY = 4,
   INTEL_WALK_ORDER_ZYX = 5,
};

static inline bool
intel_fs_is_persample(enum intel_sometimes shader_persample_dispatch,
                      bool shader_per_sample_shading,
                      enum intel_msaa_flags pushed_msaa_flags)
{
   if (shader_persample_dispatch != INTEL_SOMETIMES)
      return shader_persample_dispatch;

   assert(pushed_msaa_flags & INTEL_MSAA_FLAG_ENABLE_DYNAMIC);

   if (!(pushed_msaa_flags & INTEL_MSAA_FLAG_MULTISAMPLE_FBO))
      return false;

   if (shader_per_sample_shading)
      assert(pushed_msaa_flags & INTEL_MSAA_FLAG_PERSAMPLE_DISPATCH);

   return (pushed_msaa_flags & INTEL_MSAA_FLAG_PERSAMPLE_DISPATCH) != 0;
}
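
/* Illustrative only (hypothetical values): for a shader compiled with
 * persample dispatch set to INTEL_SOMETIMES, the draw-time answer comes
 * entirely from the pushed flags.
 */
static inline bool
intel_fs_is_persample_example(void)
{
   const enum intel_msaa_flags pushed =
      INTEL_MSAA_FLAG_ENABLE_DYNAMIC |
      INTEL_MSAA_FLAG_MULTISAMPLE_FBO |
      INTEL_MSAA_FLAG_PERSAMPLE_DISPATCH;

   /* Multisampled FBO with per-sample dispatch pushed -> runs per-sample. */
   return intel_fs_is_persample(INTEL_SOMETIMES, false, pushed);
}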

static inline uint32_t
intel_fs_barycentric_modes(enum intel_sometimes shader_persample_dispatch,
                           uint32_t shader_barycentric_modes,
                           enum intel_msaa_flags pushed_msaa_flags)
{
   /* In the non-dynamic case, we can just return the shader_barycentric_modes
    * computed at compile time.
    */
   if (shader_persample_dispatch != INTEL_SOMETIMES)
      return shader_barycentric_modes;

   uint32_t modes = shader_barycentric_modes;

   assert(pushed_msaa_flags & INTEL_MSAA_FLAG_ENABLE_DYNAMIC);

   if (pushed_msaa_flags & INTEL_MSAA_FLAG_PERSAMPLE_INTERP) {
      assert(pushed_msaa_flags & INTEL_MSAA_FLAG_PERSAMPLE_DISPATCH);

      /* Making dynamic per-sample interpolation work is a bit tricky.  The
       * hardware will hang if SAMPLE is requested but per-sample dispatch is
       * not enabled.  This means we can't preemptively add SAMPLE to the
       * barycentrics bitfield.  Instead, we have to add it late and only
       * on-demand.  Annoyingly, changing the number of barycentrics requested
       * changes the whole PS shader payload so we very much don't want to do
       * that.  Instead, if the dynamic per-sample interpolation flag is set,
       * we check to see if SAMPLE was requested and, if not, replace the
       * highest barycentric bit in the [non]perspective grouping (CENTROID,
       * if it exists, else PIXEL) with SAMPLE.  The shader will stomp all of
       * its barycentrics with SAMPLE anyway, so it really doesn't matter
       * which one we replace.  The important thing is that we keep the number
       * of barycentrics in each [non]perspective grouping the same.
       */
      if ((modes & INTEL_BARYCENTRIC_PERSPECTIVE_BITS) &&
          !(modes & BITFIELD_BIT(INTEL_BARYCENTRIC_PERSPECTIVE_SAMPLE))) {
         int sample_mode =
            util_last_bit(modes & INTEL_BARYCENTRIC_PERSPECTIVE_BITS) - 1;
         assert(modes & BITFIELD_BIT(sample_mode));

         modes &= ~BITFIELD_BIT(sample_mode);
         modes |= BITFIELD_BIT(INTEL_BARYCENTRIC_PERSPECTIVE_SAMPLE);
      }

      if ((modes & INTEL_BARYCENTRIC_NONPERSPECTIVE_BITS) &&
          !(modes & BITFIELD_BIT(INTEL_BARYCENTRIC_NONPERSPECTIVE_SAMPLE))) {
         int sample_mode =
            util_last_bit(modes & INTEL_BARYCENTRIC_NONPERSPECTIVE_BITS) - 1;
         assert(modes & BITFIELD_BIT(sample_mode));

         modes &= ~BITFIELD_BIT(sample_mode);
         modes |= BITFIELD_BIT(INTEL_BARYCENTRIC_NONPERSPECTIVE_SAMPLE);
      }
   } else {
      /* If we're not using per-sample interpolation, we need to disable the
       * per-sample bits.
       *
       * SKL PRMs, Volume 2a: Command Reference: Instructions,
       * 3DSTATE_WM:Barycentric Interpolation Mode:
       *
       *    "MSDISPMODE_PERSAMPLE is required in order to select Perspective
       *     Sample or Non-perspective Sample barycentric coordinates."
       */
      uint32_t sample_bits = (BITFIELD_BIT(INTEL_BARYCENTRIC_PERSPECTIVE_SAMPLE) |
                              BITFIELD_BIT(INTEL_BARYCENTRIC_NONPERSPECTIVE_SAMPLE));
      uint32_t requested_sample = modes & sample_bits;
      modes &= ~sample_bits;
      /*
       * If the shader requested some sample modes and we have to disable
       * them, make sure we add the pixel variant back so we don't mess up
       * the thread payload.
       *
       * Why does this work out? Because of the ordering in the thread
       * payload:
       *
       *   R7:10  Perspective Centroid Barycentric
       *   R11:14 Perspective Sample Barycentric
       *   R15:18 Linear Pixel Location Barycentric
       *
       * In the backend, when persample dispatch is dynamic, we always select
       * the sample barycentric and turn off the pixel location (even if
       * requested through intrinsics). That way, when we dynamically select
       * pixel or sample dispatch, the barycentrics always match, since the
       * pixel location barycentric register offset will align with the sample
       * barycentric.
       */
      if (requested_sample) {
         if (requested_sample & BITFIELD_BIT(INTEL_BARYCENTRIC_PERSPECTIVE_SAMPLE))
            modes |= BITFIELD_BIT(INTEL_BARYCENTRIC_PERSPECTIVE_PIXEL);
         if (requested_sample & BITFIELD_BIT(INTEL_BARYCENTRIC_NONPERSPECTIVE_SAMPLE))
            modes |= BITFIELD_BIT(INTEL_BARYCENTRIC_NONPERSPECTIVE_PIXEL);
      }
   }

   return modes;
}
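
/* Illustrative only (hypothetical values): with dynamic per-sample
 * interpolation pushed, a shader that only requested perspective centroid
 * barycentrics gets that bit swapped for the perspective sample bit, keeping
 * the number of barycentrics (and thus the thread payload layout) unchanged.
 */
static inline void
intel_fs_barycentric_modes_example(void)
{
   const uint32_t compiled_modes =
      BITFIELD_BIT(INTEL_BARYCENTRIC_PERSPECTIVE_CENTROID);
   const enum intel_msaa_flags pushed =
      INTEL_MSAA_FLAG_ENABLE_DYNAMIC |
      INTEL_MSAA_FLAG_MULTISAMPLE_FBO |
      INTEL_MSAA_FLAG_PERSAMPLE_DISPATCH |
      INTEL_MSAA_FLAG_PERSAMPLE_INTERP;

   assert(intel_fs_barycentric_modes(INTEL_SOMETIMES, compiled_modes, pushed) ==
          BITFIELD_BIT(INTEL_BARYCENTRIC_PERSPECTIVE_SAMPLE));
}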

static inline bool
intel_fs_is_coarse(enum intel_sometimes shader_coarse_pixel_dispatch,
                   enum intel_msaa_flags pushed_msaa_flags)
{
   if (shader_coarse_pixel_dispatch != INTEL_SOMETIMES)
      return shader_coarse_pixel_dispatch;

   assert(pushed_msaa_flags & INTEL_MSAA_FLAG_ENABLE_DYNAMIC);

   if (pushed_msaa_flags & INTEL_MSAA_FLAG_COARSE_RT_WRITES)
      assert(shader_coarse_pixel_dispatch != INTEL_NEVER);
   else
      assert(shader_coarse_pixel_dispatch != INTEL_ALWAYS);

   return (pushed_msaa_flags & INTEL_MSAA_FLAG_COARSE_RT_WRITES) != 0;
}
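
/* Illustrative only (hypothetical values): as with intel_fs_is_persample(),
 * a shader compiled with coarse pixel dispatch set to INTEL_SOMETIMES takes
 * its draw-time answer from the pushed flags; the asserts above merely check
 * that the pushed state is one the shader was compiled to handle.
 */
static inline bool
intel_fs_is_coarse_example(void)
{
   /* No coarse render-target-write bit pushed, so the shader is not coarse. */
   return intel_fs_is_coarse(INTEL_SOMETIMES, INTEL_MSAA_FLAG_ENABLE_DYNAMIC);
}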

#ifdef __cplusplus
} /* extern "C" */
#endif