1 /*
2 * Copyright 2024 Intel Corporation
3 * SPDX-License-Identifier: MIT
4 */
5
6 #pragma once
7
8 #include <stdint.h>
9
10 #include "compiler/shader_enums.h"
11 #include "util/enum_operators.h"
12
13 #ifdef __cplusplus
14 extern "C" {
15 #endif
16
/**
 * Tri-state value for properties that may only be known dynamically.
 *
 * The numeric ordering NEVER < SOMETIMES < ALWAYS matters:
 * intel_sometimes_invert() mirrors a value by subtracting it from
 * INTEL_ALWAYS.
 */
enum intel_sometimes {
   INTEL_NEVER     = 0,
   INTEL_SOMETIMES = 1,
   INTEL_ALWAYS    = 2,
};
23
24 static inline enum intel_sometimes
intel_sometimes_invert(enum intel_sometimes x)25 intel_sometimes_invert(enum intel_sometimes x)
26 {
27 return (enum intel_sometimes)((int)INTEL_ALWAYS - (int)x);
28 }
29
/**
 * Bit flags describing MSAA / coarse dispatch state.
 *
 * The intel_fs_* helpers in this file receive these as `pushed_msaa_flags`.
 */
enum intel_msaa_flags {
   /** Must be set whenever any dynamic MSAA is used.
    *
    * This flag mostly exists to let us assert that the driver understands
    * dynamic MSAA so we don't run into trouble with drivers that don't.
    */
   INTEL_MSAA_FLAG_ENABLE_DYNAMIC     = 1 << 0,

   /** Set if the framebuffer is multisampled. */
   INTEL_MSAA_FLAG_MULTISAMPLE_FBO    = 1 << 1,

   /** Set if this shader has been dispatched per-sample. */
   INTEL_MSAA_FLAG_PERSAMPLE_DISPATCH = 1 << 2,

   /** Set if inputs should be interpolated per-sample by default. */
   INTEL_MSAA_FLAG_PERSAMPLE_INTERP   = 1 << 3,

   /** Set if this shader has been dispatched with alpha-to-coverage. */
   INTEL_MSAA_FLAG_ALPHA_TO_COVERAGE  = 1 << 4,

   /** Set if this shader has been dispatched coarse.
    *
    * This is intentionally chosen to be bit 15 to correspond to the coarse
    * bit in the pixel interpolator messages.
    */
   INTEL_MSAA_FLAG_COARSE_PI_MSG      = 1 << 15,

   /** Set if this shader has been dispatched coarse.
    *
    * This is intentionally chosen to be bit 18 to correspond to the coarse
    * bit in the render target messages.
    */
   INTEL_MSAA_FLAG_COARSE_RT_WRITES   = 1 << 18,
};
/* NOTE(review): judging by its name, this macro supplies the bitwise
 * operators for intel_msaa_flags in C++ builds; its definition is not in
 * this header (presumably util/enum_operators.h) -- confirm.
 */
MESA_DEFINE_CPP_ENUM_BITFIELD_OPERATORS(intel_msaa_flags)
65
66 /**
67 * @defgroup Tessellator parameter enumerations.
68 *
69 * These correspond to the hardware values in 3DSTATE_TE, and are provided
70 * as part of the tessellation evaluation shader.
71 *
72 * @{
73 */
/** Tessellator partitioning, using the hardware encoding of 3DSTATE_TE. */
enum intel_tess_partitioning {
   INTEL_TESS_PARTITIONING_INTEGER         = 0,
   INTEL_TESS_PARTITIONING_ODD_FRACTIONAL  = 1,
   INTEL_TESS_PARTITIONING_EVEN_FRACTIONAL = 2,
};
79
/** Tessellator output topology, using the hardware encoding of 3DSTATE_TE. */
enum intel_tess_output_topology {
   INTEL_TESS_OUTPUT_TOPOLOGY_POINT   = 0,
   INTEL_TESS_OUTPUT_TOPOLOGY_LINE    = 1,
   INTEL_TESS_OUTPUT_TOPOLOGY_TRI_CW  = 2,
   INTEL_TESS_OUTPUT_TOPOLOGY_TRI_CCW = 3,
};
86
/** Tessellator domain, using the hardware encoding of 3DSTATE_TE. */
enum intel_tess_domain {
   INTEL_TESS_DOMAIN_QUAD    = 0,
   INTEL_TESS_DOMAIN_TRI     = 1,
   INTEL_TESS_DOMAIN_ISOLINE = 2,
};
92 /** @} */
93
/**
 * Shader dispatch modes.
 *
 * Two families of names share this enum, so numeric values repeat: the
 * TCS_* names reuse 0 and 2.  As a consequence, names from both families
 * cannot appear as case labels of the same switch statement.
 */
enum intel_shader_dispatch_mode {
   INTEL_DISPATCH_MODE_4X1_SINGLE        = 0,
   INTEL_DISPATCH_MODE_4X2_DUAL_INSTANCE = 1,
   INTEL_DISPATCH_MODE_4X2_DUAL_OBJECT   = 2,
   INTEL_DISPATCH_MODE_SIMD8             = 3,

   /* TCS-specific names; their values alias entries above. */
   INTEL_DISPATCH_MODE_TCS_SINGLE_PATCH  = 0,
   INTEL_DISPATCH_MODE_TCS_MULTI_PATCH   = 2,
};
103
/**
 * Barycentric interpolation modes.
 *
 * Each enumerator doubles as a bit index: mode bitfields are built as
 * (1 << mode), see the *_BITS masks below and intel_fs_barycentric_modes().
 * Within each of the perspective / non-perspective groups the order is
 * PIXEL, CENTROID, SAMPLE.
 */
enum intel_barycentric_mode {
   INTEL_BARYCENTRIC_PERSPECTIVE_PIXEL       = 0,
   INTEL_BARYCENTRIC_PERSPECTIVE_CENTROID    = 1,
   INTEL_BARYCENTRIC_PERSPECTIVE_SAMPLE      = 2,
   INTEL_BARYCENTRIC_NONPERSPECTIVE_PIXEL    = 3,
   INTEL_BARYCENTRIC_NONPERSPECTIVE_CENTROID = 4,
   INTEL_BARYCENTRIC_NONPERSPECTIVE_SAMPLE   = 5,
   INTEL_BARYCENTRIC_MODE_COUNT              = 6
};

/** Mask covering the three perspective mode bits (bits 0..2). */
#define INTEL_BARYCENTRIC_PERSPECTIVE_BITS              \
   ((1 << INTEL_BARYCENTRIC_PERSPECTIVE_PIXEL)    |     \
    (1 << INTEL_BARYCENTRIC_PERSPECTIVE_CENTROID) |     \
    (1 << INTEL_BARYCENTRIC_PERSPECTIVE_SAMPLE))

/** Mask covering the three non-perspective mode bits (bits 3..5). */
#define INTEL_BARYCENTRIC_NONPERSPECTIVE_BITS           \
   ((1 << INTEL_BARYCENTRIC_NONPERSPECTIVE_PIXEL)    |  \
    (1 << INTEL_BARYCENTRIC_NONPERSPECTIVE_CENTROID) |  \
    (1 << INTEL_BARYCENTRIC_NONPERSPECTIVE_SAMPLE))
121
/**
 * Data structure recording the relationship between the gl_varying_slot enum
 * and "slots" within the vertex URB entry (VUE).  A "slot" is defined as a
 * single octaword within the VUE (128 bits).
 *
 * Note that each BRW register contains 256 bits (2 octawords), so when
 * accessing the VUE in URB_NOSWIZZLE mode, each register corresponds to two
 * consecutive VUE slots.  When accessing the VUE in URB_INTERLEAVED mode (as
 * in a vertex shader), each register corresponds to a single VUE slot, since
 * it contains data for two separate vertices.
 *
 * NOTE(review): brw_varying_slot and BRW_VARYING_SLOT_PAD, referenced in the
 * field comments below, are not declared in this header -- confirm that
 * these names are still current.
 */
struct intel_vue_map {
   /**
    * Bitfield representing all varying slots that are (a) stored in this VUE
    * map, and (b) actually written by the shader.  Does not include any of
    * the additional varying slots defined in brw_varying_slot.
    */
   uint64_t slots_valid;

   /**
    * Is this VUE map for a separate shader pipeline?
    *
    * Separable programs (GL_ARB_separate_shader_objects) can be mixed and
    * matched without the linker having a chance to dead code eliminate
    * unused varyings.
    *
    * This means that we have to use a fixed slot layout, based on the
    * output's location field, rather than assigning slots in a compact
    * contiguous block.
    */
   bool separate;

   /**
    * Map from gl_varying_slot value to VUE slot.  For gl_varying_slots that
    * are not stored in a slot (because they are not written, or because
    * additional processing is applied before storing them in the VUE), the
    * value is -1.
    *
    * Elements are signed char so that the -1 sentinel is representable.
    */
   signed char varying_to_slot[VARYING_SLOT_TESS_MAX];

   /**
    * Map from VUE slot to gl_varying_slot value.  For slots that do not
    * directly correspond to a gl_varying_slot, the value comes from
    * brw_varying_slot.
    *
    * For slots that are not in use, the value is BRW_VARYING_SLOT_PAD.
    */
   signed char slot_to_varying[VARYING_SLOT_TESS_MAX];

   /**
    * Total number of VUE slots in use
    */
   int num_slots;

   /**
    * Number of position VUE slots.  If num_pos_slots > 1, primitive
    * replication is being used.
    */
   int num_pos_slots;

   /**
    * Number of per-patch VUE slots.  Only valid for tessellation control
    * shader outputs and tessellation evaluation shader inputs.
    */
   int num_per_patch_slots;

   /**
    * Number of per-vertex VUE slots.  Only valid for tessellation control
    * shader outputs and tessellation evaluation shader inputs.
    */
   int num_per_vertex_slots;
};
192
/**
 * Dispatch parameters bundle.
 *
 * NOTE(review): going by the `cs` prefix and the GPGPU_WALKER reference this
 * presumably describes a compute shader dispatch; the exact meaning and
 * units of group_size, simd_size and threads are not visible in this header
 * -- confirm against the code that fills the struct in.
 */
struct intel_cs_dispatch_info {
   uint32_t group_size;
   uint32_t simd_size;
   uint32_t threads;

   /* RightExecutionMask field used in GPGPU_WALKER. */
   uint32_t right_mask;
};
201
202 enum PACKED intel_compute_walk_order {
203 INTEL_WALK_ORDER_XYZ = 0,
204 INTEL_WALK_ORDER_XZY = 1,
205 INTEL_WALK_ORDER_YXZ = 2,
206 INTEL_WALK_ORDER_YZX = 3,
207 INTEL_WALK_ORDER_ZXY = 4,
208 INTEL_WALK_ORDER_ZYX = 5,
209 };
210
211 static inline bool
intel_fs_is_persample(enum intel_sometimes shader_persample_dispatch,bool shader_per_sample_shading,enum intel_msaa_flags pushed_msaa_flags)212 intel_fs_is_persample(enum intel_sometimes shader_persample_dispatch,
213 bool shader_per_sample_shading,
214 enum intel_msaa_flags pushed_msaa_flags)
215 {
216 if (shader_persample_dispatch != INTEL_SOMETIMES)
217 return shader_persample_dispatch;
218
219 assert(pushed_msaa_flags & INTEL_MSAA_FLAG_ENABLE_DYNAMIC);
220
221 if (!(pushed_msaa_flags & INTEL_MSAA_FLAG_MULTISAMPLE_FBO))
222 return false;
223
224 if (shader_per_sample_shading)
225 assert(pushed_msaa_flags & INTEL_MSAA_FLAG_PERSAMPLE_DISPATCH);
226
227 return (pushed_msaa_flags & INTEL_MSAA_FLAG_PERSAMPLE_DISPATCH) != 0;
228 }
229
230 static inline uint32_t
intel_fs_barycentric_modes(enum intel_sometimes shader_persample_dispatch,uint32_t shader_barycentric_modes,enum intel_msaa_flags pushed_msaa_flags)231 intel_fs_barycentric_modes(enum intel_sometimes shader_persample_dispatch,
232 uint32_t shader_barycentric_modes,
233 enum intel_msaa_flags pushed_msaa_flags)
234 {
235 /* In the non dynamic case, we can just return the computed shader_barycentric_modes from
236 * compilation time.
237 */
238 if (shader_persample_dispatch != INTEL_SOMETIMES)
239 return shader_barycentric_modes;
240
241 uint32_t modes = shader_barycentric_modes;
242
243 assert(pushed_msaa_flags & INTEL_MSAA_FLAG_ENABLE_DYNAMIC);
244
245 if (pushed_msaa_flags & INTEL_MSAA_FLAG_PERSAMPLE_INTERP) {
246 assert(pushed_msaa_flags & INTEL_MSAA_FLAG_PERSAMPLE_DISPATCH);
247
248 /* Making dynamic per-sample interpolation work is a bit tricky. The
249 * hardware will hang if SAMPLE is requested but per-sample dispatch is
250 * not enabled. This means we can't preemptively add SAMPLE to the
251 * barycentrics bitfield. Instead, we have to add it late and only
252 * on-demand. Annoyingly, changing the number of barycentrics requested
253 * changes the whole PS shader payload so we very much don't want to do
254 * that. Instead, if the dynamic per-sample interpolation flag is set,
255 * we check to see if SAMPLE was requested and, if not, replace the
256 * highest barycentric bit in the [non]perspective grouping (CENTROID,
257 * if it exists, else PIXEL) with SAMPLE. The shader will stomp all the
258 * barycentrics in the shader with SAMPLE so it really doesn't matter
259 * which one we replace. The important thing is that we keep the number
260 * of barycentrics in each [non]perspective grouping the same.
261 */
262 if ((modes & INTEL_BARYCENTRIC_PERSPECTIVE_BITS) &&
263 !(modes & BITFIELD_BIT(INTEL_BARYCENTRIC_PERSPECTIVE_SAMPLE))) {
264 int sample_mode =
265 util_last_bit(modes & INTEL_BARYCENTRIC_PERSPECTIVE_BITS) - 1;
266 assert(modes & BITFIELD_BIT(sample_mode));
267
268 modes &= ~BITFIELD_BIT(sample_mode);
269 modes |= BITFIELD_BIT(INTEL_BARYCENTRIC_PERSPECTIVE_SAMPLE);
270 }
271
272 if ((modes & INTEL_BARYCENTRIC_NONPERSPECTIVE_BITS) &&
273 !(modes & BITFIELD_BIT(INTEL_BARYCENTRIC_NONPERSPECTIVE_SAMPLE))) {
274 int sample_mode =
275 util_last_bit(modes & INTEL_BARYCENTRIC_NONPERSPECTIVE_BITS) - 1;
276 assert(modes & BITFIELD_BIT(sample_mode));
277
278 modes &= ~BITFIELD_BIT(sample_mode);
279 modes |= BITFIELD_BIT(INTEL_BARYCENTRIC_NONPERSPECTIVE_SAMPLE);
280 }
281 } else {
282 /* If we're not using per-sample interpolation, we need to disable the
283 * per-sample bits.
284 *
285 * SKL PRMs, Volume 2a: Command Reference: Instructions,
286 * 3DSTATE_WM:Barycentric Interpolation Mode:
287
288 * "MSDISPMODE_PERSAMPLE is required in order to select Perspective
289 * Sample or Non-perspective Sample barycentric coordinates."
290 */
291 uint32_t sample_bits = (BITFIELD_BIT(INTEL_BARYCENTRIC_PERSPECTIVE_SAMPLE) |
292 BITFIELD_BIT(INTEL_BARYCENTRIC_NONPERSPECTIVE_SAMPLE));
293 uint32_t requested_sample = modes & sample_bits;
294 modes &= ~sample_bits;
295 /*
296 * If the shader requested some sample modes and we have to disable
297 * them, make sure we add back the pixel variant back to not mess up the
298 * thread payload.
299 *
300 * Why does this works out? Because of the ordering in the thread payload :
301 *
302 * R7:10 Perspective Centroid Barycentric
303 * R11:14 Perspective Sample Barycentric
304 * R15:18 Linear Pixel Location Barycentric
305 *
306 * In the backend when persample dispatch is dynamic, we always select
307 * the sample barycentric and turn off the pixel location (even if
308 * requested through intrinsics). That way when we dynamically select
309 * pixel or sample dispatch, the barycentric always match, since the
310 * pixel location barycentric register offset will align with the sample
311 * barycentric.
312 */
313 if (requested_sample) {
314 if (requested_sample & BITFIELD_BIT(INTEL_BARYCENTRIC_PERSPECTIVE_SAMPLE))
315 modes |= BITFIELD_BIT(INTEL_BARYCENTRIC_PERSPECTIVE_PIXEL);
316 if (requested_sample & BITFIELD_BIT(INTEL_BARYCENTRIC_NONPERSPECTIVE_SAMPLE))
317 modes |= BITFIELD_BIT(INTEL_BARYCENTRIC_NONPERSPECTIVE_PIXEL);
318 }
319 }
320
321 return modes;
322 }
323
324
325 static inline bool
intel_fs_is_coarse(enum intel_sometimes shader_coarse_pixel_dispatch,enum intel_msaa_flags pushed_msaa_flags)326 intel_fs_is_coarse(enum intel_sometimes shader_coarse_pixel_dispatch,
327 enum intel_msaa_flags pushed_msaa_flags)
328 {
329 if (shader_coarse_pixel_dispatch != INTEL_SOMETIMES)
330 return shader_coarse_pixel_dispatch;
331
332 assert(pushed_msaa_flags & INTEL_MSAA_FLAG_ENABLE_DYNAMIC);
333
334 if (pushed_msaa_flags & INTEL_MSAA_FLAG_COARSE_RT_WRITES)
335 assert(shader_coarse_pixel_dispatch != INTEL_NEVER);
336 else
337 assert(shader_coarse_pixel_dispatch != INTEL_ALWAYS);
338
339 return (pushed_msaa_flags & INTEL_MSAA_FLAG_COARSE_RT_WRITES) != 0;
340 }
341
342 #ifdef __cplusplus
343 } /* extern "C" */
344 #endif
345