1 /****************************************************************************
2 * Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * @file state.h
24 *
25 * @brief Definitions for API state.
26 *
27 ******************************************************************************/
28 // Skipping clang-format due to parsing by simplistic python scripts
29 // clang-format off
30 #pragma once
31
32 #include "common/formats.h"
33 #include "common/intrin.h"
34 #include "common/rdtsc_buckets.h"
35 #include <functional>
36 #include <algorithm>
37
// Integer type this header uses to carry buffer addresses
// (e.g. SWR_VERTEX_BUFFER_STATE::xpData, SWR_INDEX_BUFFER_STATE::xpIndices).
typedef unsigned long long gfxptr_t;
39
40 //////////////////////////////////////////////////////////////////////////
41 /// PRIMITIVE_TOPOLOGY.
42 //////////////////////////////////////////////////////////////////////////
enum PRIMITIVE_TOPOLOGY
{
    TOP_UNKNOWN                = 0x0,
    TOP_POINT_LIST             = 0x1,
    TOP_LINE_LIST              = 0x2,
    TOP_LINE_STRIP             = 0x3,
    TOP_TRIANGLE_LIST          = 0x4,
    TOP_TRIANGLE_STRIP         = 0x5,
    TOP_TRIANGLE_FAN           = 0x6,
    TOP_QUAD_LIST              = 0x7,
    TOP_QUAD_STRIP             = 0x8,
    TOP_LINE_LIST_ADJ          = 0x9,
    TOP_LISTSTRIP_ADJ          = 0xA, // NOTE(review): spelling looks like a typo for "LINESTRIP"; name kept as-is for callers
    TOP_TRI_LIST_ADJ           = 0xB,
    TOP_TRI_STRIP_ADJ          = 0xC,
    TOP_TRI_STRIP_REVERSE      = 0xD,
    TOP_POLYGON                = 0xE,
    TOP_RECT_LIST              = 0xF,
    TOP_LINE_LOOP              = 0x10,
    TOP_POINT_LIST_BF          = 0x11,
    TOP_LINE_STRIP_CONT        = 0x12,
    TOP_LINE_STRIP_BF          = 0x13,
    TOP_LINE_STRIP_CONT_BF     = 0x14,
    // 0x15 is not assigned by this enum.
    TOP_TRIANGLE_FAN_NOSTIPPLE = 0x16,
    TOP_TRIANGLE_DISC          = 0x17, /// @todo What is this??

    // Patch lists: TOP_PATCHLIST_<n> is always TOP_PATCHLIST_BASE + n, for n in [1, 32].
    TOP_PATCHLIST_BASE = 0x1F, // Invalid topology, used to calculate num verts for a patchlist.
    TOP_PATCHLIST_1    = TOP_PATCHLIST_BASE + 1,  // List of 1-vertex patches
    TOP_PATCHLIST_2    = TOP_PATCHLIST_BASE + 2,
    TOP_PATCHLIST_3    = TOP_PATCHLIST_BASE + 3,
    TOP_PATCHLIST_4    = TOP_PATCHLIST_BASE + 4,
    TOP_PATCHLIST_5    = TOP_PATCHLIST_BASE + 5,
    TOP_PATCHLIST_6    = TOP_PATCHLIST_BASE + 6,
    TOP_PATCHLIST_7    = TOP_PATCHLIST_BASE + 7,
    TOP_PATCHLIST_8    = TOP_PATCHLIST_BASE + 8,
    TOP_PATCHLIST_9    = TOP_PATCHLIST_BASE + 9,
    TOP_PATCHLIST_10   = TOP_PATCHLIST_BASE + 10,
    TOP_PATCHLIST_11   = TOP_PATCHLIST_BASE + 11,
    TOP_PATCHLIST_12   = TOP_PATCHLIST_BASE + 12,
    TOP_PATCHLIST_13   = TOP_PATCHLIST_BASE + 13,
    TOP_PATCHLIST_14   = TOP_PATCHLIST_BASE + 14,
    TOP_PATCHLIST_15   = TOP_PATCHLIST_BASE + 15,
    TOP_PATCHLIST_16   = TOP_PATCHLIST_BASE + 16,
    TOP_PATCHLIST_17   = TOP_PATCHLIST_BASE + 17,
    TOP_PATCHLIST_18   = TOP_PATCHLIST_BASE + 18,
    TOP_PATCHLIST_19   = TOP_PATCHLIST_BASE + 19,
    TOP_PATCHLIST_20   = TOP_PATCHLIST_BASE + 20,
    TOP_PATCHLIST_21   = TOP_PATCHLIST_BASE + 21,
    TOP_PATCHLIST_22   = TOP_PATCHLIST_BASE + 22,
    TOP_PATCHLIST_23   = TOP_PATCHLIST_BASE + 23,
    TOP_PATCHLIST_24   = TOP_PATCHLIST_BASE + 24,
    TOP_PATCHLIST_25   = TOP_PATCHLIST_BASE + 25,
    TOP_PATCHLIST_26   = TOP_PATCHLIST_BASE + 26,
    TOP_PATCHLIST_27   = TOP_PATCHLIST_BASE + 27,
    TOP_PATCHLIST_28   = TOP_PATCHLIST_BASE + 28,
    TOP_PATCHLIST_29   = TOP_PATCHLIST_BASE + 29,
    TOP_PATCHLIST_30   = TOP_PATCHLIST_BASE + 30,
    TOP_PATCHLIST_31   = TOP_PATCHLIST_BASE + 31,
    TOP_PATCHLIST_32   = TOP_PATCHLIST_BASE + 32, // List of 32-vertex patches
};
103
104 //////////////////////////////////////////////////////////////////////////
105 /// SWR_SHADER_TYPE
106 //////////////////////////////////////////////////////////////////////////
enum SWR_SHADER_TYPE
{
    SHADER_VERTEX   = 0,
    SHADER_GEOMETRY = 1,
    SHADER_DOMAIN   = 2,
    SHADER_HULL     = 3,
    SHADER_PIXEL    = 4,
    SHADER_COMPUTE  = 5,

    NUM_SHADER_TYPES, // number of entries above; must stay last
};
118
119 //////////////////////////////////////////////////////////////////////////
120 /// SWR_RENDERTARGET_ATTACHMENT
/// @todo It's not clear what an "attachment" means. It's not a common term.
122 //////////////////////////////////////////////////////////////////////////
enum SWR_RENDERTARGET_ATTACHMENT
{
    // Color attachments occupy indices 0..7.
    SWR_ATTACHMENT_COLOR0  = 0,
    SWR_ATTACHMENT_COLOR1  = 1,
    SWR_ATTACHMENT_COLOR2  = 2,
    SWR_ATTACHMENT_COLOR3  = 3,
    SWR_ATTACHMENT_COLOR4  = 4,
    SWR_ATTACHMENT_COLOR5  = 5,
    SWR_ATTACHMENT_COLOR6  = 6,
    SWR_ATTACHMENT_COLOR7  = 7,
    SWR_ATTACHMENT_DEPTH   = 8,
    SWR_ATTACHMENT_STENCIL = 9,

    SWR_NUM_ATTACHMENTS // number of entries above; must stay last
};
138
// Number of color render targets (COLOR0..COLOR7).
#define SWR_NUM_RENDERTARGETS 8

// One bit per attachment; bit position n matches the n-th SWR_RENDERTARGET_ATTACHMENT entry.
#define SWR_ATTACHMENT_COLOR0_BIT  (1 << 0)
#define SWR_ATTACHMENT_COLOR1_BIT  (1 << 1)
#define SWR_ATTACHMENT_COLOR2_BIT  (1 << 2)
#define SWR_ATTACHMENT_COLOR3_BIT  (1 << 3)
#define SWR_ATTACHMENT_COLOR4_BIT  (1 << 4)
#define SWR_ATTACHMENT_COLOR5_BIT  (1 << 5)
#define SWR_ATTACHMENT_COLOR6_BIT  (1 << 6)
#define SWR_ATTACHMENT_COLOR7_BIT  (1 << 7)
#define SWR_ATTACHMENT_DEPTH_BIT   (1 << 8)
#define SWR_ATTACHMENT_STENCIL_BIT (1 << 9)

// All color bits (0x0ff), and color + depth + stencil (0x3ff).
#define SWR_ATTACHMENT_MASK_COLOR ((1 << SWR_NUM_RENDERTARGETS) - 1)
#define SWR_ATTACHMENT_MASK_ALL \
    (SWR_ATTACHMENT_MASK_COLOR | SWR_ATTACHMENT_DEPTH_BIT | SWR_ATTACHMENT_STENCIL_BIT)
153
154
155 //////////////////////////////////////////////////////////////////////////
156 /// @brief SWR Inner Tessellation factor ID
157 /// See above GetTessFactorOutputPosition code for documentation
enum SWR_INNER_TESSFACTOR_ID
{
    SWR_QUAD_U_TRI_INSIDE = 0,
    SWR_QUAD_V_INSIDE     = 1,

    SWR_NUM_INNER_TESS_FACTORS, // number of inner factors; sizes SWR_TESSELLATION_FACTORS::InnerTessFactors
};
165
166 //////////////////////////////////////////////////////////////////////////
167 /// @brief SWR Outer Tessellation factor ID
168 /// See above GetTessFactorOutputPosition code for documentation
enum SWR_OUTER_TESSFACTOR_ID
{
    SWR_QUAD_U_EQ0_TRI_U_LINE_DETAIL  = 0,
    SWR_QUAD_U_EQ1_TRI_V_LINE_DENSITY = 1,
    SWR_QUAD_V_EQ0_TRI_W              = 2,
    SWR_QUAD_V_EQ1                    = 3,

    SWR_NUM_OUTER_TESS_FACTORS, // number of outer factors; sizes SWR_TESSELLATION_FACTORS::OuterTessFactors
};
178
179 /////////////////////////////////////////////////////////////////////////
180 /// simdvertex
181 /// @brief Defines a vertex element that holds all the data for SIMD vertices.
182 /// Contains space for position, SGV, and 32 generic attributes
183 /////////////////////////////////////////////////////////////////////////
enum SWR_VTX_SLOTS
{
    // Slot 0 holds the SGVs; the *_COMP values are component indices within that slot.
    VERTEX_SGV_SLOT            = 0,
    VERTEX_SGV_RTAI_COMP       = 0,
    VERTEX_SGV_VAI_COMP        = 1,
    VERTEX_SGV_POINT_SIZE_COMP = 2,

    // Position occupies a single slot (start == end).
    VERTEX_POSITION_SLOT     = 1,
    VERTEX_POSITION_END_SLOT = 1,

    // Everything below is laid out back-to-back after the position slot.
    VERTEX_CLIPCULL_DIST_LO_SLOT = VERTEX_POSITION_END_SLOT + 1,     // VS writes lower 4 clip/cull dist
    VERTEX_CLIPCULL_DIST_HI_SLOT = VERTEX_CLIPCULL_DIST_LO_SLOT + 1, // VS writes upper 4 clip/cull dist
    VERTEX_ATTRIB_START_SLOT     = VERTEX_CLIPCULL_DIST_HI_SLOT + 1,
    VERTEX_ATTRIB_END_SLOT       = VERTEX_ATTRIB_START_SLOT + 31,    // 32 generic attribute slots, inclusive range
    SWR_VTX_NUM_SLOTS            = VERTEX_ATTRIB_END_SLOT + 1
};
198
199 // SoAoSoA
200 struct simdvertex
201 {
202 simdvector attrib[SWR_VTX_NUM_SLOTS];
203 };
204
205 struct simd16vertex
206 {
207 simd16vector attrib[SWR_VTX_NUM_SLOTS];
208 };
209
210 template <typename SIMD_T>
211 struct SIMDVERTEX_T
212 {
213 typename SIMD_T::Vec4 attrib[SWR_VTX_NUM_SLOTS];
214 };
215
216 struct SWR_WORKER_DATA
217 {
218 HANDLE hArContext; // handle to the archrast context
219 };
220
221 //////////////////////////////////////////////////////////////////////////
222 /// SWR_SHADER_STATS
223 /// @brief Structure passed to shader for stats collection.
224 /////////////////////////////////////////////////////////////////////////
struct SWR_SHADER_STATS
{
    // Roughly the number of API-level instructions executed (not x86 instructions).
    uint32_t numInstExecuted;

    // Per-opcode execution counters.
    uint32_t numSampleExecuted;
    uint32_t numSampleLExecuted;
    uint32_t numSampleBExecuted;
    uint32_t numSampleCExecuted;
    uint32_t numSampleCLZExecuted;
    uint32_t numSampleCDExecuted;
    uint32_t numGather4Executed;
    uint32_t numGather4CExecuted;
    uint32_t numGather4CPOExecuted;
    uint32_t numGather4CPOCExecuted;
    uint32_t numLodExecuted;
};
240
241
242 //////////////////////////////////////////////////////////////////////////
243 /// SWR_VS_CONTEXT
244 /// @brief Input to vertex shader
245 /////////////////////////////////////////////////////////////////////////
246 struct SWR_VS_CONTEXT
247 {
248 simdvertex* pVin; // IN: SIMD input vertex data store
249 simdvertex* pVout; // OUT: SIMD output vertex data store
250
251 uint32_t InstanceID; // IN: Instance ID, constant across all verts of the SIMD
252 simdscalari VertexID; // IN: Vertex ID
253 simdscalari mask; // IN: Active mask for shader
254
255 // SIMD16 Frontend fields.
256 uint32_t AlternateOffset; // IN: amount to offset for interleaving even/odd simd8 in
257 // simd16vertex output
258 simd16scalari mask16; // IN: Active mask for shader (16-wide)
259 simd16scalari VertexID16; // IN: Vertex ID (16-wide)
260
261 SWR_SHADER_STATS stats; // OUT: shader statistics used for archrast.
262 };
263
264 /////////////////////////////////////////////////////////////////////////
265 /// ScalarCPoint
266 /// @brief defines a control point element as passed from the output
267 /// of the hull shader to the input of the domain shader
268 /////////////////////////////////////////////////////////////////////////
// A single four-component float attribute (x, y, z, w) in scalar (non-SIMD) form.
struct ScalarAttrib
{
    float x;
    float y;
    float z;
    float w;
};
276
277 struct ScalarCPoint
278 {
279 ScalarAttrib attrib[SWR_VTX_NUM_SLOTS];
280 };
281
282 //////////////////////////////////////////////////////////////////////////
283 /// SWR_TESSELLATION_FACTORS
284 /// @brief Tessellation factors structure (non-vector)
285 /////////////////////////////////////////////////////////////////////////
286 struct SWR_TESSELLATION_FACTORS
287 {
288 float OuterTessFactors[SWR_NUM_OUTER_TESS_FACTORS];
289 float InnerTessFactors[SWR_NUM_INNER_TESS_FACTORS];
290 float pad[2];
291 };
292
293 SWR_STATIC_ASSERT(sizeof(SWR_TESSELLATION_FACTORS) == 32);
294
295 #define MAX_NUM_VERTS_PER_PRIM 32 // support up to 32 control point patches
296 struct ScalarPatch
297 {
298 SWR_TESSELLATION_FACTORS tessFactors;
299 ScalarCPoint cp[MAX_NUM_VERTS_PER_PRIM];
300 ScalarCPoint patchData;
301 };
302
303 //////////////////////////////////////////////////////////////////////////
304 /// SWR_HS_CONTEXT
305 /// @brief Input to hull shader
306 /////////////////////////////////////////////////////////////////////////
307 struct SWR_HS_CONTEXT
308 {
309 simdvertex vert[MAX_NUM_VERTS_PER_PRIM]; // IN: (SIMD) input primitive data
310 simdscalari PrimitiveID; // IN: (SIMD) primitive ID generated from the draw call
311 simdscalari mask; // IN: Active mask for shader
312 uint32_t outputSize; // IN: Size of HS output (per lane)
313 ScalarPatch* pCPout; // OUT: Output control point patch SIMD-sized-array of SCALAR patches
314 SWR_SHADER_STATS stats; // OUT: shader statistics used for archrast.
315 };
316
317 //////////////////////////////////////////////////////////////////////////
318 /// SWR_DS_CONTEXT
319 /// @brief Input to domain shader
320 /////////////////////////////////////////////////////////////////////////
321 struct SWR_DS_CONTEXT
322 {
323 uint32_t PrimitiveID; // IN: (SCALAR) PrimitiveID for the patch associated with the DS invocation
324 uint32_t vectorOffset; // IN: (SCALAR) vector index offset into SIMD data.
325 uint32_t vectorStride; // IN: (SCALAR) stride (in vectors) of output data per attribute-component
326 uint32_t outVertexAttribOffset; // IN: (SCALAR) Offset to the attributes as processed by the next shader stage.
327 ScalarPatch* pCpIn; // IN: (SCALAR) Control patch
328 simdscalar* pDomainU; // IN: (SIMD) Domain Point U coords
329 simdscalar* pDomainV; // IN: (SIMD) Domain Point V coords
330 simdscalari mask; // IN: Active mask for shader
331 simdscalar* pOutputData; // OUT: (SIMD) Vertex Attributes (2D array of vectors, one row per attribute-component)
332 SWR_SHADER_STATS stats; // OUT: shader statistics used for archrast.
333 };
334
335 //////////////////////////////////////////////////////////////////////////
336 /// SWR_GS_CONTEXT
337 /// @brief Input to geometry shader.
338 /////////////////////////////////////////////////////////////////////////
339 struct SWR_GS_CONTEXT
340 {
341 simdvector* pVerts; // IN: input primitive data for SIMD prims
342 uint32_t inputVertStride; // IN: input vertex stride, in attributes
343 simdscalari PrimitiveID; // IN: input primitive ID generated from the draw call
344 uint32_t InstanceID; // IN: input instance ID
345 simdscalari mask; // IN: Active mask for shader
346 uint8_t* pStreams[KNOB_SIMD_WIDTH]; // OUT: output stream (contains vertices for all output streams)
347 SWR_SHADER_STATS stats; // OUT: shader statistics used for archrast.
348 };
349
350 struct PixelPositions
351 {
352 simdscalar UL;
353 simdscalar center;
354 simdscalar sample;
355 simdscalar centroid;
356 };
357
// Upper bound on samples per pixel (the largest SWR_MULTISAMPLE_COUNT entry is 16X).
#define SWR_MAX_NUM_MULTISAMPLES 16
359
360 //////////////////////////////////////////////////////////////////////////
361 /// SWR_PS_CONTEXT
362 /// @brief Input to pixel shader.
363 /////////////////////////////////////////////////////////////////////////
364 struct SWR_PS_CONTEXT
365 {
366 PixelPositions vX; // IN: x location(s) of pixels
367 PixelPositions vY; // IN: x location(s) of pixels
368 simdscalar vZ; // INOUT: z location of pixels
369 simdscalari activeMask; // OUT: mask for kill
370 simdscalar inputMask; // IN: input coverage mask for all samples
371 simdscalari oMask; // OUT: mask for output coverage
372
373 PixelPositions vI; // barycentric coords evaluated at pixel center, sample position, centroid
374 PixelPositions vJ;
375 PixelPositions vOneOverW; // IN: 1/w
376
377 const float* pAttribs; // IN: pointer to attribute barycentric coefficients
378 const float* pPerspAttribs; // IN: pointer to attribute/w barycentric coefficients
379 const float* pRecipW; // IN: pointer to 1/w coord for each vertex
380 const float* I; // IN: Barycentric A, B, and C coefs used to compute I
381 const float* J; // IN: Barycentric A, B, and C coefs used to compute J
382 float recipDet; // IN: 1/Det, used when barycentric interpolating attributes
383 const float* pSamplePosX; // IN: array of sample positions
384 const float* pSamplePosY; // IN: array of sample positions
385 simdvector shaded[SWR_NUM_RENDERTARGETS]; // OUT: result color per rendertarget
386
387 uint32_t frontFace; // IN: front- 1, back- 0
388 uint32_t sampleIndex; // IN: sampleIndex
389 uint32_t renderTargetArrayIndex; // IN: render target array index from GS
390 uint32_t viewportIndex; // IN: viewport index from GS
391 uint32_t rasterizerSampleCount; // IN: sample count used by the rasterizer
392
393 uint8_t* pColorBuffer[SWR_NUM_RENDERTARGETS]; // IN: Pointers to render target hottiles
394
395 SWR_SHADER_STATS stats; // OUT: shader statistics used for archrast.
396
397 BucketManager *pBucketManager; // @llvm_struct - IN: performance buckets.
398 };
399
400 //////////////////////////////////////////////////////////////////////////
401 /// SWR_CS_CONTEXT
402 /// @brief Input to compute shader.
403 /////////////////////////////////////////////////////////////////////////
404 struct SWR_CS_CONTEXT
405 {
406 // The ThreadGroupId is the current thread group index relative
407 // to all thread groups in the Dispatch call. The ThreadId, ThreadIdInGroup,
408 // and ThreadIdInGroupFlattened can be derived from ThreadGroupId in the shader.
409
410 // Compute shader accepts the following system values.
411 // o ThreadId - Current thread id relative to all other threads in dispatch.
412 // o ThreadGroupId - Current thread group id relative to all other groups in dispatch.
413 // o ThreadIdInGroup - Current thread relative to all threads in the current thread group.
414 // o ThreadIdInGroupFlattened - Flattened linear id derived from ThreadIdInGroup.
415 //
416 // All of these system values can be computed in the shader. They will be
417 // derived from the current tile counter. The tile counter is an atomic counter that
418 // resides in the draw context and is initialized to the product of the dispatch dims.
419 //
420 // tileCounter = dispatchDims.x * dispatchDims.y * dispatchDims.z
421 //
422 // Each CPU worker thread will atomically decrement this counter and passes the current
423 // count into the shader. When the count reaches 0 then all thread groups in the
424 // dispatch call have been completed.
425
426 uint32_t tileCounter; // The tile counter value for this thread group.
427
428 // Dispatch dimensions used by shader to compute system values from the tile counter.
429 uint32_t dispatchDims[3];
430
431 uint8_t* pTGSM; // Thread Group Shared Memory pointer.
432 uint8_t* pSpillFillBuffer; // Spill/fill buffer for barrier support
433 uint8_t* pScratchSpace; // Pointer to scratch space buffer used by the shader, shader is
434 // responsible for subdividing scratch space per instance/simd
435 uint32_t scratchSpacePerWarp; // Scratch space per work item x SIMD_WIDTH
436
437 SWR_SHADER_STATS stats; // OUT: shader statistics used for archrast.
438 };
439
440 // enums
enum SWR_TILE_MODE
{
    SWR_TILE_NONE        = 0x0, // Linear mode (no tiling)
    SWR_TILE_MODE_WMAJOR = 0x1, // W major tiling
    SWR_TILE_MODE_XMAJOR = 0x2, // X major tiling
    SWR_TILE_MODE_YMAJOR = 0x3, // Y major tiling
    SWR_TILE_SWRZ        = 0x4, // SWR-Z tiling

    SWR_TILE_MODE_COUNT // number of modes above; must stay last
};
452
enum SWR_SURFACE_TYPE
{
    SURFACE_1D                = 0x0,
    SURFACE_2D                = 0x1,
    SURFACE_3D                = 0x2,
    SURFACE_CUBE              = 0x3,
    SURFACE_BUFFER            = 0x4,
    SURFACE_STRUCTURED_BUFFER = 0x5,
    // 0x6 is not assigned by this enum.
    SURFACE_NULL              = 0x7
};
463
enum SWR_ZFUNCTION
{
    ZFUNC_ALWAYS = 0,
    ZFUNC_NEVER  = 1,
    ZFUNC_LT     = 2,
    ZFUNC_EQ     = 3,
    ZFUNC_LE     = 4,
    ZFUNC_GT     = 5,
    ZFUNC_NE     = 6,
    ZFUNC_GE     = 7,
    NUM_ZFUNC // number of functions above; must stay last
};
476
enum SWR_STENCILOP
{
    STENCILOP_KEEP    = 0,
    STENCILOP_ZERO    = 1,
    STENCILOP_REPLACE = 2,
    STENCILOP_INCRSAT = 3,
    STENCILOP_DECRSAT = 4,
    STENCILOP_INCR    = 5,
    STENCILOP_DECR    = 6,
    STENCILOP_INVERT  = 7
};
488
enum SWR_BLEND_FACTOR
{
    BLENDFACTOR_ONE                = 0,
    BLENDFACTOR_SRC_COLOR          = 1,
    BLENDFACTOR_SRC_ALPHA          = 2,
    BLENDFACTOR_DST_ALPHA          = 3,
    BLENDFACTOR_DST_COLOR          = 4,
    BLENDFACTOR_SRC_ALPHA_SATURATE = 5,
    BLENDFACTOR_CONST_COLOR        = 6,
    BLENDFACTOR_CONST_ALPHA        = 7,
    BLENDFACTOR_SRC1_COLOR         = 8,
    BLENDFACTOR_SRC1_ALPHA         = 9,
    BLENDFACTOR_ZERO               = 10,
    BLENDFACTOR_INV_SRC_COLOR      = 11,
    BLENDFACTOR_INV_SRC_ALPHA      = 12,
    BLENDFACTOR_INV_DST_ALPHA      = 13,
    BLENDFACTOR_INV_DST_COLOR      = 14,
    BLENDFACTOR_INV_CONST_COLOR    = 15,
    BLENDFACTOR_INV_CONST_ALPHA    = 16,
    BLENDFACTOR_INV_SRC1_COLOR     = 17,
    BLENDFACTOR_INV_SRC1_ALPHA     = 18
};
511
enum SWR_BLEND_OP
{
    BLENDOP_ADD         = 0,
    BLENDOP_SUBTRACT    = 1,
    BLENDOP_REVSUBTRACT = 2,
    BLENDOP_MIN         = 3,
    BLENDOP_MAX         = 4,
};
520
enum SWR_LOGIC_OP
{
    LOGICOP_CLEAR         = 0x0,
    LOGICOP_NOR           = 0x1,
    LOGICOP_AND_INVERTED  = 0x2,
    LOGICOP_COPY_INVERTED = 0x3,
    LOGICOP_AND_REVERSE   = 0x4,
    LOGICOP_INVERT        = 0x5,
    LOGICOP_XOR           = 0x6,
    LOGICOP_NAND          = 0x7,
    LOGICOP_AND           = 0x8,
    LOGICOP_EQUIV         = 0x9,
    LOGICOP_NOOP          = 0xA,
    LOGICOP_OR_INVERTED   = 0xB,
    LOGICOP_COPY          = 0xC,
    LOGICOP_OR_REVERSE    = 0xD,
    LOGICOP_OR            = 0xE,
    LOGICOP_SET           = 0xF,
};
540
541 //////////////////////////////////////////////////////////////////////////
542 /// SWR_AUX_MODE
543 /// @brief Specifies how the auxiliary buffer is used by the driver.
544 //////////////////////////////////////////////////////////////////////////
enum SWR_AUX_MODE
{
    AUX_MODE_NONE  = 0,
    AUX_MODE_COLOR = 1,
    AUX_MODE_UAV   = 2,
    AUX_MODE_DEPTH = 3,
};
552
553 // vertex fetch state
554 // WARNING- any changes to this struct need to be reflected
555 // in the fetch shader jit
556 struct SWR_VERTEX_BUFFER_STATE
557 {
558 gfxptr_t xpData;
559 uint32_t index;
560 uint32_t pitch;
561 uint32_t size;
562 uint32_t minVertex; // min vertex (for bounds checking)
563 uint32_t maxVertex; // size / pitch. precalculated value used by fetch shader for OOB checks
564 uint32_t partialInboundsSize; // size % pitch. precalculated value used by fetch shader for
565 // partially OOB vertices
566 };
567
568 struct SWR_INDEX_BUFFER_STATE
569 {
570 gfxptr_t xpIndices;
571 // Format type for indices (e.g. UINT16, UINT32, etc.)
572 SWR_FORMAT format; // @llvm_enum
573 uint32_t size;
574 };
575
576 //////////////////////////////////////////////////////////////////////////
577 /// SWR_FETCH_CONTEXT
578 /// @brief Input to fetch shader.
579 /// @note WARNING - Changes to this struct need to be reflected in the
580 /// fetch shader jit.
581 /////////////////////////////////////////////////////////////////////////
582 struct SWR_FETCH_CONTEXT
583 {
584 const SWR_VERTEX_BUFFER_STATE* pStreams; // IN: array of bound vertex buffers
585 gfxptr_t xpIndices; // IN: pointer to int32 index buffer for indexed draws
586 gfxptr_t xpLastIndex; // IN: pointer to end of index buffer, used for bounds checking
587 uint32_t CurInstance; // IN: current instance
588 uint32_t BaseVertex; // IN: base vertex
589 uint32_t StartVertex; // IN: start vertex
590 uint32_t StartInstance; // IN: start instance
591 simdscalari VertexID; // OUT: vector of vertex IDs
592 simdscalari CutMask; // OUT: vector mask of indices which have the cut index value
593 #if USE_SIMD16_SHADERS
594 // simd16scalari VertexID; // OUT: vector of vertex IDs
595 // simd16scalari CutMask; // OUT: vector mask of indices which have the
596 // cut index value
597 simdscalari VertexID2; // OUT: vector of vertex IDs
598 simdscalari CutMask2; // OUT: vector mask of indices which have the cut index value
599 #endif
600 };
601
602 //////////////////////////////////////////////////////////////////////////
603 /// SWR_STATS
604 ///
605 /// @brief All statistics generated by SWR go here. These are public
606 /// to driver.
607 /////////////////////////////////////////////////////////////////////////
OSALIGNLINE(struct) SWR_STATS
{
    // ---- Occlusion query ----
    // Number of passing depth tests. Not exact.
    uint64_t DepthPassCount;

    // ---- Pipeline stats ----
    // Number of Pixel Shader invocations
    uint64_t PsInvocations;
    // Number of Compute Shader invocations
    uint64_t CsInvocations;

};
618
619 //////////////////////////////////////////////////////////////////////////
/// SWR_STATS_FE
621 ///
622 /// @brief All statistics generated by FE.
623 /////////////////////////////////////////////////////////////////////////
OSALIGNLINE(struct) SWR_STATS_FE
{
    // Number of Fetch Shader vertices
    uint64_t IaVertices;
    // Number of PA primitives
    uint64_t IaPrimitives;
    // Number of Vertex Shader invocations
    uint64_t VsInvocations;
    // Number of Hull Shader invocations
    uint64_t HsInvocations;
    // Number of Domain Shader invocations
    uint64_t DsInvocations;
    // Number of Geometry Shader invocations
    uint64_t GsInvocations;
    // Number of prims the GS outputs
    uint64_t GsPrimitives;
    // Number of clipper invocations
    uint64_t CInvocations;
    // Number of clipper primitives
    uint64_t CPrimitives;

    // ---- Streamout stats (four entries each) ----
    uint64_t SoPrimStorageNeeded[4];
    uint64_t SoNumPrimsWritten[4];
};
640
641 //////////////////////////////////////////////////////////////////////////
642 /// STREAMOUT_BUFFERS
643 /////////////////////////////////////////////////////////////////////////
644
// Limits that size the streamout arrays declared in this header.
#define MAX_SO_STREAMS 4
#define MAX_SO_BUFFERS 4
#define MAX_ATTRIBUTES 32
648
649 struct SWR_STREAMOUT_BUFFER
650 {
651 // Pointers to streamout buffers.
652 gfxptr_t pBuffer;
653
654 // Offset to the SO write offset. If not null then we update offset here.
655 gfxptr_t pWriteOffset;
656
657 bool enable;
658 bool soWriteEnable;
659
660 // Size of buffer in dwords.
661 uint32_t bufferSize;
662
663 // Vertex pitch of buffer in dwords.
664 uint32_t pitch;
665
666 // Offset into buffer in dwords. SOS will increment this offset.
667 uint32_t streamOffset;
668 };
669
670 //////////////////////////////////////////////////////////////////////////
671 /// STREAMOUT_STATE
672 /////////////////////////////////////////////////////////////////////////
673 struct SWR_STREAMOUT_STATE
674 {
675 // This disables stream output.
676 bool soEnable;
677
678 // which streams are enabled for streamout
679 bool streamEnable[MAX_SO_STREAMS];
680
681 // If set then do not send any streams to the rasterizer.
682 bool rasterizerDisable;
683
684 // Specifies which stream to send to the rasterizer.
685 uint32_t streamToRasterizer;
686
687 // The stream masks specify which attributes are sent to which streams.
688 // These masks help the FE to setup the pPrimData buffer that is passed
689 // the Stream Output Shader (SOS) function.
690 uint64_t streamMasks[MAX_SO_STREAMS];
691
692 // Number of attributes, including position, per vertex that are streamed out.
693 // This should match number of bits in stream mask.
694 uint32_t streamNumEntries[MAX_SO_STREAMS];
695
696 // Offset to the start of the attributes of the input vertices, in simdvector units
697 uint32_t vertexAttribOffset[MAX_SO_STREAMS];
698 };
699
700 //////////////////////////////////////////////////////////////////////////
701 /// STREAMOUT_CONTEXT - Passed to SOS
702 /////////////////////////////////////////////////////////////////////////
703 struct SWR_STREAMOUT_CONTEXT
704 {
705 uint32_t* pPrimData;
706 SWR_STREAMOUT_BUFFER* pBuffer[MAX_SO_STREAMS];
707
708 // Num prims written for this stream
709 uint32_t numPrimsWritten;
710
711 // Num prims that should have been written if there were no overflow.
712 uint32_t numPrimStorageNeeded;
713 };
714
715 //////////////////////////////////////////////////////////////////////////
716 /// SWR_GS_STATE - Geometry shader state
717 /////////////////////////////////////////////////////////////////////////
718 struct SWR_GS_STATE
719 {
720 bool gsEnable;
721
722 // If true, geometry shader emits a single stream, with separate cut buffer.
723 // If false, geometry shader emits vertices for multiple streams to the stream buffer, with a
724 // separate StreamID buffer to map vertices to streams
725 bool isSingleStream;
726
727 // Number of input attributes per vertex. Used by the frontend to
728 // optimize assembling primitives for GS
729 uint32_t numInputAttribs;
730
731 // Stride of incoming verts in attributes
732 uint32_t inputVertStride;
733
734 // Output topology - can be point, tristrip, linestrip, or rectlist
735 PRIMITIVE_TOPOLOGY outputTopology; // @llvm_enum
736
737 // Maximum number of verts that can be emitted by a single instance of the GS
738 uint32_t maxNumVerts;
739
740 // Instance count
741 uint32_t instanceCount;
742
743 // When single stream is enabled, singleStreamID dictates which stream is being output.
744 // field ignored if isSingleStream is false
745 uint32_t singleStreamID;
746
747 // Total amount of memory to allocate for one instance of the shader output in bytes
748 uint32_t allocationSize;
749
750 // Offset to start reading data per input vertex in simdvector units. This can be used to
751 // skip over any vertex data output from the previous stage that is unused in the GS, removing
752 // unnecessary vertex processing.
753 uint32_t vertexAttribOffset;
754
755 // Size of the control data section which contains cut or streamID data, in simdscalar units.
756 // Should be sized to handle the maximum number of verts output by the GS. Can be 0 if there are
757 // no cuts or streamID bits.
758 uint32_t controlDataSize;
759
760 // Offset to the control data section, in bytes
761 uint32_t controlDataOffset;
762
763 // Total size of an output vertex, in simdvector units
764 uint32_t outputVertexSize;
765
766 // Offset to the start of the vertex section, in bytes
767 uint32_t outputVertexOffset;
768
769 // Set this to non-zero to indicate that the shader outputs a static number of verts. If zero,
770 // shader is expected to store the final vertex count in the first dword of the gs output
771 // stream.
772 uint32_t staticVertexCount;
773 };
774
775 //////////////////////////////////////////////////////////////////////////
776 /// SWR_TS_OUTPUT_TOPOLOGY - Defines data output by the tessellator / DS
777 /////////////////////////////////////////////////////////////////////////
enum SWR_TS_OUTPUT_TOPOLOGY
{
    SWR_TS_OUTPUT_POINT   = 0,
    SWR_TS_OUTPUT_LINE    = 1,
    SWR_TS_OUTPUT_TRI_CW  = 2,
    SWR_TS_OUTPUT_TRI_CCW = 3,

    SWR_TS_OUTPUT_TOPOLOGY_COUNT // number of entries above; must stay last
};
787
788 //////////////////////////////////////////////////////////////////////////
789 /// SWR_TS_PARTITIONING - Defines tessellation algorithm
790 /////////////////////////////////////////////////////////////////////////
enum SWR_TS_PARTITIONING
{
    SWR_TS_INTEGER         = 0,
    SWR_TS_ODD_FRACTIONAL  = 1,
    SWR_TS_EVEN_FRACTIONAL = 2,

    SWR_TS_PARTITIONING_COUNT // number of entries above; must stay last
};
799
800 //////////////////////////////////////////////////////////////////////////
801 /// SWR_TS_DOMAIN - Defines Tessellation Domain
802 /////////////////////////////////////////////////////////////////////////
enum SWR_TS_DOMAIN
{
    SWR_TS_QUAD    = 0,
    SWR_TS_TRI     = 1,
    SWR_TS_ISOLINE = 2,

    SWR_TS_DOMAIN_COUNT // number of entries above; must stay last
};
811
812 //////////////////////////////////////////////////////////////////////////
813 /// SWR_TS_STATE - Tessellation state
814 /////////////////////////////////////////////////////////////////////////
815 struct SWR_TS_STATE
816 {
817 bool tsEnable;
818
819 SWR_TS_OUTPUT_TOPOLOGY tsOutputTopology; // @llvm_enum
820 SWR_TS_PARTITIONING partitioning; // @llvm_enum
821 SWR_TS_DOMAIN domain; // @llvm_enum
822
823 PRIMITIVE_TOPOLOGY postDSTopology; // @llvm_enum
824
825 uint32_t numHsInputAttribs;
826 uint32_t numHsOutputAttribs;
827 uint32_t hsAllocationSize; // Size of HS output in bytes, per lane
828
829 uint32_t numDsOutputAttribs;
830 uint32_t dsAllocationSize;
831 uint32_t dsOutVtxAttribOffset;
832
833 // Offset to the start of the attributes of the input vertices, in simdvector units
834 uint32_t srcVertexAttribOffset;
835
836 // Offset to the start of the attributes expected by the hull shader
837 uint32_t vertexAttribOffset;
838 };
839
840 // output merger state
// Per-render-target channel write disables, one bit per channel, packed in one byte.
struct SWR_RENDER_TARGET_BLEND_STATE
{
    uint8_t writeDisableRed : 1;
    uint8_t writeDisableGreen : 1;
    uint8_t writeDisableBlue : 1;
    uint8_t writeDisableAlpha : 1;
};
// The four single-bit fields must fit in exactly one byte.
static_assert(sizeof(SWR_RENDER_TARGET_BLEND_STATE) == 1,
              "Invalid SWR_RENDER_TARGET_BLEND_STATE size");
850
// Each enumerator's value is log2 of its sample count (GetNumSamples computes 1 << value).
enum SWR_MULTISAMPLE_COUNT
{
    SWR_MULTISAMPLE_1X  = 0,
    SWR_MULTISAMPLE_2X  = 1,
    SWR_MULTISAMPLE_4X  = 2,
    SWR_MULTISAMPLE_8X  = 3,
    SWR_MULTISAMPLE_16X = 4,
    SWR_MULTISAMPLE_TYPE_COUNT // number of entries above; must stay last
};
860
GetNumSamples(int sampleCountEnum)861 static INLINE uint32_t GetNumSamples(/* SWR_SAMPLE_COUNT */ int sampleCountEnum) // @llvm_func_start
862 {
863 return uint32_t(1) << sampleCountEnum;
864 } // @llvm_func_end
865
866 struct SWR_BLEND_STATE
867 {
868 // constant blend factor color in RGBA float
869 float constantColor[4];
870
871 // alpha test reference value in unorm8 or float32
872 uint32_t alphaTestReference;
873 uint32_t sampleMask;
874 // all RT's have the same sample count
875 ///@todo move this to Output Merger state when we refactor
876 SWR_MULTISAMPLE_COUNT sampleCount; // @llvm_enum
877
878 SWR_RENDER_TARGET_BLEND_STATE renderTarget[SWR_NUM_RENDERTARGETS];
879 };
880 static_assert(sizeof(SWR_BLEND_STATE) == 36, "Invalid SWR_BLEND_STATE size");
881
882 struct SWR_BLEND_CONTEXT
883 {
884 const SWR_BLEND_STATE* pBlendState;
885 simdvector* src;
886 simdvector* src1;
887 simdvector* src0alpha;
888 uint32_t sampleNum;
889 simdvector* pDst;
890 simdvector* result;
891 simdscalari* oMask;
892 simdscalari* pMask;
893 uint32_t isAlphaTested;
894 uint32_t isAlphaBlended;
895 };
896
897 //////////////////////////////////////////////////////////////////////////
898 /// FUNCTION POINTERS FOR SHADERS
899
900 #if USE_SIMD16_SHADERS
901 typedef void(__cdecl *PFN_FETCH_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_FETCH_CONTEXT& fetchInfo, simd16vertex& out);
902 #else
903 typedef void(__cdecl *PFN_FETCH_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_FETCH_CONTEXT& fetchInfo, simdvertex& out);
904 #endif
905 typedef void(__cdecl *PFN_VERTEX_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_VS_CONTEXT* pVsContext);
906 typedef void(__cdecl *PFN_HS_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_HS_CONTEXT* pHsContext);
907 typedef void(__cdecl *PFN_DS_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_DS_CONTEXT* pDsContext);
908 typedef void(__cdecl *PFN_GS_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_GS_CONTEXT* pGsContext);
909 typedef void(__cdecl *PFN_CS_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_CS_CONTEXT* pCsContext);
910 typedef void(__cdecl *PFN_SO_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_STREAMOUT_CONTEXT& soContext);
911 typedef void(__cdecl *PFN_PIXEL_KERNEL)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_PS_CONTEXT* pContext);
912 typedef void(__cdecl *PFN_CPIXEL_KERNEL)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_PS_CONTEXT* pContext);
913 typedef void(__cdecl *PFN_BLEND_JIT_FUNC)(SWR_BLEND_CONTEXT*);
914 typedef simdscalar(*PFN_QUANTIZE_DEPTH)(simdscalar const &);
915
916
//////////////////////////////////////////////////////////////////////////
/// FRONTEND_STATE
/////////////////////////////////////////////////////////////////////////
struct SWR_FRONTEND_STATE
{
    // skip clip test, perspective divide, and viewport transform
    // intended for verts in screen space
    bool vpTransformDisable;
    bool bEnableCutIndex;

    // Per-topology provoking vertex selection, packed into bit-fields.
    // 'bits' aliases the same storage so all selections can be read or
    // written as a single 32-bit value.
    union
    {
        struct
        {
            uint32_t triFan : 2;
            uint32_t lineStripList : 1;
            uint32_t triStripList : 2;
        };
        uint32_t bits;
    } provokingVertex;
    uint32_t topologyProvokingVertex; // provoking vertex for the draw topology

    // Size of a vertex in simdvector units. Should be sized to the
    // maximum of the input/output of the vertex shader.
    uint32_t vsVertexSize;
};
942
//////////////////////////////////////////////////////////////////////////
/// SWR_VIEWPORT_MATRIX
/// Six coefficients of a single viewport matrix.
/// NOTE(review): from the names these look like the diagonal (m00, m11, m22)
/// and fourth-row (m30, m31, m32) entries of a 4x4 matrix - confirm against
/// the code that fills them in.
/////////////////////////////////////////////////////////////////////////
struct SWR_VIEWPORT_MATRIX
{
    float m00;
    float m11;
    float m22;
    float m30;
    float m31;
    float m32;
};
955
956 //////////////////////////////////////////////////////////////////////////
957 /// VIEWPORT_MATRIXES
958 /////////////////////////////////////////////////////////////////////////
959 struct SWR_VIEWPORT_MATRICES
960 {
961 float m00[KNOB_NUM_VIEWPORTS_SCISSORS];
962 float m11[KNOB_NUM_VIEWPORTS_SCISSORS];
963 float m22[KNOB_NUM_VIEWPORTS_SCISSORS];
964 float m30[KNOB_NUM_VIEWPORTS_SCISSORS];
965 float m31[KNOB_NUM_VIEWPORTS_SCISSORS];
966 float m32[KNOB_NUM_VIEWPORTS_SCISSORS];
967 };
968
//////////////////////////////////////////////////////////////////////////
/// SWR_VIEWPORT
/// Viewport rectangle (x, y, width, height) plus its depth range
/// (minZ, maxZ), all stored as floats.
/////////////////////////////////////////////////////////////////////////
struct SWR_VIEWPORT
{
    float x;
    float y;
    float width;
    float height;
    float minZ;
    float maxZ;
};
981
//////////////////////////////////////////////////////////////////////////
/// SWR_CULLMODE
/// Values run 0..3 in declaration order; note that BOTH is 0 and NONE is 1.
//////////////////////////////////////////////////////////////////////////
enum SWR_CULLMODE
{
    SWR_CULLMODE_BOTH,
    SWR_CULLMODE_NONE,
    SWR_CULLMODE_FRONT,
    SWR_CULLMODE_BACK
};
992
//////////////////////////////////////////////////////////////////////////
/// SWR_FILLMODE
/// Values run 0..2 in declaration order.
//////////////////////////////////////////////////////////////////////////
enum SWR_FILLMODE
{
    SWR_FILLMODE_POINT,
    SWR_FILLMODE_WIREFRAME,
    SWR_FILLMODE_SOLID
};
999
//////////////////////////////////////////////////////////////////////////
/// SWR_FRONTWINDING
/// Clockwise is 0, counter-clockwise is 1.
//////////////////////////////////////////////////////////////////////////
enum SWR_FRONTWINDING
{
    SWR_FRONTWINDING_CW,
    SWR_FRONTWINDING_CCW
};
1005
1006
//////////////////////////////////////////////////////////////////////////
/// SWR_PIXEL_LOCATION
/// CENTER is 0, UL is 1.
//////////////////////////////////////////////////////////////////////////
enum SWR_PIXEL_LOCATION
{
    SWR_PIXEL_LOCATION_CENTER,
    SWR_PIXEL_LOCATION_UL,
};
1012
1013 // fixed point screen space sample locations within a pixel
1014 struct SWR_MULTISAMPLE_POS
1015 {
1016 public:
SetXiSWR_MULTISAMPLE_POS1017 INLINE void SetXi(uint32_t sampleNum, uint32_t val) { _xi[sampleNum] = val; }; // @llvm_func
SetYiSWR_MULTISAMPLE_POS1018 INLINE void SetYi(uint32_t sampleNum, uint32_t val) { _yi[sampleNum] = val; }; // @llvm_func
XiSWR_MULTISAMPLE_POS1019 INLINE uint32_t Xi(uint32_t sampleNum) const { return _xi[sampleNum]; }; // @llvm_func
YiSWR_MULTISAMPLE_POS1020 INLINE uint32_t Yi(uint32_t sampleNum) const { return _yi[sampleNum]; }; // @llvm_func
SetXSWR_MULTISAMPLE_POS1021 INLINE void SetX(uint32_t sampleNum, float val) { _x[sampleNum] = val; }; // @llvm_func
SetYSWR_MULTISAMPLE_POS1022 INLINE void SetY(uint32_t sampleNum, float val) { _y[sampleNum] = val; }; // @llvm_func
XSWR_MULTISAMPLE_POS1023 INLINE float X(uint32_t sampleNum) const { return _x[sampleNum]; }; // @llvm_func
YSWR_MULTISAMPLE_POS1024 INLINE float Y(uint32_t sampleNum) const { return _y[sampleNum]; }; // @llvm_func
1025 typedef const float (&sampleArrayT)[SWR_MAX_NUM_MULTISAMPLES]; //@llvm_typedef
XSWR_MULTISAMPLE_POS1026 INLINE sampleArrayT X() const { return _x; }; // @llvm_func
YSWR_MULTISAMPLE_POS1027 INLINE sampleArrayT Y() const { return _y; }; // @llvm_func
vXiSWR_MULTISAMPLE_POS1028 INLINE const __m128i& vXi(uint32_t sampleNum) const { return _vXi[sampleNum]; }; // @llvm_func
vYiSWR_MULTISAMPLE_POS1029 INLINE const __m128i& vYi(uint32_t sampleNum) const { return _vYi[sampleNum]; }; // @llvm_func
vXSWR_MULTISAMPLE_POS1030 INLINE const simdscalar& vX(uint32_t sampleNum) const { return _vX[sampleNum]; }; // @llvm_func
vYSWR_MULTISAMPLE_POS1031 INLINE const simdscalar& vY(uint32_t sampleNum) const { return _vY[sampleNum]; }; // @llvm_func
TileSampleOffsetsXSWR_MULTISAMPLE_POS1032 INLINE const __m128i& TileSampleOffsetsX() const { return tileSampleOffsetsX; }; // @llvm_func
TileSampleOffsetsYSWR_MULTISAMPLE_POS1033 INLINE const __m128i& TileSampleOffsetsY() const { return tileSampleOffsetsY; }; // @llvm_func
1034
1035 INLINE void PrecalcSampleData(int numSamples); //@llvm_func
1036
1037 private:
1038 template <typename MaskT>
1039 INLINE __m128i expandThenBlend4(uint32_t* min, uint32_t* max); // @llvm_func
1040 INLINE void CalcTileSampleOffsets(int numSamples); // @llvm_func
1041
1042 // scalar sample values
1043 uint32_t _xi[SWR_MAX_NUM_MULTISAMPLES];
1044 uint32_t _yi[SWR_MAX_NUM_MULTISAMPLES];
1045 float _x[SWR_MAX_NUM_MULTISAMPLES];
1046 float _y[SWR_MAX_NUM_MULTISAMPLES];
1047
1048 // precalc'd / vectorized samples
1049 __m128i _vXi[SWR_MAX_NUM_MULTISAMPLES];
1050 __m128i _vYi[SWR_MAX_NUM_MULTISAMPLES];
1051 simdscalar _vX[SWR_MAX_NUM_MULTISAMPLES];
1052 simdscalar _vY[SWR_MAX_NUM_MULTISAMPLES];
1053 __m128i tileSampleOffsetsX;
1054 __m128i tileSampleOffsetsY;
1055 };
1056
1057 //////////////////////////////////////////////////////////////////////////
1058 /// SWR_RASTSTATE
1059 //////////////////////////////////////////////////////////////////////////
1060 struct SWR_RASTSTATE
1061 {
1062 uint32_t cullMode : 2;
1063 uint32_t fillMode : 2;
1064 uint32_t frontWinding : 1;
1065 uint32_t scissorEnable : 1;
1066 uint32_t depthClipEnable : 1;
1067 uint32_t clipEnable : 1;
1068 uint32_t clipHalfZ : 1;
1069 uint32_t pointParam : 1;
1070 uint32_t pointSpriteEnable : 1;
1071 uint32_t pointSpriteTopOrigin : 1;
1072 uint32_t forcedSampleCount : 1;
1073 uint32_t pixelOffset : 1;
1074 uint32_t depthBiasPreAdjusted : 1; ///< depth bias constant is in float units, not per-format Z units
1075 uint32_t conservativeRast : 1;
1076
1077 float pointSize;
1078 float lineWidth;
1079
1080 float depthBias;
1081 float slopeScaledDepthBias;
1082 float depthBiasClamp;
1083 SWR_FORMAT depthFormat; // @llvm_enum
1084
1085 // sample count the rasterizer is running at
1086 SWR_MULTISAMPLE_COUNT sampleCount; // @llvm_enum
1087 uint32_t pixelLocation; // UL or Center
1088 SWR_MULTISAMPLE_POS samplePositions; // @llvm_struct
1089 bool bIsCenterPattern; // @llvm_enum
1090 };
1091
1092
//////////////////////////////////////////////////////////////////////////
/// SWR_CONSTANT_SOURCE
/// Values run 0..3 in declaration order, so any of them fits the 2-bit
/// SWR_ATTRIB_SWIZZLE::constantSource field.
//////////////////////////////////////////////////////////////////////////
enum SWR_CONSTANT_SOURCE
{
    SWR_CONSTANT_SOURCE_CONST_0000,
    SWR_CONSTANT_SOURCE_CONST_0001_FLOAT,
    SWR_CONSTANT_SOURCE_CONST_1111_FLOAT,
    SWR_CONSTANT_SOURCE_PRIM_ID
};
1100
//////////////////////////////////////////////////////////////////////////
/// SWR_ATTRIB_SWIZZLE
/// One swizzle-map entry: 11 bits of bit-fields declared on uint16_t.
//////////////////////////////////////////////////////////////////////////
struct SWR_ATTRIB_SWIZZLE
{
    uint16_t sourceAttrib : 5;          // source attribute
    uint16_t constantSource : 2;        // constant source to apply
    uint16_t componentOverrideMask : 4; // override component with constant source
};
1107
1108 // backend state
1109 struct SWR_BACKEND_STATE
1110 {
1111 uint32_t constantInterpolationMask; // bitmask indicating which attributes have constant
1112 // interpolation
1113 uint32_t pointSpriteTexCoordMask; // bitmask indicating the attribute(s) which should be
1114 // interpreted as tex coordinates
1115
1116 bool swizzleEnable; // when enabled, core will parse the swizzle map when
1117 // setting up attributes for the backend, otherwise
1118 // all attributes up to numAttributes will be sent
1119 uint8_t numAttributes; // total number of attributes to send to backend (up to 32)
1120 uint8_t numComponents[32]; // number of components to setup per attribute, this reduces some
1121 // calculations for unneeded components
1122
1123 bool readRenderTargetArrayIndex; // Forward render target array index from last FE stage to the
1124 // backend
1125 bool readViewportArrayIndex; // Read viewport array index from last FE stage during binning
1126
1127 // User clip/cull distance enables
1128 uint8_t cullDistanceMask;
1129 uint8_t clipDistanceMask;
1130
1131 // padding to ensure swizzleMap starts 64B offset from start of the struct
1132 // and that the next fields are dword aligned.
1133 uint8_t pad[10];
1134
1135 // Offset to the start of the attributes of the input vertices, in simdvector units
1136 uint32_t vertexAttribOffset;
1137
1138 // Offset to clip/cull attrib section of the vertex, in simdvector units
1139 uint32_t vertexClipCullOffset;
1140
1141 SWR_ATTRIB_SWIZZLE swizzleMap[32];
1142 };
1143 static_assert(sizeof(SWR_BACKEND_STATE) == 128,
1144 "Adjust padding to keep size (or remove this assert)");
1145
1146
//////////////////////////////////////////////////////////////////////////
/// SWR_DEPTH_STENCIL_STATE
/// Depth/stencil state packed into three dwords. The anonymous struct gives
/// named access to each field; value[] aliases the same storage as raw
/// dwords.
//////////////////////////////////////////////////////////////////////////
union SWR_DEPTH_STENCIL_STATE
{
    struct
    {
        // dword 0 (the bit-field widths below add up to exactly 32)
        uint32_t depthWriteEnable : 1;
        uint32_t depthTestEnable : 1;
        uint32_t stencilWriteEnable : 1;
        uint32_t stencilTestEnable : 1;
        uint32_t doubleSidedStencilTestEnable : 1;

        uint32_t depthTestFunc : 3;
        uint32_t stencilTestFunc : 3;

        uint32_t backfaceStencilPassDepthPassOp : 3;
        uint32_t backfaceStencilPassDepthFailOp : 3;
        uint32_t backfaceStencilFailOp : 3;
        uint32_t backfaceStencilTestFunc : 3;
        uint32_t stencilPassDepthPassOp : 3;
        uint32_t stencilPassDepthFailOp : 3;
        uint32_t stencilFailOp : 3;

        // dword 1
        uint8_t backfaceStencilWriteMask;
        uint8_t backfaceStencilTestMask;
        uint8_t stencilWriteMask;
        uint8_t stencilTestMask;

        // dword 2 (only the first two bytes are named)
        uint8_t backfaceStencilRefValue;
        uint8_t stencilRefValue;
    };
    uint32_t value[3];
};
1181
//////////////////////////////////////////////////////////////////////////
/// SWR_SHADING_RATE
/// SWR_SHADING_RATE_COUNT is the number of real enumerators.
//////////////////////////////////////////////////////////////////////////
enum SWR_SHADING_RATE
{
    SWR_SHADING_RATE_PIXEL,
    SWR_SHADING_RATE_SAMPLE,
    SWR_SHADING_RATE_COUNT,
};
1188
//////////////////////////////////////////////////////////////////////////
/// SWR_INPUT_COVERAGE
/// SWR_INPUT_COVERAGE_COUNT is the number of real enumerators.
//////////////////////////////////////////////////////////////////////////
enum SWR_INPUT_COVERAGE
{
    SWR_INPUT_COVERAGE_NONE,
    SWR_INPUT_COVERAGE_NORMAL,
    SWR_INPUT_COVERAGE_INNER_CONSERVATIVE,
    SWR_INPUT_COVERAGE_COUNT,
};
1196
//////////////////////////////////////////////////////////////////////////
/// SWR_PS_POSITION_OFFSET
/// SWR_PS_POSITION_OFFSET_COUNT is the number of real enumerators.
//////////////////////////////////////////////////////////////////////////
enum SWR_PS_POSITION_OFFSET
{
    SWR_PS_POSITION_SAMPLE_NONE,
    SWR_PS_POSITION_SAMPLE_OFFSET,
    SWR_PS_POSITION_CENTROID_OFFSET,
    SWR_PS_POSITION_OFFSET_COUNT,
};
1204
//////////////////////////////////////////////////////////////////////////
/// SWR_BARYCENTRICS_MASK
/// Single-bit flags that can be OR'ed together; the combined value needs
/// three bits.
//////////////////////////////////////////////////////////////////////////
enum SWR_BARYCENTRICS_MASK
{
    SWR_BARYCENTRIC_PER_PIXEL_MASK  = 0x1,
    SWR_BARYCENTRIC_CENTROID_MASK   = 0x2,
    SWR_BARYCENTRIC_PER_SAMPLE_MASK = 0x4,
};
1211
1212 // pixel shader state
1213 struct SWR_PS_STATE
1214 {
1215 // dword 0-1
1216 PFN_PIXEL_KERNEL pfnPixelShader; // @llvm_pfn
1217
1218 // dword 2
1219 uint32_t killsPixel : 1; // pixel shader can kill pixels
1220 uint32_t inputCoverage : 2; // ps uses input coverage
1221 uint32_t writesODepth : 1; // pixel shader writes to depth
1222 uint32_t usesSourceDepth : 1; // pixel shader reads depth
1223 uint32_t shadingRate : 2; // shading per pixel / sample / coarse pixel
1224 uint32_t posOffset : 2; // type of offset (none, sample, centroid) to add to pixel position
1225 uint32_t barycentricsMask : 3; // which type(s) of barycentric coords does the PS interpolate
1226 // attributes with
1227 uint32_t usesUAV : 1; // pixel shader accesses UAV
1228 uint32_t forceEarlyZ : 1; // force execution of early depth/stencil test
1229
1230 uint8_t renderTargetMask; // Mask of render targets written
1231 };
1232
// depth bounds state
//
// An enable flag plus the minimum and maximum values used by the depth
// bounds test.
struct SWR_DEPTH_BOUNDS_STATE
{
    bool  depthBoundsTestEnable;
    float depthBoundsTestMinValue;
    float depthBoundsTestMaxValue;
};
// clang-format on