• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * @file state.h
24 *
25 * @brief Definitions for API state.
26 *
27 ******************************************************************************/
28 #pragma once
29 
30 #include "common/formats.h"
31 #include "common/simdintrin.h"
32 
//////////////////////////////////////////////////////////////////////////
/// PRIMITIVE_TOPOLOGY
/// @brief Primitive topology identifiers. Every enumerator has an explicit
///        value. Note that 0x15 and 0x18..0x1E are not assigned.
///        Patch lists occupy 0x20..0x3F, so that
///        (TOP_PATCHLIST_n - TOP_PATCHLIST_BASE) == n, the number of
///        vertices (control points) in each patch.
//////////////////////////////////////////////////////////////////////////
enum PRIMITIVE_TOPOLOGY
{
    TOP_UNKNOWN                = 0x00,
    TOP_POINT_LIST             = 0x01,
    TOP_LINE_LIST              = 0x02,
    TOP_LINE_STRIP             = 0x03,
    TOP_TRIANGLE_LIST          = 0x04,
    TOP_TRIANGLE_STRIP         = 0x05,
    TOP_TRIANGLE_FAN           = 0x06,
    TOP_QUAD_LIST              = 0x07,
    TOP_QUAD_STRIP             = 0x08,
    TOP_LINE_LIST_ADJ          = 0x09,
    TOP_LISTSTRIP_ADJ          = 0x0A,
    TOP_TRI_LIST_ADJ           = 0x0B,
    TOP_TRI_STRIP_ADJ          = 0x0C,
    TOP_TRI_STRIP_REVERSE      = 0x0D,
    TOP_POLYGON                = 0x0E,
    TOP_RECT_LIST              = 0x0F,
    TOP_LINE_LOOP              = 0x10,
    TOP_POINT_LIST_BF          = 0x11,
    TOP_LINE_STRIP_CONT        = 0x12,
    TOP_LINE_STRIP_BF          = 0x13,
    TOP_LINE_STRIP_CONT_BF     = 0x14,
    TOP_TRIANGLE_FAN_NOSTIPPLE = 0x16,
    TOP_TRIANGLE_DISC          = 0x17,  /// @todo What is this??

    // Invalid topology, used to calculate num verts for a patchlist.
    TOP_PATCHLIST_BASE         = 0x1F,

    TOP_PATCHLIST_1            = 0x20,  // List of 1-vertex patches
    TOP_PATCHLIST_2            = 0x21,
    TOP_PATCHLIST_3            = 0x22,
    TOP_PATCHLIST_4            = 0x23,
    TOP_PATCHLIST_5            = 0x24,
    TOP_PATCHLIST_6            = 0x25,
    TOP_PATCHLIST_7            = 0x26,
    TOP_PATCHLIST_8            = 0x27,
    TOP_PATCHLIST_9            = 0x28,
    TOP_PATCHLIST_10           = 0x29,
    TOP_PATCHLIST_11           = 0x2A,
    TOP_PATCHLIST_12           = 0x2B,
    TOP_PATCHLIST_13           = 0x2C,
    TOP_PATCHLIST_14           = 0x2D,
    TOP_PATCHLIST_15           = 0x2E,
    TOP_PATCHLIST_16           = 0x2F,
    TOP_PATCHLIST_17           = 0x30,
    TOP_PATCHLIST_18           = 0x31,
    TOP_PATCHLIST_19           = 0x32,
    TOP_PATCHLIST_20           = 0x33,
    TOP_PATCHLIST_21           = 0x34,
    TOP_PATCHLIST_22           = 0x35,
    TOP_PATCHLIST_23           = 0x36,
    TOP_PATCHLIST_24           = 0x37,
    TOP_PATCHLIST_25           = 0x38,
    TOP_PATCHLIST_26           = 0x39,
    TOP_PATCHLIST_27           = 0x3A,
    TOP_PATCHLIST_28           = 0x3B,
    TOP_PATCHLIST_29           = 0x3C,
    TOP_PATCHLIST_30           = 0x3D,
    TOP_PATCHLIST_31           = 0x3E,
    TOP_PATCHLIST_32           = 0x3F,  // List of 32-vertex patches
};
96 
//////////////////////////////////////////////////////////////////////////
/// SWR_SHADER_TYPE
/// @brief Shader stage identifiers, numbered consecutively from zero.
///        NUM_SHADER_TYPES follows the last stage and therefore equals
///        the number of stages.
//////////////////////////////////////////////////////////////////////////
enum SWR_SHADER_TYPE
{
    SHADER_VERTEX   = 0,
    SHADER_GEOMETRY = 1,
    SHADER_DOMAIN   = 2,
    SHADER_HULL     = 3,
    SHADER_PIXEL    = 4,
    SHADER_COMPUTE  = 5,

    NUM_SHADER_TYPES,   // must remain last
};
111 
//////////////////////////////////////////////////////////////////////////
/// SWR_RENDERTARGET_ATTACHMENT
/// @brief Attachment indices: eight color attachments followed by depth
///        and stencil. SWR_NUM_ATTACHMENTS follows the last attachment and
///        therefore equals the number of attachments.
/// @todo It's not clear what an "attachment" means. It's not a common term.
//////////////////////////////////////////////////////////////////////////
enum SWR_RENDERTARGET_ATTACHMENT
{
    SWR_ATTACHMENT_COLOR0  = 0,
    SWR_ATTACHMENT_COLOR1  = 1,
    SWR_ATTACHMENT_COLOR2  = 2,
    SWR_ATTACHMENT_COLOR3  = 3,
    SWR_ATTACHMENT_COLOR4  = 4,
    SWR_ATTACHMENT_COLOR5  = 5,
    SWR_ATTACHMENT_COLOR6  = 6,
    SWR_ATTACHMENT_COLOR7  = 7,
    SWR_ATTACHMENT_DEPTH   = 8,
    SWR_ATTACHMENT_STENCIL = 9,

    SWR_NUM_ATTACHMENTS     // must remain last
};
131 
// Number of render targets; used in this header to size per-render-target arrays.
#define SWR_NUM_RENDERTARGETS       8

// One bit per attachment: eight color bits, then depth, then stencil.
#define SWR_ATTACHMENT_COLOR0_BIT   0x001
#define SWR_ATTACHMENT_COLOR1_BIT   0x002
#define SWR_ATTACHMENT_COLOR2_BIT   0x004
#define SWR_ATTACHMENT_COLOR3_BIT   0x008
#define SWR_ATTACHMENT_COLOR4_BIT   0x010
#define SWR_ATTACHMENT_COLOR5_BIT   0x020
#define SWR_ATTACHMENT_COLOR6_BIT   0x040
#define SWR_ATTACHMENT_COLOR7_BIT   0x080
#define SWR_ATTACHMENT_DEPTH_BIT    0x100
#define SWR_ATTACHMENT_STENCIL_BIT  0x200

// Union of all ten attachment bits, and of the eight color bits only.
#define SWR_ATTACHMENT_MASK_ALL     0x3ff
#define SWR_ATTACHMENT_MASK_COLOR   0x0ff
146 
147 
//////////////////////////////////////////////////////////////////////////
/// @brief SWR Inner Tessellation factor ID
/// Indices of the inner tessellation factors. SWR_NUM_INNER_TESS_FACTORS
/// follows the last ID and therefore equals the number of factors.
/// NOTE(review): the original comment points at "GetTessFactorOutputPosition
/// code above" for documentation; that code is not in the visible part of
/// this header - confirm where it lives.
enum SWR_INNER_TESSFACTOR_ID
{
    SWR_QUAD_U_TRI_INSIDE = 0,
    SWR_QUAD_V_INSIDE     = 1,

    SWR_NUM_INNER_TESS_FACTORS,     // must remain last
};
158 
//////////////////////////////////////////////////////////////////////////
/// @brief SWR Outer Tessellation factor ID
/// Indices of the outer tessellation factors. SWR_NUM_OUTER_TESS_FACTORS
/// follows the last ID and therefore equals the number of factors.
/// NOTE(review): the original comment points at "GetTessFactorOutputPosition
/// code above" for documentation; that code is not in the visible part of
/// this header - confirm where it lives.
enum SWR_OUTER_TESSFACTOR_ID
{
    SWR_QUAD_U_EQ0_TRI_U_LINE_DETAIL  = 0,
    SWR_QUAD_V_EQ0_TRI_V_LINE_DENSITY = 1,
    SWR_QUAD_U_EQ1_TRI_W              = 2,
    SWR_QUAD_V_EQ1                    = 3,

    SWR_NUM_OUTER_TESS_FACTORS,     // must remain last
};
171 
172 
/////////////////////////////////////////////////////////////////////////
/// simdvertex
/// @brief Defines a vertex element that holds all the data for SIMD vertices.
///        Contains position in clip space, hardcoded to attribute 0,
///        space for up to 32 attributes, as well as any SGV values generated
///        by the pipeline.
///
///        The macros below name the attribute slots of a simdvertex.
/////////////////////////////////////////////////////////////////////////
#define VERTEX_POSITION_SLOT                0   // position, always attribute 0
#define VERTEX_ATTRIB_START_SLOT            1   // first generic attribute
#define VERTEX_ATTRIB_END_SLOT              32  // last generic attribute
#define VERTEX_RTAI_SLOT                    33  // GS writes RenderTargetArrayIndex here
#define VERTEX_PRIMID_SLOT                  34  // GS writes PrimId here
#define VERTEX_CLIPCULL_DIST_LO_SLOT        35  // VS writes lower 4 clip/cull dist
#define VERTEX_CLIPCULL_DIST_HI_SLOT        36  // VS writes upper 4 clip/cull dist
#define VERTEX_POINT_SIZE_SLOT              37  // VS writes point size here
#define VERTEX_VIEWPORT_ARRAY_INDEX_SLOT    38
189 // SoAoSoA
190 struct simdvertex
191 {
192     simdvector    attrib[KNOB_NUM_ATTRIBUTES];
193 };
194 
195 //////////////////////////////////////////////////////////////////////////
196 /// SWR_VS_CONTEXT
197 /// @brief Input to vertex shader
198 /////////////////////////////////////////////////////////////////////////
199 struct SWR_VS_CONTEXT
200 {
201     simdvertex* pVin;           // IN: SIMD input vertex data store
202     simdvertex* pVout;          // OUT: SIMD output vertex data store
203 
204     uint32_t InstanceID;    // IN: Instance ID, constant across all verts of the SIMD
205     simdscalari VertexID;   // IN: Vertex ID
206     simdscalari mask;       // IN: Active mask for shader
207 };
208 
/////////////////////////////////////////////////////////////////////////
/// ScalarAttrib
/// @brief A single scalar (non-SIMD) attribute made of four float
///        components. ScalarCPoint, the control point element passed from
///        the output of the hull shader to the input of the domain shader,
///        is an array of these.
/////////////////////////////////////////////////////////////////////////
struct ScalarAttrib
{
    float x;    // component 0
    float y;    // component 1
    float z;    // component 2
    float w;    // component 3
};
221 
222 struct ScalarCPoint
223 {
224     ScalarAttrib attrib[KNOB_NUM_ATTRIBUTES];
225 };
226 
227 //////////////////////////////////////////////////////////////////////////
228 /// SWR_TESSELLATION_FACTORS
229 /// @brief Tessellation factors structure (non-vector)
230 /////////////////////////////////////////////////////////////////////////
231 struct SWR_TESSELLATION_FACTORS
232 {
233     float  OuterTessFactors[SWR_NUM_OUTER_TESS_FACTORS];
234     float  InnerTessFactors[SWR_NUM_INNER_TESS_FACTORS];
235 };
236 
// Upper bound on vertices per primitive: support up to 32 control point patches.
#define MAX_NUM_VERTS_PER_PRIM 32
238 struct ScalarPatch
239 {
240     SWR_TESSELLATION_FACTORS tessFactors;
241     ScalarCPoint cp[MAX_NUM_VERTS_PER_PRIM];
242     ScalarCPoint patchData;
243 };
244 
245 //////////////////////////////////////////////////////////////////////////
246 /// SWR_HS_CONTEXT
247 /// @brief Input to hull shader
248 /////////////////////////////////////////////////////////////////////////
249 struct SWR_HS_CONTEXT
250 {
251     simdvertex vert[MAX_NUM_VERTS_PER_PRIM]; // IN: (SIMD) input primitive data
252     simdscalari PrimitiveID;    // IN: (SIMD) primitive ID generated from the draw call
253     simdscalari mask;           // IN: Active mask for shader
254     ScalarPatch* pCPout;        // OUT: Output control point patch
255                                 // SIMD-sized-array of SCALAR patches
256 };
257 
258 //////////////////////////////////////////////////////////////////////////
259 /// SWR_DS_CONTEXT
260 /// @brief Input to domain shader
261 /////////////////////////////////////////////////////////////////////////
262 struct SWR_DS_CONTEXT
263 {
264     uint32_t        PrimitiveID;    // IN: (SCALAR) PrimitiveID for the patch associated with the DS invocation
265     uint32_t        vectorOffset;   // IN: (SCALAR) vector index offset into SIMD data.
266     uint32_t        vectorStride;   // IN: (SCALAR) stride (in vectors) of output data per attribute-component
267     ScalarPatch*    pCpIn;          // IN: (SCALAR) Control patch
268     simdscalar*     pDomainU;       // IN: (SIMD) Domain Point U coords
269     simdscalar*     pDomainV;       // IN: (SIMD) Domain Point V coords
270     simdscalari     mask;           // IN: Active mask for shader
271     simdscalar*     pOutputData;    // OUT: (SIMD) Vertex Attributes (2D array of vectors, one row per attribute-component)
272 };
273 
274 //////////////////////////////////////////////////////////////////////////
275 /// SWR_GS_CONTEXT
276 /// @brief Input to geometry shader.
277 /////////////////////////////////////////////////////////////////////////
278 struct SWR_GS_CONTEXT
279 {
280     simdvertex vert[MAX_NUM_VERTS_PER_PRIM]; // IN: input primitive data for SIMD prims
281     simdscalari PrimitiveID;        // IN: input primitive ID generated from the draw call
282     uint32_t InstanceID;            // IN: input instance ID
283     simdscalari mask;               // IN: Active mask for shader
284     uint8_t* pStream;               // OUT: output stream (contains vertices for all output streams)
285     uint8_t* pCutOrStreamIdBuffer;  // OUT: cut or stream id buffer
286     simdscalari vertexCount;        // OUT: num vertices emitted per SIMD lane
287 };
288 
289 struct PixelPositions
290 {
291     simdscalar UL;
292     simdscalar center;
293     simdscalar sample;
294     simdscalar centroid;
295 };
296 
// Maximum number of multisamples (matches the largest SWR_MULTISAMPLE_COUNT entry, 16X).
#define SWR_MAX_NUM_MULTISAMPLES 16
298 
299 //////////////////////////////////////////////////////////////////////////
300 /// SWR_PS_CONTEXT
301 /// @brief Input to pixel shader.
302 /////////////////////////////////////////////////////////////////////////
303 struct SWR_PS_CONTEXT
304 {
305     PixelPositions vX;          // IN: x location(s) of pixels
306     PixelPositions vY;          // IN: x location(s) of pixels
307     simdscalar vZ;              // INOUT: z location of pixels
308     simdscalari activeMask;     // OUT: mask for kill
309     simdscalar  inputMask;      // IN: input coverage mask for all samples
310     simdscalari oMask;          // OUT: mask for output coverage
311 
312     PixelPositions vI;          // barycentric coords evaluated at pixel center, sample position, centroid
313     PixelPositions vJ;
314     PixelPositions vOneOverW;   // IN: 1/w
315 
316     const float* pAttribs;      // IN: pointer to attribute barycentric coefficients
317     const float* pPerspAttribs; // IN: pointer to attribute/w barycentric coefficients
318     const float* pRecipW;       // IN: pointer to 1/w coord for each vertex
319     const float *I;             // IN: Barycentric A, B, and C coefs used to compute I
320     const float *J;             // IN: Barycentric A, B, and C coefs used to compute J
321     float recipDet;             // IN: 1/Det, used when barycentric interpolating attributes
322     const float* pSamplePosX;   // IN: array of sample positions
323     const float* pSamplePosY;   // IN: array of sample positions
324     simdvector shaded[SWR_NUM_RENDERTARGETS];
325                                 // OUT: result color per rendertarget
326 
327     uint32_t frontFace;         // IN: front- 1, back- 0
328     uint32_t primID;            // IN: primitive ID
329     uint32_t sampleIndex;       // IN: sampleIndex
330 
331     uint32_t rasterizerSampleCount; // IN: sample count used by the rasterizer
332 
333 };
334 
//////////////////////////////////////////////////////////////////////////
/// SWR_CS_CONTEXT
/// @brief Input to compute shader.
///
/// The ThreadGroupId is the current thread group index relative to all
/// thread groups in the Dispatch call. The ThreadId, ThreadIdInGroup, and
/// ThreadIdInGroupFlattened can be derived from ThreadGroupId in the shader.
///
/// Compute shader accepts the following system values.
///  o ThreadId - Current thread id relative to all other threads in dispatch.
///  o ThreadGroupId - Current thread group id relative to all other groups
///    in dispatch.
///  o ThreadIdInGroup - Current thread relative to all threads in the
///    current thread group.
///  o ThreadIdInGroupFlattened - Flattened linear id derived from
///    ThreadIdInGroup.
///
/// All of these system values can be computed in the shader. They will be
/// derived from the current tile counter. The tile counter is an atomic
/// counter that resides in the draw context and is initialized to the
/// product of the dispatch dims:
///
///     tileCounter = dispatchDims.x * dispatchDims.y * dispatchDims.z
///
/// Each CPU worker thread atomically decrements this counter and passes the
/// current count into the shader. When the count reaches 0, all thread
/// groups in the dispatch call have been completed.
/////////////////////////////////////////////////////////////////////////
struct SWR_CS_CONTEXT
{
    // The tile counter value for this thread group.
    uint32_t tileCounter;

    // Dispatch dimensions used by shader to compute system values from the tile counter.
    uint32_t dispatchDims[3];

    // Thread Group Shared Memory pointer.
    uint8_t *pTGSM;

    // Spill/fill buffer for barrier support
    uint8_t *pSpillFillBuffer;
};
370 
371 // enums
// Surface tiling modes, numbered consecutively from zero.
// SWR_TILE_MODE_COUNT follows the last mode and equals the number of modes.
enum SWR_TILE_MODE
{
    SWR_TILE_NONE        = 0,   // Linear mode (no tiling)
    SWR_TILE_MODE_WMAJOR = 1,   // W major tiling
    SWR_TILE_MODE_XMAJOR = 2,   // X major tiling
    SWR_TILE_MODE_YMAJOR = 3,   // Y major tiling
    SWR_TILE_SWRZ        = 4,   // SWR-Z tiling

    SWR_TILE_MODE_COUNT         // must remain last
};
382 
// Surface types. Values are explicit; note that 6 is not assigned
// (SURFACE_NULL is 7).
enum SWR_SURFACE_TYPE
{
    SURFACE_1D                = 0,
    SURFACE_2D                = 1,
    SURFACE_3D                = 2,
    SURFACE_CUBE              = 3,
    SURFACE_BUFFER            = 4,
    SURFACE_STRUCTURED_BUFFER = 5,
    SURFACE_NULL              = 7
};
393 
// Depth comparison functions, numbered consecutively from zero.
// NUM_ZFUNC follows the last function and equals the number of functions.
enum SWR_ZFUNCTION
{
    ZFUNC_ALWAYS = 0,
    ZFUNC_NEVER  = 1,
    ZFUNC_LT     = 2,
    ZFUNC_EQ     = 3,
    ZFUNC_LE     = 4,
    ZFUNC_GT     = 5,
    ZFUNC_NE     = 6,
    ZFUNC_GE     = 7,
    NUM_ZFUNC           // must remain last
};
406 
// Stencil operations, numbered consecutively from zero.
enum SWR_STENCILOP
{
    STENCILOP_KEEP    = 0,
    STENCILOP_ZERO    = 1,
    STENCILOP_REPLACE = 2,
    STENCILOP_INCRSAT = 3,
    STENCILOP_DECRSAT = 4,
    STENCILOP_INCR    = 5,
    STENCILOP_DECR    = 6,
    STENCILOP_INVERT  = 7
};
418 
// Blend factors, numbered consecutively from zero. The INV_* factors start
// at 11, directly after BLENDFACTOR_ZERO.
enum SWR_BLEND_FACTOR
{
    BLENDFACTOR_ONE                = 0,
    BLENDFACTOR_SRC_COLOR          = 1,
    BLENDFACTOR_SRC_ALPHA          = 2,
    BLENDFACTOR_DST_ALPHA          = 3,
    BLENDFACTOR_DST_COLOR          = 4,
    BLENDFACTOR_SRC_ALPHA_SATURATE = 5,
    BLENDFACTOR_CONST_COLOR        = 6,
    BLENDFACTOR_CONST_ALPHA        = 7,
    BLENDFACTOR_SRC1_COLOR         = 8,
    BLENDFACTOR_SRC1_ALPHA         = 9,
    BLENDFACTOR_ZERO               = 10,
    BLENDFACTOR_INV_SRC_COLOR      = 11,
    BLENDFACTOR_INV_SRC_ALPHA      = 12,
    BLENDFACTOR_INV_DST_ALPHA      = 13,
    BLENDFACTOR_INV_DST_COLOR      = 14,
    BLENDFACTOR_INV_CONST_COLOR    = 15,
    BLENDFACTOR_INV_CONST_ALPHA    = 16,
    BLENDFACTOR_INV_SRC1_COLOR     = 17,
    BLENDFACTOR_INV_SRC1_ALPHA     = 18
};
441 
// Blend operations, numbered consecutively from zero.
enum SWR_BLEND_OP
{
    BLENDOP_ADD         = 0,
    BLENDOP_SUBTRACT    = 1,
    BLENDOP_REVSUBTRACT = 2,
    BLENDOP_MIN         = 3,
    BLENDOP_MAX         = 4,
};
450 
// Logic operations, numbered consecutively 0x0..0xF.
// NOTE(review): the values appear to coincide with the 4-bit truth table of
// each operation (e.g. AND = 0x8, XOR = 0x6, OR = 0xE) - confirm whether any
// consumer relies on that before reordering.
enum SWR_LOGIC_OP
{
    LOGICOP_CLEAR         = 0x0,
    LOGICOP_NOR           = 0x1,
    LOGICOP_AND_INVERTED  = 0x2,
    LOGICOP_COPY_INVERTED = 0x3,
    LOGICOP_AND_REVERSE   = 0x4,
    LOGICOP_INVERT        = 0x5,
    LOGICOP_XOR           = 0x6,
    LOGICOP_NAND          = 0x7,
    LOGICOP_AND           = 0x8,
    LOGICOP_EQUIV         = 0x9,
    LOGICOP_NOOP          = 0xA,
    LOGICOP_OR_INVERTED   = 0xB,
    LOGICOP_COPY          = 0xC,
    LOGICOP_OR_REVERSE    = 0xD,
    LOGICOP_OR            = 0xE,
    LOGICOP_SET           = 0xF,
};
470 
//////////////////////////////////////////////////////////////////////////
/// SWR_AUX_MODE
/// @brief Specifies how the auxiliary buffer is used by the driver.
///        Values are numbered consecutively from zero.
//////////////////////////////////////////////////////////////////////////
enum SWR_AUX_MODE
{
    AUX_MODE_NONE  = 0,
    AUX_MODE_COLOR = 1,
    AUX_MODE_UAV   = 2,
    AUX_MODE_DEPTH = 3,
};
482 
483 //////////////////////////////////////////////////////////////////////////
484 /// SWR_SURFACE_STATE
485 //////////////////////////////////////////////////////////////////////////
486 struct SWR_SURFACE_STATE
487 {
488     uint8_t *pBaseAddress;
489     SWR_SURFACE_TYPE type;  // @llvm_enum
490     SWR_FORMAT format;      // @llvm_enum
491     uint32_t width;
492     uint32_t height;
493     uint32_t depth;
494     uint32_t numSamples;
495     uint32_t samplePattern;
496     uint32_t pitch;
497     uint32_t qpitch;
498     uint32_t minLod;            // for sampled surfaces, the most detailed LOD that can be accessed by sampler
499     uint32_t maxLod;            // for sampled surfaces, the max LOD that can be accessed
500     float resourceMinLod;       // for sampled surfaces, the most detailed fractional mip that can be accessed by sampler
501     uint32_t lod;               // for render targets, the lod being rendered to
502     uint32_t arrayIndex;        // for render targets, the array index being rendered to for arrayed surfaces
503     SWR_TILE_MODE tileMode;     // @llvm_enum
504     uint32_t halign;
505     uint32_t valign;
506     uint32_t xOffset;
507     uint32_t yOffset;
508 
509     uint32_t lodOffsets[2][15]; // lod offsets for sampled surfaces
510 
511     uint8_t *pAuxBaseAddress;   // Used for compression, append/consume counter, etc.
512     SWR_AUX_MODE auxMode;      // @llvm_enum
513 
514     bool bInterleavedSamples;   // are MSAA samples stored interleaved or planar
515 };
516 
// Vertex fetch state: describes one bound vertex buffer.
// WARNING - any changes to this struct need to be reflected
// in the fetch shader jit.
struct SWR_VERTEX_BUFFER_STATE
{
    uint32_t       index;
    uint32_t       pitch;
    const uint8_t* pData;
    uint32_t       size;
    uint32_t       numaNode;
    // size / pitch.  precalculated value used by fetch shader for OOB checks
    uint32_t       maxVertex;
    // size % pitch.  precalculated value used by fetch shader for partially OOB vertices
    uint32_t       partialInboundsSize;
};
530 
531 struct SWR_INDEX_BUFFER_STATE
532 {
533     // Format type for indices (e.g. UINT16, UINT32, etc.)
534     SWR_FORMAT format; // @llvm_enum
535     const void *pIndices;
536     uint32_t size;
537 };
538 
539 
540 //////////////////////////////////////////////////////////////////////////
541 /// SWR_FETCH_CONTEXT
542 /// @brief Input to fetch shader.
543 /// @note WARNING - Changes to this struct need to be reflected in the
544 ///                 fetch shader jit.
545 /////////////////////////////////////////////////////////////////////////
546 struct SWR_FETCH_CONTEXT
547 {
548     const SWR_VERTEX_BUFFER_STATE* pStreams;    // IN: array of bound vertex buffers
549     const int32_t* pIndices;                    // IN: pointer to index buffer for indexed draws
550     const int32_t* pLastIndex;                  // IN: pointer to end of index buffer, used for bounds checking
551     uint32_t CurInstance;                       // IN: current instance
552     uint32_t BaseVertex;                        // IN: base vertex
553     uint32_t StartVertex;                       // IN: start vertex
554     uint32_t StartInstance;                     // IN: start instance
555     simdscalari VertexID;                       // OUT: vector of vertex IDs
556     simdscalari CutMask;                        // OUT: vector mask of indices which have the cut index value
557 };
558 
559 //////////////////////////////////////////////////////////////////////////
560 /// SWR_STATS
561 ///
562 /// @brief All statistics generated by SWR go here. These are public
563 ///        to driver.
564 /////////////////////////////////////////////////////////////////////////
OSALIGNLINE(struct)565 OSALIGNLINE(struct) SWR_STATS
566 {
567     // Occlusion Query
568     uint64_t DepthPassCount; // Number of passing depth tests. Not exact.
569 
570     // Pipeline Stats
571     uint64_t PsInvocations;  // Number of Pixel Shader invocations
572     uint64_t CsInvocations;  // Number of Compute Shader invocations
573 
574 };
575 
576 //////////////////////////////////////////////////////////////////////////
577 /// SWR_STATS
578 ///
579 /// @brief All statistics generated by FE.
580 /////////////////////////////////////////////////////////////////////////
OSALIGNLINE(struct)581 OSALIGNLINE(struct) SWR_STATS_FE
582 {
583     uint64_t IaVertices;    // Number of Fetch Shader vertices
584     uint64_t IaPrimitives;  // Number of PA primitives.
585     uint64_t VsInvocations; // Number of Vertex Shader invocations
586     uint64_t HsInvocations; // Number of Hull Shader invocations
587     uint64_t DsInvocations; // Number of Domain Shader invocations
588     uint64_t GsInvocations; // Number of Geometry Shader invocations
589     uint64_t GsPrimitives;  // Number of prims GS outputs.
590     uint64_t CInvocations;  // Number of clipper invocations
591     uint64_t CPrimitives;   // Number of clipper primitives.
592 
593     // Streamout Stats
594     uint64_t SoPrimStorageNeeded[4];
595     uint64_t SoNumPrimsWritten[4];
596 };
597 
598 //////////////////////////////////////////////////////////////////////////
599 /// STREAMOUT_BUFFERS
600 /////////////////////////////////////////////////////////////////////////
601 
// Stream-output limits used to size arrays in the streamout structures.
#define MAX_SO_STREAMS  4
#define MAX_SO_BUFFERS  4
#define MAX_ATTRIBUTES  32
605 
// State of one stream-output buffer. Sizes and offsets are in dwords.
struct SWR_STREAMOUT_BUFFER
{
    bool      enable;
    bool      soWriteEnable;

    uint32_t *pBuffer;          // Pointers to streamout buffers.
    uint32_t  bufferSize;       // Size of buffer in dwords.
    uint32_t  pitch;            // Vertex pitch of buffer in dwords.
    uint32_t  streamOffset;     // Offset into buffer in dwords. SOS will increment this offset.
    uint32_t *pWriteOffset;     // Offset to the SO write offset. If not null then we update offset here.
};
627 
628 //////////////////////////////////////////////////////////////////////////
629 /// STREAMOUT_STATE
630 /////////////////////////////////////////////////////////////////////////
631 struct SWR_STREAMOUT_STATE
632 {
633     // This disables stream output.
634     bool soEnable;
635 
636     // which streams are enabled for streamout
637     bool streamEnable[MAX_SO_STREAMS];
638 
639     // If set then do not send any streams to the rasterizer.
640     bool rasterizerDisable;
641 
642     // Specifies which stream to send to the rasterizer.
643     uint32_t streamToRasterizer;
644 
645     // The stream masks specify which attributes are sent to which streams.
646     // These masks help the FE to setup the pPrimData buffer that is passed
647     // the Stream Output Shader (SOS) function.
648     uint32_t streamMasks[MAX_SO_STREAMS];
649 
650     // Number of attributes, including position, per vertex that are streamed out.
651     // This should match number of bits in stream mask.
652     uint32_t streamNumEntries[MAX_SO_STREAMS];
653 };
654 
655 //////////////////////////////////////////////////////////////////////////
656 /// STREAMOUT_CONTEXT - Passed to SOS
657 /////////////////////////////////////////////////////////////////////////
658 struct SWR_STREAMOUT_CONTEXT
659 {
660     uint32_t* pPrimData;
661     SWR_STREAMOUT_BUFFER* pBuffer[MAX_SO_STREAMS];
662 
663     // Num prims written for this stream
664     uint32_t numPrimsWritten;
665 
666     // Num prims that should have been written if there were no overflow.
667     uint32_t numPrimStorageNeeded;
668 };
669 
670 //////////////////////////////////////////////////////////////////////////
671 /// SWR_GS_STATE - Geometry shader state
672 /////////////////////////////////////////////////////////////////////////
673 struct SWR_GS_STATE
674 {
675     bool gsEnable;
676 
677     // number of input attributes per vertex. used by the frontend to
678     // optimize assembling primitives for GS
679     uint32_t numInputAttribs;
680 
681     // output topology - can be point, tristrip, or linestrip
682     PRIMITIVE_TOPOLOGY outputTopology;      // @llvm_enum
683 
684     // maximum number of verts that can be emitted by a single instance of the GS
685     uint32_t maxNumVerts;
686 
687     // instance count
688     uint32_t instanceCount;
689 
690     // geometry shader emits renderTargetArrayIndex
691     bool emitsRenderTargetArrayIndex;
692 
693     // geometry shader emits PrimitiveID
694     bool emitsPrimitiveID;
695 
696     // geometry shader emits ViewportArrayIndex
697     bool emitsViewportArrayIndex;
698 
699     // if true, geometry shader emits a single stream, with separate cut buffer.
700     // if false, geometry shader emits vertices for multiple streams to the stream buffer, with a separate StreamID buffer
701     // to map vertices to streams
702     bool isSingleStream;
703 
704     // when single stream is enabled, singleStreamID dictates which stream is being output.
705     // field ignored if isSingleStream is false
706     uint32_t singleStreamID;
707 };
708 
709 
//////////////////////////////////////////////////////////////////////////
/// SWR_TS_OUTPUT_TOPOLOGY - Defines data output by the tessellator / DS
/// Values are numbered consecutively from zero; the COUNT enumerator
/// follows the last topology and equals the number of topologies.
/////////////////////////////////////////////////////////////////////////
enum SWR_TS_OUTPUT_TOPOLOGY
{
    SWR_TS_OUTPUT_POINT   = 0,
    SWR_TS_OUTPUT_LINE    = 1,
    SWR_TS_OUTPUT_TRI_CW  = 2,
    SWR_TS_OUTPUT_TRI_CCW = 3,

    SWR_TS_OUTPUT_TOPOLOGY_COUNT    // must remain last
};
722 
//////////////////////////////////////////////////////////////////////////
/// SWR_TS_PARTITIONING - Defines tessellation algorithm
/// Values are numbered consecutively from zero; the COUNT enumerator
/// follows the last mode and equals the number of modes.
/////////////////////////////////////////////////////////////////////////
enum SWR_TS_PARTITIONING
{
    SWR_TS_INTEGER         = 0,
    SWR_TS_ODD_FRACTIONAL  = 1,
    SWR_TS_EVEN_FRACTIONAL = 2,

    SWR_TS_PARTITIONING_COUNT   // must remain last
};
734 
//////////////////////////////////////////////////////////////////////////
/// SWR_TS_DOMAIN - Defines Tessellation Domain
/// Values are numbered consecutively from zero; the COUNT enumerator
/// follows the last domain and equals the number of domains.
/////////////////////////////////////////////////////////////////////////
enum SWR_TS_DOMAIN
{
    SWR_TS_QUAD    = 0,
    SWR_TS_TRI     = 1,
    SWR_TS_ISOLINE = 2,

    SWR_TS_DOMAIN_COUNT     // must remain last
};
746 
747 //////////////////////////////////////////////////////////////////////////
748 /// SWR_TS_STATE - Tessellation state
749 /////////////////////////////////////////////////////////////////////////
750 struct SWR_TS_STATE
751 {
752     bool                    tsEnable;
753     SWR_TS_OUTPUT_TOPOLOGY  tsOutputTopology;   // @llvm_enum
754     SWR_TS_PARTITIONING     partitioning;       // @llvm_enum
755     SWR_TS_DOMAIN           domain;             // @llvm_enum
756 
757     PRIMITIVE_TOPOLOGY      postDSTopology;     // @llvm_enum
758 
759     uint32_t                numHsInputAttribs;
760     uint32_t                numHsOutputAttribs;
761     uint32_t                numDsOutputAttribs;
762 };
763 
// Output merger state: per-render-target write-disable flags, one bit per
// color channel. The static_assert below pins the struct to a single byte.
struct SWR_RENDER_TARGET_BLEND_STATE
{
    uint8_t writeDisableRed   : 1;
    uint8_t writeDisableGreen : 1;
    uint8_t writeDisableBlue  : 1;
    uint8_t writeDisableAlpha : 1;
};
static_assert(sizeof(SWR_RENDER_TARGET_BLEND_STATE) == 1,
              "Invalid SWR_RENDER_TARGET_BLEND_STATE size");
773 
// Multisample count selectors, numbered consecutively from zero (these are
// indices, not sample counts). SWR_MULTISAMPLE_TYPE_COUNT follows the last
// selector and equals the number of selectors.
enum SWR_MULTISAMPLE_COUNT
{
    SWR_MULTISAMPLE_1X  = 0,
    SWR_MULTISAMPLE_2X  = 1,
    SWR_MULTISAMPLE_4X  = 2,
    SWR_MULTISAMPLE_8X  = 3,
    SWR_MULTISAMPLE_16X = 4,
    SWR_MULTISAMPLE_TYPE_COUNT  // must remain last
};
783 
784 struct SWR_BLEND_STATE
785 {
786     // constant blend factor color in RGBA float
787     float constantColor[4];
788 
789     // alpha test reference value in unorm8 or float32
790     uint32_t alphaTestReference;
791     uint32_t sampleMask;
792     // all RT's have the same sample count
793     ///@todo move this to Output Merger state when we refactor
794     SWR_MULTISAMPLE_COUNT sampleCount;  // @llvm_enum
795 
796     SWR_RENDER_TARGET_BLEND_STATE renderTarget[SWR_NUM_RENDERTARGETS];
797 };
798 static_assert(sizeof(SWR_BLEND_STATE) == 36, "Invalid SWR_BLEND_STATE size");
799 
800 //////////////////////////////////////////////////////////////////////////
801 /// FUNCTION POINTERS FOR SHADERS
802 
803 typedef void(__cdecl *PFN_FETCH_FUNC)(SWR_FETCH_CONTEXT& fetchInfo, simdvertex& out);
804 typedef void(__cdecl *PFN_VERTEX_FUNC)(HANDLE hPrivateData, SWR_VS_CONTEXT* pVsContext);
805 typedef void(__cdecl *PFN_HS_FUNC)(HANDLE hPrivateData, SWR_HS_CONTEXT* pHsContext);
806 typedef void(__cdecl *PFN_DS_FUNC)(HANDLE hPrivateData, SWR_DS_CONTEXT* pDsContext);
807 typedef void(__cdecl *PFN_GS_FUNC)(HANDLE hPrivateData, SWR_GS_CONTEXT* pGsContext);
808 typedef void(__cdecl *PFN_CS_FUNC)(HANDLE hPrivateData, SWR_CS_CONTEXT* pCsContext);
809 typedef void(__cdecl *PFN_SO_FUNC)(SWR_STREAMOUT_CONTEXT& soContext);
810 typedef void(__cdecl *PFN_PIXEL_KERNEL)(HANDLE hPrivateData, SWR_PS_CONTEXT *pContext);
811 typedef void(__cdecl *PFN_CPIXEL_KERNEL)(HANDLE hPrivateData, SWR_PS_CONTEXT *pContext);
812 typedef void(__cdecl *PFN_BLEND_JIT_FUNC)(const SWR_BLEND_STATE*,
813     simdvector& vSrc, simdvector& vSrc1, simdscalar& vSrc0Alpha, uint32_t sample,
814     uint8_t* pDst, simdvector& vResult, simdscalari* vOMask, simdscalari* vCoverageMask);
815 typedef simdscalar(*PFN_QUANTIZE_DEPTH)(simdscalar);
816 
817 
818 
//////////////////////////////////////////////////////////////////////////
/// FRONTEND_STATE
/// @brief Front-end flags and provoking-vertex selection.
/////////////////////////////////////////////////////////////////////////
struct SWR_FRONTEND_STATE
{
    // skip clip test, perspective divide, and viewport transform
    // intended for verts in screen space
    bool vpTransformDisable;
    bool bEnableCutIndex;

    // Provoking vertex per topology class; the three bit-fields can also be
    // accessed together through 'bits'.
    union
    {
        struct
        {
            uint32_t triFan        : 2;
            uint32_t lineStripList : 1;
            uint32_t triStripList  : 2;
        };
        uint32_t bits;
    } provokingVertex;

    // provoking vertex for the draw topology
    uint32_t topologyProvokingVertex;
};
840 
//////////////////////////////////////////////////////////////////////////
/// VIEWPORT_MATRIX
//////////////////////////////////////////////////////////////////////////
// Six named entries of a viewport matrix.
// NOTE(review): presumably m00/m11/m22 are the diagonal (scale) terms and
// m30/m31/m32 the translation terms -- confirm against the transform code.
struct SWR_VIEWPORT_MATRIX
{
    float m00;
    float m11;
    float m22;

    float m30;
    float m31;
    float m32;
};
853 
854 //////////////////////////////////////////////////////////////////////////
855 /// VIEWPORT_MATRIXES
856 /////////////////////////////////////////////////////////////////////////
857 struct SWR_VIEWPORT_MATRICES
858 {
859     float m00[KNOB_NUM_VIEWPORTS_SCISSORS];
860     float m11[KNOB_NUM_VIEWPORTS_SCISSORS];
861     float m22[KNOB_NUM_VIEWPORTS_SCISSORS];
862     float m30[KNOB_NUM_VIEWPORTS_SCISSORS];
863     float m31[KNOB_NUM_VIEWPORTS_SCISSORS];
864     float m32[KNOB_NUM_VIEWPORTS_SCISSORS];
865 };
866 
//////////////////////////////////////////////////////////////////////////
/// SWR_VIEWPORT
//////////////////////////////////////////////////////////////////////////
// Viewport description: origin, extent, and Z range, all as floats.
struct SWR_VIEWPORT
{
    // origin
    float x;
    float y;

    // extent
    float width;
    float height;

    // Z range
    float minZ;
    float maxZ;
};
879 
//////////////////////////////////////////////////////////////////////////
/// SWR_CULLMODE
//////////////////////////////////////////////////////////////////////////
// Numeric values are spelled out; they match the implicit numbering this
// enum has always had (note that BOTH is 0 and NONE is 1).
enum SWR_CULLMODE
{
    SWR_CULLMODE_BOTH  = 0,
    SWR_CULLMODE_NONE  = 1,
    SWR_CULLMODE_FRONT = 2,
    SWR_CULLMODE_BACK  = 3
};
890 
// Polygon fill modes; values are the same as the original implicit numbering.
enum SWR_FILLMODE
{
    SWR_FILLMODE_POINT     = 0,
    SWR_FILLMODE_WIREFRAME = 1,
    SWR_FILLMODE_SOLID     = 2
};
897 
// Front-face winding order; values are the same as the original implicit numbering.
enum SWR_FRONTWINDING
{
    SWR_FRONTWINDING_CW  = 0,
    SWR_FRONTWINDING_CCW = 1
};
903 
904 
// MSAA sample patterns.
enum SWR_MSAA_SAMPLE_PATTERN
{
    SWR_MSAA_CENTER_PATTERN   = 0,
    SWR_MSAA_STANDARD_PATTERN = 1,
    // Left implicit so it stays one past the last pattern above
    // (i.e. the number of patterns, currently 2).
    SWR_MSAA_SAMPLE_PATTERN_COUNT
};
911 
// Pixel location selector: CENTER is 0, UL is 1.
enum SWR_PIXEL_LOCATION
{
    SWR_PIXEL_LOCATION_CENTER = 0,
    SWR_PIXEL_LOCATION_UL     = 1,
};
917 
// Fixed-point, screen-space location of one sample within a pixel.
struct SWR_MULTISAMPLE_POS
{
    uint32_t x; // horizontal position
    uint32_t y; // vertical position
};
924 
// MSAA rasterization modes; values are the same as the original implicit
// numbering (OFF modes are 0-1, ON modes are 2-3).
enum SWR_MSAA_RASTMODE
{
    SWR_MSAA_RASTMODE_OFF_PIXEL   = 0,
    SWR_MSAA_RASTMODE_OFF_PATTERN = 1,
    SWR_MSAA_RASTMODE_ON_PIXEL    = 2,
    SWR_MSAA_RASTMODE_ON_PATTERN  = 3
};
932 
933 //////////////////////////////////////////////////////////////////////////
934 /// SWR_RASTSTATE
935 //////////////////////////////////////////////////////////////////////////
936 struct SWR_RASTSTATE
937 {
938     uint32_t cullMode               : 2;
939     uint32_t fillMode               : 2;
940     uint32_t frontWinding           : 1;
941     uint32_t scissorEnable          : 1;
942     uint32_t depthClipEnable        : 1;
943     uint32_t clipHalfZ              : 1;
944     uint32_t pointParam             : 1;
945     uint32_t pointSpriteEnable      : 1;
946     uint32_t pointSpriteTopOrigin   : 1;
947     uint32_t msaaRastEnable         : 1;
948     uint32_t forcedSampleCount      : 1;
949     uint32_t pixelOffset            : 1;
950     uint32_t depthBiasPreAdjusted   : 1;    ///< depth bias constant is in float units, not per-format Z units
951     uint32_t conservativeRast       : 1;
952 
953     float pointSize;
954     float lineWidth;
955 
956     float depthBias;
957     float slopeScaledDepthBias;
958     float depthBiasClamp;
959     SWR_FORMAT depthFormat;     // @llvm_enum
960 
961     ///@todo: MSAA lines
962     // multisample state for MSAA lines
963     SWR_MSAA_RASTMODE rastMode;    // @llvm_enum
964 
965     // sample count the rasterizer is running at
966     SWR_MULTISAMPLE_COUNT sampleCount;  // @llvm_enum
967     uint32_t pixelLocation;     // UL or Center
968     SWR_MULTISAMPLE_POS iSamplePos[SWR_MAX_NUM_MULTISAMPLES];
969     SWR_MSAA_SAMPLE_PATTERN samplePattern;   // @llvm_enum
970 
971     // user clip/cull distance enables
972     uint8_t cullDistanceMask;
973     uint8_t clipDistanceMask;
974 };
975 
// Constant sources; values are the same as the original implicit numbering.
enum SWR_CONSTANT_SOURCE
{
    SWR_CONSTANT_SOURCE_CONST_0000       = 0,
    SWR_CONSTANT_SOURCE_CONST_0001_FLOAT = 1,
    SWR_CONSTANT_SOURCE_CONST_1111_FLOAT = 2,
    SWR_CONSTANT_SOURCE_PRIM_ID          = 3
};
983 
// One attribute swizzle entry, packed into 11 bits of 16-bit bitfields.
struct SWR_ATTRIB_SWIZZLE
{
    uint16_t sourceAttrib          : 5; // source attribute
    uint16_t constantSource        : 2; // constant source to apply
    uint16_t componentOverrideMask : 4; // override component with constant source
};
990 
991 // backend state
992 struct SWR_BACKEND_STATE
993 {
994     uint32_t constantInterpolationMask;     // bitmask indicating which attributes have constant interpolation
995     uint32_t pointSpriteTexCoordMask;       // bitmask indicating the attribute(s) which should be interpreted as tex coordinates
996 
997     uint8_t numAttributes;                  // total number of attributes to send to backend (up to 32)
998     uint8_t numComponents[32];              // number of components to setup per attribute, this reduces some calculations for unneeded components
999 
1000     bool swizzleEnable;                 // when enabled, core will parse the swizzle map when
1001                                         // setting up attributes for the backend, otherwise
1002                                         // all attributes up to numAttributes will be sent
1003     SWR_ATTRIB_SWIZZLE swizzleMap[32];
1004 };
1005 
1006 
// Depth/stencil state. The anonymous struct of named fields is overlaid by
// `value`, three 32-bit words covering the same storage.
union SWR_DEPTH_STENCIL_STATE
{
    struct
    {
        // dword 0 -- enables (5 bits)
        uint32_t depthWriteEnable             : 1;
        uint32_t depthTestEnable              : 1;
        uint32_t stencilWriteEnable           : 1;
        uint32_t stencilTestEnable            : 1;
        uint32_t doubleSidedStencilTestEnable : 1;

        // dword 0 -- test functions (2 x 3 bits)
        uint32_t depthTestFunc   : 3;
        uint32_t stencilTestFunc : 3;

        // dword 0 -- stencil ops and backface test function (7 x 3 bits);
        // the bitfields in dword 0 add up to 32 bits
        uint32_t backfaceStencilPassDepthPassOp : 3;
        uint32_t backfaceStencilPassDepthFailOp : 3;
        uint32_t backfaceStencilFailOp          : 3;
        uint32_t backfaceStencilTestFunc        : 3;
        uint32_t stencilPassDepthPassOp         : 3;
        uint32_t stencilPassDepthFailOp         : 3;
        uint32_t stencilFailOp                  : 3;

        // dword 1 -- write/test masks, one byte each
        uint8_t backfaceStencilWriteMask;
        uint8_t backfaceStencilTestMask;
        uint8_t stencilWriteMask;
        uint8_t stencilTestMask;

        // dword 2 -- reference values; only two bytes of this dword are named
        uint8_t backfaceStencilRefValue;
        uint8_t stencilRefValue;
    };
    uint32_t value[3];
};
1041 
// Shading rates.
enum SWR_SHADING_RATE
{
    SWR_SHADING_RATE_PIXEL  = 0,
    SWR_SHADING_RATE_SAMPLE = 1,
    // Left implicit so it stays one past the last rate above (currently 2).
    SWR_SHADING_RATE_COUNT,
};
1048 
// Input coverage modes.
enum SWR_INPUT_COVERAGE
{
    SWR_INPUT_COVERAGE_NONE               = 0,
    SWR_INPUT_COVERAGE_NORMAL             = 1,
    SWR_INPUT_COVERAGE_INNER_CONSERVATIVE = 2,
    // Left implicit so it stays one past the last mode above (currently 3).
    SWR_INPUT_COVERAGE_COUNT,
};
1056 
// Pixel-shader position offset kinds.
enum SWR_PS_POSITION_OFFSET
{
    SWR_PS_POSITION_SAMPLE_NONE     = 0,
    SWR_PS_POSITION_SAMPLE_OFFSET   = 1,
    SWR_PS_POSITION_CENTROID_OFFSET = 2,
    // Left implicit so it stays one past the last kind above (currently 3).
    SWR_PS_POSITION_OFFSET_COUNT,
};
1064 
// Barycentric selection flags: three distinct single-bit values (0x1, 0x2, 0x4).
enum SWR_BARYCENTRICS_MASK
{
    SWR_BARYCENTRIC_PER_PIXEL_MASK  = 1 << 0,
    SWR_BARYCENTRIC_CENTROID_MASK   = 1 << 1,
    SWR_BARYCENTRIC_PER_SAMPLE_MASK = 1 << 2,
};
1071 
1072 // pixel shader state
1073 struct SWR_PS_STATE
1074 {
1075     // dword 0-1
1076     PFN_PIXEL_KERNEL pfnPixelShader;  // @llvm_pfn
1077 
1078     // dword 2
1079     uint32_t killsPixel         : 1;    // pixel shader can kill pixels
1080     uint32_t inputCoverage      : 2;    // ps uses input coverage
1081     uint32_t writesODepth       : 1;    // pixel shader writes to depth
1082     uint32_t usesSourceDepth    : 1;    // pixel shader reads depth
1083     uint32_t shadingRate        : 2;    // shading per pixel / sample / coarse pixel
1084     uint32_t numRenderTargets   : 4;    // number of render target outputs in use (0-8)
1085     uint32_t posOffset          : 2;    // type of offset (none, sample, centroid) to add to pixel position
1086     uint32_t barycentricsMask   : 3;    // which type(s) of barycentric coords does the PS interpolate attributes with
1087     uint32_t usesUAV            : 1;    // pixel shader accesses UAV
1088     uint32_t forceEarlyZ        : 1;    // force execution of early depth/stencil test
1089 
1090 };
1091 
// depth bounds state
struct SWR_DEPTH_BOUNDS_STATE
{
    bool  depthBoundsTestEnable;   // enable flag for the depth bounds test
    float depthBoundsTestMinValue; // minimum value for the test
    float depthBoundsTestMaxValue; // maximum value for the test
};
1099 
1100