1 /****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * @file state.h
24 *
25 * @brief Definitions for API state.
26 *
27 ******************************************************************************/
28 #pragma once
29
30 #include "common/formats.h"
31 #include "common/simdintrin.h"
32
//////////////////////////////////////////////////////////////////////////
/// PRIMITIVE_TOPOLOGY
/// @brief Primitive topology codes. Every enumerator has a fixed numeric
///        value; 0x15 and 0x18-0x1E are not assigned.
//////////////////////////////////////////////////////////////////////////
enum PRIMITIVE_TOPOLOGY
{
    TOP_UNKNOWN                = 0x00,
    TOP_POINT_LIST             = 0x01,
    TOP_LINE_LIST              = 0x02,
    TOP_LINE_STRIP             = 0x03,
    TOP_TRIANGLE_LIST          = 0x04,
    TOP_TRIANGLE_STRIP         = 0x05,
    TOP_TRIANGLE_FAN           = 0x06,
    TOP_QUAD_LIST              = 0x07,
    TOP_QUAD_STRIP             = 0x08,
    TOP_LINE_LIST_ADJ          = 0x09,
    TOP_LISTSTRIP_ADJ          = 0x0A,
    TOP_TRI_LIST_ADJ           = 0x0B,
    TOP_TRI_STRIP_ADJ          = 0x0C,
    TOP_TRI_STRIP_REVERSE      = 0x0D,
    TOP_POLYGON                = 0x0E,
    TOP_RECT_LIST              = 0x0F,
    TOP_LINE_LOOP              = 0x10,
    TOP_POINT_LIST_BF          = 0x11,
    TOP_LINE_STRIP_CONT        = 0x12,
    TOP_LINE_STRIP_BF          = 0x13,
    TOP_LINE_STRIP_CONT_BF     = 0x14,
    TOP_TRIANGLE_FAN_NOSTIPPLE = 0x16,
    TOP_TRIANGLE_DISC          = 0x17, /// @todo What is this??

    // Patch lists: TOP_PATCHLIST_n == TOP_PATCHLIST_BASE + n for n = 1..32.
    TOP_PATCHLIST_BASE = 0x1F, // Invalid topology, used to calculate num verts for a patchlist.
    TOP_PATCHLIST_1    = TOP_PATCHLIST_BASE + 1,  // List of 1-vertex patches
    TOP_PATCHLIST_2    = TOP_PATCHLIST_BASE + 2,
    TOP_PATCHLIST_3    = TOP_PATCHLIST_BASE + 3,
    TOP_PATCHLIST_4    = TOP_PATCHLIST_BASE + 4,
    TOP_PATCHLIST_5    = TOP_PATCHLIST_BASE + 5,
    TOP_PATCHLIST_6    = TOP_PATCHLIST_BASE + 6,
    TOP_PATCHLIST_7    = TOP_PATCHLIST_BASE + 7,
    TOP_PATCHLIST_8    = TOP_PATCHLIST_BASE + 8,
    TOP_PATCHLIST_9    = TOP_PATCHLIST_BASE + 9,
    TOP_PATCHLIST_10   = TOP_PATCHLIST_BASE + 10,
    TOP_PATCHLIST_11   = TOP_PATCHLIST_BASE + 11,
    TOP_PATCHLIST_12   = TOP_PATCHLIST_BASE + 12,
    TOP_PATCHLIST_13   = TOP_PATCHLIST_BASE + 13,
    TOP_PATCHLIST_14   = TOP_PATCHLIST_BASE + 14,
    TOP_PATCHLIST_15   = TOP_PATCHLIST_BASE + 15,
    TOP_PATCHLIST_16   = TOP_PATCHLIST_BASE + 16,
    TOP_PATCHLIST_17   = TOP_PATCHLIST_BASE + 17,
    TOP_PATCHLIST_18   = TOP_PATCHLIST_BASE + 18,
    TOP_PATCHLIST_19   = TOP_PATCHLIST_BASE + 19,
    TOP_PATCHLIST_20   = TOP_PATCHLIST_BASE + 20,
    TOP_PATCHLIST_21   = TOP_PATCHLIST_BASE + 21,
    TOP_PATCHLIST_22   = TOP_PATCHLIST_BASE + 22,
    TOP_PATCHLIST_23   = TOP_PATCHLIST_BASE + 23,
    TOP_PATCHLIST_24   = TOP_PATCHLIST_BASE + 24,
    TOP_PATCHLIST_25   = TOP_PATCHLIST_BASE + 25,
    TOP_PATCHLIST_26   = TOP_PATCHLIST_BASE + 26,
    TOP_PATCHLIST_27   = TOP_PATCHLIST_BASE + 27,
    TOP_PATCHLIST_28   = TOP_PATCHLIST_BASE + 28,
    TOP_PATCHLIST_29   = TOP_PATCHLIST_BASE + 29,
    TOP_PATCHLIST_30   = TOP_PATCHLIST_BASE + 30,
    TOP_PATCHLIST_31   = TOP_PATCHLIST_BASE + 31,
    TOP_PATCHLIST_32   = TOP_PATCHLIST_BASE + 32, // List of 32-vertex patches
};
96
//////////////////////////////////////////////////////////////////////////
/// SWR_SHADER_TYPE
/// @brief Shader type identifiers, numbered consecutively from 0.
///        NUM_SHADER_TYPES is the count of real entries.
//////////////////////////////////////////////////////////////////////////
enum SWR_SHADER_TYPE
{
    SHADER_VERTEX   = 0,
    SHADER_GEOMETRY = 1,
    SHADER_DOMAIN   = 2,
    SHADER_HULL     = 3,
    SHADER_PIXEL    = 4,
    SHADER_COMPUTE  = 5,

    NUM_SHADER_TYPES = 6,
};
111
//////////////////////////////////////////////////////////////////////////
/// SWR_RENDERTARGET_ATTACHMENT
/// @brief Attachment indices: eight color entries followed by depth and
///        stencil. SWR_NUM_ATTACHMENTS is the count of real entries.
/// @todo It's not clear what an "attachment" means. It's not a common term.
//////////////////////////////////////////////////////////////////////////
enum SWR_RENDERTARGET_ATTACHMENT
{
    SWR_ATTACHMENT_COLOR0  = 0,
    SWR_ATTACHMENT_COLOR1  = 1,
    SWR_ATTACHMENT_COLOR2  = 2,
    SWR_ATTACHMENT_COLOR3  = 3,
    SWR_ATTACHMENT_COLOR4  = 4,
    SWR_ATTACHMENT_COLOR5  = 5,
    SWR_ATTACHMENT_COLOR6  = 6,
    SWR_ATTACHMENT_COLOR7  = 7,
    SWR_ATTACHMENT_DEPTH   = 8,
    SWR_ATTACHMENT_STENCIL = 9,

    SWR_NUM_ATTACHMENTS    = 10
};
131
// Number of color render targets.
#define SWR_NUM_RENDERTARGETS 8

// One bit per attachment: bits 0-7 are the color attachments, bit 8 is depth,
// bit 9 is stencil.
#define SWR_ATTACHMENT_COLOR0_BIT  (1 << 0)
#define SWR_ATTACHMENT_COLOR1_BIT  (1 << 1)
#define SWR_ATTACHMENT_COLOR2_BIT  (1 << 2)
#define SWR_ATTACHMENT_COLOR3_BIT  (1 << 3)
#define SWR_ATTACHMENT_COLOR4_BIT  (1 << 4)
#define SWR_ATTACHMENT_COLOR5_BIT  (1 << 5)
#define SWR_ATTACHMENT_COLOR6_BIT  (1 << 6)
#define SWR_ATTACHMENT_COLOR7_BIT  (1 << 7)
#define SWR_ATTACHMENT_DEPTH_BIT   (1 << 8)
#define SWR_ATTACHMENT_STENCIL_BIT (1 << 9)

// Combined masks: all color bits (0x0ff), and color + depth + stencil (0x3ff).
#define SWR_ATTACHMENT_MASK_COLOR  ((1 << SWR_NUM_RENDERTARGETS) - 1)
#define SWR_ATTACHMENT_MASK_ALL \
    (SWR_ATTACHMENT_MASK_COLOR | SWR_ATTACHMENT_DEPTH_BIT | SWR_ATTACHMENT_STENCIL_BIT)
146
147
//////////////////////////////////////////////////////////////////////////
/// @brief SWR Inner Tessellation factor ID
/// Indices of the inner tessellation factors; SWR_NUM_INNER_TESS_FACTORS is
/// the number of entries.
/// See the GetTessFactorOutputPosition code for documentation (no such code
/// appears above in this header).
enum SWR_INNER_TESSFACTOR_ID
{
    SWR_QUAD_U_TRI_INSIDE = 0,
    SWR_QUAD_V_INSIDE     = 1,

    SWR_NUM_INNER_TESS_FACTORS = 2,
};
158
//////////////////////////////////////////////////////////////////////////
/// @brief SWR Outer Tessellation factor ID
/// Indices of the outer tessellation factors; SWR_NUM_OUTER_TESS_FACTORS is
/// the number of entries.
/// See the GetTessFactorOutputPosition code for documentation (no such code
/// appears above in this header).
enum SWR_OUTER_TESSFACTOR_ID
{
    SWR_QUAD_U_EQ0_TRI_U_LINE_DETAIL  = 0,
    SWR_QUAD_V_EQ0_TRI_V_LINE_DENSITY = 1,
    SWR_QUAD_U_EQ1_TRI_W              = 2,
    SWR_QUAD_V_EQ1                    = 3,

    SWR_NUM_OUTER_TESS_FACTORS = 4,
};
171
172
/////////////////////////////////////////////////////////////////////////
/// simdvertex
/// @brief Defines a vertex element that holds all the data for SIMD vertices.
///        Contains position in clip space, hardcoded to attribute 0,
///        space for up to 32 attributes, as well as any SGV values generated
///        by the pipeline.
///
/// The VERTEX_*_SLOT constants below are slot numbers for the values named
/// in their comments.
/// NOTE(review): presumably these index simdvertex::attrib[], which would
/// require KNOB_NUM_ATTRIBUTES (defined elsewhere) to exceed the highest
/// slot number - confirm.
/////////////////////////////////////////////////////////////////////////
#define VERTEX_POSITION_SLOT 0
#define VERTEX_ATTRIB_START_SLOT 1
#define VERTEX_ATTRIB_END_SLOT 32
#define VERTEX_RTAI_SLOT 33 // GS writes RenderTargetArrayIndex here
#define VERTEX_PRIMID_SLOT 34 // GS writes PrimId here
#define VERTEX_CLIPCULL_DIST_LO_SLOT 35 // VS writes lower 4 clip/cull dist
#define VERTEX_CLIPCULL_DIST_HI_SLOT 36 // VS writes upper 4 clip/cull dist
#define VERTEX_POINT_SIZE_SLOT 37 // VS writes point size here
#define VERTEX_VIEWPORT_ARRAY_INDEX_SLOT 38 // NOTE(review): writer of this slot is not documented here
// SoAoSoA
struct simdvertex
{
    simdvector attrib[KNOB_NUM_ATTRIBUTES]; // one SIMD vector per attribute slot
};
194
//////////////////////////////////////////////////////////////////////////
/// SWR_VS_CONTEXT
/// @brief Input to vertex shader. Carries pointers to the SIMD input and
///        output vertex stores plus the per-invocation IDs and lane mask.
/////////////////////////////////////////////////////////////////////////
struct SWR_VS_CONTEXT
{
    simdvertex* pVin; // IN: SIMD input vertex data store
    simdvertex* pVout; // OUT: SIMD output vertex data store

    uint32_t InstanceID; // IN: Instance ID, constant across all verts of the SIMD
    simdscalari VertexID; // IN: Vertex ID
    simdscalari mask; // IN: Active mask for shader
};
208
/////////////////////////////////////////////////////////////////////////
/// ScalarAttrib
/// @brief A single four-component (x, y, z, w) float attribute.
///
/// ScalarCPoint
/// @brief defines a control point element as passed from the output
///        of the hull shader to the input of the domain shader
/////////////////////////////////////////////////////////////////////////
struct ScalarAttrib
{
    float x;
    float y;
    float z;
    float w;
};

// One control point: an array of KNOB_NUM_ATTRIBUTES scalar attributes.
struct ScalarCPoint
{
    ScalarAttrib attrib[KNOB_NUM_ATTRIBUTES];
};
226
//////////////////////////////////////////////////////////////////////////
/// SWR_TESSELLATION_FACTORS
/// @brief Tessellation factors structure (non-vector).
///        The arrays are sized by the SWR_NUM_OUTER_TESS_FACTORS and
///        SWR_NUM_INNER_TESS_FACTORS enumerators.
/////////////////////////////////////////////////////////////////////////
struct SWR_TESSELLATION_FACTORS
{
    float OuterTessFactors[SWR_NUM_OUTER_TESS_FACTORS];
    float InnerTessFactors[SWR_NUM_INNER_TESS_FACTORS];
};
236
#define MAX_NUM_VERTS_PER_PRIM 32 // support up to 32 control point patches

// A scalar (non-SIMD) patch: tessellation factors, up to
// MAX_NUM_VERTS_PER_PRIM control points, and one extra ScalarCPoint-sized
// record named patchData.
// NOTE(review): patchData presumably holds per-patch attributes - confirm.
struct ScalarPatch
{
    SWR_TESSELLATION_FACTORS tessFactors;
    ScalarCPoint cp[MAX_NUM_VERTS_PER_PRIM];
    ScalarCPoint patchData;
};
244
//////////////////////////////////////////////////////////////////////////
/// SWR_HS_CONTEXT
/// @brief Input to hull shader. Holds the SIMD input primitive vertices
///        by value and a pointer to the scalar output patches.
/////////////////////////////////////////////////////////////////////////
struct SWR_HS_CONTEXT
{
    simdvertex vert[MAX_NUM_VERTS_PER_PRIM]; // IN: (SIMD) input primitive data
    simdscalari PrimitiveID; // IN: (SIMD) primitive ID generated from the draw call
    simdscalari mask; // IN: Active mask for shader
    ScalarPatch* pCPout; // OUT: Output control point patch
                         // SIMD-sized-array of SCALAR patches
};
257
//////////////////////////////////////////////////////////////////////////
/// SWR_DS_CONTEXT
/// @brief Input to domain shader. Scalar per-patch inputs plus pointers to
///        SIMD domain coordinates and the SIMD output attribute array.
/////////////////////////////////////////////////////////////////////////
struct SWR_DS_CONTEXT
{
    uint32_t PrimitiveID; // IN: (SCALAR) PrimitiveID for the patch associated with the DS invocation
    uint32_t vectorOffset; // IN: (SCALAR) vector index offset into SIMD data.
    uint32_t vectorStride; // IN: (SCALAR) stride (in vectors) of output data per attribute-component
    ScalarPatch* pCpIn; // IN: (SCALAR) Control patch
    simdscalar* pDomainU; // IN: (SIMD) Domain Point U coords
    simdscalar* pDomainV; // IN: (SIMD) Domain Point V coords
    simdscalari mask; // IN: Active mask for shader
    simdscalar* pOutputData; // OUT: (SIMD) Vertex Attributes (2D array of vectors, one row per attribute-component)
};
273
//////////////////////////////////////////////////////////////////////////
/// SWR_GS_CONTEXT
/// @brief Input to geometry shader. Holds the SIMD input primitive
///        vertices by value; output goes through the two byte buffers and
///        the per-lane vertex count.
/////////////////////////////////////////////////////////////////////////
struct SWR_GS_CONTEXT
{
    simdvertex vert[MAX_NUM_VERTS_PER_PRIM]; // IN: input primitive data for SIMD prims
    simdscalari PrimitiveID; // IN: input primitive ID generated from the draw call
    uint32_t InstanceID; // IN: input instance ID
    simdscalari mask; // IN: Active mask for shader
    uint8_t* pStream; // OUT: output stream (contains vertices for all output streams)
    uint8_t* pCutOrStreamIdBuffer; // OUT: cut or stream id buffer
    simdscalari vertexCount; // OUT: num vertices emitted per SIMD lane
};
288
// A set of four SIMD scalars, one for each named evaluation position.
// SWR_PS_CONTEXT uses this type for pixel coordinates, barycentrics and 1/w.
// NOTE(review): UL presumably means the pixel's upper-left corner - confirm.
struct PixelPositions
{
    simdscalar UL;
    simdscalar center;
    simdscalar sample;
    simdscalar centroid;
};
296
// Size of per-sample arrays (see SWR_RASTSTATE::iSamplePos).
#define SWR_MAX_NUM_MULTISAMPLES 16

//////////////////////////////////////////////////////////////////////////
/// SWR_PS_CONTEXT
/// @brief Input to pixel shader. Also carries the shader's outputs
///        (fields marked OUT / INOUT below).
/////////////////////////////////////////////////////////////////////////
struct SWR_PS_CONTEXT
{
    PixelPositions vX; // IN: x location(s) of pixels
    PixelPositions vY; // IN: y location(s) of pixels
    simdscalar vZ; // INOUT: z location of pixels
    simdscalari activeMask; // OUT: mask for kill
    simdscalar inputMask; // IN: input coverage mask for all samples
    simdscalari oMask; // OUT: mask for output coverage

    PixelPositions vI; // barycentric coords evaluated at pixel center, sample position, centroid
    PixelPositions vJ; // (second barycentric coordinate; same layout as vI)
    PixelPositions vOneOverW; // IN: 1/w

    const float* pAttribs; // IN: pointer to attribute barycentric coefficients
    const float* pPerspAttribs; // IN: pointer to attribute/w barycentric coefficients
    const float* pRecipW; // IN: pointer to 1/w coord for each vertex
    const float *I; // IN: Barycentric A, B, and C coefs used to compute I
    const float *J; // IN: Barycentric A, B, and C coefs used to compute J
    float recipDet; // IN: 1/Det, used when barycentric interpolating attributes
    const float* pSamplePosX; // IN: array of sample positions
    const float* pSamplePosY; // IN: array of sample positions
    simdvector shaded[SWR_NUM_RENDERTARGETS];
                              // OUT: result color per rendertarget

    uint32_t frontFace; // IN: front- 1, back- 0
    uint32_t primID; // IN: primitive ID
    uint32_t sampleIndex; // IN: sampleIndex

    uint32_t rasterizerSampleCount; // IN: sample count used by the rasterizer

};
334
//////////////////////////////////////////////////////////////////////////
/// SWR_CS_CONTEXT
/// @brief Input to compute shader. Passes the tile counter and dispatch
///        dimensions from which the shader derives its system values.
/////////////////////////////////////////////////////////////////////////
struct SWR_CS_CONTEXT
{
    // The ThreadGroupId is the current thread group index relative
    // to all thread groups in the Dispatch call. The ThreadId, ThreadIdInGroup,
    // and ThreadIdInGroupFlattened can be derived from ThreadGroupId in the shader.

    // Compute shader accepts the following system values.
    // o ThreadId - Current thread id relative to all other threads in dispatch.
    // o ThreadGroupId - Current thread group id relative to all other groups in dispatch.
    // o ThreadIdInGroup - Current thread relative to all threads in the current thread group.
    // o ThreadIdInGroupFlattened - Flattened linear id derived from ThreadIdInGroup.
    //
    // All of these system values can be computed in the shader. They will be
    // derived from the current tile counter. The tile counter is an atomic counter that
    // resides in the draw context and is initialized to the product of the dispatch dims.
    //
    // tileCounter = dispatchDims.x * dispatchDims.y * dispatchDims.z
    //
    // Each CPU worker thread will atomically decrement this counter and pass the current
    // count into the shader. When the count reaches 0 then all thread groups in the
    // dispatch call have been completed.

    uint32_t tileCounter; // The tile counter value for this thread group.

    // Dispatch dimensions used by shader to compute system values from the tile counter.
    uint32_t dispatchDims[3];

    uint8_t* pTGSM; // Thread Group Shared Memory pointer.

    uint8_t* pSpillFillBuffer; // Spill/fill buffer for barrier support
};
370
// enums

// Surface tiling modes, numbered consecutively from 0.
// SWR_TILE_MODE_COUNT is the number of modes.
enum SWR_TILE_MODE
{
    SWR_TILE_NONE        = 0, // Linear mode (no tiling)
    SWR_TILE_MODE_WMAJOR = 1, // W major tiling
    SWR_TILE_MODE_XMAJOR = 2, // X major tiling
    SWR_TILE_MODE_YMAJOR = 3, // Y major tiling
    SWR_TILE_SWRZ        = 4, // SWR-Z tiling

    SWR_TILE_MODE_COUNT  = 5
};
382
// Surface types. The first six entries count up from 0; value 6 is not
// assigned and SURFACE_NULL is pinned at 7.
enum SWR_SURFACE_TYPE
{
    SURFACE_1D,                // 0
    SURFACE_2D,                // 1
    SURFACE_3D,                // 2
    SURFACE_CUBE,              // 3
    SURFACE_BUFFER,            // 4
    SURFACE_STRUCTURED_BUFFER, // 5
    SURFACE_NULL = 7
};
393
// Comparison function codes, numbered consecutively from 0.
// NUM_ZFUNC is the number of functions.
enum SWR_ZFUNCTION
{
    ZFUNC_ALWAYS = 0,
    ZFUNC_NEVER  = 1,
    ZFUNC_LT     = 2,
    ZFUNC_EQ     = 3,
    ZFUNC_LE     = 4,
    ZFUNC_GT     = 5,
    ZFUNC_NE     = 6,
    ZFUNC_GE     = 7,
    NUM_ZFUNC    = 8
};
406
// Stencil operation codes, numbered consecutively from 0.
enum SWR_STENCILOP
{
    STENCILOP_KEEP    = 0,
    STENCILOP_ZERO    = 1,
    STENCILOP_REPLACE = 2,
    STENCILOP_INCRSAT = 3,
    STENCILOP_DECRSAT = 4,
    STENCILOP_INCR    = 5,
    STENCILOP_DECR    = 6,
    STENCILOP_INVERT  = 7
};
418
// Blend factor codes, numbered consecutively from 0.
// Entries 0-9 are the direct factors, 10 is ZERO, and 11-18 are the INV_ forms.
enum SWR_BLEND_FACTOR
{
    BLENDFACTOR_ONE                = 0,
    BLENDFACTOR_SRC_COLOR          = 1,
    BLENDFACTOR_SRC_ALPHA          = 2,
    BLENDFACTOR_DST_ALPHA          = 3,
    BLENDFACTOR_DST_COLOR          = 4,
    BLENDFACTOR_SRC_ALPHA_SATURATE = 5,
    BLENDFACTOR_CONST_COLOR        = 6,
    BLENDFACTOR_CONST_ALPHA        = 7,
    BLENDFACTOR_SRC1_COLOR         = 8,
    BLENDFACTOR_SRC1_ALPHA         = 9,
    BLENDFACTOR_ZERO               = 10,
    BLENDFACTOR_INV_SRC_COLOR      = 11,
    BLENDFACTOR_INV_SRC_ALPHA      = 12,
    BLENDFACTOR_INV_DST_ALPHA      = 13,
    BLENDFACTOR_INV_DST_COLOR      = 14,
    BLENDFACTOR_INV_CONST_COLOR    = 15,
    BLENDFACTOR_INV_CONST_ALPHA    = 16,
    BLENDFACTOR_INV_SRC1_COLOR     = 17,
    BLENDFACTOR_INV_SRC1_ALPHA     = 18
};
441
// Blend operation codes, numbered consecutively from 0.
enum SWR_BLEND_OP
{
    BLENDOP_ADD         = 0,
    BLENDOP_SUBTRACT    = 1,
    BLENDOP_REVSUBTRACT = 2,
    BLENDOP_MIN         = 3,
    BLENDOP_MAX         = 4,
};
450
// Logic operation codes; the sixteen entries occupy the values 0x0-0xF.
enum SWR_LOGIC_OP
{
    LOGICOP_CLEAR         = 0x0,
    LOGICOP_NOR           = 0x1,
    LOGICOP_AND_INVERTED  = 0x2,
    LOGICOP_COPY_INVERTED = 0x3,
    LOGICOP_AND_REVERSE   = 0x4,
    LOGICOP_INVERT        = 0x5,
    LOGICOP_XOR           = 0x6,
    LOGICOP_NAND          = 0x7,
    LOGICOP_AND           = 0x8,
    LOGICOP_EQUIV         = 0x9,
    LOGICOP_NOOP          = 0xA,
    LOGICOP_OR_INVERTED   = 0xB,
    LOGICOP_COPY          = 0xC,
    LOGICOP_OR_REVERSE    = 0xD,
    LOGICOP_OR            = 0xE,
    LOGICOP_SET           = 0xF,
};
470
//////////////////////////////////////////////////////////////////////////
/// SWR_AUX_MODE
/// @brief Specifies how the auxiliary buffer is used by the driver.
///        Values are numbered consecutively from 0.
//////////////////////////////////////////////////////////////////////////
enum SWR_AUX_MODE
{
    AUX_MODE_NONE  = 0,
    AUX_MODE_COLOR = 1,
    AUX_MODE_UAV   = 2,
    AUX_MODE_DEPTH = 3,
};
482
//////////////////////////////////////////////////////////////////////////
/// SWR_SURFACE_STATE
/// @brief Describes one surface: base address, type, format, dimensions,
///        sampling/LOD parameters, tiling and the optional auxiliary buffer.
/// @note Fields tagged "@llvm_enum" are enum-typed.
///       NOTE(review): the tag is presumably consumed by tooling that mirrors
///       this layout - keep the tags and field order intact; confirm.
//////////////////////////////////////////////////////////////////////////
struct SWR_SURFACE_STATE
{
    uint8_t *pBaseAddress;
    SWR_SURFACE_TYPE type; // @llvm_enum
    SWR_FORMAT format; // @llvm_enum
    uint32_t width;
    uint32_t height;
    uint32_t depth;
    uint32_t numSamples;
    uint32_t samplePattern;
    uint32_t pitch;
    uint32_t qpitch;
    uint32_t minLod; // for sampled surfaces, the most detailed LOD that can be accessed by sampler
    uint32_t maxLod; // for sampled surfaces, the max LOD that can be accessed
    float resourceMinLod; // for sampled surfaces, the most detailed fractional mip that can be accessed by sampler
    uint32_t lod; // for render targets, the lod being rendered to
    uint32_t arrayIndex; // for render targets, the array index being rendered to for arrayed surfaces
    SWR_TILE_MODE tileMode; // @llvm_enum
    uint32_t halign;
    uint32_t valign;
    uint32_t xOffset;
    uint32_t yOffset;

    uint32_t lodOffsets[2][15]; // lod offsets for sampled surfaces

    uint8_t *pAuxBaseAddress; // Used for compression, append/consume counter, etc.
    SWR_AUX_MODE auxMode; // @llvm_enum

    bool bInterleavedSamples; // are MSAA samples stored interleaved or planar
};
516
// vertex fetch state
// Describes one bound vertex buffer: its data pointer, pitch and size, plus
// two values precalculated from size and pitch for out-of-bounds checks.
// WARNING- any changes to this struct need to be reflected
// in the fetch shader jit
struct SWR_VERTEX_BUFFER_STATE
{
    uint32_t index;
    uint32_t pitch;
    const uint8_t *pData;
    uint32_t size;
    uint32_t numaNode;
    uint32_t maxVertex; // size / pitch. precalculated value used by fetch shader for OOB checks
    uint32_t partialInboundsSize; // size % pitch. precalculated value used by fetch shader for partially OOB vertices
};
530
// Describes the bound index buffer: index format, data pointer and size.
// NOTE(review): the unit of `size` is not stated here - presumably bytes; confirm.
struct SWR_INDEX_BUFFER_STATE
{
    // Format type for indices (e.g. UINT16, UINT32, etc.)
    SWR_FORMAT format; // @llvm_enum
    const void *pIndices;
    uint32_t size;
};
538
539
//////////////////////////////////////////////////////////////////////////
/// SWR_FETCH_CONTEXT
/// @brief Input to fetch shader. Also carries the two vector results the
///        fetch shader produces (fields marked OUT below).
/// @note WARNING - Changes to this struct need to be reflected in the
///       fetch shader jit.
/////////////////////////////////////////////////////////////////////////
struct SWR_FETCH_CONTEXT
{
    const SWR_VERTEX_BUFFER_STATE* pStreams; // IN: array of bound vertex buffers
    const int32_t* pIndices; // IN: pointer to index buffer for indexed draws
    const int32_t* pLastIndex; // IN: pointer to end of index buffer, used for bounds checking
    uint32_t CurInstance; // IN: current instance
    uint32_t BaseVertex; // IN: base vertex
    uint32_t StartVertex; // IN: start vertex
    uint32_t StartInstance; // IN: start instance
    simdscalari VertexID; // OUT: vector of vertex IDs
    simdscalari CutMask; // OUT: vector mask of indices which have the cut index value
};
558
559 //////////////////////////////////////////////////////////////////////////
560 /// SWR_STATS
561 ///
562 /// @brief All statistics generated by SWR go here. These are public
563 /// to driver.
564 /////////////////////////////////////////////////////////////////////////
OSALIGNLINE(struct)565 OSALIGNLINE(struct) SWR_STATS
566 {
567 // Occlusion Query
568 uint64_t DepthPassCount; // Number of passing depth tests. Not exact.
569
570 // Pipeline Stats
571 uint64_t PsInvocations; // Number of Pixel Shader invocations
572 uint64_t CsInvocations; // Number of Compute Shader invocations
573
574 };
575
576 //////////////////////////////////////////////////////////////////////////
577 /// SWR_STATS
578 ///
579 /// @brief All statistics generated by FE.
580 /////////////////////////////////////////////////////////////////////////
OSALIGNLINE(struct)581 OSALIGNLINE(struct) SWR_STATS_FE
582 {
583 uint64_t IaVertices; // Number of Fetch Shader vertices
584 uint64_t IaPrimitives; // Number of PA primitives.
585 uint64_t VsInvocations; // Number of Vertex Shader invocations
586 uint64_t HsInvocations; // Number of Hull Shader invocations
587 uint64_t DsInvocations; // Number of Domain Shader invocations
588 uint64_t GsInvocations; // Number of Geometry Shader invocations
589 uint64_t GsPrimitives; // Number of prims GS outputs.
590 uint64_t CInvocations; // Number of clipper invocations
591 uint64_t CPrimitives; // Number of clipper primitives.
592
593 // Streamout Stats
594 uint64_t SoPrimStorageNeeded[4];
595 uint64_t SoNumPrimsWritten[4];
596 };
597
//////////////////////////////////////////////////////////////////////////
/// STREAMOUT_BUFFERS
/// @brief Stream-output limits and the description of one stream-output
///        buffer. Sizes, pitches and offsets are expressed in dwords.
/////////////////////////////////////////////////////////////////////////

#define MAX_SO_STREAMS 4
#define MAX_SO_BUFFERS 4
#define MAX_ATTRIBUTES 32

struct SWR_STREAMOUT_BUFFER
{
    bool enable;
    bool soWriteEnable;

    // Pointer to the streamout buffer.
    uint32_t* pBuffer;

    // Size of buffer in dwords.
    uint32_t bufferSize;

    // Vertex pitch of buffer in dwords.
    uint32_t pitch;

    // Offset into buffer in dwords. SOS will increment this offset.
    uint32_t streamOffset;

    // Offset to the SO write offset. If not null then we update offset here.
    uint32_t* pWriteOffset;

};
627
//////////////////////////////////////////////////////////////////////////
/// STREAMOUT_STATE
/// @brief Stream-output configuration: global and per-stream enables,
///        rasterizer routing, and per-stream attribute masks/counts.
/////////////////////////////////////////////////////////////////////////
struct SWR_STREAMOUT_STATE
{
    // This disables stream output.
    // NOTE(review): the field is named soEnable, so presumably stream output
    // is disabled when it is false - confirm.
    bool soEnable;

    // which streams are enabled for streamout
    bool streamEnable[MAX_SO_STREAMS];

    // If set then do not send any streams to the rasterizer.
    bool rasterizerDisable;

    // Specifies which stream to send to the rasterizer.
    uint32_t streamToRasterizer;

    // The stream masks specify which attributes are sent to which streams.
    // These masks help the FE to set up the pPrimData buffer that is passed
    // to the Stream Output Shader (SOS) function.
    uint32_t streamMasks[MAX_SO_STREAMS];

    // Number of attributes, including position, per vertex that are streamed out.
    // This should match number of bits in stream mask.
    uint32_t streamNumEntries[MAX_SO_STREAMS];
};
654
//////////////////////////////////////////////////////////////////////////
/// STREAMOUT_CONTEXT - Passed to SOS
/// @brief Holds the primitive data pointer, the stream-output buffer
///        pointers, and the two primitive counters.
/////////////////////////////////////////////////////////////////////////
struct SWR_STREAMOUT_CONTEXT
{
    uint32_t* pPrimData;
    // NOTE(review): this array is sized by MAX_SO_STREAMS although it holds
    // buffers; MAX_SO_BUFFERS has the same value (4) - confirm intent.
    SWR_STREAMOUT_BUFFER* pBuffer[MAX_SO_STREAMS];

    // Num prims written for this stream
    uint32_t numPrimsWritten;

    // Num prims that should have been written if there were no overflow.
    uint32_t numPrimStorageNeeded;
};
669
//////////////////////////////////////////////////////////////////////////
/// SWR_GS_STATE - Geometry shader state
/// @brief Enable flag, input/output shape, instancing and the set of
///        system values and streams the geometry shader emits.
/////////////////////////////////////////////////////////////////////////
struct SWR_GS_STATE
{
    // geometry shader stage enable flag
    bool gsEnable;

    // number of input attributes per vertex. used by the frontend to
    // optimize assembling primitives for GS
    uint32_t numInputAttribs;

    // output topology - can be point, tristrip, or linestrip
    PRIMITIVE_TOPOLOGY outputTopology; // @llvm_enum

    // maximum number of verts that can be emitted by a single instance of the GS
    uint32_t maxNumVerts;

    // instance count
    uint32_t instanceCount;

    // geometry shader emits renderTargetArrayIndex
    bool emitsRenderTargetArrayIndex;

    // geometry shader emits PrimitiveID
    bool emitsPrimitiveID;

    // geometry shader emits ViewportArrayIndex
    bool emitsViewportArrayIndex;

    // if true, geometry shader emits a single stream, with separate cut buffer.
    // if false, geometry shader emits vertices for multiple streams to the stream buffer, with a separate StreamID buffer
    // to map vertices to streams
    bool isSingleStream;

    // when single stream is enabled, singleStreamID dictates which stream is being output.
    // field ignored if isSingleStream is false
    uint32_t singleStreamID;
};
708
709
//////////////////////////////////////////////////////////////////////////
/// SWR_TS_OUTPUT_TOPOLOGY - Defines data output by the tessellator / DS
/// Values are numbered consecutively from 0;
/// SWR_TS_OUTPUT_TOPOLOGY_COUNT is the number of entries.
/////////////////////////////////////////////////////////////////////////
enum SWR_TS_OUTPUT_TOPOLOGY
{
    SWR_TS_OUTPUT_POINT   = 0,
    SWR_TS_OUTPUT_LINE    = 1,
    SWR_TS_OUTPUT_TRI_CW  = 2,
    SWR_TS_OUTPUT_TRI_CCW = 3,

    SWR_TS_OUTPUT_TOPOLOGY_COUNT = 4
};
722
//////////////////////////////////////////////////////////////////////////
/// SWR_TS_PARTITIONING - Defines tessellation algorithm
/// Values are numbered consecutively from 0;
/// SWR_TS_PARTITIONING_COUNT is the number of entries.
/////////////////////////////////////////////////////////////////////////
enum SWR_TS_PARTITIONING
{
    SWR_TS_INTEGER         = 0,
    SWR_TS_ODD_FRACTIONAL  = 1,
    SWR_TS_EVEN_FRACTIONAL = 2,

    SWR_TS_PARTITIONING_COUNT = 3
};
734
//////////////////////////////////////////////////////////////////////////
/// SWR_TS_DOMAIN - Defines Tessellation Domain
/// Values are numbered consecutively from 0;
/// SWR_TS_DOMAIN_COUNT is the number of entries.
/////////////////////////////////////////////////////////////////////////
enum SWR_TS_DOMAIN
{
    SWR_TS_QUAD    = 0,
    SWR_TS_TRI     = 1,
    SWR_TS_ISOLINE = 2,

    SWR_TS_DOMAIN_COUNT = 3
};
746
//////////////////////////////////////////////////////////////////////////
/// SWR_TS_STATE - Tessellation state
/// @brief Enable flag, tessellator configuration (output topology,
///        partitioning, domain), post-DS topology and HS/DS attribute counts.
/////////////////////////////////////////////////////////////////////////
struct SWR_TS_STATE
{
    bool tsEnable;
    SWR_TS_OUTPUT_TOPOLOGY tsOutputTopology; // @llvm_enum
    SWR_TS_PARTITIONING partitioning; // @llvm_enum
    SWR_TS_DOMAIN domain; // @llvm_enum

    PRIMITIVE_TOPOLOGY postDSTopology; // @llvm_enum

    // attribute counts for hull shader input/output and domain shader output
    uint32_t numHsInputAttribs;
    uint32_t numHsOutputAttribs;
    uint32_t numDsOutputAttribs;
};
763
// output merger state
// Per-render-target write-disable flags, one bit per color channel, packed
// into a single byte (enforced by the static_assert below).
struct SWR_RENDER_TARGET_BLEND_STATE
{
    uint8_t writeDisableRed : 1;
    uint8_t writeDisableGreen : 1;
    uint8_t writeDisableBlue : 1;
    uint8_t writeDisableAlpha : 1;
};
static_assert(sizeof(SWR_RENDER_TARGET_BLEND_STATE) == 1, "Invalid SWR_RENDER_TARGET_BLEND_STATE size");
773
// Multisample count selectors, numbered consecutively from 0.
// The enumerator value is an index, not the sample count itself.
// SWR_MULTISAMPLE_TYPE_COUNT is the number of selectors.
enum SWR_MULTISAMPLE_COUNT
{
    SWR_MULTISAMPLE_1X  = 0,
    SWR_MULTISAMPLE_2X  = 1,
    SWR_MULTISAMPLE_4X  = 2,
    SWR_MULTISAMPLE_8X  = 3,
    SWR_MULTISAMPLE_16X = 4,
    SWR_MULTISAMPLE_TYPE_COUNT = 5
};
783
// Blend state shared by all render targets, followed by the per-target
// write-disable flags. The total size is pinned to 36 bytes by the
// static_assert below, so adding or resizing a field requires updating it.
struct SWR_BLEND_STATE
{
    // constant blend factor color in RGBA float
    float constantColor[4];

    // alpha test reference value in unorm8 or float32
    uint32_t alphaTestReference;
    uint32_t sampleMask;
    // all RT's have the same sample count
    ///@todo move this to Output Merger state when we refactor
    SWR_MULTISAMPLE_COUNT sampleCount; // @llvm_enum

    SWR_RENDER_TARGET_BLEND_STATE renderTarget[SWR_NUM_RENDERTARGETS];
};
static_assert(sizeof(SWR_BLEND_STATE) == 36, "Invalid SWR_BLEND_STATE size");
799
//////////////////////////////////////////////////////////////////////////
/// FUNCTION POINTERS FOR SHADERS
/// Each shader-stage entry point takes an opaque HANDLE plus a pointer to
/// that stage's context struct; the fetch and stream-output entry points
/// take their context by reference and no HANDLE.

// Fetch shader: fetch context in, one simdvertex out (both by reference).
typedef void(__cdecl *PFN_FETCH_FUNC)(SWR_FETCH_CONTEXT& fetchInfo, simdvertex& out);
// Per-stage shader entry points.
typedef void(__cdecl *PFN_VERTEX_FUNC)(HANDLE hPrivateData, SWR_VS_CONTEXT* pVsContext);
typedef void(__cdecl *PFN_HS_FUNC)(HANDLE hPrivateData, SWR_HS_CONTEXT* pHsContext);
typedef void(__cdecl *PFN_DS_FUNC)(HANDLE hPrivateData, SWR_DS_CONTEXT* pDsContext);
typedef void(__cdecl *PFN_GS_FUNC)(HANDLE hPrivateData, SWR_GS_CONTEXT* pGsContext);
typedef void(__cdecl *PFN_CS_FUNC)(HANDLE hPrivateData, SWR_CS_CONTEXT* pCsContext);
// Stream-output shader (SOS).
typedef void(__cdecl *PFN_SO_FUNC)(SWR_STREAMOUT_CONTEXT& soContext);
// Two pixel-kernel pointer types with identical signatures.
typedef void(__cdecl *PFN_PIXEL_KERNEL)(HANDLE hPrivateData, SWR_PS_CONTEXT *pContext);
typedef void(__cdecl *PFN_CPIXEL_KERNEL)(HANDLE hPrivateData, SWR_PS_CONTEXT *pContext);
// JIT-compiled blend function: blend state, source colors, sample index,
// destination pointer, result vector and two mask pointers.
typedef void(__cdecl *PFN_BLEND_JIT_FUNC)(const SWR_BLEND_STATE*,
    simdvector& vSrc, simdvector& vSrc1, simdscalar& vSrc0Alpha, uint32_t sample,
    uint8_t* pDst, simdvector& vResult, simdscalari* vOMask, simdscalari* vCoverageMask);
// Depth quantization: SIMD scalar in, SIMD scalar out. Unlike the pointers
// above, this one is declared without __cdecl.
typedef simdscalar(*PFN_QUANTIZE_DEPTH)(simdscalar);
816
817
818
//////////////////////////////////////////////////////////////////////////
/// FRONTEND_STATE
/// @brief Front-end flags and provoking-vertex selection.
/////////////////////////////////////////////////////////////////////////
struct SWR_FRONTEND_STATE
{
    // skip clip test, perspective divide, and viewport transform
    // intended for verts in screen space
    bool vpTransformDisable;
    bool bEnableCutIndex;
    // Provoking vertex per topology family, packed as bit-fields; `bits`
    // aliases the same storage so all fields can be read or written at once.
    union
    {
        struct
        {
            uint32_t triFan : 2;
            uint32_t lineStripList : 1;
            uint32_t triStripList : 2;
        };
        uint32_t bits;
    } provokingVertex;
    uint32_t topologyProvokingVertex; // provoking vertex for the draw topology
};
840
//////////////////////////////////////////////////////////////////////////
/// VIEWPORT_MATRIX
/// @brief Six floats named after matrix elements m00, m11, m22, m30, m31, m32.
/// NOTE(review): presumably the diagonal scale and the translation row of
/// the viewport transform - confirm against the code that fills it.
/////////////////////////////////////////////////////////////////////////
struct SWR_VIEWPORT_MATRIX
{
    float m00;
    float m11;
    float m22;
    float m30;
    float m31;
    float m32;
};
853
//////////////////////////////////////////////////////////////////////////
/// VIEWPORT_MATRICES
/// @brief The same six elements as SWR_VIEWPORT_MATRIX, stored as one
///        array per element with KNOB_NUM_VIEWPORTS_SCISSORS entries each
///        (structure-of-arrays layout).
/////////////////////////////////////////////////////////////////////////
struct SWR_VIEWPORT_MATRICES
{
    float m00[KNOB_NUM_VIEWPORTS_SCISSORS];
    float m11[KNOB_NUM_VIEWPORTS_SCISSORS];
    float m22[KNOB_NUM_VIEWPORTS_SCISSORS];
    float m30[KNOB_NUM_VIEWPORTS_SCISSORS];
    float m31[KNOB_NUM_VIEWPORTS_SCISSORS];
    float m32[KNOB_NUM_VIEWPORTS_SCISSORS];
};
866
//////////////////////////////////////////////////////////////////////////
/// SWR_VIEWPORT
/// @brief Viewport rectangle (x, y, width, height) and depth range
///        (minZ, maxZ), all as floats.
/////////////////////////////////////////////////////////////////////////
struct SWR_VIEWPORT
{
    float x;
    float y;
    float width;
    float height;
    float minZ;
    float maxZ;
};
879
//////////////////////////////////////////////////////////////////////////
/// SWR_CULLMODE
/// @brief Cull mode codes, numbered consecutively from 0.
//////////////////////////////////////////////////////////////////////////
enum SWR_CULLMODE
{
    SWR_CULLMODE_BOTH  = 0,
    SWR_CULLMODE_NONE  = 1,
    SWR_CULLMODE_FRONT = 2,
    SWR_CULLMODE_BACK  = 3
};
890
// Fill mode codes, numbered consecutively from 0.
enum SWR_FILLMODE
{
    SWR_FILLMODE_POINT     = 0,
    SWR_FILLMODE_WIREFRAME = 1,
    SWR_FILLMODE_SOLID     = 2
};
897
// Front-face winding codes: CW is 0, CCW is 1.
enum SWR_FRONTWINDING
{
    SWR_FRONTWINDING_CW  = 0,
    SWR_FRONTWINDING_CCW = 1
};
903
904
// MSAA sample pattern codes, numbered consecutively from 0.
// SWR_MSAA_SAMPLE_PATTERN_COUNT is the number of patterns.
enum SWR_MSAA_SAMPLE_PATTERN
{
    SWR_MSAA_CENTER_PATTERN       = 0,
    SWR_MSAA_STANDARD_PATTERN     = 1,
    SWR_MSAA_SAMPLE_PATTERN_COUNT = 2
};
911
// Pixel location codes: CENTER is 0, UL is 1.
enum SWR_PIXEL_LOCATION
{
    SWR_PIXEL_LOCATION_CENTER = 0,
    SWR_PIXEL_LOCATION_UL     = 1,
};
917
// fixed point screen space sample locations within a pixel
// NOTE(review): the fixed-point format (number of fractional bits) is not
// stated in this header - confirm against the code that fills these.
struct SWR_MULTISAMPLE_POS
{
    uint32_t x;
    uint32_t y;
};
924
// MSAA rasterization mode codes, numbered consecutively from 0.
enum SWR_MSAA_RASTMODE
{
    SWR_MSAA_RASTMODE_OFF_PIXEL   = 0,
    SWR_MSAA_RASTMODE_OFF_PATTERN = 1,
    SWR_MSAA_RASTMODE_ON_PIXEL    = 2,
    SWR_MSAA_RASTMODE_ON_PATTERN  = 3
};
932
//////////////////////////////////////////////////////////////////////////
/// SWR_RASTSTATE
/// @brief Rasterizer state: a group of packed mode/enable bit-fields,
///        followed by point/line sizes, depth bias parameters, multisample
///        configuration and user clip/cull distance masks.
//////////////////////////////////////////////////////////////////////////
struct SWR_RASTSTATE
{
    // Packed mode and enable bits.
    // NOTE(review): cullMode / fillMode / frontWinding presumably hold
    // SWR_CULLMODE / SWR_FILLMODE / SWR_FRONTWINDING values - confirm.
    uint32_t cullMode : 2;
    uint32_t fillMode : 2;
    uint32_t frontWinding : 1;
    uint32_t scissorEnable : 1;
    uint32_t depthClipEnable : 1;
    uint32_t clipHalfZ : 1;
    uint32_t pointParam : 1;
    uint32_t pointSpriteEnable : 1;
    uint32_t pointSpriteTopOrigin : 1;
    uint32_t msaaRastEnable : 1;
    uint32_t forcedSampleCount : 1;
    uint32_t pixelOffset : 1;
    uint32_t depthBiasPreAdjusted : 1; ///< depth bias constant is in float units, not per-format Z units
    uint32_t conservativeRast : 1;

    float pointSize;
    float lineWidth;

    float depthBias;
    float slopeScaledDepthBias;
    float depthBiasClamp;
    SWR_FORMAT depthFormat; // @llvm_enum

    ///@todo: MSAA lines
    // multisample state for MSAA lines
    SWR_MSAA_RASTMODE rastMode; // @llvm_enum

    // sample count the rasterizer is running at
    SWR_MULTISAMPLE_COUNT sampleCount; // @llvm_enum
    uint32_t pixelLocation; // UL or Center
    SWR_MULTISAMPLE_POS iSamplePos[SWR_MAX_NUM_MULTISAMPLES]; // one position per sample slot
    SWR_MSAA_SAMPLE_PATTERN samplePattern; // @llvm_enum

    // user clip/cull distance enables
    uint8_t cullDistanceMask;
    uint8_t clipDistanceMask;
};
975
/// Constant source selection. The four values fit in two bits;
/// presumably this is what SWR_ATTRIB_SWIZZLE::constantSource holds.
enum SWR_CONSTANT_SOURCE
{
    SWR_CONSTANT_SOURCE_CONST_0000       = 0,
    SWR_CONSTANT_SOURCE_CONST_0001_FLOAT = 1,
    SWR_CONSTANT_SOURCE_CONST_1111_FLOAT = 2,
    SWR_CONSTANT_SOURCE_PRIM_ID          = 3
};
983
/// Swizzle entry for one attribute, packed into 11 bits of uint16_t
/// bit-fields (5 + 2 + 4).
struct SWR_ATTRIB_SWIZZLE
{
    uint16_t sourceAttrib           : 5;    // attribute to read from
    uint16_t constantSource         : 2;    // which constant source to apply
    uint16_t componentOverrideMask  : 4;    // components replaced by the constant source
};
990
991 // backend state
992 struct SWR_BACKEND_STATE
993 {
994 uint32_t constantInterpolationMask; // bitmask indicating which attributes have constant interpolation
995 uint32_t pointSpriteTexCoordMask; // bitmask indicating the attribute(s) which should be interpreted as tex coordinates
996
997 uint8_t numAttributes; // total number of attributes to send to backend (up to 32)
998 uint8_t numComponents[32]; // number of components to setup per attribute, this reduces some calculations for unneeded components
999
1000 bool swizzleEnable; // when enabled, core will parse the swizzle map when
1001 // setting up attributes for the backend, otherwise
1002 // all attributes up to numAttributes will be sent
1003 SWR_ATTRIB_SWIZZLE swizzleMap[32];
1004 };
1005
1006
/// Depth/stencil state, accessible either through named fields or as
/// three raw dwords in value[].
/// NOTE(review): dword 2 names only two bytes; the other two bytes of
/// value[2] have no field, so whole-dword comparisons presumably rely on
/// the state being zeroed first -- confirm against the callers.
union SWR_DEPTH_STENCIL_STATE
{
    struct
    {
        // dword 0: enables (5 bits), test functions (2 x 3 bits),
        //          stencil ops and back-face test function (7 x 3 bits)
        uint32_t depthWriteEnable               : 1;
        uint32_t depthTestEnable                : 1;
        uint32_t stencilWriteEnable             : 1;
        uint32_t stencilTestEnable              : 1;
        uint32_t doubleSidedStencilTestEnable   : 1;

        uint32_t depthTestFunc                  : 3;
        uint32_t stencilTestFunc                : 3;

        uint32_t backfaceStencilPassDepthPassOp : 3;
        uint32_t backfaceStencilPassDepthFailOp : 3;
        uint32_t backfaceStencilFailOp          : 3;
        uint32_t backfaceStencilTestFunc        : 3;
        uint32_t stencilPassDepthPassOp         : 3;
        uint32_t stencilPassDepthFailOp         : 3;
        uint32_t stencilFailOp                  : 3;

        // dword 1: write/test masks
        uint8_t backfaceStencilWriteMask;
        uint8_t backfaceStencilTestMask;
        uint8_t stencilWriteMask;
        uint8_t stencilTestMask;

        // dword 2: reference values
        uint8_t backfaceStencilRefValue;
        uint8_t stencilRefValue;
    };
    uint32_t value[3];  // raw dword view of the fields above
};
1041
/// Shading rate selection: per pixel or per sample.
/// SWR_SHADING_RATE_COUNT is last, so it equals the number of rates.
enum SWR_SHADING_RATE
{
    SWR_SHADING_RATE_PIXEL  = 0,
    SWR_SHADING_RATE_SAMPLE = 1,
    SWR_SHADING_RATE_COUNT  = 2,
};
1048
/// Input coverage selection.
/// SWR_INPUT_COVERAGE_COUNT is last, so it equals the number of modes.
enum SWR_INPUT_COVERAGE
{
    SWR_INPUT_COVERAGE_NONE               = 0,
    SWR_INPUT_COVERAGE_NORMAL             = 1,
    SWR_INPUT_COVERAGE_INNER_CONSERVATIVE = 2,
    SWR_INPUT_COVERAGE_COUNT              = 3,
};
1056
/// Position offset selection: none, sample or centroid.
/// SWR_PS_POSITION_OFFSET_COUNT is last, so it equals the number of kinds.
enum SWR_PS_POSITION_OFFSET
{
    SWR_PS_POSITION_SAMPLE_NONE     = 0,
    SWR_PS_POSITION_SAMPLE_OFFSET   = 1,
    SWR_PS_POSITION_CENTROID_OFFSET = 2,
    SWR_PS_POSITION_OFFSET_COUNT    = 3,
};
1064
/// Barycentric kinds as single-bit flags, one bit each, so they can be
/// OR-ed into a 3-bit mask.
enum SWR_BARYCENTRICS_MASK
{
    SWR_BARYCENTRIC_PER_PIXEL_MASK  = 1 << 0,
    SWR_BARYCENTRIC_CENTROID_MASK   = 1 << 1,
    SWR_BARYCENTRIC_PER_SAMPLE_MASK = 1 << 2,
};
1071
1072 // pixel shader state
1073 struct SWR_PS_STATE
1074 {
1075 // dword 0-1
1076 PFN_PIXEL_KERNEL pfnPixelShader; // @llvm_pfn
1077
1078 // dword 2
1079 uint32_t killsPixel : 1; // pixel shader can kill pixels
1080 uint32_t inputCoverage : 2; // ps uses input coverage
1081 uint32_t writesODepth : 1; // pixel shader writes to depth
1082 uint32_t usesSourceDepth : 1; // pixel shader reads depth
1083 uint32_t shadingRate : 2; // shading per pixel / sample / coarse pixel
1084 uint32_t numRenderTargets : 4; // number of render target outputs in use (0-8)
1085 uint32_t posOffset : 2; // type of offset (none, sample, centroid) to add to pixel position
1086 uint32_t barycentricsMask : 3; // which type(s) of barycentric coords does the PS interpolate attributes with
1087 uint32_t usesUAV : 1; // pixel shader accesses UAV
1088 uint32_t forceEarlyZ : 1; // force execution of early depth/stencil test
1089
1090 };
1091
/// Depth bounds state: an enable flag and the min/max values of the test.
struct SWR_DEPTH_BOUNDS_STATE
{
    bool  depthBoundsTestEnable;    ///< whether the depth bounds test is enabled
    float depthBoundsTestMinValue;  ///< minimum value for the test
    float depthBoundsTestMaxValue;  ///< maximum value for the test
};
1099
1100