/****************************************************************************
* Copyright (C) 2014-2016 Intel Corporation.   All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* @file api.cpp
*
* @brief API implementation
*
******************************************************************************/
28 
29 #include <cfloat>
30 #include <cmath>
31 #include <cstdio>
32 #include <new>
33 
34 #include "core/api.h"
35 #include "core/backend.h"
36 #include "core/context.h"
37 #include "core/depthstencil.h"
38 #include "core/frontend.h"
39 #include "core/rasterizer.h"
40 #include "core/rdtsc_core.h"
41 #include "core/threads.h"
42 #include "core/tilemgr.h"
43 #include "core/clip.h"
44 #include "core/utils.h"
45 
46 #include "common/simdintrin.h"
47 #include "common/os.h"
48 
49 static const SWR_RECT g_MaxScissorRect = { 0, 0, KNOB_MAX_SCISSOR_X, KNOB_MAX_SCISSOR_Y };
50 
51 void SetupDefaultState(SWR_CONTEXT *pContext);
52 
GetContext(HANDLE hContext)53 static INLINE SWR_CONTEXT* GetContext(HANDLE hContext)
54 {
55     return (SWR_CONTEXT*)hContext;
56 }
57 
WakeAllThreads(SWR_CONTEXT * pContext)58 void WakeAllThreads(SWR_CONTEXT *pContext)
59 {
60     pContext->FifosNotEmpty.notify_all();
61 }
62 
63 //////////////////////////////////////////////////////////////////////////
64 /// @brief Create SWR Context.
65 /// @param pCreateInfo - pointer to creation info.
SwrCreateContext(SWR_CREATECONTEXT_INFO * pCreateInfo)66 HANDLE SwrCreateContext(
67     SWR_CREATECONTEXT_INFO* pCreateInfo)
68 {
69     RDTSC_RESET();
70     RDTSC_INIT(0);
71 
72     void* pContextMem = AlignedMalloc(sizeof(SWR_CONTEXT), KNOB_SIMD_WIDTH * 4);
73     memset(pContextMem, 0, sizeof(SWR_CONTEXT));
74     SWR_CONTEXT *pContext = new (pContextMem) SWR_CONTEXT();
75 
76     pContext->privateStateSize = pCreateInfo->privateStateSize;
77 
78     pContext->dcRing.Init(KNOB_MAX_DRAWS_IN_FLIGHT);
79     pContext->dsRing.Init(KNOB_MAX_DRAWS_IN_FLIGHT);
80 
81     pContext->pMacroTileManagerArray = (MacroTileMgr*)AlignedMalloc(sizeof(MacroTileMgr) * KNOB_MAX_DRAWS_IN_FLIGHT, 64);
82     pContext->pDispatchQueueArray = (DispatchQueue*)AlignedMalloc(sizeof(DispatchQueue) * KNOB_MAX_DRAWS_IN_FLIGHT, 64);
83 
84     for (uint32_t dc = 0; dc < KNOB_MAX_DRAWS_IN_FLIGHT; ++dc)
85     {
86         pContext->dcRing[dc].pArena = new CachingArena(pContext->cachingArenaAllocator);
87         new (&pContext->pMacroTileManagerArray[dc]) MacroTileMgr(*pContext->dcRing[dc].pArena);
88         new (&pContext->pDispatchQueueArray[dc]) DispatchQueue();
89 
90         pContext->dsRing[dc].pArena = new CachingArena(pContext->cachingArenaAllocator);
91     }
92 
93     pContext->threadInfo.MAX_WORKER_THREADS        = KNOB_MAX_WORKER_THREADS;
94     pContext->threadInfo.MAX_NUMA_NODES            = KNOB_MAX_NUMA_NODES;
95     pContext->threadInfo.MAX_CORES_PER_NUMA_NODE   = KNOB_MAX_CORES_PER_NUMA_NODE;
96     pContext->threadInfo.MAX_THREADS_PER_CORE      = KNOB_MAX_THREADS_PER_CORE;
97     pContext->threadInfo.SINGLE_THREADED           = KNOB_SINGLE_THREADED;
98 
99     if (pCreateInfo->pThreadInfo)
100     {
101         pContext->threadInfo = *pCreateInfo->pThreadInfo;
102     }
103 
104     memset(&pContext->WaitLock, 0, sizeof(pContext->WaitLock));
105     memset(&pContext->FifosNotEmpty, 0, sizeof(pContext->FifosNotEmpty));
106     new (&pContext->WaitLock) std::mutex();
107     new (&pContext->FifosNotEmpty) std::condition_variable();
108 
109     CreateThreadPool(pContext, &pContext->threadPool);
110 
111     pContext->ppScratch = new uint8_t*[pContext->NumWorkerThreads];
112     pContext->pStats = new SWR_STATS[pContext->NumWorkerThreads];
113 
114 #if defined(KNOB_ENABLE_AR)
115     // Setup ArchRast thread contexts which includes +1 for API thread.
116     pContext->pArContext = new HANDLE[pContext->NumWorkerThreads+1];
117     pContext->pArContext[pContext->NumWorkerThreads] = ArchRast::CreateThreadContext(ArchRast::AR_THREAD::API);
118 #endif
119 
120     // Allocate scratch space for workers.
121     ///@note We could lazily allocate this but its rather small amount of memory.
122     for (uint32_t i = 0; i < pContext->NumWorkerThreads; ++i)
123     {
124 #if defined(_WIN32)
125         uint32_t numaNode = pContext->threadPool.pThreadData ?
126             pContext->threadPool.pThreadData[i].numaId : 0;
127         pContext->ppScratch[i] = (uint8_t*)VirtualAllocExNuma(
128             GetCurrentProcess(), nullptr, 32 * sizeof(KILOBYTE),
129             MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE,
130             numaNode);
131 #else
132         pContext->ppScratch[i] = (uint8_t*)AlignedMalloc(32 * sizeof(KILOBYTE), KNOB_SIMD_WIDTH * 4);
133 #endif
134 
135 #if defined(KNOB_ENABLE_AR)
136         // Initialize worker thread context for ArchRast.
137         pContext->pArContext[i] = ArchRast::CreateThreadContext(ArchRast::AR_THREAD::WORKER);
138 #endif
139     }
140 
141     // State setup AFTER context is fully initialized
142     SetupDefaultState(pContext);
143 
144     // initialize hot tile manager
145     pContext->pHotTileMgr = new HotTileMgr();
146 
147     // initialize function pointer tables
148     InitClearTilesTable();
149 
150     // initialize callback functions
151     pContext->pfnLoadTile = pCreateInfo->pfnLoadTile;
152     pContext->pfnStoreTile = pCreateInfo->pfnStoreTile;
153     pContext->pfnClearTile = pCreateInfo->pfnClearTile;
154     pContext->pfnUpdateSoWriteOffset = pCreateInfo->pfnUpdateSoWriteOffset;
155     pContext->pfnUpdateStats = pCreateInfo->pfnUpdateStats;
156     pContext->pfnUpdateStatsFE = pCreateInfo->pfnUpdateStatsFE;
157 
158 
159     // pass pointer to bucket manager back to caller
160 #ifdef KNOB_ENABLE_RDTSC
161     pCreateInfo->pBucketMgr = &gBucketMgr;
162 #endif
163 
164     pCreateInfo->contextSaveSize = sizeof(API_STATE);
165 
166     StartThreadPool(pContext, &pContext->threadPool);
167 
168     return (HANDLE)pContext;
169 }
170 
CopyState(DRAW_STATE & dst,const DRAW_STATE & src)171 void CopyState(DRAW_STATE& dst, const DRAW_STATE& src)
172 {
173     memcpy(&dst.state, &src.state, sizeof(API_STATE));
174 }
175 
176 template<bool IsDraw>
QueueWork(SWR_CONTEXT * pContext)177 void QueueWork(SWR_CONTEXT *pContext)
178 {
179     DRAW_CONTEXT* pDC = pContext->pCurDrawContext;
180     uint32_t dcIndex = pDC->drawId % KNOB_MAX_DRAWS_IN_FLIGHT;
181 
182     if (IsDraw)
183     {
184         pDC->pTileMgr = &pContext->pMacroTileManagerArray[dcIndex];
185         pDC->pTileMgr->initialize();
186     }
187 
188     // Each worker thread looks at a DC for both FE and BE work at different times and so we
189     // multiply threadDone by 2.  When the threadDone counter has reached 0 then all workers
190     // have moved past this DC. (i.e. Each worker has checked this DC for both FE and BE work and
191     // then moved on if all work is done.)
192     pContext->pCurDrawContext->threadsDone = pContext->NumFEThreads + pContext->NumBEThreads;
193 
194     if (IsDraw)
195     {
196         InterlockedIncrement((volatile LONG*)&pContext->drawsOutstandingFE);
197     }
198 
199     _ReadWriteBarrier();
200     {
201         std::unique_lock<std::mutex> lock(pContext->WaitLock);
202         pContext->dcRing.Enqueue();
203     }
204 
205     if (pContext->threadInfo.SINGLE_THREADED)
206     {
207         // flush denormals to 0
208         uint32_t mxcsr = _mm_getcsr();
209         _mm_setcsr(mxcsr | _MM_FLUSH_ZERO_ON | _MM_DENORMALS_ZERO_ON);
210 
211         if (IsDraw)
212         {
213             uint32_t curDraw[2] = { pContext->pCurDrawContext->drawId, pContext->pCurDrawContext->drawId };
214             WorkOnFifoFE(pContext, 0, curDraw[0]);
215             WorkOnFifoBE(pContext, 0, curDraw[1], pContext->singleThreadLockedTiles, 0, 0);
216         }
217         else
218         {
219             uint32_t curDispatch = pContext->pCurDrawContext->drawId;
220             WorkOnCompute(pContext, 0, curDispatch);
221         }
222 
223         // Dequeue the work here, if not already done, since we're single threaded (i.e. no workers).
224         while (CompleteDrawContext(pContext, pContext->pCurDrawContext) > 0) {}
225 
226         // restore csr
227         _mm_setcsr(mxcsr);
228     }
229     else
230     {
231         AR_API_BEGIN(APIDrawWakeAllThreads, pDC->drawId);
232         WakeAllThreads(pContext);
233         AR_API_END(APIDrawWakeAllThreads, 1);
234     }
235 
236     // Set current draw context to NULL so that next state call forces a new draw context to be created and populated.
237     pContext->pPrevDrawContext = pContext->pCurDrawContext;
238     pContext->pCurDrawContext = nullptr;
239 }
240 
QueueDraw(SWR_CONTEXT * pContext)241 INLINE void QueueDraw(SWR_CONTEXT* pContext)
242 {
243     QueueWork<true>(pContext);
244 }
245 
QueueDispatch(SWR_CONTEXT * pContext)246 INLINE void QueueDispatch(SWR_CONTEXT* pContext)
247 {
248     QueueWork<false>(pContext);
249 }
250 
GetDrawContext(SWR_CONTEXT * pContext,bool isSplitDraw=false)251 DRAW_CONTEXT* GetDrawContext(SWR_CONTEXT *pContext, bool isSplitDraw = false)
252 {
253     AR_API_BEGIN(APIGetDrawContext, 0);
254     // If current draw context is null then need to obtain a new draw context to use from ring.
255     if (pContext->pCurDrawContext == nullptr)
256     {
257         // Need to wait for a free entry.
258         while (pContext->dcRing.IsFull())
259         {
260             _mm_pause();
261         }
262 
263         uint64_t curDraw = pContext->dcRing.GetHead();
264         uint32_t dcIndex = curDraw % KNOB_MAX_DRAWS_IN_FLIGHT;
265 
266         if ((pContext->frameCount - pContext->lastFrameChecked) > 2 ||
267             (curDraw - pContext->lastDrawChecked) > 0x10000)
268         {
269             // Take this opportunity to clean-up old arena allocations
270             pContext->cachingArenaAllocator.FreeOldBlocks();
271 
272             pContext->lastFrameChecked = pContext->frameCount;
273             pContext->lastDrawChecked = curDraw;
274         }
275 
276         DRAW_CONTEXT* pCurDrawContext = &pContext->dcRing[dcIndex];
277         pContext->pCurDrawContext = pCurDrawContext;
278 
279         // Assign next available entry in DS ring to this DC.
280         uint32_t dsIndex = pContext->curStateId % KNOB_MAX_DRAWS_IN_FLIGHT;
281         pCurDrawContext->pState = &pContext->dsRing[dsIndex];
282 
283         // Copy previous state to current state.
284         if (pContext->pPrevDrawContext)
285         {
286             DRAW_CONTEXT* pPrevDrawContext = pContext->pPrevDrawContext;
287 
288             // If we're splitting our draw then we can just use the same state from the previous
289             // draw. In this case, we won't increment the DS ring index so the next non-split
290             // draw can receive the state.
291             if (isSplitDraw == false)
292             {
293                 CopyState(*pCurDrawContext->pState, *pPrevDrawContext->pState);
294 
295                 // Should have been cleaned up previously
296                 SWR_ASSERT(pCurDrawContext->pState->pArena->IsEmpty() == true);
297 
298                 pCurDrawContext->pState->pPrivateState = nullptr;
299 
300                 pContext->curStateId++;  // Progress state ring index forward.
301             }
302             else
303             {
304                 // If its a split draw then just copy the state pointer over
305                 // since its the same draw.
306                 pCurDrawContext->pState = pPrevDrawContext->pState;
307                 SWR_ASSERT(pPrevDrawContext->cleanupState == false);
308             }
309         }
310         else
311         {
312             SWR_ASSERT(pCurDrawContext->pState->pArena->IsEmpty() == true);
313             pContext->curStateId++;  // Progress state ring index forward.
314         }
315 
316         SWR_ASSERT(pCurDrawContext->pArena->IsEmpty() == true);
317 
318         // Reset dependency
319         pCurDrawContext->dependent = false;
320         pCurDrawContext->dependentFE = false;
321 
322         pCurDrawContext->pContext = pContext;
323         pCurDrawContext->isCompute = false; // Dispatch has to set this to true.
324 
325         pCurDrawContext->doneFE = false;
326         pCurDrawContext->FeLock = 0;
327         pCurDrawContext->threadsDone = 0;
328         pCurDrawContext->retireCallback.pfnCallbackFunc = nullptr;
329 
330         pCurDrawContext->dynState.Reset(pContext->NumWorkerThreads);
331 
332         // Assign unique drawId for this DC
333         pCurDrawContext->drawId = pContext->dcRing.GetHead();
334 
335         pCurDrawContext->cleanupState = true;
336 
337     }
338     else
339     {
340         SWR_ASSERT(isSplitDraw == false, "Split draw should only be used when obtaining a new DC");
341     }
342 
343     AR_API_END(APIGetDrawContext, 0);
344     return pContext->pCurDrawContext;
345 }
346 
GetDrawState(SWR_CONTEXT * pContext)347 API_STATE* GetDrawState(SWR_CONTEXT *pContext)
348 {
349     DRAW_CONTEXT* pDC = GetDrawContext(pContext);
350     SWR_ASSERT(pDC->pState != nullptr);
351 
352     return &pDC->pState->state;
353 }
354 
SwrDestroyContext(HANDLE hContext)355 void SwrDestroyContext(HANDLE hContext)
356 {
357     SWR_CONTEXT *pContext = GetContext(hContext);
358     DRAW_CONTEXT* pDC = GetDrawContext(pContext);
359 
360     pDC->FeWork.type = SHUTDOWN;
361     pDC->FeWork.pfnWork = ProcessShutdown;
362 
363     //enqueue
364     QueueDraw(pContext);
365 
366     DestroyThreadPool(pContext, &pContext->threadPool);
367 
368     // free the fifos
369     for (uint32_t i = 0; i < KNOB_MAX_DRAWS_IN_FLIGHT; ++i)
370     {
371         delete[] pContext->dcRing[i].dynState.pStats;
372         delete pContext->dcRing[i].pArena;
373         delete pContext->dsRing[i].pArena;
374         pContext->pMacroTileManagerArray[i].~MacroTileMgr();
375         pContext->pDispatchQueueArray[i].~DispatchQueue();
376     }
377 
378     AlignedFree(pContext->pDispatchQueueArray);
379     AlignedFree(pContext->pMacroTileManagerArray);
380 
381     // Free scratch space.
382     for (uint32_t i = 0; i < pContext->NumWorkerThreads; ++i)
383     {
384 #if defined(_WIN32)
385         VirtualFree(pContext->ppScratch[i], 0, MEM_RELEASE);
386 #else
387         AlignedFree(pContext->ppScratch[i]);
388 #endif
389 
390 #if defined(KNOB_ENABLE_AR)
391         ArchRast::DestroyThreadContext(pContext->pArContext[i]);
392 #endif
393     }
394 
395     delete[] pContext->ppScratch;
396     delete[] pContext->pStats;
397 
398     delete(pContext->pHotTileMgr);
399 
400     pContext->~SWR_CONTEXT();
401     AlignedFree(GetContext(hContext));
402 }
403 
SwrSaveState(HANDLE hContext,void * pOutputStateBlock,size_t memSize)404 void SWR_API SwrSaveState(
405     HANDLE hContext,
406     void* pOutputStateBlock,
407     size_t memSize)
408 {
409     SWR_CONTEXT *pContext = GetContext(hContext);
410     auto pSrc = GetDrawState(pContext);
411     SWR_ASSERT(pOutputStateBlock && memSize >= sizeof(*pSrc));
412 
413     memcpy(pOutputStateBlock, pSrc, sizeof(*pSrc));
414 }
415 
SwrRestoreState(HANDLE hContext,const void * pStateBlock,size_t memSize)416 void SWR_API SwrRestoreState(
417     HANDLE hContext,
418     const void* pStateBlock,
419     size_t memSize)
420 {
421     SWR_CONTEXT *pContext = GetContext(hContext);
422     auto pDst = GetDrawState(pContext);
423     SWR_ASSERT(pStateBlock && memSize >= sizeof(*pDst));
424 
425     memcpy(pDst, pStateBlock, sizeof(*pDst));
426 }
427 
SetupDefaultState(SWR_CONTEXT * pContext)428 void SetupDefaultState(SWR_CONTEXT *pContext)
429 {
430     API_STATE* pState = GetDrawState(pContext);
431 
432     pState->rastState.cullMode = SWR_CULLMODE_NONE;
433     pState->rastState.frontWinding = SWR_FRONTWINDING_CCW;
434 
435     pState->depthBoundsState.depthBoundsTestEnable = false;
436     pState->depthBoundsState.depthBoundsTestMinValue = 0.0f;
437     pState->depthBoundsState.depthBoundsTestMaxValue = 1.0f;
438 }
439 
SwrSync(HANDLE hContext,PFN_CALLBACK_FUNC pfnFunc,uint64_t userData,uint64_t userData2,uint64_t userData3)440 void SwrSync(HANDLE hContext, PFN_CALLBACK_FUNC pfnFunc, uint64_t userData, uint64_t userData2, uint64_t userData3)
441 {
442     SWR_ASSERT(pfnFunc != nullptr);
443 
444     SWR_CONTEXT *pContext = GetContext(hContext);
445     DRAW_CONTEXT* pDC = GetDrawContext(pContext);
446 
447     AR_API_BEGIN(APISync, 0);
448 
449     pDC->FeWork.type = SYNC;
450     pDC->FeWork.pfnWork = ProcessSync;
451 
452     // Setup callback function
453     pDC->retireCallback.pfnCallbackFunc = pfnFunc;
454     pDC->retireCallback.userData = userData;
455     pDC->retireCallback.userData2 = userData2;
456     pDC->retireCallback.userData3 = userData3;
457 
458     //enqueue
459     QueueDraw(pContext);
460 
461     AR_API_END(APISync, 1);
462 }
463 
SwrWaitForIdle(HANDLE hContext)464 void SwrWaitForIdle(HANDLE hContext)
465 {
466     SWR_CONTEXT *pContext = GetContext(hContext);
467 
468     AR_API_BEGIN(APIWaitForIdle, 0);
469 
470     while (!pContext->dcRing.IsEmpty())
471     {
472         _mm_pause();
473     }
474 
475     AR_API_END(APIWaitForIdle, 1);
476 }
477 
SwrWaitForIdleFE(HANDLE hContext)478 void SwrWaitForIdleFE(HANDLE hContext)
479 {
480     SWR_CONTEXT *pContext = GetContext(hContext);
481 
482     AR_API_BEGIN(APIWaitForIdle, 0);
483 
484     while (pContext->drawsOutstandingFE > 0)
485     {
486         _mm_pause();
487     }
488 
489     AR_API_END(APIWaitForIdle, 1);
490 }
491 
SwrSetVertexBuffers(HANDLE hContext,uint32_t numBuffers,const SWR_VERTEX_BUFFER_STATE * pVertexBuffers)492 void SwrSetVertexBuffers(
493     HANDLE hContext,
494     uint32_t numBuffers,
495     const SWR_VERTEX_BUFFER_STATE* pVertexBuffers)
496 {
497     API_STATE* pState = GetDrawState(GetContext(hContext));
498 
499     for (uint32_t i = 0; i < numBuffers; ++i)
500     {
501         const SWR_VERTEX_BUFFER_STATE *pVB = &pVertexBuffers[i];
502         pState->vertexBuffers[pVB->index] = *pVB;
503     }
504 }
505 
SwrSetIndexBuffer(HANDLE hContext,const SWR_INDEX_BUFFER_STATE * pIndexBuffer)506 void SwrSetIndexBuffer(
507     HANDLE hContext,
508     const SWR_INDEX_BUFFER_STATE* pIndexBuffer)
509 {
510     API_STATE* pState = GetDrawState(GetContext(hContext));
511 
512     pState->indexBuffer = *pIndexBuffer;
513 }
514 
SwrSetFetchFunc(HANDLE hContext,PFN_FETCH_FUNC pfnFetchFunc)515 void SwrSetFetchFunc(
516     HANDLE hContext,
517     PFN_FETCH_FUNC    pfnFetchFunc)
518 {
519     API_STATE* pState = GetDrawState(GetContext(hContext));
520 
521     pState->pfnFetchFunc = pfnFetchFunc;
522 }
523 
SwrSetSoFunc(HANDLE hContext,PFN_SO_FUNC pfnSoFunc,uint32_t streamIndex)524 void SwrSetSoFunc(
525     HANDLE hContext,
526     PFN_SO_FUNC    pfnSoFunc,
527     uint32_t streamIndex)
528 {
529     API_STATE* pState = GetDrawState(GetContext(hContext));
530 
531     SWR_ASSERT(streamIndex < MAX_SO_STREAMS);
532 
533     pState->pfnSoFunc[streamIndex] = pfnSoFunc;
534 }
535 
SwrSetSoState(HANDLE hContext,SWR_STREAMOUT_STATE * pSoState)536 void SwrSetSoState(
537     HANDLE hContext,
538     SWR_STREAMOUT_STATE* pSoState)
539 {
540     API_STATE* pState = GetDrawState(GetContext(hContext));
541 
542     pState->soState = *pSoState;
543 }
544 
SwrSetSoBuffers(HANDLE hContext,SWR_STREAMOUT_BUFFER * pSoBuffer,uint32_t slot)545 void SwrSetSoBuffers(
546     HANDLE hContext,
547     SWR_STREAMOUT_BUFFER* pSoBuffer,
548     uint32_t slot)
549 {
550     API_STATE* pState = GetDrawState(GetContext(hContext));
551 
552     SWR_ASSERT((slot < 4), "There are only 4 SO buffer slots [0, 3]\nSlot requested: %d", slot);
553 
554     pState->soBuffer[slot] = *pSoBuffer;
555 }
556 
SwrSetVertexFunc(HANDLE hContext,PFN_VERTEX_FUNC pfnVertexFunc)557 void SwrSetVertexFunc(
558     HANDLE hContext,
559     PFN_VERTEX_FUNC pfnVertexFunc)
560 {
561     API_STATE* pState = GetDrawState(GetContext(hContext));
562 
563     pState->pfnVertexFunc = pfnVertexFunc;
564 }
565 
SwrSetFrontendState(HANDLE hContext,SWR_FRONTEND_STATE * pFEState)566 void SwrSetFrontendState(
567     HANDLE hContext,
568     SWR_FRONTEND_STATE *pFEState)
569 {
570     API_STATE* pState = GetDrawState(GetContext(hContext));
571     pState->frontendState = *pFEState;
572 }
573 
SwrSetGsState(HANDLE hContext,SWR_GS_STATE * pGSState)574 void SwrSetGsState(
575     HANDLE hContext,
576     SWR_GS_STATE *pGSState)
577 {
578     API_STATE* pState = GetDrawState(GetContext(hContext));
579     pState->gsState = *pGSState;
580 }
581 
SwrSetGsFunc(HANDLE hContext,PFN_GS_FUNC pfnGsFunc)582 void SwrSetGsFunc(
583     HANDLE hContext,
584     PFN_GS_FUNC pfnGsFunc)
585 {
586     API_STATE* pState = GetDrawState(GetContext(hContext));
587     pState->pfnGsFunc = pfnGsFunc;
588 }
589 
SwrSetCsFunc(HANDLE hContext,PFN_CS_FUNC pfnCsFunc,uint32_t totalThreadsInGroup,uint32_t totalSpillFillSize)590 void SwrSetCsFunc(
591     HANDLE hContext,
592     PFN_CS_FUNC pfnCsFunc,
593     uint32_t totalThreadsInGroup,
594     uint32_t totalSpillFillSize)
595 {
596     API_STATE* pState = GetDrawState(GetContext(hContext));
597     pState->pfnCsFunc = pfnCsFunc;
598     pState->totalThreadsInGroup = totalThreadsInGroup;
599     pState->totalSpillFillSize = totalSpillFillSize;
600 }
601 
SwrSetTsState(HANDLE hContext,SWR_TS_STATE * pState)602 void SwrSetTsState(
603     HANDLE hContext,
604     SWR_TS_STATE *pState)
605 {
606     API_STATE* pApiState = GetDrawState(GetContext(hContext));
607     pApiState->tsState = *pState;
608 }
609 
SwrSetHsFunc(HANDLE hContext,PFN_HS_FUNC pfnFunc)610 void SwrSetHsFunc(
611     HANDLE hContext,
612     PFN_HS_FUNC pfnFunc)
613 {
614     API_STATE* pApiState = GetDrawState(GetContext(hContext));
615     pApiState->pfnHsFunc = pfnFunc;
616 }
617 
SwrSetDsFunc(HANDLE hContext,PFN_DS_FUNC pfnFunc)618 void SwrSetDsFunc(
619     HANDLE hContext,
620     PFN_DS_FUNC pfnFunc)
621 {
622     API_STATE* pApiState = GetDrawState(GetContext(hContext));
623     pApiState->pfnDsFunc = pfnFunc;
624 }
625 
SwrSetDepthStencilState(HANDLE hContext,SWR_DEPTH_STENCIL_STATE * pDSState)626 void SwrSetDepthStencilState(
627     HANDLE hContext,
628     SWR_DEPTH_STENCIL_STATE *pDSState)
629 {
630     API_STATE* pState = GetDrawState(GetContext(hContext));
631 
632     pState->depthStencilState = *pDSState;
633 }
634 
SwrSetBackendState(HANDLE hContext,SWR_BACKEND_STATE * pBEState)635 void SwrSetBackendState(
636     HANDLE hContext,
637     SWR_BACKEND_STATE *pBEState)
638 {
639     API_STATE* pState = GetDrawState(GetContext(hContext));
640 
641     pState->backendState = *pBEState;
642 }
643 
SwrSetDepthBoundsState(HANDLE hContext,SWR_DEPTH_BOUNDS_STATE * pDBState)644 void SwrSetDepthBoundsState(
645     HANDLE hContext,
646     SWR_DEPTH_BOUNDS_STATE *pDBState)
647 {
648     API_STATE* pState = GetDrawState(GetContext(hContext));
649 
650     pState->depthBoundsState = *pDBState;
651 }
652 
SwrSetPixelShaderState(HANDLE hContext,SWR_PS_STATE * pPSState)653 void SwrSetPixelShaderState(
654     HANDLE hContext,
655     SWR_PS_STATE *pPSState)
656 {
657     API_STATE *pState = GetDrawState(GetContext(hContext));
658     pState->psState = *pPSState;
659 }
660 
SwrSetBlendState(HANDLE hContext,SWR_BLEND_STATE * pBlendState)661 void SwrSetBlendState(
662     HANDLE hContext,
663     SWR_BLEND_STATE *pBlendState)
664 {
665     API_STATE *pState = GetDrawState(GetContext(hContext));
666     memcpy(&pState->blendState, pBlendState, sizeof(SWR_BLEND_STATE));
667 }
668 
SwrSetBlendFunc(HANDLE hContext,uint32_t renderTarget,PFN_BLEND_JIT_FUNC pfnBlendFunc)669 void SwrSetBlendFunc(
670     HANDLE hContext,
671     uint32_t renderTarget,
672     PFN_BLEND_JIT_FUNC pfnBlendFunc)
673 {
674     SWR_ASSERT(renderTarget < SWR_NUM_RENDERTARGETS);
675     API_STATE *pState = GetDrawState(GetContext(hContext));
676     pState->pfnBlendFunc[renderTarget] = pfnBlendFunc;
677 }
678 
679 // update guardband multipliers for the viewport
updateGuardbands(API_STATE * pState)680 void updateGuardbands(API_STATE *pState)
681 {
682     uint32_t numGbs = pState->gsState.emitsRenderTargetArrayIndex ? KNOB_NUM_VIEWPORTS_SCISSORS : 1;
683 
684     for(uint32_t i = 0; i < numGbs; ++i)
685     {
686         // guardband center is viewport center
687         pState->gbState.left[i] = KNOB_GUARDBAND_WIDTH / pState->vp[i].width;
688         pState->gbState.right[i] = KNOB_GUARDBAND_WIDTH / pState->vp[i].width;
689         pState->gbState.top[i] = KNOB_GUARDBAND_HEIGHT / pState->vp[i].height;
690         pState->gbState.bottom[i] = KNOB_GUARDBAND_HEIGHT / pState->vp[i].height;
691     }
692 }
693 
SwrSetRastState(HANDLE hContext,const SWR_RASTSTATE * pRastState)694 void SwrSetRastState(
695     HANDLE hContext,
696     const SWR_RASTSTATE *pRastState)
697 {
698     SWR_CONTEXT *pContext = GetContext(hContext);
699     API_STATE* pState = GetDrawState(pContext);
700 
701     memcpy(&pState->rastState, pRastState, sizeof(SWR_RASTSTATE));
702 }
703 
SwrSetViewports(HANDLE hContext,uint32_t numViewports,const SWR_VIEWPORT * pViewports,const SWR_VIEWPORT_MATRICES * pMatrices)704 void SwrSetViewports(
705     HANDLE hContext,
706     uint32_t numViewports,
707     const SWR_VIEWPORT* pViewports,
708     const SWR_VIEWPORT_MATRICES* pMatrices)
709 {
710     SWR_ASSERT(numViewports <= KNOB_NUM_VIEWPORTS_SCISSORS,
711         "Invalid number of viewports.");
712 
713     SWR_CONTEXT *pContext = GetContext(hContext);
714     API_STATE* pState = GetDrawState(pContext);
715 
716     memcpy(&pState->vp[0], pViewports, sizeof(SWR_VIEWPORT) * numViewports);
717     // @todo Faster to copy portions of the SOA or just copy all of it?
718     memcpy(&pState->vpMatrices, pMatrices, sizeof(SWR_VIEWPORT_MATRICES));
719 
720     updateGuardbands(pState);
721 }
722 
SwrSetScissorRects(HANDLE hContext,uint32_t numScissors,const SWR_RECT * pScissors)723 void SwrSetScissorRects(
724     HANDLE hContext,
725     uint32_t numScissors,
726     const SWR_RECT* pScissors)
727 {
728     SWR_ASSERT(numScissors <= KNOB_NUM_VIEWPORTS_SCISSORS,
729         "Invalid number of scissor rects.");
730 
731     API_STATE* pState = GetDrawState(GetContext(hContext));
732     memcpy(&pState->scissorRects[0], pScissors, numScissors * sizeof(pScissors[0]));
733 };
734 
SetupMacroTileScissors(DRAW_CONTEXT * pDC)735 void SetupMacroTileScissors(DRAW_CONTEXT *pDC)
736 {
737     API_STATE *pState = &pDC->pState->state;
738     uint32_t numScissors = pState->gsState.emitsViewportArrayIndex ? KNOB_NUM_VIEWPORTS_SCISSORS : 1;
739     pState->scissorsTileAligned = true;
740 
741     for (uint32_t index = 0; index < numScissors; ++index)
742     {
743         SWR_RECT &scissorInFixedPoint = pState->scissorsInFixedPoint[index];
744 
745         // Set up scissor dimensions based on scissor or viewport
746         if (pState->rastState.scissorEnable)
747         {
748             scissorInFixedPoint = pState->scissorRects[index];
749         }
750         else
751         {
752             // the vp width and height must be added to origin un-rounded then the result round to -inf.
753             // The cast to int works for rounding assuming all [left, right, top, bottom] are positive.
754             scissorInFixedPoint.xmin = (int32_t)pState->vp[index].x;
755             scissorInFixedPoint.xmax = (int32_t)(pState->vp[index].x + pState->vp[index].width);
756             scissorInFixedPoint.ymin = (int32_t)pState->vp[index].y;
757             scissorInFixedPoint.ymax = (int32_t)(pState->vp[index].y + pState->vp[index].height);
758         }
759 
760         // Clamp to max rect
761         scissorInFixedPoint &= g_MaxScissorRect;
762 
763         // Test for tile alignment
764         bool tileAligned;
765         tileAligned  = (scissorInFixedPoint.xmin % KNOB_TILE_X_DIM) == 0;
766         tileAligned &= (scissorInFixedPoint.ymin % KNOB_TILE_Y_DIM) == 0;
767         tileAligned &= (scissorInFixedPoint.xmax % KNOB_TILE_X_DIM) == 0;
768         tileAligned &= (scissorInFixedPoint.ymax % KNOB_TILE_Y_DIM) == 0;
769 
770         pState->scissorsTileAligned &= tileAligned;
771 
772         // Scale to fixed point
773         scissorInFixedPoint.xmin *= FIXED_POINT_SCALE;
774         scissorInFixedPoint.xmax *= FIXED_POINT_SCALE;
775         scissorInFixedPoint.ymin *= FIXED_POINT_SCALE;
776         scissorInFixedPoint.ymax *= FIXED_POINT_SCALE;
777 
778         // Make scissor inclusive
779         scissorInFixedPoint.xmax -= 1;
780         scissorInFixedPoint.ymax -= 1;
781     }
782 }
783 
784 // templated backend function tables
785 extern PFN_BACKEND_FUNC gBackendNullPs[SWR_MULTISAMPLE_TYPE_COUNT];
786 extern PFN_BACKEND_FUNC gBackendSingleSample[SWR_INPUT_COVERAGE_COUNT][2][2];
787 extern PFN_BACKEND_FUNC gBackendPixelRateTable[SWR_MULTISAMPLE_TYPE_COUNT][SWR_MSAA_SAMPLE_PATTERN_COUNT][SWR_INPUT_COVERAGE_COUNT][2][2][2];
788 extern PFN_BACKEND_FUNC gBackendSampleRateTable[SWR_MULTISAMPLE_TYPE_COUNT][SWR_INPUT_COVERAGE_COUNT][2][2];
SetupPipeline(DRAW_CONTEXT * pDC)789 void SetupPipeline(DRAW_CONTEXT *pDC)
790 {
791     SWR_CONTEXT* pContext = pDC->pContext;
792     DRAW_STATE* pState = pDC->pState;
793     const SWR_RASTSTATE &rastState = pState->state.rastState;
794     const SWR_PS_STATE &psState = pState->state.psState;
795     BACKEND_FUNCS& backendFuncs = pState->backendFuncs;
796     const uint32_t forcedSampleCount = (rastState.forcedSampleCount) ? 1 : 0;
797 
798     // setup backend
799     if (psState.pfnPixelShader == nullptr)
800     {
801         backendFuncs.pfnBackend = gBackendNullPs[pState->state.rastState.sampleCount];
802     }
803     else
804     {
805         const bool bMultisampleEnable = ((rastState.sampleCount > SWR_MULTISAMPLE_1X) || rastState.forcedSampleCount) ? 1 : 0;
806         const uint32_t centroid = ((psState.barycentricsMask & SWR_BARYCENTRIC_CENTROID_MASK) > 0) ? 1 : 0;
807         const uint32_t canEarlyZ = (psState.forceEarlyZ || (!psState.writesODepth && !psState.usesSourceDepth && !psState.usesUAV)) ? 1 : 0;
808 
809         SWR_BARYCENTRICS_MASK barycentricsMask = (SWR_BARYCENTRICS_MASK)psState.barycentricsMask;
810 
811         // select backend function
812         switch(psState.shadingRate)
813         {
814         case SWR_SHADING_RATE_PIXEL:
815             if(bMultisampleEnable)
816             {
817                 // always need to generate I & J per sample for Z interpolation
818                 barycentricsMask = (SWR_BARYCENTRICS_MASK)(barycentricsMask | SWR_BARYCENTRIC_PER_SAMPLE_MASK);
819                 backendFuncs.pfnBackend = gBackendPixelRateTable[rastState.sampleCount][rastState.samplePattern][psState.inputCoverage][centroid][forcedSampleCount][canEarlyZ];
820             }
821             else
822             {
823                 // always need to generate I & J per pixel for Z interpolation
824                 barycentricsMask = (SWR_BARYCENTRICS_MASK)(barycentricsMask | SWR_BARYCENTRIC_PER_PIXEL_MASK);
825                 backendFuncs.pfnBackend = gBackendSingleSample[psState.inputCoverage][centroid][canEarlyZ];
826             }
827             break;
828         case SWR_SHADING_RATE_SAMPLE:
829             SWR_ASSERT(rastState.samplePattern == SWR_MSAA_STANDARD_PATTERN);
830             // always need to generate I & J per sample for Z interpolation
831             barycentricsMask = (SWR_BARYCENTRICS_MASK)(barycentricsMask | SWR_BARYCENTRIC_PER_SAMPLE_MASK);
832             backendFuncs.pfnBackend = gBackendSampleRateTable[rastState.sampleCount][psState.inputCoverage][centroid][canEarlyZ];
833             break;
834         default:
835             SWR_ASSERT(0 && "Invalid shading rate");
836             break;
837         }
838     }
839 
840     PFN_PROCESS_PRIMS pfnBinner;
841     switch (pState->state.topology)
842     {
843     case TOP_POINT_LIST:
844         pState->pfnProcessPrims = ClipPoints;
845         pfnBinner = BinPoints;
846         break;
847     case TOP_LINE_LIST:
848     case TOP_LINE_STRIP:
849     case TOP_LINE_LOOP:
850     case TOP_LINE_LIST_ADJ:
851     case TOP_LISTSTRIP_ADJ:
852         pState->pfnProcessPrims = ClipLines;
853         pfnBinner = BinLines;
854         break;
855     default:
856         pState->pfnProcessPrims = ClipTriangles;
857         pfnBinner = GetBinTrianglesFunc((rastState.conservativeRast > 0));
858         break;
859     };
860 
861 
862     // disable clipper if viewport transform is disabled
863     if (pState->state.frontendState.vpTransformDisable)
864     {
865         pState->pfnProcessPrims = pfnBinner;
866     }
867 
868     if ((pState->state.psState.pfnPixelShader == nullptr) &&
869         (pState->state.depthStencilState.depthTestEnable == FALSE) &&
870         (pState->state.depthStencilState.depthWriteEnable == FALSE) &&
871         (pState->state.depthStencilState.stencilTestEnable == FALSE) &&
872         (pState->state.depthStencilState.stencilWriteEnable == FALSE) &&
873         (pState->state.backendState.numAttributes == 0))
874     {
875         pState->pfnProcessPrims = nullptr;
876     }
877 
878     if (pState->state.soState.rasterizerDisable == true)
879     {
880         pState->pfnProcessPrims = nullptr;
881     }
882 
883 
884     // set up the frontend attribute count
885     pState->state.feNumAttributes = 0;
886     const SWR_BACKEND_STATE& backendState = pState->state.backendState;
887     if (backendState.swizzleEnable)
888     {
889         // attribute swizzling is enabled, iterate over the map and record the max attribute used
890         for (uint32_t i = 0; i < backendState.numAttributes; ++i)
891         {
892             pState->state.feNumAttributes = std::max(pState->state.feNumAttributes, (uint32_t)backendState.swizzleMap[i].sourceAttrib + 1);
893         }
894     }
895     else
896     {
897         pState->state.feNumAttributes = pState->state.backendState.numAttributes;
898     }
899 
900     if (pState->state.soState.soEnable)
901     {
902         uint32_t streamMasks = 0;
903         for (uint32_t i = 0; i < 4; ++i)
904         {
905             streamMasks |= pState->state.soState.streamMasks[i];
906         }
907 
908         DWORD maxAttrib;
909         if (_BitScanReverse(&maxAttrib, streamMasks))
910         {
911             pState->state.feNumAttributes = std::max(pState->state.feNumAttributes, (uint32_t)(maxAttrib + 1));
912         }
913     }
914 
915     // complicated logic to test for cases where we don't need backing hottile memory for a draw
916     // have to check for the special case where depth/stencil test is enabled but depthwrite is disabled.
917     pState->state.depthHottileEnable = ((!(pState->state.depthStencilState.depthTestEnable &&
918                                            !pState->state.depthStencilState.depthWriteEnable &&
919                                            !pState->state.depthBoundsState.depthBoundsTestEnable &&
920                                            pState->state.depthStencilState.depthTestFunc == ZFUNC_ALWAYS)) &&
921                                         (pState->state.depthStencilState.depthTestEnable ||
922                                          pState->state.depthStencilState.depthWriteEnable ||
923                                          pState->state.depthBoundsState.depthBoundsTestEnable)) ? true : false;
924 
925     pState->state.stencilHottileEnable = (((!(pState->state.depthStencilState.stencilTestEnable &&
926                                              !pState->state.depthStencilState.stencilWriteEnable &&
927                                               pState->state.depthStencilState.stencilTestFunc == ZFUNC_ALWAYS)) ||
928                                           // for stencil we have to check the double sided state as well
929                                           (!(pState->state.depthStencilState.doubleSidedStencilTestEnable &&
930                                              !pState->state.depthStencilState.stencilWriteEnable &&
931                                               pState->state.depthStencilState.backfaceStencilTestFunc == ZFUNC_ALWAYS))) &&
932                                           (pState->state.depthStencilState.stencilTestEnable  ||
933                                            pState->state.depthStencilState.stencilWriteEnable)) ? true : false;
934 
935     uint32_t numRTs = pState->state.psState.numRenderTargets;
936     pState->state.colorHottileEnable = 0;
937     if (psState.pfnPixelShader != nullptr)
938     {
939         for (uint32_t rt = 0; rt < numRTs; ++rt)
940         {
941             pState->state.colorHottileEnable |=
942                 (!pState->state.blendState.renderTarget[rt].writeDisableAlpha ||
943                  !pState->state.blendState.renderTarget[rt].writeDisableRed ||
944                  !pState->state.blendState.renderTarget[rt].writeDisableGreen ||
945                  !pState->state.blendState.renderTarget[rt].writeDisableBlue) ? (1 << rt) : 0;
946         }
947     }
948 
949     // Setup depth quantization function
950     if (pState->state.depthHottileEnable)
951     {
952         switch (pState->state.rastState.depthFormat)
953         {
954         case R32_FLOAT_X8X24_TYPELESS: pState->state.pfnQuantizeDepth = QuantizeDepth < R32_FLOAT_X8X24_TYPELESS > ; break;
955         case R32_FLOAT: pState->state.pfnQuantizeDepth = QuantizeDepth < R32_FLOAT > ; break;
956         case R24_UNORM_X8_TYPELESS: pState->state.pfnQuantizeDepth = QuantizeDepth < R24_UNORM_X8_TYPELESS > ; break;
957         case R16_UNORM: pState->state.pfnQuantizeDepth = QuantizeDepth < R16_UNORM > ; break;
958         default: SWR_ASSERT(false, "Unsupported depth format for depth quantiztion.");
959             pState->state.pfnQuantizeDepth = QuantizeDepth < R32_FLOAT > ;
960         }
961     }
962     else
963     {
964         // set up pass-through quantize if depth isn't enabled
965         pState->state.pfnQuantizeDepth = QuantizeDepth < R32_FLOAT > ;
966     }
967 }
968 
969 //////////////////////////////////////////////////////////////////////////
970 /// @brief InitDraw
971 /// @param pDC - Draw context to initialize for this draw.
InitDraw(DRAW_CONTEXT * pDC,bool isSplitDraw)972 void InitDraw(
973     DRAW_CONTEXT *pDC,
974     bool isSplitDraw)
975 {
976     // We don't need to re-setup the scissors/pipeline state again for split draw.
977     if (isSplitDraw == false)
978     {
979         SetupMacroTileScissors(pDC);
980         SetupPipeline(pDC);
981     }
982 
983 
984 }
985 
986 //////////////////////////////////////////////////////////////////////////
987 /// @brief We can split the draw for certain topologies for better performance.
988 /// @param totalVerts - Total vertices for draw
989 /// @param topology - Topology used for draw
MaxVertsPerDraw(DRAW_CONTEXT * pDC,uint32_t totalVerts,PRIMITIVE_TOPOLOGY topology)990 uint32_t MaxVertsPerDraw(
991     DRAW_CONTEXT* pDC,
992     uint32_t totalVerts,
993     PRIMITIVE_TOPOLOGY topology)
994 {
995     API_STATE& state = pDC->pState->state;
996 
997     uint32_t vertsPerDraw = totalVerts;
998 
999     if (state.soState.soEnable)
1000     {
1001         return totalVerts;
1002     }
1003 
1004     switch (topology)
1005     {
1006     case TOP_POINT_LIST:
1007     case TOP_TRIANGLE_LIST:
1008         vertsPerDraw = KNOB_MAX_PRIMS_PER_DRAW;
1009         break;
1010 
1011     case TOP_PATCHLIST_1:
1012     case TOP_PATCHLIST_2:
1013     case TOP_PATCHLIST_3:
1014     case TOP_PATCHLIST_4:
1015     case TOP_PATCHLIST_5:
1016     case TOP_PATCHLIST_6:
1017     case TOP_PATCHLIST_7:
1018     case TOP_PATCHLIST_8:
1019     case TOP_PATCHLIST_9:
1020     case TOP_PATCHLIST_10:
1021     case TOP_PATCHLIST_11:
1022     case TOP_PATCHLIST_12:
1023     case TOP_PATCHLIST_13:
1024     case TOP_PATCHLIST_14:
1025     case TOP_PATCHLIST_15:
1026     case TOP_PATCHLIST_16:
1027     case TOP_PATCHLIST_17:
1028     case TOP_PATCHLIST_18:
1029     case TOP_PATCHLIST_19:
1030     case TOP_PATCHLIST_20:
1031     case TOP_PATCHLIST_21:
1032     case TOP_PATCHLIST_22:
1033     case TOP_PATCHLIST_23:
1034     case TOP_PATCHLIST_24:
1035     case TOP_PATCHLIST_25:
1036     case TOP_PATCHLIST_26:
1037     case TOP_PATCHLIST_27:
1038     case TOP_PATCHLIST_28:
1039     case TOP_PATCHLIST_29:
1040     case TOP_PATCHLIST_30:
1041     case TOP_PATCHLIST_31:
1042     case TOP_PATCHLIST_32:
1043         if (pDC->pState->state.tsState.tsEnable)
1044         {
1045             uint32_t vertsPerPrim = topology - TOP_PATCHLIST_BASE;
1046             vertsPerDraw = vertsPerPrim * KNOB_MAX_TESS_PRIMS_PER_DRAW;
1047         }
1048         break;
1049 
1050     // The Primitive Assembly code can only handle 1 RECT at a time.
1051     case TOP_RECT_LIST:
1052         vertsPerDraw = 3;
1053         break;
1054 
1055     default:
1056         // We are not splitting up draws for other topologies.
1057         break;
1058     }
1059 
1060     return vertsPerDraw;
1061 }
1062 
1063 
1064 //////////////////////////////////////////////////////////////////////////
1065 /// @brief DrawInstanced
1066 /// @param hContext - Handle passed back from SwrCreateContext
1067 /// @param topology - Specifies topology for draw.
1068 /// @param numVerts - How many vertices to read sequentially from vertex data (per instance).
1069 /// @param startVertex - Specifies start vertex for draw. (vertex data)
1070 /// @param numInstances - How many instances to render.
1071 /// @param startInstance - Which instance to start sequentially fetching from in each buffer (instanced data)
DrawInstanced(HANDLE hContext,PRIMITIVE_TOPOLOGY topology,uint32_t numVertices,uint32_t startVertex,uint32_t numInstances=1,uint32_t startInstance=0)1072 void DrawInstanced(
1073     HANDLE hContext,
1074     PRIMITIVE_TOPOLOGY topology,
1075     uint32_t numVertices,
1076     uint32_t startVertex,
1077     uint32_t numInstances = 1,
1078     uint32_t startInstance = 0)
1079 {
1080     if (KNOB_TOSS_DRAW)
1081     {
1082         return;
1083     }
1084 
1085     SWR_CONTEXT *pContext = GetContext(hContext);
1086     DRAW_CONTEXT* pDC = GetDrawContext(pContext);
1087 
1088     AR_API_BEGIN(APIDraw, pDC->drawId);
1089     AR_API_EVENT(DrawInstancedEvent(pDC->drawId, topology, numVertices, startVertex, numInstances, startInstance));
1090 
1091     uint32_t maxVertsPerDraw = MaxVertsPerDraw(pDC, numVertices, topology);
1092     uint32_t primsPerDraw = GetNumPrims(topology, maxVertsPerDraw);
1093     uint32_t remainingVerts = numVertices;
1094 
1095     API_STATE    *pState = &pDC->pState->state;
1096     pState->topology = topology;
1097     pState->forceFront = false;
1098 
1099     // disable culling for points/lines
1100     uint32_t oldCullMode = pState->rastState.cullMode;
1101     if (topology == TOP_POINT_LIST)
1102     {
1103         pState->rastState.cullMode = SWR_CULLMODE_NONE;
1104         pState->forceFront = true;
1105     }
1106     else if (topology == TOP_RECT_LIST)
1107     {
1108         pState->rastState.cullMode = SWR_CULLMODE_NONE;
1109     }
1110 
1111 
1112     int draw = 0;
1113     while (remainingVerts)
1114     {
1115         uint32_t numVertsForDraw = (remainingVerts < maxVertsPerDraw) ?
1116         remainingVerts : maxVertsPerDraw;
1117 
1118         bool isSplitDraw = (draw > 0) ? true : false;
1119         DRAW_CONTEXT* pDC = GetDrawContext(pContext, isSplitDraw);
1120         InitDraw(pDC, isSplitDraw);
1121 
1122         pDC->FeWork.type = DRAW;
1123         pDC->FeWork.pfnWork = GetProcessDrawFunc(
1124             false,  // IsIndexed
1125             false, // bEnableCutIndex
1126             pState->tsState.tsEnable,
1127             pState->gsState.gsEnable,
1128             pState->soState.soEnable,
1129             pDC->pState->pfnProcessPrims != nullptr);
1130         pDC->FeWork.desc.draw.numVerts = numVertsForDraw;
1131         pDC->FeWork.desc.draw.startVertex = startVertex;
1132         pDC->FeWork.desc.draw.numInstances = numInstances;
1133         pDC->FeWork.desc.draw.startInstance = startInstance;
1134         pDC->FeWork.desc.draw.startPrimID = draw * primsPerDraw;
1135         pDC->FeWork.desc.draw.startVertexID = draw * maxVertsPerDraw;
1136 
1137         pDC->cleanupState = (remainingVerts == numVertsForDraw);
1138 
1139         //enqueue DC
1140         QueueDraw(pContext);
1141 
1142         remainingVerts -= numVertsForDraw;
1143         draw++;
1144     }
1145 
1146     // restore culling state
1147     pDC = GetDrawContext(pContext);
1148     pDC->pState->state.rastState.cullMode = oldCullMode;
1149 
1150 
1151     AR_API_END(APIDraw, numVertices * numInstances);
1152 }
1153 
1154 //////////////////////////////////////////////////////////////////////////
1155 /// @brief SwrDraw
1156 /// @param hContext - Handle passed back from SwrCreateContext
1157 /// @param topology - Specifies topology for draw.
1158 /// @param startVertex - Specifies start vertex in vertex buffer for draw.
1159 /// @param primCount - Number of vertices.
SwrDraw(HANDLE hContext,PRIMITIVE_TOPOLOGY topology,uint32_t startVertex,uint32_t numVertices)1160 void SwrDraw(
1161     HANDLE hContext,
1162     PRIMITIVE_TOPOLOGY topology,
1163     uint32_t startVertex,
1164     uint32_t numVertices)
1165 {
1166     DrawInstanced(hContext, topology, numVertices, startVertex);
1167 }
1168 
1169 //////////////////////////////////////////////////////////////////////////
1170 /// @brief SwrDrawInstanced
1171 /// @param hContext - Handle passed back from SwrCreateContext
1172 /// @param topology - Specifies topology for draw.
1173 /// @param numVertsPerInstance - How many vertices to read sequentially from vertex data.
1174 /// @param numInstances - How many instances to render.
1175 /// @param startVertex - Specifies start vertex for draw. (vertex data)
1176 /// @param startInstance - Which instance to start sequentially fetching from in each buffer (instanced data)
SwrDrawInstanced(HANDLE hContext,PRIMITIVE_TOPOLOGY topology,uint32_t numVertsPerInstance,uint32_t numInstances,uint32_t startVertex,uint32_t startInstance)1177 void SwrDrawInstanced(
1178     HANDLE hContext,
1179     PRIMITIVE_TOPOLOGY topology,
1180     uint32_t numVertsPerInstance,
1181     uint32_t numInstances,
1182     uint32_t startVertex,
1183     uint32_t startInstance
1184     )
1185 {
1186     DrawInstanced(hContext, topology, numVertsPerInstance, startVertex, numInstances, startInstance);
1187 }
1188 
1189 //////////////////////////////////////////////////////////////////////////
1190 /// @brief DrawIndexedInstanced
1191 /// @param hContext - Handle passed back from SwrCreateContext
1192 /// @param topology - Specifies topology for draw.
1193 /// @param numIndices - Number of indices to read sequentially from index buffer.
1194 /// @param indexOffset - Starting index into index buffer.
1195 /// @param baseVertex - Vertex in vertex buffer to consider as index "0". Note value is signed.
1196 /// @param numInstances - Number of instances to render.
1197 /// @param startInstance - Which instance to start sequentially fetching from in each buffer (instanced data)
DrawIndexedInstance(HANDLE hContext,PRIMITIVE_TOPOLOGY topology,uint32_t numIndices,uint32_t indexOffset,int32_t baseVertex,uint32_t numInstances=1,uint32_t startInstance=0)1198 void DrawIndexedInstance(
1199     HANDLE hContext,
1200     PRIMITIVE_TOPOLOGY topology,
1201     uint32_t numIndices,
1202     uint32_t indexOffset,
1203     int32_t baseVertex,
1204     uint32_t numInstances = 1,
1205     uint32_t startInstance = 0)
1206 {
1207     if (KNOB_TOSS_DRAW)
1208     {
1209         return;
1210     }
1211 
1212     SWR_CONTEXT *pContext = GetContext(hContext);
1213     DRAW_CONTEXT* pDC = GetDrawContext(pContext);
1214     API_STATE* pState = &pDC->pState->state;
1215 
1216     AR_API_BEGIN(APIDrawIndexed, pDC->drawId);
1217     AR_API_EVENT(DrawIndexedInstancedEvent(pDC->drawId, topology, numIndices, indexOffset, baseVertex, numInstances, startInstance));
1218 
1219     uint32_t maxIndicesPerDraw = MaxVertsPerDraw(pDC, numIndices, topology);
1220     uint32_t primsPerDraw = GetNumPrims(topology, maxIndicesPerDraw);
1221     uint32_t remainingIndices = numIndices;
1222 
1223     uint32_t indexSize = 0;
1224     switch (pState->indexBuffer.format)
1225     {
1226     case R32_UINT: indexSize = sizeof(uint32_t); break;
1227     case R16_UINT: indexSize = sizeof(uint16_t); break;
1228     case R8_UINT: indexSize = sizeof(uint8_t); break;
1229     default:
1230         SWR_ASSERT(0);
1231     }
1232 
1233     int draw = 0;
1234     uint8_t *pIB = (uint8_t*)pState->indexBuffer.pIndices;
1235     pIB += (uint64_t)indexOffset * (uint64_t)indexSize;
1236 
1237     pState->topology = topology;
1238     pState->forceFront = false;
1239 
1240     // disable culling for points/lines
1241     uint32_t oldCullMode = pState->rastState.cullMode;
1242     if (topology == TOP_POINT_LIST)
1243     {
1244         pState->rastState.cullMode = SWR_CULLMODE_NONE;
1245         pState->forceFront = true;
1246     }
1247     else if (topology == TOP_RECT_LIST)
1248     {
1249         pState->rastState.cullMode = SWR_CULLMODE_NONE;
1250     }
1251 
1252 
1253     while (remainingIndices)
1254     {
1255         uint32_t numIndicesForDraw = (remainingIndices < maxIndicesPerDraw) ?
1256         remainingIndices : maxIndicesPerDraw;
1257 
1258         // When breaking up draw, we need to obtain new draw context for each iteration.
1259         bool isSplitDraw = (draw > 0) ? true : false;
1260 
1261         pDC = GetDrawContext(pContext, isSplitDraw);
1262         InitDraw(pDC, isSplitDraw);
1263 
1264         pDC->FeWork.type = DRAW;
1265         pDC->FeWork.pfnWork = GetProcessDrawFunc(
1266             true,   // IsIndexed
1267             pState->frontendState.bEnableCutIndex,
1268             pState->tsState.tsEnable,
1269             pState->gsState.gsEnable,
1270             pState->soState.soEnable,
1271             pDC->pState->pfnProcessPrims != nullptr);
1272         pDC->FeWork.desc.draw.pDC = pDC;
1273         pDC->FeWork.desc.draw.numIndices = numIndicesForDraw;
1274         pDC->FeWork.desc.draw.pIB = (int*)pIB;
1275         pDC->FeWork.desc.draw.type = pDC->pState->state.indexBuffer.format;
1276 
1277         pDC->FeWork.desc.draw.numInstances = numInstances;
1278         pDC->FeWork.desc.draw.startInstance = startInstance;
1279         pDC->FeWork.desc.draw.baseVertex = baseVertex;
1280         pDC->FeWork.desc.draw.startPrimID = draw * primsPerDraw;
1281 
1282         pDC->cleanupState = (remainingIndices == numIndicesForDraw);
1283 
1284         //enqueue DC
1285         QueueDraw(pContext);
1286 
1287         pIB += maxIndicesPerDraw * indexSize;
1288         remainingIndices -= numIndicesForDraw;
1289         draw++;
1290     }
1291 
1292     // Restore culling state
1293     pDC = GetDrawContext(pContext);
1294     pDC->pState->state.rastState.cullMode = oldCullMode;
1295 
1296 
1297     AR_API_END(APIDrawIndexed, numIndices * numInstances);
1298 }
1299 
1300 
1301 //////////////////////////////////////////////////////////////////////////
1302 /// @brief DrawIndexed
1303 /// @param hContext - Handle passed back from SwrCreateContext
1304 /// @param topology - Specifies topology for draw.
1305 /// @param numIndices - Number of indices to read sequentially from index buffer.
1306 /// @param indexOffset - Starting index into index buffer.
1307 /// @param baseVertex - Vertex in vertex buffer to consider as index "0". Note value is signed.
SwrDrawIndexed(HANDLE hContext,PRIMITIVE_TOPOLOGY topology,uint32_t numIndices,uint32_t indexOffset,int32_t baseVertex)1308 void SwrDrawIndexed(
1309     HANDLE hContext,
1310     PRIMITIVE_TOPOLOGY topology,
1311     uint32_t numIndices,
1312     uint32_t indexOffset,
1313     int32_t baseVertex
1314     )
1315 {
1316     DrawIndexedInstance(hContext, topology, numIndices, indexOffset, baseVertex);
1317 }
1318 
1319 //////////////////////////////////////////////////////////////////////////
1320 /// @brief SwrDrawIndexedInstanced
1321 /// @param hContext - Handle passed back from SwrCreateContext
1322 /// @param topology - Specifies topology for draw.
1323 /// @param numIndices - Number of indices to read sequentially from index buffer.
1324 /// @param numInstances - Number of instances to render.
1325 /// @param indexOffset - Starting index into index buffer.
1326 /// @param baseVertex - Vertex in vertex buffer to consider as index "0". Note value is signed.
1327 /// @param startInstance - Which instance to start sequentially fetching from in each buffer (instanced data)
SwrDrawIndexedInstanced(HANDLE hContext,PRIMITIVE_TOPOLOGY topology,uint32_t numIndices,uint32_t numInstances,uint32_t indexOffset,int32_t baseVertex,uint32_t startInstance)1328 void SwrDrawIndexedInstanced(
1329     HANDLE hContext,
1330     PRIMITIVE_TOPOLOGY topology,
1331     uint32_t numIndices,
1332     uint32_t numInstances,
1333     uint32_t indexOffset,
1334     int32_t baseVertex,
1335     uint32_t startInstance)
1336 {
1337     DrawIndexedInstance(hContext, topology, numIndices, indexOffset, baseVertex, numInstances, startInstance);
1338 }
1339 
1340 //////////////////////////////////////////////////////////////////////////
1341 /// @brief SwrInvalidateTiles
1342 /// @param hContext - Handle passed back from SwrCreateContext
1343 /// @param attachmentMask - The mask specifies which surfaces attached to the hottiles to invalidate.
1344 /// @param invalidateRect - The pixel-coordinate rectangle to invalidate.  This will be expanded to
1345 ///                         be hottile size-aligned.
SwrInvalidateTiles(HANDLE hContext,uint32_t attachmentMask,const SWR_RECT & invalidateRect)1346 void SWR_API SwrInvalidateTiles(
1347     HANDLE hContext,
1348     uint32_t attachmentMask,
1349     const SWR_RECT& invalidateRect)
1350 {
1351     if (KNOB_TOSS_DRAW)
1352     {
1353         return;
1354     }
1355 
1356     SWR_CONTEXT *pContext = GetContext(hContext);
1357     DRAW_CONTEXT* pDC = GetDrawContext(pContext);
1358 
1359     pDC->FeWork.type = DISCARDINVALIDATETILES;
1360     pDC->FeWork.pfnWork = ProcessDiscardInvalidateTiles;
1361     pDC->FeWork.desc.discardInvalidateTiles.attachmentMask = attachmentMask;
1362     pDC->FeWork.desc.discardInvalidateTiles.rect = invalidateRect;
1363     pDC->FeWork.desc.discardInvalidateTiles.rect &= g_MaxScissorRect;
1364     pDC->FeWork.desc.discardInvalidateTiles.newTileState = SWR_TILE_INVALID;
1365     pDC->FeWork.desc.discardInvalidateTiles.createNewTiles = false;
1366     pDC->FeWork.desc.discardInvalidateTiles.fullTilesOnly = false;
1367 
1368     //enqueue
1369     QueueDraw(pContext);
1370 }
1371 
1372 //////////////////////////////////////////////////////////////////////////
1373 /// @brief SwrDiscardRect
1374 /// @param hContext - Handle passed back from SwrCreateContext
1375 /// @param attachmentMask - The mask specifies which surfaces attached to the hottiles to discard.
1376 /// @param rect - The pixel-coordinate rectangle to discard.  Only fully-covered hottiles will be
1377 ///               discarded.
SwrDiscardRect(HANDLE hContext,uint32_t attachmentMask,const SWR_RECT & rect)1378 void SWR_API SwrDiscardRect(
1379     HANDLE hContext,
1380     uint32_t attachmentMask,
1381     const SWR_RECT& rect)
1382 {
1383     if (KNOB_TOSS_DRAW)
1384     {
1385         return;
1386     }
1387 
1388     SWR_CONTEXT *pContext = GetContext(hContext);
1389     DRAW_CONTEXT* pDC = GetDrawContext(pContext);
1390 
1391     // Queue a load to the hottile
1392     pDC->FeWork.type = DISCARDINVALIDATETILES;
1393     pDC->FeWork.pfnWork = ProcessDiscardInvalidateTiles;
1394     pDC->FeWork.desc.discardInvalidateTiles.attachmentMask = attachmentMask;
1395     pDC->FeWork.desc.discardInvalidateTiles.rect = rect;
1396     pDC->FeWork.desc.discardInvalidateTiles.rect &= g_MaxScissorRect;
1397     pDC->FeWork.desc.discardInvalidateTiles.newTileState = SWR_TILE_RESOLVED;
1398     pDC->FeWork.desc.discardInvalidateTiles.createNewTiles = true;
1399     pDC->FeWork.desc.discardInvalidateTiles.fullTilesOnly = true;
1400 
1401     //enqueue
1402     QueueDraw(pContext);
1403 }
1404 
1405 //////////////////////////////////////////////////////////////////////////
1406 /// @brief SwrDispatch
1407 /// @param hContext - Handle passed back from SwrCreateContext
1408 /// @param threadGroupCountX - Number of thread groups dispatched in X direction
1409 /// @param threadGroupCountY - Number of thread groups dispatched in Y direction
1410 /// @param threadGroupCountZ - Number of thread groups dispatched in Z direction
SwrDispatch(HANDLE hContext,uint32_t threadGroupCountX,uint32_t threadGroupCountY,uint32_t threadGroupCountZ)1411 void SwrDispatch(
1412     HANDLE hContext,
1413     uint32_t threadGroupCountX,
1414     uint32_t threadGroupCountY,
1415     uint32_t threadGroupCountZ)
1416 {
1417     if (KNOB_TOSS_DRAW)
1418     {
1419         return;
1420     }
1421 
1422     SWR_CONTEXT *pContext = GetContext(hContext);
1423     DRAW_CONTEXT* pDC = GetDrawContext(pContext);
1424 
1425     AR_API_BEGIN(APIDispatch, pDC->drawId);
1426     AR_API_EVENT(DispatchEvent(pDC->drawId, threadGroupCountX, threadGroupCountY, threadGroupCountZ));
1427     pDC->isCompute = true;      // This is a compute context.
1428 
1429     COMPUTE_DESC* pTaskData = (COMPUTE_DESC*)pDC->pArena->AllocAligned(sizeof(COMPUTE_DESC), 64);
1430 
1431     pTaskData->threadGroupCountX = threadGroupCountX;
1432     pTaskData->threadGroupCountY = threadGroupCountY;
1433     pTaskData->threadGroupCountZ = threadGroupCountZ;
1434 
1435     uint32_t totalThreadGroups = threadGroupCountX * threadGroupCountY * threadGroupCountZ;
1436     uint32_t dcIndex = pDC->drawId % KNOB_MAX_DRAWS_IN_FLIGHT;
1437     pDC->pDispatch = &pContext->pDispatchQueueArray[dcIndex];
1438     pDC->pDispatch->initialize(totalThreadGroups, pTaskData, &ProcessComputeBE);
1439 
1440     QueueDispatch(pContext);
1441     AR_API_END(APIDispatch, threadGroupCountX * threadGroupCountY * threadGroupCountZ);
1442 }
1443 
1444 // Deswizzles, converts and stores current contents of the hot tiles to surface
1445 // described by pState
SwrStoreTiles(HANDLE hContext,uint32_t attachmentMask,SWR_TILE_STATE postStoreTileState,const SWR_RECT & storeRect)1446 void SWR_API SwrStoreTiles(
1447     HANDLE hContext,
1448     uint32_t attachmentMask,
1449     SWR_TILE_STATE postStoreTileState,
1450     const SWR_RECT& storeRect)
1451 {
1452     if (KNOB_TOSS_DRAW)
1453     {
1454         return;
1455     }
1456 
1457     SWR_CONTEXT *pContext = GetContext(hContext);
1458     DRAW_CONTEXT* pDC = GetDrawContext(pContext);
1459 
1460     AR_API_BEGIN(APIStoreTiles, pDC->drawId);
1461 
1462     pDC->FeWork.type = STORETILES;
1463     pDC->FeWork.pfnWork = ProcessStoreTiles;
1464     pDC->FeWork.desc.storeTiles.attachmentMask = attachmentMask;
1465     pDC->FeWork.desc.storeTiles.postStoreTileState = postStoreTileState;
1466     pDC->FeWork.desc.storeTiles.rect = storeRect;
1467     pDC->FeWork.desc.storeTiles.rect &= g_MaxScissorRect;
1468 
1469     //enqueue
1470     QueueDraw(pContext);
1471 
1472     AR_API_END(APIStoreTiles, 1);
1473 }
1474 
1475 //////////////////////////////////////////////////////////////////////////
1476 /// @brief SwrClearRenderTarget - Clear attached render targets / depth / stencil
1477 /// @param hContext - Handle passed back from SwrCreateContext
1478 /// @param attachmentMask - combination of SWR_ATTACHMENT_*_BIT attachments to clear
1479 /// @param renderTargetArrayIndex - the RT array index to clear
1480 /// @param clearColor - color use for clearing render targets
1481 /// @param z - depth value use for clearing depth buffer
1482 /// @param stencil - stencil value used for clearing stencil buffer
1483 /// @param clearRect - The pixel-coordinate rectangle to clear in all cleared buffers
SwrClearRenderTarget(HANDLE hContext,uint32_t attachmentMask,uint32_t renderTargetArrayIndex,const float clearColor[4],float z,uint8_t stencil,const SWR_RECT & clearRect)1484 void SWR_API SwrClearRenderTarget(
1485     HANDLE hContext,
1486     uint32_t attachmentMask,
1487     uint32_t renderTargetArrayIndex,
1488     const float clearColor[4],
1489     float z,
1490     uint8_t stencil,
1491     const SWR_RECT& clearRect)
1492 {
1493     if (KNOB_TOSS_DRAW)
1494     {
1495         return;
1496     }
1497 
1498     SWR_CONTEXT *pContext = GetContext(hContext);
1499     DRAW_CONTEXT* pDC = GetDrawContext(pContext);
1500 
1501     AR_API_BEGIN(APIClearRenderTarget, pDC->drawId);
1502 
1503     pDC->FeWork.type = CLEAR;
1504     pDC->FeWork.pfnWork = ProcessClear;
1505     pDC->FeWork.desc.clear.rect = clearRect;
1506     pDC->FeWork.desc.clear.rect &= g_MaxScissorRect;
1507     pDC->FeWork.desc.clear.attachmentMask = attachmentMask;
1508     pDC->FeWork.desc.clear.renderTargetArrayIndex = renderTargetArrayIndex;
1509     pDC->FeWork.desc.clear.clearDepth = z;
1510     pDC->FeWork.desc.clear.clearRTColor[0] = clearColor[0];
1511     pDC->FeWork.desc.clear.clearRTColor[1] = clearColor[1];
1512     pDC->FeWork.desc.clear.clearRTColor[2] = clearColor[2];
1513     pDC->FeWork.desc.clear.clearRTColor[3] = clearColor[3];
1514     pDC->FeWork.desc.clear.clearStencil = stencil;
1515 
1516     // enqueue draw
1517     QueueDraw(pContext);
1518 
1519     AR_API_END(APIClearRenderTarget, 1);
1520 }
1521 
1522 //////////////////////////////////////////////////////////////////////////
1523 /// @brief Returns a pointer to the private context state for the current
1524 ///        draw operation. This is used for external componets such as the
1525 ///        sampler.
1526 ///        SWR is responsible for the allocation of the private context state.
1527 /// @param hContext - Handle passed back from SwrCreateContext
SwrGetPrivateContextState(HANDLE hContext)1528 VOID* SwrGetPrivateContextState(
1529     HANDLE hContext)
1530 {
1531     SWR_CONTEXT* pContext = GetContext(hContext);
1532     DRAW_CONTEXT* pDC = GetDrawContext(pContext);
1533     DRAW_STATE* pState = pDC->pState;
1534 
1535     if (pState->pPrivateState == nullptr)
1536     {
1537         pState->pPrivateState = pState->pArena->AllocAligned(pContext->privateStateSize, KNOB_SIMD_WIDTH*sizeof(float));
1538     }
1539 
1540     return pState->pPrivateState;
1541 }
1542 
1543 //////////////////////////////////////////////////////////////////////////
1544 /// @brief Clients can use this to allocate memory for draw/dispatch
1545 ///        operations. The memory will automatically be freed once operation
1546 ///        has completed. Client can use this to allocate binding tables,
1547 ///        etc. needed for shader execution.
1548 /// @param hContext - Handle passed back from SwrCreateContext
1549 /// @param size - Size of allocation
1550 /// @param align - Alignment needed for allocation.
SwrAllocDrawContextMemory(HANDLE hContext,uint32_t size,uint32_t align)1551 VOID* SwrAllocDrawContextMemory(
1552     HANDLE hContext,
1553     uint32_t size,
1554     uint32_t align)
1555 {
1556     SWR_CONTEXT* pContext = GetContext(hContext);
1557     DRAW_CONTEXT* pDC = GetDrawContext(pContext);
1558 
1559     return pDC->pState->pArena->AllocAligned(size, align);
1560 }
1561 
1562 //////////////////////////////////////////////////////////////////////////
1563 /// @brief Enables stats counting
1564 /// @param hContext - Handle passed back from SwrCreateContext
1565 /// @param enable - If true then counts are incremented.
SwrEnableStatsFE(HANDLE hContext,bool enable)1566 void SwrEnableStatsFE(
1567     HANDLE hContext,
1568     bool enable)
1569 {
1570     SWR_CONTEXT *pContext = GetContext(hContext);
1571     DRAW_CONTEXT* pDC = GetDrawContext(pContext);
1572 
1573     pDC->pState->state.enableStatsFE = enable;
1574 }
1575 
1576 //////////////////////////////////////////////////////////////////////////
1577 /// @brief Enables stats counting
1578 /// @param hContext - Handle passed back from SwrCreateContext
1579 /// @param enable - If true then counts are incremented.
SwrEnableStatsBE(HANDLE hContext,bool enable)1580 void SwrEnableStatsBE(
1581     HANDLE hContext,
1582     bool enable)
1583 {
1584     SWR_CONTEXT *pContext = GetContext(hContext);
1585     DRAW_CONTEXT* pDC = GetDrawContext(pContext);
1586 
1587     pDC->pState->state.enableStatsBE = enable;
1588 }
1589 
1590 //////////////////////////////////////////////////////////////////////////
1591 /// @brief Mark end of frame - used for performance profiling
1592 /// @param hContext - Handle passed back from SwrCreateContext
SwrEndFrame(HANDLE hContext)1593 void SWR_API SwrEndFrame(
1594     HANDLE hContext)
1595 {
1596     SWR_CONTEXT *pContext = GetContext(hContext);
1597     DRAW_CONTEXT* pDC = GetDrawContext(pContext);
1598 
1599     RDTSC_ENDFRAME();
1600     AR_API_EVENT(FrameEndEvent(pContext->frameCount, pDC->drawId));
1601 
1602     pContext->frameCount++;
1603 }
1604 
1605