1 /****************************************************************************
2 * Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * @file tilemgr.cpp
24 *
25 * @brief Implementation for Macro Tile Manager which provides the facilities
26 * for threads to work on an macro tile.
27 *
28 ******************************************************************************/
29 #include <unordered_map>
30
31 #include "fifo.hpp"
32 #include "core/tilemgr.h"
33 #include "core/multisample.h"
34 #include "rdtsc_core.h"
35
MacroTileMgr(CachingArena & arena)36 MacroTileMgr::MacroTileMgr(CachingArena& arena) : mArena(arena) {}
37
enqueue(uint32_t x,uint32_t y,BE_WORK * pWork)38 void MacroTileMgr::enqueue(uint32_t x, uint32_t y, BE_WORK* pWork)
39 {
40 // Should not enqueue more then what we have backing for in the hot tile manager.
41 SWR_ASSERT(x < KNOB_NUM_HOT_TILES_X);
42 SWR_ASSERT(y < KNOB_NUM_HOT_TILES_Y);
43
44 if ((x & ~(KNOB_NUM_HOT_TILES_X - 1)) | (y & ~(KNOB_NUM_HOT_TILES_Y - 1)))
45 {
46 return;
47 }
48
49 uint32_t id = getTileId(x, y);
50
51 if (id >= mTiles.size())
52 {
53 mTiles.resize((16 + id) * 2);
54 }
55
56 MacroTileQueue* pTile = mTiles[id];
57 if (!pTile)
58 {
59 pTile = mTiles[id] = new MacroTileQueue();
60 }
61 pTile->mWorkItemsFE++;
62 pTile->mId = id;
63
64 if (pTile->mWorkItemsFE == 1)
65 {
66 pTile->clear(mArena);
67 mDirtyTiles.push_back(pTile);
68 }
69
70 mWorkItemsProduced++;
71 pTile->enqueue_try_nosync(mArena, pWork);
72 }
73
markTileComplete(uint32_t id)74 void MacroTileMgr::markTileComplete(uint32_t id)
75 {
76 SWR_ASSERT(mTiles.size() > id);
77 MacroTileQueue& tile = *mTiles[id];
78 uint32_t numTiles = tile.mWorkItemsFE;
79 InterlockedExchangeAdd(&mWorkItemsConsumed, numTiles);
80
81 _ReadWriteBarrier();
82 tile.mWorkItemsBE += numTiles;
83 SWR_ASSERT(tile.mWorkItemsFE == tile.mWorkItemsBE);
84
85 // clear out tile, but defer fifo clear until the next DC first queues to it.
86 // this prevents worker threads from constantly locking a completed macro tile
87 tile.mWorkItemsFE = 0;
88 tile.mWorkItemsBE = 0;
89 }
90
GetHotTile(SWR_CONTEXT * pContext,DRAW_CONTEXT * pDC,HANDLE hWorkerPrivateData,uint32_t macroID,SWR_RENDERTARGET_ATTACHMENT attachment,bool create,uint32_t numSamples,uint32_t renderTargetArrayIndex)91 HOTTILE* HotTileMgr::GetHotTile(SWR_CONTEXT* pContext,
92 DRAW_CONTEXT* pDC,
93 HANDLE hWorkerPrivateData,
94 uint32_t macroID,
95 SWR_RENDERTARGET_ATTACHMENT attachment,
96 bool create,
97 uint32_t numSamples,
98 uint32_t renderTargetArrayIndex)
99 {
100 uint32_t x, y;
101 MacroTileMgr::getTileIndices(macroID, x, y);
102
103 SWR_ASSERT(x < KNOB_NUM_HOT_TILES_X);
104 SWR_ASSERT(y < KNOB_NUM_HOT_TILES_Y);
105
106 HotTileSet& tile = mHotTiles[x][y];
107 HOTTILE& hotTile = tile.Attachment[attachment];
108 if (hotTile.pBuffer == NULL)
109 {
110 if (create)
111 {
112 uint32_t size = numSamples * mHotTileSize[attachment];
113 uint32_t numaNode = ((x ^ y) & pContext->threadPool.numaMask);
114 hotTile.pBuffer =
115 (uint8_t*)AllocHotTileMem(size, 64, numaNode + pContext->threadInfo.BASE_NUMA_NODE);
116 hotTile.state = HOTTILE_INVALID;
117 hotTile.numSamples = numSamples;
118 hotTile.renderTargetArrayIndex = renderTargetArrayIndex;
119 }
120 else
121 {
122 return NULL;
123 }
124 }
125 else
126 {
127 // free the old tile and create a new one with enough space to hold all samples
128 if (numSamples > hotTile.numSamples)
129 {
130 // tile should be either uninitialized or resolved if we're deleting and switching to a
131 // new sample count
132 SWR_ASSERT((hotTile.state == HOTTILE_INVALID) || (hotTile.state == HOTTILE_RESOLVED) ||
133 (hotTile.state == HOTTILE_CLEAR));
134 FreeHotTileMem(hotTile.pBuffer);
135
136 uint32_t size = numSamples * mHotTileSize[attachment];
137 uint32_t numaNode = ((x ^ y) & pContext->threadPool.numaMask);
138 hotTile.pBuffer =
139 (uint8_t*)AllocHotTileMem(size, 64, numaNode + pContext->threadInfo.BASE_NUMA_NODE);
140 hotTile.state = HOTTILE_INVALID;
141 hotTile.numSamples = numSamples;
142 }
143
144 // if requested render target array index isn't currently loaded, need to store out the
145 // current hottile and load the requested array slice
146 if (renderTargetArrayIndex != hotTile.renderTargetArrayIndex)
147 {
148 SWR_FORMAT format;
149 switch (attachment)
150 {
151 case SWR_ATTACHMENT_COLOR0:
152 case SWR_ATTACHMENT_COLOR1:
153 case SWR_ATTACHMENT_COLOR2:
154 case SWR_ATTACHMENT_COLOR3:
155 case SWR_ATTACHMENT_COLOR4:
156 case SWR_ATTACHMENT_COLOR5:
157 case SWR_ATTACHMENT_COLOR6:
158 case SWR_ATTACHMENT_COLOR7:
159 format = KNOB_COLOR_HOT_TILE_FORMAT;
160 break;
161 case SWR_ATTACHMENT_DEPTH:
162 format = KNOB_DEPTH_HOT_TILE_FORMAT;
163 break;
164 case SWR_ATTACHMENT_STENCIL:
165 format = KNOB_STENCIL_HOT_TILE_FORMAT;
166 break;
167 default:
168 SWR_INVALID("Unknown attachment: %d", attachment);
169 format = KNOB_COLOR_HOT_TILE_FORMAT;
170 break;
171 }
172
173 if (hotTile.state == HOTTILE_CLEAR)
174 {
175 if (attachment == SWR_ATTACHMENT_STENCIL)
176 ClearStencilHotTile(&hotTile);
177 else if (attachment == SWR_ATTACHMENT_DEPTH)
178 ClearDepthHotTile(&hotTile);
179 else
180 ClearColorHotTile(&hotTile);
181
182 hotTile.state = HOTTILE_DIRTY;
183 }
184
185 if (hotTile.state == HOTTILE_DIRTY)
186 {
187 pContext->pfnStoreTile(pDC,
188 hWorkerPrivateData,
189 format,
190 attachment,
191 x * KNOB_MACROTILE_X_DIM,
192 y * KNOB_MACROTILE_Y_DIM,
193 hotTile.renderTargetArrayIndex,
194 hotTile.pBuffer);
195 }
196
197 pContext->pfnLoadTile(pDC,
198 hWorkerPrivateData,
199 format,
200 attachment,
201 x * KNOB_MACROTILE_X_DIM,
202 y * KNOB_MACROTILE_Y_DIM,
203 renderTargetArrayIndex,
204 hotTile.pBuffer);
205
206 hotTile.renderTargetArrayIndex = renderTargetArrayIndex;
207 hotTile.state = HOTTILE_RESOLVED;
208 }
209 }
210 return &tile.Attachment[attachment];
211 }
212
GetHotTileNoLoad(SWR_CONTEXT * pContext,DRAW_CONTEXT * pDC,uint32_t macroID,SWR_RENDERTARGET_ATTACHMENT attachment,bool create,uint32_t numSamples)213 HOTTILE* HotTileMgr::GetHotTileNoLoad(SWR_CONTEXT* pContext,
214 DRAW_CONTEXT* pDC,
215 uint32_t macroID,
216 SWR_RENDERTARGET_ATTACHMENT attachment,
217 bool create,
218 uint32_t numSamples)
219 {
220 uint32_t x, y;
221 MacroTileMgr::getTileIndices(macroID, x, y);
222
223 SWR_ASSERT(x < KNOB_NUM_HOT_TILES_X);
224 SWR_ASSERT(y < KNOB_NUM_HOT_TILES_Y);
225
226 HotTileSet& tile = mHotTiles[x][y];
227 HOTTILE& hotTile = tile.Attachment[attachment];
228 if (hotTile.pBuffer == NULL)
229 {
230 if (create)
231 {
232 uint32_t size = numSamples * mHotTileSize[attachment];
233 hotTile.pBuffer = (uint8_t*)AlignedMalloc(size, 64);
234 hotTile.state = HOTTILE_INVALID;
235 hotTile.numSamples = numSamples;
236 hotTile.renderTargetArrayIndex = 0;
237 }
238 else
239 {
240 return NULL;
241 }
242 }
243
244 return &hotTile;
245 }
246
ClearColorHotTile(const HOTTILE * pHotTile)247 void HotTileMgr::ClearColorHotTile(
248 const HOTTILE* pHotTile) // clear a macro tile from float4 clear data.
249 {
250 // Load clear color into SIMD register...
251 float* pClearData = (float*)(pHotTile->clearData);
252 simd16scalar valR = _simd16_broadcast_ss(&pClearData[0]);
253 simd16scalar valG = _simd16_broadcast_ss(&pClearData[1]);
254 simd16scalar valB = _simd16_broadcast_ss(&pClearData[2]);
255 simd16scalar valA = _simd16_broadcast_ss(&pClearData[3]);
256
257 float* pfBuf = (float*)pHotTile->pBuffer;
258 uint32_t numSamples = pHotTile->numSamples;
259
260 for (uint32_t row = 0; row < KNOB_MACROTILE_Y_DIM; row += KNOB_TILE_Y_DIM)
261 {
262 for (uint32_t col = 0; col < KNOB_MACROTILE_X_DIM; col += KNOB_TILE_X_DIM)
263 {
264 for (uint32_t si = 0; si < (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * numSamples);
265 si += SIMD16_TILE_X_DIM * SIMD16_TILE_Y_DIM)
266 {
267 _simd16_store_ps(pfBuf, valR);
268 pfBuf += KNOB_SIMD16_WIDTH;
269
270 _simd16_store_ps(pfBuf, valG);
271 pfBuf += KNOB_SIMD16_WIDTH;
272
273 _simd16_store_ps(pfBuf, valB);
274 pfBuf += KNOB_SIMD16_WIDTH;
275
276 _simd16_store_ps(pfBuf, valA);
277 pfBuf += KNOB_SIMD16_WIDTH;
278 }
279 }
280 }
281 }
282
ClearDepthHotTile(const HOTTILE * pHotTile)283 void HotTileMgr::ClearDepthHotTile(
284 const HOTTILE* pHotTile) // clear a macro tile from float4 clear data.
285 {
286 // Load clear color into SIMD register...
287 float* pClearData = (float*)(pHotTile->clearData);
288 simd16scalar valZ = _simd16_broadcast_ss(&pClearData[0]);
289
290 float* pfBuf = (float*)pHotTile->pBuffer;
291 uint32_t numSamples = pHotTile->numSamples;
292
293 for (uint32_t row = 0; row < KNOB_MACROTILE_Y_DIM; row += KNOB_TILE_Y_DIM)
294 {
295 for (uint32_t col = 0; col < KNOB_MACROTILE_X_DIM; col += KNOB_TILE_X_DIM)
296 {
297 for (uint32_t si = 0; si < (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * numSamples);
298 si += SIMD16_TILE_X_DIM * SIMD16_TILE_Y_DIM)
299 {
300 _simd16_store_ps(pfBuf, valZ);
301 pfBuf += KNOB_SIMD16_WIDTH;
302 }
303 }
304 }
305 }
306
ClearStencilHotTile(const HOTTILE * pHotTile)307 void HotTileMgr::ClearStencilHotTile(const HOTTILE* pHotTile)
308 {
309 // convert from F32 to U8.
310 uint8_t clearVal = (uint8_t)(pHotTile->clearData[0]);
311 // broadcast 32x into __m256i...
312 simd16scalari valS = _simd16_set1_epi8(clearVal);
313
314 simd16scalari* pBuf = (simd16scalari*)pHotTile->pBuffer;
315 uint32_t numSamples = pHotTile->numSamples;
316
317 for (uint32_t row = 0; row < KNOB_MACROTILE_Y_DIM; row += KNOB_TILE_Y_DIM)
318 {
319 for (uint32_t col = 0; col < KNOB_MACROTILE_X_DIM; col += KNOB_TILE_X_DIM)
320 {
321 // We're putting 4 pixels in each of the 32-bit slots, so increment 4 times as quickly.
322 for (uint32_t si = 0; si < (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * numSamples);
323 si += SIMD16_TILE_X_DIM * SIMD16_TILE_Y_DIM * 4)
324 {
325 _simd16_store_si(pBuf, valS);
326 pBuf += 1;
327 }
328 }
329 }
330 }
331
332 //////////////////////////////////////////////////////////////////////////
333 /// @brief InitializeHotTiles
334 /// for draw calls, we initialize the active hot tiles and perform deferred
335 /// load on them if tile is in invalid state. we do this in the outer thread
336 /// loop instead of inside the draw routine itself mainly for performance,
337 /// to avoid unnecessary setup every triangle
338 /// @todo support deferred clear
339 /// @param pCreateInfo - pointer to creation info.
InitializeHotTiles(SWR_CONTEXT * pContext,DRAW_CONTEXT * pDC,uint32_t workerId,uint32_t macroID)340 void HotTileMgr::InitializeHotTiles(SWR_CONTEXT* pContext,
341 DRAW_CONTEXT* pDC,
342 uint32_t workerId,
343 uint32_t macroID)
344 {
345 const API_STATE& state = GetApiState(pDC);
346 HANDLE hWorkerPrivateData = pDC->pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
347
348 uint32_t x, y;
349 MacroTileMgr::getTileIndices(macroID, x, y);
350 x *= KNOB_MACROTILE_X_DIM;
351 y *= KNOB_MACROTILE_Y_DIM;
352
353 uint32_t numSamples = GetNumSamples(state.rastState.sampleCount);
354
355 // check RT if enabled
356 unsigned long rtSlot = 0;
357 uint32_t colorHottileEnableMask = state.colorHottileEnable;
358 while (_BitScanForward(&rtSlot, colorHottileEnableMask))
359 {
360 HOTTILE* pHotTile =
361 GetHotTile(pContext,
362 pDC,
363 hWorkerPrivateData,
364 macroID,
365 (SWR_RENDERTARGET_ATTACHMENT)(SWR_ATTACHMENT_COLOR0 + rtSlot),
366 true,
367 numSamples);
368
369 if (pHotTile->state == HOTTILE_INVALID)
370 {
371 RDTSC_BEGIN(pContext->pBucketMgr, BELoadTiles, pDC->drawId);
372 // invalid hottile before draw requires a load from surface before we can draw to it
373 pContext->pfnLoadTile(pDC,
374 hWorkerPrivateData,
375 KNOB_COLOR_HOT_TILE_FORMAT,
376 (SWR_RENDERTARGET_ATTACHMENT)(SWR_ATTACHMENT_COLOR0 + rtSlot),
377 x,
378 y,
379 pHotTile->renderTargetArrayIndex,
380 pHotTile->pBuffer);
381 pHotTile->state = HOTTILE_RESOLVED;
382 RDTSC_END(pContext->pBucketMgr, BELoadTiles, 0);
383 }
384 else if (pHotTile->state == HOTTILE_CLEAR)
385 {
386 RDTSC_BEGIN(pContext->pBucketMgr, BELoadTiles, pDC->drawId);
387 // Clear the tile.
388 ClearColorHotTile(pHotTile);
389 pHotTile->state = HOTTILE_DIRTY;
390 RDTSC_END(pContext->pBucketMgr, BELoadTiles, 0);
391 }
392 colorHottileEnableMask &= ~(1 << rtSlot);
393 }
394
395 // check depth if enabled
396 if (state.depthHottileEnable)
397 {
398 HOTTILE* pHotTile = GetHotTile(
399 pContext, pDC, hWorkerPrivateData, macroID, SWR_ATTACHMENT_DEPTH, true, numSamples);
400 if (pHotTile->state == HOTTILE_INVALID)
401 {
402 RDTSC_BEGIN(pContext->pBucketMgr, BELoadTiles, pDC->drawId);
403 // invalid hottile before draw requires a load from surface before we can draw to it
404 pContext->pfnLoadTile(pDC,
405 hWorkerPrivateData,
406 KNOB_DEPTH_HOT_TILE_FORMAT,
407 SWR_ATTACHMENT_DEPTH,
408 x,
409 y,
410 pHotTile->renderTargetArrayIndex,
411 pHotTile->pBuffer);
412 pHotTile->state = HOTTILE_DIRTY;
413 RDTSC_END(pContext->pBucketMgr, BELoadTiles, 0);
414 }
415 else if (pHotTile->state == HOTTILE_CLEAR)
416 {
417 RDTSC_BEGIN(pContext->pBucketMgr, BELoadTiles, pDC->drawId);
418 // Clear the tile.
419 ClearDepthHotTile(pHotTile);
420 pHotTile->state = HOTTILE_DIRTY;
421 RDTSC_END(pContext->pBucketMgr, BELoadTiles, 0);
422 }
423 }
424
425 // check stencil if enabled
426 if (state.stencilHottileEnable)
427 {
428 HOTTILE* pHotTile = GetHotTile(
429 pContext, pDC, hWorkerPrivateData, macroID, SWR_ATTACHMENT_STENCIL, true, numSamples);
430 if (pHotTile->state == HOTTILE_INVALID)
431 {
432 RDTSC_BEGIN(pContext->pBucketMgr, BELoadTiles, pDC->drawId);
433 // invalid hottile before draw requires a load from surface before we can draw to it
434 pContext->pfnLoadTile(pDC,
435 hWorkerPrivateData,
436 KNOB_STENCIL_HOT_TILE_FORMAT,
437 SWR_ATTACHMENT_STENCIL,
438 x,
439 y,
440 pHotTile->renderTargetArrayIndex,
441 pHotTile->pBuffer);
442 pHotTile->state = HOTTILE_DIRTY;
443 RDTSC_END(pContext->pBucketMgr, BELoadTiles, 0);
444 }
445 else if (pHotTile->state == HOTTILE_CLEAR)
446 {
447 RDTSC_BEGIN(pContext->pBucketMgr, BELoadTiles, pDC->drawId);
448 // Clear the tile.
449 ClearStencilHotTile(pHotTile);
450 pHotTile->state = HOTTILE_DIRTY;
451 RDTSC_END(pContext->pBucketMgr, BELoadTiles, 0);
452 }
453 }
454 }
455