1 /****************************************************************************
2 * Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * @file backend.cpp
24 *
25 * @brief Backend handles rasterization, pixel shading and output merger
26 * operations.
27 *
28 ******************************************************************************/
29
#include <smmintrin.h>

#include "backend.h"
#include "backend_impl.h"
#include "tilemgr.h"
#include "memory/tilingtraits.h"
#include "core/multisample.h"

#include <algorithm>
#include <cstring>
39
40 template <SWR_FORMAT format>
ClearRasterTile(uint8_t * pTileBuffer,simd16vector & value)41 void ClearRasterTile(uint8_t* pTileBuffer, simd16vector& value)
42 {
43 auto lambda = [&](int32_t comp)
44 {
45 FormatTraits<format>::storeSOA(comp, pTileBuffer, value.v[comp]);
46
47 pTileBuffer += (KNOB_SIMD16_WIDTH * FormatTraits<format>::GetBPC(comp) / 8);
48 };
49
50 const uint32_t numIter =
51 (KNOB_TILE_Y_DIM / SIMD16_TILE_Y_DIM) * (KNOB_TILE_X_DIM / SIMD16_TILE_X_DIM);
52
53 for (uint32_t i = 0; i < numIter; ++i)
54 {
55 UnrollerL<0, FormatTraits<format>::numComps, 1>::step(lambda);
56 }
57 }
58
59 template <SWR_FORMAT format>
ClearMacroTile(DRAW_CONTEXT * pDC,HANDLE hWorkerPrivateData,SWR_RENDERTARGET_ATTACHMENT rt,uint32_t macroTile,uint32_t renderTargetArrayIndex,uint32_t clear[4],const SWR_RECT & rect)60 INLINE void ClearMacroTile(DRAW_CONTEXT* pDC,
61 HANDLE hWorkerPrivateData,
62 SWR_RENDERTARGET_ATTACHMENT rt,
63 uint32_t macroTile,
64 uint32_t renderTargetArrayIndex,
65 uint32_t clear[4],
66 const SWR_RECT& rect)
67 {
68 // convert clear color to hottile format
69 // clear color is in RGBA float/uint32
70
71 simd16vector vClear;
72 for (uint32_t comp = 0; comp < FormatTraits<format>::numComps; ++comp)
73 {
74 simd16scalar vComp = _simd16_load1_ps((const float*)&clear[comp]);
75
76 if (FormatTraits<format>::isNormalized(comp))
77 {
78 vComp = _simd16_mul_ps(vComp, _simd16_set1_ps(FormatTraits<format>::fromFloat(comp)));
79 vComp = _simd16_castsi_ps(_simd16_cvtps_epi32(vComp));
80 }
81 vComp = FormatTraits<format>::pack(comp, vComp);
82
83 vClear.v[FormatTraits<format>::swizzle(comp)] = vComp;
84 }
85
86 uint32_t tileX, tileY;
87 MacroTileMgr::getTileIndices(macroTile, tileX, tileY);
88
89 // Init to full macrotile
90 SWR_RECT clearTile = {
91 KNOB_MACROTILE_X_DIM * int32_t(tileX),
92 KNOB_MACROTILE_Y_DIM * int32_t(tileY),
93 KNOB_MACROTILE_X_DIM * int32_t(tileX + 1),
94 KNOB_MACROTILE_Y_DIM * int32_t(tileY + 1),
95 };
96
97 // intersect with clear rect
98 clearTile &= rect;
99
100 // translate to local hottile origin
101 clearTile.Translate(-int32_t(tileX) * KNOB_MACROTILE_X_DIM,
102 -int32_t(tileY) * KNOB_MACROTILE_Y_DIM);
103
104 // Make maximums inclusive (needed for convert to raster tiles)
105 clearTile.xmax -= 1;
106 clearTile.ymax -= 1;
107
108 // convert to raster tiles
109 clearTile.ymin >>= (KNOB_TILE_Y_DIM_SHIFT);
110 clearTile.ymax >>= (KNOB_TILE_Y_DIM_SHIFT);
111 clearTile.xmin >>= (KNOB_TILE_X_DIM_SHIFT);
112 clearTile.xmax >>= (KNOB_TILE_X_DIM_SHIFT);
113
114 const int32_t numSamples = GetNumSamples(pDC->pState->state.rastState.sampleCount);
115 // compute steps between raster tile samples / raster tiles / macro tile rows
116 const uint32_t rasterTileSampleStep =
117 KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<format>::bpp / 8;
118 const uint32_t rasterTileStep =
119 (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * (FormatTraits<format>::bpp / 8)) * numSamples;
120 const uint32_t macroTileRowStep = (KNOB_MACROTILE_X_DIM / KNOB_TILE_X_DIM) * rasterTileStep;
121 const uint32_t pitch = (FormatTraits<format>::bpp * KNOB_MACROTILE_X_DIM / 8);
122
123 HOTTILE* pHotTile = pDC->pContext->pHotTileMgr->GetHotTile(pDC->pContext,
124 pDC,
125 hWorkerPrivateData,
126 macroTile,
127 rt,
128 true,
129 numSamples,
130 renderTargetArrayIndex);
131 uint32_t rasterTileStartOffset =
132 (ComputeTileOffset2D<TilingTraits<SWR_TILE_SWRZ, FormatTraits<format>::bpp>>(
133 pitch, clearTile.xmin, clearTile.ymin)) *
134 numSamples;
135 uint8_t* pRasterTileRow =
136 pHotTile->pBuffer +
137 rasterTileStartOffset; //(ComputeTileOffset2D< TilingTraits<SWR_TILE_SWRZ,
138 // FormatTraits<format>::bpp > >(pitch, x, y)) * numSamples;
139
140 // loop over all raster tiles in the current hot tile
141 for (int32_t y = clearTile.ymin; y <= clearTile.ymax; ++y)
142 {
143 uint8_t* pRasterTile = pRasterTileRow;
144 for (int32_t x = clearTile.xmin; x <= clearTile.xmax; ++x)
145 {
146 for (int32_t sampleNum = 0; sampleNum < numSamples; sampleNum++)
147 {
148 ClearRasterTile<format>(pRasterTile, vClear);
149 pRasterTile += rasterTileSampleStep;
150 }
151 }
152 pRasterTileRow += macroTileRowStep;
153 }
154
155 pHotTile->state = HOTTILE_DIRTY;
156 }
157
ProcessClearBE(DRAW_CONTEXT * pDC,uint32_t workerId,uint32_t macroTile,void * pUserData)158 void ProcessClearBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pUserData)
159 {
160 SWR_CONTEXT* pContext = pDC->pContext;
161 HANDLE hWorkerPrivateData = pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
162
163 if (KNOB_FAST_CLEAR)
164 {
165 CLEAR_DESC* pClear = (CLEAR_DESC*)pUserData;
166 SWR_MULTISAMPLE_COUNT sampleCount = pDC->pState->state.rastState.sampleCount;
167 uint32_t numSamples = GetNumSamples(sampleCount);
168
169 SWR_ASSERT(pClear->attachmentMask != 0); // shouldn't be here without a reason.
170
171 RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEClear, pDC->drawId);
172
173 if (pClear->attachmentMask & SWR_ATTACHMENT_MASK_COLOR)
174 {
175 unsigned long rt = 0;
176 uint32_t mask = pClear->attachmentMask & SWR_ATTACHMENT_MASK_COLOR;
177 while (_BitScanForward(&rt, mask))
178 {
179 mask &= ~(1 << rt);
180
181 HOTTILE* pHotTile =
182 pContext->pHotTileMgr->GetHotTile(pContext,
183 pDC,
184 hWorkerPrivateData,
185 macroTile,
186 (SWR_RENDERTARGET_ATTACHMENT)rt,
187 true,
188 numSamples,
189 pClear->renderTargetArrayIndex);
190
191 // All we want to do here is to mark the hot tile as being in a "needs clear" state.
192 pHotTile->clearData[0] = *(uint32_t*)&(pClear->clearRTColor[0]);
193 pHotTile->clearData[1] = *(uint32_t*)&(pClear->clearRTColor[1]);
194 pHotTile->clearData[2] = *(uint32_t*)&(pClear->clearRTColor[2]);
195 pHotTile->clearData[3] = *(uint32_t*)&(pClear->clearRTColor[3]);
196 pHotTile->state = HOTTILE_CLEAR;
197 }
198 }
199
200 if (pClear->attachmentMask & SWR_ATTACHMENT_DEPTH_BIT)
201 {
202 HOTTILE* pHotTile = pContext->pHotTileMgr->GetHotTile(pContext,
203 pDC,
204 hWorkerPrivateData,
205 macroTile,
206 SWR_ATTACHMENT_DEPTH,
207 true,
208 numSamples,
209 pClear->renderTargetArrayIndex);
210 pHotTile->clearData[0] = *(uint32_t*)&pClear->clearDepth;
211 pHotTile->state = HOTTILE_CLEAR;
212 }
213
214 if (pClear->attachmentMask & SWR_ATTACHMENT_STENCIL_BIT)
215 {
216 HOTTILE* pHotTile = pContext->pHotTileMgr->GetHotTile(pContext,
217 pDC,
218 hWorkerPrivateData,
219 macroTile,
220 SWR_ATTACHMENT_STENCIL,
221 true,
222 numSamples,
223 pClear->renderTargetArrayIndex);
224
225 pHotTile->clearData[0] = pClear->clearStencil;
226 pHotTile->state = HOTTILE_CLEAR;
227 }
228
229 RDTSC_END(pDC->pContext->pBucketMgr, BEClear, 1);
230 }
231 else
232 {
233 // Legacy clear
234 CLEAR_DESC* pClear = (CLEAR_DESC*)pUserData;
235 RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEClear, pDC->drawId);
236
237 if (pClear->attachmentMask & SWR_ATTACHMENT_MASK_COLOR)
238 {
239 uint32_t clearData[4];
240 clearData[0] = *(uint32_t*)&(pClear->clearRTColor[0]);
241 clearData[1] = *(uint32_t*)&(pClear->clearRTColor[1]);
242 clearData[2] = *(uint32_t*)&(pClear->clearRTColor[2]);
243 clearData[3] = *(uint32_t*)&(pClear->clearRTColor[3]);
244
245 PFN_CLEAR_TILES pfnClearTiles = gClearTilesTable[KNOB_COLOR_HOT_TILE_FORMAT];
246 SWR_ASSERT(pfnClearTiles != nullptr);
247
248 unsigned long rt = 0;
249 uint32_t mask = pClear->attachmentMask & SWR_ATTACHMENT_MASK_COLOR;
250 while (_BitScanForward(&rt, mask))
251 {
252 mask &= ~(1 << rt);
253
254 pfnClearTiles(pDC,
255 hWorkerPrivateData,
256 (SWR_RENDERTARGET_ATTACHMENT)rt,
257 macroTile,
258 pClear->renderTargetArrayIndex,
259 clearData,
260 pClear->rect);
261 }
262 }
263
264 if (pClear->attachmentMask & SWR_ATTACHMENT_DEPTH_BIT)
265 {
266 uint32_t clearData[4];
267 clearData[0] = *(uint32_t*)&pClear->clearDepth;
268 PFN_CLEAR_TILES pfnClearTiles = gClearTilesTable[KNOB_DEPTH_HOT_TILE_FORMAT];
269 SWR_ASSERT(pfnClearTiles != nullptr);
270
271 pfnClearTiles(pDC,
272 hWorkerPrivateData,
273 SWR_ATTACHMENT_DEPTH,
274 macroTile,
275 pClear->renderTargetArrayIndex,
276 clearData,
277 pClear->rect);
278 }
279
280 if (pClear->attachmentMask & SWR_ATTACHMENT_STENCIL_BIT)
281 {
282 uint32_t clearData[4];
283 clearData[0] = pClear->clearStencil;
284 PFN_CLEAR_TILES pfnClearTiles = gClearTilesTable[KNOB_STENCIL_HOT_TILE_FORMAT];
285
286 pfnClearTiles(pDC,
287 hWorkerPrivateData,
288 SWR_ATTACHMENT_STENCIL,
289 macroTile,
290 pClear->renderTargetArrayIndex,
291 clearData,
292 pClear->rect);
293 }
294
295 RDTSC_END(pDC->pContext->pBucketMgr, BEClear, 1);
296 }
297 }
298
InitClearTilesTable()299 void InitClearTilesTable()
300 {
301 memset(gClearTilesTable, 0, sizeof(gClearTilesTable));
302
303 gClearTilesTable[R8G8B8A8_UNORM] = ClearMacroTile<R8G8B8A8_UNORM>;
304 gClearTilesTable[B8G8R8A8_UNORM] = ClearMacroTile<B8G8R8A8_UNORM>;
305 gClearTilesTable[R32_FLOAT] = ClearMacroTile<R32_FLOAT>;
306 gClearTilesTable[R32G32B32A32_FLOAT] = ClearMacroTile<R32G32B32A32_FLOAT>;
307 gClearTilesTable[R8_UINT] = ClearMacroTile<R8_UINT>;
308 }
309