/****************************************************************************
 * Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * @file tilemgr.h
 *
 * @brief Definitions for Macro Tile Manager which provides the facilities
 *        for threads to work on a macro tile.
 *
 ******************************************************************************/
#pragma once

#include <set>
#include <unordered_map>
#include "common/formats.h"
#include "common/intrin.h"
#include "fifo.hpp"
#include "context.h"
#include "format_traits.h"

//////////////////////////////////////////////////////////////////////////
/// MacroTileQueue - work queue for a macro tile.
41 ////////////////////////////////////////////////////////////////////////// 42 struct MacroTileQueue 43 { MacroTileQueueMacroTileQueue44 MacroTileQueue() {} ~MacroTileQueueMacroTileQueue45 ~MacroTileQueue() { destroy(); } 46 47 ////////////////////////////////////////////////////////////////////////// 48 /// @brief Returns number of work items queued for this tile. getNumQueuedMacroTileQueue49 uint32_t getNumQueued() { return mFifo.getNumQueued(); } 50 51 ////////////////////////////////////////////////////////////////////////// 52 /// @brief Attempt to lock the work fifo. If already locked then return false. tryLockMacroTileQueue53 bool tryLock() { return mFifo.tryLock(); } 54 55 ////////////////////////////////////////////////////////////////////////// 56 /// @brief Clear fifo and unlock it. 57 template <typename ArenaT> clearMacroTileQueue58 void clear(ArenaT& arena) 59 { 60 mFifo.clear(arena); 61 } 62 63 ////////////////////////////////////////////////////////////////////////// 64 /// @brief Peek at work sitting at the front of the fifo. peekMacroTileQueue65 BE_WORK* peek() { return mFifo.peek(); } 66 67 template <typename ArenaT> enqueue_try_nosyncMacroTileQueue68 bool enqueue_try_nosync(ArenaT& arena, const BE_WORK* entry) 69 { 70 return mFifo.enqueue_try_nosync(arena, entry); 71 } 72 73 ////////////////////////////////////////////////////////////////////////// 74 /// @brief Move to next work item dequeueMacroTileQueue75 void dequeue() { mFifo.dequeue_noinc(); } 76 77 ////////////////////////////////////////////////////////////////////////// 78 /// @brief Destroy fifo destroyMacroTileQueue79 void destroy() { mFifo.destroy(); } 80 81 ///@todo This will all be private. 82 uint32_t mWorkItemsFE = 0; 83 uint32_t mWorkItemsBE = 0; 84 uint32_t mId = 0; 85 86 private: 87 QUEUE<BE_WORK> mFifo; 88 }; 89 90 ////////////////////////////////////////////////////////////////////////// 91 /// MacroTileMgr - Manages macrotiles for a draw. 
92 ////////////////////////////////////////////////////////////////////////// 93 class MacroTileMgr 94 { 95 public: 96 MacroTileMgr(CachingArena& arena); ~MacroTileMgr()97 ~MacroTileMgr() 98 { 99 for (auto* pTile : mTiles) 100 { 101 delete pTile; 102 } 103 } 104 initialize()105 INLINE void initialize() 106 { 107 mWorkItemsProduced = 0; 108 mWorkItemsConsumed = 0; 109 110 mDirtyTiles.clear(); 111 } 112 getDirtyTiles()113 INLINE std::vector<MacroTileQueue*>& getDirtyTiles() { return mDirtyTiles; } 114 void markTileComplete(uint32_t id); 115 isWorkComplete()116 INLINE bool isWorkComplete() { return mWorkItemsProduced == mWorkItemsConsumed; } 117 118 void enqueue(uint32_t x, uint32_t y, BE_WORK* pWork); 119 getTileIndices(uint32_t tileID,uint32_t & x,uint32_t & y)120 static INLINE void getTileIndices(uint32_t tileID, uint32_t& x, uint32_t& y) 121 { 122 // Morton / Z order of tiles 123 x = pext_u32(tileID, 0x55555555); 124 y = pext_u32(tileID, 0xAAAAAAAA); 125 } 126 getTileId(uint32_t x,uint32_t y)127 static INLINE uint32_t getTileId(uint32_t x, uint32_t y) 128 { 129 // Morton / Z order of tiles 130 return pdep_u32(x, 0x55555555) | pdep_u32(y, 0xAAAAAAAA); 131 } 132 133 private: 134 CachingArena& mArena; 135 std::vector<MacroTileQueue*> mTiles; 136 137 // Any tile that has work queued to it is a dirty tile. 
138 std::vector<MacroTileQueue*> mDirtyTiles; 139 OSALIGNLINE(long)140 OSALIGNLINE(long) mWorkItemsProduced{0}; OSALIGNLINE(volatile long)141 OSALIGNLINE(volatile long) mWorkItemsConsumed{0}; 142 }; 143 144 typedef void (*PFN_DISPATCH)(DRAW_CONTEXT* pDC, 145 uint32_t workerId, 146 uint32_t threadGroupId, 147 void*& pSpillFillBuffer, 148 void*& pScratchSpace); 149 150 ////////////////////////////////////////////////////////////////////////// 151 /// DispatchQueue - work queue for dispatch 152 ////////////////////////////////////////////////////////////////////////// 153 class DispatchQueue 154 { 155 public: DispatchQueue()156 DispatchQueue() {} 157 158 ////////////////////////////////////////////////////////////////////////// 159 /// @brief Setup the producer consumer counts. initialize(uint32_t totalTasks,void * pTaskData,PFN_DISPATCH pfnDispatch)160 void initialize(uint32_t totalTasks, void* pTaskData, PFN_DISPATCH pfnDispatch) 161 { 162 // The available and outstanding counts start with total tasks. 163 // At the start there are N tasks available and outstanding. 164 // When both the available and outstanding counts have reached 0 then all work has 165 // completed. When a worker starts on a threadgroup then it decrements the available count. 166 // When a worker completes a threadgroup then it decrements the outstanding count. 167 168 mTasksAvailable = totalTasks; 169 mTasksOutstanding = totalTasks; 170 171 mpTaskData = pTaskData; 172 mPfnDispatch = pfnDispatch; 173 } 174 175 ////////////////////////////////////////////////////////////////////////// 176 /// @brief Returns number of tasks available for this dispatch. getNumQueued()177 uint32_t getNumQueued() { return (mTasksAvailable > 0) ? mTasksAvailable : 0; } 178 179 ////////////////////////////////////////////////////////////////////////// 180 /// @brief Atomically decrement the work available count. If the result 181 // is greater than 0 then we can on the associated thread group. 
182 // Otherwise, there is no more work to do. getWork(uint32_t & groupId)183 bool getWork(uint32_t& groupId) 184 { 185 long result = InterlockedDecrement(&mTasksAvailable); 186 187 if (result >= 0) 188 { 189 groupId = result; 190 return true; 191 } 192 193 return false; 194 } 195 196 ////////////////////////////////////////////////////////////////////////// 197 /// @brief Atomically decrement the outstanding count. A worker is notifying 198 /// us that he just finished some work. Also, return true if we're 199 /// the last worker to complete this dispatch. finishedWork()200 bool finishedWork() 201 { 202 long result = InterlockedDecrement(&mTasksOutstanding); 203 SWR_ASSERT(result >= 0, "Should never oversubscribe work"); 204 205 return (result == 0) ? true : false; 206 } 207 208 ////////////////////////////////////////////////////////////////////////// 209 /// @brief Work is complete once both the available/outstanding counts have reached 0. isWorkComplete()210 bool isWorkComplete() { return ((mTasksAvailable <= 0) && (mTasksOutstanding <= 0)); } 211 212 ////////////////////////////////////////////////////////////////////////// 213 /// @brief Return pointer to task data. GetTasksData()214 const void* GetTasksData() { return mpTaskData; } 215 216 ////////////////////////////////////////////////////////////////////////// 217 /// @brief Dispatches a unit of work dispatch(DRAW_CONTEXT * pDC,uint32_t workerId,uint32_t threadGroupId,void * & pSpillFillBuffer,void * & pScratchSpace)218 void dispatch(DRAW_CONTEXT* pDC, 219 uint32_t workerId, 220 uint32_t threadGroupId, 221 void*& pSpillFillBuffer, 222 void*& pScratchSpace) 223 { 224 SWR_ASSERT(mPfnDispatch != nullptr); 225 mPfnDispatch(pDC, workerId, threadGroupId, pSpillFillBuffer, pScratchSpace); 226 } 227 228 void* mpTaskData{nullptr}; // The API thread will set this up and the callback task function 229 // will interpet this. 
230 PFN_DISPATCH mPfnDispatch{nullptr}; // Function to call per dispatch 231 OSALIGNLINE(volatile long)232 OSALIGNLINE(volatile long) mTasksAvailable{0}; OSALIGNLINE(volatile long)233 OSALIGNLINE(volatile long) mTasksOutstanding{0}; 234 }; 235 236 /// @note this enum needs to be kept in sync with SWR_TILE_STATE! 237 enum HOTTILE_STATE 238 { 239 HOTTILE_INVALID, // tile is in unitialized state and should be loaded with surface contents 240 // before rendering 241 HOTTILE_CLEAR, // tile should be cleared 242 HOTTILE_DIRTY, // tile has been rendered to 243 HOTTILE_RESOLVED, // tile is consistent with memory (either loaded or stored) 244 }; 245 246 struct HOTTILE 247 { 248 uint8_t* pBuffer; 249 HOTTILE_STATE state; 250 uint32_t clearData[4]; // May need to change based on pfnClearTile implementation. Reorder for 251 // alignment? 252 uint32_t numSamples; 253 uint32_t renderTargetArrayIndex; // current render target array index loaded 254 }; 255 256 union HotTileSet 257 { 258 struct 259 { 260 HOTTILE Color[SWR_NUM_RENDERTARGETS]; 261 HOTTILE Depth; 262 HOTTILE Stencil; 263 }; 264 HOTTILE Attachment[SWR_NUM_ATTACHMENTS]; 265 }; 266 267 class HotTileMgr 268 { 269 public: HotTileMgr()270 HotTileMgr() 271 { 272 memset(mHotTiles, 0, sizeof(mHotTiles)); 273 274 // cache hottile size 275 for (uint32_t i = SWR_ATTACHMENT_COLOR0; i <= SWR_ATTACHMENT_COLOR7; ++i) 276 { 277 mHotTileSize[i] = KNOB_MACROTILE_X_DIM * KNOB_MACROTILE_Y_DIM * 278 FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8; 279 } 280 mHotTileSize[SWR_ATTACHMENT_DEPTH] = KNOB_MACROTILE_X_DIM * KNOB_MACROTILE_Y_DIM * 281 FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8; 282 mHotTileSize[SWR_ATTACHMENT_STENCIL] = KNOB_MACROTILE_X_DIM * KNOB_MACROTILE_Y_DIM * 283 FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8; 284 } 285 ~HotTileMgr()286 ~HotTileMgr() 287 { 288 for (int x = 0; x < KNOB_NUM_HOT_TILES_X; ++x) 289 { 290 for (int y = 0; y < KNOB_NUM_HOT_TILES_Y; ++y) 291 { 292 for (int a = 0; a < SWR_NUM_ATTACHMENTS; 
++a) 293 { 294 FreeHotTileMem(mHotTiles[x][y].Attachment[a].pBuffer); 295 } 296 } 297 } 298 } 299 300 void InitializeHotTiles(SWR_CONTEXT* pContext, 301 DRAW_CONTEXT* pDC, 302 uint32_t workerId, 303 uint32_t macroID); 304 305 HOTTILE* GetHotTile(SWR_CONTEXT* pContext, 306 DRAW_CONTEXT* pDC, 307 HANDLE hWorkerData, 308 uint32_t macroID, 309 SWR_RENDERTARGET_ATTACHMENT attachment, 310 bool create, 311 uint32_t numSamples = 1, 312 uint32_t renderTargetArrayIndex = 0); 313 314 HOTTILE* GetHotTileNoLoad(SWR_CONTEXT* pContext, 315 DRAW_CONTEXT* pDC, 316 uint32_t macroID, 317 SWR_RENDERTARGET_ATTACHMENT attachment, 318 bool create, 319 uint32_t numSamples = 1); 320 321 static void ClearColorHotTile(const HOTTILE* pHotTile); 322 static void ClearDepthHotTile(const HOTTILE* pHotTile); 323 static void ClearStencilHotTile(const HOTTILE* pHotTile); 324 325 private: 326 HotTileSet mHotTiles[KNOB_NUM_HOT_TILES_X][KNOB_NUM_HOT_TILES_Y]; 327 uint32_t mHotTileSize[SWR_NUM_ATTACHMENTS]; 328 AllocHotTileMem(size_t size,uint32_t align,uint32_t numaNode)329 void* AllocHotTileMem(size_t size, uint32_t align, uint32_t numaNode) 330 { 331 void* p = nullptr; 332 #if defined(_WIN32) 333 HANDLE hProcess = GetCurrentProcess(); 334 p = VirtualAllocExNuma( 335 hProcess, nullptr, size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE, numaNode); 336 #else 337 p = AlignedMalloc(size, align); 338 #endif 339 340 return p; 341 } 342 FreeHotTileMem(void * pBuffer)343 void FreeHotTileMem(void* pBuffer) 344 { 345 if (pBuffer) 346 { 347 #if defined(_WIN32) 348 VirtualFree(pBuffer, 0, MEM_RELEASE); 349 #else 350 AlignedFree(pBuffer); 351 #endif 352 } 353 } 354 }; 355