/****************************************************************************
* Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* @file tilemgr.h
*
* @brief Definitions for Macro Tile Manager which provides the facilities
*        for threads to work on an macro tile.
*
******************************************************************************/
#pragma once

#include <cstdint>
#include <cstring>
#include <set>
#include <unordered_map>
#include <vector>
#include "common/formats.h"
#include "fifo.hpp"
#include "context.h"
#include "format_traits.h"

//////////////////////////////////////////////////////////////////////////
/// MacroTile - work queue for a tile.
40 ////////////////////////////////////////////////////////////////////////// 41 struct MacroTileQueue 42 { MacroTileQueueMacroTileQueue43 MacroTileQueue() { } ~MacroTileQueueMacroTileQueue44 ~MacroTileQueue() { } 45 46 ////////////////////////////////////////////////////////////////////////// 47 /// @brief Returns number of work items queued for this tile. getNumQueuedMacroTileQueue48 uint32_t getNumQueued() 49 { 50 return mFifo.getNumQueued(); 51 } 52 53 ////////////////////////////////////////////////////////////////////////// 54 /// @brief Attempt to lock the work fifo. If already locked then return false. tryLockMacroTileQueue55 bool tryLock() 56 { 57 return mFifo.tryLock(); 58 } 59 60 ////////////////////////////////////////////////////////////////////////// 61 /// @brief Clear fifo and unlock it. 62 template <typename ArenaT> clearMacroTileQueue63 void clear(ArenaT& arena) 64 { 65 mFifo.clear(arena); 66 } 67 68 ////////////////////////////////////////////////////////////////////////// 69 /// @brief Peek at work sitting at the front of the fifo. peekMacroTileQueue70 BE_WORK* peek() 71 { 72 return mFifo.peek(); 73 } 74 75 template <typename ArenaT> enqueue_try_nosyncMacroTileQueue76 bool enqueue_try_nosync(ArenaT& arena, const BE_WORK* entry) 77 { 78 return mFifo.enqueue_try_nosync(arena, entry); 79 } 80 81 ////////////////////////////////////////////////////////////////////////// 82 /// @brief Move to next work item dequeueMacroTileQueue83 void dequeue() 84 { 85 mFifo.dequeue_noinc(); 86 } 87 88 ////////////////////////////////////////////////////////////////////////// 89 /// @brief Destroy fifo destroyMacroTileQueue90 void destroy() 91 { 92 mFifo.destroy(); 93 } 94 95 ///@todo This will all be private. 
96 uint32_t mWorkItemsFE = 0; 97 uint32_t mWorkItemsBE = 0; 98 uint32_t mId = 0; 99 100 private: 101 QUEUE<BE_WORK> mFifo; 102 }; 103 104 ////////////////////////////////////////////////////////////////////////// 105 /// MacroTileMgr - Manages macrotiles for a draw. 106 ////////////////////////////////////////////////////////////////////////// 107 class MacroTileMgr 108 { 109 public: 110 MacroTileMgr(CachingArena& arena); ~MacroTileMgr()111 ~MacroTileMgr() 112 { 113 for (auto &tile : mTiles) 114 { 115 tile.second.destroy(); 116 } 117 } 118 initialize()119 INLINE void initialize() 120 { 121 mWorkItemsProduced = 0; 122 mWorkItemsConsumed = 0; 123 124 mDirtyTiles.clear(); 125 } 126 getDirtyTiles()127 INLINE std::vector<MacroTileQueue*>& getDirtyTiles() { return mDirtyTiles; } 128 void markTileComplete(uint32_t id); 129 isWorkComplete()130 INLINE bool isWorkComplete() 131 { 132 return mWorkItemsProduced == mWorkItemsConsumed; 133 } 134 135 void enqueue(uint32_t x, uint32_t y, BE_WORK *pWork); 136 getTileIndices(uint32_t tileID,uint32_t & x,uint32_t & y)137 static INLINE void getTileIndices(uint32_t tileID, uint32_t &x, uint32_t &y) 138 { 139 y = tileID & 0xffff; 140 x = (tileID >> 16) & 0xffff; 141 } 142 143 private: 144 CachingArena& mArena; 145 std::unordered_map<uint32_t, MacroTileQueue> mTiles; 146 147 // Any tile that has work queued to it is a dirty tile. 
148 std::vector<MacroTileQueue*> mDirtyTiles; 149 OSALIGNLINE(long)150 OSALIGNLINE(long) mWorkItemsProduced { 0 }; OSALIGNLINE(volatile long)151 OSALIGNLINE(volatile long) mWorkItemsConsumed { 0 }; 152 }; 153 154 typedef void(*PFN_DISPATCH)(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroupId, void*& pSpillFillBuffer, void*& pScratchSpace); 155 156 ////////////////////////////////////////////////////////////////////////// 157 /// DispatchQueue - work queue for dispatch 158 ////////////////////////////////////////////////////////////////////////// 159 class DispatchQueue 160 { 161 public: DispatchQueue()162 DispatchQueue() {} 163 164 ////////////////////////////////////////////////////////////////////////// 165 /// @brief Setup the producer consumer counts. initialize(uint32_t totalTasks,void * pTaskData,PFN_DISPATCH pfnDispatch)166 void initialize(uint32_t totalTasks, void* pTaskData, PFN_DISPATCH pfnDispatch) 167 { 168 // The available and outstanding counts start with total tasks. 169 // At the start there are N tasks available and outstanding. 170 // When both the available and outstanding counts have reached 0 then all work has completed. 171 // When a worker starts on a threadgroup then it decrements the available count. 172 // When a worker completes a threadgroup then it decrements the outstanding count. 173 174 mTasksAvailable = totalTasks; 175 mTasksOutstanding = totalTasks; 176 177 mpTaskData = pTaskData; 178 mPfnDispatch = pfnDispatch; 179 } 180 181 ////////////////////////////////////////////////////////////////////////// 182 /// @brief Returns number of tasks available for this dispatch. getNumQueued()183 uint32_t getNumQueued() 184 { 185 return (mTasksAvailable > 0) ? mTasksAvailable : 0; 186 } 187 188 ////////////////////////////////////////////////////////////////////////// 189 /// @brief Atomically decrement the work available count. If the result 190 // is greater than 0 then we can on the associated thread group. 
191 // Otherwise, there is no more work to do. getWork(uint32_t & groupId)192 bool getWork(uint32_t& groupId) 193 { 194 long result = InterlockedDecrement(&mTasksAvailable); 195 196 if (result >= 0) 197 { 198 groupId = result; 199 return true; 200 } 201 202 return false; 203 } 204 205 ////////////////////////////////////////////////////////////////////////// 206 /// @brief Atomically decrement the outstanding count. A worker is notifying 207 /// us that he just finished some work. Also, return true if we're 208 /// the last worker to complete this dispatch. finishedWork()209 bool finishedWork() 210 { 211 long result = InterlockedDecrement(&mTasksOutstanding); 212 SWR_ASSERT(result >= 0, "Should never oversubscribe work"); 213 214 return (result == 0) ? true : false; 215 } 216 217 ////////////////////////////////////////////////////////////////////////// 218 /// @brief Work is complete once both the available/outstanding counts have reached 0. isWorkComplete()219 bool isWorkComplete() 220 { 221 return ((mTasksAvailable <= 0) && 222 (mTasksOutstanding <= 0)); 223 } 224 225 ////////////////////////////////////////////////////////////////////////// 226 /// @brief Return pointer to task data. GetTasksData()227 const void* GetTasksData() 228 { 229 return mpTaskData; 230 } 231 232 ////////////////////////////////////////////////////////////////////////// 233 /// @brief Dispatches a unit of work dispatch(DRAW_CONTEXT * pDC,uint32_t workerId,uint32_t threadGroupId,void * & pSpillFillBuffer,void * & pScratchSpace)234 void dispatch(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroupId, void*& pSpillFillBuffer, void*& pScratchSpace) 235 { 236 SWR_ASSERT(mPfnDispatch != nullptr); 237 mPfnDispatch(pDC, workerId, threadGroupId, pSpillFillBuffer, pScratchSpace); 238 } 239 240 void* mpTaskData{ nullptr }; // The API thread will set this up and the callback task function will interpet this. 
241 PFN_DISPATCH mPfnDispatch{ nullptr }; // Function to call per dispatch 242 OSALIGNLINE(volatile long)243 OSALIGNLINE(volatile long) mTasksAvailable{ 0 }; OSALIGNLINE(volatile long)244 OSALIGNLINE(volatile long) mTasksOutstanding{ 0 }; 245 }; 246 247 248 enum HOTTILE_STATE 249 { 250 HOTTILE_INVALID, // tile is in unitialized state and should be loaded with surface contents before rendering 251 HOTTILE_CLEAR, // tile should be cleared 252 HOTTILE_DIRTY, // tile has been rendered to 253 HOTTILE_RESOLVED, // tile has been stored to memory 254 }; 255 256 struct HOTTILE 257 { 258 uint8_t *pBuffer; 259 HOTTILE_STATE state; 260 DWORD clearData[4]; // May need to change based on pfnClearTile implementation. Reorder for alignment? 261 uint32_t numSamples; 262 uint32_t renderTargetArrayIndex; // current render target array index loaded 263 }; 264 265 union HotTileSet 266 { 267 struct 268 { 269 HOTTILE Color[SWR_NUM_RENDERTARGETS]; 270 HOTTILE Depth; 271 HOTTILE Stencil; 272 }; 273 HOTTILE Attachment[SWR_NUM_ATTACHMENTS]; 274 }; 275 276 class HotTileMgr 277 { 278 public: HotTileMgr()279 HotTileMgr() 280 { 281 memset(mHotTiles, 0, sizeof(mHotTiles)); 282 283 // cache hottile size 284 for (uint32_t i = SWR_ATTACHMENT_COLOR0; i <= SWR_ATTACHMENT_COLOR7; ++i) 285 { 286 mHotTileSize[i] = KNOB_MACROTILE_X_DIM * KNOB_MACROTILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8; 287 } 288 mHotTileSize[SWR_ATTACHMENT_DEPTH] = KNOB_MACROTILE_X_DIM * KNOB_MACROTILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8; 289 mHotTileSize[SWR_ATTACHMENT_STENCIL] = KNOB_MACROTILE_X_DIM * KNOB_MACROTILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8; 290 } 291 ~HotTileMgr()292 ~HotTileMgr() 293 { 294 for (int x = 0; x < KNOB_NUM_HOT_TILES_X; ++x) 295 { 296 for (int y = 0; y < KNOB_NUM_HOT_TILES_Y; ++y) 297 { 298 for (int a = 0; a < SWR_NUM_ATTACHMENTS; ++a) 299 { 300 FreeHotTileMem(mHotTiles[x][y].Attachment[a].pBuffer); 301 } 302 } 303 } 304 } 305 306 void 
InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroID); 307 308 HOTTILE *GetHotTile(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t macroID, SWR_RENDERTARGET_ATTACHMENT attachment, bool create, uint32_t numSamples = 1, 309 uint32_t renderTargetArrayIndex = 0); 310 311 HOTTILE *GetHotTileNoLoad(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t macroID, SWR_RENDERTARGET_ATTACHMENT attachment, bool create, uint32_t numSamples = 1); 312 313 static void ClearColorHotTile(const HOTTILE* pHotTile); 314 static void ClearDepthHotTile(const HOTTILE* pHotTile); 315 static void ClearStencilHotTile(const HOTTILE* pHotTile); 316 317 private: 318 HotTileSet mHotTiles[KNOB_NUM_HOT_TILES_X][KNOB_NUM_HOT_TILES_Y]; 319 uint32_t mHotTileSize[SWR_NUM_ATTACHMENTS]; 320 AllocHotTileMem(size_t size,uint32_t align,uint32_t numaNode)321 void* AllocHotTileMem(size_t size, uint32_t align, uint32_t numaNode) 322 { 323 void* p = nullptr; 324 #if defined(_WIN32) 325 HANDLE hProcess = GetCurrentProcess(); 326 p = VirtualAllocExNuma(hProcess, nullptr, size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE, numaNode); 327 #else 328 p = AlignedMalloc(size, align); 329 #endif 330 331 return p; 332 } 333 FreeHotTileMem(void * pBuffer)334 void FreeHotTileMem(void* pBuffer) 335 { 336 if (pBuffer) 337 { 338 #if defined(_WIN32) 339 VirtualFree(pBuffer, 0, MEM_RELEASE); 340 #else 341 AlignedFree(pBuffer); 342 #endif 343 } 344 } 345 }; 346 347