1 /**************************************************************************** 2 * Copyright (C) 2016 Intel Corporation. All Rights Reserved. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * @file archrast.cpp 24 * 25 * @brief Implementation for archrast. 26 * 27 ******************************************************************************/ 28 #include <atomic> 29 30 #include "common/os.h" 31 #include "archrast/archrast.h" 32 #include "archrast/eventmanager.h" 33 #include "gen_ar_eventhandlerfile.hpp" 34 35 namespace ArchRast 36 { 37 ////////////////////////////////////////////////////////////////////////// 38 /// @brief struct that keeps track of depth and stencil event information 39 struct DepthStencilStats 40 { 41 uint32_t earlyZTestPassCount = 0; 42 uint32_t earlyZTestFailCount = 0; 43 uint32_t lateZTestPassCount = 0; 44 uint32_t lateZTestFailCount = 0; 45 uint32_t earlyStencilTestPassCount = 0; 46 uint32_t earlyStencilTestFailCount = 0; 47 uint32_t lateStencilTestPassCount = 0; 48 uint32_t lateStencilTestFailCount = 0; 49 }; 50 51 struct CStats 52 { 53 uint32_t clippedVerts = 0; 54 }; 55 56 struct TEStats 57 { 58 uint32_t inputPrims = 0; 59 //@todo:: Change this to numPatches. Assumed: 1 patch per prim. If holds, its fine. 60 }; 61 62 struct GSStats 63 { 64 uint32_t inputPrimCount; 65 uint32_t primGeneratedCount; 66 uint32_t vertsInput; 67 }; 68 69 ////////////////////////////////////////////////////////////////////////// 70 /// @brief Event handler that saves stat events to event files. This 71 /// handler filters out unwanted events. 72 class EventHandlerStatsFile : public EventHandlerFile 73 { 74 public: EventHandlerStatsFile(uint32_t id)75 EventHandlerStatsFile(uint32_t id) : EventHandlerFile(id), mNeedFlush(false) {} 76 77 // These are events that we're not interested in saving in stats event files. Handle(const Start & event)78 virtual void Handle(const Start& event) {} Handle(const End & event)79 virtual void Handle(const End& event) {} 80 Handle(const EarlyDepthStencilInfoSingleSample & event)81 virtual void Handle(const EarlyDepthStencilInfoSingleSample& event) 82 { 83 //earlyZ test compute 84 mDSSingleSample.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask); 85 mDSSingleSample.earlyZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask); 86 87 //earlyStencil test compute 88 mDSSingleSample.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask); 89 mDSSingleSample.earlyStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask); 90 mNeedFlush = true; 91 } 92 Handle(const EarlyDepthStencilInfoSampleRate & event)93 virtual void Handle(const EarlyDepthStencilInfoSampleRate& event) 94 { 95 //earlyZ test compute 96 mDSSampleRate.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask); 97 mDSSampleRate.earlyZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask); 98 99 //earlyStencil test compute 100 mDSSampleRate.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask); 101 mDSSampleRate.earlyStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask); 102 mNeedFlush = true; 103 } 104 Handle(const EarlyDepthStencilInfoNullPS & event)105 virtual void Handle(const EarlyDepthStencilInfoNullPS& event) 106 { 107 //earlyZ test compute 108 mDSNullPS.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask); 109 mDSNullPS.earlyZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask); 110 111 //earlyStencil test compute 112 mDSNullPS.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask); 113 mDSNullPS.earlyStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask); 114 mNeedFlush = true; 115 } 116 Handle(const LateDepthStencilInfoSingleSample & event)117 virtual void Handle(const LateDepthStencilInfoSingleSample& event) 118 { 119 //lateZ test compute 120 mDSSingleSample.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask); 121 mDSSingleSample.lateZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask); 122 123 //lateStencil test compute 124 mDSSingleSample.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask); 125 mDSSingleSample.lateStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask); 126 mNeedFlush = true; 127 } 128 Handle(const LateDepthStencilInfoSampleRate & event)129 virtual void Handle(const LateDepthStencilInfoSampleRate& event) 130 { 131 //lateZ test compute 132 mDSSampleRate.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask); 133 mDSSampleRate.lateZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask); 134 135 //lateStencil test compute 136 mDSSampleRate.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask); 137 mDSSampleRate.lateStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask); 138 mNeedFlush = true; 139 } 140 Handle(const LateDepthStencilInfoNullPS & event)141 virtual void Handle(const LateDepthStencilInfoNullPS& event) 142 { 143 //lateZ test compute 144 mDSNullPS.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask); 145 mDSNullPS.lateZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask); 146 147 //lateStencil test compute 148 mDSNullPS.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask); 149 mDSNullPS.lateStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask); 150 mNeedFlush = true; 151 } 152 Handle(const EarlyDepthInfoPixelRate & event)153 virtual void Handle(const EarlyDepthInfoPixelRate& event) 154 { 155 //earlyZ test compute 156 mDSPixelRate.earlyZTestPassCount += event.data.depthPassCount; 157 mDSPixelRate.earlyZTestFailCount += (_mm_popcnt_u32(event.data.activeLanes) - event.data.depthPassCount); 158 mNeedFlush = true; 159 } 160 161 Handle(const LateDepthInfoPixelRate & event)162 virtual void Handle(const LateDepthInfoPixelRate& event) 163 { 164 //lateZ test compute 165 mDSPixelRate.lateZTestPassCount += event.data.depthPassCount; 166 mDSPixelRate.lateZTestFailCount += (_mm_popcnt_u32(event.data.activeLanes) - event.data.depthPassCount); 167 mNeedFlush = true; 168 } 169 170 171 // Flush cached events for this draw FlushDraw(uint32_t drawId)172 virtual void FlushDraw(uint32_t drawId) 173 { 174 if (mNeedFlush == false) return; 175 176 //singleSample 177 EventHandlerFile::Handle(EarlyZSingleSample(drawId, mDSSingleSample.earlyZTestPassCount, mDSSingleSample.earlyZTestFailCount)); 178 EventHandlerFile::Handle(LateZSingleSample(drawId, mDSSingleSample.lateZTestPassCount, mDSSingleSample.lateZTestFailCount)); 179 EventHandlerFile::Handle(EarlyStencilSingleSample(drawId, mDSSingleSample.earlyStencilTestPassCount, mDSSingleSample.earlyStencilTestFailCount)); 180 EventHandlerFile::Handle(LateStencilSingleSample(drawId, mDSSingleSample.lateStencilTestPassCount, mDSSingleSample.lateStencilTestFailCount)); 181 182 //sampleRate 183 EventHandlerFile::Handle(EarlyZSampleRate(drawId, mDSSampleRate.earlyZTestPassCount, mDSSampleRate.earlyZTestFailCount)); 184 EventHandlerFile::Handle(LateZSampleRate(drawId, mDSSampleRate.lateZTestPassCount, mDSSampleRate.lateZTestFailCount)); 185 EventHandlerFile::Handle(EarlyStencilSampleRate(drawId, mDSSampleRate.earlyStencilTestPassCount, mDSSampleRate.earlyStencilTestFailCount)); 186 EventHandlerFile::Handle(LateStencilSampleRate(drawId, mDSSampleRate.lateStencilTestPassCount, mDSSampleRate.lateStencilTestFailCount)); 187 188 //pixelRate 189 EventHandlerFile::Handle(EarlyZPixelRate(drawId, mDSPixelRate.earlyZTestPassCount, mDSPixelRate.earlyZTestFailCount)); 190 EventHandlerFile::Handle(LateZPixelRate(drawId, mDSPixelRate.lateZTestPassCount, mDSPixelRate.lateZTestFailCount)); 191 192 193 //NullPS 194 EventHandlerFile::Handle(EarlyZNullPS(drawId, mDSNullPS.earlyZTestPassCount, mDSNullPS.earlyZTestFailCount)); 195 EventHandlerFile::Handle(EarlyStencilNullPS(drawId, mDSNullPS.earlyStencilTestPassCount, mDSNullPS.earlyStencilTestFailCount)); 196 197 //Reset Internal Counters 198 mDSSingleSample = {}; 199 mDSSampleRate = {}; 200 mDSPixelRate = {}; 201 mDSNullPS = {}; 202 203 mNeedFlush = false; 204 } 205 Handle(const FrontendDrawEndEvent & event)206 virtual void Handle(const FrontendDrawEndEvent& event) 207 { 208 //Clipper 209 EventHandlerFile::Handle(VertsClipped(event.data.drawId, mClipper.clippedVerts)); 210 211 //Tesselator 212 EventHandlerFile::Handle(TessPrims(event.data.drawId, mTS.inputPrims)); 213 214 //Geometry Shader 215 EventHandlerFile::Handle(GSInputPrims(event.data.drawId, mGS.inputPrimCount)); 216 EventHandlerFile::Handle(GSPrimsGen(event.data.drawId, mGS.primGeneratedCount)); 217 EventHandlerFile::Handle(GSVertsInput(event.data.drawId, mGS.vertsInput)); 218 219 //Reset Internal Counters 220 mClipper = {}; 221 mTS = {}; 222 mGS = {}; 223 } 224 Handle(const GSPrimInfo & event)225 virtual void Handle(const GSPrimInfo& event) 226 { 227 mGS.inputPrimCount += event.data.inputPrimCount; 228 mGS.primGeneratedCount += event.data.primGeneratedCount; 229 mGS.vertsInput += event.data.vertsInput; 230 } 231 Handle(const ClipVertexCount & event)232 virtual void Handle(const ClipVertexCount& event) 233 { 234 mClipper.clippedVerts += (_mm_popcnt_u32(event.data.primMask) * event.data.vertsPerPrim); 235 } 236 Handle(const TessPrimCount & event)237 virtual void Handle(const TessPrimCount& event) 238 { 239 mTS.inputPrims += event.data.primCount; 240 } 241 242 protected: 243 bool mNeedFlush; 244 // Per draw stats 245 DepthStencilStats mDSSingleSample = {}; 246 DepthStencilStats mDSSampleRate = {}; 247 DepthStencilStats mDSPixelRate = {}; 248 DepthStencilStats mDSNullPS = {}; 249 DepthStencilStats mDSOmZ = {}; 250 CStats mClipper = {}; 251 TEStats mTS = {}; 252 GSStats mGS = {}; 253 254 }; 255 FromHandle(HANDLE hThreadContext)256 static EventManager* FromHandle(HANDLE hThreadContext) 257 { 258 return reinterpret_cast<EventManager*>(hThreadContext); 259 } 260 261 // Construct an event manager and associate a handler with it. CreateThreadContext(AR_THREAD type)262 HANDLE CreateThreadContext(AR_THREAD type) 263 { 264 // Can we assume single threaded here? 265 static std::atomic<uint32_t> counter(0); 266 uint32_t id = counter.fetch_add(1); 267 268 EventManager* pManager = new EventManager(); 269 EventHandlerFile* pHandler = new EventHandlerStatsFile(id); 270 271 if (pManager && pHandler) 272 { 273 pManager->Attach(pHandler); 274 275 if (type == AR_THREAD::API) 276 { 277 pHandler->Handle(ThreadStartApiEvent()); 278 } 279 else 280 { 281 pHandler->Handle(ThreadStartWorkerEvent()); 282 } 283 pHandler->MarkHeader(); 284 285 return pManager; 286 } 287 288 SWR_INVALID("Failed to register thread."); 289 return nullptr; 290 } 291 DestroyThreadContext(HANDLE hThreadContext)292 void DestroyThreadContext(HANDLE hThreadContext) 293 { 294 EventManager* pManager = FromHandle(hThreadContext); 295 SWR_ASSERT(pManager != nullptr); 296 297 delete pManager; 298 } 299 300 // Dispatch event for this thread. Dispatch(HANDLE hThreadContext,const Event & event)301 void Dispatch(HANDLE hThreadContext, const Event& event) 302 { 303 EventManager* pManager = FromHandle(hThreadContext); 304 SWR_ASSERT(pManager != nullptr); 305 306 pManager->Dispatch(event); 307 } 308 309 // Flush for this thread. FlushDraw(HANDLE hThreadContext,uint32_t drawId)310 void FlushDraw(HANDLE hThreadContext, uint32_t drawId) 311 { 312 EventManager* pManager = FromHandle(hThreadContext); 313 SWR_ASSERT(pManager != nullptr); 314 315 pManager->FlushDraw(drawId); 316 } 317 } 318