1 /**************************************************************************** 2 * Copyright (C) 2016 Intel Corporation. All Rights Reserved. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * @file archrast.h 24 * 25 * @brief Definitions for archrast. 26 * 27 ******************************************************************************/ 28 #include <atomic> 29 30 #include "common/os.h" 31 #include "archrast/archrast.h" 32 #include "archrast/eventmanager.h" 33 #include "gen_ar_eventhandlerfile.h" 34 35 namespace ArchRast 36 { 37 ////////////////////////////////////////////////////////////////////////// 38 /// @brief struct that keeps track of depth and stencil event information 39 struct DepthStencilStats 40 { 41 uint32_t earlyZTestPassCount = 0; 42 uint32_t earlyZTestFailCount = 0; 43 uint32_t lateZTestPassCount = 0; 44 uint32_t lateZTestFailCount = 0; 45 uint32_t earlyStencilTestPassCount = 0; 46 uint32_t earlyStencilTestFailCount = 0; 47 uint32_t lateStencilTestPassCount = 0; 48 uint32_t lateStencilTestFailCount = 0; 49 uint32_t earlyZTestCount = 0; 50 uint32_t lateZTestCount = 0; 51 uint32_t earlyStencilTestCount = 0; 52 uint32_t lateStencilTestCount = 0; 53 }; 54 55 struct CStats 56 { 57 uint32_t clippedVerts = 0; 58 }; 59 60 struct TEStats 61 { 62 uint32_t inputPrims = 0; 63 //@todo:: Change this to numPatches. Assumed: 1 patch per prim. If holds, its fine. 64 }; 65 66 struct GSStats 67 { 68 uint32_t inputPrimCount; 69 uint32_t primGeneratedCount; 70 uint32_t vertsInput; 71 }; 72 73 ////////////////////////////////////////////////////////////////////////// 74 /// @brief Event handler that saves stat events to event files. This 75 /// handler filters out unwanted events. 76 class EventHandlerStatsFile : public EventHandlerFile 77 { 78 public: 79 DepthStencilStats DSSingleSample = {}; 80 DepthStencilStats DSSampleRate = {}; 81 DepthStencilStats DSPixelRate = {}; 82 DepthStencilStats DSNullPS = {}; 83 DepthStencilStats DSOmZ = {}; 84 CStats CS = {}; 85 TEStats TS = {}; 86 GSStats GS = {}; 87 EventHandlerStatsFile(uint32_t id)88 EventHandlerStatsFile(uint32_t id) : EventHandlerFile(id) {} 89 90 // These are events that we're not interested in saving in stats event files. Handle(Start & event)91 virtual void Handle(Start& event) {} Handle(End & event)92 virtual void Handle(End& event) {} 93 Handle(EarlyDepthStencilInfoSingleSample & event)94 virtual void Handle(EarlyDepthStencilInfoSingleSample& event) 95 { 96 //earlyZ test compute 97 DSSingleSample.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask); 98 DSSingleSample.earlyZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask); 99 DSSingleSample.earlyZTestCount += (_mm_popcnt_u32(event.data.depthPassMask) + _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask)); 100 101 //earlyStencil test compute 102 DSSingleSample.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask); 103 DSSingleSample.earlyStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask); 104 DSSingleSample.earlyStencilTestCount += (_mm_popcnt_u32(event.data.stencilPassMask) + _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask)); 105 106 //outputerMerger test compute 107 DSOmZ.earlyZTestPassCount += DSSingleSample.earlyZTestPassCount; 108 DSOmZ.earlyZTestFailCount += DSSingleSample.earlyZTestFailCount; 109 DSOmZ.earlyZTestCount += DSSingleSample.earlyZTestCount; 110 DSOmZ.earlyStencilTestPassCount += DSSingleSample.earlyStencilTestPassCount; 111 DSOmZ.earlyStencilTestFailCount += DSSingleSample.earlyStencilTestFailCount; 112 DSOmZ.earlyStencilTestCount += DSSingleSample.earlyStencilTestCount; 113 } 114 Handle(EarlyDepthStencilInfoSampleRate & event)115 virtual void Handle(EarlyDepthStencilInfoSampleRate& event) 116 { 117 //earlyZ test compute 118 DSSampleRate.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask); 119 DSSampleRate.earlyZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask); 120 DSSampleRate.earlyZTestCount += (_mm_popcnt_u32(event.data.depthPassMask) + _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask)); 121 122 //earlyStencil test compute 123 DSSampleRate.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask); 124 DSSampleRate.earlyStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask); 125 DSSampleRate.earlyStencilTestCount += (_mm_popcnt_u32(event.data.stencilPassMask) + _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask)); 126 127 //outputerMerger test compute 128 DSOmZ.earlyZTestPassCount += DSSampleRate.earlyZTestPassCount; 129 DSOmZ.earlyZTestFailCount += DSSampleRate.earlyZTestFailCount; 130 DSOmZ.earlyZTestCount += DSSampleRate.earlyZTestCount; 131 DSOmZ.earlyStencilTestPassCount += DSSampleRate.earlyStencilTestPassCount; 132 DSOmZ.earlyStencilTestFailCount += DSSampleRate.earlyStencilTestFailCount; 133 DSOmZ.earlyStencilTestCount += DSSampleRate.earlyStencilTestCount; 134 } 135 Handle(EarlyDepthStencilInfoNullPS & event)136 virtual void Handle(EarlyDepthStencilInfoNullPS& event) 137 { 138 //earlyZ test compute 139 DSNullPS.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask); 140 DSNullPS.earlyZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask); 141 DSNullPS.earlyZTestCount += (_mm_popcnt_u32(event.data.depthPassMask) + _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask)); 142 143 //earlyStencil test compute 144 DSNullPS.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask); 145 DSNullPS.earlyStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask); 146 DSNullPS.earlyStencilTestCount += (_mm_popcnt_u32(event.data.stencilPassMask) + _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask)); 147 148 //outputerMerger test compute 149 DSOmZ.earlyZTestPassCount += DSNullPS.earlyZTestPassCount; 150 DSOmZ.earlyZTestFailCount += DSNullPS.earlyZTestFailCount; 151 DSOmZ.earlyZTestCount += DSNullPS.earlyZTestCount; 152 DSOmZ.earlyStencilTestPassCount += DSNullPS.earlyStencilTestPassCount; 153 DSOmZ.earlyStencilTestFailCount += DSNullPS.earlyStencilTestFailCount; 154 DSOmZ.earlyStencilTestCount += DSNullPS.earlyStencilTestCount; 155 } 156 Handle(LateDepthStencilInfoSingleSample & event)157 virtual void Handle(LateDepthStencilInfoSingleSample& event) 158 { 159 //lateZ test compute 160 DSSingleSample.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask); 161 DSSingleSample.lateZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask); 162 DSSingleSample.lateZTestCount += (_mm_popcnt_u32(event.data.depthPassMask) + _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask)); 163 164 //lateStencil test compute 165 DSSingleSample.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask); 166 DSSingleSample.lateStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask); 167 DSSingleSample.lateStencilTestCount += (_mm_popcnt_u32(event.data.stencilPassMask) + _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask)); 168 169 //outputerMerger test compute 170 DSOmZ.lateZTestPassCount += DSSingleSample.lateZTestPassCount; 171 DSOmZ.lateZTestFailCount += DSSingleSample.lateZTestFailCount; 172 DSOmZ.lateZTestCount += DSSingleSample.lateZTestCount; 173 DSOmZ.lateStencilTestPassCount += DSSingleSample.lateStencilTestPassCount; 174 DSOmZ.lateStencilTestFailCount += DSSingleSample.lateStencilTestFailCount; 175 DSOmZ.lateStencilTestCount += DSSingleSample.lateStencilTestCount; 176 } 177 Handle(LateDepthStencilInfoSampleRate & event)178 virtual void Handle(LateDepthStencilInfoSampleRate& event) 179 { 180 //lateZ test compute 181 DSSampleRate.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask); 182 DSSampleRate.lateZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask); 183 DSSampleRate.lateZTestCount += (_mm_popcnt_u32(event.data.depthPassMask) + _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask)); 184 185 //lateStencil test compute 186 DSSampleRate.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask); 187 DSSampleRate.lateStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask); 188 DSSampleRate.lateStencilTestCount += (_mm_popcnt_u32(event.data.stencilPassMask) + _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask)); 189 190 //outputerMerger test compute 191 DSOmZ.lateZTestPassCount += DSSampleRate.lateZTestPassCount; 192 DSOmZ.lateZTestFailCount += DSSampleRate.lateZTestFailCount; 193 DSOmZ.lateZTestCount += DSSampleRate.lateZTestCount; 194 DSOmZ.lateStencilTestPassCount += DSSampleRate.lateStencilTestPassCount; 195 DSOmZ.lateStencilTestFailCount += DSSampleRate.lateStencilTestFailCount; 196 DSOmZ.lateStencilTestCount += DSSampleRate.lateStencilTestCount; 197 } 198 Handle(LateDepthStencilInfoNullPS & event)199 virtual void Handle(LateDepthStencilInfoNullPS& event) 200 { 201 //lateZ test compute 202 DSNullPS.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask); 203 DSNullPS.lateZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask); 204 DSNullPS.lateZTestCount += (_mm_popcnt_u32(event.data.depthPassMask) + _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask)); 205 206 //lateStencil test compute 207 DSNullPS.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask); 208 DSNullPS.lateStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask); 209 DSNullPS.lateStencilTestCount += (_mm_popcnt_u32(event.data.stencilPassMask) + _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask)); 210 211 //outputerMerger test compute 212 DSOmZ.lateZTestPassCount += DSNullPS.lateZTestPassCount; 213 DSOmZ.lateZTestFailCount += DSNullPS.lateZTestFailCount; 214 DSOmZ.lateZTestCount += DSNullPS.lateZTestCount; 215 DSOmZ.lateStencilTestPassCount += DSNullPS.lateStencilTestPassCount; 216 DSOmZ.lateStencilTestFailCount += DSNullPS.lateStencilTestFailCount; 217 DSOmZ.lateStencilTestCount += DSNullPS.lateStencilTestCount; 218 } 219 Handle(EarlyDepthInfoPixelRate & event)220 virtual void Handle(EarlyDepthInfoPixelRate& event) 221 { 222 //earlyZ test compute 223 DSPixelRate.earlyZTestCount += _mm_popcnt_u32(event.data.activeLanes); 224 DSPixelRate.earlyZTestPassCount += event.data.depthPassCount; 225 DSPixelRate.earlyZTestFailCount += (_mm_popcnt_u32(event.data.activeLanes) - event.data.depthPassCount); 226 227 //outputerMerger test compute 228 DSOmZ.earlyZTestPassCount += DSPixelRate.earlyZTestPassCount; 229 DSOmZ.earlyZTestFailCount += DSPixelRate.earlyZTestFailCount; 230 DSOmZ.earlyZTestCount += DSPixelRate.earlyZTestCount; 231 } 232 233 Handle(LateDepthInfoPixelRate & event)234 virtual void Handle(LateDepthInfoPixelRate& event) 235 { 236 //lateZ test compute 237 DSPixelRate.lateZTestCount += _mm_popcnt_u32(event.data.activeLanes); 238 DSPixelRate.lateZTestPassCount += event.data.depthPassCount; 239 DSPixelRate.lateZTestFailCount += (_mm_popcnt_u32(event.data.activeLanes) - event.data.depthPassCount); 240 241 //outputerMerger test compute 242 DSOmZ.lateZTestPassCount += DSPixelRate.lateZTestPassCount; 243 DSOmZ.lateZTestFailCount += DSPixelRate.lateZTestFailCount; 244 DSOmZ.lateZTestCount += DSPixelRate.lateZTestCount; 245 246 } 247 248 Handle(BackendDrawEndEvent & event)249 virtual void Handle(BackendDrawEndEvent& event) 250 { 251 //singleSample 252 EventHandlerFile::Handle(EarlyZSingleSample(event.data.drawId, DSSingleSample.earlyZTestPassCount, DSSingleSample.earlyZTestFailCount, DSSingleSample.earlyZTestCount)); 253 EventHandlerFile::Handle(LateZSingleSample(event.data.drawId, DSSingleSample.lateZTestPassCount, DSSingleSample.lateZTestFailCount, DSSingleSample.lateZTestCount)); 254 EventHandlerFile::Handle(EarlyStencilSingleSample(event.data.drawId, DSSingleSample.earlyStencilTestPassCount, DSSingleSample.earlyStencilTestFailCount, DSSingleSample.earlyStencilTestCount)); 255 EventHandlerFile::Handle(LateStencilSingleSample(event.data.drawId, DSSingleSample.lateStencilTestPassCount, DSSingleSample.lateStencilTestFailCount, DSSingleSample.lateStencilTestCount)); 256 257 //sampleRate 258 EventHandlerFile::Handle(EarlyZSampleRate(event.data.drawId, DSSampleRate.earlyZTestPassCount, DSSampleRate.earlyZTestFailCount, DSSampleRate.earlyZTestCount)); 259 EventHandlerFile::Handle(LateZSampleRate(event.data.drawId, DSSampleRate.lateZTestPassCount, DSSampleRate.lateZTestFailCount, DSSampleRate.lateZTestCount)); 260 EventHandlerFile::Handle(EarlyStencilSampleRate(event.data.drawId, DSSampleRate.earlyStencilTestPassCount, DSSampleRate.earlyStencilTestFailCount, DSSampleRate.earlyStencilTestCount)); 261 EventHandlerFile::Handle(LateStencilSampleRate(event.data.drawId, DSSampleRate.lateStencilTestPassCount, DSSampleRate.lateStencilTestFailCount, DSSampleRate.lateStencilTestCount)); 262 263 //pixelRate 264 EventHandlerFile::Handle(EarlyZPixelRate(event.data.drawId, DSPixelRate.earlyZTestPassCount, DSPixelRate.earlyZTestFailCount, DSPixelRate.earlyZTestCount)); 265 EventHandlerFile::Handle(LateZPixelRate(event.data.drawId, DSPixelRate.lateZTestPassCount, DSPixelRate.lateZTestFailCount, DSPixelRate.lateZTestCount)); 266 267 268 //NullPS 269 EventHandlerFile::Handle(EarlyZNullPS(event.data.drawId, DSNullPS.earlyZTestPassCount, DSNullPS.earlyZTestFailCount, DSNullPS.earlyZTestCount)); 270 EventHandlerFile::Handle(EarlyStencilNullPS(event.data.drawId, DSNullPS.earlyStencilTestPassCount, DSNullPS.earlyStencilTestFailCount, DSNullPS.earlyStencilTestCount)); 271 272 //OmZ 273 EventHandlerFile::Handle(EarlyOmZ(event.data.drawId, DSOmZ.earlyZTestPassCount, DSOmZ.earlyZTestFailCount, DSOmZ.earlyZTestCount)); 274 EventHandlerFile::Handle(EarlyOmStencil(event.data.drawId, DSOmZ.earlyStencilTestPassCount, DSOmZ.earlyStencilTestFailCount, DSOmZ.earlyStencilTestCount)); 275 EventHandlerFile::Handle(LateOmZ(event.data.drawId, DSOmZ.lateZTestPassCount, DSOmZ.lateZTestFailCount, DSOmZ.lateZTestCount)); 276 EventHandlerFile::Handle(LateOmStencil(event.data.drawId, DSOmZ.lateStencilTestPassCount, DSOmZ.lateStencilTestFailCount, DSOmZ.lateStencilTestCount)); 277 278 //Reset Internal Counters 279 DSSingleSample = {}; 280 DSSampleRate = {}; 281 DSPixelRate = {}; 282 DSNullPS = {}; 283 DSOmZ = {}; 284 } 285 Handle(FrontendDrawEndEvent & event)286 virtual void Handle(FrontendDrawEndEvent& event) 287 { 288 //Clipper 289 EventHandlerFile::Handle(VertsClipped(event.data.drawId, CS.clippedVerts)); 290 291 //Tesselator 292 EventHandlerFile::Handle(TessPrims(event.data.drawId, TS.inputPrims)); 293 294 //Geometry Shader 295 EventHandlerFile::Handle(GSInputPrims(event.data.drawId, GS.inputPrimCount)); 296 EventHandlerFile::Handle(GSPrimsGen(event.data.drawId, GS.primGeneratedCount)); 297 EventHandlerFile::Handle(GSVertsInput(event.data.drawId, GS.vertsInput)); 298 299 //Reset Internal Counters 300 CS = {}; 301 TS = {}; 302 GS = {}; 303 } 304 Handle(GSPrimInfo & event)305 virtual void Handle(GSPrimInfo& event) 306 { 307 GS.inputPrimCount += event.data.inputPrimCount; 308 GS.primGeneratedCount += event.data.primGeneratedCount; 309 GS.vertsInput += event.data.vertsInput; 310 } 311 Handle(ClipVertexCount & event)312 virtual void Handle(ClipVertexCount& event) 313 { 314 CS.clippedVerts += (_mm_popcnt_u32(event.data.primMask) * event.data.vertsPerPrim); 315 } 316 Handle(TessPrimCount & event)317 virtual void Handle(TessPrimCount& event) 318 { 319 TS.inputPrims += event.data.primCount; 320 } 321 }; 322 FromHandle(HANDLE hThreadContext)323 static EventManager* FromHandle(HANDLE hThreadContext) 324 { 325 return reinterpret_cast<EventManager*>(hThreadContext); 326 } 327 328 // Construct an event manager and associate a handler with it. CreateThreadContext(AR_THREAD type)329 HANDLE CreateThreadContext(AR_THREAD type) 330 { 331 // Can we assume single threaded here? 332 static std::atomic<uint32_t> counter(0); 333 uint32_t id = counter.fetch_add(1); 334 335 EventManager* pManager = new EventManager(); 336 EventHandlerFile* pHandler = new EventHandlerStatsFile(id); 337 338 if (pManager && pHandler) 339 { 340 pManager->Attach(pHandler); 341 342 if (type == AR_THREAD::API) 343 { 344 pHandler->Handle(ThreadStartApiEvent()); 345 } 346 else 347 { 348 pHandler->Handle(ThreadStartWorkerEvent()); 349 } 350 pHandler->MarkHeader(); 351 352 return pManager; 353 } 354 355 SWR_ASSERT(0, "Failed to register thread."); 356 return nullptr; 357 } 358 DestroyThreadContext(HANDLE hThreadContext)359 void DestroyThreadContext(HANDLE hThreadContext) 360 { 361 EventManager* pManager = FromHandle(hThreadContext); 362 SWR_ASSERT(pManager != nullptr); 363 364 delete pManager; 365 } 366 367 // Dispatch event for this thread. Dispatch(HANDLE hThreadContext,Event & event)368 void Dispatch(HANDLE hThreadContext, Event& event) 369 { 370 EventManager* pManager = FromHandle(hThreadContext); 371 SWR_ASSERT(pManager != nullptr); 372 373 pManager->Dispatch(event); 374 } 375 } 376