1 /**************************************************************************** 2 * Copyright (C) 2016 Intel Corporation. All Rights Reserved. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * @file archrast.cpp 24 * 25 * @brief Implementation for archrast. 26 * 27 ******************************************************************************/ 28 #include <sys/stat.h> 29 30 #include <atomic> 31 #include <map> 32 33 #include "common/os.h" 34 #include "archrast/archrast.h" 35 #include "archrast/eventmanager.h" 36 #include "gen_ar_event.hpp" 37 #include "gen_ar_eventhandlerfile.hpp" 38 39 namespace ArchRast 40 { 41 ////////////////////////////////////////////////////////////////////////// 42 /// @brief struct that keeps track of depth and stencil event information 43 struct DepthStencilStats 44 { 45 uint32_t earlyZTestPassCount = 0; 46 uint32_t earlyZTestFailCount = 0; 47 uint32_t lateZTestPassCount = 0; 48 uint32_t lateZTestFailCount = 0; 49 uint32_t earlyStencilTestPassCount = 0; 50 uint32_t earlyStencilTestFailCount = 0; 51 uint32_t lateStencilTestPassCount = 0; 52 uint32_t lateStencilTestFailCount = 0; 53 }; 54 55 struct CStats 56 { 57 uint32_t trivialRejectCount; 58 uint32_t trivialAcceptCount; 59 uint32_t mustClipCount; 60 }; 61 62 struct TEStats 63 { 64 uint32_t inputPrims = 0; 65 //@todo:: Change this to numPatches. Assumed: 1 patch per prim. If holds, its fine. 66 }; 67 68 struct GSStateInfo 69 { 70 uint32_t inputPrimCount; 71 uint32_t primGeneratedCount; 72 uint32_t vertsInput; 73 }; 74 75 struct RastStats 76 { 77 uint32_t rasterTiles = 0; 78 }; 79 80 struct CullStats 81 { 82 uint32_t degeneratePrimCount = 0; 83 uint32_t backfacePrimCount = 0; 84 }; 85 86 struct AlphaStats 87 { 88 uint32_t alphaTestCount = 0; 89 uint32_t alphaBlendCount = 0; 90 }; 91 92 93 ////////////////////////////////////////////////////////////////////////// 94 /// @brief Event handler that handles API thread events. This is shared 95 /// between the API and its caller (e.g. driver shim) but typically 96 /// there is only a single API thread per context. So you can save 97 /// information in the class to be used for other events. 98 class EventHandlerApiStats : public EventHandlerFile 99 { 100 public: EventHandlerApiStats(uint32_t id)101 EventHandlerApiStats(uint32_t id) : EventHandlerFile(id) 102 { 103 #if defined(_WIN32) 104 // Attempt to copy the events.proto file to the ArchRast output dir. It's common for 105 // tools to place the events.proto file in the DEBUG_OUTPUT_DIR when launching AR. If it 106 // exists, this will attempt to copy it the first time we get here to package it with 107 // the stats. Otherwise, the user would need to specify the events.proto location when 108 // parsing the stats in post. 109 std::stringstream eventsProtoSrcFilename, eventsProtoDstFilename; 110 eventsProtoSrcFilename << KNOB_DEBUG_OUTPUT_DIR << "\\events.proto" << std::ends; 111 eventsProtoDstFilename << mOutputDir.substr(0, mOutputDir.size() - 1) 112 << "\\events.proto" << std::ends; 113 114 // If event.proto already exists, we're done; else do the copy 115 struct stat buf; // Use a Posix stat for file existence check 116 if (!stat(eventsProtoDstFilename.str().c_str(), &buf) == 0) 117 { 118 // Now check to make sure the events.proto source exists 119 if (stat(eventsProtoSrcFilename.str().c_str(), &buf) == 0) 120 { 121 std::ifstream srcFile; 122 srcFile.open(eventsProtoSrcFilename.str().c_str(), std::ios::binary); 123 if (srcFile.is_open()) 124 { 125 // Just do a binary buffer copy 126 std::ofstream dstFile; 127 dstFile.open(eventsProtoDstFilename.str().c_str(), std::ios::binary); 128 dstFile << srcFile.rdbuf(); 129 dstFile.close(); 130 } 131 srcFile.close(); 132 } 133 } 134 #endif 135 } 136 Handle(const DrawInstancedEvent & event)137 virtual void Handle(const DrawInstancedEvent& event) 138 { 139 DrawInfoEvent e(event.data.drawId, 140 ArchRast::Instanced, 141 event.data.topology, 142 event.data.numVertices, 143 0, 144 0, 145 event.data.startVertex, 146 event.data.numInstances, 147 event.data.startInstance, 148 event.data.tsEnable, 149 event.data.gsEnable, 150 event.data.soEnable, 151 event.data.soTopology, 152 event.data.splitId); 153 154 EventHandlerFile::Handle(e); 155 } 156 Handle(const DrawIndexedInstancedEvent & event)157 virtual void Handle(const DrawIndexedInstancedEvent& event) 158 { 159 DrawInfoEvent e(event.data.drawId, 160 ArchRast::IndexedInstanced, 161 event.data.topology, 162 0, 163 event.data.numIndices, 164 event.data.indexOffset, 165 event.data.baseVertex, 166 event.data.numInstances, 167 event.data.startInstance, 168 event.data.tsEnable, 169 event.data.gsEnable, 170 event.data.soEnable, 171 event.data.soTopology, 172 event.data.splitId); 173 174 EventHandlerFile::Handle(e); 175 } 176 }; 177 178 ////////////////////////////////////////////////////////////////////////// 179 /// @brief Event handler that handles worker thread events. There is one 180 /// event handler per thread. The python script will need to sum 181 /// up counters across all of the threads. 182 class EventHandlerWorkerStats : public EventHandlerFile 183 { 184 public: EventHandlerWorkerStats(uint32_t id)185 EventHandlerWorkerStats(uint32_t id) : EventHandlerFile(id), mNeedFlush(false) 186 { 187 memset(mShaderStats, 0, sizeof(mShaderStats)); 188 } 189 Handle(const EarlyDepthStencilInfoSingleSample & event)190 virtual void Handle(const EarlyDepthStencilInfoSingleSample& event) 191 { 192 // earlyZ test compute 193 mDSSingleSample.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask); 194 mDSSingleSample.earlyZTestFailCount += 195 _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask); 196 197 // earlyStencil test compute 198 mDSSingleSample.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask); 199 mDSSingleSample.earlyStencilTestFailCount += 200 _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask); 201 202 // earlyZ test single and multi sample 203 mDSCombined.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask); 204 mDSCombined.earlyZTestFailCount += 205 _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask); 206 207 // earlyStencil test single and multi sample 208 mDSCombined.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask); 209 mDSCombined.earlyStencilTestFailCount += 210 _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask); 211 212 mNeedFlush = true; 213 } 214 Handle(const EarlyDepthStencilInfoSampleRate & event)215 virtual void Handle(const EarlyDepthStencilInfoSampleRate& event) 216 { 217 // earlyZ test compute 218 mDSSampleRate.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask); 219 mDSSampleRate.earlyZTestFailCount += 220 _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask); 221 222 // earlyStencil test compute 223 mDSSampleRate.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask); 224 mDSSampleRate.earlyStencilTestFailCount += 225 _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask); 226 227 // earlyZ test single and multi sample 228 mDSCombined.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask); 229 mDSCombined.earlyZTestFailCount += 230 _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask); 231 232 // earlyStencil test single and multi sample 233 mDSCombined.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask); 234 mDSCombined.earlyStencilTestFailCount += 235 _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask); 236 237 mNeedFlush = true; 238 } 239 Handle(const EarlyDepthStencilInfoNullPS & event)240 virtual void Handle(const EarlyDepthStencilInfoNullPS& event) 241 { 242 // earlyZ test compute 243 mDSNullPS.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask); 244 mDSNullPS.earlyZTestFailCount += 245 _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask); 246 247 // earlyStencil test compute 248 mDSNullPS.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask); 249 mDSNullPS.earlyStencilTestFailCount += 250 _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask); 251 mNeedFlush = true; 252 } 253 Handle(const LateDepthStencilInfoSingleSample & event)254 virtual void Handle(const LateDepthStencilInfoSingleSample& event) 255 { 256 // lateZ test compute 257 mDSSingleSample.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask); 258 mDSSingleSample.lateZTestFailCount += 259 _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask); 260 261 // lateStencil test compute 262 mDSSingleSample.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask); 263 mDSSingleSample.lateStencilTestFailCount += 264 _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask); 265 266 // lateZ test single and multi sample 267 mDSCombined.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask); 268 mDSCombined.lateZTestFailCount += 269 _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask); 270 271 // lateStencil test single and multi sample 272 mDSCombined.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask); 273 mDSCombined.lateStencilTestFailCount += 274 _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask); 275 276 mNeedFlush = true; 277 } 278 Handle(const LateDepthStencilInfoSampleRate & event)279 virtual void Handle(const LateDepthStencilInfoSampleRate& event) 280 { 281 // lateZ test compute 282 mDSSampleRate.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask); 283 mDSSampleRate.lateZTestFailCount += 284 _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask); 285 286 // lateStencil test compute 287 mDSSampleRate.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask); 288 mDSSampleRate.lateStencilTestFailCount += 289 _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask); 290 291 // lateZ test single and multi sample 292 mDSCombined.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask); 293 mDSCombined.lateZTestFailCount += 294 _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask); 295 296 // lateStencil test single and multi sample 297 mDSCombined.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask); 298 mDSCombined.lateStencilTestFailCount += 299 _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask); 300 301 mNeedFlush = true; 302 } 303 Handle(const LateDepthStencilInfoNullPS & event)304 virtual void Handle(const LateDepthStencilInfoNullPS& event) 305 { 306 // lateZ test compute 307 mDSNullPS.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask); 308 mDSNullPS.lateZTestFailCount += 309 _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask); 310 311 // lateStencil test compute 312 mDSNullPS.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask); 313 mDSNullPS.lateStencilTestFailCount += 314 _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask); 315 mNeedFlush = true; 316 } 317 Handle(const EarlyDepthInfoPixelRate & event)318 virtual void Handle(const EarlyDepthInfoPixelRate& event) 319 { 320 // earlyZ test compute 321 mDSPixelRate.earlyZTestPassCount += event.data.depthPassCount; 322 mDSPixelRate.earlyZTestFailCount += 323 (_mm_popcnt_u32(event.data.activeLanes) - event.data.depthPassCount); 324 mNeedFlush = true; 325 } 326 327 Handle(const LateDepthInfoPixelRate & event)328 virtual void Handle(const LateDepthInfoPixelRate& event) 329 { 330 // lateZ test compute 331 mDSPixelRate.lateZTestPassCount += event.data.depthPassCount; 332 mDSPixelRate.lateZTestFailCount += 333 (_mm_popcnt_u32(event.data.activeLanes) - event.data.depthPassCount); 334 mNeedFlush = true; 335 } 336 337 Handle(const ClipInfoEvent & event)338 virtual void Handle(const ClipInfoEvent& event) 339 { 340 mClipper.mustClipCount += _mm_popcnt_u32(event.data.clipMask); 341 mClipper.trivialRejectCount += 342 event.data.numInvocations - _mm_popcnt_u32(event.data.validMask); 343 mClipper.trivialAcceptCount += 344 _mm_popcnt_u32(event.data.validMask & ~event.data.clipMask); 345 } 346 UpdateStats(SWR_SHADER_STATS * pStatTotals,const SWR_SHADER_STATS * pStatUpdate)347 void UpdateStats(SWR_SHADER_STATS* pStatTotals, const SWR_SHADER_STATS* pStatUpdate) 348 { 349 pStatTotals->numInstExecuted += pStatUpdate->numInstExecuted; 350 pStatTotals->numSampleExecuted += pStatUpdate->numSampleExecuted; 351 pStatTotals->numSampleLExecuted += pStatUpdate->numSampleLExecuted; 352 pStatTotals->numSampleBExecuted += pStatUpdate->numSampleBExecuted; 353 pStatTotals->numSampleCExecuted += pStatUpdate->numSampleCExecuted; 354 pStatTotals->numSampleCLZExecuted += pStatUpdate->numSampleCLZExecuted; 355 pStatTotals->numSampleCDExecuted += pStatUpdate->numSampleCDExecuted; 356 pStatTotals->numGather4Executed += pStatUpdate->numGather4Executed; 357 pStatTotals->numGather4CExecuted += pStatUpdate->numGather4CExecuted; 358 pStatTotals->numGather4CPOExecuted += pStatUpdate->numGather4CPOExecuted; 359 pStatTotals->numGather4CPOCExecuted += pStatUpdate->numGather4CPOCExecuted; 360 pStatTotals->numLodExecuted += pStatUpdate->numLodExecuted; 361 } 362 Handle(const VSStats & event)363 virtual void Handle(const VSStats& event) 364 { 365 SWR_SHADER_STATS* pStats = (SWR_SHADER_STATS*)event.data.hStats; 366 UpdateStats(&mShaderStats[SHADER_VERTEX], pStats); 367 } 368 Handle(const GSStats & event)369 virtual void Handle(const GSStats& event) 370 { 371 SWR_SHADER_STATS* pStats = (SWR_SHADER_STATS*)event.data.hStats; 372 UpdateStats(&mShaderStats[SHADER_GEOMETRY], pStats); 373 } 374 Handle(const DSStats & event)375 virtual void Handle(const DSStats& event) 376 { 377 SWR_SHADER_STATS* pStats = (SWR_SHADER_STATS*)event.data.hStats; 378 UpdateStats(&mShaderStats[SHADER_DOMAIN], pStats); 379 } 380 Handle(const HSStats & event)381 virtual void Handle(const HSStats& event) 382 { 383 SWR_SHADER_STATS* pStats = (SWR_SHADER_STATS*)event.data.hStats; 384 UpdateStats(&mShaderStats[SHADER_HULL], pStats); 385 } 386 Handle(const PSStats & event)387 virtual void Handle(const PSStats& event) 388 { 389 SWR_SHADER_STATS* pStats = (SWR_SHADER_STATS*)event.data.hStats; 390 UpdateStats(&mShaderStats[SHADER_PIXEL], pStats); 391 mNeedFlush = true; 392 } 393 Handle(const CSStats & event)394 virtual void Handle(const CSStats& event) 395 { 396 SWR_SHADER_STATS* pStats = (SWR_SHADER_STATS*)event.data.hStats; 397 UpdateStats(&mShaderStats[SHADER_COMPUTE], pStats); 398 mNeedFlush = true; 399 } 400 401 // Flush cached events for this draw FlushDraw(uint32_t drawId)402 virtual void FlushDraw(uint32_t drawId) 403 { 404 if (mNeedFlush == false) 405 return; 406 407 EventHandlerFile::Handle(PSInfo(drawId, 408 mShaderStats[SHADER_PIXEL].numInstExecuted, 409 mShaderStats[SHADER_PIXEL].numSampleExecuted, 410 mShaderStats[SHADER_PIXEL].numSampleLExecuted, 411 mShaderStats[SHADER_PIXEL].numSampleBExecuted, 412 mShaderStats[SHADER_PIXEL].numSampleCExecuted, 413 mShaderStats[SHADER_PIXEL].numSampleCLZExecuted, 414 mShaderStats[SHADER_PIXEL].numSampleCDExecuted, 415 mShaderStats[SHADER_PIXEL].numGather4Executed, 416 mShaderStats[SHADER_PIXEL].numGather4CExecuted, 417 mShaderStats[SHADER_PIXEL].numGather4CPOExecuted, 418 mShaderStats[SHADER_PIXEL].numGather4CPOCExecuted, 419 mShaderStats[SHADER_PIXEL].numLodExecuted)); 420 EventHandlerFile::Handle(CSInfo(drawId, 421 mShaderStats[SHADER_COMPUTE].numInstExecuted, 422 mShaderStats[SHADER_COMPUTE].numSampleExecuted, 423 mShaderStats[SHADER_COMPUTE].numSampleLExecuted, 424 mShaderStats[SHADER_COMPUTE].numSampleBExecuted, 425 mShaderStats[SHADER_COMPUTE].numSampleCExecuted, 426 mShaderStats[SHADER_COMPUTE].numSampleCLZExecuted, 427 mShaderStats[SHADER_COMPUTE].numSampleCDExecuted, 428 mShaderStats[SHADER_COMPUTE].numGather4Executed, 429 mShaderStats[SHADER_COMPUTE].numGather4CExecuted, 430 mShaderStats[SHADER_COMPUTE].numGather4CPOExecuted, 431 mShaderStats[SHADER_COMPUTE].numGather4CPOCExecuted, 432 mShaderStats[SHADER_COMPUTE].numLodExecuted)); 433 434 // singleSample 435 EventHandlerFile::Handle(EarlyZSingleSample( 436 drawId, mDSSingleSample.earlyZTestPassCount, mDSSingleSample.earlyZTestFailCount)); 437 EventHandlerFile::Handle(LateZSingleSample( 438 drawId, mDSSingleSample.lateZTestPassCount, mDSSingleSample.lateZTestFailCount)); 439 EventHandlerFile::Handle( 440 EarlyStencilSingleSample(drawId, 441 mDSSingleSample.earlyStencilTestPassCount, 442 mDSSingleSample.earlyStencilTestFailCount)); 443 EventHandlerFile::Handle( 444 LateStencilSingleSample(drawId, 445 mDSSingleSample.lateStencilTestPassCount, 446 mDSSingleSample.lateStencilTestFailCount)); 447 448 // sampleRate 449 EventHandlerFile::Handle(EarlyZSampleRate( 450 drawId, mDSSampleRate.earlyZTestPassCount, mDSSampleRate.earlyZTestFailCount)); 451 EventHandlerFile::Handle(LateZSampleRate( 452 drawId, mDSSampleRate.lateZTestPassCount, mDSSampleRate.lateZTestFailCount)); 453 EventHandlerFile::Handle( 454 EarlyStencilSampleRate(drawId, 455 mDSSampleRate.earlyStencilTestPassCount, 456 mDSSampleRate.earlyStencilTestFailCount)); 457 EventHandlerFile::Handle(LateStencilSampleRate(drawId, 458 mDSSampleRate.lateStencilTestPassCount, 459 mDSSampleRate.lateStencilTestFailCount)); 460 461 // combined 462 EventHandlerFile::Handle( 463 EarlyZ(drawId, mDSCombined.earlyZTestPassCount, mDSCombined.earlyZTestFailCount)); 464 EventHandlerFile::Handle( 465 LateZ(drawId, mDSCombined.lateZTestPassCount, mDSCombined.lateZTestFailCount)); 466 EventHandlerFile::Handle(EarlyStencil(drawId, 467 mDSCombined.earlyStencilTestPassCount, 468 mDSCombined.earlyStencilTestFailCount)); 469 EventHandlerFile::Handle(LateStencil(drawId, 470 mDSCombined.lateStencilTestPassCount, 471 mDSCombined.lateStencilTestFailCount)); 472 473 // pixelRate 474 EventHandlerFile::Handle(EarlyZPixelRate( 475 drawId, mDSPixelRate.earlyZTestPassCount, mDSPixelRate.earlyZTestFailCount)); 476 EventHandlerFile::Handle(LateZPixelRate( 477 drawId, mDSPixelRate.lateZTestPassCount, mDSPixelRate.lateZTestFailCount)); 478 479 480 // NullPS 481 EventHandlerFile::Handle( 482 EarlyZNullPS(drawId, mDSNullPS.earlyZTestPassCount, mDSNullPS.earlyZTestFailCount)); 483 EventHandlerFile::Handle(EarlyStencilNullPS( 484 drawId, mDSNullPS.earlyStencilTestPassCount, mDSNullPS.earlyStencilTestFailCount)); 485 486 // Rasterized Subspans 487 EventHandlerFile::Handle(RasterTiles(drawId, rastStats.rasterTiles)); 488 489 // Alpha Subspans 490 EventHandlerFile::Handle( 491 AlphaEvent(drawId, mAlphaStats.alphaTestCount, mAlphaStats.alphaBlendCount)); 492 493 // Primitive Culling 494 EventHandlerFile::Handle( 495 CullEvent(drawId, mCullStats.backfacePrimCount, mCullStats.degeneratePrimCount)); 496 497 mDSSingleSample = {}; 498 mDSSampleRate = {}; 499 mDSCombined = {}; 500 mDSPixelRate = {}; 501 mDSNullPS = {}; 502 503 rastStats = {}; 504 mCullStats = {}; 505 mAlphaStats = {}; 506 507 mShaderStats[SHADER_PIXEL] = {}; 508 mShaderStats[SHADER_COMPUTE] = {}; 509 510 mNeedFlush = false; 511 } 512 Handle(const FrontendDrawEndEvent & event)513 virtual void Handle(const FrontendDrawEndEvent& event) 514 { 515 // Clipper 516 EventHandlerFile::Handle(ClipperEvent(event.data.drawId, 517 mClipper.trivialRejectCount, 518 mClipper.trivialAcceptCount, 519 mClipper.mustClipCount)); 520 521 // Tesselator 522 EventHandlerFile::Handle(TessPrims(event.data.drawId, mTS.inputPrims)); 523 524 // Geometry Shader 525 EventHandlerFile::Handle(GSInputPrims(event.data.drawId, mGS.inputPrimCount)); 526 EventHandlerFile::Handle(GSPrimsGen(event.data.drawId, mGS.primGeneratedCount)); 527 EventHandlerFile::Handle(GSVertsInput(event.data.drawId, mGS.vertsInput)); 528 529 EventHandlerFile::Handle(VSInfo(event.data.drawId, 530 mShaderStats[SHADER_VERTEX].numInstExecuted, 531 mShaderStats[SHADER_VERTEX].numSampleExecuted, 532 mShaderStats[SHADER_VERTEX].numSampleLExecuted, 533 mShaderStats[SHADER_VERTEX].numSampleBExecuted, 534 mShaderStats[SHADER_VERTEX].numSampleCExecuted, 535 mShaderStats[SHADER_VERTEX].numSampleCLZExecuted, 536 mShaderStats[SHADER_VERTEX].numSampleCDExecuted, 537 mShaderStats[SHADER_VERTEX].numGather4Executed, 538 mShaderStats[SHADER_VERTEX].numGather4CExecuted, 539 mShaderStats[SHADER_VERTEX].numGather4CPOExecuted, 540 mShaderStats[SHADER_VERTEX].numGather4CPOCExecuted, 541 mShaderStats[SHADER_VERTEX].numLodExecuted)); 542 EventHandlerFile::Handle(HSInfo(event.data.drawId, 543 mShaderStats[SHADER_HULL].numInstExecuted, 544 mShaderStats[SHADER_HULL].numSampleExecuted, 545 mShaderStats[SHADER_HULL].numSampleLExecuted, 546 mShaderStats[SHADER_HULL].numSampleBExecuted, 547 mShaderStats[SHADER_HULL].numSampleCExecuted, 548 mShaderStats[SHADER_HULL].numSampleCLZExecuted, 549 mShaderStats[SHADER_HULL].numSampleCDExecuted, 550 mShaderStats[SHADER_HULL].numGather4Executed, 551 mShaderStats[SHADER_HULL].numGather4CExecuted, 552 mShaderStats[SHADER_HULL].numGather4CPOExecuted, 553 mShaderStats[SHADER_HULL].numGather4CPOCExecuted, 554 mShaderStats[SHADER_HULL].numLodExecuted)); 555 EventHandlerFile::Handle(DSInfo(event.data.drawId, 556 mShaderStats[SHADER_DOMAIN].numInstExecuted, 557 mShaderStats[SHADER_DOMAIN].numSampleExecuted, 558 mShaderStats[SHADER_DOMAIN].numSampleLExecuted, 559 mShaderStats[SHADER_DOMAIN].numSampleBExecuted, 560 mShaderStats[SHADER_DOMAIN].numSampleCExecuted, 561 mShaderStats[SHADER_DOMAIN].numSampleCLZExecuted, 562 mShaderStats[SHADER_DOMAIN].numSampleCDExecuted, 563 mShaderStats[SHADER_DOMAIN].numGather4Executed, 564 mShaderStats[SHADER_DOMAIN].numGather4CExecuted, 565 mShaderStats[SHADER_DOMAIN].numGather4CPOExecuted, 566 mShaderStats[SHADER_DOMAIN].numGather4CPOCExecuted, 567 mShaderStats[SHADER_DOMAIN].numLodExecuted)); 568 EventHandlerFile::Handle(GSInfo(event.data.drawId, 569 mShaderStats[SHADER_GEOMETRY].numInstExecuted, 570 mShaderStats[SHADER_GEOMETRY].numSampleExecuted, 571 mShaderStats[SHADER_GEOMETRY].numSampleLExecuted, 572 mShaderStats[SHADER_GEOMETRY].numSampleBExecuted, 573 mShaderStats[SHADER_GEOMETRY].numSampleCExecuted, 574 mShaderStats[SHADER_GEOMETRY].numSampleCLZExecuted, 575 mShaderStats[SHADER_GEOMETRY].numSampleCDExecuted, 576 mShaderStats[SHADER_GEOMETRY].numGather4Executed, 577 mShaderStats[SHADER_GEOMETRY].numGather4CExecuted, 578 mShaderStats[SHADER_GEOMETRY].numGather4CPOExecuted, 579 mShaderStats[SHADER_GEOMETRY].numGather4CPOCExecuted, 580 mShaderStats[SHADER_GEOMETRY].numLodExecuted)); 581 582 mShaderStats[SHADER_VERTEX] = {}; 583 mShaderStats[SHADER_HULL] = {}; 584 mShaderStats[SHADER_DOMAIN] = {}; 585 mShaderStats[SHADER_GEOMETRY] = {}; 586 587 // Reset Internal Counters 588 mClipper = {}; 589 mTS = {}; 590 mGS = {}; 591 } 592 Handle(const GSPrimInfo & event)593 virtual void Handle(const GSPrimInfo& event) 594 { 595 mGS.inputPrimCount += event.data.inputPrimCount; 596 mGS.primGeneratedCount += event.data.primGeneratedCount; 597 mGS.vertsInput += event.data.vertsInput; 598 } 599 Handle(const TessPrimCount & event)600 virtual void Handle(const TessPrimCount& event) { mTS.inputPrims += event.data.primCount; } 601 Handle(const RasterTileCount & event)602 virtual void Handle(const RasterTileCount& event) 603 { 604 rastStats.rasterTiles += event.data.rasterTiles; 605 } 606 Handle(const CullInfoEvent & event)607 virtual void Handle(const CullInfoEvent& event) 608 { 609 mCullStats.degeneratePrimCount += _mm_popcnt_u32( 610 event.data.validMask ^ (event.data.validMask & ~event.data.degeneratePrimMask)); 611 mCullStats.backfacePrimCount += _mm_popcnt_u32( 612 event.data.validMask ^ (event.data.validMask & ~event.data.backfacePrimMask)); 613 } 614 Handle(const AlphaInfoEvent & event)615 virtual void Handle(const AlphaInfoEvent& event) 616 { 617 mAlphaStats.alphaTestCount += event.data.alphaTestEnable; 618 mAlphaStats.alphaBlendCount += event.data.alphaBlendEnable; 619 } 620 621 protected: 622 bool mNeedFlush; 623 // Per draw stats 624 DepthStencilStats mDSSingleSample = {}; 625 DepthStencilStats mDSSampleRate = {}; 626 DepthStencilStats mDSPixelRate = {}; 627 DepthStencilStats mDSCombined = {}; 628 DepthStencilStats mDSNullPS = {}; 629 DepthStencilStats mDSOmZ = {}; 630 CStats mClipper = {}; 631 TEStats mTS = {}; 632 GSStateInfo mGS = {}; 633 RastStats rastStats = {}; 634 CullStats mCullStats = {}; 635 AlphaStats mAlphaStats = {}; 636 637 SWR_SHADER_STATS mShaderStats[NUM_SHADER_TYPES]; 638 639 }; 640 FromHandle(HANDLE hThreadContext)641 static EventManager* FromHandle(HANDLE hThreadContext) 642 { 643 return reinterpret_cast<EventManager*>(hThreadContext); 644 } 645 646 // Construct an event manager and associate a handler with it. CreateThreadContext(AR_THREAD type)647 HANDLE CreateThreadContext(AR_THREAD type) 648 { 649 // Can we assume single threaded here? 650 static std::atomic<uint32_t> counter(0); 651 uint32_t id = counter.fetch_add(1); 652 653 EventManager* pManager = new EventManager(); 654 655 if (pManager) 656 { 657 EventHandlerFile* pHandler = nullptr; 658 659 if (type == AR_THREAD::API) 660 { 661 pHandler = new EventHandlerApiStats(id); 662 pManager->Attach(pHandler); 663 pHandler->Handle(ThreadStartApiEvent()); 664 } 665 else 666 { 667 pHandler = new EventHandlerWorkerStats(id); 668 pManager->Attach(pHandler); 669 pHandler->Handle(ThreadStartWorkerEvent()); 670 } 671 672 pHandler->MarkHeader(); 673 674 return pManager; 675 } 676 677 SWR_INVALID("Failed to register thread."); 678 return nullptr; 679 } 680 DestroyThreadContext(HANDLE hThreadContext)681 void DestroyThreadContext(HANDLE hThreadContext) 682 { 683 EventManager* pManager = FromHandle(hThreadContext); 684 SWR_ASSERT(pManager != nullptr); 685 686 delete pManager; 687 } 688 689 // Dispatch event for this thread. Dispatch(HANDLE hThreadContext,const Event & event)690 void Dispatch(HANDLE hThreadContext, const Event& event) 691 { 692 if (event.IsEnabled()) 693 { 694 EventManager* pManager = reinterpret_cast<EventManager*>(hThreadContext); 695 SWR_ASSERT(pManager != nullptr); 696 pManager->Dispatch(event); 697 } 698 } 699 700 // Flush for this thread. FlushDraw(HANDLE hThreadContext,uint32_t drawId)701 void FlushDraw(HANDLE hThreadContext, uint32_t drawId) 702 { 703 EventManager* pManager = FromHandle(hThreadContext); 704 SWR_ASSERT(pManager != nullptr); 705 706 pManager->FlushDraw(drawId); 707 } 708 } // namespace ArchRast 709