• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /****************************************************************************
2 * Copyright (C) 2016 Intel Corporation.   All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * @file archrast.h
24 *
25 * @brief Definitions for archrast.
26 *
27 ******************************************************************************/
#include <atomic>
#include <new>

#include "common/os.h"
#include "archrast/archrast.h"
#include "archrast/eventmanager.h"
#include "gen_ar_eventhandlerfile.h"
34 
35 namespace ArchRast
36 {
37     //////////////////////////////////////////////////////////////////////////
38     /// @brief struct that keeps track of depth and stencil event information
39     struct DepthStencilStats
40     {
41         uint32_t earlyZTestPassCount = 0;
42         uint32_t earlyZTestFailCount = 0;
43         uint32_t lateZTestPassCount = 0;
44         uint32_t lateZTestFailCount = 0;
45         uint32_t earlyStencilTestPassCount = 0;
46         uint32_t earlyStencilTestFailCount = 0;
47         uint32_t lateStencilTestPassCount = 0;
48         uint32_t lateStencilTestFailCount = 0;
49         uint32_t earlyZTestCount = 0;
50         uint32_t lateZTestCount = 0;
51         uint32_t earlyStencilTestCount = 0;
52         uint32_t lateStencilTestCount = 0;
53     };
54 
55     struct CStats
56     {
57         uint32_t clippedVerts = 0;
58     };
59 
60     struct TEStats
61     {
62         uint32_t inputPrims = 0;
63         //@todo:: Change this to numPatches. Assumed: 1 patch per prim. If holds, its fine.
64     };
65 
66     struct GSStats
67     {
68         uint32_t inputPrimCount;
69         uint32_t primGeneratedCount;
70         uint32_t vertsInput;
71     };
72 
73     //////////////////////////////////////////////////////////////////////////
74     /// @brief Event handler that saves stat events to event files. This
75     ///        handler filters out unwanted events.
76     class EventHandlerStatsFile : public EventHandlerFile
77     {
78     public:
79         DepthStencilStats DSSingleSample = {};
80         DepthStencilStats DSSampleRate = {};
81         DepthStencilStats DSPixelRate = {};
82         DepthStencilStats DSNullPS = {};
83         DepthStencilStats DSOmZ = {};
84         CStats CS = {};
85         TEStats TS = {};
86         GSStats GS = {};
87 
EventHandlerStatsFile(uint32_t id)88         EventHandlerStatsFile(uint32_t id) : EventHandlerFile(id) {}
89 
90         // These are events that we're not interested in saving in stats event files.
Handle(Start & event)91         virtual void Handle(Start& event) {}
Handle(End & event)92         virtual void Handle(End& event) {}
93 
Handle(EarlyDepthStencilInfoSingleSample & event)94         virtual void Handle(EarlyDepthStencilInfoSingleSample& event)
95         {
96             //earlyZ test compute
97             DSSingleSample.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
98             DSSingleSample.earlyZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
99             DSSingleSample.earlyZTestCount += (_mm_popcnt_u32(event.data.depthPassMask) + _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask));
100 
101             //earlyStencil test compute
102             DSSingleSample.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
103             DSSingleSample.earlyStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
104             DSSingleSample.earlyStencilTestCount += (_mm_popcnt_u32(event.data.stencilPassMask) + _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask));
105 
106             //outputerMerger test compute
107             DSOmZ.earlyZTestPassCount += DSSingleSample.earlyZTestPassCount;
108             DSOmZ.earlyZTestFailCount += DSSingleSample.earlyZTestFailCount;
109             DSOmZ.earlyZTestCount += DSSingleSample.earlyZTestCount;
110             DSOmZ.earlyStencilTestPassCount += DSSingleSample.earlyStencilTestPassCount;
111             DSOmZ.earlyStencilTestFailCount += DSSingleSample.earlyStencilTestFailCount;
112             DSOmZ.earlyStencilTestCount += DSSingleSample.earlyStencilTestCount;
113         }
114 
Handle(EarlyDepthStencilInfoSampleRate & event)115         virtual void Handle(EarlyDepthStencilInfoSampleRate& event)
116         {
117             //earlyZ test compute
118             DSSampleRate.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
119             DSSampleRate.earlyZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
120             DSSampleRate.earlyZTestCount += (_mm_popcnt_u32(event.data.depthPassMask) + _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask));
121 
122             //earlyStencil test compute
123             DSSampleRate.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
124             DSSampleRate.earlyStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
125             DSSampleRate.earlyStencilTestCount += (_mm_popcnt_u32(event.data.stencilPassMask) + _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask));
126 
127             //outputerMerger test compute
128             DSOmZ.earlyZTestPassCount += DSSampleRate.earlyZTestPassCount;
129             DSOmZ.earlyZTestFailCount += DSSampleRate.earlyZTestFailCount;
130             DSOmZ.earlyZTestCount += DSSampleRate.earlyZTestCount;
131             DSOmZ.earlyStencilTestPassCount += DSSampleRate.earlyStencilTestPassCount;
132             DSOmZ.earlyStencilTestFailCount += DSSampleRate.earlyStencilTestFailCount;
133             DSOmZ.earlyStencilTestCount += DSSampleRate.earlyStencilTestCount;
134         }
135 
Handle(EarlyDepthStencilInfoNullPS & event)136         virtual void Handle(EarlyDepthStencilInfoNullPS& event)
137         {
138             //earlyZ test compute
139             DSNullPS.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
140             DSNullPS.earlyZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
141             DSNullPS.earlyZTestCount += (_mm_popcnt_u32(event.data.depthPassMask) + _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask));
142 
143             //earlyStencil test compute
144             DSNullPS.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
145             DSNullPS.earlyStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
146             DSNullPS.earlyStencilTestCount += (_mm_popcnt_u32(event.data.stencilPassMask) + _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask));
147 
148             //outputerMerger test compute
149             DSOmZ.earlyZTestPassCount += DSNullPS.earlyZTestPassCount;
150             DSOmZ.earlyZTestFailCount += DSNullPS.earlyZTestFailCount;
151             DSOmZ.earlyZTestCount += DSNullPS.earlyZTestCount;
152             DSOmZ.earlyStencilTestPassCount += DSNullPS.earlyStencilTestPassCount;
153             DSOmZ.earlyStencilTestFailCount += DSNullPS.earlyStencilTestFailCount;
154             DSOmZ.earlyStencilTestCount += DSNullPS.earlyStencilTestCount;
155         }
156 
Handle(LateDepthStencilInfoSingleSample & event)157         virtual void Handle(LateDepthStencilInfoSingleSample& event)
158         {
159             //lateZ test compute
160             DSSingleSample.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
161             DSSingleSample.lateZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
162             DSSingleSample.lateZTestCount += (_mm_popcnt_u32(event.data.depthPassMask) + _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask));
163 
164             //lateStencil test compute
165             DSSingleSample.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
166             DSSingleSample.lateStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
167             DSSingleSample.lateStencilTestCount += (_mm_popcnt_u32(event.data.stencilPassMask) + _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask));
168 
169             //outputerMerger test compute
170             DSOmZ.lateZTestPassCount += DSSingleSample.lateZTestPassCount;
171             DSOmZ.lateZTestFailCount += DSSingleSample.lateZTestFailCount;
172             DSOmZ.lateZTestCount += DSSingleSample.lateZTestCount;
173             DSOmZ.lateStencilTestPassCount += DSSingleSample.lateStencilTestPassCount;
174             DSOmZ.lateStencilTestFailCount += DSSingleSample.lateStencilTestFailCount;
175             DSOmZ.lateStencilTestCount += DSSingleSample.lateStencilTestCount;
176         }
177 
Handle(LateDepthStencilInfoSampleRate & event)178         virtual void Handle(LateDepthStencilInfoSampleRate& event)
179         {
180             //lateZ test compute
181             DSSampleRate.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
182             DSSampleRate.lateZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
183             DSSampleRate.lateZTestCount += (_mm_popcnt_u32(event.data.depthPassMask) + _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask));
184 
185             //lateStencil test compute
186             DSSampleRate.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
187             DSSampleRate.lateStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
188             DSSampleRate.lateStencilTestCount += (_mm_popcnt_u32(event.data.stencilPassMask) + _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask));
189 
190             //outputerMerger test compute
191             DSOmZ.lateZTestPassCount += DSSampleRate.lateZTestPassCount;
192             DSOmZ.lateZTestFailCount += DSSampleRate.lateZTestFailCount;
193             DSOmZ.lateZTestCount += DSSampleRate.lateZTestCount;
194             DSOmZ.lateStencilTestPassCount += DSSampleRate.lateStencilTestPassCount;
195             DSOmZ.lateStencilTestFailCount += DSSampleRate.lateStencilTestFailCount;
196             DSOmZ.lateStencilTestCount += DSSampleRate.lateStencilTestCount;
197         }
198 
Handle(LateDepthStencilInfoNullPS & event)199         virtual void Handle(LateDepthStencilInfoNullPS& event)
200         {
201             //lateZ test compute
202             DSNullPS.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
203             DSNullPS.lateZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
204             DSNullPS.lateZTestCount += (_mm_popcnt_u32(event.data.depthPassMask) + _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask));
205 
206             //lateStencil test compute
207             DSNullPS.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
208             DSNullPS.lateStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
209             DSNullPS.lateStencilTestCount += (_mm_popcnt_u32(event.data.stencilPassMask) + _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask));
210 
211             //outputerMerger test compute
212             DSOmZ.lateZTestPassCount += DSNullPS.lateZTestPassCount;
213             DSOmZ.lateZTestFailCount += DSNullPS.lateZTestFailCount;
214             DSOmZ.lateZTestCount += DSNullPS.lateZTestCount;
215             DSOmZ.lateStencilTestPassCount += DSNullPS.lateStencilTestPassCount;
216             DSOmZ.lateStencilTestFailCount += DSNullPS.lateStencilTestFailCount;
217             DSOmZ.lateStencilTestCount += DSNullPS.lateStencilTestCount;
218         }
219 
Handle(EarlyDepthInfoPixelRate & event)220         virtual void Handle(EarlyDepthInfoPixelRate& event)
221         {
222             //earlyZ test compute
223             DSPixelRate.earlyZTestCount += _mm_popcnt_u32(event.data.activeLanes);
224             DSPixelRate.earlyZTestPassCount += event.data.depthPassCount;
225             DSPixelRate.earlyZTestFailCount += (_mm_popcnt_u32(event.data.activeLanes) - event.data.depthPassCount);
226 
227             //outputerMerger test compute
228             DSOmZ.earlyZTestPassCount += DSPixelRate.earlyZTestPassCount;
229             DSOmZ.earlyZTestFailCount += DSPixelRate.earlyZTestFailCount;
230             DSOmZ.earlyZTestCount += DSPixelRate.earlyZTestCount;
231         }
232 
233 
Handle(LateDepthInfoPixelRate & event)234         virtual void Handle(LateDepthInfoPixelRate& event)
235         {
236             //lateZ test compute
237             DSPixelRate.lateZTestCount += _mm_popcnt_u32(event.data.activeLanes);
238             DSPixelRate.lateZTestPassCount += event.data.depthPassCount;
239             DSPixelRate.lateZTestFailCount += (_mm_popcnt_u32(event.data.activeLanes) - event.data.depthPassCount);
240 
241             //outputerMerger test compute
242             DSOmZ.lateZTestPassCount += DSPixelRate.lateZTestPassCount;
243             DSOmZ.lateZTestFailCount += DSPixelRate.lateZTestFailCount;
244             DSOmZ.lateZTestCount += DSPixelRate.lateZTestCount;
245 
246         }
247 
248 
Handle(BackendDrawEndEvent & event)249         virtual void Handle(BackendDrawEndEvent& event)
250         {
251             //singleSample
252             EventHandlerFile::Handle(EarlyZSingleSample(event.data.drawId, DSSingleSample.earlyZTestPassCount, DSSingleSample.earlyZTestFailCount, DSSingleSample.earlyZTestCount));
253             EventHandlerFile::Handle(LateZSingleSample(event.data.drawId, DSSingleSample.lateZTestPassCount, DSSingleSample.lateZTestFailCount, DSSingleSample.lateZTestCount));
254             EventHandlerFile::Handle(EarlyStencilSingleSample(event.data.drawId, DSSingleSample.earlyStencilTestPassCount, DSSingleSample.earlyStencilTestFailCount, DSSingleSample.earlyStencilTestCount));
255             EventHandlerFile::Handle(LateStencilSingleSample(event.data.drawId, DSSingleSample.lateStencilTestPassCount, DSSingleSample.lateStencilTestFailCount, DSSingleSample.lateStencilTestCount));
256 
257             //sampleRate
258             EventHandlerFile::Handle(EarlyZSampleRate(event.data.drawId, DSSampleRate.earlyZTestPassCount, DSSampleRate.earlyZTestFailCount, DSSampleRate.earlyZTestCount));
259             EventHandlerFile::Handle(LateZSampleRate(event.data.drawId, DSSampleRate.lateZTestPassCount, DSSampleRate.lateZTestFailCount, DSSampleRate.lateZTestCount));
260             EventHandlerFile::Handle(EarlyStencilSampleRate(event.data.drawId, DSSampleRate.earlyStencilTestPassCount, DSSampleRate.earlyStencilTestFailCount, DSSampleRate.earlyStencilTestCount));
261             EventHandlerFile::Handle(LateStencilSampleRate(event.data.drawId, DSSampleRate.lateStencilTestPassCount, DSSampleRate.lateStencilTestFailCount, DSSampleRate.lateStencilTestCount));
262 
263             //pixelRate
264             EventHandlerFile::Handle(EarlyZPixelRate(event.data.drawId, DSPixelRate.earlyZTestPassCount, DSPixelRate.earlyZTestFailCount, DSPixelRate.earlyZTestCount));
265             EventHandlerFile::Handle(LateZPixelRate(event.data.drawId, DSPixelRate.lateZTestPassCount, DSPixelRate.lateZTestFailCount, DSPixelRate.lateZTestCount));
266 
267 
268             //NullPS
269             EventHandlerFile::Handle(EarlyZNullPS(event.data.drawId, DSNullPS.earlyZTestPassCount, DSNullPS.earlyZTestFailCount, DSNullPS.earlyZTestCount));
270             EventHandlerFile::Handle(EarlyStencilNullPS(event.data.drawId, DSNullPS.earlyStencilTestPassCount, DSNullPS.earlyStencilTestFailCount, DSNullPS.earlyStencilTestCount));
271 
272             //OmZ
273             EventHandlerFile::Handle(EarlyOmZ(event.data.drawId, DSOmZ.earlyZTestPassCount, DSOmZ.earlyZTestFailCount, DSOmZ.earlyZTestCount));
274             EventHandlerFile::Handle(EarlyOmStencil(event.data.drawId, DSOmZ.earlyStencilTestPassCount, DSOmZ.earlyStencilTestFailCount, DSOmZ.earlyStencilTestCount));
275             EventHandlerFile::Handle(LateOmZ(event.data.drawId, DSOmZ.lateZTestPassCount, DSOmZ.lateZTestFailCount, DSOmZ.lateZTestCount));
276             EventHandlerFile::Handle(LateOmStencil(event.data.drawId, DSOmZ.lateStencilTestPassCount, DSOmZ.lateStencilTestFailCount, DSOmZ.lateStencilTestCount));
277 
278             //Reset Internal Counters
279             DSSingleSample = {};
280             DSSampleRate = {};
281             DSPixelRate = {};
282             DSNullPS = {};
283             DSOmZ = {};
284         }
285 
Handle(FrontendDrawEndEvent & event)286         virtual void Handle(FrontendDrawEndEvent& event)
287         {
288             //Clipper
289             EventHandlerFile::Handle(VertsClipped(event.data.drawId, CS.clippedVerts));
290 
291             //Tesselator
292             EventHandlerFile::Handle(TessPrims(event.data.drawId, TS.inputPrims));
293 
294             //Geometry Shader
295             EventHandlerFile::Handle(GSInputPrims(event.data.drawId, GS.inputPrimCount));
296             EventHandlerFile::Handle(GSPrimsGen(event.data.drawId, GS.primGeneratedCount));
297             EventHandlerFile::Handle(GSVertsInput(event.data.drawId, GS.vertsInput));
298 
299             //Reset Internal Counters
300             CS = {};
301             TS = {};
302             GS = {};
303         }
304 
Handle(GSPrimInfo & event)305         virtual void Handle(GSPrimInfo& event)
306         {
307             GS.inputPrimCount += event.data.inputPrimCount;
308             GS.primGeneratedCount += event.data.primGeneratedCount;
309             GS.vertsInput += event.data.vertsInput;
310         }
311 
Handle(ClipVertexCount & event)312         virtual void Handle(ClipVertexCount& event)
313         {
314             CS.clippedVerts += (_mm_popcnt_u32(event.data.primMask) * event.data.vertsPerPrim);
315         }
316 
Handle(TessPrimCount & event)317         virtual void Handle(TessPrimCount& event)
318         {
319             TS.inputPrims += event.data.primCount;
320         }
321     };
322 
FromHandle(HANDLE hThreadContext)323     static EventManager* FromHandle(HANDLE hThreadContext)
324     {
325         return reinterpret_cast<EventManager*>(hThreadContext);
326     }
327 
328     // Construct an event manager and associate a handler with it.
CreateThreadContext(AR_THREAD type)329     HANDLE CreateThreadContext(AR_THREAD type)
330     {
331         // Can we assume single threaded here?
332         static std::atomic<uint32_t> counter(0);
333         uint32_t id = counter.fetch_add(1);
334 
335         EventManager* pManager = new EventManager();
336         EventHandlerFile* pHandler = new EventHandlerStatsFile(id);
337 
338         if (pManager && pHandler)
339         {
340             pManager->Attach(pHandler);
341 
342             if (type == AR_THREAD::API)
343             {
344                 pHandler->Handle(ThreadStartApiEvent());
345             }
346             else
347             {
348                 pHandler->Handle(ThreadStartWorkerEvent());
349             }
350             pHandler->MarkHeader();
351 
352             return pManager;
353         }
354 
355         SWR_ASSERT(0, "Failed to register thread.");
356         return nullptr;
357     }
358 
DestroyThreadContext(HANDLE hThreadContext)359     void DestroyThreadContext(HANDLE hThreadContext)
360     {
361         EventManager* pManager = FromHandle(hThreadContext);
362         SWR_ASSERT(pManager != nullptr);
363 
364         delete pManager;
365     }
366 
367     // Dispatch event for this thread.
Dispatch(HANDLE hThreadContext,Event & event)368     void Dispatch(HANDLE hThreadContext, Event& event)
369     {
370         EventManager* pManager = FromHandle(hThreadContext);
371         SWR_ASSERT(pManager != nullptr);
372 
373         pManager->Dispatch(event);
374     }
375 }
376