• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 #include "Profiling.hpp"
6 
7 #include <armnn/BackendId.hpp>
8 #include <armnn/utility/Assert.hpp>
9 #include <armnn/utility/IgnoreUnused.hpp>
10 
11 #include "JsonPrinter.hpp"
12 
13 #if ARMNN_STREAMLINE_ENABLED
14 #include <streamline_annotate.h>
15 #endif
16 
17 #include <algorithm>
18 #include <iomanip>
19 #include <iostream>
20 #include <fstream>
21 #include <map>
22 #include <stack>
23 
24 namespace armnn
25 {
26 
27 // Controls the amount of memory initially allocated to store profiling events.
28 // If chosen carefully, the profiling system will not make any additional allocations, thus minimizing its impact on
29 // measured times.
30 constexpr std::size_t g_ProfilingEventCountHint = 1024;
31 
32 // Whether profiling reports should include the sequence of events together with their timings.
33 constexpr bool g_WriteProfilingEventSequence = true;
34 
35 // Whether profiling reports should also report detailed information on events grouped by inference.
36 // This can spam the output stream, so use carefully (or adapt the code to just output information
37 // of interest).
38 constexpr bool g_AggregateProfilingEventsByInference = true;
39 
40 // Whether a call to Profiler::AnalyzeEventsAndWriteResults() will be made when the Profiler is destroyed.
41 // It can be convenient for local tests.
42 constexpr bool g_WriteReportToStdOutOnProfilerDestruction = false;
43 
FindMeasurement(const std::string & name,const Event * event)44 Measurement FindMeasurement(const std::string& name, const Event* event)
45 {
46 
47     ARMNN_ASSERT(event != nullptr);
48 
49     // Search though the measurements.
50     for (const auto& measurement : event->GetMeasurements())
51     {
52         if (measurement.m_Name == name)
53         {
54             // Measurement found.
55             return measurement;
56         }
57     }
58 
59     // Measurement not found.
60     return Measurement{ "", 0.f, Measurement::Unit::TIME_MS };
61 }
62 
FindKernelMeasurements(const Event * event)63 std::vector<Measurement> FindKernelMeasurements(const Event* event)
64 {
65     ARMNN_ASSERT(event != nullptr);
66 
67     std::vector<Measurement> measurements;
68 
69     // Search through the measurements.
70     for (const auto& measurement : event->GetMeasurements())
71     {
72         if (measurement.m_Name.rfind("OpenClKernelTimer", 0) == 0
73             || measurement.m_Name.rfind("NeonKernelTimer", 0) == 0)
74         {
75             // Measurement found.
76             measurements.push_back(measurement);
77         }
78     }
79 
80     return measurements;
81 }
82 
CalculateProfilingEventStats() const83 std::map<std::string, Profiler::ProfilingEventStats> Profiler::CalculateProfilingEventStats() const
84 {
85     std::map<std::string, ProfilingEventStats> nameToStatsMap;
86 
87     for (const auto& event : m_EventSequence)
88     {
89         Measurement measurement = FindMeasurement(WallClockTimer::WALL_CLOCK_TIME, event.get());
90 
91         double durationMs = measurement.m_Value;
92         auto it = nameToStatsMap.find(event->GetName());
93         if (it != nameToStatsMap.end())
94         {
95             ProfilingEventStats& stats = it->second;
96             stats.m_TotalMs += durationMs;
97             stats.m_MinMs = std::min(stats.m_MinMs, durationMs);
98             stats.m_MaxMs = std::max(stats.m_MaxMs, durationMs);
99             ++stats.m_Count;
100         }
101         else
102         {
103             nameToStatsMap.emplace(event->GetName(), ProfilingEventStats{ durationMs, durationMs, durationMs, 1 });
104         }
105     }
106 
107     return nameToStatsMap;
108 }
109 
GetEventPtr(const Event * ptr)110 const Event* GetEventPtr(const Event* ptr) { return ptr;}
GetEventPtr(const std::unique_ptr<Event> & ptr)111 const Event* GetEventPtr(const std::unique_ptr<Event>& ptr) {return ptr.get(); }
112 
113 template<typename ItertType>
AnalyzeEventSequenceAndWriteResults(ItertType first,ItertType last,std::ostream & outStream) const114 void Profiler::AnalyzeEventSequenceAndWriteResults(ItertType first, ItertType last, std::ostream& outStream) const
115 {
116     // Outputs event sequence, if needed.
117     if (g_WriteProfilingEventSequence)
118     {
119         // Makes sure timestamps are output with 6 decimals, and save old settings.
120         std::streamsize oldPrecision = outStream.precision();
121         outStream.precision(6);
122         std::ios_base::fmtflags oldFlags = outStream.flags();
123         outStream.setf(std::ios::fixed);
124         // Outputs fields.
125         outStream << "Event Sequence - Name | Duration (ms) | Start (ms) | Stop (ms) | Device" << std::endl;
126         for (auto event = first; event != last; ++event)
127         {
128             const Event* eventPtr = GetEventPtr((*event));
129             double startTimeMs = FindMeasurement(WallClockTimer::WALL_CLOCK_TIME_START, eventPtr).m_Value;
130             double stopTimeMs = FindMeasurement(WallClockTimer::WALL_CLOCK_TIME_STOP, eventPtr).m_Value;
131 
132             // Find the WallClock measurement if there is one.
133             double durationMs = FindMeasurement(WallClockTimer::WALL_CLOCK_TIME, eventPtr).m_Value;
134             outStream << std::setw(50) << eventPtr->GetName() << " "
135                       << std::setw(20) << durationMs
136                       << std::setw(20) << startTimeMs
137                       << std::setw(20) << stopTimeMs
138                       << std::setw(20) << eventPtr->GetBackendId().Get()
139                       << std::endl;
140         }
141         outStream << std::endl;
142         // Restores previous precision settings.
143         outStream.flags(oldFlags);
144         outStream.precision(oldPrecision);
145     }
146 
147     // Aggregates results per event name.
148     std::map<std::string, ProfilingEventStats> nameToStatsMap = CalculateProfilingEventStats();
149 
150     // Outputs aggregated stats.
151     outStream << "Event Stats - Name | Avg (ms) | Min (ms) | Max (ms) | Total (ms) | Count" << std::endl;
152     for (const auto& pair : nameToStatsMap)
153     {
154         const std::string& eventLabel = pair.first;
155         const ProfilingEventStats& eventStats = pair.second;
156         const double avgMs = eventStats.m_TotalMs / double(eventStats.m_Count);
157 
158         outStream << "\t" << std::setw(50) << eventLabel << " " << std::setw(9) << avgMs << " "
159             << std::setw(9) << eventStats.m_MinMs << " " << std::setw(9) << eventStats.m_MaxMs << " "
160             << std::setw(9) << eventStats.m_TotalMs << " " << std::setw(9) << eventStats.m_Count << std::endl;
161     }
162     outStream << std::endl;
163 }
164 
Profiler()165 Profiler::Profiler()
166     : m_ProfilingEnabled(false)
167 {
168     m_EventSequence.reserve(g_ProfilingEventCountHint);
169 
170 #if ARMNN_STREAMLINE_ENABLED
171     // Initialises streamline annotations.
172     ANNOTATE_SETUP;
173 #endif
174 }
175 
~Profiler()176 Profiler::~Profiler()
177 {
178     if (m_ProfilingEnabled)
179     {
180         if (g_WriteReportToStdOutOnProfilerDestruction)
181         {
182             Print(std::cout);
183         }
184     }
185 
186     // Un-register this profiler from the current thread.
187     ProfilerManager::GetInstance().RegisterProfiler(nullptr);
188 }
189 
IsProfilingEnabled()190 bool Profiler::IsProfilingEnabled()
191 {
192     return m_ProfilingEnabled;
193 }
194 
EnableProfiling(bool enableProfiling)195 void Profiler::EnableProfiling(bool enableProfiling)
196 {
197     m_ProfilingEnabled = enableProfiling;
198 }
199 
BeginEvent(const BackendId & backendId,const std::string & label,std::vector<InstrumentPtr> && instruments)200 Event* Profiler::BeginEvent(const BackendId& backendId,
201                             const std::string& label,
202                             std::vector<InstrumentPtr>&& instruments)
203 {
204     Event* parent = m_Parents.empty() ? nullptr : m_Parents.top();
205     m_EventSequence.push_back(std::make_unique<Event>(label, this, parent, backendId, std::move(instruments)));
206     Event* event = m_EventSequence.back().get();
207     event->Start();
208 
209 #if ARMNN_STREAMLINE_ENABLED
210     ANNOTATE_CHANNEL_COLOR(uint32_t(m_Parents.size()), GetEventColor(backendId), label.c_str());
211 #endif
212 
213     m_Parents.push(event);
214     return event;
215 }
216 
EndEvent(Event * event)217 void Profiler::EndEvent(Event* event)
218 {
219     event->Stop();
220 
221     ARMNN_ASSERT(!m_Parents.empty());
222     ARMNN_ASSERT(event == m_Parents.top());
223     m_Parents.pop();
224 
225     Event* parent = m_Parents.empty() ? nullptr : m_Parents.top();
226     IgnoreUnused(parent);
227     ARMNN_ASSERT(event->GetParentEvent() == parent);
228 
229 #if ARMNN_STREAMLINE_ENABLED
230     ANNOTATE_CHANNEL_END(uint32_t(m_Parents.size()));
231 #endif
232 }
233 
CalcLevel(const Event * eventPtr)234 int CalcLevel(const Event* eventPtr)
235 {
236     int level=0;
237     while (eventPtr != nullptr)
238     {
239         eventPtr = eventPtr->GetParentEvent();
240         level++;
241     }
242     return level;
243 }
244 
PopulateInferences(std::vector<const Event * > & outInferences,int & outBaseLevel) const245 void Profiler::PopulateInferences(std::vector<const Event*>& outInferences, int& outBaseLevel) const
246 {
247     outInferences.reserve(m_EventSequence.size());
248     for (const auto& event : m_EventSequence)
249     {
250         const Event* eventPtrRaw = event.get();
251         if (eventPtrRaw->GetName() == "EnqueueWorkload")
252         {
253             outBaseLevel = (outBaseLevel == -1) ? CalcLevel(eventPtrRaw) : outBaseLevel;
254             outInferences.push_back(eventPtrRaw);
255         }
256     }
257 }
258 
PopulateDescendants(std::map<const Event *,std::vector<const Event * >> & outDescendantsMap) const259 void Profiler::PopulateDescendants(std::map<const Event*, std::vector<const Event*>>& outDescendantsMap) const
260 {
261     for (const auto& event : m_EventSequence)
262     {
263         const Event* eventPtrRaw = event.get();
264         const Event* parent = eventPtrRaw->GetParentEvent();
265 
266         if (!parent)
267         {
268             continue;
269         }
270 
271         auto it = outDescendantsMap.find(parent);
272         if (it == outDescendantsMap.end())
273         {
274             outDescendantsMap.emplace(parent, std::vector<const Event*>({eventPtrRaw}));
275         }
276         else
277         {
278             it->second.push_back(eventPtrRaw);
279         }
280     }
281 }
282 
283 
ExtractJsonObjects(unsigned int inferenceIndex,const Event * parentEvent,JsonChildObject & parentObject,std::map<const Event *,std::vector<const Event * >> descendantsMap)284 void ExtractJsonObjects(unsigned int inferenceIndex,
285                         const Event* parentEvent,
286                         JsonChildObject& parentObject,
287                         std::map<const Event*, std::vector<const Event*>> descendantsMap)
288 {
289     ARMNN_ASSERT(parentEvent);
290     std::vector<Measurement> instrumentMeasurements = parentEvent->GetMeasurements();
291     unsigned int childIdx=0;
292     for(size_t measurementIndex = 0; measurementIndex < instrumentMeasurements.size(); ++measurementIndex, ++childIdx)
293     {
294         if (inferenceIndex == 0)
295         {
296             // Only add kernel measurement once, in case of multiple inferences
297             JsonChildObject measurementObject{instrumentMeasurements[measurementIndex].m_Name};
298             measurementObject.SetUnit(instrumentMeasurements[measurementIndex].m_Unit);
299             measurementObject.SetType(JsonObjectType::Measurement);
300 
301             ARMNN_ASSERT(parentObject.NumChildren() == childIdx);
302             parentObject.AddChild(measurementObject);
303         }
304 
305         parentObject.GetChild(childIdx).AddMeasurement(instrumentMeasurements[measurementIndex].m_Value);
306     }
307 
308 
309     auto childEventsIt = descendantsMap.find(parentEvent);
310     if (childEventsIt != descendantsMap.end())
311     {
312         for (auto childEvent : childEventsIt->second)
313         {
314             if (inferenceIndex == 0)
315             {
316                 // Only add second level once, in case of multiple inferences
317                 JsonChildObject childObject{childEvent->GetName()};
318                 childObject.SetType(JsonObjectType::Event);
319                 parentObject.AddChild(childObject);
320             }
321 
322             // Recursively process children. In reality this won't be very deep recursion. ~4-6 levels deep.
323             ExtractJsonObjects(inferenceIndex, childEvent, parentObject.GetChild(childIdx), descendantsMap);
324 
325             childIdx++;
326         }
327     }
328 }
329 
Print(std::ostream & outStream) const330 void Profiler::Print(std::ostream& outStream) const
331 {
332     // Makes sure timestamps are output with 6 decimals, and save old settings.
333     std::streamsize oldPrecision = outStream.precision();
334     outStream.precision(6);
335     std::ios_base::fmtflags oldFlags = outStream.flags();
336     outStream.setf(std::ios::fixed);
337     JsonPrinter printer(outStream);
338 
339     // First find all the "inference" Events and print out duration measurements.
340     int baseLevel = -1;
341     std::vector<const Event*> inferences;
342     PopulateInferences(inferences, baseLevel);
343 
344     // Second map out descendants hierarchy
345     std::map<const Event*, std::vector<const Event*>> descendantsMap;
346     PopulateDescendants(descendantsMap);
347 
348     JsonChildObject inferenceObject{"inference_measurements"};
349     JsonChildObject layerObject{"layer_measurements"};
350     std::vector<JsonChildObject> workloadObjects;
351     std::map<unsigned int, std::vector<JsonChildObject>> workloadToKernelObjects;
352 
353     for (unsigned int inferenceIndex = 0; inferenceIndex < inferences.size(); ++inferenceIndex)
354     {
355         auto inference = inferences[inferenceIndex];
356         ExtractJsonObjects(inferenceIndex, inference, inferenceObject, descendantsMap);
357     }
358 
359     printer.PrintHeader();
360     printer.PrintArmNNHeader();
361 
362     // print inference object, also prints child layer and kernel measurements
363     size_t id=0;
364     printer.PrintJsonChildObject(inferenceObject, id);
365 
366     // end of ArmNN
367     printer.PrintNewLine();
368     printer.PrintFooter();
369 
370     // end of main JSON object
371     printer.PrintNewLine();
372     printer.PrintFooter();
373     printer.PrintNewLine();
374 
375     // Restores previous precision settings.
376     outStream.flags(oldFlags);
377     outStream.precision(oldPrecision);
378 }
379 
AnalyzeEventsAndWriteResults(std::ostream & outStream) const380 void Profiler::AnalyzeEventsAndWriteResults(std::ostream& outStream) const
381 {
382     // Stack should be empty now.
383     const bool saneMarkerSequence = m_Parents.empty();
384 
385     // Abort if the sequence of markers was found to have incorrect information:
386     // The stats cannot be trusted.
387     if (!saneMarkerSequence)
388     {
389         outStream << "Cannot write profiling stats. "
390             "Unexpected errors were found when analyzing the sequence of logged events, which may lead to plainly "
391             "wrong stats. The profiling system may contain implementation issues or could have been used in an "
392             "unsafe manner." << std::endl;
393         return;
394     }
395 
396     // Analyzes the full sequence of events.
397     AnalyzeEventSequenceAndWriteResults(m_EventSequence.cbegin(),
398                                         m_EventSequence.cend(),
399                                         outStream);
400 
401     // Aggregates events by tag if requested (spams the output stream if done for all tags).
402     if (g_AggregateProfilingEventsByInference)
403     {
404         outStream << std::endl;
405         outStream << "***" << std::endl;
406         outStream << "*** Per Inference Stats" << std::endl;
407         outStream << "***" << std::endl;
408         outStream << std::endl;
409 
410         int baseLevel = -1;
411         std::vector<const Event*> inferences;
412         PopulateInferences(inferences, baseLevel);
413 
414         // Second map out descendants hierarchy
415         std::map<const Event*, std::vector<const Event*>> descendantsMap;
416         PopulateDescendants(descendantsMap);
417 
418         std::function<void (const Event*, std::vector<const Event*>&)>
419             FindDescendantEvents = [&](const Event* eventPtr,
420                 std::vector<const Event*>& sequence)
421             {
422                 sequence.push_back(eventPtr);
423 
424                 if (CalcLevel(eventPtr) > baseLevel+2) //We only care about levels as deep as workload executions.
425                 {
426                     return;
427                 }
428 
429                 auto children = descendantsMap.find(eventPtr);
430                 if (children == descendantsMap.end())
431                 {
432                     return;
433                 }
434 
435                 for (const Event* child : children->second)
436                 {
437                     return FindDescendantEvents(child, sequence);
438                 }
439             };
440 
441         // Third, find events belonging to each inference
442         int inferenceIdx = 0;
443         for (auto inference : inferences)
444         {
445             std::vector<const Event*> sequence;
446 
447             //build sequence, depth first
448             FindDescendantEvents(inference, sequence);
449 
450             outStream << "> Begin Inference: " << inferenceIdx << std::endl;
451             outStream << std::endl;
452             AnalyzeEventSequenceAndWriteResults(sequence.cbegin(),
453                                                 sequence.cend(),
454                                                 outStream);
455             outStream << std::endl;
456             outStream << "> End Inference: " << inferenceIdx << std::endl;
457 
458             inferenceIdx++;
459         }
460     }
461 }
462 
GetEventColor(const BackendId & backendId) const463 std::uint32_t Profiler::GetEventColor(const BackendId& backendId) const
464 {
465     static BackendId cpuRef("CpuRef");
466     static BackendId cpuAcc("CpuAcc");
467     static BackendId gpuAcc("GpuAcc");
468     if (backendId == cpuRef) {
469             // Cyan
470             return 0xffff001b;
471     } else if (backendId == cpuAcc) {
472             // Green
473             return 0x00ff001b;
474     } else if (backendId == gpuAcc) {
475             // Purple
476             return 0xff007f1b;
477     } else {
478             // Dark gray
479             return 0x5555551b;
480     }
481 }
482 
483 // The thread_local pointer to the profiler instance.
484 thread_local Profiler* tl_Profiler = nullptr;
485 
GetInstance()486 ProfilerManager& ProfilerManager::GetInstance()
487 {
488     // Global reference to the single ProfileManager instance allowed.
489     static ProfilerManager s_ProfilerManager;
490     return s_ProfilerManager;
491 }
492 
RegisterProfiler(Profiler * profiler)493 void ProfilerManager::RegisterProfiler(Profiler* profiler)
494 {
495     tl_Profiler = profiler;
496 }
497 
GetProfiler()498 Profiler* ProfilerManager::GetProfiler()
499 {
500     return tl_Profiler;
501 }
502 
503 } // namespace armnn
504