• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright (c) 2024 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "runtime/tooling/perf_counter.h"
17 #include "libpandabase/utils/time.h"
18 #include <securec.h>
19 
20 namespace ark::tooling {
// Global Perf instance, built with the default counter set chosen by Perf::Perf().
// NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
Perf g_perf;
23 
CounterValue(uint64_t value,bool exact,bool available,double accuracy)24 CounterValue::CounterValue(uint64_t value, bool exact, bool available, double accuracy)
25     : value_(value), exact_(exact), available_(available), accuracy_(accuracy)
26 {
27 }
28 
CreateUnavailable()29 CounterValue CounterValue::CreateUnavailable()
30 {
31     return CounterValue(0, false, false, 0.0);
32 }
33 
CreateExact(uint64_t value)34 CounterValue CounterValue::CreateExact(uint64_t value)
35 {
36     return CounterValue(value, true, true, 1.0);
37 }
38 
CreateApprox(uint64_t value,double accuracy)39 CounterValue CounterValue::CreateApprox(uint64_t value, double accuracy)
40 {
41     return CounterValue(value, false, true, accuracy);
42 }
43 
GetValue() const44 uint64_t CounterValue::GetValue() const
45 {
46     return value_;
47 }
48 
IsExact() const49 bool CounterValue::IsExact() const
50 {
51     return exact_;
52 }
53 
IsAvailable() const54 bool CounterValue::IsAvailable() const
55 {
56     return available_;
57 }
58 
GetAccuracy() const59 double CounterValue::GetAccuracy() const
60 {
61     return accuracy_;
62 }
63 
Reset()64 void CounterAccumulator::Reset()
65 {
66     // Atomic with relaxed order reason: memory order is not required
67     value_.store(0, std::memory_order_relaxed);
68     // Atomic with relaxed order reason: memory order is not required
69     missing_.store(0, std::memory_order_relaxed);
70     // Atomic with relaxed order reason: memory order is not required
71     total_.store(0, std::memory_order_relaxed);
72 }
73 
Add(uint64_t value)74 void CounterAccumulator::Add(uint64_t value)
75 {
76     // Atomic with relaxed order reason: memory order is not required
77     value_.fetch_add(value, std::memory_order_relaxed);
78     // Atomic with relaxed order reason: memory order is not required
79     total_.fetch_add(1, std::memory_order_relaxed);
80 }
81 
AddMissing()82 void CounterAccumulator::AddMissing()
83 {
84     // Atomic with relaxed order reason: memory order is not required
85     missing_.fetch_add(1, std::memory_order_relaxed);
86     // Atomic with relaxed order reason: memory order is not required
87     total_.fetch_add(1, std::memory_order_relaxed);
88 }
89 
GetValue() const90 CounterValue CounterAccumulator::GetValue() const
91 {
92     if (!IsAvailable()) {
93         return CounterValue::CreateUnavailable();
94     }
95 
96     if (HasMissing()) {
97         return CounterValue::CreateApprox(GetApprox(), GetAccuracy());
98     }
99 
100     return CounterValue::CreateExact(GetExact());
101 }
102 
IsAvailable() const103 bool CounterAccumulator::IsAvailable() const
104 {
105     // Atomic with relaxed order reason: memory order is not required
106     return total_.load(std::memory_order_relaxed) > 0;
107 }
108 
HasMissing() const109 bool CounterAccumulator::HasMissing() const
110 {
111     // Atomic with relaxed order reason: memory order is not required
112     return missing_.load(std::memory_order_relaxed) > 0;
113 }
114 
GetExact() const115 uint64_t CounterAccumulator::GetExact() const
116 {
117     ASSERT(IsAvailable());
118     ASSERT(!HasMissing());
119     // Atomic with relaxed order reason: memory order is not required
120     return value_.load(std::memory_order_relaxed);
121 }
122 
GetApprox() const123 uint64_t CounterAccumulator::GetApprox() const
124 {
125     ASSERT(IsAvailable());
126     ASSERT(HasMissing());
127     // Atomic with relaxed order reason: memory order is not required
128     return value_.load(std::memory_order_relaxed) * GetAccuracy();
129 }
130 
GetAccuracy() const131 double CounterAccumulator::GetAccuracy() const
132 {
133     ASSERT(IsAvailable());
134     ASSERT(HasMissing());
135     // Atomic with relaxed order reason: memory order is not required
136     return static_cast<double>(missing_.load(std::memory_order_relaxed)) / total_.load(std::memory_order_relaxed);
137 }
138 
OpenDescriptor(uint32_t type,uint64_t config)139 int PerfFileHandler::OpenDescriptor(uint32_t type, uint64_t config)
140 {
141     perf_event_attr attr {};
142     attr.type = type;
143     attr.size = sizeof(attr);
144     attr.config = config;
145     attr.disabled = 1;
146     attr.exclude_kernel = 1;
147     attr.exclude_hv = 1;
148     // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
149     return syscall(__NR_perf_event_open, &attr, 0 /*pid*/, -1 /*cpu*/, -1 /*group_fd*/, PERF_FLAG_FD_CLOEXEC);
150 }
151 
PerfFileHandler(uint32_t type,uint64_t config)152 PerfFileHandler::PerfFileHandler(uint32_t type, uint64_t config) : fd_(OpenDescriptor(type, config)) {}
153 
~PerfFileHandler()154 PerfFileHandler::~PerfFileHandler()
155 {
156     if (fd_ >= 0) {
157         close(fd_);
158     }
159 }
160 
PerfFileHandler(PerfFileHandler && other)161 PerfFileHandler::PerfFileHandler(PerfFileHandler &&other) : fd_(other.fd_)
162 {
163     other.fd_ = -1;
164 }
165 
operator =(PerfFileHandler && other)166 PerfFileHandler &PerfFileHandler::operator=(PerfFileHandler &&other)
167 {
168     this->fd_ = other.fd_;
169     other.fd_ = -1;
170     return *this;
171 }
172 
Reset()173 void PerfFileHandler::Reset()
174 {
175     if (fd_ >= 0) {
176         ioctl(fd_, PERF_EVENT_IOC_RESET, 0);
177     }
178 }
179 
Enable()180 void PerfFileHandler::Enable()
181 {
182     if (fd_ >= 0) {
183         ioctl(fd_, PERF_EVENT_IOC_ENABLE, 0);
184     }
185 }
186 
Disable()187 void PerfFileHandler::Disable()
188 {
189     if (fd_ >= 0) {
190         ioctl(fd_, PERF_EVENT_IOC_DISABLE, 0);
191     }
192 }
193 
GetData() const194 std::optional<uint64_t> PerfFileHandler::GetData() const
195 {
196     if (fd_ < 0) {
197         return std::nullopt;
198     }
199 
200     uint64_t count;
201     auto n = read(fd_, &count, sizeof(count));
202     if (n != sizeof(count)) {
203         return std::nullopt;
204     }
205 
206     return count;
207 }
208 
PerfCollector(Perf * p,std::vector<const PerfCounterDescriptor * > & list,bool isWallTime)209 PerfCollector::PerfCollector(Perf *p, std::vector<const PerfCounterDescriptor *> &list, bool isWallTime)
210     : perf_(p), isWallTime_(isWallTime)
211 {
212     for (auto *desc : list) {
213         counters_.insert(std::make_pair(desc, desc->CreatePerfFileHandler()));
214     }
215 
216     Enable();
217 
218     if (isWallTime_) {
219         startTime_ = ark::time::GetCurrentTimeInNanos();
220     }
221 }
222 
~PerfCollector()223 PerfCollector::~PerfCollector()
224 {
225     if (isWallTime_) {
226         perf_->AddWallTime(ark::time::GetCurrentTimeInNanos() - startTime_);
227     }
228 
229     Disable();
230 
231     for (auto &[desc, counter] : counters_) {
232         auto data = counter.GetData();
233         if (data) {
234             perf_->Add(desc, data.value());
235         } else {
236             perf_->AddMissing(desc);
237         }
238     }
239 }
240 
Reset()241 void PerfCollector::Reset()
242 {
243     for (auto &[desc, counter] : counters_) {
244         counter.Reset();
245     }
246 }
247 
Enable()248 void PerfCollector::Enable()
249 {
250     for (auto &[desc, counter] : counters_) {
251         counter.Enable();
252     }
253 }
254 
Disable()255 void PerfCollector::Disable()
256 {
257     for (auto &[desc, counter] : counters_) {
258         counter.Disable();
259     }
260 }
261 
PerfCounterDescriptor(const char * name,uint32_t type,uint64_t config,std::unique_ptr<CounterReporter> reporter)262 PerfCounterDescriptor::PerfCounterDescriptor(const char *name, uint32_t type, uint64_t config,
263                                              std::unique_ptr<CounterReporter> reporter)
264     : name_(name), type_(type), config_(config), reporter_(std::move(reporter))
265 {
266 }
267 
CreatePerfFileHandler() const268 PerfFileHandler PerfCounterDescriptor::CreatePerfFileHandler() const
269 {
270     return PerfFileHandler(type_, config_);
271 }
272 
GetName() const273 const char *PerfCounterDescriptor::GetName() const
274 {
275     return name_;
276 }
277 
278 class CounterReporter {
279 public:
280     virtual void Report(std::ostream &out, const char *title, CounterValue counter, const Perf *p) const = 0;
281     CounterReporter() = default;
282     NO_COPY_SEMANTIC(CounterReporter);
283     NO_MOVE_SEMANTIC(CounterReporter);
284     virtual ~CounterReporter() = default;
285 
286     static constexpr size_t VALUE_ALIGNEMENT = 20;
287 
288 protected:
289     static void FormatLongNumber(uint64_t value, char *out);
290     static void ReportNanCounter(std::ostream &out, const char *title);
291 
292     static void ReportLongCounter(std::ostream &out, const char *title, CounterValue counter);
293     static void ReportLongCounter(std::ostream &out, const char *title, CounterValue counter,
294                                   CounterValue denominatorCounter, const char *unit);
295     static void ReportTimeCounter(std::ostream &out, const char *title, CounterValue counter);
296     static void ReportCounterRatio(std::ostream &out, CounterValue counter, CounterValue denominatorCounter,
297                                    const char *unit);
298     static constexpr size_t BUFFER_SIZE = VALUE_ALIGNEMENT + 1;
299     static constexpr size_t PRECISSION = 3;
300 };
301 
FormatLongNumber(uint64_t value,char * out)302 void CounterReporter::FormatLongNumber(uint64_t value, char *out)
303 {
304     static constexpr size_t THREE_DIGITS = 3;
305     static constexpr size_t RADIX10 = 10;
306 
307     auto p = VALUE_ALIGNEMENT;
308     // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
309     out[p--] = '\0';
310 
311     auto k = THREE_DIGITS;
312     while (value != 0) {
313         auto d = static_cast<char>((value % RADIX10) + '0');
314         value /= RADIX10;
315         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
316         out[p--] = d;
317         k--;
318         if (k == 0) {
319             k = THREE_DIGITS;
320             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
321             out[p--] = ' ';
322         }
323     }
324 
325     for (size_t i = 0; i <= p; i++) {
326         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
327         out[i] = ' ';
328     }
329 }
330 
ReportNanCounter(std::ostream & out,const char * title)331 void CounterReporter::ReportNanCounter(std::ostream &out, const char *title)
332 {
333     out << std::setw(VALUE_ALIGNEMENT) << "NaN"
334         << " " << title << std::endl;
335 }
336 
ReportLongCounter(std::ostream & out,const char * title,CounterValue counter)337 void CounterReporter::ReportLongCounter(std::ostream &out, const char *title, CounterValue counter)
338 {
339     if (!counter.IsAvailable()) {
340         ReportNanCounter(out, title);
341         return;
342     }
343 
344     char buf[BUFFER_SIZE];  // NOLINT(modernize-avoid-c-arrays)
345     FormatLongNumber(counter.GetValue(), buf);
346 
347     out << buf << " " << title;
348     if (!counter.IsExact()) {
349         out << " approx";
350     }
351     out << std::endl;
352 }
353 
ReportLongCounter(std::ostream & out,const char * title,CounterValue counter,CounterValue denominatorCounter,const char * unit)354 void CounterReporter::ReportLongCounter(std::ostream &out, const char *title, CounterValue counter,
355                                         CounterValue denominatorCounter, const char *unit)
356 {
357     if (!counter.IsAvailable()) {
358         ReportNanCounter(out, title);
359         return;
360     }
361 
362     char buf[BUFFER_SIZE];  // NOLINT(modernize-avoid-c-arrays)
363     FormatLongNumber(counter.GetValue(), buf);
364 
365     if (!denominatorCounter.IsAvailable() || denominatorCounter.GetValue() == 0) {
366         out << buf << " " << title << std::endl;
367         return;
368     }
369 
370     out << buf << " " << title << " (" << std::setprecision(PRECISSION) << std::fixed
371         << static_cast<double>(counter.GetValue()) / denominatorCounter.GetValue() << " " << unit << ")";
372 
373     if (!counter.IsExact() || !denominatorCounter.IsExact()) {
374         out << " approx";
375     }
376     out << std::endl;
377 }
378 
ReportTimeCounter(std::ostream & out,const char * title,CounterValue counter)379 void CounterReporter::ReportTimeCounter(std::ostream &out, const char *title, CounterValue counter)
380 {
381     if (!counter.IsAvailable()) {
382         ReportNanCounter(out, title);
383         return;
384     }
385 
386     auto prettyTime = ark::helpers::TimeConverter(counter.GetValue());
387     out << std::setw(VALUE_ALIGNEMENT - prettyTime.GetLiteral().length()) << prettyTime << " " << title;
388     if (!counter.IsExact()) {
389         out << " approx";
390     }
391     out << std::endl;
392 }
393 
ReportCounterRatio(std::ostream & out,CounterValue counter,CounterValue denominatorCounter,const char * unit)394 void CounterReporter::ReportCounterRatio(std::ostream &out, CounterValue counter, CounterValue denominatorCounter,
395                                          const char *unit)
396 {
397     if (!denominatorCounter.IsAvailable() || denominatorCounter.GetValue() == 0) {
398         ReportNanCounter(out, unit);
399         return;
400     }
401 
402     out << std::setw(VALUE_ALIGNEMENT) << std::setprecision(PRECISSION) << std::fixed
403         << static_cast<double>(counter.GetValue()) / denominatorCounter.GetValue() << " " << unit << std::endl;
404 }
405 
Report(std::ostream & out,const Perf * p) const406 void PerfCounterDescriptor::Report(std::ostream &out, const Perf *p) const
407 {
408     reporter_->Report(out, GetName(), p->Get(this), p);
409 }
410 
Perf()411 Perf::Perf()
412     : Perf({&PerfCounterDescriptor::TASK_CLOCK, &PerfCounterDescriptor::TOTAL_CPU_CYCLES,
413             &PerfCounterDescriptor::STALLED_BACKEND_CYCLES, &PerfCounterDescriptor::INSTRUCTIONS_COUNT})
414 {
415 }
416 
Perf(std::initializer_list<const PerfCounterDescriptor * > list)417 Perf::Perf(std::initializer_list<const PerfCounterDescriptor *> list)
418 {
419     for (auto *e : list) {
420         perfDescriptors_.push_back(e);
421     }
422     Reset();
423 }
424 
CreateCollector(bool isWallTime)425 PerfCollector Perf::CreateCollector(bool isWallTime)
426 {
427     return PerfCollector(this, perfDescriptors_, isWallTime);
428 }
429 
Reset()430 void Perf::Reset()
431 {
432     for (auto *desc : perfDescriptors_) {
433         counters_[desc].Reset();
434     }
435 }
436 
Add(const PerfCounterDescriptor * desc,uint64_t value)437 void Perf::Add(const PerfCounterDescriptor *desc, uint64_t value)
438 {
439     counters_.at(desc).Add(value);
440 }
441 
AddMissing(const PerfCounterDescriptor * desc)442 void Perf::AddMissing(const PerfCounterDescriptor *desc)
443 {
444     counters_.at(desc).AddMissing();
445 }
446 
AddWallTime(uint64_t time)447 void Perf::AddWallTime(uint64_t time)
448 {
449     wallTime_ = time;
450 }
451 
Get(const PerfCounterDescriptor * desc) const452 CounterValue Perf::Get(const PerfCounterDescriptor *desc) const
453 {
454     if (counters_.count(desc) > 0) {
455         return counters_.at(desc).GetValue();
456     }
457 
458     return CounterValue::CreateUnavailable();
459 }
460 
Report(std::ostream & out) const461 void Perf::Report(std::ostream &out) const
462 {
463     out << std::endl;
464     if (wallTime_ > 0) {
465         auto prettyTime = ark::helpers::TimeConverter(wallTime_);
466         out << std::setw(CounterReporter::VALUE_ALIGNEMENT - prettyTime.GetLiteral().size()) << prettyTime
467             << " wall time" << std::endl;
468     }
469     for (auto *desc : perfDescriptors_) {
470         desc->Report(out, this);
471     }
472 }
473 
operator <<(std::ostream & out,const Perf & p)474 std::ostream &operator<<(std::ostream &out, const Perf &p)
475 {
476     p.Report(out);
477     return out;
478 }
479 
480 class LongCounterReporter : public CounterReporter {
481 public:
Report(std::ostream & out,const char * title,CounterValue counter,const Perf * p) const482     void Report(std::ostream &out, const char *title, CounterValue counter,
483                 [[maybe_unused]] const Perf *p) const override
484     {
485         ReportLongCounter(out, title, counter);
486     }
487 };
488 
489 class LongCounterWithRatioReporter : public CounterReporter {
490 public:
LongCounterWithRatioReporter(const PerfCounterDescriptor * denominator,const char * unit)491     LongCounterWithRatioReporter(const PerfCounterDescriptor *denominator, const char *unit)
492         : denominator_(denominator), unit_(unit)
493     {
494     }
495 
Report(std::ostream & out,const char * title,CounterValue counter,const Perf * p) const496     void Report(std::ostream &out, const char *title, CounterValue counter, const Perf *p) const override
497     {
498         ReportLongCounter(out, title, counter, p->Get(denominator_), unit_);
499     }
500 
501 private:
502     const PerfCounterDescriptor *denominator_;
503     const char *unit_;
504 };
505 
506 class InstructionCounterReporter : public CounterReporter {
507 public:
Report(std::ostream & out,const char * title,CounterValue counter,const Perf * p) const508     void Report(std::ostream &out, const char *title, CounterValue counter, const Perf *p) const override
509     {
510         ReportLongCounter(out, title, counter);
511         ReportCounterRatio(out, counter, p->Get(&PerfCounterDescriptor::TOTAL_CPU_CYCLES), "insn per cycle");
512         ReportCounterRatio(out, p->Get(&PerfCounterDescriptor::STALLED_BACKEND_CYCLES), counter,
513                            "stalled cycles per insn");
514     }
515 };
516 
517 class TimeCounterReporter : public CounterReporter {
518 public:
Report(std::ostream & out,const char * title,CounterValue counter,const Perf * p) const519     void Report(std::ostream &out, const char *title, CounterValue counter,
520                 [[maybe_unused]] const Perf *p) const override
521     {
522         ReportTimeCounter(out, title, counter);
523     }
524 };
525 
// Definitions of the predefined counter descriptors. Each one binds a display name to a perf event
// (type + config) and to the reporter that prints it. Ratio reporters refer to their denominator
// by address only.
// NOLINTBEGIN(fuchsia-statically-constructed-objects)
// Software events.
const PerfCounterDescriptor PerfCounterDescriptor::TASK_CLOCK("task-clock", PERF_TYPE_SOFTWARE,
                                                              PERF_COUNT_SW_TASK_CLOCK,
                                                              std::make_unique<TimeCounterReporter>());
const PerfCounterDescriptor PerfCounterDescriptor::CONTEXT_SWITCHES("context switches", PERF_TYPE_SOFTWARE,
                                                                    PERF_COUNT_SW_CONTEXT_SWITCHES,
                                                                    std::make_unique<LongCounterReporter>());
const PerfCounterDescriptor PerfCounterDescriptor::CPU_MIGRATION("cpu-migrations", PERF_TYPE_SOFTWARE,
                                                                 PERF_COUNT_SW_CPU_MIGRATIONS,
                                                                 std::make_unique<LongCounterReporter>());
const PerfCounterDescriptor PerfCounterDescriptor::PAGE_FAULT("page-faults", PERF_TYPE_SOFTWARE,
                                                              PERF_COUNT_SW_PAGE_FAULTS,
                                                              std::make_unique<LongCounterReporter>());
// Hardware events.
// Cycles are reported together with their ratio to the task clock, labelled "GHz".
const PerfCounterDescriptor PerfCounterDescriptor::TOTAL_CPU_CYCLES(
    "total cpu cycles", PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES,
    std::make_unique<LongCounterWithRatioReporter>(&PerfCounterDescriptor::TASK_CLOCK, "GHz"));
// Stalled cycles are reported together with their ratio to the total cycle count.
const PerfCounterDescriptor PerfCounterDescriptor::STALLED_FRONTEND_CYCLES(
    "stalled frontend cycles", PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND,
    std::make_unique<LongCounterWithRatioReporter>(&PerfCounterDescriptor::TOTAL_CPU_CYCLES, "of cycles"));
const PerfCounterDescriptor PerfCounterDescriptor::STALLED_BACKEND_CYCLES(
    "stalled backend cycles", PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_BACKEND,
    std::make_unique<LongCounterWithRatioReporter>(&PerfCounterDescriptor::TOTAL_CPU_CYCLES, "of cycles"));
const PerfCounterDescriptor PerfCounterDescriptor::INSTRUCTIONS_COUNT("instructions", PERF_TYPE_HARDWARE,
                                                                      PERF_COUNT_HW_INSTRUCTIONS,
                                                                      std::make_unique<InstructionCounterReporter>());
const PerfCounterDescriptor PerfCounterDescriptor::BRANCHES("branches", PERF_TYPE_HARDWARE,
                                                            PERF_COUNT_HW_BRANCH_INSTRUCTIONS,
                                                            std::make_unique<LongCounterReporter>());
// Branch misses are reported together with their ratio to the branch count.
const PerfCounterDescriptor PerfCounterDescriptor::BRANCH_MISSES(
    "branch-misses", PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_MISSES,
    std::make_unique<LongCounterWithRatioReporter>(&PerfCounterDescriptor::BRANCHES, "of branches"));
// NOLINTEND(fuchsia-statically-constructed-objects)
558 }  // namespace ark::tooling
559