1 /**
2 * Copyright (c) 2024 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "runtime/tooling/perf_counter.h"
17 #include "libpandabase/utils/time.h"
18 #include <securec.h>
19
20 namespace ark::tooling {
21 // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
22 Perf g_perf;
23
CounterValue(uint64_t value,bool exact,bool available,double accuracy)24 CounterValue::CounterValue(uint64_t value, bool exact, bool available, double accuracy)
25 : value_(value), exact_(exact), available_(available), accuracy_(accuracy)
26 {
27 }
28
CreateUnavailable()29 CounterValue CounterValue::CreateUnavailable()
30 {
31 return CounterValue(0, false, false, 0.0);
32 }
33
CreateExact(uint64_t value)34 CounterValue CounterValue::CreateExact(uint64_t value)
35 {
36 return CounterValue(value, true, true, 1.0);
37 }
38
CreateApprox(uint64_t value,double accuracy)39 CounterValue CounterValue::CreateApprox(uint64_t value, double accuracy)
40 {
41 return CounterValue(value, false, true, accuracy);
42 }
43
GetValue() const44 uint64_t CounterValue::GetValue() const
45 {
46 return value_;
47 }
48
IsExact() const49 bool CounterValue::IsExact() const
50 {
51 return exact_;
52 }
53
IsAvailable() const54 bool CounterValue::IsAvailable() const
55 {
56 return available_;
57 }
58
GetAccuracy() const59 double CounterValue::GetAccuracy() const
60 {
61 return accuracy_;
62 }
63
Reset()64 void CounterAccumulator::Reset()
65 {
66 // Atomic with relaxed order reason: memory order is not required
67 value_.store(0, std::memory_order_relaxed);
68 // Atomic with relaxed order reason: memory order is not required
69 missing_.store(0, std::memory_order_relaxed);
70 // Atomic with relaxed order reason: memory order is not required
71 total_.store(0, std::memory_order_relaxed);
72 }
73
Add(uint64_t value)74 void CounterAccumulator::Add(uint64_t value)
75 {
76 // Atomic with relaxed order reason: memory order is not required
77 value_.fetch_add(value, std::memory_order_relaxed);
78 // Atomic with relaxed order reason: memory order is not required
79 total_.fetch_add(1, std::memory_order_relaxed);
80 }
81
AddMissing()82 void CounterAccumulator::AddMissing()
83 {
84 // Atomic with relaxed order reason: memory order is not required
85 missing_.fetch_add(1, std::memory_order_relaxed);
86 // Atomic with relaxed order reason: memory order is not required
87 total_.fetch_add(1, std::memory_order_relaxed);
88 }
89
GetValue() const90 CounterValue CounterAccumulator::GetValue() const
91 {
92 if (!IsAvailable()) {
93 return CounterValue::CreateUnavailable();
94 }
95
96 if (HasMissing()) {
97 return CounterValue::CreateApprox(GetApprox(), GetAccuracy());
98 }
99
100 return CounterValue::CreateExact(GetExact());
101 }
102
IsAvailable() const103 bool CounterAccumulator::IsAvailable() const
104 {
105 // Atomic with relaxed order reason: memory order is not required
106 return total_.load(std::memory_order_relaxed) > 0;
107 }
108
HasMissing() const109 bool CounterAccumulator::HasMissing() const
110 {
111 // Atomic with relaxed order reason: memory order is not required
112 return missing_.load(std::memory_order_relaxed) > 0;
113 }
114
GetExact() const115 uint64_t CounterAccumulator::GetExact() const
116 {
117 ASSERT(IsAvailable());
118 ASSERT(!HasMissing());
119 // Atomic with relaxed order reason: memory order is not required
120 return value_.load(std::memory_order_relaxed);
121 }
122
GetApprox() const123 uint64_t CounterAccumulator::GetApprox() const
124 {
125 ASSERT(IsAvailable());
126 ASSERT(HasMissing());
127 // Atomic with relaxed order reason: memory order is not required
128 return value_.load(std::memory_order_relaxed) * GetAccuracy();
129 }
130
GetAccuracy() const131 double CounterAccumulator::GetAccuracy() const
132 {
133 ASSERT(IsAvailable());
134 ASSERT(HasMissing());
135 // Atomic with relaxed order reason: memory order is not required
136 return static_cast<double>(missing_.load(std::memory_order_relaxed)) / total_.load(std::memory_order_relaxed);
137 }
138
OpenDescriptor(uint32_t type,uint64_t config)139 int PerfFileHandler::OpenDescriptor(uint32_t type, uint64_t config)
140 {
141 perf_event_attr attr {};
142 attr.type = type;
143 attr.size = sizeof(attr);
144 attr.config = config;
145 attr.disabled = 1;
146 attr.exclude_kernel = 1;
147 attr.exclude_hv = 1;
148 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
149 return syscall(__NR_perf_event_open, &attr, 0 /*pid*/, -1 /*cpu*/, -1 /*group_fd*/, PERF_FLAG_FD_CLOEXEC);
150 }
151
PerfFileHandler(uint32_t type,uint64_t config)152 PerfFileHandler::PerfFileHandler(uint32_t type, uint64_t config) : fd_(OpenDescriptor(type, config)) {}
153
~PerfFileHandler()154 PerfFileHandler::~PerfFileHandler()
155 {
156 if (fd_ >= 0) {
157 close(fd_);
158 }
159 }
160
PerfFileHandler(PerfFileHandler && other)161 PerfFileHandler::PerfFileHandler(PerfFileHandler &&other) : fd_(other.fd_)
162 {
163 other.fd_ = -1;
164 }
165
operator =(PerfFileHandler && other)166 PerfFileHandler &PerfFileHandler::operator=(PerfFileHandler &&other)
167 {
168 this->fd_ = other.fd_;
169 other.fd_ = -1;
170 return *this;
171 }
172
Reset()173 void PerfFileHandler::Reset()
174 {
175 if (fd_ >= 0) {
176 ioctl(fd_, PERF_EVENT_IOC_RESET, 0);
177 }
178 }
179
Enable()180 void PerfFileHandler::Enable()
181 {
182 if (fd_ >= 0) {
183 ioctl(fd_, PERF_EVENT_IOC_ENABLE, 0);
184 }
185 }
186
Disable()187 void PerfFileHandler::Disable()
188 {
189 if (fd_ >= 0) {
190 ioctl(fd_, PERF_EVENT_IOC_DISABLE, 0);
191 }
192 }
193
GetData() const194 std::optional<uint64_t> PerfFileHandler::GetData() const
195 {
196 if (fd_ < 0) {
197 return std::nullopt;
198 }
199
200 uint64_t count;
201 auto n = read(fd_, &count, sizeof(count));
202 if (n != sizeof(count)) {
203 return std::nullopt;
204 }
205
206 return count;
207 }
208
PerfCollector(Perf * p,std::vector<const PerfCounterDescriptor * > & list,bool isWallTime)209 PerfCollector::PerfCollector(Perf *p, std::vector<const PerfCounterDescriptor *> &list, bool isWallTime)
210 : perf_(p), isWallTime_(isWallTime)
211 {
212 for (auto *desc : list) {
213 counters_.insert(std::make_pair(desc, desc->CreatePerfFileHandler()));
214 }
215
216 Enable();
217
218 if (isWallTime_) {
219 startTime_ = ark::time::GetCurrentTimeInNanos();
220 }
221 }
222
~PerfCollector()223 PerfCollector::~PerfCollector()
224 {
225 if (isWallTime_) {
226 perf_->AddWallTime(ark::time::GetCurrentTimeInNanos() - startTime_);
227 }
228
229 Disable();
230
231 for (auto &[desc, counter] : counters_) {
232 auto data = counter.GetData();
233 if (data) {
234 perf_->Add(desc, data.value());
235 } else {
236 perf_->AddMissing(desc);
237 }
238 }
239 }
240
Reset()241 void PerfCollector::Reset()
242 {
243 for (auto &[desc, counter] : counters_) {
244 counter.Reset();
245 }
246 }
247
Enable()248 void PerfCollector::Enable()
249 {
250 for (auto &[desc, counter] : counters_) {
251 counter.Enable();
252 }
253 }
254
Disable()255 void PerfCollector::Disable()
256 {
257 for (auto &[desc, counter] : counters_) {
258 counter.Disable();
259 }
260 }
261
PerfCounterDescriptor(const char * name,uint32_t type,uint64_t config,std::unique_ptr<CounterReporter> reporter)262 PerfCounterDescriptor::PerfCounterDescriptor(const char *name, uint32_t type, uint64_t config,
263 std::unique_ptr<CounterReporter> reporter)
264 : name_(name), type_(type), config_(config), reporter_(std::move(reporter))
265 {
266 }
267
CreatePerfFileHandler() const268 PerfFileHandler PerfCounterDescriptor::CreatePerfFileHandler() const
269 {
270 return PerfFileHandler(type_, config_);
271 }
272
GetName() const273 const char *PerfCounterDescriptor::GetName() const
274 {
275 return name_;
276 }
277
278 class CounterReporter {
279 public:
280 virtual void Report(std::ostream &out, const char *title, CounterValue counter, const Perf *p) const = 0;
281 CounterReporter() = default;
282 NO_COPY_SEMANTIC(CounterReporter);
283 NO_MOVE_SEMANTIC(CounterReporter);
284 virtual ~CounterReporter() = default;
285
286 static constexpr size_t VALUE_ALIGNEMENT = 20;
287
288 protected:
289 static void FormatLongNumber(uint64_t value, char *out);
290 static void ReportNanCounter(std::ostream &out, const char *title);
291
292 static void ReportLongCounter(std::ostream &out, const char *title, CounterValue counter);
293 static void ReportLongCounter(std::ostream &out, const char *title, CounterValue counter,
294 CounterValue denominatorCounter, const char *unit);
295 static void ReportTimeCounter(std::ostream &out, const char *title, CounterValue counter);
296 static void ReportCounterRatio(std::ostream &out, CounterValue counter, CounterValue denominatorCounter,
297 const char *unit);
298 static constexpr size_t BUFFER_SIZE = VALUE_ALIGNEMENT + 1;
299 static constexpr size_t PRECISSION = 3;
300 };
301
FormatLongNumber(uint64_t value,char * out)302 void CounterReporter::FormatLongNumber(uint64_t value, char *out)
303 {
304 static constexpr size_t THREE_DIGITS = 3;
305 static constexpr size_t RADIX10 = 10;
306
307 auto p = VALUE_ALIGNEMENT;
308 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
309 out[p--] = '\0';
310
311 auto k = THREE_DIGITS;
312 while (value != 0) {
313 auto d = static_cast<char>((value % RADIX10) + '0');
314 value /= RADIX10;
315 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
316 out[p--] = d;
317 k--;
318 if (k == 0) {
319 k = THREE_DIGITS;
320 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
321 out[p--] = ' ';
322 }
323 }
324
325 for (size_t i = 0; i <= p; i++) {
326 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
327 out[i] = ' ';
328 }
329 }
330
ReportNanCounter(std::ostream & out,const char * title)331 void CounterReporter::ReportNanCounter(std::ostream &out, const char *title)
332 {
333 out << std::setw(VALUE_ALIGNEMENT) << "NaN"
334 << " " << title << std::endl;
335 }
336
ReportLongCounter(std::ostream & out,const char * title,CounterValue counter)337 void CounterReporter::ReportLongCounter(std::ostream &out, const char *title, CounterValue counter)
338 {
339 if (!counter.IsAvailable()) {
340 ReportNanCounter(out, title);
341 return;
342 }
343
344 char buf[BUFFER_SIZE]; // NOLINT(modernize-avoid-c-arrays)
345 FormatLongNumber(counter.GetValue(), buf);
346
347 out << buf << " " << title;
348 if (!counter.IsExact()) {
349 out << " approx";
350 }
351 out << std::endl;
352 }
353
ReportLongCounter(std::ostream & out,const char * title,CounterValue counter,CounterValue denominatorCounter,const char * unit)354 void CounterReporter::ReportLongCounter(std::ostream &out, const char *title, CounterValue counter,
355 CounterValue denominatorCounter, const char *unit)
356 {
357 if (!counter.IsAvailable()) {
358 ReportNanCounter(out, title);
359 return;
360 }
361
362 char buf[BUFFER_SIZE]; // NOLINT(modernize-avoid-c-arrays)
363 FormatLongNumber(counter.GetValue(), buf);
364
365 if (!denominatorCounter.IsAvailable() || denominatorCounter.GetValue() == 0) {
366 out << buf << " " << title << std::endl;
367 return;
368 }
369
370 out << buf << " " << title << " (" << std::setprecision(PRECISSION) << std::fixed
371 << static_cast<double>(counter.GetValue()) / denominatorCounter.GetValue() << " " << unit << ")";
372
373 if (!counter.IsExact() || !denominatorCounter.IsExact()) {
374 out << " approx";
375 }
376 out << std::endl;
377 }
378
ReportTimeCounter(std::ostream & out,const char * title,CounterValue counter)379 void CounterReporter::ReportTimeCounter(std::ostream &out, const char *title, CounterValue counter)
380 {
381 if (!counter.IsAvailable()) {
382 ReportNanCounter(out, title);
383 return;
384 }
385
386 auto prettyTime = ark::helpers::TimeConverter(counter.GetValue());
387 out << std::setw(VALUE_ALIGNEMENT - prettyTime.GetLiteral().length()) << prettyTime << " " << title;
388 if (!counter.IsExact()) {
389 out << " approx";
390 }
391 out << std::endl;
392 }
393
ReportCounterRatio(std::ostream & out,CounterValue counter,CounterValue denominatorCounter,const char * unit)394 void CounterReporter::ReportCounterRatio(std::ostream &out, CounterValue counter, CounterValue denominatorCounter,
395 const char *unit)
396 {
397 if (!denominatorCounter.IsAvailable() || denominatorCounter.GetValue() == 0) {
398 ReportNanCounter(out, unit);
399 return;
400 }
401
402 out << std::setw(VALUE_ALIGNEMENT) << std::setprecision(PRECISSION) << std::fixed
403 << static_cast<double>(counter.GetValue()) / denominatorCounter.GetValue() << " " << unit << std::endl;
404 }
405
Report(std::ostream & out,const Perf * p) const406 void PerfCounterDescriptor::Report(std::ostream &out, const Perf *p) const
407 {
408 reporter_->Report(out, GetName(), p->Get(this), p);
409 }
410
Perf()411 Perf::Perf()
412 : Perf({&PerfCounterDescriptor::TASK_CLOCK, &PerfCounterDescriptor::TOTAL_CPU_CYCLES,
413 &PerfCounterDescriptor::STALLED_BACKEND_CYCLES, &PerfCounterDescriptor::INSTRUCTIONS_COUNT})
414 {
415 }
416
Perf(std::initializer_list<const PerfCounterDescriptor * > list)417 Perf::Perf(std::initializer_list<const PerfCounterDescriptor *> list)
418 {
419 for (auto *e : list) {
420 perfDescriptors_.push_back(e);
421 }
422 Reset();
423 }
424
CreateCollector(bool isWallTime)425 PerfCollector Perf::CreateCollector(bool isWallTime)
426 {
427 return PerfCollector(this, perfDescriptors_, isWallTime);
428 }
429
Reset()430 void Perf::Reset()
431 {
432 for (auto *desc : perfDescriptors_) {
433 counters_[desc].Reset();
434 }
435 }
436
Add(const PerfCounterDescriptor * desc,uint64_t value)437 void Perf::Add(const PerfCounterDescriptor *desc, uint64_t value)
438 {
439 counters_.at(desc).Add(value);
440 }
441
AddMissing(const PerfCounterDescriptor * desc)442 void Perf::AddMissing(const PerfCounterDescriptor *desc)
443 {
444 counters_.at(desc).AddMissing();
445 }
446
AddWallTime(uint64_t time)447 void Perf::AddWallTime(uint64_t time)
448 {
449 wallTime_ = time;
450 }
451
Get(const PerfCounterDescriptor * desc) const452 CounterValue Perf::Get(const PerfCounterDescriptor *desc) const
453 {
454 if (counters_.count(desc) > 0) {
455 return counters_.at(desc).GetValue();
456 }
457
458 return CounterValue::CreateUnavailable();
459 }
460
Report(std::ostream & out) const461 void Perf::Report(std::ostream &out) const
462 {
463 out << std::endl;
464 if (wallTime_ > 0) {
465 auto prettyTime = ark::helpers::TimeConverter(wallTime_);
466 out << std::setw(CounterReporter::VALUE_ALIGNEMENT - prettyTime.GetLiteral().size()) << prettyTime
467 << " wall time" << std::endl;
468 }
469 for (auto *desc : perfDescriptors_) {
470 desc->Report(out, this);
471 }
472 }
473
operator <<(std::ostream & out,const Perf & p)474 std::ostream &operator<<(std::ostream &out, const Perf &p)
475 {
476 p.Report(out);
477 return out;
478 }
479
480 class LongCounterReporter : public CounterReporter {
481 public:
Report(std::ostream & out,const char * title,CounterValue counter,const Perf * p) const482 void Report(std::ostream &out, const char *title, CounterValue counter,
483 [[maybe_unused]] const Perf *p) const override
484 {
485 ReportLongCounter(out, title, counter);
486 }
487 };
488
489 class LongCounterWithRatioReporter : public CounterReporter {
490 public:
LongCounterWithRatioReporter(const PerfCounterDescriptor * denominator,const char * unit)491 LongCounterWithRatioReporter(const PerfCounterDescriptor *denominator, const char *unit)
492 : denominator_(denominator), unit_(unit)
493 {
494 }
495
Report(std::ostream & out,const char * title,CounterValue counter,const Perf * p) const496 void Report(std::ostream &out, const char *title, CounterValue counter, const Perf *p) const override
497 {
498 ReportLongCounter(out, title, counter, p->Get(denominator_), unit_);
499 }
500
501 private:
502 const PerfCounterDescriptor *denominator_;
503 const char *unit_;
504 };
505
506 class InstructionCounterReporter : public CounterReporter {
507 public:
Report(std::ostream & out,const char * title,CounterValue counter,const Perf * p) const508 void Report(std::ostream &out, const char *title, CounterValue counter, const Perf *p) const override
509 {
510 ReportLongCounter(out, title, counter);
511 ReportCounterRatio(out, counter, p->Get(&PerfCounterDescriptor::TOTAL_CPU_CYCLES), "insn per cycle");
512 ReportCounterRatio(out, p->Get(&PerfCounterDescriptor::STALLED_BACKEND_CYCLES), counter,
513 "stalled cycles per insn");
514 }
515 };
516
517 class TimeCounterReporter : public CounterReporter {
518 public:
Report(std::ostream & out,const char * title,CounterValue counter,const Perf * p) const519 void Report(std::ostream &out, const char *title, CounterValue counter,
520 [[maybe_unused]] const Perf *p) const override
521 {
522 ReportTimeCounter(out, title, counter);
523 }
524 };
525
526 // NOLINTBEGIN(fuchsia-statically-constructed-objects)
527 const PerfCounterDescriptor PerfCounterDescriptor::TASK_CLOCK("task-clock", PERF_TYPE_SOFTWARE,
528 PERF_COUNT_SW_TASK_CLOCK,
529 std::make_unique<TimeCounterReporter>());
530 const PerfCounterDescriptor PerfCounterDescriptor::CONTEXT_SWITCHES("context switches", PERF_TYPE_SOFTWARE,
531 PERF_COUNT_SW_CONTEXT_SWITCHES,
532 std::make_unique<LongCounterReporter>());
533 const PerfCounterDescriptor PerfCounterDescriptor::CPU_MIGRATION("cpu-migrations", PERF_TYPE_SOFTWARE,
534 PERF_COUNT_SW_CPU_MIGRATIONS,
535 std::make_unique<LongCounterReporter>());
536 const PerfCounterDescriptor PerfCounterDescriptor::PAGE_FAULT("page-faults", PERF_TYPE_SOFTWARE,
537 PERF_COUNT_SW_PAGE_FAULTS,
538 std::make_unique<LongCounterReporter>());
539 const PerfCounterDescriptor PerfCounterDescriptor::TOTAL_CPU_CYCLES(
540 "total cpu cycles", PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES,
541 std::make_unique<LongCounterWithRatioReporter>(&PerfCounterDescriptor::TASK_CLOCK, "GHz"));
542 const PerfCounterDescriptor PerfCounterDescriptor::STALLED_FRONTEND_CYCLES(
543 "stalled frontend cycles", PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND,
544 std::make_unique<LongCounterWithRatioReporter>(&PerfCounterDescriptor::TOTAL_CPU_CYCLES, "of cycles"));
545 const PerfCounterDescriptor PerfCounterDescriptor::STALLED_BACKEND_CYCLES(
546 "stalled backend cycles", PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_BACKEND,
547 std::make_unique<LongCounterWithRatioReporter>(&PerfCounterDescriptor::TOTAL_CPU_CYCLES, "of cycles"));
548 const PerfCounterDescriptor PerfCounterDescriptor::INSTRUCTIONS_COUNT("instructions", PERF_TYPE_HARDWARE,
549 PERF_COUNT_HW_INSTRUCTIONS,
550 std::make_unique<InstructionCounterReporter>());
551 const PerfCounterDescriptor PerfCounterDescriptor::BRANCHES("branches", PERF_TYPE_HARDWARE,
552 PERF_COUNT_HW_BRANCH_INSTRUCTIONS,
553 std::make_unique<LongCounterReporter>());
554 const PerfCounterDescriptor PerfCounterDescriptor::BRANCH_MISSES(
555 "branch-misses", PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_MISSES,
556 std::make_unique<LongCounterWithRatioReporter>(&PerfCounterDescriptor::BRANCHES, "of branches"));
557 // NOLINTEND(fuchsia-statically-constructed-objects)
558 } // namespace ark::tooling
559