1 /*
2 * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15 #include "perf_events.h"
16
#include <cassert>
#include <cinttypes>
#include <csignal>
#include <cstdint>
#include <cstdlib>
#include <iostream>
#include <limits>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/resource.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <utility>
28 #if defined(CONFIG_HAS_SYSPARA)
29 #include <parameters.h>
30 #endif
31
32 #include "debug_logger.h"
33 #include "register.h"
34 #include "subcommand_dump.h"
35 #include "symbols_file.h"
36 #include "utilities.h"
37
38 using namespace std;
39 using namespace std::chrono;
40 namespace OHOS {
41 namespace Developtools {
42 namespace HiPerf {
43 static std::atomic_bool g_trackRunning = false;
44
// Thin wrapper over the perf_event_open(2) syscall returning an owning
// UniqueFd. On failure the fd is negative and the rejected attr is dumped
// to help diagnose which field the kernel refused.
OHOS::UniqueFd PerfEvents::Open(perf_event_attr &attr, pid_t pid, int cpu, int groupFd,
                                unsigned long flags)
{
    OHOS::UniqueFd fd = UniqueFd(syscall(__NR_perf_event_open, &attr, pid, cpu, groupFd, flags));
    if (fd < 0) {
        HLOGEP("syscall perf_event_open failed. ");
        // dump when open failed.
        SubCommandDump::DumpPrintEventAttr(attr, std::numeric_limits<int>::min());
    }
    // logged unconditionally: a negative fd here documents the failed attempt
    HLOGV("perf_event_open: got fd %d for pid %d cpu %d group %d flags %lu", fd.Get(), pid, cpu, groupFd, flags);
    return fd;
}
57
// Cache the system page size (used later for ring-buffer sizing) and log
// the architecture this binary was built for.
PerfEvents::PerfEvents() : timeOut_(DEFAULT_TIMEOUT * THOUSANDS), timeReport_(0)
{
    pageSize_ = sysconf(_SC_PAGESIZE);
    HLOGI("BuildArch %s", GetArchName(BUILD_ARCH_TYPE).c_str());
}
63
// Release kernel resources in reverse order of creation: unmap the per-cpu
// ring buffers, close every fd opened by perf_event_open(), then make sure
// the record-reader thread has exited.
PerfEvents::~PerfEvents()
{
    // close mmap
    for (auto it = cpuMmap_.begin(); it != cpuMmap_.end();) {
        const MmapFd &mmapItem = it->second;
        // buffer layout: one metadata page followed by mmapPages_ data pages
        munmap(mmapItem.mmapPage, (1 + mmapPages_) * pageSize_);
        it = cpuMmap_.erase(it);
    }

    // close file descriptor of perf_event_open() created
    // NOTE(review): fdItem.fd appears to be an owning UniqueFd (see
    // CreateFdEvents); closing it here and again in its destructor may
    // double-close the descriptor — confirm against UniqueFd semantics.
    for (auto eventGroupItem = eventGroupItem_.begin(); eventGroupItem != eventGroupItem_.end();) {
        for (const auto &eventItem : eventGroupItem->eventItems) {
            for (const auto &fdItem : eventItem.fdItems) {
                close(fdItem.fd);
            }
        }
        eventGroupItem = eventGroupItem_.erase(eventGroupItem);
    }

    ExitReadRecordBufThread();
}
85
IsEventSupport(perf_type_id type,__u64 config)86 bool PerfEvents::IsEventSupport(perf_type_id type, __u64 config)
87 {
88 unique_ptr<perf_event_attr> attr = PerfEvents::CreateDefaultAttr(type, config);
89 UniqueFd fd = Open(*attr.get());
90 if (fd < 0) {
91 printf("event not support %s\n", GetStaticConfigName(type, config).c_str());
92 return false;
93 } else {
94 close(fd);
95 return true;
96 }
97 }
IsEventAttrSupport(perf_event_attr & attr)98 bool PerfEvents::IsEventAttrSupport(perf_event_attr &attr)
99 {
100 UniqueFd fd = Open(attr);
101 if (fd < 0) {
102 return false;
103 } else {
104 close(fd);
105 return true;
106 }
107 }
108
SetBranchSampleType(uint64_t value)109 bool PerfEvents::SetBranchSampleType(uint64_t value)
110 {
111 if (value != 0) {
112 // cpu-clcles event must be supported
113 unique_ptr<perf_event_attr> attr =
114 PerfEvents::CreateDefaultAttr(PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES);
115 attr->sample_type |= PERF_SAMPLE_BRANCH_STACK;
116 attr->branch_sample_type = value;
117 if (!IsEventAttrSupport(*attr.get())) {
118 return false;
119 }
120 }
121 branchSampleType_ = value;
122 return true;
123 }
124
AddDefaultEvent(perf_type_id type)125 bool PerfEvents::AddDefaultEvent(perf_type_id type)
126 {
127 auto it = DEFAULT_TYPE_CONFIGS.find(type);
128 if (it != DEFAULT_TYPE_CONFIGS.end()) {
129 for (auto config : it->second) {
130 AddEvent(type, config);
131 }
132 }
133 return true;
134 }
135
AddOffCpuEvent()136 bool PerfEvents::AddOffCpuEvent()
137 {
138 std::string eventName = "sched:sched_switch";
139 if (eventSpaceType_ == EventSpaceType::USER) {
140 eventName += ":u";
141 } else if (eventSpaceType_ == EventSpaceType::KERNEL) {
142 eventName += ":k";
143 }
144 return AddEvent(eventName);
145 }
146
AddEvents(const std::vector<std::string> & eventStrings,bool group)147 bool PerfEvents::AddEvents(const std::vector<std::string> &eventStrings, bool group)
148 {
149 bool followGroup = false;
150 HLOGV(" %s %s", VectorToString(eventStrings).c_str(), followGroup ? "followGroup" : "");
151
152 for (std::string eventString : eventStrings) {
153 if (!AddEvent(eventString, followGroup)) {
154 return false;
155 }
156 // this is group request , Follow-up events need to follow the previous group
157 if (group) {
158 followGroup = true;
159 }
160 }
161 return true;
162 }
163
164 // event name can have :k or :u suffix
165 // tracepoint event name is like sched:sched_switch
166 // clang-format off
// Split an event spec into base name plus space modifiers.
// Accepted forms:
//   "name"            plain event
//   "name:u"/"name:k" plain event restricted to user/kernel space
//   "subsys:event"            tracepoint event
//   "subsys:event:u"/":k"     tracepoint event restricted to one space
// Outputs are written through the reference parameters; returns false on
// any other use of ':' or an unknown modifier.
// clang-format off
bool PerfEvents::ParseEventName(const std::string &nameStr,
    std::string &name, bool &excludeUser, bool &excludeKernel, bool &isTracePoint)
// clang-format on
{
    name = nameStr;
    excludeUser = false;
    excludeKernel = false;
    isTracePoint = false;
    if (nameStr.find(":") != std::string::npos) {
        static constexpr size_t maxNumberTokensNoTracePoint = 2;
        static constexpr size_t maxNumberTokensTracePoint = 3;
        std::vector<std::string> eventTokens = StringSplit(nameStr, ":");
        if (eventTokens.size() == maxNumberTokensTracePoint) {
            // tracepoint event with :u or :k
            if (eventTokens.back() == "k") {
                excludeUser = true;
                HLOGV("kernelOnly event");
            } else if (eventTokens.back() == "u") {
                excludeKernel = true;
                HLOGV("userOnly event");
            } else {
                HLOGV("unknown event name %s", nameStr.c_str());
                return false;
            }
            name = eventTokens[0] + ":" + eventTokens[1];
            isTracePoint = true;
        } else if (eventTokens.size() == maxNumberTokensNoTracePoint) {
            name = eventTokens[0];
            if (eventTokens.back() == "k") {
                excludeUser = true;
                HLOGV("kernelOnly event");
            } else if (eventTokens.back() == "u") {
                excludeKernel = true;
                HLOGV("userOnly event");
            } else {
                // two tokens but no space modifier: treat as "subsys:event"
                name = nameStr;
                isTracePoint = true;
                HLOGV("tracepoint event is in form of xx:xxx");
            }
        } else {
            printf("unknown ':' format:'%s'\n", nameStr.c_str());
            return false;
        }
        if (reportCallBack_) {
            // stat mode: the kernel rejects sw clock events with a single
            // space modifier, so refuse them up front
            if ((eventTokens[0] == "sw-task-clock" || eventTokens[0] == "sw-cpu-clock") &&
                (excludeUser || excludeKernel)) {
                printf(
                    "event type %s with modifier u and modifier k is not supported by the kernel.",
                    eventTokens[0].c_str());
                return false;
            }
        }
    }
    return true;
}
222
// Resolve a textual event spec to a (type, config) pair and add it.
// Also accumulates the requested event space (user/kernel) so that
// AddOffCpuEvent() can match the restriction later.
bool PerfEvents::AddEvent(const std::string &eventString, bool followGroup)
{
    std::string eventName;
    bool excludeUser = false;
    bool excludeKernel = false;
    bool isTracePointEvent = false;
    if (!ParseEventName(eventString, eventName, excludeUser, excludeKernel, isTracePointEvent)) {
        return false;
    }
    // excludeUser means kernel-only and vice versa
    if (excludeUser) {
        eventSpaceType_ |= EventSpaceType::KERNEL;
    } else if (excludeKernel) {
        eventSpaceType_ |= EventSpaceType::USER;
    } else {
        eventSpaceType_ |= EventSpaceType::USER_KERNEL;
    }

    // find if
    if (isTracePointEvent) {
        // tracepoint ids are loaded lazily from sysfs/debugfs
        if (traceConfigTable.empty()) {
            LoadTracepointEventTypesFromSystem();
        }
        for (auto traceType : traceConfigTable) {
            if (traceType.second == eventName) {
                return AddEvent(PERF_TYPE_TRACEPOINT, traceType.first, excludeUser, excludeKernel,
                                followGroup);
            }
        }
    } else {
        // linear scan of the static name tables for all other event types
        for (auto type : TYPE_CONFIGS) {
            for (auto config : (type.second)) {
                if (config.second == eventName) {
                    return AddEvent(type.first, config.first, excludeUser, excludeKernel,
                                    followGroup);
                }
            }
        }
    }

    printf("%s event is not supported by the kernel.\n", eventName.c_str());
    return false;
}
265
// Create one event item (optionally joining the most recent group leader)
// and fill its perf_event_attr from the current sampling configuration.
// This only builds the in-memory description; the fds are opened later in
// CreateFdEvents(). Returns false if the kernel does not support the event.
bool PerfEvents::AddEvent(perf_type_id type, __u64 config, bool excludeUser, bool excludeKernel,
                          bool followGroup)
{
    HLOG_ASSERT(!excludeUser or !excludeKernel);
    if (followGroup && eventGroupItem_.empty()) {
        HLOGE("no group leader create before");
        return false;
    }
    // found the event name
    if (!IsEventSupport(type, config)) {
        return false;
    }
    HLOGV("type %d config %llu excludeUser %d excludeKernel %d followGroup %d", type, config,
          excludeUser, excludeKernel, followGroup);

    // if use follow ?
    EventGroupItem &eventGroupItem = followGroup ? eventGroupItem_.back()
                                                 : eventGroupItem_.emplace_back();
    // always new item
    EventItem &eventItem = eventGroupItem.eventItems.emplace_back();

    eventItem.typeName = GetTypeName(type);
    if (type == PERF_TYPE_TRACEPOINT) {
        eventItem.configName = GetTraceConfigName(config);
    } else {
        eventItem.configName = GetStaticConfigName(type, config);
    }

    // attr
    if (memset_s(&eventItem.attr, sizeof(perf_event_attr), 0, sizeof(perf_event_attr)) != EOK) {
        HLOGE("memset_s failed in PerfEvents::AddEvent");
        return false;
    }
    eventItem.attr.size = sizeof(perf_event_attr);
    eventItem.attr.type = type;
    eventItem.attr.config = config;
    // events start disabled; they are enabled together in PerfEventsEnable()
    eventItem.attr.disabled = 1;
    eventItem.attr.read_format =
        PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING | PERF_FORMAT_ID;

    eventItem.attr.inherit = (inherit_ ? 1 : 0);
    eventItem.attr.exclude_kernel = excludeKernel;
    eventItem.attr.exclude_user = excludeUser;

    // we also need mmap for record
    if (recordCallBack_) {
        // pick sampling mode: explicit period wins over explicit frequency,
        // otherwise fall back to per-type defaults
        if (samplePeriod_ > 0) {
            eventItem.attr.freq = 0;
            eventItem.attr.sample_freq = 0;
            eventItem.attr.sample_period = samplePeriod_;
        } else if (sampleFreq_ > 0) {
            eventItem.attr.freq = 1;
            eventItem.attr.sample_freq = sampleFreq_;
        } else {
            if (type == PERF_TYPE_TRACEPOINT) {
                eventItem.attr.freq = 0;
                eventItem.attr.sample_period = DEFAULT_SAMPLE_PERIOD;
            } else {
                eventItem.attr.freq = 1;
                eventItem.attr.sample_freq = DEFAULT_SAMPLE_FREQUNCY;
            }
        }

        // wake the reader when half the ring buffer is filled, capped at 1 MiB
        eventItem.attr.watermark = 1;
        if (eventItem.attr.watermark == 1) {
            eventItem.attr.wakeup_watermark = (mmapPages_ * pageSize_) >> 1;
            static constexpr unsigned int maxWakeupMark = 1024 * 1024;
            if (eventItem.attr.wakeup_watermark > maxWakeupMark) {
                eventItem.attr.wakeup_watermark = maxWakeupMark;
            }
        }

        // for a group of events, only enable comm/mmap on the first event
        if (!followGroup) {
            eventItem.attr.comm = 1;
            eventItem.attr.mmap = 1;
            eventItem.attr.mmap2 = 1;
            eventItem.attr.mmap_data = 1;
        }

        // callchain capture: DWARF needs user regs + user stack copies,
        // FP only needs the kernel-walked callchain
        if (sampleStackType_ == SampleStackType::DWARF) {
            eventItem.attr.sample_type = SAMPLE_TYPE | PERF_SAMPLE_CALLCHAIN |
                                         PERF_SAMPLE_STACK_USER | PERF_SAMPLE_REGS_USER;
            eventItem.attr.exclude_callchain_user = 1;
            eventItem.attr.sample_regs_user = GetSupportedRegMask(GetDeviceArch());
            eventItem.attr.sample_stack_user = dwarfSampleStackSize_;
        } else if (sampleStackType_ == SampleStackType::FP) {
            eventItem.attr.sample_type = SAMPLE_TYPE | PERF_SAMPLE_CALLCHAIN;
        } else {
            eventItem.attr.sample_type = SAMPLE_TYPE;
        }

        if (isHM_) {
            eventItem.attr.sample_type |= PERF_SAMPLE_SERVER_PID;
        }
    }

    // set clock id
    if (clockId_ != -1) {
        eventItem.attr.use_clockid = 1;
        eventItem.attr.clockid = clockId_;
    }
    if (branchSampleType_ != 0) {
        eventItem.attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
        eventItem.attr.branch_sample_type = branchSampleType_;
    }

    HLOGV("Add Event: '%s':'%s' %s %s %s", eventItem.typeName.c_str(), eventItem.configName.c_str(),
          excludeUser ? "excludeUser" : "", excludeKernel ? "excludeKernel" : "",
          followGroup ? "" : "group leader");

    return true;
}
379
CreateDefaultAttr(perf_type_id type,__u64 config)380 std::unique_ptr<perf_event_attr> PerfEvents::CreateDefaultAttr(perf_type_id type, __u64 config)
381 {
382 unique_ptr<perf_event_attr> attr = make_unique<perf_event_attr>();
383 if (memset_s(attr.get(), sizeof(perf_event_attr), 0, sizeof(perf_event_attr)) != EOK) {
384 HLOGE("memset_s failed in PerfEvents::CreateDefaultAttr");
385 return nullptr;
386 }
387 attr->size = sizeof(perf_event_attr);
388 attr->type = type;
389 attr->config = config;
390 attr->disabled = 1;
391 return attr;
392 }
393
394 // should move to upper caller
// previous SIGINT disposition, saved by CaptureSig() and restored by
// RecoverCaptureSig()
static struct sigaction g_oldSig {
};
CaptureSig()397 static bool CaptureSig()
398 {
399 HLOGD("capture Ctrl + C to end sampling decently");
400 struct sigaction sig {
401 };
402
403 sig.sa_handler = [](int sig) {
404 printf("\n Ctrl + C detected.\n");
405 g_trackRunning = false;
406 };
407
408 sig.sa_flags = 0;
409 if (sigaction(SIGINT, &sig, &g_oldSig) < 0) {
410 perror("Fail to call sigaction for SIGINT");
411 return false;
412 }
413 return true;
414 }
415
RecoverCaptureSig()416 static void RecoverCaptureSig()
417 {
418 if (sigaction(SIGINT, &g_oldSig, nullptr) < 0) {
419 perror("Fail to call sigaction for SIGINT");
420 }
421 }
422
423 // split to two part
424 // because WriteAttrAndId need fd id before start tracking
PrepareTracking(void)425 bool PerfEvents::PrepareTracking(void)
426 {
427 // 1. prepare cpu pid
428 if (!PrepareFdEvents()) {
429 HLOGE("PrepareFdEvents() failed");
430 return false;
431 }
432
433 // 2. create events
434 if (!CreateFdEvents()) {
435 HLOGE("CreateFdEvents() failed");
436 return false;
437 }
438
439 HLOGV("success");
440 prepared_ = true;
441 return true;
442 }
443
// Stop and join the record-reader thread. Safe to call repeatedly: it only
// joins when the thread is still joinable. Also restores the process
// priority if PrepareRecordThread() raised it.
void PerfEvents::ExitReadRecordBufThread()
{
    if (isLowPriorityThread_) {
        // undo the priority boost applied in PrepareRecordThread()
        if (setpriority(PRIO_PROCESS, gettid(), 0) != 0) {
            HLOGW("failed to decrease priority of reading kernel");
        }
    }
    if (readRecordBufThread_.joinable()) {
        {
            // flip the flag under the buffer mutex so the reader cannot miss
            // the notify while checking its wait condition
            std::lock_guard<std::mutex> lk(mtxRrecordBuf_);
            readRecordThreadRunning_ = false;
            __sync_synchronize();
            cvRecordBuf_.notify_one();
        }
        readRecordBufThread_.join();
    }
}
461
// Allocate the userspace ring buffer and spawn the thread that drains
// records from it. If RLIMIT_NICE permits (soft limit 40, i.e. nice may go
// to -20), raise this thread's priority so the reader keeps up with the
// kernel. Returns false only if the buffer allocation throws.
bool PerfEvents::PrepareRecordThread()
{
    try {
        recordBuf_ = std::make_unique<RingBuffer>(CalcBufferSize());
    } catch (const std::exception &e) {
        printf("create record buffer(size %zu) failed: %s\n", CalcBufferSize(), e.what());
        return false;
    }
    readRecordThreadRunning_ = true;
    readRecordBufThread_ = std::thread(&PerfEvents::ReadRecordFromBuf, this);

    rlimit rlim;
    int result = getrlimit(RLIMIT_NICE, &rlim);
    // a soft limit of 40 allows lowering nice down to -20
    const rlim_t lowPriority = 40;
    if (result == 0 && rlim.rlim_cur == lowPriority) {
        const int highPriority = -20;
        result = setpriority(PRIO_PROCESS, gettid(), highPriority);
        if (result != 0) {
            HLOGW("failed to increase priority of reading kernel");
        } else {
            // remember so ExitReadRecordBufThread() can restore the default
            isLowPriorityThread_ = true;
        }
    }

    return true;
}
488
WaitRecordThread()489 void PerfEvents::WaitRecordThread()
490 {
491 printf("Process and Saving data...\n");
492 ExitReadRecordBufThread();
493
494 const auto usedTimeMsTick = duration_cast<milliseconds>(steady_clock::now() - trackingEndTime_);
495 if (verboseReport_) {
496 printf("Record Process Completed (wait %" PRId64 " ms)\n", (uint64_t)usedTimeMsTick.count());
497 }
498 HLOGV("Record Process Completed (wait %" PRId64 " ms)\n", (uint64_t)usedTimeMsTick.count());
499 #ifdef HIPERF_DEBUG_TIME
500 printf("%zu record processed, used %0.3f ms(%4.2f us/record)\n", recordEventCount_,
501 recordCallBackTime_.count() / MS_DURATION,
502 recordCallBackTime_.count() / static_cast<double>(recordEventCount_));
503 printf("total wait sleep time %0.3f ms.\n", recordSleepTime_.count() / MS_DURATION);
504 printf("read from kernel time %0.3f ms.\n", recordKernelReadTime_.count() / MS_DURATION);
505 #endif
506 }
507
// Run the whole tracking session: enable events (optionally immediately),
// loop in record or stat mode until timeout/Ctrl+C, then disable events
// and flush remaining samples. Requires PrepareTracking() to have
// succeeded. Returns false on any setup failure.
bool PerfEvents::StartTracking(bool immediately)
{
    if (!prepared_) {
        HLOGD("do not prepared_");
        return false;
    }

    // record mode needs the ring buffer and its reader thread up front
    if (recordCallBack_) {
        if (!PrepareRecordThread()) {
            return false;
        }
    }

    HLOGD("step: 1. enable event");
    trackingStartTime_ = steady_clock::now();
    if (immediately) {
        if (!EnableTracking()) {
            HLOGE("PerfEvents::EnableTracking() failed");
            return false;
        }
    }

    if (immediately) {
        printf("Profiling duration is %.3f seconds.\n", float(timeOut_.count()) / THOUSANDS);
        printf("Start Profiling...\n");
    }

    g_trackRunning = true;
    if (!CaptureSig()) {
        HLOGE("captureSig() failed");
        // undo partial setup before bailing out
        g_trackRunning = false;
        ExitReadRecordBufThread();
        return false;
    }

    HLOGD("step: 2. thread loop");
    if (recordCallBack_) {
        RecordLoop();
    } else {
        StatLoop();
    }

    if (recordCallBack_) {
        // read left samples after disable events
        ReadRecordsFromMmaps();
    }

    HLOGD("step: 3. disable event");
    if (!PerfEventsEnable(false)) {
        HLOGE("PerfEvents::PerfEventsEnable() failed");
    }
    trackingEndTime_ = steady_clock::now();

    RecoverCaptureSig();

    if (recordCallBack_) {
        WaitRecordThread();
    }

    HLOGD("step: 4. exit");
    return true;
}
570
StopTracking(void)571 bool PerfEvents::StopTracking(void)
572 {
573 if (g_trackRunning) {
574 printf("some one called StopTracking\n");
575 g_trackRunning = false;
576 if (trackedCommand_) {
577 if (trackedCommand_->GetState() == TrackedCommand::State::COMMAND_STARTED) {
578 trackedCommand_->Stop();
579 }
580 }
581 if (!PerfEventsEnable(false)) {
582 HLOGE("StopTracking : PerfEventsEnable(false) failed");
583 return false;
584 }
585 }
586 return true;
587 }
588
PauseTracking(void)589 bool PerfEvents::PauseTracking(void)
590 {
591 if (!startedTracking_) {
592 return false;
593 }
594 return PerfEventsEnable(false);
595 }
596
ResumeTracking(void)597 bool PerfEvents::ResumeTracking(void)
598 {
599 if (!startedTracking_) {
600 return false;
601 }
602 return PerfEventsEnable(true);
603 }
604
// Enable all counters and, when profiling a launched command, let that
// command start executing. Idempotent once tracking has started.
bool PerfEvents::EnableTracking()
{
    if (startedTracking_) {
        return true;
    }
    if (!PerfEventsEnable(true)) {
        HLOGE("PerfEvents::PerfEventsEnable() failed");
        return false;
    }

    if (trackedCommand_) {
        // start tracked Command
        if (trackedCommand_->GetState() == TrackedCommand::State::COMMAND_WAITING) {
            if (!trackedCommand_->StartCommand()) {
                // command failed to launch: reap or stop it, then report
                int wstatus;
                if (!trackedCommand_->WaitCommand(wstatus)) {
                    trackedCommand_->Stop();
                }
                std::string commandName = trackedCommand_->GetCommandName();
                printf("failed to execute command: %zu: %s\n", commandName.size(), commandName.c_str());
                return false;
            }
        } else if (trackedCommand_->GetState() != TrackedCommand::State::COMMAND_STARTED) {
            // any state other than waiting/started means the command is gone
            return false;
        }
    }
    startedTracking_ = true;
    return true;
}
634
// Whether a tracking session is currently active (cleared by SIGINT or
// StopTracking()).
bool PerfEvents::IsTrackRunning()
{
    return g_trackRunning;
}
639
// Select system-wide profiling (all processes on all cpus) instead of a
// specific pid list; consumed by PrepareFdEvents().
void PerfEvents::SetSystemTarget(bool systemTarget)
{
    systemTarget_ = systemTarget;
}
644
SetCpu(std::vector<pid_t> cpus)645 void PerfEvents::SetCpu(std::vector<pid_t> cpus)
646 {
647 cpus_ = cpus;
648 }
649
SetPid(std::vector<pid_t> pids)650 void PerfEvents::SetPid(std::vector<pid_t> pids)
651 {
652 pids_ = pids;
653 }
654
// Set the profiling duration in seconds (converted to milliseconds).
// Non-positive values are ignored, keeping the default.
void PerfEvents::SetTimeOut(float timeOut)
{
    if (timeOut > 0) {
        timeOut_ = milliseconds(static_cast<int>(timeOut * THOUSANDS));
    }
}
661
// Set the interval (ms) for periodic stat reporting. 0 disables periodic
// reports; any non-zero value below 10 ms is clamped up to 10 ms.
void PerfEvents::SetTimeReport(int timeReport)
{
    static constexpr int minMsReportInterval = 10;
    if (timeReport < minMsReportInterval && timeReport != 0) {
        timeReport = minMsReportInterval;
        printf("time report min value is %d.\n", timeReport);
    }

    timeReport_ = milliseconds(timeReport);
}
672
GetSupportEvents(perf_type_id type)673 std::map<__u64, std::string> PerfEvents::GetSupportEvents(perf_type_id type)
674 {
675 if (type == PERF_TYPE_TRACEPOINT) {
676 LoadTracepointEventTypesFromSystem();
677 }
678
679 std::map<__u64, std::string> eventConfigs;
680 auto configTable = TYPE_CONFIGS.find(type);
681 if (configTable != TYPE_CONFIGS.end()) {
682 auto configs = configTable->second;
683 for (auto config : configs) {
684 if (type == PERF_TYPE_TRACEPOINT || IsEventSupport(type, (__u64)config.first)) {
685 eventConfigs.insert(config);
686 } else {
687 HLOGD("'%s' not support", config.second.c_str());
688 }
689 }
690 }
691 return eventConfigs;
692 }
693
// Populate traceConfigTable (and the PERF_TYPE_TRACEPOINT entry of
// TYPE_CONFIGS) by walking the tracefs events directory: each
// <subsys>/<event>/id file yields one "subsys:event" -> id mapping.
// Only runs once; subsequent calls are no-ops.
void PerfEvents::LoadTracepointEventTypesFromSystem()
{
    if (traceConfigTable.empty()) {
        std::string basePath {"/sys/kernel/tracing/events"};
        if (isHM_) {
            basePath = "/sys/kernel/tracing/hongmeng/events";
        }
        if (access(basePath.c_str(), R_OK) != 0) {
            // fall back to the legacy debugfs mount point
            basePath = "/sys/kernel/debug/tracing/events";
        }
        for (const auto &eventName : GetSubDirs(basePath)) {
            std::string eventPath = basePath + "/" + eventName;
            for (const auto &concreteEvent : GetSubDirs(eventPath)) {
                std::string idPath = eventPath + "/" + concreteEvent + "/id";
                {
                    std::string resolvedPath = CanonicalizeSpecPath(idPath.c_str());
                    std::ifstream ifs {resolvedPath};
                    // clang-format off
                    const std::string idStr = {
                        std::istream_iterator<char>(ifs),
                        std::istream_iterator<char>()
                    };
                    // clang-format on
                    __u64 id {0};
                    try {
                        id = std::stoul(idStr, nullptr);
                    } catch (...) {
                        // unreadable/non-numeric id file: skip this event
                        continue;
                    }
                    auto typeConfigs = TYPE_CONFIGS.find(PERF_TYPE_TRACEPOINT);
                    HLOG_ASSERT(typeConfigs != TYPE_CONFIGS.end());
                    auto configPair = typeConfigs->second.insert(
                        std::make_pair(id, eventName + ":" + concreteEvent));
                    traceConfigTable.insert(std::make_pair(id, eventName + ":" + concreteEvent));
                    ConfigTable::iterator it = configPair.first;
                    HLOGV("TYPE_CONFIGS add %llu:%s in %zu", it->first, it->second.c_str(),
                          typeConfigs->second.size());
                }
            }
        }
    }
}
736
// Request per-cpu counting; PrepareFdEvents() then targets every cpu.
void PerfEvents::SetPerCpu(bool perCpu)
{
    perCpu_ = perCpu;
}
741
// Request per-thread counting; PrepareFdEvents() then targets every cpu.
void PerfEvents::SetPerThread(bool perThread)
{
    perThread_ = perThread;
}
746
// Toggle verbose console reporting (per-fd read values, timing details).
void PerfEvents::SetVerboseReport(bool verboseReport)
{
    verboseReport_ = verboseReport;
}
751
// Set the requested sampling frequency (Hz). The value is clamped to the
// kernel limit read from /proc/sys/kernel/perf_event_max_sample_rate; the
// clamp message is printed only once per process.
void PerfEvents::SetSampleFrequency(unsigned int frequency)
{
    if (frequency > 0) {
        sampleFreq_ = frequency;
    }
    int maxRate = 0;
    static bool printFlag = false;
    if (!ReadIntFromProcFile("/proc/sys/kernel/perf_event_max_sample_rate", maxRate)) {
        printf("read perf_event_max_sample_rate fail.\n");
        return;
    }
    if (sampleFreq_ > static_cast<unsigned int>(maxRate)) {
        sampleFreq_ = static_cast<unsigned int>(maxRate);
        if (!printFlag) {
            printf("Adjust sampling frequency to maximum allowed frequency %d.\n", maxRate);
            printFlag = true;
        }
    }
}
771
// Set a fixed sample period (events between samples); 0 is ignored.
// A non-zero period takes precedence over sample frequency in AddEvent().
void PerfEvents::SetSamplePeriod(unsigned int period)
{
    if (period > 0) {
        samplePeriod_ = period;
    }
}
778
// Set the number of data pages per perf mmap ring buffer (the kernel adds
// one extra metadata page on top).
void PerfEvents::SetMmapPages(size_t mmapPages)
{
    mmapPages_ = mmapPages;
}
783
// Choose how callchains are captured (DWARF user-stack copy vs FP walk).
void PerfEvents::SetSampleStackType(SampleStackType type)
{
    sampleStackType_ = type;
}
788
// Set how many bytes of user stack the kernel copies per sample when DWARF
// unwinding is selected.
void PerfEvents::SetDwarfSampleStackSize(uint32_t stackSize)
{
    HLOGD("request stack size is %u", stackSize);
    dwarfSampleStackSize_ = stackSize;
}
794
// Enable or disable every opened perf event fd via ioctl.
// Stops and returns false on the first fd that rejects the request.
bool PerfEvents::PerfEventsEnable(bool enable)
{
    HLOGV("%s", std::to_string(enable).c_str());
    for (const auto &eventGroupItem : eventGroupItem_) {
        for (const auto &eventItem : eventGroupItem.eventItems) {
            for (const auto &fdItem : eventItem.fdItems) {
                int result =
                    ioctl(fdItem.fd, enable ? PERF_EVENT_IOC_ENABLE : PERF_EVENT_IOC_DISABLE, 0);
                if (result < 0) {
                    printf("Cannot '%s' perf fd! type config name: '%s:%s'\n",
                           enable ? "enable" : "disable", eventItem.typeName.c_str(),
                           eventItem.configName.c_str());
                    return false;
                }
            }
        }
    }
    return true;
}
814
// Mark whether the target kernel is HongMeng; this switches the tracefs
// path and adds PERF_SAMPLE_SERVER_PID to the sample type.
void PerfEvents::SetHM(bool isHM)
{
    isHM_ = isHM;
}
819
SetStatCallBack(StatCallBack reportCallBack)820 void PerfEvents::SetStatCallBack(StatCallBack reportCallBack)
821 {
822 reportCallBack_ = reportCallBack;
823 }
SetRecordCallBack(RecordCallBack recordCallBack)824 void PerfEvents::SetRecordCallBack(RecordCallBack recordCallBack)
825 {
826 recordCallBack_ = recordCallBack;
827 }
828
PutAllCpus()829 inline void PerfEvents::PutAllCpus()
830 {
831 int cpuConfigs = sysconf(_SC_NPROCESSORS_CONF);
832 for (int i = 0; i < cpuConfigs; i++) {
833 cpus_.push_back(i); // put all cpu
834 }
835 }
836
// Normalize the cpu/pid target lists according to the perf_event_open(2)
// pid/cpu rules quoted below: system-wide mode forces pid == -1, an empty
// pid list means "this process", and per-cpu/per-thread mode expands the
// cpu list to all configured cpus. Always returns true.
bool PerfEvents::PrepareFdEvents(void)
{
    /*
    https://man7.org/linux/man-pages/man2/perf_event_open.2.html
    pid == 0 and cpu == -1
            This measures the calling process/thread on any CPU.

    pid == 0 and cpu >= 0
            This measures the calling process/thread only when running
            on the specified CPU.

    pid > 0 and cpu == -1
            This measures the specified process/thread on any CPU.

    pid > 0 and cpu >= 0
            This measures the specified process/thread only when
            running on the specified CPU.

    pid == -1 and cpu >= 0
            This measures all processes/threads on the specified CPU.
            This requires CAP_PERFMON (since Linux 5.8) or
            CAP_SYS_ADMIN capability or a
            /proc/sys/kernel/perf_event_paranoid value of less than 1.

    pid == -1 and cpu == -1
            This setting is invalid and will return an error.
    */
    if (systemTarget_) {
        pids_.clear();
        pids_.push_back(-1);
    } else {
        if (trackedCommand_) {
            // profile the command child process launched for us
            pids_.push_back(trackedCommand_->GetChildPid());
        }
        if (pids_.empty()) {
            pids_.push_back(0); // no pid means use 0 as self pid
        }
    }
    if (perCpu_ || perThread_) {
        cpus_.clear();
        PutAllCpus();
    }
    if (cpus_.empty()) {
        PutAllCpus();
    }

    // print info tell user which cpu and process we will select.
    if (pids_.size() == 1 && pids_[0] == -1) {
        HLOGI("target process: system scope \n");
    } else {
        HLOGI("target process: %zu (%s)\n", pids_.size(),
              (pids_[0] == 0) ? std::to_string(gettid()).c_str() : VectorToString(pids_).c_str());
    }
    if (cpus_.size() == 1 && cpus_[0] == -1) {
        HLOGI("target cpus: %ld \n", sysconf(_SC_NPROCESSORS_CONF));
    } else {
        HLOGI("target cpus: %zu / %ld (%s)\n", cpus_.size(), sysconf(_SC_NPROCESSORS_CONF),
              VectorToString(cpus_).c_str());
    }

    return true;
}
899
// Open one perf fd per (event, cpu, pid) combination, grouping fds that
// share the same cpu and pid under the first fd opened for that pair
// (the group leader). In record mode the ring buffer is mmapped for each
// fd. Fails only if not a single fd could be opened.
bool PerfEvents::CreateFdEvents(void)
{
    // must be some events , or will failed
    if (eventGroupItem_.empty()) {
        printf("no event select.\n");
        return false;
    }

    // create each fd by cpu and process user select
    /*
    https://man7.org/linux/man-pages/man2/perf_event_open.2.html

    (A single event on its own is created with group_fd = -1 and is
    considered to be a group with only 1 member.)
    */
    // Even if there is only one event, it is counted as a group.

    uint fdNumber = 0;
    uint eventNumber = 0;
    uint groupNumber = 0;
    for (auto &eventGroupItem : eventGroupItem_) {
        /*
        Explain what is the configuration of the group:
        Suppose we have 2 Event, 2 PID, and 3 CPU settings
        According to verification,
        Group's fd requires the pid to be the same as the cpu, the only difference is event
        In other words, if you want to bind E1 and E2 to the same group
        That can only be like this:

        event E1 pid P1 cpu C1 [Group 1]
        event E1 pid P1 cpu C2 [Group 2]
        event E1 pid P1 cpu C3 [Group 3]

        event E1 pid P2 cpu C1 [Group 4]
        event E1 pid P2 cpu C2 [Group 5]
        event E1 pid P2 cpu C3 [Group 6]

        event E2 pid P1 cpu C1 [Group 1]
        event E2 pid P1 cpu C2 [Group 2]
        event E2 pid P1 cpu C3 [Group 3]

        event E2 pid P2 cpu C1 [Group 4]
        event E2 pid P2 cpu C2 [Group 5]
        event E2 pid P2 cpu C3 [Group 6]
        */
        HLOGV("group %2u. eventGroupItem leader: '%s':", groupNumber++,
              eventGroupItem.eventItems[0].configName.c_str());

        // NOTE(review): this is a variable-length array — a GCC/Clang
        // extension, not standard C++; consider std::vector if portability
        // matters.
        int groupFdCache[cpus_.size()][pids_.size()];
        for (size_t i = 0; i < cpus_.size(); i++) {     // each cpu
            for (size_t j = 0; j < pids_.size(); j++) { // each pid
                // The leader is created first, with group_fd = -1.
                groupFdCache[i][j] = -1;
            }
        }

        uint eventIndex = 0;
        for (auto &eventItem : eventGroupItem.eventItems) {
            HLOGV(" - event %2u. eventName: '%s:%s'", eventIndex++, eventItem.typeName.c_str(),
                  eventItem.configName.c_str());

            for (size_t icpu = 0; icpu < cpus_.size(); icpu++) {     // each cpu
                for (size_t ipid = 0; ipid < pids_.size(); ipid++) { // each pid
                    // one fd event group must match same cpu and same pid config (event can be
                    // different)
                    // clang-format off
                    UniqueFd fd = Open(eventItem.attr, pids_[ipid], cpus_[icpu],
                                       groupFdCache[icpu][ipid], 0);
                    // clang-format on
                    if (fd < 0) {
                        if (errno == ESRCH) {
                            // target exited between selection and open: skip it
                            if (verboseReport_) {
                                printf("pid %d does not exist.\n", pids_[ipid]);
                            }
                            HLOGE("pid %d does not exist.\n", pids_[ipid]);
                            continue;
                        } else {
                            // clang-format off
                            if (verboseReport_) {
                                char errInfo[ERRINFOLEN] = { 0 };
                                strerror_r(errno, errInfo, ERRINFOLEN);
                                printf("%s event is not supported by the kernel on cpu %d. reason: %d:%s\n",
                                       eventItem.configName.c_str(), cpus_[icpu], errno, errInfo);
                            }
                            char errInfo[ERRINFOLEN] = { 0 };
                            strerror_r(errno, errInfo, ERRINFOLEN);
                            HLOGE("%s event is not supported by the kernel on cpu %d. reason: %d:%s\n",
                                  eventItem.configName.c_str(), cpus_[icpu], errno, errInfo);
                            // clang-format on
                            break; // jump to next cpu
                        }
                    }
                    // after open successed , fill the result
                    // make a new FdItem
                    FdItem &fdItem = eventItem.fdItems.emplace_back();
                    fdItem.fd = move(fd);
                    fdItem.cpu = cpus_[icpu];
                    fdItem.pid = pids_[ipid];
                    fdNumber++;

                    // if sampling, mmap ring buffer
                    if (recordCallBack_) {
                        CreateMmap(fdItem, eventItem.attr);
                    }
                    // update group leader
                    int groupFdCacheNum = groupFdCache[icpu][ipid];
                    if (groupFdCacheNum == -1) {
                        groupFdCache[icpu][ipid] = fdItem.fd.Get();
                    }
                }
            }
            eventNumber++;
        }
    }

    if (fdNumber == 0) {
        HLOGE("open %d fd for %d events", fdNumber, eventNumber);
        return false;
    }

    HLOGD("will try read %u events from %u fd (%zu groups):", eventNumber, fdNumber,
          eventGroupItem_.size());

    return true;
}
1025
StatReport(const __u64 & durationInSec)1026 bool PerfEvents::StatReport(const __u64 &durationInSec)
1027 {
1028 read_format_no_group readNoGroupValue;
1029
1030 // only need read when need report
1031 HLOGM("eventGroupItem_:%zu", eventGroupItem_.size());
1032 __u64 groupId = 0;
1033 // clear countEvents data
1034 countEvents_.clear();
1035 for (const auto &eventGroupItem : eventGroupItem_) {
1036 HLOGM("eventItems:%zu", eventGroupItem.eventItems.size());
1037 groupId++;
1038 for (const auto &eventItem : eventGroupItem.eventItems) {
1039 // count event info together (every cpu , every pid)
1040 std::string configName = "";
1041 if (eventItem.attr.exclude_kernel) {
1042 configName = eventItem.configName + ":u";
1043 } else if (eventItem.attr.exclude_user) {
1044 configName = eventItem.configName + ":k";
1045 } else {
1046 configName = eventItem.configName;
1047 }
1048 if (countEvents_.count(configName) == 0) {
1049 auto countEvent = make_unique<CountEvent>(CountEvent {});
1050 countEvents_[configName] = std::move(countEvent);
1051 countEvents_[configName]->userOnly = eventItem.attr.exclude_kernel;
1052 countEvents_[configName]->kernelOnly = eventItem.attr.exclude_user;
1053 }
1054 std::unique_ptr<CountEvent> &countEvent = countEvents_[configName];
1055 HLOGM("eventItem.fdItems:%zu", eventItem.fdItems.size());
1056 for (const auto &fditem : eventItem.fdItems) {
1057 if (read(fditem.fd, &readNoGroupValue, sizeof(readNoGroupValue)) > 0) {
1058 countEvent->eventCount += readNoGroupValue.value;
1059 countEvent->timeEnabled += readNoGroupValue.timeEnabled;
1060 countEvent->timeRunning += readNoGroupValue.timeRunning;
1061 countEvent->id = groupId;
1062 if (durationInSec != 0) {
1063 countEvent->usedCpus = (countEvent->eventCount / 1e9) / (durationInSec / THOUSANDS);
1064 }
1065 if (verboseReport_) {
1066 printf("%s id:%llu(c%d:p%d) timeEnabled:%llu timeRunning:%llu value:%llu\n",
1067 eventItem.configName.c_str(), readNoGroupValue.id, fditem.cpu, fditem.pid,
1068 readNoGroupValue.timeEnabled, readNoGroupValue.timeRunning, readNoGroupValue.value);
1069 }
1070 if ((perCpu_ || perThread_) && readNoGroupValue.value) {
1071 countEvent->summaries.emplace_back(fditem.cpu, fditem.pid, readNoGroupValue.value,
1072 readNoGroupValue.timeEnabled, readNoGroupValue.timeRunning);
1073 }
1074 } else {
1075 printf("read failed from event '%s'\n", eventItem.configName.c_str());
1076 }
1077 }
1078 }
1079 }
1080
1081 reportCallBack_(countEvents_);
1082
1083 return true;
1084 }
1085
CreateMmap(const FdItem & item,const perf_event_attr & attr)1086 bool PerfEvents::CreateMmap(const FdItem &item, const perf_event_attr &attr)
1087 {
1088 auto it = cpuMmap_.find(item.cpu);
1089 if (it == cpuMmap_.end()) {
1090 void *rbuf = mmap(nullptr, (1 + mmapPages_) * pageSize_, PROT_READ | PROT_WRITE, MAP_SHARED,
1091 item.fd.Get(), 0);
1092 if (rbuf == MMAP_FAILED) {
1093 char errInfo[ERRINFOLEN] = {0};
1094 strerror_r(errno, errInfo, ERRINFOLEN);
1095 perror("errno:%d, errstr:%s", errno, errInfo);
1096 perror("Fail to call mmap \n");
1097 return false;
1098 }
1099 MmapFd mmapItem;
1100 mmapItem.fd = item.fd.Get();
1101 mmapItem.mmapPage = reinterpret_cast<perf_event_mmap_page *>(rbuf);
1102 mmapItem.buf = reinterpret_cast<uint8_t *>(rbuf) + pageSize_;
1103 mmapItem.bufSize = mmapPages_ * pageSize_;
1104 mmapItem.attr = &attr;
1105 mmapItem.posCallChain = GetCallChainPosInSampleRecord(attr);
1106
1107 cpuMmap_[item.cpu] = mmapItem;
1108 pollFds_.emplace_back(pollfd {mmapItem.fd, POLLIN, 0});
1109 HLOGD("CreateMmap success cpu %d fd %d", item.cpu, mmapItem.fd);
1110 } else {
1111 const MmapFd &mmapItem = it->second;
1112 int rc = ioctl(item.fd.Get(), PERF_EVENT_IOC_SET_OUTPUT, mmapItem.fd);
1113 if (rc != 0) {
1114 HLOGEP("ioctl PERF_EVENT_IOC_SET_OUTPUT (%d -> %d) ", item.fd.Get(), mmapItem.fd);
1115 perror("failed to share mapped buffer\n");
1116 return false;
1117 }
1118 }
1119 return true;
1120 }
1121
GetAttrWithId() const1122 std::vector<AttrWithId> PerfEvents::GetAttrWithId() const
1123 {
1124 std::vector<AttrWithId> result;
1125 HLOGV("eventGroupItem_ %zu :", eventGroupItem_.size());
1126
1127 for (const auto &eventGroupItem : eventGroupItem_) {
1128 HLOGV(" eventItems %zu eventItems:", eventGroupItem.eventItems.size());
1129 for (const auto &eventItem : eventGroupItem.eventItems) {
1130 AttrWithId attrId;
1131 attrId.attr = eventItem.attr;
1132 attrId.name = eventItem.configName;
1133 HLOGV(" fdItems %zu fdItems:", eventItem.fdItems.size());
1134 for (const auto &fdItem : eventItem.fdItems) {
1135 auto &id = attrId.ids.emplace_back(fdItem.GetPrefId());
1136 HLOGV(" eventItem.fdItems GetPrefId %" PRIu64 "", id);
1137 }
1138 result.emplace_back(attrId);
1139 }
1140 }
1141 return result;
1142 }
1143
CalcBufferSize()1144 size_t PerfEvents::CalcBufferSize()
1145 {
1146 size_t maxBufferSize;
1147 if (LittleMemory()) {
1148 maxBufferSize = MAX_BUFFER_SIZE_LITTLE;
1149 } else {
1150 maxBufferSize = MAX_BUFFER_SIZE_LARGE;
1151 }
1152
1153 size_t bufferSize = maxBufferSize;
1154 if (!systemTarget_) {
1155 // suppose ring buffer is 4 times as much as mmap
1156 static constexpr int TIMES = 4;
1157 bufferSize = cpuMmap_.size() * mmapPages_ * pageSize_ * TIMES;
1158 if (bufferSize < MIN_BUFFER_SIZE) {
1159 bufferSize = MIN_BUFFER_SIZE;
1160 } else if (bufferSize > maxBufferSize) {
1161 bufferSize = maxBufferSize;
1162 }
1163 }
1164 HLOGD("CalcBufferSize return %zu", bufferSize);
1165 return bufferSize;
1166 }
1167
IsRecordInMmap(int timeout)1168 inline bool PerfEvents::IsRecordInMmap(int timeout)
1169 {
1170 if (pollFds_.size() > 0) {
1171 if (poll(static_cast<struct pollfd*>(pollFds_.data()), pollFds_.size(), timeout) <= 0) {
1172 // time out try again
1173 return false;
1174 }
1175 }
1176 return true;
1177 }
1178
CompareRecordTime(const PerfEvents::MmapFd * left,const PerfEvents::MmapFd * right)1179 static bool CompareRecordTime(const PerfEvents::MmapFd *left, const PerfEvents::MmapFd *right)
1180 {
1181 return left->timestamp > right->timestamp;
1182 }
1183
void PerfEvents::ReadRecordsFromMmaps()
{
    // Drain all per-cpu kernel ring buffers, merging records across rings in
    // timestamp order (oldest first) before moving them into recordBuf_,
    // then wake the reader thread blocked in ReadRecordFromBuf().
#ifdef HIPERF_DEBUG_TIME
    const auto readKenelStartTime = steady_clock::now();
#endif
    // get readable mmap at this time
    for (auto &it : cpuMmap_) {
        // Unread bytes = kernel write position - our read position.
        ssize_t dataSize = it.second.mmapPage->data_head - it.second.mmapPage->data_tail;
        __sync_synchronize(); // this same as rmb in gcc, after reading mmapPage->data_head
        if (dataSize <= 0) {
            continue;
        }
        it.second.dataSize = dataSize;
        MmapRecordHeap_.push_back(&(it.second));
    }
    if (MmapRecordHeap_.empty()) {
        return;
    }

    if (MmapRecordHeap_.size() > 1) {
        // Peek the first record of every ring so the comparator can see
        // each ring's next timestamp.
        for (const auto &it : MmapRecordHeap_) {
            GetRecordFromMmap(*it);
        }
        // CompareRecordTime uses '>': min-heap on timestamp, oldest on top.
        std::make_heap(MmapRecordHeap_.begin(), MmapRecordHeap_.end(), CompareRecordTime);

        size_t heapSize = MmapRecordHeap_.size();
        while (heapSize > 1) {
            // Pop the ring holding the oldest record, consume that record,
            // then re-insert the ring if it still has unread data.
            std::pop_heap(MmapRecordHeap_.begin(), MmapRecordHeap_.begin() + heapSize,
                          CompareRecordTime);
            MoveRecordToBuf(*MmapRecordHeap_[heapSize - 1]);
            if (GetRecordFromMmap(*MmapRecordHeap_[heapSize - 1])) {
                std::push_heap(MmapRecordHeap_.begin(), MmapRecordHeap_.begin() + heapSize,
                               CompareRecordTime);
            } else {
                heapSize--; // ring exhausted: shrink the heap
            }
        }
    }

    // Only one ring remains (or there was only one): drain it sequentially.
    while (GetRecordFromMmap(*MmapRecordHeap_.front())) {
        MoveRecordToBuf(*MmapRecordHeap_.front());
    }
    MmapRecordHeap_.clear();
    {
        // Signal the consumer thread that recordBuf_ has fresh data.
        std::lock_guard<std::mutex> lk(mtxRrecordBuf_);
        recordBufReady_ = true;
    }
    cvRecordBuf_.notify_one();
#ifdef HIPERF_DEBUG_TIME
    recordKernelReadTime_ += duration_cast<milliseconds>(steady_clock::now() - readKenelStartTime);
#endif
}
1236
GetRecordFromMmap(MmapFd & mmap)1237 bool PerfEvents::GetRecordFromMmap(MmapFd &mmap)
1238 {
1239 if (mmap.dataSize <= 0) {
1240 return false;
1241 }
1242
1243 GetRecordFieldFromMmap(mmap, &(mmap.header), mmap.mmapPage->data_tail, sizeof(mmap.header));
1244 if (mmap.header.type != PERF_RECORD_SAMPLE) {
1245 mmap.timestamp = 0;
1246 return true;
1247 }
1248 // in PERF_RECORD_SAMPLE : header + u64 sample_id + u64 ip + u32 pid + u32 tid + u64 time
1249 constexpr size_t timePos = sizeof(perf_event_header) + sizeof(uint64_t) + sizeof(uint64_t) +
1250 sizeof(uint32_t) + sizeof(uint32_t);
1251 GetRecordFieldFromMmap(mmap, &(mmap.timestamp), mmap.mmapPage->data_tail + timePos,
1252 sizeof(mmap.timestamp));
1253 return true;
1254 }
1255
GetRecordFieldFromMmap(MmapFd & mmap,void * dest,size_t pos,size_t size)1256 void PerfEvents::GetRecordFieldFromMmap(MmapFd &mmap, void *dest, size_t pos, size_t size)
1257 {
1258 pos = pos % mmap.bufSize;
1259 size_t tailSize = mmap.bufSize - pos;
1260 size_t copySize = std::min(size, tailSize);
1261 if (memcpy_s(dest, copySize, mmap.buf + pos, copySize) != 0) {
1262 HLOGEP("memcpy_s %p to %p failed. size %zd", mmap.buf + pos, dest, copySize);
1263 }
1264 if (copySize < size) {
1265 size -= copySize;
1266 if (memcpy_s(static_cast<uint8_t *>(dest) + copySize, size, mmap.buf, size) != 0) {
1267 HLOGEP("GetRecordFieldFromMmap: memcpy_s mmap.buf to dest failed. size %zd", size);
1268 }
1269 }
1270 }
1271
GetCallChainPosInSampleRecord(const perf_event_attr & attr)1272 size_t PerfEvents::GetCallChainPosInSampleRecord(const perf_event_attr &attr)
1273 {
1274 // reference struct PerfRecordSampleData
1275 int fixedFieldNumber = __builtin_popcountll(
1276 attr.sample_type & (PERF_SAMPLE_IDENTIFIER | PERF_SAMPLE_IP | PERF_SAMPLE_TID |
1277 PERF_SAMPLE_TIME | PERF_SAMPLE_ADDR | PERF_SAMPLE_ID |
1278 PERF_SAMPLE_STREAM_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD));
1279 size_t pos = sizeof(perf_event_header) + sizeof(uint64_t) * fixedFieldNumber;
1280 if (attr.sample_type & PERF_SAMPLE_READ) {
1281 pos += sizeof(read_format);
1282 }
1283 return pos;
1284 }
1285
size_t PerfEvents::GetStackSizePosInSampleRecord(MmapFd &mmap)
{
    // Compute the byte offset (within the record at data_tail) of the
    // stack_size field of a PERF_RECORD_SAMPLE by walking every
    // variable-length field that precedes PERF_SAMPLE_STACK_USER data.
    size_t pos = mmap.posCallChain;
    if (mmap.attr->sample_type & PERF_SAMPLE_CALLCHAIN) {
        // u64 nr, then nr instruction pointers (u64 each).
        uint64_t nr = 0;
        GetRecordFieldFromMmap(mmap, &nr, mmap.mmapPage->data_tail + pos, sizeof(nr));
        pos += (sizeof(nr) + nr * sizeof(uint64_t));
    }
    if (mmap.attr->sample_type & PERF_SAMPLE_RAW) {
        // u32 raw_size, then raw_size bytes of raw data.
        uint32_t raw_size = 0;
        GetRecordFieldFromMmap(mmap, &raw_size, mmap.mmapPage->data_tail + pos, sizeof(raw_size));
        pos += (sizeof(raw_size) + raw_size);
    }
    if (mmap.attr->sample_type & PERF_SAMPLE_BRANCH_STACK) {
        // u64 bnr, then bnr perf_branch_entry records.
        uint64_t bnr = 0;
        GetRecordFieldFromMmap(mmap, &bnr, mmap.mmapPage->data_tail + pos, sizeof(bnr));
        pos += (sizeof(bnr) + bnr * sizeof(perf_branch_entry));
    }
    if (mmap.attr->sample_type & PERF_SAMPLE_REGS_USER) {
        // u64 abi; when abi is not ABI_NONE (0), one u64 per bit set in
        // sample_regs_user follows.
        uint64_t user_abi = 0;
        GetRecordFieldFromMmap(mmap, &user_abi, mmap.mmapPage->data_tail + pos, sizeof(user_abi));
        pos += sizeof(user_abi);
        if (user_abi > 0) {
            uint64_t reg_nr = __builtin_popcountll(mmap.attr->sample_regs_user);
            pos += reg_nr * sizeof(uint64_t);
        }
    }
    if (mmap.attr->sample_type & PERF_SAMPLE_SERVER_PID) {
        // OHOS kernel extension: u64 server_nr + server_nr u64 values —
        // presumably mirrors the callchain layout; TODO confirm against kernel.
        uint64_t server_nr = 0;
        GetRecordFieldFromMmap(mmap, &server_nr, mmap.mmapPage->data_tail + pos, sizeof(server_nr));
        pos += (sizeof(server_nr) + server_nr * sizeof(uint64_t));
    }
    return pos;
}
1320
bool PerfEvents::CutStackAndMove(MmapFd &mmap)
{
    // Shrink a PERF_RECORD_SAMPLE's user-stack payload down to the
    // dynamically-used size (dyn_size rounded up to alignSize) while copying
    // the record from the kernel ring into recordBuf_. Returns true when the
    // record was written (and the ring tail advanced); false when the record
    // cannot or need not be cut, so the caller copies it unmodified.
    constexpr uint32_t alignSize = 64;
    if (!(mmap.attr->sample_type & PERF_SAMPLE_STACK_USER)) {
        return false;
    }
    size_t stackSizePos = GetStackSizePosInSampleRecord(mmap);
    uint64_t stackSize = 0;
    GetRecordFieldFromMmap(mmap, &stackSize, mmap.mmapPage->data_tail + stackSizePos,
                           sizeof(stackSize));
    if (stackSize == 0) {
        return false;
    }
    // dyn_size trails the stack data: stack_size field + stackSize bytes.
    size_t dynSizePos = stackSizePos + sizeof(uint64_t) + stackSize;
    uint64_t dynSize = 0;
    GetRecordFieldFromMmap(mmap, &dynSize, mmap.mmapPage->data_tail + dynSizePos, sizeof(dynSize));
    // Round dynSize up to a multiple of alignSize, capped at stackSize.
    uint64_t newStackSize = std::min((dynSize + alignSize >= 1 ? dynSize + alignSize - 1 : 0) &
                                     (~(alignSize >= 1 ? alignSize - 1 : 0)), stackSize);
    if (newStackSize >= stackSize) {
        return false; // nothing would be saved by cutting
    }
    HLOGM("stackSize %" PRIx64 " dynSize %" PRIx64 " newStackSize %" PRIx64 "\n", stackSize, dynSize, newStackSize);
    // move and cut stack_data
    // mmap: |<+++copy1+++>|<++++++copy2++++++>|<---------------cut--------------->|<+++copy3+++>|
    //       ^             ^                                                       ^
    //       new_header    stackSizePos        <stackSize-dynSize>                 dynSizePos
    uint16_t recordSize = mmap.header.size;
    mmap.header.size -= stackSize - newStackSize; // reduce the stack size
    uint8_t *buf = recordBuf_->AllocForWrite(mmap.header.size);
    // copy1: new_header
    if (buf == nullptr) {
        return false; // no room in recordBuf_
    }
    if (memcpy_s(buf, sizeof(perf_event_header), &(mmap.header), sizeof(perf_event_header)) != 0) {
        HLOGEP("memcpy_s %p to %p failed. size %zd", &(mmap.header), buf,
               sizeof(perf_event_header));
    }
    size_t copyPos = sizeof(perf_event_header);
    size_t copySize = stackSizePos - sizeof(perf_event_header) + sizeof(stackSize) + newStackSize;
    // copy2: copy stack_size, data[stack_size],
    GetRecordFieldFromMmap(mmap, buf + copyPos, mmap.mmapPage->data_tail + copyPos, copySize);
    copyPos += copySize;
    // copy3: copy dyn_size
    GetRecordFieldFromMmap(mmap, buf + copyPos, mmap.mmapPage->data_tail + dynSizePos,
                           recordSize - dynSizePos);
    // update stack_size
    if (memcpy_s(buf + stackSizePos, sizeof(stackSize), &(newStackSize), sizeof(newStackSize)) != 0) {
        HLOGEP("CutStackAndMove: memcpy_s newStack to buf stackSizePos failed. size %zd", sizeof(newStackSize));
    }
    recordBuf_->EndWrite();
    // Publish the consumed (original, uncut) record size back to the kernel
    // ring so the space can be reused.
    __sync_synchronize();
    mmap.mmapPage->data_tail += recordSize;
    mmap.dataSize -= recordSize;
    return true;
}
1376
MoveRecordToBuf(MmapFd & mmap)1377 void PerfEvents::MoveRecordToBuf(MmapFd &mmap)
1378 {
1379 uint8_t *buf = nullptr;
1380 if (mmap.header.type == PERF_RECORD_SAMPLE) {
1381 if (recordBuf_->GetFreeSize() <= BUFFER_CRITICAL_LEVEL) {
1382 lostSamples_++;
1383 HLOGD("BUFFER_CRITICAL_LEVEL: lost sample record");
1384 goto RETURN;
1385 }
1386 if (CutStackAndMove(mmap)) {
1387 return;
1388 }
1389 } else if (mmap.header.type == PERF_RECORD_LOST) {
1390 // in PERF_RECORD_LOST : header + u64 id + u64 lost
1391 constexpr size_t lostPos = sizeof(perf_event_header) + sizeof(uint64_t);
1392 uint64_t lost = 0;
1393 GetRecordFieldFromMmap(mmap, &lost, mmap.mmapPage->data_tail + lostPos, sizeof(lost));
1394 lostSamples_ += lost;
1395 HLOGD("PERF_RECORD_LOST: lost sample record");
1396 goto RETURN;
1397 }
1398
1399 if ((buf = recordBuf_->AllocForWrite(mmap.header.size)) == nullptr) {
1400 // this record type must be Non-Sample
1401 lostNonSamples_++;
1402 HLOGD("alloc buffer failed: lost non-sample record");
1403 goto RETURN;
1404 }
1405
1406 GetRecordFieldFromMmap(mmap, buf, mmap.mmapPage->data_tail, mmap.header.size);
1407 recordBuf_->EndWrite();
1408 RETURN:
1409 __sync_synchronize();
1410 mmap.mmapPage->data_tail += mmap.header.size;
1411 mmap.dataSize -= mmap.header.size;
1412 }
1413
ReadRecordFromBuf()1414 void PerfEvents::ReadRecordFromBuf()
1415 {
1416 const perf_event_attr *attr = GetDefaultAttr();
1417 uint8_t *p = nullptr;
1418
1419 while (readRecordThreadRunning_) {
1420 {
1421 std::unique_lock<std::mutex> lk(mtxRrecordBuf_);
1422 cvRecordBuf_.wait(lk, [this] {
1423 if (recordBufReady_) {
1424 recordBufReady_ = false;
1425 return true;
1426 }
1427 return !readRecordThreadRunning_;
1428 });
1429 }
1430 while ((p = recordBuf_->GetReadData()) != nullptr) {
1431 uint32_t *type = reinterpret_cast<uint32_t *>(p);
1432 #ifdef HIPERF_DEBUG_TIME
1433 const auto readingStartTime_ = steady_clock::now();
1434 #endif
1435 #if !HIDEBUG_SKIP_CALLBACK
1436 recordCallBack_(GetPerfSampleFromCache(*type, p, *attr));
1437 #endif
1438 recordEventCount_++;
1439 #ifdef HIPERF_DEBUG_TIME
1440 recordCallBackTime_ +=
1441 duration_cast<milliseconds>(steady_clock::now() - readingStartTime_);
1442 #endif
1443 recordBuf_->EndRead();
1444 }
1445 }
1446 HLOGD("exit because trackStoped");
1447
1448 // read the data left over in buffer
1449 while ((p = recordBuf_->GetReadData()) != nullptr) {
1450 uint32_t *type = reinterpret_cast<uint32_t *>(p);
1451 #ifdef HIPERF_DEBUG_TIME
1452 const auto readingStartTime_ = steady_clock::now();
1453 #endif
1454 #if !HIDEBUG_SKIP_CALLBACK
1455 recordCallBack_(GetPerfSampleFromCache(*type, p, *attr));
1456 #endif
1457 recordEventCount_++;
1458 #ifdef HIPERF_DEBUG_TIME
1459 recordCallBackTime_ += duration_cast<milliseconds>(steady_clock::now() - readingStartTime_);
1460 #endif
1461 recordBuf_->EndRead();
1462 }
1463 HLOGD("read all records from buffer");
1464 }
1465
HaveTargetsExit(const std::chrono::steady_clock::time_point & startTime)1466 bool PerfEvents::HaveTargetsExit(const std::chrono::steady_clock::time_point &startTime)
1467 {
1468 if (systemTarget_) {
1469 return false;
1470 }
1471 if (trackedCommand_) {
1472 if (trackedCommand_->GetState() < TrackedCommand::State::COMMAND_STARTED) {
1473 return false; // not start yet
1474 }
1475 int wstatus;
1476 if (trackedCommand_->WaitCommand(wstatus)) {
1477 milliseconds usedMsTick = duration_cast<milliseconds>(steady_clock::now() - startTime);
1478 printf("tracked command(%s) has exited (total %" PRId64 " ms)\n",
1479 trackedCommand_->GetCommandName().c_str(), (uint64_t)usedMsTick.count());
1480 return true;
1481 }
1482 return false;
1483 }
1484
1485 for (auto it = pids_.begin(); it != pids_.end();) {
1486 if (IsDir("/proc/" + std::to_string(*it))) {
1487 it++;
1488 } else {
1489 it = pids_.erase(it);
1490 }
1491 }
1492 if (pids_.empty()) {
1493 milliseconds usedMsTick = duration_cast<milliseconds>(steady_clock::now() - startTime);
1494 printf("tracked processes have exited (total %" PRId64 " ms)\n", (uint64_t)usedMsTick.count());
1495 return true;
1496 }
1497 return false;
1498 }
1499
RecordLoop()1500 void PerfEvents::RecordLoop()
1501 {
1502 // calc the time
1503 const auto startTime = steady_clock::now();
1504 const auto endTime = startTime + timeOut_;
1505 milliseconds usedTimeMsTick {};
1506 int count = 1;
1507
1508 while (g_trackRunning) {
1509 // time check point
1510 const auto thisTime = steady_clock::now();
1511 usedTimeMsTick = duration_cast<milliseconds>(thisTime - startTime);
1512 if ((uint64_t)usedTimeMsTick.count() > (uint64_t)(count * THOUSANDS)) {
1513 if (HaveTargetsExit(startTime)) {
1514 break;
1515 }
1516 ++count;
1517 }
1518
1519 if (thisTime >= endTime) {
1520 printf("Timeout exit (total %" PRId64 " ms)\n", (uint64_t)usedTimeMsTick.count());
1521 if (trackedCommand_) {
1522 trackedCommand_->Stop();
1523 }
1524 break;
1525 }
1526
1527 int timeLeft = duration_cast<milliseconds>(endTime - thisTime).count();
1528 if (IsRecordInMmap(std::min(timeLeft, pollTimeOut_))) {
1529 ReadRecordsFromMmaps();
1530 }
1531 }
1532
1533 if (!g_trackRunning) {
1534 // for user interrupt situation, print time statistic
1535 usedTimeMsTick = duration_cast<milliseconds>(steady_clock::now() - startTime);
1536 printf("User interrupt exit (total %" PRId64 " ms)\n", (uint64_t)usedTimeMsTick.count());
1537 }
1538 }
1539
StatLoop()1540 void PerfEvents::StatLoop()
1541 {
1542 // calc the time
1543 const auto startTime = steady_clock::now();
1544 const auto endTime = startTime + timeOut_;
1545 auto nextReportTime = startTime + timeReport_;
1546 milliseconds usedTimeMsTick {};
1547 __u64 durationInSec = 0;
1548 int64_t thesholdTimeInMs = 2 * HUNDREDS;
1549
1550 while (g_trackRunning) {
1551 // time check point
1552 const auto thisTime = steady_clock::now();
1553 if (timeReport_ != milliseconds::zero()) {
1554 // stat cmd
1555 if (thisTime >= nextReportTime) {
1556 // only for log or debug?
1557 usedTimeMsTick = duration_cast<milliseconds>(thisTime - startTime);
1558 durationInSec = usedTimeMsTick.count();
1559 auto lefTimeMsTick = duration_cast<milliseconds>(endTime - thisTime);
1560 printf("\nReport at %" PRId64 " ms (%" PRId64 " ms left):\n",
1561 (uint64_t)usedTimeMsTick.count(), (uint64_t)lefTimeMsTick.count());
1562 // end of comments
1563 nextReportTime += timeReport_;
1564 StatReport(durationInSec);
1565 }
1566 }
1567
1568 if (HaveTargetsExit(startTime)) {
1569 break;
1570 }
1571
1572 if (thisTime >= endTime) {
1573 usedTimeMsTick = duration_cast<milliseconds>(thisTime - startTime);
1574 durationInSec = usedTimeMsTick.count();
1575 printf("Timeout exit (total %" PRId64 " ms)\n", (uint64_t)usedTimeMsTick.count());
1576 if (trackedCommand_) {
1577 trackedCommand_->Stop();
1578 }
1579 break;
1580 }
1581
1582 // lefttime > 200ms sleep 100ms, else sleep 200us
1583 uint64_t defaultSleepUs = 2 * HUNDREDS; // 200us
1584 if (timeReport_ == milliseconds::zero()
1585 && (timeOut_.count() * THOUSANDS) > thesholdTimeInMs) {
1586 milliseconds leftTimeMsTmp = duration_cast<milliseconds>(endTime - thisTime);
1587 if (leftTimeMsTmp.count() > thesholdTimeInMs) {
1588 defaultSleepUs = HUNDREDS * THOUSANDS; // 100ms
1589 }
1590 }
1591 std::this_thread::sleep_for(microseconds(defaultSleepUs));
1592 }
1593
1594 if (!g_trackRunning) {
1595 // for user interrupt situation, print time statistic
1596 usedTimeMsTick = duration_cast<milliseconds>(steady_clock::now() - startTime);
1597 printf("User interrupt exit (total %" PRId64 " ms)\n", (uint64_t)usedTimeMsTick.count());
1598 }
1599
1600 if (timeReport_ == milliseconds::zero()) {
1601 StatReport(durationInSec);
1602 }
1603 }
1604
GetTypeName(perf_type_id type_id)1605 const std::string PerfEvents::GetTypeName(perf_type_id type_id)
1606 {
1607 auto it = PERF_TYPES.find(type_id);
1608 if (it != PERF_TYPES.end()) {
1609 return it->second;
1610 } else {
1611 return "<not found>";
1612 }
1613 }
1614 } // namespace HiPerf
1615 } // namespace Developtools
1616 } // namespace OHOS
1617