/*
 * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "perf_events.h"

#include <cassert>
#include <cinttypes>
#include <csignal>
#include <cstdint>
#include <cstdlib>
#include <iostream>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/resource.h>
#include <sys/syscall.h>
#include <unistd.h>
#if defined(CONFIG_HAS_SYSPARA)
#include <parameters.h>
#endif

#include "spe_decoder.h"
#include "debug_logger.h"
#include "hiperf_hilog.h"
#include "register.h"
#include "subcommand_dump.h"
#include "symbols_file.h"
#include "utilities.h"

using namespace std;
using namespace std::chrono;
namespace OHOS {
namespace Developtools {
namespace HiPerf {
static std::atomic_bool g_trackRunning = false;

OHOS::UniqueFd PerfEvents::Open(perf_event_attr &attr, pid_t pid, int cpu, int groupFd,
                                unsigned long flags)
{
    OHOS::UniqueFd fd = UniqueFd(syscall(__NR_perf_event_open, &attr, pid, cpu, groupFd, flags));
    if (fd < 0) {
        HLOGEP("syscall perf_event_open failed");
        // dump the attr when open failed.
        SubCommandDump::DumpPrintEventAttr(attr, std::numeric_limits<int>::min());
    }
    HLOGV("perf_event_open: got fd %d for pid %d cpu %d group %d flags %lu", fd.Get(), pid, cpu, groupFd, flags);
    return fd;
}

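// Note: IsEventSupport() and IsEventAttrSupport() call Open() with the default
// arguments declared in perf_events.h (a self-measuring probe: presumably pid = 0,
// cpu = -1, groupFd = -1), while CreateFdEvents() below opens one fd per
// (event, pid, cpu) tuple and passes the group leader's fd as groupFd.
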
void PerfEvents::SpeReadData(void *dataPage, u64 *dataTail, uint8_t *buf, u32 size)
{
    void *src = nullptr;
    u32 left = 0;
    u32 offset = static_cast<u32>(*dataTail);
    u32 copySize;
    u32 traceSize = size;
    CHECK_TRUE(size > (auxMmapPages_ * pageSize_ + sizeof(struct PerfRecordAuxtraceData)),
               NO_RETVAL, 1, "buf size invalid");
    while (traceSize > 0) {
        offset = CALC_OFFSET(offset, auxMmapPages_ * pageSize_);
        left = static_cast<u32>(auxMmapPages_ * pageSize_ - offset);
        copySize = min(traceSize, left);
        src = PTR_ADD(dataPage, offset);
        if (memcpy_s(buf, left, src, copySize) != 0) {
            HLOGV("SpeReadData memcpy_s failed.");
        }

        traceSize -= copySize;
        offset += copySize;
        buf = reinterpret_cast<uint8_t *>(PTR_ADD(buf, copySize));
    }

    *dataTail += size;
}

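// Example for SpeReadData(): with an AUX buffer of auxMmapPages_ * pageSize_ == 16 KiB
// and *dataTail == 15 * 1024, a 4 KiB read copies 1 KiB from offset 15 KiB, wraps to
// offset 0 for the remaining 3 KiB, and finally advances *dataTail by 4096.

// Derive a pseudo-unique reference id from the monotonic clock; it is only used to
// tag the PerfRecordAuxtrace chunks produced below.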
static u64 arm_spe_reference()
{
    struct timespec ts;
    clock_gettime(CLOCK_MONOTONIC_RAW, &ts);
    return static_cast<uint64_t>(ts.tv_sec) ^ static_cast<uint64_t>(ts.tv_nsec);
}

void PerfEvents::ReadRecordsFromSpeMmaps(MmapFd &mmapFd, u64 auxOffset, u64 auxSize, u32 pid, u32 tid)
{
    if (mmapFd.mmapPage == nullptr || mmapFd.auxBuf == nullptr) {
        printf("ReadRecordsFromSpeMmaps: mmapPage or auxBuf is nullptr, mmapFd.fd: %d\n", mmapFd.fd);
        return;
    }
    perf_event_mmap_page *userPage = reinterpret_cast<perf_event_mmap_page *>(mmapFd.mmapPage);
    void *auxPage = mmapFd.auxBuf;
    userPage->aux_tail = auxOffset - auxSize;
    u64 auxHead = userPage->aux_head;
    u64 auxTail = userPage->aux_tail;
    HLOGD("mmap cpu %d, aux_head: %llu, aux_tail:%llu, auxOffset:%llu, auxSize:%llu",
          mmapFd.cpu, auxHead, auxTail, auxOffset, auxSize);
    if (auxHead <= auxTail) {
        return;
    }
    if (auxSize > auxMmapPages_ * pageSize_) {
        userPage->aux_tail += auxSize;
        return;
    }

    int cpu = mmapFd.cpu;
    __sync_synchronize();
    PerfRecordAuxtrace auxtraceRecord = PerfRecordAuxtrace(auxSize, auxTail,
        arm_spe_reference(), cpu, tid, cpu, pid);
    static std::vector<u8> vbuf(RECORD_SIZE_LIMIT);
    uint8_t *buf;
    if ((buf = recordBuf_->AllocForWrite(auxtraceRecord.header.size + auxSize)) == nullptr) {
        HLOGD("alloc buffer failed: PerfRecordAuxtrace record, readSize: %llu", auxSize);
        return;
    }
    auxtraceRecord.GetBinary1(vbuf);
    if (memcpy_s(buf, auxtraceRecord.header.size, vbuf.data(), auxtraceRecord.header.size) != 0) {
        HLOGE("memcpy_s return failed");
        return;
    }
    buf += auxtraceRecord.header.size;

    while (auxSize > 0) {
        u64 readSize = pageSize_;
        if (auxSize < pageSize_) {
            readSize = auxSize;
        }
        __sync_synchronize();
        SpeReadData(auxPage, &auxTail, buf, readSize);
        __sync_synchronize();
        userPage->aux_tail += readSize;
        auxTail = userPage->aux_tail;
        buf += readSize;
        auxSize -= readSize;
    }
    recordBuf_->EndWrite();
}

u32 GetSpeType()
{
    FILE *fd;
    u32 speType;

    fd = fopen("/sys/devices/arm_spe_0/type", "r");
    if (fd == nullptr) {
        HLOGV("open sysfs file failed");
        return -1;
    }
    if (fscanf_s(fd, "%u", &speType) <= 0) {
        HLOGV("fscanf_s file failed");
        (void)fclose(fd);
        return -1;
    }

    (void)fclose(fd);
    return speType;
}

PerfEvents::PerfEvents() : timeOut_(DEFAULT_TIMEOUT * THOUSANDS), timeReport_(0)
{
    pageSize_ = sysconf(_SC_PAGESIZE);
    HLOGI("BuildArch %s", GetArchName(BUILD_ARCH_TYPE).c_str());
}

PerfEvents::~PerfEvents()
{
    // close mmap
    for (auto it = cpuMmap_.begin(); it != cpuMmap_.end();) {
        const MmapFd &mmapItem = it->second;
        if (!isSpe_) {
            munmap(mmapItem.mmapPage, (1 + mmapPages_) * pageSize_);
        } else {
            munmap(mmapItem.mmapPage, (1 + auxMmapPages_) * pageSize_);
            munmap(mmapItem.auxBuf, auxMmapPages_ * pageSize_);
        }
        it = cpuMmap_.erase(it);
    }

    ExitReadRecordBufThread();
}

bool PerfEvents::IsEventSupport(perf_type_id type, __u64 config)
{
    unique_ptr<perf_event_attr> attr = PerfEvents::CreateDefaultAttr(type, config);
    CHECK_TRUE(attr == nullptr, false, 0, "");
    UniqueFd fd = Open(*attr.get());
    if (fd < 0) {
        printf("event not support %s\n", GetStaticConfigName(type, config).c_str());
        return false;
    }
    return true;
}

bool PerfEvents::IsEventAttrSupport(perf_event_attr &attr)
{
    UniqueFd fd = Open(attr);
    if (fd < 0) {
        return false;
    }
    return true;
}

bool PerfEvents::SetBranchSampleType(uint64_t value)
{
    if (value != 0) {
        // the cpu-cycles event must be supported
        unique_ptr<perf_event_attr> attr =
            PerfEvents::CreateDefaultAttr(PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES);
        CHECK_TRUE(attr == nullptr, false, 0, "");
        attr->sample_type |= PERF_SAMPLE_BRANCH_STACK;
        attr->branch_sample_type = value;
        if (!IsEventAttrSupport(*attr.get())) {
            return false;
        }
    }
    branchSampleType_ = value;
    return true;
}

bool PerfEvents::AddDefaultEvent(perf_type_id type)
{
    auto it = DEFAULT_TYPE_CONFIGS.find(type);
    if (it != DEFAULT_TYPE_CONFIGS.end()) {
        for (auto config : it->second) {
            AddEvent(type, config);
        }
    }
    return true;
}

bool PerfEvents::AddOffCpuEvent()
{
    std::string eventName = "sched:sched_switch";
    if (eventSpaceType_ == EventSpaceType::USER) {
        eventName += ":u";
    } else if (eventSpaceType_ == EventSpaceType::KERNEL) {
        eventName += ":k";
    }
    return AddEvent(eventName);
}

bool PerfEvents::AddEvents(const std::vector<std::string> &eventStrings, bool group)
{
    bool followGroup = false;
    HLOGV(" %s", VectorToString(eventStrings).c_str());

    for (const std::string &eventString : eventStrings) {
        if (!AddEvent(eventString, followGroup)) {
            return false;
        }
        // this is a group request: follow-up events need to join the previous group
        if (group) {
            followGroup = true;
        }
    }
    return true;
}

// event name can have :k or :u suffix
// tracepoint event name is like sched:sched_switch
// clang-format off
bool PerfEvents::ParseEventName(const std::string &nameStr,
    std::string &name, bool &excludeUser, bool &excludeKernel, bool &isTracePoint)
// clang-format on
{
    name = nameStr;
    excludeUser = false;
    excludeKernel = false;
    isTracePoint = false;
    if (nameStr.find(":") != std::string::npos) {
        static constexpr size_t maxNumberTokensNoTracePoint = 2;
        static constexpr size_t maxNumberTokensTracePoint = 3;
        std::vector<std::string> eventTokens = StringSplit(nameStr, ":");
        if (eventTokens.size() == maxNumberTokensTracePoint) {
            // tracepoint event with :u or :k
            if (eventTokens.back() == "k") {
                excludeUser = true;
                HLOGV("kernelOnly event");
            } else if (eventTokens.back() == "u") {
                excludeKernel = true;
                HLOGV("userOnly event");
            } else {
                HLOGV("unknown event name %s", nameStr.c_str());
                return false;
            }
            name = eventTokens[0] + ":" + eventTokens[1];
            isTracePoint = true;
        } else if (eventTokens.size() == maxNumberTokensNoTracePoint) {
            name = eventTokens[0];
            if (eventTokens.back() == "k") {
                excludeUser = true;
                HLOGV("kernelOnly event");
            } else if (eventTokens.back() == "u") {
                excludeKernel = true;
                HLOGV("userOnly event");
            } else {
                name = nameStr;
                isTracePoint = true;
                HLOGV("tracepoint event is in the form xx:xxx");
            }
        } else {
            printf("unknown ':' format:'%s'\n", nameStr.c_str());
            return false;
        }
        if (reportCallBack_) {
            if ((eventTokens[0] == "sw-task-clock" || eventTokens[0] == "sw-cpu-clock") &&
                (excludeUser || excludeKernel)) {
                printf(
                    "event type %s with modifier u and modifier k is not supported by the kernel.\n",
                    eventTokens[0].c_str());
                return false;
            }
        }
    }
    return true;
}

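// Examples of ParseEventName() results:
//   "hw-cpu-cycles:u"      -> name = "hw-cpu-cycles", excludeKernel = true
//   "sched:sched_switch"   -> name unchanged, isTracePoint = true
//   "sched:sched_switch:k" -> name = "sched:sched_switch", excludeUser = true, isTracePoint = true
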
bool PerfEvents::AddEvent(const std::string &eventString, bool followGroup)
{
    std::string eventName;
    bool excludeUser = false;
    bool excludeKernel = false;
    bool isTracePointEvent = false;
    if (!ParseEventName(eventString, eventName, excludeUser, excludeKernel, isTracePointEvent)) {
        return false;
    }
    if (excludeUser) {
        eventSpaceType_ |= EventSpaceType::KERNEL;
    } else if (excludeKernel) {
        eventSpaceType_ |= EventSpaceType::USER;
    } else {
        eventSpaceType_ |= EventSpaceType::USER_KERNEL;
    }

    // find the matching event type and config
    if (isTracePointEvent) {
        if (traceConfigTable.empty()) {
            LoadTracepointEventTypesFromSystem();
        }
        for (const auto &traceType : traceConfigTable) {
            if (traceType.second == eventName) {
                return AddEvent(PERF_TYPE_TRACEPOINT, traceType.first, excludeUser, excludeKernel,
                                followGroup);
            }
        }
    } else {
        if (eventName == "arm_spe_0") {
            u32 speType = GetSpeType();
            return AddSpeEvent(speType);
        }
        if (StringStartsWith(eventName, "0x")
            && eventName.length() <= MAX_HEX_EVENT_NAME_LENGTH && IsHexDigits(eventName)) {
            return AddEvent(PERF_TYPE_RAW, std::stoull(eventName, nullptr, NUMBER_FORMAT_HEX_BASE),
                            excludeUser, excludeKernel, followGroup);
        } else {
            auto [find, typeId, configId] = GetStaticConfigId(eventName);
            if (find) {
                return AddEvent(typeId, configId, excludeUser, excludeKernel, followGroup);
            }
        }
    }

    printf("%s event is not supported by the kernel.\n", eventName.c_str());
    return false;
}

bool PerfEvents::AddSpeEvent(u32 type, bool followGroup)
{
    EventGroupItem &eventGroupItem = followGroup ? eventGroupItem_.back() :
                                                   eventGroupItem_.emplace_back();
    EventItem &eventItem = eventGroupItem.eventItems.emplace_back();

    if (memset_s(&eventItem.attr, sizeof(perf_event_attr), 0, sizeof(perf_event_attr)) != EOK) {
        HLOGE("memset_s failed in PerfEvents::AddSpeEvent");
        return false;
    }
    eventItem.attr.type = type;
    eventItem.attr.sample_period = MULTIPLE_SIZE;
    eventItem.attr.size = sizeof(perf_event_attr);
    eventItem.attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING | PERF_FORMAT_ID;
    eventItem.attr.inherit = (inherit_ ? 1 : 0);
    eventItem.attr.sample_type = SAMPLE_ID;
    eventItem.attr.sample_id_all = 1;
    eventItem.attr.disabled = 1;
    eventItem.attr.config = 0x700010007; // temporary SPE config value
    return true;
}

void PerfEvents::SetConfig(std::map<const std::string, unsigned long long> &speOptMaps)
{
    int jitterOffset = 16;
    int branchOffset = 32;
    int loadOffset = 33;
    int storeOffset = 34;
    config_ |= (speOptMaps["ts_enable"] & 0x1) << 0;
    config_ |= (speOptMaps["pa_enable"] & 0x1) << 1;
    config_ |= (speOptMaps["jitter"] & 0x1) << jitterOffset;
    config_ |= (speOptMaps["branch_filter"] & 0x1) << branchOffset;
    config_ |= (speOptMaps["load_filter"] & 0x1) << loadOffset;
    config_ |= (speOptMaps["store_filter"] & 0x1) << storeOffset;
    config1_ |= speOptMaps["event_filter"];
    config2_ |= speOptMaps["min_latency"] & 0xfff;
}

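// SetConfig() packs the SPE options into perf_event_attr::config:
// bit 0 ts_enable, bit 1 pa_enable, bit 16 jitter, bit 32 branch_filter,
// bit 33 load_filter, bit 34 store_filter. For example, ts_enable = 1,
// jitter = 1 and load_filter = 1 yield config_ = (1ULL << 0) | (1ULL << 16) | (1ULL << 33).
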
bool PerfEvents::AddEvent(perf_type_id type, __u64 config, bool excludeUser, bool excludeKernel,
                          bool followGroup)
{
    HLOG_ASSERT(!excludeUser || !excludeKernel);
    CHECK_TRUE(followGroup && eventGroupItem_.empty(), false, 1, "no group leader created before");
    // found the event name
    CHECK_TRUE(!IsEventSupport(type, config), false, 0, "");
    HLOGV("type %d config %llu excludeUser %d excludeKernel %d followGroup %d", type, config,
          excludeUser, excludeKernel, followGroup);

    // reuse the last group when following, otherwise start a new group
    EventGroupItem &eventGroupItem = followGroup ? eventGroupItem_.back()
                                                 : eventGroupItem_.emplace_back();
    // always a new item
    EventItem &eventItem = eventGroupItem.eventItems.emplace_back();

    eventItem.typeName = GetTypeName(type);
    if (type == PERF_TYPE_TRACEPOINT) {
        eventItem.configName = GetTraceConfigName(config);
    } else {
        eventItem.configName = GetStaticConfigName(type, config);
    }

    // attr
    if (memset_s(&eventItem.attr, sizeof(perf_event_attr), 0, sizeof(perf_event_attr)) != EOK) {
        HLOGE("memset_s failed in PerfEvents::AddEvent");
        return false;
    }
    eventItem.attr.size = sizeof(perf_event_attr);
    eventItem.attr.type = type;
    eventItem.attr.config = config;
    eventItem.attr.disabled = 1;
    eventItem.attr.read_format =
        PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING | PERF_FORMAT_ID;

    eventItem.attr.inherit = (inherit_ ? 1 : 0);
    eventItem.attr.exclude_kernel = excludeKernel;
    eventItem.attr.exclude_user = excludeUser;

    // we also need mmap for record
    if (recordCallBack_) {
        if (samplePeriod_ > 0) {
            eventItem.attr.freq = 0;
            eventItem.attr.sample_freq = 0;
            eventItem.attr.sample_period = samplePeriod_;
        } else if (sampleFreq_ > 0) {
            eventItem.attr.freq = 1;
            eventItem.attr.sample_freq = sampleFreq_;
        } else {
            if (type == PERF_TYPE_TRACEPOINT) {
                eventItem.attr.freq = 0;
                eventItem.attr.sample_period = DEFAULT_SAMPLE_PERIOD;
            } else {
                eventItem.attr.freq = 1;
                eventItem.attr.sample_freq = DEFAULT_SAMPLE_FREQUNCY;
            }
        }

        eventItem.attr.watermark = 1;
        eventItem.attr.wakeup_watermark = (mmapPages_ * pageSize_) >> 1;
        static constexpr unsigned int maxWakeupMark = 1024 * 1024;
        if (eventItem.attr.wakeup_watermark > maxWakeupMark) {
            eventItem.attr.wakeup_watermark = maxWakeupMark;
        }

        // for a group of events, only enable comm/mmap on the first event
        if (!followGroup) {
            eventItem.attr.comm = 1;
            eventItem.attr.mmap = 1;
            eventItem.attr.mmap2 = 1;
            eventItem.attr.mmap_data = 1;
        }

        if (sampleStackType_ == SampleStackType::DWARF) {
            eventItem.attr.sample_type = SAMPLE_TYPE | PERF_SAMPLE_CALLCHAIN |
                                         PERF_SAMPLE_STACK_USER | PERF_SAMPLE_REGS_USER;
            eventItem.attr.exclude_callchain_user = 1;
            eventItem.attr.sample_regs_user = GetSupportedRegMask(GetDeviceArch());
            eventItem.attr.sample_stack_user = dwarfSampleStackSize_;
        } else if (sampleStackType_ == SampleStackType::FP) {
            eventItem.attr.sample_type = SAMPLE_TYPE | PERF_SAMPLE_CALLCHAIN;
        } else {
            eventItem.attr.sample_type = SAMPLE_TYPE;
        }

        if (isHM_) {
            eventItem.attr.sample_type |= PERF_SAMPLE_SERVER_PID;
        }
    }

    // set clock id
    if (clockId_ != -1) {
        eventItem.attr.use_clockid = 1;
        eventItem.attr.clockid = clockId_;
    }
    if (branchSampleType_ != 0) {
        eventItem.attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
        eventItem.attr.branch_sample_type = branchSampleType_;
    }

    HLOGV("Add Event: '%s':'%s' %s %s %s", eventItem.typeName.c_str(), eventItem.configName.c_str(),
          excludeUser ? "excludeUser" : "", excludeKernel ? "excludeKernel" : "",
          followGroup ? "" : "group leader");

    return true;
}

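// Illustration of the watermark above: assuming, say, mmapPages_ = 256 with 4 KiB
// pages, wakeup_watermark = (256 * 4096) >> 1 = 512 KiB, so the kernel wakes the
// poll() in IsRecordInMmap() once half of each ring buffer is filled; the 1 MiB
// cap only applies for larger mmap page settings.
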
std::unique_ptr<perf_event_attr> PerfEvents::CreateDefaultAttr(perf_type_id type, __u64 config)
{
    unique_ptr<perf_event_attr> attr = make_unique<perf_event_attr>();
    if (memset_s(attr.get(), sizeof(perf_event_attr), 0, sizeof(perf_event_attr)) != EOK) {
        HLOGE("memset_s failed in PerfEvents::CreateDefaultAttr");
        return nullptr;
    }
    attr->size = sizeof(perf_event_attr);
    attr->type = type;
    attr->config = config;
    attr->disabled = 1;
    return attr;
}

// should move to upper caller
static struct sigaction g_oldSig {};
static bool CaptureSig()
{
    HLOGD("capture Ctrl + C to end sampling gracefully");
    struct sigaction sig {};

    sig.sa_handler = [](int sig) {
        printf("\n Ctrl + C detected.\n");
        g_trackRunning = false;
    };

    sig.sa_flags = 0;
    if (sigaction(SIGINT, &sig, &g_oldSig) < 0) {
        perror("Fail to call sigaction for SIGINT");
        return false;
    }
    return true;
}

static void RecoverCaptureSig()
{
    if (sigaction(SIGINT, &g_oldSig, nullptr) < 0) {
        perror("Fail to call sigaction for SIGINT");
    }
}

// split into two parts because WriteAttrAndId needs the fd ids before tracking starts
bool PerfEvents::PrepareTracking(void)
{
    // 1. prepare cpu pid
    CHECK_TRUE(!PrepareFdEvents(), false, 1, "PrepareFdEvents() failed");

    // 2. create events
    CHECK_TRUE(!CreateFdEvents(), false, 1, "CreateFdEvents() failed");

    HLOGV("success");
    prepared_ = true;
    return true;
}

void PerfEvents::ExitReadRecordBufThread()
{
    if (isLowPriorityThread_) {
        if (setpriority(PRIO_PROCESS, gettid(), 0) != 0) {
            HLOGW("failed to restore priority of the record reader thread");
        }
    }
    if (readRecordBufThread_.joinable()) {
        {
            std::lock_guard<std::mutex> lk(mtxRrecordBuf_);
            readRecordThreadRunning_ = false;
            __sync_synchronize();
            cvRecordBuf_.notify_one();
        }
        readRecordBufThread_.join();
    }
}

bool PerfEvents::PrepareRecordThread()
{
    try {
        recordBuf_ = std::make_unique<RingBuffer>(CalcBufferSize());
    } catch (const std::exception &e) {
        printf("create record buffer(size %zu) failed: %s\n", CalcBufferSize(), e.what());
        return false;
    }
    readRecordThreadRunning_ = true;
    readRecordBufThread_ = std::thread(&PerfEvents::ReadRecordFromBuf, this);

    rlimit rlim;
    int result = getrlimit(RLIMIT_NICE, &rlim);
    const rlim_t lowPriority = 40;
    if (result == 0 && rlim.rlim_cur == lowPriority) {
        const int highPriority = -20;
        result = setpriority(PRIO_PROCESS, gettid(), highPriority);
        if (result != 0) {
            HLOGW("failed to increase priority of the record reader thread");
        } else {
            isLowPriorityThread_ = true;
        }
    }

    return true;
}

void PerfEvents::WaitRecordThread()
{
    printf("Processing and saving data...\n");
    ExitReadRecordBufThread();

    const auto usedTimeMsTick = duration_cast<milliseconds>(steady_clock::now() - trackingEndTime_);
    if (verboseReport_) {
        printf("Record Process Completed (wait %" PRIu64 " ms)\n", (uint64_t)usedTimeMsTick.count());
    }
    HLOGV("Record Process Completed (wait %" PRIu64 " ms)\n", (uint64_t)usedTimeMsTick.count());
#ifdef HIPERF_DEBUG_TIME
    printf("%zu record processed, used %0.3f ms(%4.2f us/record)\n", recordEventCount_,
           recordCallBackTime_.count() / MS_DURATION,
           recordCallBackTime_.count() / static_cast<double>(recordEventCount_));
    printf("total wait sleep time %0.3f ms.\n", recordSleepTime_.count() / MS_DURATION);
    printf("read from kernel time %0.3f ms.\n", recordKernelReadTime_.count() / MS_DURATION);
#endif
}

bool PerfEvents::StartTracking(bool immediately)
{
    if (!prepared_) {
        HLOGD("not prepared");
        return false;
    }

    if (recordCallBack_) {
        if (!PrepareRecordThread()) {
            return false;
        }
    }

    HLOGD("step: 1. enable event");
    trackingStartTime_ = steady_clock::now();
    if (immediately) {
        if (!EnableTracking()) {
            HLOGE("PerfEvents::EnableTracking() failed");
            return false;
        }
        printf("Profiling duration is %.3f seconds.\n", float(timeOut_.count()) / THOUSANDS);
        printf("Start Profiling...\n");
    }

    g_trackRunning = true;
    if (!CaptureSig()) {
        HLOGE("CaptureSig() failed");
        g_trackRunning = false;
        ExitReadRecordBufThread();
        return false;
    }

    HLOGD("step: 2. thread loop");
    if (recordCallBack_) {
        RecordLoop();
    } else {
        StatLoop();
    }

    HLOGD("step: 3. disable event");
    if (!PerfEventsEnable(false)) {
        HLOGE("PerfEvents::PerfEventsEnable() failed");
    }
    if (recordCallBack_) {
        // read the samples left after disabling events
        ReadRecordsFromMmaps();
    }
    trackingEndTime_ = steady_clock::now();

    RecoverCaptureSig();

    if (recordCallBack_) {
        WaitRecordThread();
    }

    HLOGD("step: 4. exit");
    return true;
}

bool PerfEvents::StopTracking(void)
{
    if (g_trackRunning) {
        printf("someone called StopTracking\n");
        g_trackRunning = false;
        if (trackedCommand_) {
            if (trackedCommand_->GetState() == TrackedCommand::State::COMMAND_STARTED) {
                trackedCommand_->Stop();
            }
        }
        CHECK_TRUE(!PerfEventsEnable(false), false, 1, "StopTracking : PerfEventsEnable(false) failed");
    }
    return true;
}

bool PerfEvents::PauseTracking(void)
{
    CHECK_TRUE(!startedTracking_, false, 0, "");
    return PerfEventsEnable(false);
}

bool PerfEvents::ResumeTracking(void)
{
    CHECK_TRUE(!startedTracking_, false, 0, "");
    return PerfEventsEnable(true);
}

bool PerfEvents::EnableTracking()
{
    CHECK_TRUE(startedTracking_, true, 0, "");
    CHECK_TRUE(!PerfEventsEnable(true), false, 1, "PerfEvents::PerfEventsEnable() failed");

    if (trackedCommand_) {
        // start the tracked command
        if (trackedCommand_->GetState() == TrackedCommand::State::COMMAND_WAITING) {
            if (!trackedCommand_->StartCommand()) {
                int wstatus;
                if (!trackedCommand_->WaitCommand(wstatus)) {
                    trackedCommand_->Stop();
                }
                std::string commandName = trackedCommand_->GetCommandName();
                printf("failed to execute command: %zu: %s\n", commandName.size(), commandName.c_str());
                return false;
            }
        } else if (trackedCommand_->GetState() != TrackedCommand::State::COMMAND_STARTED) {
            return false;
        }
    }
    startedTracking_ = true;
    return true;
}

bool PerfEvents::IsTrackRunning()
{
    return g_trackRunning;
}

void PerfEvents::SetSystemTarget(bool systemTarget)
{
    systemTarget_ = systemTarget;
}

void PerfEvents::SetCpu(std::vector<pid_t> cpus)
{
    cpus_ = cpus;
}

void PerfEvents::SetPid(std::vector<pid_t> pids)
{
    pids_ = pids;
}

void PerfEvents::SetTimeOut(float timeOut)
{
    if (timeOut > 0) {
        timeOut_ = milliseconds(static_cast<int>(timeOut * THOUSANDS));
    }
}

void PerfEvents::SetTimeReport(int timeReport)
{
    static constexpr int minMsReportInterval = 10;
    if (timeReport < minMsReportInterval && timeReport != 0) {
        timeReport = minMsReportInterval;
        printf("time report min value is %d.\n", timeReport);
    }

    timeReport_ = milliseconds(timeReport);
}

std::map<__u64, std::string> PerfEvents::GetSupportEvents(perf_type_id type)
{
    if (type == PERF_TYPE_TRACEPOINT) {
        LoadTracepointEventTypesFromSystem();
    }

    std::map<__u64, std::string> eventConfigs;
    auto configTable = TYPE_CONFIGS.find(type);
    if (configTable != TYPE_CONFIGS.end()) {
        auto configs = configTable->second;
        for (auto config : configs) {
            if (type == PERF_TYPE_TRACEPOINT || IsEventSupport(type, (__u64)config.first)) {
                eventConfigs.insert(config);
            } else {
                HLOGD("'%s' not support", config.second.c_str());
            }
        }
    }
    return eventConfigs;
}

void PerfEvents::LoadTracepointEventTypesFromSystem()
{
    if (traceConfigTable.empty()) {
        std::string basePath {"/sys/kernel/tracing/events"};
        if (access(basePath.c_str(), R_OK) != 0) {
            basePath = "/sys/kernel/debug/tracing/events";
        }
        for (const auto &eventName : GetSubDirs(basePath)) {
            std::string eventPath = basePath + "/" + eventName;
            for (const auto &concreteEvent : GetSubDirs(eventPath)) {
                std::string idPath = eventPath + "/" + concreteEvent + "/id";
                {
                    std::string resolvedPath = CanonicalizeSpecPath(idPath.c_str());
                    std::ifstream ifs {resolvedPath};
                    // clang-format off
                    const std::string idStr = {
                        std::istream_iterator<char>(ifs),
                        std::istream_iterator<char>()
                    };
                    // clang-format on
                    __u64 id {0};
                    try {
                        id = std::stoul(idStr, nullptr);
                    } catch (...) {
                        continue;
                    }
                    if (isHM_ && id < MIN_HM_TRACEPOINT_EVENT_ID) {
                        continue;
                    }
                    auto typeConfigs = TYPE_CONFIGS.find(PERF_TYPE_TRACEPOINT);
                    HLOG_ASSERT(typeConfigs != TYPE_CONFIGS.end());
                    auto configPair = typeConfigs->second.insert(
                        std::make_pair(id, eventName + ":" + concreteEvent));
                    traceConfigTable.insert(std::make_pair(id, eventName + ":" + concreteEvent));
                    ConfigTable::iterator it = configPair.first;
                    HLOGV("TYPE_CONFIGS add %llu:%s in %zu", it->first, it->second.c_str(),
                          typeConfigs->second.size());
                }
            }
        }
    }
}

void PerfEvents::SetPerCpu(bool perCpu)
{
    perCpu_ = perCpu;
}

void PerfEvents::SetPerThread(bool perThread)
{
    perThread_ = perThread;
}

void PerfEvents::SetVerboseReport(bool verboseReport)
{
    verboseReport_ = verboseReport;
}

void PerfEvents::SetSampleFrequency(unsigned int frequency)
{
    if (frequency > 0) {
        sampleFreq_ = frequency;
    }
    int maxRate = 0;
    CHECK_TRUE(!ReadIntFromProcFile("/proc/sys/kernel/perf_event_max_sample_rate", maxRate),
               NO_RETVAL, LOG_TYPE_PRINTF,
               "read perf_event_max_sample_rate fail.\n");
    if (sampleFreq_ > static_cast<unsigned int>(maxRate)) {
        static bool printFlag = false;
        sampleFreq_ = static_cast<unsigned int>(maxRate);
        if (!printFlag) {
            printf("Adjust sampling frequency to maximum allowed frequency %d.\n", maxRate);
            printFlag = true;
        }
    }
}

void PerfEvents::SetSamplePeriod(unsigned int period)
{
    if (period > 0) {
        samplePeriod_ = period;
    }
}

void PerfEvents::SetMmapPages(size_t mmapPages)
{
    mmapPages_ = mmapPages;
}

void PerfEvents::SetSampleStackType(SampleStackType type)
{
    sampleStackType_ = type;
}

void PerfEvents::SetDwarfSampleStackSize(uint32_t stackSize)
{
    HLOGD("requested stack size is %u", stackSize);
    dwarfSampleStackSize_ = stackSize;
}

bool PerfEvents::PerfEventsEnable(bool enable)
{
    HLOGV("%s", std::to_string(enable).c_str());
    for (const auto &eventGroupItem : eventGroupItem_) {
        for (const auto &eventItem : eventGroupItem.eventItems) {
            for (const auto &fdItem : eventItem.fdItems) {
                int result =
                    ioctl(fdItem.fd, enable ? PERF_EVENT_IOC_ENABLE : PERF_EVENT_IOC_DISABLE, 0);
                if (result < 0) {
                    printf("Cannot '%s' perf fd! type config name: '%s:%s'\n",
                           enable ? "enable" : "disable", eventItem.typeName.c_str(),
                           eventItem.configName.c_str());
                    return false;
                }
            }
        }
    }
    return true;
}

void PerfEvents::SetHM(bool isHM)
{
    isHM_ = isHM;
}

void PerfEvents::SetStatCallBack(StatCallBack reportCallBack)
{
    reportCallBack_ = reportCallBack;
}

void PerfEvents::SetRecordCallBack(RecordCallBack recordCallBack)
{
    recordCallBack_ = recordCallBack;
}

inline void PerfEvents::PutAllCpus()
{
    int cpuConfigs = sysconf(_SC_NPROCESSORS_CONF);
    for (int i = 0; i < cpuConfigs; i++) {
        cpus_.push_back(i); // put all cpus
    }
}

bool PerfEvents::PrepareFdEvents(void)
{
    /*
    https://man7.org/linux/man-pages/man2/perf_event_open.2.html
    pid == 0 and cpu == -1
            This measures the calling process/thread on any CPU.

    pid == 0 and cpu >= 0
            This measures the calling process/thread only when running
            on the specified CPU.

    pid > 0 and cpu == -1
            This measures the specified process/thread on any CPU.

    pid > 0 and cpu >= 0
            This measures the specified process/thread only when
            running on the specified CPU.

    pid == -1 and cpu >= 0
            This measures all processes/threads on the specified CPU.
            This requires CAP_PERFMON (since Linux 5.8) or
            CAP_SYS_ADMIN capability or a
            /proc/sys/kernel/perf_event_paranoid value of less than 1.

    pid == -1 and cpu == -1
            This setting is invalid and will return an error.
    */
    if (systemTarget_) {
        pids_.clear();
        pids_.push_back(-1);
    } else {
        if (trackedCommand_) {
            pids_.push_back(trackedCommand_->GetChildPid());
        }
        if (pids_.empty()) {
            pids_.push_back(0); // no pid means measuring self, pid 0
        }
    }
    if (perCpu_ || perThread_) {
        cpus_.clear();
        PutAllCpus();
    }
    if (cpus_.empty()) {
        PutAllCpus();
    }

    // print info to tell the user which cpus and processes will be selected.
    if (pids_.size() == 1 && pids_[0] == -1) {
        HLOGI("target process: system scope \n");
    } else {
        HLOGI("target process: %zu (%s)\n", pids_.size(),
              (pids_[0] == 0) ? std::to_string(gettid()).c_str() : VectorToString(pids_).c_str());
    }
    if (cpus_.size() == 1 && cpus_[0] == -1) {
        HLOGI("target cpus: %ld \n", sysconf(_SC_NPROCESSORS_CONF));
    } else {
        HLOGI("target cpus: %zu / %ld (%s)\n", cpus_.size(), sysconf(_SC_NPROCESSORS_CONF),
              VectorToString(cpus_).c_str());
    }

    return true;
}

bool PerfEvents::CreateFdEvents(void)
{
    // there must be some events, or this will fail
    CHECK_TRUE(eventGroupItem_.empty(), false, LOG_TYPE_PRINTF, "no event selected.\n");

    // create each fd for the cpu and process the user selected
    /*
    https://man7.org/linux/man-pages/man2/perf_event_open.2.html

    (A single event on its own is created with group_fd = -1 and is
    considered to be a group with only 1 member.)
    */
    // Even if there is only one event, it is counted as a group.

    uint fdNumber = 0;
    uint eventNumber = 0;
    uint groupNumber = 0;
    for (auto &eventGroupItem : eventGroupItem_) {
        /*
        Explain the configuration of a group:
        Suppose we have 2 events, 2 PIDs, and 3 CPU settings.
        According to verification,
        a group's fds require the same pid and the same cpu; the only difference is the event.
        In other words, if you want to bind E1 and E2 to the same group,
        it can only be like this:

            event E1 pid P1 cpu C1 [Group 1]
            event E1 pid P1 cpu C2 [Group 2]
            event E1 pid P1 cpu C3 [Group 3]

            event E1 pid P2 cpu C1 [Group 4]
            event E1 pid P2 cpu C2 [Group 5]
            event E1 pid P2 cpu C3 [Group 6]

            event E2 pid P1 cpu C1 [Group 1]
            event E2 pid P1 cpu C2 [Group 2]
            event E2 pid P1 cpu C3 [Group 3]

            event E2 pid P2 cpu C1 [Group 4]
            event E2 pid P2 cpu C2 [Group 5]
            event E2 pid P2 cpu C3 [Group 6]
        */
        HLOGV("group %2u. eventGroupItem leader: '%s':", groupNumber++,
              eventGroupItem.eventItems[0].configName.c_str());

        int groupFdCache[cpus_.size()][pids_.size()];
        for (size_t i = 0; i < cpus_.size(); i++) {     // each cpu
            for (size_t j = 0; j < pids_.size(); j++) { // each pid
                // The leader is created first, with group_fd = -1.
                groupFdCache[i][j] = -1;
            }
        }

        uint eventIndex = 0;
        for (auto &eventItem : eventGroupItem.eventItems) {
            HLOGV(" - event %2u. eventName: '%s:%s'", eventIndex++, eventItem.typeName.c_str(),
                  eventItem.configName.c_str());

            for (size_t icpu = 0; icpu < cpus_.size(); icpu++) {     // each cpu
                for (size_t ipid = 0; ipid < pids_.size(); ipid++) { // each pid
                    // one fd event group must match the same cpu and the same pid config
                    // (only the event can differ)
                    // clang-format off
                    UniqueFd fd = Open(eventItem.attr, pids_[ipid], cpus_[icpu],
                                       groupFdCache[icpu][ipid], 0);
                    // clang-format on
                    if (fd < 0) {
                        if (errno == ESRCH) {
                            if (verboseReport_) {
                                printf("pid %d does not exist.\n", pids_[ipid]);
                            }
                            HLOGE("pid %d does not exist.\n", pids_[ipid]);
                            continue;
                        } else {
                            // clang-format off
                            if (verboseReport_) {
                                char errInfo[ERRINFOLEN] = { 0 };
                                strerror_r(errno, errInfo, ERRINFOLEN);
                                printf("%s event is not supported by the kernel on cpu %d. reason: %d:%s\n",
                                       eventItem.configName.c_str(), cpus_[icpu], errno, errInfo);
                            }
                            char errInfo[ERRINFOLEN] = { 0 };
                            strerror_r(errno, errInfo, ERRINFOLEN);
                            HLOGE("%s event is not supported by the kernel on cpu %d. reason: %d:%s\n",
                                  eventItem.configName.c_str(), cpus_[icpu], errno, errInfo);
                            // clang-format on
                            break; // jump to next cpu
                        }
                    }
                    // after open succeeded, fill the result
                    // make a new FdItem
                    FdItem &fdItem = eventItem.fdItems.emplace_back();
                    fdItem.fd = move(fd);
                    fdItem.cpu = cpus_[icpu];
                    fdItem.pid = pids_[ipid];
                    fdNumber++;

                    // if sampling, mmap the ring buffer
                    if (recordCallBack_) {
                        if (isSpe_) {
                            CreateSpeMmap(fdItem, eventItem.attr);
                        } else {
                            CreateMmap(fdItem, eventItem.attr);
                        }
                    }
                    // update group leader
                    int groupFdCacheNum = groupFdCache[icpu][ipid];
                    if (groupFdCacheNum == -1) {
                        groupFdCache[icpu][ipid] = fdItem.fd.Get();
                    }
                }
            }
            eventNumber++;
        }
    }

    CHECK_TRUE(fdNumber == 0, false, 1, "opened %u fd for %u events", fdNumber, eventNumber);

    HLOGD("will try to read %u events from %u fd (%zu groups):", eventNumber, fdNumber,
          eventGroupItem_.size());

    return true;
}

bool PerfEvents::StatReport(const __u64 &durationInSec)
{
    read_format_no_group readNoGroupValue;

    // only need to read when a report is needed
    HLOGM("eventGroupItem_:%zu", eventGroupItem_.size());
    __u64 groupId = 0;
    // clear countEvents data
    countEvents_.clear();
    for (const auto &eventGroupItem : eventGroupItem_) {
        HLOGM("eventItems:%zu", eventGroupItem.eventItems.size());
        groupId++;
        for (const auto &eventItem : eventGroupItem.eventItems) {
            // count event info together (every cpu, every pid)
            std::string configName = "";
            if (eventItem.attr.exclude_kernel) {
                configName = eventItem.configName + ":u";
            } else if (eventItem.attr.exclude_user) {
                configName = eventItem.configName + ":k";
            } else {
                configName = eventItem.configName;
            }
            if (countEvents_.count(configName) == 0) {
                auto countEvent = make_unique<CountEvent>(CountEvent {});
                countEvents_[configName] = std::move(countEvent);
                countEvents_[configName]->userOnly = eventItem.attr.exclude_kernel;
                countEvents_[configName]->kernelOnly = eventItem.attr.exclude_user;
            }
            const std::unique_ptr<CountEvent> &countEvent = countEvents_[configName];
            HLOGM("eventItem.fdItems:%zu", eventItem.fdItems.size());
            for (const auto &fditem : eventItem.fdItems) {
                if (read(fditem.fd, &readNoGroupValue, sizeof(readNoGroupValue)) > 0) {
                    countEvent->eventCount += readNoGroupValue.value;
                    countEvent->timeEnabled += readNoGroupValue.timeEnabled;
                    countEvent->timeRunning += readNoGroupValue.timeRunning;
                    countEvent->id = groupId;
                    if (durationInSec != 0) {
                        countEvent->usedCpus = (countEvent->eventCount / 1e9) / (durationInSec / THOUSANDS);
                    }
                    if (verboseReport_) {
                        printf("%s id:%llu(c%d:p%d) timeEnabled:%llu timeRunning:%llu value:%llu\n",
                               eventItem.configName.c_str(), readNoGroupValue.id, fditem.cpu, fditem.pid,
                               readNoGroupValue.timeEnabled, readNoGroupValue.timeRunning, readNoGroupValue.value);
                    }
                    if ((perCpu_ || perThread_) && readNoGroupValue.value) {
                        countEvent->summaries.emplace_back(fditem.cpu, fditem.pid, readNoGroupValue.value,
                            readNoGroupValue.timeEnabled, readNoGroupValue.timeRunning);
                    }
                } else {
                    printf("read failed from event '%s'\n", eventItem.configName.c_str());
                }
            }
        }
    }

    reportCallBack_(countEvents_);

    return true;
}

bool PerfEvents::CreateSpeMmap(const FdItem &item, const perf_event_attr &attr)
{
    auto it = cpuMmap_.find(item.cpu);
    if (it == cpuMmap_.end()) {
        void *rbuf = mmap(nullptr, (1 + auxMmapPages_) * pageSize_, (PROT_READ | PROT_WRITE), MAP_SHARED,
                          item.fd.Get(), 0);
        CHECK_TRUE(rbuf == MMAP_FAILED, false, 0, "");
        void *auxRbuf = mmap(nullptr, auxMmapPages_ * pageSize_, (PROT_READ | PROT_WRITE), MAP_SHARED,
                             item.fd.Get(), 0);
        MmapFd mmapItem;
        mmapItem.fd = item.fd.Get();
        mmapItem.mmapPage = reinterpret_cast<perf_event_mmap_page *>(rbuf);
        mmapItem.buf = reinterpret_cast<uint8_t *>(rbuf) + pageSize_;
        mmapItem.auxBuf = auxRbuf;
        mmapItem.bufSize = auxMmapPages_ * pageSize_;
        mmapItem.auxBufSize = auxMmapPages_ * pageSize_;
        mmapItem.attr = &attr;
        mmapItem.tid_ = item.pid;
        mmapItem.cpu = item.cpu;
        cpuMmap_[item.cpu] = mmapItem;
        pollFds_.emplace_back(pollfd {mmapItem.fd, POLLIN, 0});
    } else {
        const MmapFd &mmapItem = it->second;
        int rc = ioctl(item.fd.Get(), PERF_EVENT_IOC_SET_OUTPUT, mmapItem.fd);
        if (rc != 0) {
            HLOGEP("ioctl PERF_EVENT_IOC_SET_OUTPUT (%d -> %d) ", item.fd.Get(), mmapItem.fd);
            perror("failed to share mapped buffer\n");
            return false;
        }
    }
    return true;
}

bool PerfEvents::CreateMmap(const FdItem &item, const perf_event_attr &attr)
{
    auto it = cpuMmap_.find(item.cpu);
    if (it == cpuMmap_.end()) {
        void *rbuf = mmap(nullptr, (1 + mmapPages_) * pageSize_, PROT_READ | PROT_WRITE, MAP_SHARED,
                          item.fd.Get(), 0);
        if (rbuf == MMAP_FAILED) {
            char errInfo[ERRINFOLEN] = {0};
            strerror_r(errno, errInfo, ERRINFOLEN);
            // note: perror() takes a single string, so report the detail via the log
            HLOGE("mmap failed. errno:%d, errstr:%s", errno, errInfo);
            perror("Fail to call mmap");
            return false;
        }
        MmapFd mmapItem;
        mmapItem.fd = item.fd.Get();
        mmapItem.mmapPage = reinterpret_cast<perf_event_mmap_page *>(rbuf);
        mmapItem.buf = reinterpret_cast<uint8_t *>(rbuf) + pageSize_;
        mmapItem.bufSize = mmapPages_ * pageSize_;
        mmapItem.attr = &attr;
        mmapItem.posCallChain = GetCallChainPosInSampleRecord(attr);

        cpuMmap_[item.cpu] = mmapItem;
        pollFds_.emplace_back(pollfd {mmapItem.fd, POLLIN, 0});
        HLOGD("CreateMmap success cpu %d fd %d", item.cpu, mmapItem.fd);
    } else {
        const MmapFd &mmapItem = it->second;
        int rc = ioctl(item.fd.Get(), PERF_EVENT_IOC_SET_OUTPUT, mmapItem.fd);
        if (rc != 0) {
            HLOGEP("ioctl PERF_EVENT_IOC_SET_OUTPUT (%d -> %d) ", item.fd.Get(), mmapItem.fd);
            perror("failed to share mapped buffer\n");
            return false;
        }
    }
    return true;
}

std::vector<AttrWithId> PerfEvents::GetAttrWithId() const
{
    std::vector<AttrWithId> result;
    HLOGV("eventGroupItem_ %zu :", eventGroupItem_.size());

    for (const auto &eventGroupItem : eventGroupItem_) {
        HLOGV(" eventItems %zu eventItems:", eventGroupItem.eventItems.size());
        for (const auto &eventItem : eventGroupItem.eventItems) {
            AttrWithId attrId;
            attrId.attr = eventItem.attr;
            attrId.name = eventItem.configName;
            HLOGV(" fdItems %zu fdItems:", eventItem.fdItems.size());
            for (const auto &fdItem : eventItem.fdItems) {
                auto &id = attrId.ids.emplace_back(fdItem.GetPrefId());
                HLOGV(" eventItem.fdItems GetPrefId %" PRIu64 "", id);
            }
            result.emplace_back(attrId);
        }
    }
    return result;
}

size_t PerfEvents::CalcBufferSize()
{
    size_t maxBufferSize;
    if (LittleMemory()) {
        maxBufferSize = MAX_BUFFER_SIZE_LITTLE;
    } else {
        maxBufferSize = MAX_BUFFER_SIZE_LARGE;
    }

    size_t bufferSize = maxBufferSize;
    if (!systemTarget_) {
        // suppose the ring buffer is 4 times as large as the mmaps
        static constexpr int TIMES = 4;
        bufferSize = cpuMmap_.size() * mmapPages_ * pageSize_ * TIMES;
        if (bufferSize < MIN_BUFFER_SIZE) {
            bufferSize = MIN_BUFFER_SIZE;
        } else if (bufferSize > maxBufferSize) {
            bufferSize = maxBufferSize;
        }
    }
    HLOGD("CalcBufferSize return %zu", bufferSize);
    return bufferSize;
}

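// Example: tracing one process on 8 CPUs with, say, mmapPages_ = 256 and 4 KiB
// pages gives 8 * 256 * 4096 * 4 = 32 MiB, which is then clamped into
// [MIN_BUFFER_SIZE, maxBufferSize].
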
inline bool PerfEvents::IsRecordInMmap(int timeout)
{
    HLOGV("enter");
    if (pollFds_.size() > 0) {
        if (poll(static_cast<struct pollfd*>(pollFds_.data()), pollFds_.size(), timeout) <= 0) {
            // timed out, try again
            return false;
        }
    }
    HLOGV("poll record from mmap");
    return true;
}

static bool CompareRecordTime(const PerfEvents::MmapFd *left, const PerfEvents::MmapFd *right)
{
    return left->timestamp > right->timestamp;
}

void PerfEvents::ReadRecordsFromMmaps()
{
#ifdef HIPERF_DEBUG_TIME
    const auto readKernelStartTime = steady_clock::now();
#endif
    // get the readable mmaps at this time
    for (auto &it : cpuMmap_) {
        ssize_t dataSize = it.second.mmapPage->data_head - it.second.mmapPage->data_tail;
        __sync_synchronize(); // same as rmb in gcc, after reading mmapPage->data_head
        if (dataSize <= 0) {
            continue;
        }
        it.second.dataSize = dataSize;
        MmapRecordHeap_.push_back(&(it.second));
    }
    if (MmapRecordHeap_.empty()) {
        return;
    }
    bool enableFlag = false;
    if (MmapRecordHeap_.size() > 1) {
        for (const auto &it : MmapRecordHeap_) {
            GetRecordFromMmap(*it);
        }
        std::make_heap(MmapRecordHeap_.begin(), MmapRecordHeap_.end(), CompareRecordTime);

        size_t heapSize = MmapRecordHeap_.size();
        while (heapSize > 1) {
            std::pop_heap(MmapRecordHeap_.begin(), MmapRecordHeap_.begin() + heapSize,
                          CompareRecordTime);
            bool auxEvent = false;
            u32 pid = 0;
            u32 tid = 0;
            u64 auxOffset = 0;
            u64 auxSize = 0;
            MoveRecordToBuf(*MmapRecordHeap_[heapSize - 1], auxEvent, auxOffset, auxSize, pid, tid);
            if (isSpe_ && auxEvent) {
                ReadRecordsFromSpeMmaps(*MmapRecordHeap_[heapSize - 1], auxOffset, auxSize, pid, tid);
                enableFlag = true;
            }
            if (GetRecordFromMmap(*MmapRecordHeap_[heapSize - 1])) {
                std::push_heap(MmapRecordHeap_.begin(), MmapRecordHeap_.begin() + heapSize,
                               CompareRecordTime);
            } else {
                heapSize--;
            }
        }
    }

    while (GetRecordFromMmap(*MmapRecordHeap_.front())) {
        bool auxEvent = false;
        u32 pid = 0;
        u32 tid = 0;
        u64 auxOffset = 0;
        u64 auxSize = 0;
        MoveRecordToBuf(*MmapRecordHeap_.front(), auxEvent, auxOffset, auxSize, pid, tid);
        if (isSpe_ && auxEvent) {
            ReadRecordsFromSpeMmaps(*MmapRecordHeap_.front(), auxOffset, auxSize, pid, tid);
            enableFlag = true;
        }
    }
    if (isSpe_ && enableFlag) {
        PerfEventsEnable(false);
        PerfEventsEnable(true);
    }
    MmapRecordHeap_.clear();
    {
        std::lock_guard<std::mutex> lk(mtxRrecordBuf_);
        recordBufReady_ = true;
    }
    cvRecordBuf_.notify_one();
#ifdef HIPERF_DEBUG_TIME
    recordKernelReadTime_ += duration_cast<milliseconds>(steady_clock::now() - readKernelStartTime);
#endif
}

bool PerfEvents::GetRecordFromMmap(MmapFd &mmap)
{
    if (mmap.dataSize <= 0) {
        return false;
    }

    GetRecordFieldFromMmap(mmap, &(mmap.header), mmap.mmapPage->data_tail, sizeof(mmap.header));
    if (mmap.header.type != PERF_RECORD_SAMPLE) {
        mmap.timestamp = 0;
        return true;
    }
    // in PERF_RECORD_SAMPLE : header + u64 sample_id + u64 ip + u32 pid + u32 tid + u64 time
    constexpr size_t timePos = sizeof(perf_event_header) + sizeof(uint64_t) + sizeof(uint64_t) +
                               sizeof(uint32_t) + sizeof(uint32_t);
    GetRecordFieldFromMmap(mmap, &(mmap.timestamp), mmap.mmapPage->data_tail + timePos,
                           sizeof(mmap.timestamp));
    return true;
}

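// The timePos arithmetic above resolves to 8 (perf_event_header) + 8 (sample_id) +
// 8 (ip) + 4 (pid) + 4 (tid) = 32 bytes, matching the record layout described in
// the comment inside GetRecordFromMmap().
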
void PerfEvents::GetRecordFieldFromMmap(MmapFd &mmap, void *dest, size_t pos, size_t size)
{
    CHECK_TRUE(mmap.bufSize == 0, NO_RETVAL, 0, "");
    pos = pos % mmap.bufSize;
    size_t tailSize = mmap.bufSize - pos;
    size_t copySize = std::min(size, tailSize);
    if (memcpy_s(dest, copySize, mmap.buf + pos, copySize) != 0) {
        HLOGEP("memcpy_s %p to %p failed. size %zd", mmap.buf + pos, dest, copySize);
    }
    if (copySize < size) {
        size -= copySize;
        if (memcpy_s(static_cast<uint8_t *>(dest) + copySize, size, mmap.buf, size) != 0) {
            HLOGEP("GetRecordFieldFromMmap: memcpy_s mmap.buf to dest failed. size %zd", size);
        }
    }
}

size_t PerfEvents::GetCallChainPosInSampleRecord(const perf_event_attr &attr)
{
    // reference struct PerfRecordSampleData
    int fixedFieldNumber = __builtin_popcountll(
        attr.sample_type & (PERF_SAMPLE_IDENTIFIER | PERF_SAMPLE_IP | PERF_SAMPLE_TID |
                            PERF_SAMPLE_TIME | PERF_SAMPLE_ADDR | PERF_SAMPLE_ID |
                            PERF_SAMPLE_STREAM_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD));
    size_t pos = sizeof(perf_event_header) + sizeof(uint64_t) * fixedFieldNumber;
    if (attr.sample_type & PERF_SAMPLE_READ) {
        pos += sizeof(read_format);
    }
    return pos;
}

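// Example: with sample_type containing IDENTIFIER | IP | TID | TIME | CPU | PERIOD,
// __builtin_popcountll() counts 6 fixed u64 slots, so the callchain starts at
// 8 + 6 * 8 = 56 bytes into the record (plus sizeof(read_format) when
// PERF_SAMPLE_READ is set).
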
size_t PerfEvents::GetStackSizePosInSampleRecord(MmapFd &mmap)
{
    size_t pos = mmap.posCallChain;
    if (mmap.attr->sample_type & PERF_SAMPLE_CALLCHAIN) {
        uint64_t nr = 0;
        GetRecordFieldFromMmap(mmap, &nr, mmap.mmapPage->data_tail + pos, sizeof(nr));
        pos += (sizeof(nr) + nr * sizeof(uint64_t));
    }
    if (mmap.attr->sample_type & PERF_SAMPLE_RAW) {
        uint32_t raw_size = 0;
        GetRecordFieldFromMmap(mmap, &raw_size, mmap.mmapPage->data_tail + pos, sizeof(raw_size));
        pos += (sizeof(raw_size) + raw_size);
    }
    if (mmap.attr->sample_type & PERF_SAMPLE_BRANCH_STACK) {
        uint64_t bnr = 0;
        GetRecordFieldFromMmap(mmap, &bnr, mmap.mmapPage->data_tail + pos, sizeof(bnr));
        pos += (sizeof(bnr) + bnr * sizeof(PerfBranchEntry));
    }
    if (mmap.attr->sample_type & PERF_SAMPLE_REGS_USER) {
        uint64_t user_abi = 0;
        GetRecordFieldFromMmap(mmap, &user_abi, mmap.mmapPage->data_tail + pos, sizeof(user_abi));
        pos += sizeof(user_abi);
        if (user_abi > 0) {
            uint64_t reg_nr = __builtin_popcountll(mmap.attr->sample_regs_user);
            pos += reg_nr * sizeof(uint64_t);
        }
    }
    if (mmap.attr->sample_type & PERF_SAMPLE_SERVER_PID) {
        uint64_t server_nr = 0;
        GetRecordFieldFromMmap(mmap, &server_nr, mmap.mmapPage->data_tail + pos, sizeof(server_nr));
        pos += (sizeof(server_nr) + server_nr * sizeof(uint64_t));
    }
    return pos;
}

bool PerfEvents::CutStackAndMove(MmapFd &mmap)
{
    constexpr uint32_t alignSize = 64;
    if (!(mmap.attr->sample_type & PERF_SAMPLE_STACK_USER)) {
        return false;
    }
    size_t stackSizePos = GetStackSizePosInSampleRecord(mmap);
    uint64_t stackSize = 0;
    GetRecordFieldFromMmap(mmap, &stackSize, mmap.mmapPage->data_tail + stackSizePos,
                           sizeof(stackSize));
    if (stackSize == 0) {
        return false;
    }
    size_t dynSizePos = stackSizePos + sizeof(uint64_t) + stackSize;
    uint64_t dynSize = 0;
    GetRecordFieldFromMmap(mmap, &dynSize, mmap.mmapPage->data_tail + dynSizePos, sizeof(dynSize));
    uint64_t newStackSize = std::min((dynSize + alignSize - 1) &
                                     (~(alignSize >= 1 ? alignSize - 1 : 0)), stackSize);
    if (newStackSize >= stackSize) {
        return false;
    }
    HLOGM("stackSize %" PRIx64 " dynSize %" PRIx64 " newStackSize %" PRIx64 "\n", stackSize, dynSize, newStackSize);
    // move and cut stack_data
    // mmap: |<+++copy1+++>|<++++++copy2++++++>|<---------------cut--------------->|<+++copy3+++>|
    //       ^             ^                   ^                                   ^
    //       new_header    stackSizePos        <stackSize-dynSize>                 dynSizePos
    uint16_t recordSize = mmap.header.size;
    mmap.header.size -= stackSize - newStackSize; // reduce the stack size
    uint8_t *buf = recordBuf_->AllocForWrite(mmap.header.size);
    // copy1: new_header
    CHECK_TRUE(buf == nullptr, false, 0, "");
    if (memcpy_s(buf, sizeof(perf_event_header), &(mmap.header), sizeof(perf_event_header)) != 0) {
        HLOGEP("memcpy_s %p to %p failed. size %zd", &(mmap.header), buf,
               sizeof(perf_event_header));
    }
    size_t copyPos = sizeof(perf_event_header);
    size_t copySize = stackSizePos - sizeof(perf_event_header) + sizeof(stackSize) + newStackSize;
    // copy2: copy stack_size, data[stack_size]
    GetRecordFieldFromMmap(mmap, buf + copyPos, mmap.mmapPage->data_tail + copyPos, copySize);
    copyPos += copySize;
    // copy3: copy dyn_size
    GetRecordFieldFromMmap(mmap, buf + copyPos, mmap.mmapPage->data_tail + dynSizePos,
                           recordSize - dynSizePos);
    // update stack_size
    if (memcpy_s(buf + stackSizePos, sizeof(stackSize), &(newStackSize), sizeof(newStackSize)) != 0) {
        HLOGEP("CutStackAndMove: memcpy_s newStack to buf stackSizePos failed. size %zd", sizeof(newStackSize));
    }
    recordBuf_->EndWrite();
    __sync_synchronize();
    mmap.mmapPage->data_tail += recordSize;
    mmap.dataSize -= recordSize;
    return true;
}

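// Worked example for CutStackAndMove(): with stackSize = 8192 and dynSize = 1000,
// newStackSize = (1000 + 63) & ~63 = 1024, so header.size shrinks by
// 8192 - 1024 = 7168 bytes and only the live part of the user stack is copied out.
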
void PerfEvents::MoveRecordToBuf(MmapFd &mmap, bool &isAuxEvent, u64 &auxOffset, u64 &auxSize, u32 &pid, u32 &tid)
{
    uint8_t *buf = nullptr;
    if (mmap.header.type == PERF_RECORD_SAMPLE) {
        if (recordBuf_->GetFreeSize() <= BUFFER_CRITICAL_LEVEL) {
            lostSamples_++;
            HLOGD("BUFFER_CRITICAL_LEVEL: lost sample record");
            goto RETURN;
        }
        if (CutStackAndMove(mmap)) {
            return;
        }
    } else if (mmap.header.type == PERF_RECORD_LOST) {
        // in PERF_RECORD_LOST : header + u64 id + u64 lost
        constexpr size_t lostPos = sizeof(perf_event_header) + sizeof(uint64_t);
        uint64_t lost = 0;
        GetRecordFieldFromMmap(mmap, &lost, mmap.mmapPage->data_tail + lostPos, sizeof(lost));
        lostSamples_ += lost;
        HLOGD("PERF_RECORD_LOST: lost sample record");
        goto RETURN;
    }
    if (mmap.header.type == PERF_RECORD_AUX) {
        isAuxEvent = true;
        // in PERF_RECORD_AUX : header + u64 aux_offset + u64 aux_size
        uint64_t auxOffsetPos = sizeof(perf_event_header);
        uint64_t auxSizePos = sizeof(perf_event_header) + sizeof(uint64_t);
        uint64_t pidPos = auxSizePos + sizeof(uint64_t) * 2; // 2 : skip aux_size and flags
        uint64_t tidPos = pidPos + sizeof(uint32_t);
        GetRecordFieldFromMmap(mmap, &auxOffset, mmap.mmapPage->data_tail + auxOffsetPos, sizeof(auxOffset));
        GetRecordFieldFromMmap(mmap, &auxSize, mmap.mmapPage->data_tail + auxSizePos, sizeof(auxSize));
        GetRecordFieldFromMmap(mmap, &pid, mmap.mmapPage->data_tail + pidPos, sizeof(pid));
        GetRecordFieldFromMmap(mmap, &tid, mmap.mmapPage->data_tail + tidPos, sizeof(tid));
    }

    if ((buf = recordBuf_->AllocForWrite(mmap.header.size)) == nullptr) {
        // this record type must be Non-Sample
        lostNonSamples_++;
        HLOGD("alloc buffer failed: lost non-sample record");
        goto RETURN;
    }

    GetRecordFieldFromMmap(mmap, buf, mmap.mmapPage->data_tail, mmap.header.size);
    recordBuf_->EndWrite();
RETURN:
    __sync_synchronize();
    mmap.mmapPage->data_tail += mmap.header.size;
    mmap.dataSize -= mmap.header.size;
}

void PerfEvents::ReadRecordFromBuf()
{
    const perf_event_attr *attr = GetDefaultAttr();
    uint8_t *p = nullptr;

    while (readRecordThreadRunning_) {
        {
            std::unique_lock<std::mutex> lk(mtxRrecordBuf_);
            cvRecordBuf_.wait(lk, [this] {
                if (recordBufReady_) {
                    recordBufReady_ = false;
                    return true;
                }
                return !readRecordThreadRunning_;
            });
        }
        while ((p = recordBuf_->GetReadData()) != nullptr) {
            uint32_t *type = reinterpret_cast<uint32_t *>(p);
#ifdef HIPERF_DEBUG_TIME
            const auto readingStartTime_ = steady_clock::now();
#endif
#if !HIDEBUG_SKIP_CALLBACK
            recordCallBack_(GetPerfSampleFromCache(*type, p, *attr));
#endif
            recordEventCount_++;
#ifdef HIPERF_DEBUG_TIME
            recordCallBackTime_ +=
                duration_cast<milliseconds>(steady_clock::now() - readingStartTime_);
#endif
            recordBuf_->EndRead();
        }
    }
    HLOGD("exit because track stopped");

    // read the data left over in the buffer
    while ((p = recordBuf_->GetReadData()) != nullptr) {
        uint32_t *type = reinterpret_cast<uint32_t *>(p);
#ifdef HIPERF_DEBUG_TIME
        const auto readingStartTime_ = steady_clock::now();
#endif
#if !HIDEBUG_SKIP_CALLBACK
        recordCallBack_(GetPerfSampleFromCache(*type, p, *attr));
#endif
        recordEventCount_++;
#ifdef HIPERF_DEBUG_TIME
        recordCallBackTime_ += duration_cast<milliseconds>(steady_clock::now() - readingStartTime_);
#endif
        recordBuf_->EndRead();
    }
    HLOGD("read all records from buffer");
}

bool PerfEvents::HaveTargetsExit(const std::chrono::steady_clock::time_point &startTime)
{
    if (systemTarget_) {
        return false;
    }
    if (trackedCommand_) {
        if (trackedCommand_->GetState() < TrackedCommand::State::COMMAND_STARTED) {
            return false; // not started yet
        }
        int wstatus;
        if (trackedCommand_->WaitCommand(wstatus)) {
            milliseconds usedMsTick = duration_cast<milliseconds>(steady_clock::now() - startTime);
            printf("tracked command(%s) has exited (total %" PRIu64 " ms)\n",
                   trackedCommand_->GetCommandName().c_str(), (uint64_t)usedMsTick.count());
            return true;
        }
        return false;
    }

    for (auto it = pids_.begin(); it != pids_.end();) {
        if (IsDir("/proc/" + std::to_string(*it))) {
            it++;
        } else {
            it = pids_.erase(it);
        }
    }
    if (pids_.empty()) {
        milliseconds usedMsTick = duration_cast<milliseconds>(steady_clock::now() - startTime);
        printf("tracked processes have exited (total %" PRIu64 " ms)\n", (uint64_t)usedMsTick.count());
        return true;
    }
    return false;
}

void PerfEvents::RecordLoop()
{
    // calc the time
    const auto startTime = steady_clock::now();
    const auto endTime = startTime + timeOut_;
    milliseconds usedTimeMsTick {};
    int count = 1;

    while (g_trackRunning) {
        // time check point
        const auto thisTime = steady_clock::now();
        usedTimeMsTick = duration_cast<milliseconds>(thisTime - startTime);
        if ((uint64_t)usedTimeMsTick.count() > (uint64_t)(count * THOUSANDS)) {
            if (HaveTargetsExit(startTime)) {
                break;
            }
            ++count;
        }

        if (thisTime >= endTime) {
            printf("Timeout exit (total %" PRIu64 " ms)\n", (uint64_t)usedTimeMsTick.count());
            if (trackedCommand_) {
                trackedCommand_->Stop();
            }
            break;
        }

        int timeLeft = duration_cast<milliseconds>(endTime - thisTime).count();
        if (IsRecordInMmap(std::min(timeLeft, pollTimeOut_))) {
            ReadRecordsFromMmaps();
        }
    }

    if (!g_trackRunning) {
        // for the user-interrupt situation, print time statistics
        usedTimeMsTick = duration_cast<milliseconds>(steady_clock::now() - startTime);
        printf("User interrupt exit (total %" PRIu64 " ms)\n", (uint64_t)usedTimeMsTick.count());
    }
}

void PerfEvents::StatLoop()
{
    // calc the time
    const auto startTime = steady_clock::now();
    const auto endTime = startTime + timeOut_;
    auto nextReportTime = startTime + timeReport_;
    milliseconds usedTimeMsTick {};
    __u64 durationInSec = 0;
    int64_t thresholdTimeInMs = 2 * HUNDREDS;

    while (g_trackRunning) {
        // time check point
        const auto thisTime = steady_clock::now();
        if (timeReport_ != milliseconds::zero()) {
            // stat cmd
            if (thisTime >= nextReportTime) {
                usedTimeMsTick = duration_cast<milliseconds>(thisTime - startTime);
                durationInSec = usedTimeMsTick.count();
                auto leftTimeMsTick = duration_cast<milliseconds>(endTime - thisTime);
                printf("\nReport at %" PRIu64 " ms (%" PRIu64 " ms left):\n",
                       (uint64_t)usedTimeMsTick.count(), (uint64_t)leftTimeMsTick.count());
                nextReportTime += timeReport_;
                StatReport(durationInSec);
            }
        }

        if (HaveTargetsExit(startTime)) {
            break;
        }

        if (thisTime >= endTime) {
            usedTimeMsTick = duration_cast<milliseconds>(thisTime - startTime);
            durationInSec = usedTimeMsTick.count();
            printf("Timeout exit (total %" PRIu64 " ms)\n", (uint64_t)usedTimeMsTick.count());
            if (trackedCommand_) {
                trackedCommand_->Stop();
            }
            break;
        }

        // if more than 200 ms is left, sleep 100 ms; otherwise sleep 200 us
        uint64_t defaultSleepUs = 2 * HUNDREDS; // 200 us
        if (timeReport_ == milliseconds::zero()
            && (timeOut_.count() * THOUSANDS) > thresholdTimeInMs) {
            milliseconds leftTimeMsTmp = duration_cast<milliseconds>(endTime - thisTime);
            if (leftTimeMsTmp.count() > thresholdTimeInMs) {
                defaultSleepUs = HUNDREDS * THOUSANDS; // 100 ms
            }
        }
        std::this_thread::sleep_for(microseconds(defaultSleepUs));
    }

    if (!g_trackRunning) {
        // for the user-interrupt situation, print time statistics
        usedTimeMsTick = duration_cast<milliseconds>(steady_clock::now() - startTime);
        printf("User interrupt exit (total %" PRIu64 " ms)\n", (uint64_t)usedTimeMsTick.count());
    }

    if (timeReport_ == milliseconds::zero()) {
        StatReport(durationInSec);
    }
}

const std::string PerfEvents::GetTypeName(perf_type_id type_id)
{
    auto it = PERF_TYPES.find(type_id);
    if (it != PERF_TYPES.end()) {
        return it->second;
    } else {
        return "<not found>";
    }
}
} // namespace HiPerf
} // namespace Developtools
} // namespace OHOS