• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
#include "perf_events.h"

#include <cassert>
#include <cinttypes>
#include <csignal>
#include <cstdint>
#include <cstdlib>
#include <iostream>
#include <limits>
#include <utility>

#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/resource.h>
#include <sys/syscall.h>
#include <unistd.h>
#if defined(CONFIG_HAS_SYSPARA)
#include <parameters.h>
#endif

#include "spe_decoder.h"
#include "debug_logger.h"
#include "hiperf_hilog.h"
#include "register.h"
#include "subcommand_dump.h"
#include "symbols_file.h"
#include "utilities.h"
39 
40 using namespace std;
41 using namespace std::chrono;
42 namespace OHOS {
43 namespace Developtools {
44 namespace HiPerf {
45 static std::atomic_bool g_trackRunning = false;
46 
Open(perf_event_attr & attr,pid_t pid,int cpu,int groupFd,unsigned long flags)47 OHOS::UniqueFd PerfEvents::Open(perf_event_attr &attr, pid_t pid, int cpu, int groupFd,
48                                 unsigned long flags)
49 {
50     OHOS::UniqueFd fd = UniqueFd(syscall(__NR_perf_event_open, &attr, pid, cpu, groupFd, flags));
51     if (fd < 0) {
52         HLOGEP("syscall perf_event_open failed. ");
53         // dump when open failed.
54         SubCommandDump::DumpPrintEventAttr(attr, std::numeric_limits<int>::min());
55     }
56     HLOGV("perf_event_open: got fd %d for pid %d cpu %d group %d flags %lu", fd.Get(), pid, cpu, groupFd, flags);
57     return fd;
58 }
59 
SpeReadData(void * dataPage,u64 * dataTail,uint8_t * buf,u32 size)60 void PerfEvents::SpeReadData(void *dataPage, u64 *dataTail, uint8_t *buf, u32 size)
61 {
62     void *src = nullptr;
63     u32 left = 0;
64     u32 offset = static_cast<u32>(*dataTail);
65     u32 copySize;
66     u32 traceSize = size;
67     CHECK_TRUE(size > (auxMmapPages_ * pageSize_ + sizeof(struct PerfRecordAuxtraceData)),
68                NO_RETVAL, 1, "buf size invalid");
69     while (traceSize > 0) {
70         offset = CALC_OFFSET(offset, auxMmapPages_ * pageSize_);
71         left = static_cast<u32>(auxMmapPages_ * pageSize_ - offset);
72         copySize = min(traceSize, left);
73         src = PTR_ADD(dataPage, offset);
74         if (memcpy_s(buf, left, src, copySize) != 0) {
75             HLOGV("SpeReadData memcpy_s failed.");
76         }
77 
78         traceSize -= copySize;
79         offset += copySize;
80         buf = reinterpret_cast<uint8_t *>(PTR_ADD(buf, copySize));
81     }
82 
83     *dataTail += size;
84 }
85 
arm_spe_reference()86 static u64 arm_spe_reference()
87 {
88     struct timespec ts;
89     clock_gettime(CLOCK_MONOTONIC_RAW, &ts);
90     return static_cast<uint64_t>(ts.tv_sec) ^ static_cast<uint64_t>(ts.tv_nsec);
91 }
92 
// Drain newly-reported SPE AUX trace data from one per-cpu mmap and publish it
// into recordBuf_ as a PERF_RECORD_AUXTRACE header followed by the raw bytes.
// @param mmapFd    per-cpu mmap descriptor (event page + AUX buffer)
// @param auxOffset kernel-reported end offset of the new data chunk
// @param auxSize   size in bytes of the new data chunk
// @param pid,tid   process/thread attributed to the synthesized record
void PerfEvents::ReadRecordsFromSpeMmaps(MmapFd& mmapFd, u64 auxOffset, u64 auxSize, u32 pid, u32 tid)
{
    if (mmapFd.mmapPage == nullptr || mmapFd.auxBuf == nullptr) {
        printf("ReadRecordsFromSpeMmaps mmapFd.mmapPage == nullptr, mmapFd.fd: %d", mmapFd.fd);
        return;
    }
    perf_event_mmap_page *userPage = reinterpret_cast<perf_event_mmap_page *>(mmapFd.mmapPage);
    void *auxPage = mmapFd.auxBuf;
    // rewind the tail to the start of the chunk the kernel just reported
    userPage->aux_tail = auxOffset - auxSize;
    u64 auxHead = userPage->aux_head;
    u64 auxTail = userPage->aux_tail;
    HLOGD("mmap cpu %d, aux_head: %llu, aux_tail:%llu, auxOffset:%llu, auxSize:%llu",
          mmapFd.cpu, auxHead, auxTail, auxOffset, auxSize);
    if (auxHead <= auxTail) {
        // no unread data
        return;
    }
    if (auxSize > auxMmapPages_ * pageSize_) {
        // chunk larger than the whole AUX ring: content was overwritten, skip it
        userPage->aux_tail += auxSize;
        return;
    }

    int cpu = mmapFd.cpu;
    __sync_synchronize(); // barrier: read aux_head before touching trace bytes
    PerfRecordAuxtrace auxtraceRecord = PerfRecordAuxtrace(auxSize, auxTail,
                                                           arm_spe_reference(), cpu, tid, cpu, pid);
    static std::vector<u8> vbuf(RECORD_SIZE_LIMIT);
    uint8_t *buf;
    if ((buf = recordBuf_->AllocForWrite(auxtraceRecord.header.size + auxSize)) == nullptr) {
        HLOGD("alloc buffer failed: PerfRecordAuxtrace record, readSize: %llu", auxSize);
        return;
    }
    // serialize the auxtrace header first, then append the raw payload
    auxtraceRecord.GetBinary1(vbuf);
    if (memcpy_s(buf, auxtraceRecord.header.size, vbuf.data(), auxtraceRecord.header.size) != 0) {
        HLOGE("memcpy_s return failed");
        // NOTE(review): returns without EndWrite() after AllocForWrite —
        // confirm RingBuffer tolerates an abandoned write.
        return;
    }
    buf += auxtraceRecord.header.size;

    // Copy page-sized pieces; aux_tail is published after each piece so the
    // kernel can reuse ring space as early as possible.
    while (auxSize > 0) {
        u64 readSize = pageSize_;
        if (auxSize < pageSize_) {
            readSize = auxSize;
        }
        __sync_synchronize();
        SpeReadData(auxPage, &auxTail, buf, readSize);
        __sync_synchronize();
        userPage->aux_tail += readSize;
        auxTail = userPage->aux_tail;
        buf += readSize;
        auxSize -= readSize;
    }
    recordBuf_->EndWrite();
}
146 
GetSpeType()147 u32 GetSpeType()
148 {
149     FILE *fd;
150     u32 speType;
151 
152     fd = fopen("/sys/devices/arm_spe_0/type", "r");
153     if (fd == nullptr) {
154         HLOGV("open sysfs file failed");
155         return -1;
156     }
157     if (fscanf_s(fd, "%u", &speType) <= 0) {
158         HLOGV("fscanf_s file failed");
159         (void)fclose(fd);
160         return -1;
161     }
162 
163     (void)fclose(fd);
164     return speType;
165 }
166 
PerfEvents()167 PerfEvents::PerfEvents() : timeOut_(DEFAULT_TIMEOUT * THOUSANDS), timeReport_(0)
168 {
169     pageSize_ = sysconf(_SC_PAGESIZE);
170     HLOGI("BuildArch %s", GetArchName(BUILD_ARCH_TYPE).c_str());
171 }
172 
~PerfEvents()173 PerfEvents::~PerfEvents()
174 {
175     // close mmap
176     for (auto it = cpuMmap_.begin(); it != cpuMmap_.end();) {
177         const MmapFd &mmapItem = it->second;
178         if (!isSpe_) {
179             munmap(mmapItem.mmapPage, (1 + mmapPages_) * pageSize_);
180         } else {
181             munmap(mmapItem.mmapPage, (1 + auxMmapPages_) * pageSize_);
182             munmap(mmapItem.auxBuf, auxMmapPages_ * pageSize_);
183         }
184         it = cpuMmap_.erase(it);
185     }
186 
187     ExitReadRecordBufThread();
188 }
189 
IsEventSupport(perf_type_id type,__u64 config)190 bool PerfEvents::IsEventSupport(perf_type_id type, __u64 config)
191 {
192     unique_ptr<perf_event_attr> attr = PerfEvents::CreateDefaultAttr(type, config);
193     CHECK_TRUE(attr == nullptr, false, 0, "");
194     UniqueFd fd = Open(*attr.get());
195     if (fd < 0) {
196         printf("event not support %s\n", GetStaticConfigName(type, config).c_str());
197         return false;
198     }
199     return true;
200 }
IsEventAttrSupport(perf_event_attr & attr)201 bool PerfEvents::IsEventAttrSupport(perf_event_attr &attr)
202 {
203     UniqueFd fd = Open(attr);
204     if (fd < 0) {
205         return false;
206     }
207     return true;
208 }
209 
SetBranchSampleType(uint64_t value)210 bool PerfEvents::SetBranchSampleType(uint64_t value)
211 {
212     if (value != 0) {
213         // cpu-clcles event must be supported
214         unique_ptr<perf_event_attr> attr =
215             PerfEvents::CreateDefaultAttr(PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES);
216         CHECK_TRUE(attr == nullptr, false, 0, "");
217         attr->sample_type |= PERF_SAMPLE_BRANCH_STACK;
218         attr->branch_sample_type = value;
219         if (!IsEventAttrSupport(*attr.get())) {
220             return false;
221         }
222     }
223     branchSampleType_ = value;
224     return true;
225 }
226 
AddDefaultEvent(perf_type_id type)227 bool PerfEvents::AddDefaultEvent(perf_type_id type)
228 {
229     auto it = DEFAULT_TYPE_CONFIGS.find(type);
230     if (it != DEFAULT_TYPE_CONFIGS.end()) {
231         for (auto config : it->second) {
232             AddEvent(type, config);
233         }
234     }
235     return true;
236 }
237 
AddOffCpuEvent()238 bool PerfEvents::AddOffCpuEvent()
239 {
240     std::string eventName = "sched:sched_switch";
241     if (eventSpaceType_ == EventSpaceType::USER) {
242         eventName += ":u";
243     } else if (eventSpaceType_ == EventSpaceType::KERNEL) {
244         eventName += ":k";
245     }
246     return AddEvent(eventName);
247 }
248 
AddEvents(const std::vector<std::string> & eventStrings,bool group)249 bool PerfEvents::AddEvents(const std::vector<std::string> &eventStrings, bool group)
250 {
251     bool followGroup = false;
252     HLOGV(" %s", VectorToString(eventStrings).c_str());
253 
254     for (std::string eventString : eventStrings) {
255         if (!AddEvent(eventString, followGroup)) {
256             return false;
257         }
258         // this is group request , Follow-up events need to follow the previous group
259         if (group) {
260             followGroup = true;
261         }
262     }
263     return true;
264 }
265 
266 // event name can have :k or :u suffix
267 // tracepoint event name is like sched:sched_switch
268 // clang-format off
// Split an event spec into its parts. Accepted shapes:
//   "cycles"                      -> plain event, both spaces
//   "cycles:u" / "cycles:k"       -> space-restricted event
//   "sched:sched_switch"          -> tracepoint
//   "sched:sched_switch:u|:k"     -> space-restricted tracepoint
// Outputs: name (without modifier), excludeUser/excludeKernel, isTracePoint.
// Returns false on a malformed spec (and for sw clock events with a modifier
// when reporting, which the kernel rejects).
// clang-format off
bool PerfEvents::ParseEventName(const std::string &nameStr,
    std::string &name, bool &excludeUser, bool &excludeKernel, bool &isTracePoint)
// clang-format on
{
    // defaults: unmodified name, both spaces, not a tracepoint
    name = nameStr;
    excludeUser = false;
    excludeKernel = false;
    isTracePoint = false;
    if (nameStr.find(":") != std::string::npos) {
        static constexpr size_t maxNumberTokensNoTracePoint = 2;
        static constexpr size_t maxNumberTokensTracePoint = 3;
        std::vector<std::string> eventTokens = StringSplit(nameStr, ":");
        if (eventTokens.size() == maxNumberTokensTracePoint) {
            // tracepoint event with :u or :k
            if (eventTokens.back() == "k") {
                excludeUser = true;
                HLOGV("kernelOnly event");
            } else if (eventTokens.back() == "u") {
                excludeKernel = true;
                HLOGV("userOnly event");
            } else {
                HLOGV("unknown event name %s", nameStr.c_str());
                return false;
            }
            name = eventTokens[0] + ":" + eventTokens[1];
            isTracePoint = true;
        } else if (eventTokens.size() == maxNumberTokensNoTracePoint) {
            // two tokens: either "<event>:u|k" or a bare "<group>:<tracepoint>"
            name = eventTokens[0];
            if (eventTokens.back() == "k") {
                excludeUser = true;
                HLOGV("kernelOnly event");
            } else if (eventTokens.back() == "u") {
                excludeKernel = true;
                HLOGV("userOnly event");
            } else {
                name = nameStr;
                isTracePoint = true;
                HLOGV("tracepoint event is in form of xx:xxx");
            }
        } else {
            printf("unknown ':' format:'%s'\n", nameStr.c_str());
            return false;
        }
        if (reportCallBack_) {
            // sw clock events cannot be restricted to one space by the kernel
            if ((eventTokens[0] == "sw-task-clock" || eventTokens[0] == "sw-cpu-clock") &&
                (excludeUser || excludeKernel)) {
                printf(
                    "event type %s with modifier u and modifier k is not supported by the kernel.",
                    eventTokens[0].c_str());
                return false;
            }
        }
    }
    return true;
}
324 
// Resolve a textual event spec to a concrete (type, config) and add it.
// Handles tracepoints, the SPE PMU ("arm_spe_0"), raw hex ids ("0x..."),
// and named static events; prints a diagnostic when nothing matches.
bool PerfEvents::AddEvent(const std::string &eventString, bool followGroup)
{
    std::string eventName;
    bool excludeUser = false;
    bool excludeKernel = false;
    bool isTracePointEvent = false;
    if (!ParseEventName(eventString, eventName, excludeUser, excludeKernel, isTracePointEvent)) {
        return false;
    }
    // accumulate the union of requested event spaces (used by AddOffCpuEvent)
    if (excludeUser) {
        eventSpaceType_ |= EventSpaceType::KERNEL;
    } else if (excludeKernel) {
        eventSpaceType_ |= EventSpaceType::USER;
    } else {
        eventSpaceType_ |= EventSpaceType::USER_KERNEL;
    }

    // find if
    if (isTracePointEvent) {
        // lazily load the tracepoint id table from sysfs on first use
        if (traceConfigTable.empty()) {
            LoadTracepointEventTypesFromSystem();
        }
        for (auto traceType : traceConfigTable) {
            if (traceType.second == eventName) {
                return AddEvent(PERF_TYPE_TRACEPOINT, traceType.first, excludeUser, excludeKernel,
                                followGroup);
            }
        }
    } else {
        if (eventName == "arm_spe_0") {
            // NOTE(review): GetSpeType() may return (u32)-1 on failure —
            // confirm AddSpeEvent copes with an invalid PMU type.
            u32 speType = GetSpeType();
            return AddSpeEvent(speType);
        }
        if (StringStartsWith(eventName, "0x")
            && eventName.length() <= MAX_HEX_EVENT_NAME_LENGTH && IsHexDigits(eventName)) {
            // raw PMU event given as a hex config value
            return AddEvent(PERF_TYPE_RAW, std::stoull(eventName, nullptr, NUMBER_FORMAT_HEX_BASE),
                            excludeUser, excludeKernel, followGroup);
        } else {
            auto [find, typeId, configId] = GetStaticConfigId(eventName);
            if (find) {
                return AddEvent(typeId, configId, excludeUser, excludeKernel, followGroup);
            }
        }
    }

    printf("%s event is not supported by the kernel.\n", eventName.c_str());
    return false;
}
373 
AddSpeEvent(u32 type,bool followGroup)374 bool PerfEvents::AddSpeEvent(u32 type, bool followGroup)
375 {
376     EventGroupItem &eventGroupItem = followGroup ? eventGroupItem_.back() :
377                                      eventGroupItem_.emplace_back();
378     EventItem &eventItem = eventGroupItem.eventItems.emplace_back();
379 
380     if (memset_s(&eventItem.attr, sizeof(perf_event_attr), 0, sizeof(perf_event_attr)) != EOK) {
381         HLOGE("memset_s failed in PerfEvents::AddEvent");
382         return false;
383     }
384     eventItem.attr.type = type;
385     eventItem.attr.sample_period = MULTIPLE_SIZE;
386     eventItem.attr.size = sizeof(perf_event_attr);
387     eventItem.attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING | PERF_FORMAT_ID;
388     eventItem.attr.inherit = (inherit_ ? 1 : 0);
389     eventItem.attr.sample_type = SAMPLE_ID;
390     eventItem.attr.sample_id_all = 1;
391     eventItem.attr.disabled = 1;
392     eventItem.attr.config = 0x700010007; // temp type
393     return true;
394 }
395 
SetConfig(std::map<const std::string,unsigned long long> & speOptMaps)396 void PerfEvents::SetConfig(std::map<const std::string, unsigned long long> &speOptMaps)
397 {
398     int jitterOffset = 16;
399     int branchOffset = 32;
400     int loadOffset = 33;
401     int storeOffset = 34;
402     config_ |= (speOptMaps["ts_enable"] & 0x1) << 0;
403     config_ |= (speOptMaps["pa_enable"] & 0x1) << 1;
404     config_ |= (speOptMaps["jitter"] & 0x1) << jitterOffset;
405     config_ |= (speOptMaps["branch_filter"] & 0x1) << branchOffset;
406     config_ |= (speOptMaps["load_filter"] & 0x1) << loadOffset;
407     config_ |= (speOptMaps["store_filter"] & 0x1) << storeOffset;
408     config1_ |= speOptMaps["event_filter"];
409     config2_ |= speOptMaps["min_latency"] & 0xfff;
410 }
411 
// Create and fully configure one EventItem for (type, config).
// @param excludeUser/excludeKernel restrict counting to one space (not both)
// @param followGroup join the previously-created group instead of leading a new one
// @return false when no leader exists, the kernel rejects the event, or memset fails
bool PerfEvents::AddEvent(perf_type_id type, __u64 config, bool excludeUser, bool excludeKernel,
                          bool followGroup)
{
    HLOG_ASSERT(!excludeUser or !excludeKernel);
    CHECK_TRUE(followGroup && eventGroupItem_.empty(), false, 1, "no group leader create before");
    // found the event name
    CHECK_TRUE(!IsEventSupport(type, config), false, 0, "");
    HLOGV("type %d config %llu excludeUser %d excludeKernel %d followGroup %d", type, config,
          excludeUser, excludeKernel, followGroup);

    // if use follow ?
    EventGroupItem &eventGroupItem = followGroup ? eventGroupItem_.back()
                                                 : eventGroupItem_.emplace_back();
    // always new item
    EventItem &eventItem = eventGroupItem.eventItems.emplace_back();

    eventItem.typeName = GetTypeName(type);
    if (type == PERF_TYPE_TRACEPOINT) {
        eventItem.configName = GetTraceConfigName(config);
    } else {
        eventItem.configName = GetStaticConfigName(type, config);
    }

    // attr: zero everything, then set only the fields we use
    if (memset_s(&eventItem.attr, sizeof(perf_event_attr), 0, sizeof(perf_event_attr)) != EOK) {
        HLOGE("memset_s failed in PerfEvents::AddEvent");
        return false;
    }
    eventItem.attr.size = sizeof(perf_event_attr);
    eventItem.attr.type = type;
    eventItem.attr.config = config;
    eventItem.attr.disabled = 1;
    eventItem.attr.read_format =
        PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING | PERF_FORMAT_ID;

    eventItem.attr.inherit = (inherit_ ? 1 : 0);
    eventItem.attr.exclude_kernel = excludeKernel;
    eventItem.attr.exclude_user = excludeUser;

    // we also need mmap for record
    if (recordCallBack_) {
        // sampling cadence: an explicit period wins over a frequency; otherwise
        // tracepoints default to a fixed period, other events to a frequency
        if (samplePeriod_ > 0) {
            eventItem.attr.freq = 0;
            eventItem.attr.sample_freq = 0;
            eventItem.attr.sample_period = samplePeriod_;
        } else if (sampleFreq_ > 0) {
            eventItem.attr.freq = 1;
            eventItem.attr.sample_freq = sampleFreq_;
        } else {
            if (type == PERF_TYPE_TRACEPOINT) {
                eventItem.attr.freq = 0;
                eventItem.attr.sample_period = DEFAULT_SAMPLE_PERIOD;
            } else {
                eventItem.attr.freq = 1;
                eventItem.attr.sample_freq = DEFAULT_SAMPLE_FREQUNCY;
            }
        }

        // wake the reader at half the ring buffer, capped at 1 MiB
        eventItem.attr.watermark = 1;
        eventItem.attr.wakeup_watermark = (mmapPages_ * pageSize_) >> 1;
        static constexpr unsigned int maxWakeupMark = 1024 * 1024;
        if (eventItem.attr.wakeup_watermark > maxWakeupMark) {
            eventItem.attr.wakeup_watermark = maxWakeupMark;
        }

        // for a group of events, only enable comm/mmap on the first event
        if (!followGroup) {
            eventItem.attr.comm = 1;
            eventItem.attr.mmap = 1;
            eventItem.attr.mmap2 = 1;
            eventItem.attr.mmap_data = 1;
        }

        // call-stack capture mode: DWARF (user regs + stack copy), FP, or none
        if (sampleStackType_ == SampleStackType::DWARF) {
            eventItem.attr.sample_type = SAMPLE_TYPE | PERF_SAMPLE_CALLCHAIN |
                                         PERF_SAMPLE_STACK_USER | PERF_SAMPLE_REGS_USER;
            eventItem.attr.exclude_callchain_user = 1;
            eventItem.attr.sample_regs_user = GetSupportedRegMask(GetDeviceArch());
            eventItem.attr.sample_stack_user = dwarfSampleStackSize_;
        } else if (sampleStackType_ == SampleStackType::FP) {
            eventItem.attr.sample_type = SAMPLE_TYPE | PERF_SAMPLE_CALLCHAIN;
        } else {
            eventItem.attr.sample_type = SAMPLE_TYPE;
        }

        if (isHM_) {
            eventItem.attr.sample_type |= PERF_SAMPLE_SERVER_PID;
        }
    }

    // set clock id
    if (clockId_ != -1) {
        eventItem.attr.use_clockid = 1;
        eventItem.attr.clockid = clockId_;
    }
    if (branchSampleType_ != 0) {
        eventItem.attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
        eventItem.attr.branch_sample_type = branchSampleType_;
    }

    HLOGV("Add Event: '%s':'%s' %s %s %s", eventItem.typeName.c_str(), eventItem.configName.c_str(),
          excludeUser ? "excludeUser" : "", excludeKernel ? "excludeKernel" : "",
          followGroup ? "" : "group leader");

    return true;
}
518 
CreateDefaultAttr(perf_type_id type,__u64 config)519 std::unique_ptr<perf_event_attr> PerfEvents::CreateDefaultAttr(perf_type_id type, __u64 config)
520 {
521     unique_ptr<perf_event_attr> attr = make_unique<perf_event_attr>();
522     if (memset_s(attr.get(), sizeof(perf_event_attr), 0, sizeof(perf_event_attr)) != EOK) {
523         HLOGE("memset_s failed in PerfEvents::CreateDefaultAttr");
524         return nullptr;
525     }
526     attr->size = sizeof(perf_event_attr);
527     attr->type = type;
528     attr->config = config;
529     attr->disabled = 1;
530     return attr;
531 }
532 
533 // should move to upper caller
534 static struct sigaction g_oldSig {
535 };
CaptureSig()536 static bool CaptureSig()
537 {
538     HLOGD("capture Ctrl + C to end sampling decently");
539     struct sigaction sig {
540     };
541 
542     sig.sa_handler = [](int sig) {
543         printf("\n Ctrl + C detected.\n");
544         g_trackRunning = false;
545     };
546 
547     sig.sa_flags = 0;
548     if (sigaction(SIGINT, &sig, &g_oldSig) < 0) {
549         perror("Fail to call sigaction for SIGINT");
550         return false;
551     }
552     return true;
553 }
554 
RecoverCaptureSig()555 static void RecoverCaptureSig()
556 {
557     if (sigaction(SIGINT, &g_oldSig, nullptr) < 0) {
558         perror("Fail to call sigaction for SIGINT");
559     }
560 }
561 
562 // split to two part
563 // because WriteAttrAndId need fd id before start tracking
PrepareTracking(void)564 bool PerfEvents::PrepareTracking(void)
565 {
566     // 1. prepare cpu pid
567     CHECK_TRUE(!PrepareFdEvents(), false, 1, "PrepareFdEvents() failed");
568 
569     // 2. create events
570     CHECK_TRUE(!CreateFdEvents(), false, 1, "CreateFdEvents() failed");
571 
572     HLOGV("success");
573     prepared_ = true;
574     return true;
575 }
576 
ExitReadRecordBufThread()577 void PerfEvents::ExitReadRecordBufThread()
578 {
579     if (isLowPriorityThread_) {
580         if (setpriority(PRIO_PROCESS, gettid(), 0) != 0) {
581             HLOGW("failed to decrease priority of reading kernel");
582         }
583     }
584     if (readRecordBufThread_.joinable()) {
585         {
586             std::lock_guard<std::mutex> lk(mtxRrecordBuf_);
587             readRecordThreadRunning_ = false;
588             __sync_synchronize();
589             cvRecordBuf_.notify_one();
590         }
591         readRecordBufThread_.join();
592     }
593 }
594 
PrepareRecordThread()595 bool PerfEvents::PrepareRecordThread()
596 {
597     try {
598         recordBuf_ = std::make_unique<RingBuffer>(CalcBufferSize());
599     } catch (const std::exception &e) {
600         printf("create record buffer(size %zu) failed: %s\n", CalcBufferSize(), e.what());
601         return false;
602     }
603     readRecordThreadRunning_ = true;
604     readRecordBufThread_ = std::thread(&PerfEvents::ReadRecordFromBuf, this);
605 
606     rlimit rlim;
607     int result = getrlimit(RLIMIT_NICE, &rlim);
608     const rlim_t lowPriority = 40;
609     if (result == 0 && rlim.rlim_cur == lowPriority) {
610         const int highPriority = -20;
611         result = setpriority(PRIO_PROCESS, gettid(), highPriority);
612         if (result != 0) {
613             HLOGW("failed to increase priority of reading kernel");
614         } else {
615             isLowPriorityThread_ = true;
616         }
617     }
618 
619     return true;
620 }
621 
WaitRecordThread()622 void PerfEvents::WaitRecordThread()
623 {
624     printf("Process and Saving data...\n");
625     ExitReadRecordBufThread();
626 
627     const auto usedTimeMsTick = duration_cast<milliseconds>(steady_clock::now() - trackingEndTime_);
628     if (verboseReport_) {
629         printf("Record Process Completed (wait %" PRId64 " ms)\n", (uint64_t)usedTimeMsTick.count());
630     }
631     HLOGV("Record Process Completed (wait %" PRId64 " ms)\n", (uint64_t)usedTimeMsTick.count());
632 #ifdef HIPERF_DEBUG_TIME
633     printf("%zu record processed, used %0.3f ms(%4.2f us/record)\n", recordEventCount_,
634            recordCallBackTime_.count() / MS_DURATION,
635            recordCallBackTime_.count() / static_cast<double>(recordEventCount_));
636     printf("total wait sleep time %0.3f ms.\n", recordSleepTime_.count() / MS_DURATION);
637     printf("read from kernel time %0.3f ms.\n", recordKernelReadTime_.count() / MS_DURATION);
638 #endif
639 }
640 
// Run the main tracking workflow:
//   1. enable the events (immediately, or deferred to the loop),
//   2. loop (record or stat) until timeout / Ctrl+C / StopTracking,
//   3. disable events and drain leftover samples,
//   4. wait for the reader thread to finish.
// Requires PrepareTracking() to have succeeded.
bool PerfEvents::StartTracking(bool immediately)
{
    if (!prepared_) {
        HLOGD("do not prepared_");
        return false;
    }

    if (recordCallBack_) {
        if (!PrepareRecordThread()) {
            return false;
        }
    }

    HLOGD("step: 1. enable event");
    trackingStartTime_ = steady_clock::now();
    if (immediately) {
        if (!EnableTracking()) {
            HLOGE("PerfEvents::EnableTracking() failed");
            return false;
        }
        printf("Profiling duration is %.3f seconds.\n", float(timeOut_.count()) / THOUSANDS);
        printf("Start Profiling...\n");
    }

    // make Ctrl+C stop the loop cleanly instead of killing the process
    g_trackRunning = true;
    if (!CaptureSig()) {
        HLOGE("captureSig() failed");
        g_trackRunning = false;
        ExitReadRecordBufThread();
        return false;
    }

    HLOGD("step: 2. thread loop");
    if (recordCallBack_) {
        RecordLoop();
    } else {
        StatLoop();
    }

    HLOGD("step: 3. disable event");
    if (!PerfEventsEnable(false)) {
        HLOGE("PerfEvents::PerfEventsEnable() failed");
    }
    if (recordCallBack_) {
        // read left samples after disable events
        ReadRecordsFromMmaps();
    }
    trackingEndTime_ = steady_clock::now();

    RecoverCaptureSig();

    if (recordCallBack_) {
        WaitRecordThread();
    }

    HLOGD("step: 4. exit");
    return true;
}
699 
StopTracking(void)700 bool PerfEvents::StopTracking(void)
701 {
702     if (g_trackRunning) {
703         printf("some one called StopTracking\n");
704         g_trackRunning = false;
705         if (trackedCommand_) {
706             if (trackedCommand_->GetState() == TrackedCommand::State::COMMAND_STARTED) {
707                 trackedCommand_->Stop();
708             }
709         }
710         CHECK_TRUE(!PerfEventsEnable(false), false, 1, "StopTracking : PerfEventsEnable(false) failed");
711     }
712     return true;
713 }
714 
PauseTracking(void)715 bool PerfEvents::PauseTracking(void)
716 {
717     CHECK_TRUE(!startedTracking_, false, 0, "");
718     return PerfEventsEnable(false);
719 }
720 
ResumeTracking(void)721 bool PerfEvents::ResumeTracking(void)
722 {
723     CHECK_TRUE(!startedTracking_, false, 0, "");
724     return PerfEventsEnable(true);
725 }
726 
EnableTracking()727 bool PerfEvents::EnableTracking()
728 {
729     CHECK_TRUE(startedTracking_, true, 0, "");
730     CHECK_TRUE(!PerfEventsEnable(true), false, 1, "PerfEvents::PerfEventsEnable() failed");
731 
732     if (trackedCommand_) {
733         // start tracked Command
734         if (trackedCommand_->GetState() == TrackedCommand::State::COMMAND_WAITING) {
735             if (!trackedCommand_->StartCommand()) {
736                 int wstatus;
737                 if (!trackedCommand_->WaitCommand(wstatus)) {
738                     trackedCommand_->Stop();
739                 }
740                 std::string commandName = trackedCommand_->GetCommandName();
741                 printf("failed to execute command: %zu: %s\n", commandName.size(), commandName.c_str());
742                 return false;
743             }
744         } else if (trackedCommand_->GetState() != TrackedCommand::State::COMMAND_STARTED) {
745             return false;
746         }
747     }
748     startedTracking_ = true;
749     return true;
750 }
751 
IsTrackRunning()752 bool PerfEvents::IsTrackRunning()
753 {
754     return g_trackRunning;
755 }
756 
SetSystemTarget(bool systemTarget)757 void PerfEvents::SetSystemTarget(bool systemTarget)
758 {
759     systemTarget_ = systemTarget;
760 }
761 
SetCpu(std::vector<pid_t> cpus)762 void PerfEvents::SetCpu(std::vector<pid_t> cpus)
763 {
764     cpus_ = cpus;
765 }
766 
SetPid(std::vector<pid_t> pids)767 void PerfEvents::SetPid(std::vector<pid_t> pids)
768 {
769     pids_ = pids;
770 }
771 
SetTimeOut(float timeOut)772 void PerfEvents::SetTimeOut(float timeOut)
773 {
774     if (timeOut > 0) {
775         timeOut_ = milliseconds(static_cast<int>(timeOut * THOUSANDS));
776     }
777 }
778 
SetTimeReport(int timeReport)779 void PerfEvents::SetTimeReport(int timeReport)
780 {
781     static constexpr int minMsReportInterval = 10;
782     if (timeReport < minMsReportInterval && timeReport != 0) {
783         timeReport = minMsReportInterval;
784         printf("time report min value is %d.\n", timeReport);
785     }
786 
787     timeReport_ = milliseconds(timeReport);
788 }
789 
GetSupportEvents(perf_type_id type)790 std::map<__u64, std::string> PerfEvents::GetSupportEvents(perf_type_id type)
791 {
792     if (type == PERF_TYPE_TRACEPOINT) {
793         LoadTracepointEventTypesFromSystem();
794     }
795 
796     std::map<__u64, std::string> eventConfigs;
797     auto configTable = TYPE_CONFIGS.find(type);
798     if (configTable != TYPE_CONFIGS.end()) {
799         auto configs = configTable->second;
800         for (auto config : configs) {
801             if (type == PERF_TYPE_TRACEPOINT || IsEventSupport(type, (__u64)config.first)) {
802                 eventConfigs.insert(config);
803             } else {
804                 HLOGD("'%s' not support", config.second.c_str());
805             }
806         }
807     }
808     return eventConfigs;
809 }
810 
// Populate traceConfigTable (and TYPE_CONFIGS' tracepoint table) with every
// tracepoint the kernel exposes under .../tracing/events, keyed by numeric id.
// Idempotent: does nothing once traceConfigTable is non-empty.
void PerfEvents::LoadTracepointEventTypesFromSystem()
{
    if (traceConfigTable.empty()) {
        std::string basePath {"/sys/kernel/tracing/events"};
        if (access(basePath.c_str(), R_OK) != 0) {
            // fall back to the debugfs mount used by older kernels
            basePath = "/sys/kernel/debug/tracing/events";
        }
        for (const auto &eventName : GetSubDirs(basePath)) {
            std::string eventPath = basePath + "/" + eventName;
            for (const auto &concreteEvent : GetSubDirs(eventPath)) {
                std::string idPath = eventPath + "/" + concreteEvent + "/id";
                {
                    std::string resolvedPath = CanonicalizeSpecPath(idPath.c_str());
                    std::ifstream ifs {resolvedPath};
                    // clang-format off
                    const std::string idStr = {
                        std::istream_iterator<char>(ifs),
                        std::istream_iterator<char>()
                    };
                    // clang-format on
                    __u64 id {0};
                    try {
                        id = std::stoul(idStr, nullptr);
                    } catch (...) {
                        // unreadable or non-numeric id file: skip this tracepoint
                        continue;
                    }
                    if (isHM_ && id < MIN_HM_TRACEPOINT_EVENT_ID) {
                        continue;
                    }
                    auto typeConfigs = TYPE_CONFIGS.find(PERF_TYPE_TRACEPOINT);
                    HLOG_ASSERT(typeConfigs != TYPE_CONFIGS.end());
                    auto configPair = typeConfigs->second.insert(
                        std::make_pair(id, eventName + ":" + concreteEvent));
                    traceConfigTable.insert(std::make_pair(id, eventName + ":" + concreteEvent));
                    ConfigTable::iterator it = configPair.first;
                    HLOGV("TYPE_CONFIGS add %llu:%s in %zu", it->first, it->second.c_str(),
                          typeConfigs->second.size());
                }
            }
        }
    }
}
853 
// Enable per-cpu breakdown: StatReport() then records per-(cpu,pid) summaries
// and PrepareFdEvents() enumerates every cpu.
void PerfEvents::SetPerCpu(bool perCpu)
{
    perCpu_ = perCpu;
}
858 
// Enable per-thread breakdown: like SetPerCpu, this makes StatReport() keep
// per-(cpu,pid) summaries and forces all-cpu fd creation.
void PerfEvents::SetPerThread(bool perThread)
{
    perThread_ = perThread;
}
863 
// Enable verbose mode: extra printf diagnostics in CreateFdEvents() open
// failures and per-fd values in StatReport().
void PerfEvents::SetVerboseReport(bool verboseReport)
{
    verboseReport_ = verboseReport;
}
868 
SetSampleFrequency(unsigned int frequency)869 void PerfEvents::SetSampleFrequency(unsigned int frequency)
870 {
871     if (frequency > 0) {
872         sampleFreq_ = frequency;
873     }
874     int maxRate = 0;
875     CHECK_TRUE(!ReadIntFromProcFile("/proc/sys/kernel/perf_event_max_sample_rate", maxRate),
876                NO_RETVAL, LOG_TYPE_PRINTF,
877                "read perf_event_max_sample_rate fail.\n");
878     if (sampleFreq_ > static_cast<unsigned int>(maxRate)) {
879         static bool printFlag = false;
880         sampleFreq_ = static_cast<unsigned int>(maxRate);
881         if (!printFlag) {
882             printf("Adjust sampling frequency to maximum allowed frequency %d.\n", maxRate);
883             printFlag = true;
884         }
885     }
886 }
887 
// Set the sampling period (events between two samples); zero is ignored and
// keeps the previous value.
void PerfEvents::SetSamplePeriod(unsigned int period)
{
    if (period > 0) {
        samplePeriod_ = period;
    }
}
894 
// Set the number of data pages per perf mmap ring buffer (used by
// CreateMmap() and factored into CalcBufferSize()).
void PerfEvents::SetMmapPages(size_t mmapPages)
{
    mmapPages_ = mmapPages;
}
899 
// Select the stack sampling method (SampleStackType, e.g. FP vs DWARF).
void PerfEvents::SetSampleStackType(SampleStackType type)
{
    sampleStackType_ = type;
}
904 
// Set how many bytes of user stack to capture per sample for DWARF unwinding.
void PerfEvents::SetDwarfSampleStackSize(uint32_t stackSize)
{
    HLOGD("request stack size is %u", stackSize);
    dwarfSampleStackSize_ = stackSize;
}
910 
PerfEventsEnable(bool enable)911 bool PerfEvents::PerfEventsEnable(bool enable)
912 {
913     HLOGV("%s", std::to_string(enable).c_str());
914     for (const auto &eventGroupItem : eventGroupItem_) {
915         for (const auto &eventItem : eventGroupItem.eventItems) {
916             for (const auto &fdItem : eventItem.fdItems) {
917                 int result =
918                     ioctl(fdItem.fd, enable ? PERF_EVENT_IOC_ENABLE : PERF_EVENT_IOC_DISABLE, 0);
919                 if (result < 0) {
920                     printf("Cannot '%s' perf fd! type config name: '%s:%s'\n",
921                            enable ? "enable" : "disable", eventItem.typeName.c_str(),
922                            eventItem.configName.c_str());
923                     return false;
924                 }
925             }
926         }
927     }
928     return true;
929 }
930 
// Mark whether the target kernel is HM; affects tracepoint id filtering in
// LoadTracepointEventTypesFromSystem().
void PerfEvents::SetHM(bool isHM)
{
    isHM_ = isHM;
}
935 
// Register the callback StatReport() invokes with the aggregated counters.
void PerfEvents::SetStatCallBack(StatCallBack reportCallBack)
{
    reportCallBack_ = reportCallBack;
}
// Register the callback that consumes sampled records; a non-null callback
// also makes CreateFdEvents() map the ring buffers.
void PerfEvents::SetRecordCallBack(RecordCallBack recordCallBack)
{
    recordCallBack_ = recordCallBack;
}
944 
PutAllCpus()945 inline void PerfEvents::PutAllCpus()
946 {
947     int cpuConfigs = sysconf(_SC_NPROCESSORS_CONF);
948     for (int i = 0; i < cpuConfigs; i++) {
949         cpus_.push_back(i); // put all cpu
950     }
951 }
952 
// Normalize the pid and cpu selections (pids_/cpus_) before fds are opened,
// following the perf_event_open pid/cpu combination rules quoted below, then
// log what was selected. Always returns true.
bool PerfEvents::PrepareFdEvents(void)
{
    /*
    https://man7.org/linux/man-pages/man2/perf_event_open.2.html
    pid == 0 and cpu == -1
            This measures the calling process/thread on any CPU.

    pid == 0 and cpu >= 0
            This measures the calling process/thread only when running
            on the specified CPU.

    pid > 0 and cpu == -1
            This measures the specified process/thread on any CPU.

    pid > 0 and cpu >= 0
            This measures the specified process/thread only when
            running on the specified CPU.

    pid == -1 and cpu >= 0
            This measures all processes/threads on the specified CPU.
            This requires CAP_PERFMON (since Linux 5.8) or
            CAP_SYS_ADMIN capability or a
            /proc/sys/kernel/perf_event_paranoid value of less than 1.

    pid == -1 and cpu == -1
            This setting is invalid and will return an error.
    */
    if (systemTarget_) {
        // system-wide: a single pid of -1 selects all processes
        pids_.clear();
        pids_.push_back(-1);
    } else {
        if (trackedCommand_) {
            // profile the child process launched for the tracked command
            pids_.push_back(trackedCommand_->GetChildPid());
        }
        if (pids_.empty()) {
            pids_.push_back(0); // no pid means use 0 as self pid
        }
    }
    // per-cpu/per-thread reporting needs one fd per cpu, so enumerate all cpus
    if (perCpu_ || perThread_) {
        cpus_.clear();
        PutAllCpus();
    }
    if (cpus_.empty()) {
        PutAllCpus();
    }

    // print info tell user which cpu and process we will select.
    if (pids_.size() == 1 && pids_[0] == -1) {
        HLOGI("target process: system scope \n");
    } else {
        HLOGI("target process: %zu (%s)\n", pids_.size(),
              (pids_[0] == 0) ? std::to_string(gettid()).c_str() : VectorToString(pids_).c_str());
    }
    if (cpus_.size() == 1 && cpus_[0] == -1) {
        HLOGI("target cpus: %ld \n", sysconf(_SC_NPROCESSORS_CONF));
    } else {
        HLOGI("target cpus: %zu / %ld (%s)\n", cpus_.size(), sysconf(_SC_NPROCESSORS_CONF),
            VectorToString(cpus_).c_str());
    }

    return true;
}
1015 
CreateFdEvents(void)1016 bool PerfEvents::CreateFdEvents(void)
1017 {
1018     // must be some events , or will failed
1019     CHECK_TRUE(eventGroupItem_.empty(), false, LOG_TYPE_PRINTF, "no event select.\n");
1020 
1021     // create each fd by cpu and process user select
1022     /*
1023         https://man7.org/linux/man-pages/man2/perf_event_open.2.html
1024 
1025         (A single event on its own is created with group_fd = -1 and is
1026         considered to be a group with only 1 member.)
1027     */
1028     // Even if there is only one event, it is counted as a group.
1029 
1030     uint fdNumber = 0;
1031     uint eventNumber = 0;
1032     uint groupNumber = 0;
1033     for (auto &eventGroupItem : eventGroupItem_) {
1034         /*
1035             Explain what is the configuration of the group:
1036             Suppose we have 2 Event, 2 PID, and 3 CPU settings
1037             According to verification,
1038             Group's fd requires the pid to be the same as the cpu, the only difference is event
1039             In other words, if you want to bind E1 and E2 to the same group
1040             That can only be like this:
1041 
1042             event E1 pid P1 cpu C1 [Group 1]
1043             event E1 pid P1 cpu C2 [Group 2]
1044             event E1 pid P1 cpu C3 [Group 3]
1045 
1046             event E1 pid P2 cpu C1 [Group 4]
1047             event E1 pid P2 cpu C2 [Group 5]
1048             event E1 pid P2 cpu C3 [Group 6]
1049 
1050             event E2 pid P1 cpu C1 [Group 1]
1051             event E2 pid P1 cpu C2 [Group 2]
1052             event E2 pid P1 cpu C3 [Group 3]
1053 
1054             event E2 pid P2 cpu C1 [Group 4]
1055             event E2 pid P2 cpu C2 [Group 5]
1056             event E2 pid P2 cpu C3 [Group 6]
1057         */
1058         HLOGV("group %2u. eventGroupItem leader: '%s':", groupNumber++,
1059               eventGroupItem.eventItems[0].configName.c_str());
1060 
1061         int groupFdCache[cpus_.size()][pids_.size()];
1062         for (size_t i = 0; i < cpus_.size(); i++) {     // each cpu
1063             for (size_t j = 0; j < pids_.size(); j++) { // each pid
1064                 // The leader is created first, with group_fd = -1.
1065                 groupFdCache[i][j] = -1;
1066             }
1067         }
1068 
1069         uint eventIndex = 0;
1070         for (auto &eventItem : eventGroupItem.eventItems) {
1071             HLOGV(" - event %2u. eventName: '%s:%s'", eventIndex++, eventItem.typeName.c_str(),
1072                   eventItem.configName.c_str());
1073 
1074             for (size_t icpu = 0; icpu < cpus_.size(); icpu++) {     // each cpu
1075                 for (size_t ipid = 0; ipid < pids_.size(); ipid++) { // each pid
1076                     // one fd event group must match same cpu and same pid config (event can be
1077                     // different)
1078                     // clang-format off
1079                     UniqueFd fd = Open(eventItem.attr, pids_[ipid], cpus_[icpu],
1080                                        groupFdCache[icpu][ipid], 0);
1081                     // clang-format on
1082                     if (fd < 0) {
1083                         if (errno == ESRCH) {
1084                             if (verboseReport_) {
1085                                 printf("pid %d does not exist.\n", pids_[ipid]);
1086                             }
1087                             HLOGE("pid %d does not exist.\n", pids_[ipid]);
1088                             continue;
1089                         } else {
1090                             // clang-format off
1091                             if (verboseReport_) {
1092                                 char errInfo[ERRINFOLEN] = { 0 };
1093                                 strerror_r(errno, errInfo, ERRINFOLEN);
1094                                 printf("%s event is not supported by the kernel on cpu %d. reason: %d:%s\n",
1095                                     eventItem.configName.c_str(), cpus_[icpu], errno, errInfo);
1096                             }
1097                             char errInfo[ERRINFOLEN] = { 0 };
1098                             strerror_r(errno, errInfo, ERRINFOLEN);
1099                             HLOGE("%s event is not supported by the kernel on cpu %d. reason: %d:%s\n",
1100                                 eventItem.configName.c_str(), cpus_[icpu], errno, errInfo);
1101                             // clang-format on
1102                             break; // jump to next cpu
1103                         }
1104                     }
1105                     // after open successed , fill the result
1106                     // make a new FdItem
1107                     FdItem &fdItem = eventItem.fdItems.emplace_back();
1108                     fdItem.fd = move(fd);
1109                     fdItem.cpu = cpus_[icpu];
1110                     fdItem.pid = pids_[ipid];
1111                     fdNumber++;
1112 
1113                     // if sampling, mmap ring buffer
1114                     if (recordCallBack_) {
1115                         if (isSpe_) {
1116                             CreateSpeMmap(fdItem, eventItem.attr);
1117                         } else {
1118                             CreateMmap(fdItem, eventItem.attr);
1119                         }
1120                     }
1121                     // update group leader
1122                     int groupFdCacheNum = groupFdCache[icpu][ipid];
1123                     if (groupFdCacheNum == -1) {
1124                         groupFdCache[icpu][ipid] = fdItem.fd.Get();
1125                     }
1126                 }
1127             }
1128             eventNumber++;
1129         }
1130     }
1131 
1132     CHECK_TRUE(fdNumber == 0, false, 1, "open %d fd for %d events", fdNumber, eventNumber);
1133 
1134     HLOGD("will try read %u events from %u fd (%zu groups):", eventNumber, fdNumber,
1135           eventGroupItem_.size());
1136 
1137     return true;
1138 }
1139 
// Read every opened fd's counter and aggregate the values per config name
// (summing across cpus and pids), then invoke the registered stat callback.
// NOTE(review): despite the name, durationInSec is divided by THOUSANDS
// before use — it appears to actually carry milliseconds; confirm at callers.
bool PerfEvents::StatReport(const __u64 &durationInSec)
{
    read_format_no_group readNoGroupValue;

    // only need read when need report
    HLOGM("eventGroupItem_:%zu", eventGroupItem_.size());
    __u64 groupId = 0;
    // clear countEvents data
    countEvents_.clear();
    for (const auto &eventGroupItem : eventGroupItem_) {
        HLOGM("eventItems:%zu", eventGroupItem.eventItems.size());
        groupId++;
        for (const auto &eventItem : eventGroupItem.eventItems) {
            // count event info together (every cpu , every pid)
            // suffix records which ring the event was restricted to:
            // ":u" = user only (kernel excluded), ":k" = kernel only
            std::string configName = "";
            if (eventItem.attr.exclude_kernel) {
                configName = eventItem.configName + ":u";
            } else if (eventItem.attr.exclude_user) {
                configName = eventItem.configName + ":k";
            } else {
                configName = eventItem.configName;
            }
            if (countEvents_.count(configName) == 0) {
                auto countEvent = make_unique<CountEvent>(CountEvent {});
                countEvents_[configName] = std::move(countEvent);
                countEvents_[configName]->userOnly = eventItem.attr.exclude_kernel;
                countEvents_[configName]->kernelOnly = eventItem.attr.exclude_user;
            }
            const std::unique_ptr<CountEvent> &countEvent = countEvents_[configName];
            HLOGM("eventItem.fdItems:%zu", eventItem.fdItems.size());
            for (const auto &fditem : eventItem.fdItems) {
                if (read(fditem.fd, &readNoGroupValue, sizeof(readNoGroupValue)) > 0) {
                    countEvent->eventCount += readNoGroupValue.value;
                    countEvent->timeEnabled += readNoGroupValue.timeEnabled;
                    countEvent->timeRunning += readNoGroupValue.timeRunning;
                    countEvent->id = groupId;
                    if (durationInSec != 0) {
                        // rough cpu-usage estimate; assumes a ~1e9/sec event
                        // rate (e.g. cpu cycles) — see NOTE above about units
                        countEvent->usedCpus = (countEvent->eventCount / 1e9) / (durationInSec / THOUSANDS);
                    }
                    if (verboseReport_) {
                        printf("%s id:%llu(c%d:p%d) timeEnabled:%llu timeRunning:%llu value:%llu\n",
                               eventItem.configName.c_str(), readNoGroupValue.id, fditem.cpu, fditem.pid,
                               readNoGroupValue.timeEnabled, readNoGroupValue.timeRunning, readNoGroupValue.value);
                    }
                    if ((perCpu_ || perThread_) && readNoGroupValue.value) {
                        // keep a per-fd (cpu, pid) breakdown for detailed reports
                        countEvent->summaries.emplace_back(fditem.cpu, fditem.pid, readNoGroupValue.value,
                            readNoGroupValue.timeEnabled, readNoGroupValue.timeRunning);
                    }
                } else {
                    printf("read failed from event '%s'\n", eventItem.configName.c_str());
                }
            }
        }
    }

    reportCallBack_(countEvents_);

    return true;
}
1199 
CreateSpeMmap(const FdItem & item,const perf_event_attr & attr)1200 bool PerfEvents::CreateSpeMmap(const FdItem &item, const perf_event_attr &attr)
1201 {
1202     auto it = cpuMmap_.find(item.cpu);
1203     if (it == cpuMmap_.end()) {
1204         void *rbuf = mmap(nullptr, (1 + auxMmapPages_) * pageSize_, (PROT_READ | PROT_WRITE), MAP_SHARED,
1205                           item.fd.Get(), 0);
1206         CHECK_TRUE(rbuf == MMAP_FAILED, false, 0, "");
1207         void *auxRbuf = mmap(nullptr, auxMmapPages_ * pageSize_, (PROT_READ | PROT_WRITE), MAP_SHARED,
1208                              item.fd.Get(), 0);
1209         MmapFd mmapItem;
1210         mmapItem.fd = item.fd.Get();
1211         mmapItem.mmapPage = reinterpret_cast<perf_event_mmap_page *>(rbuf);
1212         mmapItem.buf = reinterpret_cast<uint8_t *>(rbuf) + pageSize_;
1213         mmapItem.auxBuf = auxRbuf;
1214         mmapItem.bufSize = auxMmapPages_ * pageSize_;
1215         mmapItem.auxBufSize = auxMmapPages_ * pageSize_;
1216         mmapItem.attr = &attr;
1217         mmapItem.tid_ = item.pid;
1218         mmapItem.cpu = item.cpu;
1219         cpuMmap_[item.cpu] = mmapItem;
1220         pollFds_.emplace_back(pollfd {mmapItem.fd, POLLIN, 0});
1221     } else {
1222         const MmapFd &mmapItem = it->second;
1223         int rc = ioctl(item.fd.Get(), PERF_EVENT_IOC_SET_OUTPUT, mmapItem.fd);
1224         if (rc != 0) {
1225             HLOGEP("ioctl PERF_EVENT_IOC_SET_OUTPUT (%d -> %d) ", item.fd.Get(), mmapItem.fd);
1226             perror("failed to share mapped buffer\n");
1227             return false;
1228         }
1229     }
1230     return true;
1231 }
1232 
CreateMmap(const FdItem & item,const perf_event_attr & attr)1233 bool PerfEvents::CreateMmap(const FdItem &item, const perf_event_attr &attr)
1234 {
1235     auto it = cpuMmap_.find(item.cpu);
1236     if (it == cpuMmap_.end()) {
1237         void *rbuf = mmap(nullptr, (1 + mmapPages_) * pageSize_, PROT_READ | PROT_WRITE, MAP_SHARED,
1238                           item.fd.Get(), 0);
1239         if (rbuf == MMAP_FAILED) {
1240             char errInfo[ERRINFOLEN] = {0};
1241             strerror_r(errno, errInfo, ERRINFOLEN);
1242             perror("errno:%d, errstr:%s", errno, errInfo);
1243             perror("Fail to call mmap \n");
1244             return false;
1245         }
1246         MmapFd mmapItem;
1247         mmapItem.fd = item.fd.Get();
1248         mmapItem.mmapPage = reinterpret_cast<perf_event_mmap_page *>(rbuf);
1249         mmapItem.buf = reinterpret_cast<uint8_t *>(rbuf) + pageSize_;
1250         mmapItem.bufSize = mmapPages_ * pageSize_;
1251         mmapItem.attr = &attr;
1252         mmapItem.posCallChain = GetCallChainPosInSampleRecord(attr);
1253 
1254         cpuMmap_[item.cpu] = mmapItem;
1255         pollFds_.emplace_back(pollfd {mmapItem.fd, POLLIN, 0});
1256         HLOGD("CreateMmap success cpu %d fd %d", item.cpu, mmapItem.fd);
1257     } else {
1258         const MmapFd &mmapItem = it->second;
1259         int rc = ioctl(item.fd.Get(), PERF_EVENT_IOC_SET_OUTPUT, mmapItem.fd);
1260         if (rc != 0) {
1261             HLOGEP("ioctl PERF_EVENT_IOC_SET_OUTPUT (%d -> %d) ", item.fd.Get(), mmapItem.fd);
1262             perror("failed to share mapped buffer\n");
1263             return false;
1264         }
1265     }
1266     return true;
1267 }
1268 
GetAttrWithId() const1269 std::vector<AttrWithId> PerfEvents::GetAttrWithId() const
1270 {
1271     std::vector<AttrWithId> result;
1272     HLOGV("eventGroupItem_ %zu :", eventGroupItem_.size());
1273 
1274     for (const auto &eventGroupItem : eventGroupItem_) {
1275         HLOGV(" eventItems %zu eventItems:", eventGroupItem.eventItems.size());
1276         for (const auto &eventItem : eventGroupItem.eventItems) {
1277             AttrWithId attrId;
1278             attrId.attr = eventItem.attr;
1279             attrId.name = eventItem.configName;
1280             HLOGV("  fdItems %zu fdItems:", eventItem.fdItems.size());
1281             for (const auto &fdItem : eventItem.fdItems) {
1282                 auto &id = attrId.ids.emplace_back(fdItem.GetPrefId());
1283                 HLOGV("    eventItem.fdItems GetPrefId %" PRIu64 "", id);
1284             }
1285             result.emplace_back(attrId);
1286         }
1287     }
1288     return result;
1289 }
1290 
CalcBufferSize()1291 size_t PerfEvents::CalcBufferSize()
1292 {
1293     size_t maxBufferSize;
1294     if (LittleMemory()) {
1295         maxBufferSize = MAX_BUFFER_SIZE_LITTLE;
1296     } else {
1297         maxBufferSize = MAX_BUFFER_SIZE_LARGE;
1298     }
1299 
1300     size_t bufferSize = maxBufferSize;
1301     if (!systemTarget_) {
1302         // suppose ring buffer is 4 times as much as mmap
1303         static constexpr int TIMES = 4;
1304         bufferSize = cpuMmap_.size() * mmapPages_ * pageSize_ * TIMES;
1305         if (bufferSize < MIN_BUFFER_SIZE) {
1306             bufferSize = MIN_BUFFER_SIZE;
1307         } else if (bufferSize > maxBufferSize) {
1308             bufferSize = maxBufferSize;
1309         }
1310     }
1311     HLOGD("CalcBufferSize return %zu", bufferSize);
1312     return bufferSize;
1313 }
1314 
IsRecordInMmap(int timeout)1315 inline bool PerfEvents::IsRecordInMmap(int timeout)
1316 {
1317     HLOGV("enter");
1318     if (pollFds_.size() > 0) {
1319         if (poll(static_cast<struct pollfd*>(pollFds_.data()), pollFds_.size(), timeout) <= 0) {
1320             // time out try again
1321             return false;
1322         }
1323     }
1324     HLOGV("poll record from mmap");
1325     return true;
1326 }
1327 
// Heap comparator: ordering by '>' makes std::make_heap/push_heap/pop_heap
// keep the mmap with the SMALLEST timestamp on top (a min-heap), so
// ReadRecordsFromMmaps() drains records in chronological order.
static bool CompareRecordTime(const PerfEvents::MmapFd *left, const PerfEvents::MmapFd *right)
{
    return left->timestamp > right->timestamp;
}
1332 
// Drain all readable perf mmap ring buffers, merging records from multiple
// cpus in timestamp order via a min-heap of MmapFd*, and move each record
// into recordBuf_. Wakes the record-consumer thread when done.
void PerfEvents::ReadRecordsFromMmaps()
{
#ifdef HIPERF_DEBUG_TIME
    const auto readKenelStartTime = steady_clock::now();
#endif
    // get readable mmap at this time
    for (auto &it : cpuMmap_) {
        ssize_t dataSize = it.second.mmapPage->data_head - it.second.mmapPage->data_tail;
        __sync_synchronize(); // this same as rmb in gcc, after reading mmapPage->data_head
        if (dataSize <= 0) {
            continue;
        }
        it.second.dataSize = dataSize;
        MmapRecordHeap_.push_back(&(it.second));
    }
    if (MmapRecordHeap_.empty()) {
        return;
    }
    bool enableFlag = false;
    if (MmapRecordHeap_.size() > 1) {
        // prime each mmap with its first record so timestamps are comparable
        for (const auto &it : MmapRecordHeap_) {
            GetRecordFromMmap(*it);
        }
        std::make_heap(MmapRecordHeap_.begin(), MmapRecordHeap_.end(), CompareRecordTime);

        // repeatedly pop the mmap holding the oldest record, consume that
        // record, then re-push the mmap if it still has data; exhausted mmaps
        // shrink the active heap range [begin, begin + heapSize)
        size_t heapSize = MmapRecordHeap_.size();
        while (heapSize > 1) {
            std::pop_heap(MmapRecordHeap_.begin(), MmapRecordHeap_.begin() + heapSize,
                          CompareRecordTime);
            bool auxEvent = false;
            u32 pid = 0;
            u32 tid = 0;
            u64 auxOffset = 0;
            u64 auxSize = 0;
            MoveRecordToBuf(*MmapRecordHeap_[heapSize - 1], auxEvent, auxOffset, auxSize, pid, tid);
            if (isSpe_ && auxEvent) {
                // PERF_RECORD_AUX: decode the SPE data it points at
                ReadRecordsFromSpeMmaps(*MmapRecordHeap_[heapSize - 1], auxOffset, auxSize, pid, tid);
                enableFlag = true;
            }
            if (GetRecordFromMmap(*MmapRecordHeap_[heapSize - 1])) {
                std::push_heap(MmapRecordHeap_.begin(), MmapRecordHeap_.begin() + heapSize,
                               CompareRecordTime);
            } else {
                heapSize--;
            }
        }
    }

    // single mmap left (or only one to begin with): drain it sequentially
    while (GetRecordFromMmap(*MmapRecordHeap_.front())) {
        bool auxEvent = false;
        u32 pid = 0;
        u32 tid = 0;
        u64 auxOffset = 0;
        u64 auxSize = 0;
        MoveRecordToBuf(*MmapRecordHeap_.front(), auxEvent, auxOffset, auxSize, pid, tid);
        if (isSpe_ && auxEvent) {
            ReadRecordsFromSpeMmaps(*MmapRecordHeap_.front(), auxOffset, auxSize, pid, tid);
            enableFlag = true;
        }
    }
    if (isSpe_ && enableFlag) {
        // restart SPE sampling after the AUX data has been consumed
        PerfEventsEnable(false);
        PerfEventsEnable(true);
    }
    MmapRecordHeap_.clear();
    {
        std::lock_guard<std::mutex> lk(mtxRrecordBuf_);
        recordBufReady_ = true;
    }
    cvRecordBuf_.notify_one();
#ifdef HIPERF_DEBUG_TIME
    recordKernelReadTime_ += duration_cast<milliseconds>(steady_clock::now() - readKenelStartTime);
#endif
}
1407 
// Peek the next record's header (and, for sample records, its timestamp) from
// the ring buffer without consuming it. Returns false when no data remains.
bool PerfEvents::GetRecordFromMmap(MmapFd &mmap)
{
    if (mmap.dataSize <= 0) {
        return false;
    }

    GetRecordFieldFromMmap(mmap, &(mmap.header), mmap.mmapPage->data_tail, sizeof(mmap.header));
    if (mmap.header.type != PERF_RECORD_SAMPLE) {
        // non-sample records carry no timestamp here; 0 sorts them first
        mmap.timestamp = 0;
        return true;
    }
    // in PERF_RECORD_SAMPLE : header + u64 sample_id + u64 ip + u32 pid + u32 tid + u64 time
    // NOTE(review): this fixed offset assumes sample_type always enables
    // IDENTIFIER, IP and TID ahead of TIME — confirm against the attrs built
    // elsewhere in this class.
    constexpr size_t timePos = sizeof(perf_event_header) + sizeof(uint64_t) + sizeof(uint64_t) +
                               sizeof(uint32_t) + sizeof(uint32_t);
    GetRecordFieldFromMmap(mmap, &(mmap.timestamp), mmap.mmapPage->data_tail + timePos,
                           sizeof(mmap.timestamp));
    return true;
}
1426 
GetRecordFieldFromMmap(MmapFd & mmap,void * dest,size_t pos,size_t size)1427 void PerfEvents::GetRecordFieldFromMmap(MmapFd &mmap, void *dest, size_t pos, size_t size)
1428 {
1429     CHECK_TRUE(mmap.bufSize == 0, NO_RETVAL, 0, "");
1430     pos = pos % mmap.bufSize;
1431     size_t tailSize = mmap.bufSize - pos;
1432     size_t copySize = std::min(size, tailSize);
1433     if (memcpy_s(dest, copySize, mmap.buf + pos, copySize) != 0) {
1434         HLOGEP("memcpy_s %p to %p failed. size %zd", mmap.buf + pos, dest, copySize);
1435     }
1436     if (copySize < size) {
1437         size -= copySize;
1438         if (memcpy_s(static_cast<uint8_t *>(dest) + copySize, size, mmap.buf, size) != 0) {
1439             HLOGEP("GetRecordFieldFromMmap: memcpy_s mmap.buf to dest failed. size %zd", size);
1440         }
1441     }
1442 }
1443 
GetCallChainPosInSampleRecord(const perf_event_attr & attr)1444 size_t PerfEvents::GetCallChainPosInSampleRecord(const perf_event_attr &attr)
1445 {
1446     // reference struct PerfRecordSampleData
1447     int fixedFieldNumber = __builtin_popcountll(
1448         attr.sample_type & (PERF_SAMPLE_IDENTIFIER | PERF_SAMPLE_IP | PERF_SAMPLE_TID |
1449                             PERF_SAMPLE_TIME | PERF_SAMPLE_ADDR | PERF_SAMPLE_ID |
1450                             PERF_SAMPLE_STREAM_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD));
1451     size_t pos = sizeof(perf_event_header) + sizeof(uint64_t) * fixedFieldNumber;
1452     if (attr.sample_type & PERF_SAMPLE_READ) {
1453         pos += sizeof(read_format);
1454     }
1455     return pos;
1456 }
1457 
// Compute the byte offset of the PERF_SAMPLE_STACK_USER stack_size field
// inside the sample record currently at data_tail, by walking every
// variable-length field that precedes it in sample layout order.
size_t PerfEvents::GetStackSizePosInSampleRecord(MmapFd &mmap)
{
    size_t pos = mmap.posCallChain; // fixed part ends at the callchain offset
    if (mmap.attr->sample_type & PERF_SAMPLE_CALLCHAIN) {
        // u64 nr followed by nr u64 instruction pointers
        uint64_t nr = 0;
        GetRecordFieldFromMmap(mmap, &nr, mmap.mmapPage->data_tail + pos, sizeof(nr));
        pos += (sizeof(nr) + nr * sizeof(uint64_t));
    }
    if (mmap.attr->sample_type & PERF_SAMPLE_RAW) {
        // u32 size followed by size raw bytes
        uint32_t raw_size = 0;
        GetRecordFieldFromMmap(mmap, &raw_size, mmap.mmapPage->data_tail + pos, sizeof(raw_size));
        pos += (sizeof(raw_size) + raw_size);
    }
    if (mmap.attr->sample_type & PERF_SAMPLE_BRANCH_STACK) {
        // u64 bnr followed by bnr branch entries
        uint64_t bnr = 0;
        GetRecordFieldFromMmap(mmap, &bnr, mmap.mmapPage->data_tail + pos, sizeof(bnr));
        pos += (sizeof(bnr) + bnr * sizeof(PerfBranchEntry));
    }
    if (mmap.attr->sample_type & PERF_SAMPLE_REGS_USER) {
        // u64 abi; register values follow only when abi != 0 (ABI_NONE)
        uint64_t user_abi = 0;
        GetRecordFieldFromMmap(mmap, &user_abi, mmap.mmapPage->data_tail + pos, sizeof(user_abi));
        pos += sizeof(user_abi);
        if (user_abi > 0) {
            // one u64 per bit set in sample_regs_user
            uint64_t reg_nr = __builtin_popcountll(mmap.attr->sample_regs_user);
            pos += reg_nr * sizeof(uint64_t);
        }
    }
    if (mmap.attr->sample_type & PERF_SAMPLE_SERVER_PID) {
        // u64 server_nr followed by server_nr u64s — presumably an
        // OHOS-specific sample field; confirm against the kernel patch
        uint64_t server_nr = 0;
        GetRecordFieldFromMmap(mmap, &server_nr, mmap.mmapPage->data_tail + pos, sizeof(server_nr));
        pos += (sizeof(server_nr) + server_nr * sizeof(uint64_t));
    }
    return pos;
}
1492 
// Shrink the user-stack dump of the sample at data_tail to its dynamic size
// (rounded up to 64 bytes) while copying the record into recordBuf_, then
// consume the record from the ring buffer. Returns false when there is
// nothing to cut (no STACK_USER data, or dyn size already fills the dump);
// the caller then copies the record unmodified.
bool PerfEvents::CutStackAndMove(MmapFd &mmap)
{
    constexpr uint32_t alignSize = 64;
    if (!(mmap.attr->sample_type & PERF_SAMPLE_STACK_USER)) {
        return false;
    }
    size_t stackSizePos = GetStackSizePosInSampleRecord(mmap);
    uint64_t stackSize = 0;
    GetRecordFieldFromMmap(mmap, &stackSize, mmap.mmapPage->data_tail + stackSizePos,
                           sizeof(stackSize));
    if (stackSize == 0) {
        return false;
    }
    // dyn_size (bytes actually written by the kernel) sits after the dump
    size_t dynSizePos = stackSizePos + sizeof(uint64_t) + stackSize;
    uint64_t dynSize = 0;
    GetRecordFieldFromMmap(mmap, &dynSize, mmap.mmapPage->data_tail + dynSizePos, sizeof(dynSize));
    // round dynSize up to the 64-byte alignment, but never beyond stackSize
    uint64_t newStackSize = std::min((dynSize + alignSize - 1) &
                                     (~(alignSize >= 1 ? alignSize - 1 : 0)), stackSize);
    if (newStackSize >= stackSize) {
        return false;
    }
    HLOGM("stackSize %" PRIx64 " dynSize %" PRIx64 " newStackSize %" PRIx64 "\n", stackSize, dynSize, newStackSize);
    // move and cut stack_data
    // mmap: |<+++copy1+++>|<++++++copy2++++++>|<---------------cut--------------->|<+++copy3+++>|
    //             ^                    ^                        ^                 ^
    //         new_header          stackSizePos         <stackSize-dynSize>     dynSizePos
    uint16_t recordSize = mmap.header.size;
    mmap.header.size -= stackSize - newStackSize; // reduce the stack size
    uint8_t *buf = recordBuf_->AllocForWrite(mmap.header.size);
    // copy1: new_header
    CHECK_TRUE(buf == nullptr, false, 0, "");
    if (memcpy_s(buf, sizeof(perf_event_header), &(mmap.header), sizeof(perf_event_header)) != 0) {
        HLOGEP("memcpy_s %p to %p failed. size %zd", &(mmap.header), buf,
               sizeof(perf_event_header));
    }
    size_t copyPos = sizeof(perf_event_header);
    size_t copySize = stackSizePos - sizeof(perf_event_header) + sizeof(stackSize) + newStackSize;
    // copy2: copy stack_size, data[stack_size],
    GetRecordFieldFromMmap(mmap, buf + copyPos, mmap.mmapPage->data_tail + copyPos, copySize);
    copyPos += copySize;
    // copy3: copy dyn_size
    GetRecordFieldFromMmap(mmap, buf + copyPos, mmap.mmapPage->data_tail + dynSizePos,
                           recordSize - dynSizePos);
    // update stack_size
    if (memcpy_s(buf + stackSizePos, sizeof(stackSize), &(newStackSize), sizeof(newStackSize)) != 0) {
        HLOGEP("CutStackAndMove: memcpy_s newStack to buf stackSizePos failed. size %zd", sizeof(newStackSize));
    }
    recordBuf_->EndWrite();
    // release the original record: advance data_tail after the copy completes
    __sync_synchronize();
    mmap.mmapPage->data_tail += recordSize;
    mmap.dataSize -= recordSize;
    return true;
}
1546 
// Move one record from the kernel mmap ring buffer into recordBuf_.
// For PERF_RECORD_SAMPLE the stack payload may first be cut down by
// CutStackAndMove(); for PERF_RECORD_LOST only the lost-count is accumulated;
// for PERF_RECORD_AUX the aux_offset/aux_size/pid/tid fields are extracted
// into the caller's out-parameters before the whole record is copied.
// In every path the mmap data_tail is advanced past the record so the kernel
// may reuse that space.
void PerfEvents::MoveRecordToBuf(MmapFd &mmap, bool &isAuxEvent, u64 &auxOffset, u64 &auxSize, u32 &pid, u32 &tid)
{
    uint8_t *buf = nullptr;
    if (mmap.header.type == PERF_RECORD_SAMPLE) {
        // Drop sample records when the write buffer is nearly full; non-sample
        // records below are still given a chance to be stored.
        if (recordBuf_->GetFreeSize() <= BUFFER_CRITICAL_LEVEL) {
            lostSamples_++;
            HLOGD("BUFFER_CRITICAL_LEVEL: lost sample record");
            goto RETURN;
        }
        // CutStackAndMove() copies the (possibly truncated) sample itself and
        // advances data_tail on success, so we must return without the
        // fall-through copy below.
        if (CutStackAndMove(mmap)) {
            return;
        }
    } else if (mmap.header.type == PERF_RECORD_LOST) {
        // in PERF_RECORD_LOST : header + u64 id + u64 lost
        constexpr size_t lostPos = sizeof(perf_event_header) + sizeof(uint64_t);
        uint64_t lost = 0;
        GetRecordFieldFromMmap(mmap, &lost, mmap.mmapPage->data_tail + lostPos, sizeof(lost));
        lostSamples_ += lost;
        HLOGD("PERF_RECORD_LOST: lost sample record");
        goto RETURN;
    }
    if (mmap.header.type == PERF_RECORD_AUX) {
        isAuxEvent = true;
        // in AUX : header + u64 aux_offset + u64 aux_size
        uint64_t auxOffsetPos = sizeof(perf_event_header);
        uint64_t auxSizePos = sizeof(perf_event_header) + sizeof(uint64_t);
        // pid/tid live after aux_size plus two more u64 fields (flags etc.);
        // NOTE(review): offsets assume the sample_id layout starts with TID —
        // confirm against the attr's sample_type configuration.
        uint64_t pidPos = auxSizePos + sizeof(uint64_t) * 2; // 2 : offset
        uint64_t tidPos = pidPos + sizeof(uint32_t);
        GetRecordFieldFromMmap(mmap, &auxOffset, mmap.mmapPage->data_tail + auxOffsetPos, sizeof(auxOffset));
        GetRecordFieldFromMmap(mmap, &auxSize, mmap.mmapPage->data_tail + auxSizePos, sizeof(auxSize));
        GetRecordFieldFromMmap(mmap, &pid, mmap.mmapPage->data_tail + pidPos, sizeof(pid));
        GetRecordFieldFromMmap(mmap, &tid, mmap.mmapPage->data_tail + tidPos, sizeof(tid));
    }

    if ((buf = recordBuf_->AllocForWrite(mmap.header.size)) == nullptr) {
        // this record type must be Non-Sample
        lostNonSamples_++;
        HLOGD("alloc buffer failed: lost non-sample record");
        goto RETURN;
    }

    // Copy the whole record (header included) into the write buffer.
    GetRecordFieldFromMmap(mmap, buf, mmap.mmapPage->data_tail, mmap.header.size);
    recordBuf_->EndWrite();
RETURN:
    // Full barrier before publishing the new data_tail so the kernel never
    // observes the tail advance before our reads of the record completed.
    __sync_synchronize();
    mmap.mmapPage->data_tail += mmap.header.size;
    mmap.dataSize -= mmap.header.size;
}
1595 
ReadRecordFromBuf()1596 void PerfEvents::ReadRecordFromBuf()
1597 {
1598     const perf_event_attr *attr = GetDefaultAttr();
1599     uint8_t *p = nullptr;
1600 
1601     while (readRecordThreadRunning_) {
1602         {
1603             std::unique_lock<std::mutex> lk(mtxRrecordBuf_);
1604             cvRecordBuf_.wait(lk, [this] {
1605                 if (recordBufReady_) {
1606                     recordBufReady_ = false;
1607                     return true;
1608                 }
1609                 return !readRecordThreadRunning_;
1610             });
1611         }
1612         while ((p = recordBuf_->GetReadData()) != nullptr) {
1613             uint32_t *type = reinterpret_cast<uint32_t *>(p);
1614 #ifdef HIPERF_DEBUG_TIME
1615             const auto readingStartTime_ = steady_clock::now();
1616 #endif
1617 #if !HIDEBUG_SKIP_CALLBACK
1618             recordCallBack_(GetPerfSampleFromCache(*type, p, *attr));
1619 #endif
1620             recordEventCount_++;
1621 #ifdef HIPERF_DEBUG_TIME
1622             recordCallBackTime_ +=
1623                 duration_cast<milliseconds>(steady_clock::now() - readingStartTime_);
1624 #endif
1625             recordBuf_->EndRead();
1626         }
1627     }
1628     HLOGD("exit because trackStoped");
1629 
1630     // read the data left over in buffer
1631     while ((p = recordBuf_->GetReadData()) != nullptr) {
1632         uint32_t *type = reinterpret_cast<uint32_t *>(p);
1633 #ifdef HIPERF_DEBUG_TIME
1634         const auto readingStartTime_ = steady_clock::now();
1635 #endif
1636 #if !HIDEBUG_SKIP_CALLBACK
1637         recordCallBack_(GetPerfSampleFromCache(*type, p, *attr));
1638 #endif
1639         recordEventCount_++;
1640 #ifdef HIPERF_DEBUG_TIME
1641         recordCallBackTime_ += duration_cast<milliseconds>(steady_clock::now() - readingStartTime_);
1642 #endif
1643         recordBuf_->EndRead();
1644     }
1645     HLOGD("read all records from buffer");
1646 }
1647 
HaveTargetsExit(const std::chrono::steady_clock::time_point & startTime)1648 bool PerfEvents::HaveTargetsExit(const std::chrono::steady_clock::time_point &startTime)
1649 {
1650     if (systemTarget_) {
1651         return false;
1652     }
1653     if (trackedCommand_) {
1654         if (trackedCommand_->GetState() < TrackedCommand::State::COMMAND_STARTED) {
1655             return false; // not start yet
1656         }
1657         int wstatus;
1658         if (trackedCommand_->WaitCommand(wstatus)) {
1659             milliseconds usedMsTick = duration_cast<milliseconds>(steady_clock::now() - startTime);
1660             printf("tracked command(%s) has exited (total %" PRId64 " ms)\n",
1661                    trackedCommand_->GetCommandName().c_str(), (uint64_t)usedMsTick.count());
1662             return true;
1663         }
1664         return false;
1665     }
1666 
1667     for (auto it = pids_.begin(); it != pids_.end();) {
1668         if (IsDir("/proc/" + std::to_string(*it))) {
1669             it++;
1670         } else {
1671             it = pids_.erase(it);
1672         }
1673     }
1674     if (pids_.empty()) {
1675         milliseconds usedMsTick = duration_cast<milliseconds>(steady_clock::now() - startTime);
1676         printf("tracked processes have exited (total %" PRId64 " ms)\n", (uint64_t)usedMsTick.count());
1677         return true;
1678     }
1679     return false;
1680 }
1681 
RecordLoop()1682 void PerfEvents::RecordLoop()
1683 {
1684     // calc the time
1685     const auto startTime = steady_clock::now();
1686     const auto endTime = startTime + timeOut_;
1687     milliseconds usedTimeMsTick {};
1688     int count = 1;
1689 
1690     while (g_trackRunning) {
1691         // time check point
1692         const auto thisTime = steady_clock::now();
1693         usedTimeMsTick = duration_cast<milliseconds>(thisTime - startTime);
1694         if ((uint64_t)usedTimeMsTick.count() > (uint64_t)(count * THOUSANDS)) {
1695             if (HaveTargetsExit(startTime)) {
1696                 break;
1697             }
1698             ++count;
1699         }
1700 
1701         if (thisTime >= endTime) {
1702             printf("Timeout exit (total %" PRId64 " ms)\n", (uint64_t)usedTimeMsTick.count());
1703             if (trackedCommand_) {
1704                 trackedCommand_->Stop();
1705             }
1706             break;
1707         }
1708 
1709         int timeLeft = duration_cast<milliseconds>(endTime - thisTime).count();
1710         if (IsRecordInMmap(std::min(timeLeft, pollTimeOut_))) {
1711             ReadRecordsFromMmaps();
1712         }
1713     }
1714 
1715     if (!g_trackRunning) {
1716         // for user interrupt situation, print time statistic
1717         usedTimeMsTick = duration_cast<milliseconds>(steady_clock::now() - startTime);
1718         printf("User interrupt exit (total %" PRId64 " ms)\n", (uint64_t)usedTimeMsTick.count());
1719     }
1720 }
1721 
// Main stat-mode loop: periodically emit counter reports (when timeReport_ is
// set), otherwise just wait out timeOut_, then emit one final report. Exits on
// user interrupt (g_trackRunning cleared), target exit, or timeout.
void PerfEvents::StatLoop()
{
    // calc the time
    const auto startTime = steady_clock::now();
    const auto endTime = startTime + timeOut_;
    auto nextReportTime = startTime + timeReport_;
    milliseconds usedTimeMsTick {};
    // NOTE(review): despite the name, this is assigned elapsed *milliseconds*
    // below — confirm what unit StatReport() actually expects.
    __u64 durationInSec = 0;
    int64_t thesholdTimeInMs = 2 * HUNDREDS; // 200 ms threshold for the sleep heuristic

    while (g_trackRunning) {
        // time check point
        const auto thisTime = steady_clock::now();
        if (timeReport_ != milliseconds::zero()) {
            // stat cmd
            // periodic-report mode: emit a report each time the interval elapses
            if (thisTime >= nextReportTime) {
                // only for log or debug?
                usedTimeMsTick = duration_cast<milliseconds>(thisTime - startTime);
                durationInSec = usedTimeMsTick.count();
                auto lefTimeMsTick = duration_cast<milliseconds>(endTime - thisTime);
                printf("\nReport at %" PRId64 " ms (%" PRId64 " ms left):\n",
                       (uint64_t)usedTimeMsTick.count(), (uint64_t)lefTimeMsTick.count());
                // end of comments
                nextReportTime += timeReport_;
                StatReport(durationInSec);
            }
        }

        if (HaveTargetsExit(startTime)) {
            break;
        }

        if (thisTime >= endTime) {
            usedTimeMsTick = duration_cast<milliseconds>(thisTime - startTime);
            durationInSec = usedTimeMsTick.count();
            printf("Timeout exit (total %" PRId64 " ms)\n", (uint64_t)usedTimeMsTick.count());
            if (trackedCommand_) {
                trackedCommand_->Stop();
            }
            break;
        }

        // lefttime > 200ms sleep 100ms, else sleep 200us
        uint64_t defaultSleepUs = 2 * HUNDREDS; // 200us
        // In no-report mode with a long enough total timeout, sleep coarsely
        // (100ms) while far from the deadline to reduce wakeups.
        if (timeReport_ == milliseconds::zero()
            && (timeOut_.count() * THOUSANDS) > thesholdTimeInMs) {
            milliseconds leftTimeMsTmp = duration_cast<milliseconds>(endTime - thisTime);
            if (leftTimeMsTmp.count() > thesholdTimeInMs) {
                defaultSleepUs = HUNDREDS * THOUSANDS; // 100ms
            }
        }
        std::this_thread::sleep_for(microseconds(defaultSleepUs));
    }

    if (!g_trackRunning) {
        // for user interrupt situation, print time statistic
        usedTimeMsTick = duration_cast<milliseconds>(steady_clock::now() - startTime);
        printf("User interrupt exit (total %" PRId64 " ms)\n", (uint64_t)usedTimeMsTick.count());
    }

    // no periodic reporting configured: emit the single final report now
    if (timeReport_ == milliseconds::zero()) {
        StatReport(durationInSec);
    }
}
1786 
GetTypeName(perf_type_id type_id)1787 const std::string PerfEvents::GetTypeName(perf_type_id type_id)
1788 {
1789     auto it = PERF_TYPES.find(type_id);
1790     if (it != PERF_TYPES.end()) {
1791         return it->second;
1792     } else {
1793         return "<not found>";
1794     }
1795 }
1796 } // namespace HiPerf
1797 } // namespace Developtools
1798 } // namespace OHOS
1799