/*
 * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "perf_events.h"

#include <cassert>
#include <cinttypes>
#include <csignal>
#include <cstdint>
#include <cstdlib>
#include <iostream>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/resource.h>
#include <sys/syscall.h>
#include <unistd.h>
#if defined(CONFIG_HAS_SYSPARA)
#include <parameters.h>
#endif

#include "spe_decoder.h"
#include "debug_logger.h"
#include "hiperf_hilog.h"
#include "register.h"
#include "subcommand_dump.h"
#include "symbols_file.h"
#include "utilities.h"

using namespace std::chrono;
namespace OHOS {
namespace Developtools {
namespace HiPerf {
bool PerfEvents::updateTimeThreadRunning_ = true;
std::atomic<uint64_t> PerfEvents::currentTimeSecond_ = 0;
static std::atomic_bool g_trackRunning = false;
static constexpr int32_t UPDATE_TIME_INTERVAL = 10;    // 10ms
static constexpr uint64_t NANO_SECONDS_PER_SECOND = 1000000000;
static constexpr uint32_t POLL_FAIL_COUNT_THRESHOLD = 10;

OHOS::UniqueFd PerfEvents::Open(perf_event_attr &attr, const pid_t pid, const int cpu, const int groupFd,
                                const unsigned long flags)
{
    OHOS::UniqueFd fd = UniqueFd(syscall(__NR_perf_event_open, &attr, pid, cpu, groupFd, flags));
    if (fd < 0) {
        HLOGEP("syscall perf_event_open failed. ");
        // dump when open failed.
        SubCommandDump::DumpPrintEventAttr(attr, std::numeric_limits<int>::min());
    }
    HLOGV("perf_event_open: got fd %d for pid %d cpu %d group %d flags %lu", fd.Get(), pid, cpu, groupFd, flags);
    return fd;
}
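
// A minimal usage sketch (not part of hiperf): counting CPU cycles for the
// calling thread via the raw syscall, using only standard <linux/perf_event.h>
// definitions. It illustrates the pid/cpu/groupFd arguments that Open() forwards.
//
//   perf_event_attr attr = {};
//   attr.size = sizeof(attr);
//   attr.type = PERF_TYPE_HARDWARE;
//   attr.config = PERF_COUNT_HW_CPU_CYCLES;
//   attr.disabled = 1;
//   int fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0); // self, any cpu, no group
//   ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
//   // ... run the workload ...
//   uint64_t count = 0;
//   read(fd, &count, sizeof(count));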

void PerfEvents::SpeReadData(void *dataPage, u64 *dataTail, uint8_t *buf, const u32 size)
{
    void *src = nullptr;
    u32 left = 0;
    u32 offset = static_cast<u32>(*dataTail);
    u32 copySize;
    u32 traceSize = size;
    CHECK_TRUE(size <= (auxMmapPages_ * pageSize_ + sizeof(struct PerfRecordAuxtraceData)),
               NO_RETVAL, 1, "buf size invalid");
    while (traceSize > 0) {
        offset = CALC_OFFSET(offset, auxMmapPages_ * pageSize_);
        left = static_cast<u32>(auxMmapPages_ * pageSize_ - offset);
        copySize = std::min(traceSize, left);
        src = PTR_ADD(dataPage, offset);
        if (memcpy_s(buf, left, src, copySize) != 0) {
            HLOGV("SpeReadData memcpy_s failed.");
        }

        traceSize -= copySize;
        offset += copySize;
        buf = reinterpret_cast<uint8_t *>(PTR_ADD(buf, copySize));
    }

    *dataTail += size;
}
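
// Worked example of the wrap-around copy above (illustrative numbers only):
// with an AUX area of 16 pages x 4 KiB = 64 KiB and *dataTail == 61440
// (offset 60 KiB), reading size == 8 KiB copies in two chunks:
//   chunk 1: offset 61440, left = 4 KiB  -> copy 4 KiB, offset wraps to 0
//   chunk 2: offset 0,     left = 64 KiB -> copy the remaining 4 KiB
// dataTail then advances by the full 8 KiB.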

static u64 arm_spe_reference()
{
    struct timespec ts;
    clock_gettime(CLOCK_MONOTONIC_RAW, &ts);
    return static_cast<uint64_t>(ts.tv_sec) ^ static_cast<uint64_t>(ts.tv_nsec);
}

void PerfEvents::ReadRecordsFromSpeMmaps(MmapFd& mmapFd, const u64 auxOffset,
                                         u64 auxSize, const u32 pid, const u32 tid)
{
    if (mmapFd.mmapPage == nullptr || mmapFd.auxBuf == nullptr) {
        printf("ReadRecordsFromSpeMmaps mmapFd.mmapPage == nullptr, mmapFd.fd: %d\n", mmapFd.fd);
        return;
    }
    perf_event_mmap_page *userPage = reinterpret_cast<perf_event_mmap_page *>(mmapFd.mmapPage);
    void *auxPage = mmapFd.auxBuf;
    userPage->aux_tail = auxOffset - auxSize;
    u64 auxHead = userPage->aux_head;
    u64 auxTail = userPage->aux_tail;
    HLOGD("mmap cpu %d, aux_head: %llu, aux_tail:%llu, auxOffset:%llu, auxSize:%llu",
          mmapFd.cpu, auxHead, auxTail, auxOffset, auxSize);
    if (auxHead <= auxTail) {
        return;
    }
    if (auxSize > auxMmapPages_ * pageSize_) {
        userPage->aux_tail += auxSize;
        return;
    }

    int cpu = mmapFd.cpu;
    __sync_synchronize();
    PerfRecordAuxtrace auxtraceRecord = PerfRecordAuxtrace(auxSize, auxTail,
                                                           arm_spe_reference(), cpu, tid, cpu, pid);
    static std::vector<u8> vbuf(RECORD_SIZE_LIMIT);
    uint8_t *buf;
    if ((buf = recordBuf_->AllocForWrite(auxtraceRecord.header_.size + auxSize)) == nullptr) {
        HLOGD("alloc buffer failed: PerfRecordAuxtrace record, readSize: %llu", auxSize);
        return;
    }
    auxtraceRecord.GetBinary1(vbuf);
    if (memcpy_s(buf, auxtraceRecord.header_.size, vbuf.data(), auxtraceRecord.header_.size) != 0) {
        HLOGE("memcpy_s return failed");
        return;
    }
    buf += auxtraceRecord.header_.size;

    while (auxSize > 0) {
        u64 readSize = pageSize_;
        if (auxSize < pageSize_) {
            readSize = auxSize;
        }
        __sync_synchronize();
        SpeReadData(auxPage, &auxTail, buf, readSize);
        __sync_synchronize();
        userPage->aux_tail += readSize;
        auxTail = userPage->aux_tail;
        buf += readSize;
        auxSize -= readSize;
    }
    recordBuf_->EndWrite();
}

u32 GetSpeType()
{
    FILE *fd;
    u32 speType;

    fd = fopen("/sys/devices/arm_spe_0/type", "r");
    if (fd == nullptr) {
        HLOGV("open sysfs file failed");
        return UINT_MAX;
    }
    if (fscanf_s(fd, "%u", &speType) <= 0) {
        HLOGV("fscanf_s file failed");
        (void)fclose(fd);
        return UINT_MAX;
    }

    (void)fclose(fd);
    return speType;
}
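
// Note: /sys/devices/<pmu>/type holds the dynamically assigned PMU type id the
// kernel expects in perf_event_attr.type for that PMU; AddSpeEvent() below feeds
// the value returned here straight into attr.type. An equivalent manual check
// from a shell would be (illustrative):
//   cat /sys/devices/arm_spe_0/type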

PerfEvents::PerfEvents() : timeOut_(DEFAULT_TIMEOUT * THOUSANDS), timeReport_(0)
{
    pageSize_ = sysconf(_SC_PAGESIZE);
    HLOGI("BuildArch %s", GetArchName(BUILD_ARCH_TYPE).c_str());
}

PerfEvents::~PerfEvents()
{
    // close mmap
    for (auto it = cpuMmap_.begin(); it != cpuMmap_.end();) {
        const MmapFd &mmapItem = it->second;
        if (!isSpe_) {
            if (munmap(mmapItem.mmapPage, (1 + mmapPages_) * pageSize_) == -1) {
                HLOGW("munmap failed.");
            }
        } else {
            if (munmap(mmapItem.mmapPage, (1 + auxMmapPages_) * pageSize_) == -1) {
                HLOGW("munmap failed.");
            }
            if (munmap(mmapItem.auxBuf, auxMmapPages_ * pageSize_) == -1) {
                HLOGW("munmap failed.");
            }
        }
        it = cpuMmap_.erase(it);
    }

    ExitReadRecordBufThread();
    if (reportPtr_ != nullptr) {
        fclose(reportPtr_);
        reportPtr_ = nullptr;
    }
}

bool PerfEvents::IsEventSupport(const perf_type_id type, const __u64 config)
{
    std::unique_ptr<perf_event_attr> attr = PerfEvents::CreateDefaultAttr(type, config);
    CHECK_TRUE(attr != nullptr, false, 1, "attr is nullptr");
    UniqueFd fd = Open(*attr.get());
    if (fd < 0) {
        printf("event not supported: %s\n", GetStaticConfigName(type, config).c_str());
        return false;
    }
    return true;
}
bool PerfEvents::IsEventAttrSupport(perf_event_attr &attr)
{
    UniqueFd fd = Open(attr);
    if (fd < 0) {
        return false;
    }
    return true;
}

bool PerfEvents::SetBranchSampleType(const uint64_t value)
{
    if (value != 0) {
        // cpu-cycles event must be supported
        std::unique_ptr<perf_event_attr> attr =
            PerfEvents::CreateDefaultAttr(PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES);
        CHECK_TRUE(attr != nullptr, false, 0, "");
        attr->sample_type |= PERF_SAMPLE_BRANCH_STACK;
        attr->branch_sample_type = value;
        if (!IsEventAttrSupport(*attr.get())) {
            return false;
        }
    }
    branchSampleType_ = value;
    return true;
}

bool PerfEvents::AddDefaultEvent(const perf_type_id type)
{
    auto it = DEFAULT_TYPE_CONFIGS.find(type);
    if (it != DEFAULT_TYPE_CONFIGS.end()) {
        for (auto config : it->second) {
            AddEvent(type, config);
        }
    }
    return true;
}

bool PerfEvents::AddOffCpuEvent()
{
    std::string eventName = "sched:sched_switch";
    if (eventSpaceType_ == EventSpaceType::USER) {
        eventName += ":u";
    } else if (eventSpaceType_ == EventSpaceType::KERNEL) {
        eventName += ":k";
    }
    return AddEvent(eventName);
}

bool PerfEvents::AddEvents(const std::vector<std::string> &eventStrings, const bool group)
{
    bool followGroup = false;
    HLOGV(" %s", VectorToString(eventStrings).c_str());

    for (std::string eventString : eventStrings) {
        if (!AddEvent(eventString, followGroup)) {
            return false;
        }
        // this is a group request; follow-up events must join the previous group
        if (group) {
            followGroup = true;
        }
    }
    return true;
}

// event name can have :k or :u suffix
// tracepoint event name is like sched:sched_switch
// clang-format off
bool PerfEvents::ParseEventName(const std::string &nameStr,
    std::string &name, bool &excludeUser, bool &excludeKernel, bool &isTracePoint)
// clang-format on
{
    name = nameStr;
    excludeUser = false;
    excludeKernel = false;
    isTracePoint = false;
    if (nameStr.find(":") != std::string::npos) {
        static constexpr size_t maxNumberTokensNoTracePoint = 2;
        static constexpr size_t maxNumberTokensTracePoint = 3;
        std::vector<std::string> eventTokens = StringSplit(nameStr, ":");
        if (eventTokens.size() == maxNumberTokensTracePoint) {
            // tracepoint event with :u or :k
            if (eventTokens.back() == "k") {
                excludeUser = true;
                HLOGV("kernelOnly event");
            } else if (eventTokens.back() == "u") {
                excludeKernel = true;
                HLOGV("userOnly event");
            } else {
                HLOGV("unknown event name %s", nameStr.c_str());
                return false;
            }
            name = eventTokens[0] + ":" + eventTokens[1];
            isTracePoint = true;
        } else if (eventTokens.size() == maxNumberTokensNoTracePoint) {
            name = eventTokens[0];
            if (eventTokens.back() == "k") {
                excludeUser = true;
                HLOGV("kernelOnly event");
            } else if (eventTokens.back() == "u") {
                excludeKernel = true;
                HLOGV("userOnly event");
            } else {
                name = nameStr;
                isTracePoint = true;
                HLOGV("tracepoint event is in form of xx:xxx");
            }
        } else {
            printf("unknown ':' format:'%s'\n", nameStr.c_str());
            return false;
        }
        if (reportCallBack_) {
            if ((eventTokens[0] == "sw-task-clock" || eventTokens[0] == "sw-cpu-clock") &&
                (excludeUser || excludeKernel)) {
                printf(
                    "event type %s with modifier u and modifier k is not supported by the kernel.\n",
                    eventTokens[0].c_str());
                return false;
            }
        }
    }
    return true;
}
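
// Examples of how ParseEventName() splits its input (derived from the logic above):
//   "hw-cpu-cycles"        -> name "hw-cpu-cycles",        no exclusions, not a tracepoint
//   "hw-cpu-cycles:k"      -> name "hw-cpu-cycles",        excludeUser = true
//   "sched:sched_switch"   -> name "sched:sched_switch",   isTracePoint = true
//   "sched:sched_switch:u" -> name "sched:sched_switch",   isTracePoint = true, excludeKernel = true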

bool PerfEvents::AddEvent(const std::string &eventString, const bool followGroup)
{
    std::string eventName;
    bool excludeUser = false;
    bool excludeKernel = false;
    bool isTracePointEvent = false;
    if (!ParseEventName(eventString, eventName, excludeUser, excludeKernel, isTracePointEvent)) {
        return false;
    }
    if (excludeUser) {
        eventSpaceType_ |= EventSpaceType::KERNEL;
    } else if (excludeKernel) {
        eventSpaceType_ |= EventSpaceType::USER;
    } else {
        eventSpaceType_ |= EventSpaceType::USER_KERNEL;
    }

    // look up the parsed event name
    if (isTracePointEvent) {
        if (traceConfigTable.empty()) {
            LoadTracepointEventTypesFromSystem();
        }
        for (auto traceType : traceConfigTable) {
            if (traceType.second == eventName) {
                return AddEvent(PERF_TYPE_TRACEPOINT, traceType.first, excludeUser, excludeKernel,
                                followGroup);
            }
        }
    } else {
        if (eventName == "arm_spe_0") {
            u32 speType = GetSpeType();
            if (speType == UINT_MAX) {
                HLOGE("Failed to get SPE type.");
                return false;
            }
            return AddSpeEvent(speType);
        }
        if (StringStartsWith(eventName, "0x")
            && eventName.length() <= MAX_HEX_EVENT_NAME_LENGTH && IsHexDigits(eventName)) {
            return AddEvent(PERF_TYPE_RAW, std::stoull(eventName, nullptr, NUMBER_FORMAT_HEX_BASE),
                            excludeUser, excludeKernel, followGroup);
        } else {
            auto [find, typeId, configId] = GetStaticConfigId(eventName);
            if (find) {
                return AddEvent(typeId, configId, excludeUser, excludeKernel, followGroup);
            }
        }
    }

    printf("%s event is not supported by the kernel.\n", eventName.c_str());
    return false;
}

bool PerfEvents::AddSpeEvent(const u32 type, const bool followGroup)
{
    EventGroupItem &eventGroupItem = followGroup ? eventGroupItem_.back() :
                                     eventGroupItem_.emplace_back();
    EventItem &eventItem = eventGroupItem.eventItems.emplace_back();

    if (memset_s(&eventItem.attr, sizeof(perf_event_attr), 0, sizeof(perf_event_attr)) != EOK) {
        HLOGE("memset_s failed in PerfEvents::AddEvent");
        return false;
    }
    eventItem.attr.type = type;
    eventItem.attr.sample_period = MULTIPLE_SIZE;
    eventItem.attr.size = sizeof(perf_event_attr);
    eventItem.attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING | PERF_FORMAT_ID;
    eventItem.attr.inherit = (inherit_ ? 1 : 0);
    eventItem.attr.sample_type = SAMPLE_ID | PERF_SAMPLE_IP;
    eventItem.attr.sample_id_all = 1;
    eventItem.attr.disabled = 1;
    eventItem.attr.config = 0x700010007; // 0x700010007 : enable all
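    // Decoding the "enable all" default against the bit offsets used in
    // SetConfig() below: 0x700010007 sets bits 0-2 (ts_enable, pa_enable,
    // pct_enable), bit 16 (jitter), and bits 32-34 (branch_filter,
    // load_filter, store_filter).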
    if (config_ != 0) {
        eventItem.attr.config = config_;
    }
    if (config1_ != 0) {
        eventItem.attr.config1 = config1_;
    }
    if (config2_ != 0) {
        eventItem.attr.config2 = config2_;
    }
    HLOGD("config_ is 0x%" PRIx64 ", config1_ is 0x%" PRIx64 ", config2_ is 0x%" PRIx64 "",
          config_, config1_, config2_);
    return true;
}

void PerfEvents::SetConfig(std::map<const std::string, uint64_t> &speOptMaps)
{
    constexpr uint tsOffset = 0;
    constexpr uint paOffset = 1;
    constexpr uint pctOffset = 2;
    constexpr uint jitterOffset = 16;
    constexpr uint branchOffset = 32;
    constexpr uint loadOffset = 33;
    constexpr uint storeOffset = 34;
    config_ |= (speOptMaps["ts_enable"] & 0x1) << tsOffset;
    config_ |= (speOptMaps["pa_enable"] & 0x1) << paOffset;
    config_ |= (speOptMaps["pct_enable"] & 0x1) << pctOffset;
    config_ |= (speOptMaps["jitter"] & 0x1) << jitterOffset;
    config_ |= (speOptMaps["branch_filter"] & 0x1) << branchOffset;
    config_ |= (speOptMaps["load_filter"] & 0x1) << loadOffset;
    config_ |= (speOptMaps["store_filter"] & 0x1) << storeOffset;
    config1_ |= speOptMaps["event_filter"];
    config2_ |= speOptMaps["min_latency"] & 0xfff;
}
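
// Worked example of the packing above: with ts_enable = 1, branch_filter = 1,
// and everything else 0, config_ becomes (1 << 0) | (1 << 32) = 0x100000001.
// min_latency is masked to 12 bits, so a value of 0x1fff is stored as 0xfff.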

bool PerfEvents::AddEvent(const perf_type_id type, const __u64 config, const bool excludeUser,
                          const bool excludeKernel, const bool followGroup)
{
    HLOG_ASSERT(!excludeUser || !excludeKernel);
    CHECK_TRUE(!followGroup || !eventGroupItem_.empty(), false, 1, "no group leader created before");
    // found the event name
    CHECK_TRUE(IsEventSupport(type, config), false, 0, "");
    HLOGV("type %d config %llu excludeUser %d excludeKernel %d followGroup %d", type, config,
          excludeUser, excludeKernel, followGroup);

    // reuse the existing group when following; otherwise start a new group
    EventGroupItem &eventGroupItem = followGroup ? eventGroupItem_.back()
                                                 : eventGroupItem_.emplace_back();
    // always new item
    EventItem &eventItem = eventGroupItem.eventItems.emplace_back();

    eventItem.typeName = GetTypeName(type);
    if (type == PERF_TYPE_TRACEPOINT) {
        eventItem.configName = GetTraceConfigName(config);
    } else {
        eventItem.configName = GetStaticConfigName(type, config);
    }

    // attr
    if (memset_s(&eventItem.attr, sizeof(perf_event_attr), 0, sizeof(perf_event_attr)) != EOK) {
        HLOGE("memset_s failed in PerfEvents::AddEvent");
        return false;
    }
    eventItem.attr.size = sizeof(perf_event_attr);
    eventItem.attr.type = type;
    eventItem.attr.config = config;
    eventItem.attr.disabled = 1;
    eventItem.attr.read_format =
        PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING | PERF_FORMAT_ID;

    eventItem.attr.inherit = (inherit_ ? 1 : 0);
    eventItem.attr.exclude_kernel = excludeKernel;
    eventItem.attr.exclude_user = excludeUser;

    // we also need mmap for record
    if (recordCallBack_) {
        if (samplePeriod_ > 0) {
            eventItem.attr.freq = 0;
            eventItem.attr.sample_freq = 0;
            eventItem.attr.sample_period = samplePeriod_;
        } else if (sampleFreq_ > 0) {
            eventItem.attr.freq = 1;
            eventItem.attr.sample_freq = sampleFreq_;
        } else {
            if (type == PERF_TYPE_TRACEPOINT) {
                eventItem.attr.freq = 0;
                eventItem.attr.sample_period = DEFAULT_SAMPLE_PERIOD;
            } else {
                eventItem.attr.freq = 1;
                eventItem.attr.sample_freq = DEFAULT_SAMPLE_FREQUNCY;
            }
        }

        eventItem.attr.watermark = 1;
        eventItem.attr.wakeup_watermark = (mmapPages_ * pageSize_) >> 1;
        static constexpr unsigned int maxWakeupMark = 1024 * 1024;
        if (eventItem.attr.wakeup_watermark > maxWakeupMark) {
            eventItem.attr.wakeup_watermark = maxWakeupMark;
        }

        // for a group of events, only enable comm/mmap on the first event
        if (!followGroup) {
            eventItem.attr.comm = 1;
            eventItem.attr.mmap = 1;
            eventItem.attr.mmap2 = 1;
            eventItem.attr.mmap_data = 1;
        }

        if (sampleStackType_ == SampleStackType::DWARF) {
            eventItem.attr.sample_type = SAMPLE_TYPE | PERF_SAMPLE_CALLCHAIN |
                                         PERF_SAMPLE_STACK_USER | PERF_SAMPLE_REGS_USER;
            eventItem.attr.exclude_callchain_user = 1;
            eventItem.attr.sample_regs_user = GetSupportedRegMask(GetDeviceArch());
            eventItem.attr.sample_stack_user = dwarfSampleStackSize_;
        } else if (sampleStackType_ == SampleStackType::FP) {
            eventItem.attr.sample_type = SAMPLE_TYPE | PERF_SAMPLE_CALLCHAIN;
        } else {
            eventItem.attr.sample_type = SAMPLE_TYPE;
        }

        if (isHM_) {
            eventItem.attr.sample_type |= PERF_SAMPLE_SERVER_PID;
        }
    }

    // set clock id
    if (clockId_ != -1) {
        eventItem.attr.use_clockid = 1;
        eventItem.attr.clockid = clockId_;
    }
    if (branchSampleType_ != 0) {
        eventItem.attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
        eventItem.attr.branch_sample_type = branchSampleType_;
    }

    HLOGV("Add Event: '%s':'%s' %s %s %s", eventItem.typeName.c_str(), eventItem.configName.c_str(),
          excludeUser ? "excludeUser" : "", excludeKernel ? "excludeKernel" : "",
          followGroup ? "" : "group leader");

    return true;
}

std::unique_ptr<perf_event_attr> PerfEvents::CreateDefaultAttr(const perf_type_id type, const __u64 config)
{
    std::unique_ptr<perf_event_attr> attr = std::make_unique<perf_event_attr>();
    if (memset_s(attr.get(), sizeof(perf_event_attr), 0, sizeof(perf_event_attr)) != EOK) {
        HLOGE("memset_s failed in PerfEvents::CreateDefaultAttr");
        return nullptr;
    }
    attr->size = sizeof(perf_event_attr);
    attr->type = type;
    attr->config = config;
    attr->disabled = 1;
    return attr;
}

// should be moved to the upper caller
static struct sigaction g_oldSig {
};
static bool CaptureSig()
{
    HLOGD("capture Ctrl + C to end sampling gracefully");
    struct sigaction sig {
    };

    sig.sa_handler = [](int sig) {
        printf("\n Ctrl + C detected.\n");
        g_trackRunning = false;
    };

    sig.sa_flags = 0;
    if (sigaction(SIGINT, &sig, &g_oldSig) < 0) {
        perror("Fail to call sigaction for SIGINT");
        return false;
    }
    return true;
}

static void RecoverCaptureSig()
{
    if (sigaction(SIGINT, &g_oldSig, nullptr) < 0) {
        perror("Fail to call sigaction for SIGINT");
    }
}

// split into two parts
// because WriteAttrAndId needs the fd ids before tracking starts
bool PerfEvents::PrepareTracking(void)
{
    // 1. prepare cpu pid
    CHECK_TRUE(PrepareFdEvents(), false, 1, "PrepareFdEvents() failed");

    // 2. create events
    CHECK_TRUE(CreateFdEvents(), false, 1, "CreateFdEvents() failed");

    HLOGV("success");
    prepared_ = true;
    return true;
}

void PerfEvents::ExitReadRecordBufThread()
{
    if (isLowPriorityThread_) {
        if (setpriority(PRIO_PROCESS, gettid(), 0) != 0) {
            HLOGW("failed to decrease priority of reading kernel");
        }
    }
    if (readRecordBufThread_.joinable()) {
        {
            std::lock_guard<std::mutex> lk(mtxRrecordBuf_);
            readRecordThreadRunning_ = false;
            __sync_synchronize();
            cvRecordBuf_.notify_one();
        }
        readRecordBufThread_.join();
    }
}

bool PerfEvents::PrepareRecordThread()
{
    try {
        recordBuf_ = std::make_unique<RingBuffer>(CalcBufferSize());
    } catch (const std::exception &e) {
        printf("create record buffer(size %zu) failed: %s\n", CalcBufferSize(), e.what());
        HIPERF_HILOGI(MODULE_DEFAULT, "create record buffer failed: %{public}s", e.what());
        return false;
    }
    readRecordThreadRunning_ = true;
    readRecordBufThread_ = std::thread(&PerfEvents::ReadRecordFromBuf, this);
    if (backtrack_) {
        std::thread updateTimeThread(&PerfEvents::UpdateCurrentTime);
        updateTimeThread.detach();
    }

    rlimit rlim;
    int result = getrlimit(RLIMIT_NICE, &rlim);
    const rlim_t lowPriority = 40;
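    // Note (per setrlimit(2)): the nice ceiling is 20 - rlim_cur, so an
    // RLIMIT_NICE soft limit of 40 means this process may raise its priority
    // all the way to nice -20, which is what the branch below attempts.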
    if (result == 0 && rlim.rlim_cur == lowPriority) {
        const int highPriority = -20;
        result = setpriority(PRIO_PROCESS, gettid(), highPriority);
        if (result != 0) {
            HLOGW("failed to increase priority of reading kernel");
        } else {
            isLowPriorityThread_ = true;
        }
    }

    return true;
}

void PerfEvents::WaitRecordThread()
{
    printf("Process and Saving data...\n");
    ExitReadRecordBufThread();

    const auto usedTimeMsTick = duration_cast<milliseconds>(steady_clock::now() - trackingEndTime_);
    if (verboseReport_) {
        printf("Record Process Completed (wait %" PRIu64 " ms)\n", static_cast<uint64_t>(usedTimeMsTick.count()));
    }
    HLOGV("Record Process Completed (wait %" PRIu64 " ms)\n", static_cast<uint64_t>(usedTimeMsTick.count()));
#ifdef HIPERF_DEBUG_TIME
    printf("%zu record processed, used %0.3f ms(%4.2f us/record)\n", recordEventCount_,
           recordCallBackTime_.count() / MS_DURATION,
           recordCallBackTime_.count() / static_cast<double>(recordEventCount_));
    printf("total wait sleep time %0.3f ms.\n", recordSleepTime_.count() / MS_DURATION);
    printf("read from kernel time %0.3f ms.\n", recordKernelReadTime_.count() / MS_DURATION);
#endif
}

bool PerfEvents::StartTracking(const bool immediately)
{
    if (!prepared_) {
        HLOGD("not prepared_ yet");
        return false;
    }

    if (recordCallBack_ && !PrepareRecordThread()) {
        HLOGW("PrepareRecordThread failed.");
        return false;
    }

    HLOGD("step: 1. enable event");
    HIPERF_HILOGI(MODULE_DEFAULT, "StartTracking step: 1. enable event");
    trackingStartTime_ = steady_clock::now();
    if (immediately) {
        if (!EnableTracking()) {
            HLOGE("PerfEvents::EnableTracking() failed");
            return false;
        }
        printf("Profiling duration is %.3f seconds.\n", float(timeOut_.count()) / THOUSANDS);
        printf("Start Profiling...\n");
    }

    g_trackRunning = true;
    if (!CaptureSig()) {
        HLOGE("CaptureSig() failed");
        g_trackRunning = false;
        ExitReadRecordBufThread();
        return false;
    }

    HLOGD("step: 2. thread loop");
    HIPERF_HILOGI(MODULE_DEFAULT, "StartTracking step: 2. thread loop");
    if (recordCallBack_) {
        RecordLoop();
    } else {
        StatLoop();
    }

    HLOGD("step: 3. disable event");
    HIPERF_HILOGI(MODULE_DEFAULT, "StartTracking step: 3. disable event");
    if (!PerfEventsEnable(false)) {
        HLOGE("PerfEvents::PerfEventsEnable() failed");
    }
    if (recordCallBack_) {
        // read remaining samples after disabling events
        ReadRecordsFromMmaps();
    }
    trackingEndTime_ = steady_clock::now();

    RecoverCaptureSig();

    HLOGD("step: 4. wait record thread");
    HIPERF_HILOGI(MODULE_DEFAULT, "StartTracking step: 4. wait record thread");
    if (recordCallBack_) {
        WaitRecordThread();
    }

    HLOGD("step: 5. exit");
    HIPERF_HILOGI(MODULE_DEFAULT, "StartTracking step: 5. exit");
    return true;
}

bool PerfEvents::StopTracking(void)
{
    if (g_trackRunning) {
        printf("someone called StopTracking\n");
        HLOGI("someone called StopTracking");
        HIPERF_HILOGI(MODULE_DEFAULT, "someone called StopTracking");
        g_trackRunning = false;
        if (trackedCommand_) {
            if (trackedCommand_->GetState() == TrackedCommand::State::COMMAND_STARTED) {
                trackedCommand_->Stop();
            }
        }
        CHECK_TRUE(PerfEventsEnable(false), false, 1, "StopTracking : PerfEventsEnable(false) failed");
    }
    return true;
}

bool PerfEvents::PauseTracking(void)
{
    CHECK_TRUE(startedTracking_, false, 0, "");
    HIPERF_HILOGI(MODULE_DEFAULT, "someone called PauseTracking");
    return PerfEventsEnable(false);
}

bool PerfEvents::ResumeTracking(void)
{
    CHECK_TRUE(startedTracking_, false, 0, "");
    HIPERF_HILOGI(MODULE_DEFAULT, "someone called ResumeTracking");
    return PerfEventsEnable(true);
}

bool PerfEvents::OutputTracking()
{
    if (!startedTracking_) {
        HIPERF_HILOGI(MODULE_DEFAULT, "OutputTracking failed, tracking not started...");
        return false;
    }

    if (IsOutputTracking()) {
        HIPERF_HILOGI(MODULE_DEFAULT, "output in progress");
        return true;
    }

    outputEndTime_ = currentTimeSecond_.load();
    outputTracking_ = true;
    return true;
}

bool PerfEvents::EnableTracking()
{
    CHECK_TRUE(!startedTracking_, true, 0, "");
    CHECK_TRUE(PerfEventsEnable(true), false, 1, "PerfEvents::PerfEventsEnable() failed");

    if (trackedCommand_) {
        // start tracked Command
        if (trackedCommand_->GetState() == TrackedCommand::State::COMMAND_WAITING) {
            if (!trackedCommand_->StartCommand()) {
                int wstatus;
                if (!trackedCommand_->WaitCommand(wstatus)) {
                    trackedCommand_->Stop();
                }
                std::string commandName = trackedCommand_->GetCommandName();
                printf("failed to execute command: %zu: %s\n", commandName.size(), commandName.c_str());
                return false;
            }
        } else if (trackedCommand_->GetState() != TrackedCommand::State::COMMAND_STARTED) {
            return false;
        }
    }
    startedTracking_ = true;
    return true;
}

bool PerfEvents::IsTrackRunning()
{
    return g_trackRunning;
}

bool PerfEvents::IsOutputTracking()
{
    return outputTracking_;
}

void PerfEvents::SetOutputTrackingStatus(const bool status)
{
    outputTracking_ = status;
}

void PerfEvents::SetSystemTarget(const bool systemTarget)
{
    systemTarget_ = systemTarget;
}

void PerfEvents::SetCpu(const std::vector<pid_t> cpus)
{
    cpus_ = cpus;
}

void PerfEvents::SetPid(const std::vector<pid_t> pids)
{
    pids_ = pids;
}

void PerfEvents::SetTimeOut(const float timeOut)
{
    if (timeOut > 0) {
        timeOut_ = milliseconds(static_cast<int>(timeOut * THOUSANDS));
    }
}

void PerfEvents::SetTimeReport(int timeReport)
{
    static constexpr int minMsReportInterval = 10;
    if (timeReport < minMsReportInterval && timeReport != 0) {
        timeReport = minMsReportInterval;
        printf("time report min value is %d.\n", timeReport);
    }

    timeReport_ = milliseconds(timeReport);
}

std::map<__u64, std::string> PerfEvents::GetSupportEvents(const perf_type_id type)
{
    if (type == PERF_TYPE_TRACEPOINT) {
        LoadTracepointEventTypesFromSystem();
    }

    std::map<__u64, std::string> eventConfigs;
    auto configTable = TYPE_CONFIGS.find(type);
    if (configTable != TYPE_CONFIGS.end()) {
        auto configs = configTable->second;
        for (auto config : configs) {
            if (type == PERF_TYPE_TRACEPOINT || IsEventSupport(type, (__u64)config.first)) {
                eventConfigs.insert(config);
            } else {
                HLOGD("'%s' not supported", config.second.c_str());
            }
        }
    }
    return eventConfigs;
}

void PerfEvents::LoadTracepointEventTypesFromSystem()
{
    if (traceConfigTable.empty()) {
        std::string basePath {"/sys/kernel/tracing/events"};
        if (access(basePath.c_str(), R_OK) != 0) {
            basePath = "/sys/kernel/debug/tracing/events";
        }
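        // Each tracepoint id is read from tracefs; for example (illustrative
        // values), /sys/kernel/tracing/events/sched/sched_switch/id might hold
        // "318", which maps config 318 to the event name "sched:sched_switch".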
        for (const auto &eventName : GetSubDirs(basePath)) {
            std::string eventPath = basePath + "/" + eventName;
            for (const auto &concreteEvent : GetSubDirs(eventPath)) {
                std::string idPath = eventPath + "/" + concreteEvent + "/id";
                {
                    std::string resolvedPath = CanonicalizeSpecPath(idPath.c_str());
                    std::ifstream ifs {resolvedPath};
                    // clang-format off
                    const std::string idStr = {
                        std::istream_iterator<char>(ifs),
                        std::istream_iterator<char>()
                    };
                    // clang-format on
                    __u64 id {0};
                    try {
                        id = std::stoul(idStr, nullptr);
                    } catch (...) {
                        continue;
                    }
                    if (isHM_ && id < MIN_HM_TRACEPOINT_EVENT_ID) {
                        continue;
                    }
                    auto typeConfigs = TYPE_CONFIGS.find(PERF_TYPE_TRACEPOINT);
                    HLOG_ASSERT(typeConfigs != TYPE_CONFIGS.end());
                    auto configPair = typeConfigs->second.insert(
                        std::make_pair(id, eventName + ":" + concreteEvent));
                    traceConfigTable.insert(std::make_pair(id, eventName + ":" + concreteEvent));
                    ConfigTable::iterator it = configPair.first;
                    HLOGV("TYPE_CONFIGS add %llu:%s in %zu", it->first, it->second.c_str(),
                          typeConfigs->second.size());
                }
            }
        }
    }
}

void PerfEvents::SetPerCpu(const bool perCpu)
{
    perCpu_ = perCpu;
}

void PerfEvents::SetPerThread(const bool perThread)
{
    perThread_ = perThread;
}

void PerfEvents::SetVerboseReport(const bool verboseReport)
{
    verboseReport_ = verboseReport;
}

void PerfEvents::SetSampleFrequency(const unsigned int frequency)
{
    if (frequency > 0) {
        sampleFreq_ = frequency;
    }
    int maxRate = 0;
    CHECK_TRUE(ReadIntFromProcFile("/proc/sys/kernel/perf_event_max_sample_rate", maxRate),
               NO_RETVAL, LOG_TYPE_PRINTF,
               "read perf_event_max_sample_rate fail.\n");
    if (sampleFreq_ > static_cast<unsigned int>(maxRate)) {
        static bool printFlag = false;
        sampleFreq_ = static_cast<unsigned int>(maxRate);
        if (!printFlag) {
            printf("Adjust sampling frequency to maximum allowed frequency %d.\n", maxRate);
            printFlag = true;
        }
    }
}

void PerfEvents::SetSamplePeriod(const unsigned int period)
{
    if (period > 0) {
        samplePeriod_ = period;
    }
}

void PerfEvents::SetBackTrack(const bool backtrack)
{
    backtrack_ = backtrack;
}

void PerfEvents::SetBackTrackTime(const uint64_t backtrackTime)
{
    backtrackTime_ = backtrackTime;
}

void PerfEvents::SetMmapPages(const size_t mmapPages)
{
    mmapPages_ = mmapPages;
}

void PerfEvents::SetSampleStackType(const SampleStackType type)
{
    sampleStackType_ = type;
}

void PerfEvents::SetDwarfSampleStackSize(const uint32_t stackSize)
{
    HLOGD("request stack size is %u", stackSize);
    dwarfSampleStackSize_ = stackSize;
}

bool PerfEvents::PerfEventsEnable(const bool enable)
{
    HLOGV("%s", std::to_string(enable).c_str());
    for (const auto &eventGroupItem : eventGroupItem_) {
        for (const auto &eventItem : eventGroupItem.eventItems) {
            for (const auto &fdItem : eventItem.fdItems) {
                int result =
                    ioctl(fdItem.fd, enable ? PERF_EVENT_IOC_ENABLE : PERF_EVENT_IOC_DISABLE, 0);
                if (result < 0) {
                    printf("Cannot '%s' perf fd! type config name: '%s:%s'\n",
                           enable ? "enable" : "disable", eventItem.typeName.c_str(),
                           eventItem.configName.c_str());
                    return false;
                }
            }
        }
    }
    return true;
}

void PerfEvents::SetHM(const bool isHM)
{
    isHM_ = isHM;
}

void PerfEvents::SetStatCallBack(const StatCallBack reportCallBack)
{
    reportCallBack_ = reportCallBack;
}

void PerfEvents::SetStatReportFd(FILE* reportPtr)
{
    reportPtr_ = reportPtr;
}

void PerfEvents::SetRecordCallBack(const RecordCallBack recordCallBack)
{
    recordCallBack_ = recordCallBack;
}

inline void PerfEvents::PutAllCpus()
{
    int cpuConfigs = sysconf(_SC_NPROCESSORS_CONF);
    for (int i = 0; i < cpuConfigs; i++) {
        cpus_.push_back(i); // put all cpu
    }
}

bool PerfEvents::PrepareFdEvents(void)
{
    /*
    https://man7.org/linux/man-pages/man2/perf_event_open.2.html
    pid == 0 and cpu == -1
            This measures the calling process/thread on any CPU.

    pid == 0 and cpu >= 0
            This measures the calling process/thread only when running
            on the specified CPU.

    pid > 0 and cpu == -1
            This measures the specified process/thread on any CPU.

    pid > 0 and cpu >= 0
            This measures the specified process/thread only when
            running on the specified CPU.

    pid == -1 and cpu >= 0
            This measures all processes/threads on the specified CPU.
            This requires CAP_PERFMON (since Linux 5.8) or
            CAP_SYS_ADMIN capability or a
            /proc/sys/kernel/perf_event_paranoid value of less than 1.

    pid == -1 and cpu == -1
            This setting is invalid and will return an error.
    */
    if (systemTarget_) {
        pids_.clear();
        pids_.push_back(-1);
    } else {
        if (trackedCommand_) {
            pids_.push_back(trackedCommand_->GetChildPid());
        }
        if (pids_.empty()) {
            pids_.push_back(0); // no pid means use 0 as self pid
        }
    }
    if (perCpu_ || perThread_) {
        cpus_.clear();
        PutAllCpus();
    }
    if (cpus_.empty()) {
        PutAllCpus();
    }

    // print info telling the user which cpus and processes will be selected
    if (pids_.size() == 1 && pids_[0] == -1) {
        HLOGI("target process: system scope \n");
    } else {
        HLOGI("target process: %zu (%s)\n", pids_.size(),
              (pids_[0] == 0) ? std::to_string(gettid()).c_str() : VectorToString(pids_).c_str());
    }
    if (cpus_.size() == 1 && cpus_[0] == -1) {
        HLOGI("target cpus: %ld \n", sysconf(_SC_NPROCESSORS_CONF));
    } else {
        HLOGI("target cpus: %zu / %ld (%s)\n", cpus_.size(), sysconf(_SC_NPROCESSORS_CONF),
            VectorToString(cpus_).c_str());
    }

    return true;
}

bool PerfEvents::CreateFdEvents(void)
{
    // there must be at least one event, or this will fail
    CHECK_TRUE(!eventGroupItem_.empty(), false, LOG_TYPE_PRINTF, "no event select.\n");

    // create each fd for the cpu/process combinations the user selected
    /*
        https://man7.org/linux/man-pages/man2/perf_event_open.2.html

        (A single event on its own is created with group_fd = -1 and is
        considered to be a group with only 1 member.)
    */
    // Even if there is only one event, it is counted as a group.

    uint fdNumber = 0;
    uint eventNumber = 0;
    uint groupNumber = 0;
    for (auto &eventGroupItem : eventGroupItem_) {
        /*
            How the group configuration works:
            suppose we have 2 events, 2 pids, and 3 cpu settings.
            According to verification, the fds of one group must share the
            same pid and cpu; only the event may differ.
            In other words, binding E1 and E2 into the same group can only
            look like this:

            event E1 pid P1 cpu C1 [Group 1]
            event E1 pid P1 cpu C2 [Group 2]
            event E1 pid P1 cpu C3 [Group 3]

            event E1 pid P2 cpu C1 [Group 4]
            event E1 pid P2 cpu C2 [Group 5]
            event E1 pid P2 cpu C3 [Group 6]

            event E2 pid P1 cpu C1 [Group 1]
            event E2 pid P1 cpu C2 [Group 2]
            event E2 pid P1 cpu C3 [Group 3]

            event E2 pid P2 cpu C1 [Group 4]
            event E2 pid P2 cpu C2 [Group 5]
            event E2 pid P2 cpu C3 [Group 6]
        */
        HLOGV("group %2u. eventGroupItem leader: '%s':", groupNumber++,
              eventGroupItem.eventItems[0].configName.c_str());

        int groupFdCache[cpus_.size()][pids_.size()];
        for (size_t i = 0; i < cpus_.size(); i++) {     // each cpu
            for (size_t j = 0; j < pids_.size(); j++) { // each pid
                // The leader is created first, with group_fd = -1.
                groupFdCache[i][j] = -1;
            }
        }

        uint eventIndex = 0;
        for (auto &eventItem : eventGroupItem.eventItems) {
            HLOGV(" - event %2u. eventName: '%s:%s'", eventIndex++, eventItem.typeName.c_str(),
                  eventItem.configName.c_str());

            for (size_t icpu = 0; icpu < cpus_.size(); icpu++) {     // each cpu
                for (size_t ipid = 0; ipid < pids_.size(); ipid++) { // each pid
                    // one fd event group must match same cpu and same pid config (event can be
                    // different)
                    // clang-format off
                    UniqueFd fd = Open(eventItem.attr, pids_[ipid], cpus_[icpu],
                                       groupFdCache[icpu][ipid], 0);
                    // clang-format on
                    if (fd < 0) {
                        if (errno == ESRCH) {
                            if (verboseReport_) {
                                printf("pid %d does not exist.\n", pids_[ipid]);
                            }
                            HLOGE("pid %d does not exist.\n", pids_[ipid]);
                            HIPERF_HILOGE(MODULE_DEFAULT, "[CreateFdEvents] pid %{public}d does not exist.",
                                pids_[ipid]);
                            continue;
                        } else {
                            // clang-format off
                            if (verboseReport_) {
                                char errInfo[ERRINFOLEN] = { 0 };
                                strerror_r(errno, errInfo, ERRINFOLEN);
                                printf("%s event is not supported by the kernel on cpu %d. reason: %d:%s\n",
                                    eventItem.configName.c_str(), cpus_[icpu], errno, errInfo);
                            }
                            char errInfo[ERRINFOLEN] = { 0 };
                            strerror_r(errno, errInfo, ERRINFOLEN);
                            HLOGE("%s event is not supported by the kernel on cpu %d. reason: %d:%s\n",
                                eventItem.configName.c_str(), cpus_[icpu], errno, errInfo);
                            // clang-format on
                            break; // jump to next cpu
                        }
                    }
                    // after open succeeded, fill in the result
                    // make a new FdItem
                    FdItem &fdItem = eventItem.fdItems.emplace_back();
                    fdItem.fd = std::move(fd);
                    fdItem.cpu = cpus_[icpu];
                    fdItem.pid = pids_[ipid];
                    fdNumber++;

                    // if sampling, mmap ring buffer
                    bool createMmapSucc = true;
                    if (recordCallBack_) {
                        createMmapSucc = isSpe_ ?
                            CreateSpeMmap(fdItem, eventItem.attr) : CreateMmap(fdItem, eventItem.attr);
                    }
                    if (!createMmapSucc) {
                        printf("create mmap fail\n");
                        HIPERF_HILOGI(MODULE_DEFAULT, "create mmap fail");
                        return false;
                    }
                    // update group leader
                    int groupFdCacheNum = groupFdCache[icpu][ipid];
                    if (groupFdCacheNum == -1) {
                        groupFdCache[icpu][ipid] = fdItem.fd.Get();
                    }
                }
            }
            eventNumber++;
        }
    }

    CHECK_TRUE(fdNumber != 0, false, 1, "open %u fd for %u events", fdNumber, eventNumber);

    HLOGD("will try read %u events from %u fd (%zu groups):", eventNumber, fdNumber,
          eventGroupItem_.size());

    return true;
}

bool PerfEvents::StatReport(const __u64 &durationInSec)
{
    read_format_no_group readNoGroupValue;

    // only need read when need report
    HLOGM("eventGroupItem_:%zu", eventGroupItem_.size());
    __u64 groupId = 0;
    // clear countEvents data
    countEvents_.clear();
    for (const auto &eventGroupItem : eventGroupItem_) {
        HLOGM("eventItems:%zu", eventGroupItem.eventItems.size());
        groupId++;
        for (const auto &eventItem : eventGroupItem.eventItems) {
            // count event info together (every cpu, every pid)
            std::string configName = "";
            if (eventItem.attr.exclude_kernel) {
                configName = eventItem.configName + ":u";
            } else if (eventItem.attr.exclude_user) {
                configName = eventItem.configName + ":k";
            } else {
                configName = eventItem.configName;
            }
            if (countEvents_.count(configName) == 0) {
                auto countEvent = std::make_unique<CountEvent>(CountEvent {});
                countEvents_[configName] = std::move(countEvent);
                countEvents_[configName]->userOnly = eventItem.attr.exclude_kernel;
                countEvents_[configName]->kernelOnly = eventItem.attr.exclude_user;
            }
            const std::unique_ptr<CountEvent> &countEvent = countEvents_[configName];
            HLOGM("eventItem.fdItems:%zu", eventItem.fdItems.size());
            for (const auto &fditem : eventItem.fdItems) {
                if (read(fditem.fd, &readNoGroupValue, sizeof(readNoGroupValue)) > 0) {
                    countEvent->eventCount += readNoGroupValue.value;
                    countEvent->timeEnabled += readNoGroupValue.timeEnabled;
                    countEvent->timeRunning += readNoGroupValue.timeRunning;
                    countEvent->id = groupId;
                    if (durationInSec != 0) {
                        countEvent->usedCpus = (countEvent->eventCount / 1e9) / (durationInSec / THOUSANDS);
                    }
                    if (verboseReport_) {
                        printf("%s id:%llu(c%d:p%d) timeEnabled:%llu timeRunning:%llu value:%llu\n",
                               eventItem.configName.c_str(), readNoGroupValue.id, fditem.cpu, fditem.pid,
                               readNoGroupValue.timeEnabled, readNoGroupValue.timeRunning, readNoGroupValue.value);
                    }
                    if ((perCpu_ || perThread_) && readNoGroupValue.value) {
                        countEvent->summaries.emplace_back(fditem.cpu, fditem.pid, readNoGroupValue.value,
                            readNoGroupValue.timeEnabled, readNoGroupValue.timeRunning);
                    }
                } else {
                    printf("read failed from event '%s'\n", eventItem.configName.c_str());
                }
            }
        }
    }

    reportCallBack_(countEvents_, reportPtr_);

    return true;
}

bool PerfEvents::CreateSpeMmap(const FdItem &item, const perf_event_attr &attr)
{
    auto it = cpuMmap_.find(item.cpu);
    if (it == cpuMmap_.end()) {
        void *rbuf = mmap(nullptr, (1 + auxMmapPages_) * pageSize_, (PROT_READ | PROT_WRITE), MAP_SHARED,
                          item.fd.Get(), 0);
        CHECK_TRUE(rbuf != MMAP_FAILED, false, 1, "");
        void *auxRbuf = mmap(nullptr, auxMmapPages_ * pageSize_, (PROT_READ | PROT_WRITE), MAP_SHARED,
                             item.fd.Get(), 0);
        MmapFd mmapItem;
        mmapItem.fd = item.fd.Get();
        mmapItem.mmapPage = reinterpret_cast<perf_event_mmap_page *>(rbuf);
        mmapItem.buf = reinterpret_cast<uint8_t *>(rbuf) + pageSize_;
        mmapItem.auxBuf = auxRbuf;
        mmapItem.bufSize = auxMmapPages_ * pageSize_;
        mmapItem.auxBufSize = auxMmapPages_ * pageSize_;
        mmapItem.attr = &attr;
        mmapItem.tid_ = item.pid;
        mmapItem.cpu = item.cpu;
        cpuMmap_[item.cpu] = mmapItem;
        pollFds_.emplace_back(pollfd {mmapItem.fd, POLLIN, 0});
    } else {
        const MmapFd &mmapItem = it->second;
        int rc = ioctl(item.fd.Get(), PERF_EVENT_IOC_SET_OUTPUT, mmapItem.fd);
        if (rc != 0) {
            HLOGEP("ioctl PERF_EVENT_IOC_SET_OUTPUT (%d -> %d) ", item.fd.Get(), mmapItem.fd);
            perror("failed to share mapped buffer");
            return false;
        }
    }
    return true;
}

bool PerfEvents::CreateMmap(const FdItem &item, const perf_event_attr &attr)
{
    auto it = cpuMmap_.find(item.cpu);
    if (it == cpuMmap_.end()) {
        void *rbuf = mmap(nullptr, (1 + mmapPages_) * pageSize_, PROT_READ | PROT_WRITE, MAP_SHARED,
                          item.fd.Get(), 0);
        if (rbuf == MMAP_FAILED) {
            char errInfo[ERRINFOLEN] = {0};
            strerror_r(errno, errInfo, ERRINFOLEN);
            // perror() takes no format arguments; report the details via fprintf instead
            fprintf(stderr, "Fail to call mmap, errno:%d, errstr:%s\n", errno, errInfo);
            HIPERF_HILOGE(MODULE_DEFAULT, "[CreateMmap] Fail to call mmap. errno:%{public}d, errstr:%{public}s",
                errno, errInfo);
            return false;
        }
        MmapFd mmapItem;
        mmapItem.fd = item.fd.Get();
        mmapItem.mmapPage = reinterpret_cast<perf_event_mmap_page *>(rbuf);
        mmapItem.buf = reinterpret_cast<uint8_t *>(rbuf) + pageSize_;
        mmapItem.bufSize = mmapPages_ * pageSize_;
        mmapItem.attr = &attr;
        mmapItem.posCallChain = GetCallChainPosInSampleRecord(attr);

        cpuMmap_[item.cpu] = mmapItem;
        pollFds_.emplace_back(pollfd {mmapItem.fd, POLLIN, 0});
        HLOGD("CreateMmap success cpu %d fd %d mmapPages_ %u", item.cpu, mmapItem.fd, mmapPages_);
    } else {
        const MmapFd &mmapItem = it->second;
        int rc = ioctl(item.fd.Get(), PERF_EVENT_IOC_SET_OUTPUT, mmapItem.fd);
        if (rc != 0) {
            HLOGEP("ioctl PERF_EVENT_IOC_SET_OUTPUT (%d -> %d) ", item.fd.Get(), mmapItem.fd);
            perror("failed to share mapped buffer");
            return false;
        }
    }
    return true;
}
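
// Layout note for the mapping above: the kernel expects 1 metadata page
// (perf_event_mmap_page) followed by a power-of-two number of data pages,
// hence the (1 + mmapPages_) * pageSize_ length. With 4 KiB pages and, say,
// mmapPages_ = 256 (an illustrative value), each cpu gets one 4 KiB header
// page plus a 1 MiB ring of sample data; later fds on the same cpu are
// redirected into that ring via PERF_EVENT_IOC_SET_OUTPUT instead of being
// mapped again.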

std::vector<AttrWithId> PerfEvents::GetAttrWithId() const
{
    std::vector<AttrWithId> result;
    HLOGV("eventGroupItem_ %zu :", eventGroupItem_.size());

    for (const auto &eventGroupItem : eventGroupItem_) {
        HLOGV(" eventItems %zu eventItems:", eventGroupItem.eventItems.size());
        for (const auto &eventItem : eventGroupItem.eventItems) {
            AttrWithId attrId;
            attrId.attr = eventItem.attr;
            attrId.name = eventItem.configName;
            HLOGV("  fdItems %zu fdItems:", eventItem.fdItems.size());
            for (const auto &fdItem : eventItem.fdItems) {
                auto &id = attrId.ids.emplace_back(fdItem.GetPrefId());
                HLOGV("    eventItem.fdItems GetPrefId %" PRIu64 "", id);
            }
            result.emplace_back(attrId);
        }
    }
    return result;
}
1388 
1389 #ifdef CONFIG_HAS_CCM
1390 void PerfEvents::GetBufferSizeCfg(size_t &maxBufferSize, size_t &minBufferSize)
1391 {
1392     size_t tmpMaxBufferSize = 0;
1393     size_t tmpMinBufferSize = 0;
1394     if (GetCfgValue(PRODUCT_CONFIG_PATH, CFG_MAX_BUFFER_SIZE, tmpMaxBufferSize)) {
1395         if (!CheckOutOfRange(tmpMaxBufferSize, BUFFER_LOW_LEVEL, MAX_BUFFER_SIZE_LARGE)) {
1396             maxBufferSize = tmpMaxBufferSize;
1397             HIPERF_HILOGI(MODULE_DEFAULT, "GetCfgValue %{public}s: %{public}zu", CFG_MAX_BUFFER_SIZE, maxBufferSize);
1398         } else {
1399             HIPERF_HILOGE(MODULE_DEFAULT, "GetCfgValue %{public}s failed, %{public}zu out of range",
1400                           CFG_MAX_BUFFER_SIZE, tmpMaxBufferSize);
1401         }
1402     }
1403     if (GetCfgValue(PRODUCT_CONFIG_PATH, CFG_MIN_BUFFER_SIZE, tmpMinBufferSize)) {
1404         if (!CheckOutOfRange(tmpMinBufferSize, BUFFER_LOW_LEVEL, MAX_BUFFER_SIZE_LARGE)) {
1405             minBufferSize = tmpMinBufferSize;
1406             HIPERF_HILOGI(MODULE_DEFAULT, "GetCfgValue %{public}s: %{public}zu", CFG_MIN_BUFFER_SIZE, minBufferSize);
1407         } else {
1408             HIPERF_HILOGE(MODULE_DEFAULT, "GetCfgValue %{public}s failed, %{public}zu out of range",
1409                           CFG_MIN_BUFFER_SIZE, tmpMinBufferSize);
1410         }
1411     }
1412 }
1413 #endif
1414 
1415 size_t PerfEvents::CalcBufferSize()
1416 {
1417     size_t maxBufferSize;
1418     if (LittleMemory()) {
1419         maxBufferSize = MAX_BUFFER_SIZE_LITTLE;
1420     } else {
1421         maxBufferSize = MAX_BUFFER_SIZE_LARGE;
1422     }
1423     size_t minBufferSize = MIN_BUFFER_SIZE;
1424 #ifdef CONFIG_HAS_CCM
1425     GetBufferSizeCfg(maxBufferSize, minBufferSize);
1426 #endif
1427 
1428     size_t bufferSize = maxBufferSize;
1429     if (backtrack_ || !systemTarget_) {
1430         // assume the ring buffer is 4 times the total mmap size
1431         static constexpr int TIMES = 4;
1432         bufferSize = cpuMmap_.size() * mmapPages_ * pageSize_ * TIMES;
1433         if (bufferSize < minBufferSize) {
1434             bufferSize = minBufferSize;
1435         } else if (bufferSize > maxBufferSize) {
1436             bufferSize = maxBufferSize;
1437         }
1438     }
1439     HLOGD("CalcBufferSize return %zu", bufferSize);
1440     return bufferSize;
1441 }
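// Editor's note: a worked instance of the sizing rule above, with assumed
// values: 8 mapped cpus, mmapPages_ = 256 and pageSize_ = 4096 give
//   8 * 256 * 4096 * 4 (TIMES) = 33554432 bytes (32 MiB),
// which is then clamped into [minBufferSize, maxBufferSize].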
1442 
1443 inline bool PerfEvents::IsRecordInMmap(const int timeout)
1444 {
1445     HLOGV("enter");
1446     if (pollFds_.size() > 0) {
1447         static uint32_t pollFailCount = 0;
1448         if (poll(static_cast<struct pollfd*>(pollFds_.data()), pollFds_.size(), timeout) <= 0) {
1449             // time out try again
1450             if (++pollFailCount >= POLL_FAIL_COUNT_THRESHOLD) {
1451                 pollFailCount = 0;
1452                 HIPERF_HILOGW(MODULE_DEFAULT, "mmaps have had no data for the past 5s");
1453             }
1454             return false;
1455         } else {
1456             pollFailCount = 0;
1457         }
1458     }
1459     HLOGV("poll record from mmap");
1460     return true;
1461 }
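// Editor's note: the "past 5s" in the warning above assumes a poll timeout of
// about 500ms; POLL_FAIL_COUNT_THRESHOLD (10) consecutive empty polls then
// cover roughly 10 * 500ms = 5s. A different timeout argument scales the real
// elapsed time accordingly.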
1462 
1463 static bool CompareRecordTime(const PerfEvents::MmapFd *left, const PerfEvents::MmapFd *right)
1464 {
1465     return left->timestamp > right->timestamp;
1466 }
1467 
1468 void PerfEvents::ReadRecordsFromMmaps()
1469 {
1470 #ifdef HIPERF_DEBUG_TIME
1471     const auto readKernelStartTime = steady_clock::now();
1472 #endif
1473     // get readable mmap at this time
1474     for (auto &it : cpuMmap_) {
1475         ssize_t dataSize = it.second.mmapPage->data_head - it.second.mmapPage->data_tail;
1476         __sync_synchronize(); // same as rmb in gcc: read barrier after reading mmapPage->data_head
1477         if (dataSize <= 0) {
1478             continue;
1479         }
1480         it.second.dataSize = dataSize;
1481         MmapRecordHeap_.push_back(&(it.second));
1482     }
1483     if (MmapRecordHeap_.empty()) {
1484         return;
1485     }
1486     bool enableFlag = false;
1487     if (MmapRecordHeap_.size() > 1) {
1488         for (const auto &it : MmapRecordHeap_) {
1489             GetRecordFromMmap(*it);
1490         }
1491         std::make_heap(MmapRecordHeap_.begin(), MmapRecordHeap_.end(), CompareRecordTime);
1492 
1493         size_t heapSize = MmapRecordHeap_.size();
1494         while (heapSize > 1) {
1495             std::pop_heap(MmapRecordHeap_.begin(), MmapRecordHeap_.begin() + heapSize,
1496                           CompareRecordTime);
1497             bool auxEvent = false;
1498             u32 pid = 0;
1499             u32 tid = 0;
1500             u64 auxOffset = 0;
1501             u64 auxSize = 0;
1502             MoveRecordToBuf(*MmapRecordHeap_[heapSize - 1], auxEvent, auxOffset, auxSize, pid, tid);
1503             if (isSpe_ && auxEvent) {
1504                 ReadRecordsFromSpeMmaps(*MmapRecordHeap_[heapSize - 1], auxOffset, auxSize, pid, tid);
1505                 enableFlag = true;
1506             }
1507             if (GetRecordFromMmap(*MmapRecordHeap_[heapSize - 1])) {
1508                 std::push_heap(MmapRecordHeap_.begin(), MmapRecordHeap_.begin() + heapSize,
1509                                CompareRecordTime);
1510             } else {
1511                 heapSize--;
1512             }
1513         }
1514     }
1515 
1516     while (GetRecordFromMmap(*MmapRecordHeap_.front())) {
1517         bool auxEvent = false;
1518         u32 pid = 0;
1519         u32 tid = 0;
1520         u64 auxOffset = 0;
1521         u64 auxSize = 0;
1522         MoveRecordToBuf(*MmapRecordHeap_.front(), auxEvent, auxOffset, auxSize, pid, tid);
1523         if (isSpe_ && auxEvent) {
1524             ReadRecordsFromSpeMmaps(*MmapRecordHeap_.front(), auxOffset, auxSize, pid, tid);
1525             enableFlag = true;
1526         }
1527     }
1528     if (isSpe_ && enableFlag) {
1529         PerfEventsEnable(false);
1530         PerfEventsEnable(true);
1531     }
1532     MmapRecordHeap_.clear();
1533     {
1534         std::lock_guard<std::mutex> lk(mtxRrecordBuf_);
1535         recordBufReady_ = true;
1536     }
1537     cvRecordBuf_.notify_one();
1538 #ifdef HIPERF_DEBUG_TIME
1539     recordKernelReadTime_ += duration_cast<milliseconds>(steady_clock::now() - readKernelStartTime);
1540 #endif
1541 }
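// Editor's note: the heap logic above is a k-way merge: each per-cpu mmap
// yields records in ascending timestamp order, and CompareRecordTime makes a
// min-heap on timestamp so the globally oldest record is consumed first. A
// self-contained sketch of the same pattern (hypothetical names, assuming
// <vector> and <algorithm> are available):
struct TimestampStreamExample {
    std::vector<uint64_t> ts; // timestamps, already ascending within a stream
    size_t pos = 0;
    bool HasData() const { return pos < ts.size(); }
    uint64_t Head() const { return ts[pos]; }
};

[[maybe_unused]] static std::vector<uint64_t> MergeByTimeExample(std::vector<TimestampStreamExample*> streams)
{
    // assumes every stream starts non-empty, mirroring MmapRecordHeap_
    std::vector<uint64_t> merged;
    auto cmp = [](const TimestampStreamExample *l, const TimestampStreamExample *r) {
        return l->Head() > r->Head(); // ">" turns make_heap into a min-heap
    };
    std::make_heap(streams.begin(), streams.end(), cmp);
    size_t heapSize = streams.size();
    while (heapSize > 0) {
        // move the stream with the oldest head to the back, like the loop above
        std::pop_heap(streams.begin(), streams.begin() + heapSize, cmp);
        TimestampStreamExample *s = streams[heapSize - 1];
        merged.push_back(s->Head());
        s->pos++;
        if (s->HasData()) {
            std::push_heap(streams.begin(), streams.begin() + heapSize, cmp);
        } else {
            heapSize--; // stream exhausted, shrink the heap
        }
    }
    return merged;
}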
1542 
1543 bool PerfEvents::GetRecordFromMmap(MmapFd &mmap)
1544 {
1545     if (mmap.dataSize <= 0) {
1546         return false;
1547     }
1548 
1549     GetRecordFieldFromMmap(mmap, &(mmap.header), mmap.mmapPage->data_tail, sizeof(mmap.header));
1550     if (mmap.header.type != PERF_RECORD_SAMPLE) {
1551         mmap.timestamp = 0;
1552         return true;
1553     }
1554     // in PERF_RECORD_SAMPLE : header + u64 sample_id + u64 ip + u32 pid + u32 tid + u64 time
1555     constexpr size_t timePos = sizeof(perf_event_header) + sizeof(uint64_t) + sizeof(uint64_t) +
1556                                sizeof(uint32_t) + sizeof(uint32_t);
1557     GetRecordFieldFromMmap(mmap, &(mmap.timestamp), mmap.mmapPage->data_tail + timePos,
1558                            sizeof(mmap.timestamp));
1559     return true;
1560 }
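// Editor's note: timePos above encodes the fixed sample prefix
//   header(8) + sample_id(8) + ip(8) + pid(4) + tid(4) = 32 bytes,
// so the u64 timestamp is read 32 bytes into the record. This only holds for
// the sample_type this tool configures; other layouts would shift the field.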
1561 
1562 void PerfEvents::GetRecordFieldFromMmap(MmapFd &mmap, void *dest, size_t pos, size_t size)
1563 {
1564     CHECK_TRUE(mmap.bufSize != 0, NO_RETVAL, 0, "");
1565     pos = pos % mmap.bufSize;
1566     size_t tailSize = mmap.bufSize - pos;
1567     size_t copySize = std::min(size, tailSize);
1568     if (memcpy_s(dest, copySize, mmap.buf + pos, copySize) != 0) {
1569         HLOGEP("memcpy_s mmap.buf + pos to dest failed. size %zd", copySize);
1570     }
1571     if (copySize < size) {
1572         size -= copySize;
1573         if (memcpy_s(static_cast<uint8_t *>(dest) + copySize, size, mmap.buf, size) != 0) {
1574             HLOGEP("memcpy_s mmap.buf to dest failed. size %zd", size);
1575         }
1576     }
1577 }
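// Editor's note: a self-contained sketch of the wraparound copy performed
// above; "ring" and "RingReadExample" are hypothetical names, and std::copy
// stands in for memcpy_s:
[[maybe_unused]] static void RingReadExample(const uint8_t *ring, size_t bufSize, size_t pos,
                                             uint8_t *dest, size_t size)
{
    pos = pos % bufSize;                          // wrap the start position
    size_t first = std::min(size, bufSize - pos); // bytes up to the ring end
    std::copy(ring + pos, ring + pos + first, dest);
    std::copy(ring, ring + (size - first), dest + first); // wrapped remainder (may be empty)
}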
1578 
1579 size_t PerfEvents::GetCallChainPosInSampleRecord(const perf_event_attr &attr)
1580 {
1581     // reference struct PerfRecordSampleData
1582     int fixedFieldNumber = __builtin_popcountll(
1583         attr.sample_type & (PERF_SAMPLE_IDENTIFIER | PERF_SAMPLE_IP | PERF_SAMPLE_TID |
1584                             PERF_SAMPLE_TIME | PERF_SAMPLE_ADDR | PERF_SAMPLE_ID |
1585                             PERF_SAMPLE_STREAM_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD));
1586     size_t pos = sizeof(perf_event_header) + sizeof(uint64_t) * fixedFieldNumber;
1587     if (attr.sample_type & PERF_SAMPLE_READ) {
1588         pos += sizeof(read_format);
1589     }
1590     return pos;
1591 }
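// Editor's note: a worked instance of the position computed above. With an
// assumed sample_type of PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_TIME,
// __builtin_popcountll counts 3 set bits, i.e. 3 u64-sized fixed fields
// (TID packs pid and tid into one u64), so the callchain starts at
//   sizeof(perf_event_header) + 3 * 8 = 32 bytes,
// plus sizeof(read_format) when PERF_SAMPLE_READ is also set.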
1592 
1593 size_t PerfEvents::GetStackSizePosInSampleRecord(MmapFd &mmap)
1594 {
1595     size_t pos = mmap.posCallChain;
1596     if (mmap.attr->sample_type & PERF_SAMPLE_CALLCHAIN) {
1597         uint64_t nr = 0;
1598         GetRecordFieldFromMmap(mmap, &nr, mmap.mmapPage->data_tail + pos, sizeof(nr));
1599         pos += (sizeof(nr) + nr * sizeof(uint64_t));
1600     }
1601     if (mmap.attr->sample_type & PERF_SAMPLE_RAW) {
1602         uint32_t raw_size = 0;
1603         GetRecordFieldFromMmap(mmap, &raw_size, mmap.mmapPage->data_tail + pos, sizeof(raw_size));
1604         pos += (sizeof(raw_size) + raw_size);
1605     }
1606     if (mmap.attr->sample_type & PERF_SAMPLE_BRANCH_STACK) {
1607         uint64_t bnr = 0;
1608         GetRecordFieldFromMmap(mmap, &bnr, mmap.mmapPage->data_tail + pos, sizeof(bnr));
1609         pos += (sizeof(bnr) + bnr * sizeof(PerfBranchEntry));
1610     }
1611     if (mmap.attr->sample_type & PERF_SAMPLE_REGS_USER) {
1612         uint64_t user_abi = 0;
1613         GetRecordFieldFromMmap(mmap, &user_abi, mmap.mmapPage->data_tail + pos, sizeof(user_abi));
1614         pos += sizeof(user_abi);
1615         if (user_abi > 0) {
1616             uint64_t reg_nr = __builtin_popcountll(mmap.attr->sample_regs_user);
1617             pos += reg_nr * sizeof(uint64_t);
1618         }
1619     }
1620     if (mmap.attr->sample_type & PERF_SAMPLE_SERVER_PID) {
1621         uint64_t server_nr = 0;
1622         GetRecordFieldFromMmap(mmap, &server_nr, mmap.mmapPage->data_tail + pos, sizeof(server_nr));
1623         pos += (sizeof(server_nr) + server_nr * sizeof(uint64_t));
1624     }
1625     return pos;
1626 }
1627 
1628 bool PerfEvents::CutStackAndMove(MmapFd &mmap)
1629 {
1630     constexpr uint32_t alignSize = 64;
1631     if (!(mmap.attr->sample_type & PERF_SAMPLE_STACK_USER)) {
1632         return false;
1633     }
1634     size_t stackSizePos = GetStackSizePosInSampleRecord(mmap);
1635     uint64_t stackSize = 0;
1636     GetRecordFieldFromMmap(mmap, &stackSize, mmap.mmapPage->data_tail + stackSizePos,
1637                            sizeof(stackSize));
1638     if (stackSize == 0) {
1639         return false;
1640     }
1641     size_t dynSizePos = stackSizePos + sizeof(uint64_t) + stackSize;
1642     uint64_t dynSize = 0;
1643     GetRecordFieldFromMmap(mmap, &dynSize, mmap.mmapPage->data_tail + dynSizePos, sizeof(dynSize));
1644     uint64_t newStackSize = std::min((dynSize + alignSize - 1) &
1645                                      (~(alignSize >= 1 ? alignSize - 1 : 0)), stackSize);
1646     if (newStackSize >= stackSize) {
1647         return false;
1648     }
1649     HLOGM("stackSize %" PRIx64 " dynSize %" PRIx64 " newStackSize %" PRIx64 "\n", stackSize, dynSize, newStackSize);
1650     // move and cut stack_data
1651     // mmap: |<+++copy1+++>|<++++++copy2++++++>|<---------------cut--------------->|<+++copy3+++>|
1652     //             ^                    ^                        ^                 ^
1653     //         new_header          stackSizePos         <stackSize-dynSize>     dynSizePos
1654     uint16_t recordSize = mmap.header.size;
1655     mmap.header.size -= stackSize - newStackSize; // reduce the stack size
1656     uint8_t *buf = recordBuf_->AllocForWrite(mmap.header.size);
1657     // copy1: new_header
1658     CHECK_TRUE(buf != nullptr, false, 0, "");
1659     if (memcpy_s(buf, sizeof(perf_event_header), &(mmap.header), sizeof(perf_event_header)) != 0) {
1660         HLOGEP("memcpy_s mmap.header to buf failed. size %zd", sizeof(perf_event_header));
1661     }
1662     size_t copyPos = sizeof(perf_event_header);
1663     size_t copySize = stackSizePos - sizeof(perf_event_header) + sizeof(stackSize) + newStackSize;
1664     // copy2: copy stack_size, data[stack_size],
1665     GetRecordFieldFromMmap(mmap, buf + copyPos, mmap.mmapPage->data_tail + copyPos, copySize);
1666     copyPos += copySize;
1667     // copy3: copy dyn_size
1668     GetRecordFieldFromMmap(mmap, buf + copyPos, mmap.mmapPage->data_tail + dynSizePos,
1669                            recordSize - dynSizePos);
1670     // update stack_size
1671     if (memcpy_s(buf + stackSizePos, sizeof(stackSize), &(newStackSize), sizeof(newStackSize)) != 0) {
1672         HLOGEP("memcpy_s newStack to buf stackSizePos failed. size %zd", sizeof(newStackSize));
1673     }
1674     recordBuf_->EndWrite();
1675     __sync_synchronize();
1676     mmap.mmapPage->data_tail += recordSize;
1677     mmap.dataSize -= recordSize;
1678     return true;
1679 }
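// Editor's note: the newStackSize expression above rounds dynSize up to the
// 64-byte alignSize, i.e. (dynSize + 63) & ~63. For an assumed dynSize of
// 416 (0x1a0) that yields 448 (0x1c0); the record is only rewritten when the
// rounded size is still smaller than the recorded stackSize.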
1680 
1681 void PerfEvents::MoveRecordToBuf(MmapFd &mmap, bool &isAuxEvent, u64 &auxOffset, u64 &auxSize, u32 &pid, u32 &tid)
1682 {
1683     uint8_t *buf = nullptr;
1684     if (mmap.header.type == PERF_RECORD_SAMPLE) {
1685         if (recordBuf_->GetFreeSize() <= BUFFER_CRITICAL_LEVEL) {
1686             lostSamples_++;
1687             HLOGD("BUFFER_CRITICAL_LEVEL: lost sample record");
1688             goto RETURN;
1689         }
1690         if (CutStackAndMove(mmap)) {
1691             return;
1692         }
1693     } else if (mmap.header.type == PERF_RECORD_LOST) {
1694         // in PERF_RECORD_LOST : header + u64 id + u64 lost
1695         constexpr size_t lostPos = sizeof(perf_event_header) + sizeof(uint64_t);
1696         uint64_t lost = 0;
1697         GetRecordFieldFromMmap(mmap, &lost, mmap.mmapPage->data_tail + lostPos, sizeof(lost));
1698         lostSamples_ += lost;
1699         HLOGD("PERF_RECORD_LOST: lost sample record");
1700         goto RETURN;
1701     }
1702     if (mmap.header.type == PERF_RECORD_AUX) {
1703         isAuxEvent = true;
1704         // in AUX : header + u64 aux_offset + u64 aux_size
1705         uint64_t auxOffsetPos = sizeof(perf_event_header);
1706         uint64_t auxSizePos = sizeof(perf_event_header) + sizeof(uint64_t);
1707         uint64_t pidPos = auxSizePos + sizeof(uint64_t) * 2; // skip aux_size and flags (2 u64 fields)
1708         uint64_t tidPos = pidPos + sizeof(uint32_t);
1709         GetRecordFieldFromMmap(mmap, &auxOffset, mmap.mmapPage->data_tail + auxOffsetPos, sizeof(auxOffset));
1710         GetRecordFieldFromMmap(mmap, &auxSize, mmap.mmapPage->data_tail + auxSizePos, sizeof(auxSize));
1711         GetRecordFieldFromMmap(mmap, &pid, mmap.mmapPage->data_tail + pidPos, sizeof(pid));
1712         GetRecordFieldFromMmap(mmap, &tid, mmap.mmapPage->data_tail + tidPos, sizeof(tid));
1713     }
1714 
1715     if ((buf = recordBuf_->AllocForWrite(mmap.header.size)) == nullptr) {
1716         // this record type must be Non-Sample
1717         lostNonSamples_++;
1718         HLOGD("alloc buffer failed: lost non-sample record");
1719         goto RETURN;
1720     }
1721 
1722     GetRecordFieldFromMmap(mmap, buf, mmap.mmapPage->data_tail, mmap.header.size);
1723     recordBuf_->EndWrite();
1724 RETURN:
1725     __sync_synchronize();
1726     mmap.mmapPage->data_tail += mmap.header.size;
1727     mmap.dataSize -= mmap.header.size;
1728 }
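// Editor's note: the AUX field offsets above assume this record layout (byte
// offsets): 0 perf_event_header, 8 aux_offset, 16 aux_size, 24 flags,
// 32 pid, 36 tid, where pid/tid are the leading u32 pair of the sample_id
// trailer; pidPos = auxSizePos + 2 * sizeof(u64) therefore skips aux_size
// and flags.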
1729 
1730 inline void PerfEvents::WaitDataFromRingBuffer()
1731 {
1732     std::unique_lock<std::mutex> lock(mtxRrecordBuf_);
1733     cvRecordBuf_.wait(lock, [this] {
1734         if (recordBufReady_) {
1735             recordBufReady_ = false;
1736             return true;
1737         }
1738         return !readRecordThreadRunning_;
1739     });
1740 }
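// Editor's note: a sketch of the producer half this wait pairs with (the same
// lines appear at the end of ReadRecordsFromMmaps above): set the flag under
// the mutex, then notify outside the critical section.
//
//     {
//         std::lock_guard<std::mutex> lk(mtxRrecordBuf_);
//         recordBufReady_ = true;
//     }
//     cvRecordBuf_.notify_one();
//
// The wait predicate also fires when readRecordThreadRunning_ turns false, so
// ReadRecordFromBuf can drain the buffer and exit cleanly during shutdown.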
1741 
1742 inline bool PerfEvents::ProcessRecord(const perf_event_attr* attr, uint8_t* data)
1743 {
1744     uint32_t* type = reinterpret_cast<uint32_t *>(data);
1745 #ifdef HIPERF_DEBUG_TIME
1746     const auto readingStartTime = steady_clock::now();
1747 #endif
1748 #if !HIDEBUG_SKIP_CALLBACK
1749     PerfEventRecord& record = PerfEventRecordFactory::GetPerfEventRecord(*type, data, *attr);
1750     if (backtrack_ && readRecordThreadRunning_ && record.GetType() == PERF_RECORD_SAMPLE) {
1751         const PerfRecordSample& sample = static_cast<const PerfRecordSample&>(record);
1752         if (IsSkipRecordForBacktrack(sample)) {
1753             return false;
1754         }
1755     }
1756 
1757     recordCallBack_(record);
1758 #endif
1759     recordEventCount_++;
1760 #ifdef HIPERF_DEBUG_TIME
1761     recordCallBackTime_ += duration_cast<milliseconds>(steady_clock::now() - readingStartTime);
1762 #endif
1763     recordBuf_->EndRead();
1764     return true;
1765 }
1766 
1767 void PerfEvents::ReadRecordFromBuf()
1768 {
1769     const perf_event_attr *attr = GetDefaultAttr();
1770     uint8_t *p = nullptr;
1771 
1772     while (readRecordThreadRunning_) {
1773         WaitDataFromRingBuffer();
1774         bool output = outputTracking_;
1775         while ((p = recordBuf_->GetReadData()) != nullptr) {
1776             if (!ProcessRecord(attr, p)) {
1777                 break;
1778             }
1779         }
1780         if (backtrack_ && output) {
1781             outputTracking_ = false;
1782             outputEndTime_ = 0;
1783         }
1784     }
1785     HLOGD("exit because trackStoped");
1786 
1787     // read the data left over in buffer
1788     while ((p = recordBuf_->GetReadData()) != nullptr) {
1789         ProcessRecord(attr, p);
1790     }
1791     HLOGD("read all records from buffer");
1792 }
1793 
1794 bool PerfEvents::HaveTargetsExit(const std::chrono::steady_clock::time_point &startTime)
1795 {
1796     if (systemTarget_) {
1797         return false;
1798     }
1799     if (trackedCommand_) {
1800         if (trackedCommand_->GetState() < TrackedCommand::State::COMMAND_STARTED) {
1801             return false; // not start yet
1802         }
1803         int wstatus;
1804         if (trackedCommand_->WaitCommand(wstatus)) {
1805             milliseconds usedMsTick = duration_cast<milliseconds>(steady_clock::now() - startTime);
1806             printf("tracked command(%s) has exited (total %" PRId64 " ms)\n",
1807                    trackedCommand_->GetCommandName().c_str(), (uint64_t)usedMsTick.count());
1808             return true;
1809         }
1810         return false;
1811     }
1812 
1813     for (auto it = pids_.begin(); it != pids_.end();) {
1814         if (IsDir("/proc/" + std::to_string(*it))) {
1815             it++;
1816         } else {
1817             it = pids_.erase(it);
1818         }
1819     }
1820     if (pids_.empty()) {
1821         milliseconds usedMsTick = duration_cast<milliseconds>(steady_clock::now() - startTime);
1822         printf("tracked processes have exited (total %" PRId64 " ms)\n", (uint64_t)usedMsTick.count());
1823         return true;
1824     }
1825     return false;
1826 }
1827 
1828 void PerfEvents::RecordLoop()
1829 {
1830     // calc the time
1831     const auto startTime = steady_clock::now();
1832     const auto endTime = startTime + timeOut_;
1833     milliseconds usedTimeMsTick {};
1834     int count = 1;
1835 
1836     while (g_trackRunning) {
1837         // time check point
1838         const auto thisTime = steady_clock::now();
1839         usedTimeMsTick = duration_cast<milliseconds>(thisTime - startTime);
1840         if ((uint64_t)usedTimeMsTick.count() > (uint64_t)(count * THOUSANDS)) {
1841             if (HaveTargetsExit(startTime)) {
1842                 break;
1843             }
1844             ++count;
1845         }
1846 
1847         if (!backtrack_ && thisTime >= endTime) {
1848             printf("Timeout exit (total %" PRId64 " ms)\n", (uint64_t)usedTimeMsTick.count());
1849             if (trackedCommand_) {
1850                 trackedCommand_->Stop();
1851             }
1852             break;
1853         }
1854 
1855         int timeLeft = duration_cast<milliseconds>(endTime - thisTime).count();
1856         if (IsRecordInMmap(std::min(timeLeft, pollTimeOut_))) {
1857             ReadRecordsFromMmaps();
1858         }
1859     }
1860 
1861     if (!g_trackRunning) {
1862         // for user interrupt situation, print time statistic
1863         usedTimeMsTick = duration_cast<milliseconds>(steady_clock::now() - startTime);
1864         printf("User interrupt exit (total %" PRId64 " ms)\n", (uint64_t)usedTimeMsTick.count());
1865     }
1866 }
1867 
1868 void PerfEvents::StatLoop()
1869 {
1870     // calc the time
1871     const auto startTime = steady_clock::now();
1872     const auto endTime = startTime + timeOut_;
1873     auto nextReportTime = startTime + timeReport_;
1874     milliseconds usedTimeMsTick {};
1875     __u64 durationInSec = 0;
1876     int64_t thresholdTimeInMs = 2 * HUNDREDS;
1877 
1878     while (g_trackRunning) {
1879         // time check point
1880         const auto thisTime = steady_clock::now();
1881         if (timeReport_ != milliseconds::zero()) {
1882             // stat cmd
1883             if (thisTime >= nextReportTime) {
1884                 // report progress; the output below is for logging only
1885                 usedTimeMsTick = duration_cast<milliseconds>(thisTime - startTime);
1886                 durationInSec = usedTimeMsTick.count(); // note: holds milliseconds despite the name
1887                 auto leftTimeMsTick = duration_cast<milliseconds>(endTime - thisTime);
1888                 if (reportPtr_ == nullptr) {
1889                     printf("\nReport at %" PRIu64 " ms (%" PRIu64 " ms left):\n",
1890                         static_cast<uint64_t>(usedTimeMsTick.count()),
1891                         static_cast<uint64_t>(leftTimeMsTick.count()));
1892                 } else {
1893                     fprintf(reportPtr_, "\nReport at %" PRIu64 " ms (%" PRIu64 " ms left):\n",
1894                         static_cast<uint64_t>(usedTimeMsTick.count()),
1895                         static_cast<uint64_t>(leftTimeMsTick.count()));
1896                 }
1896                 }
1897                 // schedule the next report
1898                 nextReportTime += timeReport_;
1899                 StatReport(durationInSec);
1900             }
1901         }
1902 
1903         if (HaveTargetsExit(startTime)) {
1904             break;
1905         }
1906 
1907         if (thisTime >= endTime) {
1908             usedTimeMsTick = duration_cast<milliseconds>(thisTime - startTime);
1909             durationInSec = usedTimeMsTick.count();
1910             if (reportPtr_ == nullptr) {
1911                 printf("Timeout exit (total %" PRIu64 " ms)\n", static_cast<uint64_t>(usedTimeMsTick.count()));
1912             } else {
1913                 fprintf(reportPtr_, "Timeout exit (total %" PRIu64 " ms)\n",
1914                     static_cast<uint64_t>(usedTimeMsTick.count()));
1915             }
1916             if (trackedCommand_) {
1917                 trackedCommand_->Stop();
1918             }
1919             break;
1920         }
1921 
1922         // if more than 200ms is left, sleep 100ms; otherwise sleep 200us
1923         uint64_t defaultSleepUs = 2 * HUNDREDS; // 200us
1924         if (timeReport_ == milliseconds::zero()
1925             && (timeOut_.count() * THOUSANDS) > thresholdTimeInMs) {
1926             milliseconds leftTimeMsTmp = duration_cast<milliseconds>(endTime - thisTime);
1927             if (leftTimeMsTmp.count() > thresholdTimeInMs) {
1928                 defaultSleepUs = HUNDREDS * THOUSANDS; // 100ms
1929             }
1930         }
1931         std::this_thread::sleep_for(microseconds(defaultSleepUs));
1932     }
1933 
1934     if (!g_trackRunning) {
1935         // for user interrupt situation, print time statistic
1936         usedTimeMsTick = duration_cast<milliseconds>(steady_clock::now() - startTime);
1937         printf("User interrupt exit (total %" PRIu64 " ms)\n", static_cast<uint64_t>(usedTimeMsTick.count()));
1938     }
1939 
1940     if (timeReport_ == milliseconds::zero()) {
1941         StatReport(durationInSec);
1942     }
1943 }
1944 
1945 const std::string PerfEvents::GetTypeName(const perf_type_id type_id)
1946 {
1947     auto it = PERF_TYPES.find(type_id);
1948     if (it != PERF_TYPES.end()) {
1949         return it->second;
1950     }
1951     return "<not found>";
1952 }
1953 
1954 void PerfEvents::UpdateCurrentTime()
1955 {
1956     pthread_setname_np(pthread_self(), "timer_thread");
1957     while (updateTimeThreadRunning_) {
1958         struct timespec ts = {0};
1959         if (clock_gettime(CLOCK_MONOTONIC, &ts) != -1) {
1960             currentTimeSecond_.store(static_cast<uint64_t>(ts.tv_sec));
1961         }
1962 
1963         std::this_thread::sleep_for(std::chrono::milliseconds(UPDATE_TIME_INTERVAL));
1964     }
1965 }
1966 
1967 // check whether this record should be skipped; this function can only be called in backtrack mode
1968 bool PerfEvents::IsSkipRecordForBacktrack(const PerfRecordSample& sample)
1969 {
1970     if (outputTracking_) {
1971         // when outputting records, only skip those later than the end time
1972         if (sample.GetTime() / NANO_SECONDS_PER_SECOND > outputEndTime_) {
1973             outputTracking_ = false;
1974             outputEndTime_ = 0;
1975             return true;
1976         }
1977         return false;
1978     }
1979 
1980     // only keep records that fall within the recent backtrack window
1981     if ((currentTimeSecond_.load() - sample.GetTime() / NANO_SECONDS_PER_SECOND) > backtrackTime_) {
1982         return false;
1983     }
1984     return true;
1985 }
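// Editor's note: a worked timeline for the window check above, with an
// assumed backtrackTime_ of 10s and currentTimeSecond_ at 1000s:
//   sample at 995s -> age 5s, inside the window   -> true: reading stops and
//                     the record stays buffered for a later output request
//   sample at 985s -> age 15s, outside the window -> false: the record is
//                     consumed by the callback and its buffer space reclaimed
// "skip" therefore preserves the most recent backtrackTime_ seconds of
// samples in the ring buffer.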
1986 } // namespace HiPerf
1987 } // namespace Developtools
1988 } // namespace OHOS
1989