/*
 * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "perf_events.h"

#include <cassert>
#include <cinttypes>
#include <csignal>
#include <cstdint>
#include <cstdlib>
#include <iostream>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/resource.h>
#include <sys/syscall.h>
#include <unistd.h>
#if defined(CONFIG_HAS_SYSPARA)
#include <parameters.h>
#endif

#include "spe_decoder.h"
#include "debug_logger.h"
#include "hiperf_hilog.h"
#include "register.h"
#include "subcommand_dump.h"
#include "symbols_file.h"
#include "utilities.h"

using namespace std::chrono;
namespace OHOS {
namespace Developtools {
namespace HiPerf {
bool PerfEvents::updateTimeThreadRunning_ = true;
std::atomic<uint64_t> PerfEvents::currentTimeSecond_ = 0;
static std::atomic_bool g_trackRunning = false;
static constexpr int32_t UPDATE_TIME_INTERVAL = 10; // 10ms
static constexpr uint64_t NANO_SECONDS_PER_SECOND = 1000000000;
static constexpr uint32_t POLL_FAIL_COUNT_THRESHOLD = 10;

OHOS::UniqueFd PerfEvents::Open(perf_event_attr &attr, const pid_t pid, const int cpu, const int groupFd,
                                const unsigned long flags)
{
    OHOS::UniqueFd fd = UniqueFd(syscall(__NR_perf_event_open, &attr, pid, cpu, groupFd, flags));
    if (fd < 0) {
        HLOGEP("syscall perf_event_open failed.");
        // dump when open failed.
        SubCommandDump::DumpPrintEventAttr(attr, std::numeric_limits<int>::min());
    }
    HLOGV("perf_event_open: got fd %d for pid %d cpu %d group %d flags %lu", fd.Get(), pid, cpu, groupFd, flags);
    return fd;
}
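
// Illustrative only: how this wrapper maps onto perf_event_open(2)
// (hypothetical values, not a call made by this file):
//   perf_event_attr attr = {};
//   attr.size = sizeof(attr);
//   attr.type = PERF_TYPE_HARDWARE;
//   attr.config = PERF_COUNT_HW_CPU_CYCLES;
//   UniqueFd fd = Open(attr, 0, -1, -1, 0); // this thread, any CPU, own group
// See the pid/cpu combination rules quoted from the man page in
// PrepareFdEvents() below.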

void PerfEvents::SpeReadData(void *dataPage, u64 *dataTail, uint8_t *buf, const u32 size)
{
    void *src = nullptr;
    u32 left = 0;
    u32 offset = static_cast<u32>(*dataTail);
    u32 copySize;
    u32 traceSize = size;
    CHECK_TRUE(size <= (auxMmapPages_ * pageSize_ + sizeof(struct PerfRecordAuxtraceData)),
               NO_RETVAL, 1, "buf size invalid");
    while (traceSize > 0) {
        offset = CALC_OFFSET(offset, auxMmapPages_ * pageSize_);
        left = static_cast<u32>(auxMmapPages_ * pageSize_ - offset);
        copySize = std::min(traceSize, left);
        src = PTR_ADD(dataPage, offset);
        if (memcpy_s(buf, left, src, copySize) != 0) {
            HLOGV("SpeReadData memcpy_s failed.");
        }

        traceSize -= copySize;
        offset += copySize;
        buf = reinterpret_cast<uint8_t *>(PTR_ADD(buf, copySize));
    }

    *dataTail += size;
}

static u64 arm_spe_reference()
{
    struct timespec ts;
    clock_gettime(CLOCK_MONOTONIC_RAW, &ts);
    return static_cast<uint64_t>(ts.tv_sec) ^ static_cast<uint64_t>(ts.tv_nsec);
}
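
// Note: the value above serves only as a quasi-unique reference id for
// matching auxtrace records during decoding; XOR-ing seconds with nanoseconds
// is not a timestamp and carries no ordering guarantee.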

void PerfEvents::ReadRecordsFromSpeMmaps(MmapFd &mmapFd, const u64 auxOffset,
                                         u64 auxSize, const u32 pid, const u32 tid)
{
    if (mmapFd.mmapPage == nullptr || mmapFd.auxBuf == nullptr) {
        printf("ReadRecordsFromSpeMmaps mmapFd.mmapPage == nullptr, mmapFd.fd: %d\n", mmapFd.fd);
        return;
    }
    perf_event_mmap_page *userPage = reinterpret_cast<perf_event_mmap_page *>(mmapFd.mmapPage);
    void *auxPage = mmapFd.auxBuf;
    userPage->aux_tail = auxOffset - auxSize;
    u64 auxHead = userPage->aux_head;
    u64 auxTail = userPage->aux_tail;
    HLOGD("mmap cpu %d, aux_head: %llu, aux_tail:%llu, auxOffset:%llu, auxSize:%llu",
          mmapFd.cpu, auxHead, auxTail, auxOffset, auxSize);
    if (auxHead <= auxTail) {
        return;
    }
    if (auxSize > auxMmapPages_ * pageSize_) {
        userPage->aux_tail += auxSize;
        return;
    }

    int cpu = mmapFd.cpu;
    __sync_synchronize();
    PerfRecordAuxtrace auxtraceRecord = PerfRecordAuxtrace(auxSize, auxTail,
                                                           arm_spe_reference(), cpu, tid, cpu, pid);
    static std::vector<u8> vbuf(RECORD_SIZE_LIMIT);
    uint8_t *buf;
    if ((buf = recordBuf_->AllocForWrite(auxtraceRecord.header_.size + auxSize)) == nullptr) {
        HLOGD("alloc buffer failed: PerfRecordAuxtrace record, readSize: %llu", auxSize);
        return;
    }
    auxtraceRecord.GetBinary1(vbuf);
    if (memcpy_s(buf, auxtraceRecord.header_.size, vbuf.data(), auxtraceRecord.header_.size) != 0) {
        HLOGE("memcpy_s return failed");
        return;
    }
    buf += auxtraceRecord.header_.size;

    while (auxSize > 0) {
        u64 readSize = pageSize_;
        if (auxSize < pageSize_) {
            readSize = auxSize;
        }
        __sync_synchronize();
        SpeReadData(auxPage, &auxTail, buf, readSize);
        __sync_synchronize();
        userPage->aux_tail += readSize;
        auxTail = userPage->aux_tail;
        buf += readSize;
        auxSize -= readSize;
    }
    recordBuf_->EndWrite();
}
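
// A sketch of the aux ring-buffer protocol used above (our reading of the
// perf mmap ABI): the kernel advances aux_head as SPE data arrives, user
// space consumes the bytes in [aux_tail, aux_head) and publishes progress by
// advancing aux_tail; the __sync_synchronize() calls act as the barriers
// needed before reading the data and before publishing the new tail.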

u32 GetSpeType()
{
    FILE *fd;
    u32 speType;

    fd = fopen("/sys/devices/arm_spe_0/type", "r");
    if (fd == nullptr) {
        HLOGV("open sysfs file failed");
        return UINT_MAX;
    }
    if (fscanf_s(fd, "%u", &speType) <= 0) {
        HLOGV("fscanf_s file failed");
        (void)fclose(fd);
        return UINT_MAX;
    }

    (void)fclose(fd);
    return speType;
}
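
// For reference: /sys/devices/arm_spe_0/type holds a single decimal PMU type
// number assigned by the kernel at boot, e.g. a file containing "8" makes
// this function return 8 (the value is device-specific; shown only as an
// illustration).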

PerfEvents::PerfEvents() : timeOut_(DEFAULT_TIMEOUT * THOUSANDS), timeReport_(0)
{
    pageSize_ = sysconf(_SC_PAGESIZE);
    HLOGI("BuildArch %s", GetArchName(BUILD_ARCH_TYPE).c_str());
}

PerfEvents::~PerfEvents()
{
    // close mmap
    for (auto it = cpuMmap_.begin(); it != cpuMmap_.end();) {
        const MmapFd &mmapItem = it->second;
        if (!isSpe_) {
            if (munmap(mmapItem.mmapPage, (1 + mmapPages_) * pageSize_) == -1) {
                HLOGW("munmap failed.");
            }
        } else {
            if (munmap(mmapItem.mmapPage, (1 + auxMmapPages_) * pageSize_) == -1) {
                HLOGW("munmap failed.");
            }
            if (munmap(mmapItem.auxBuf, auxMmapPages_ * pageSize_) == -1) {
                HLOGW("munmap failed.");
            }
        }
        it = cpuMmap_.erase(it);
    }

    ExitReadRecordBufThread();
    if (reportPtr_ != nullptr) {
        fclose(reportPtr_);
        reportPtr_ = nullptr;
    }
}

bool PerfEvents::IsEventSupport(const perf_type_id type, const __u64 config)
{
    std::unique_ptr<perf_event_attr> attr = PerfEvents::CreateDefaultAttr(type, config);
    CHECK_TRUE(attr != nullptr, false, 1, "attr is nullptr");
    UniqueFd fd = Open(*attr.get());
    if (fd < 0) {
        printf("event not supported: %s\n", GetStaticConfigName(type, config).c_str());
        return false;
    }
    return true;
}

bool PerfEvents::IsEventAttrSupport(perf_event_attr &attr)
{
    UniqueFd fd = Open(attr);
    if (fd < 0) {
        return false;
    }
    return true;
}

bool PerfEvents::SetBranchSampleType(const uint64_t value)
{
    if (value != 0) {
        // the cpu-cycles event must be supported
        std::unique_ptr<perf_event_attr> attr =
            PerfEvents::CreateDefaultAttr(PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES);
        CHECK_TRUE(attr != nullptr, false, 0, "");
        attr->sample_type |= PERF_SAMPLE_BRANCH_STACK;
        attr->branch_sample_type = value;
        if (!IsEventAttrSupport(*attr.get())) {
            return false;
        }
    }
    branchSampleType_ = value;
    return true;
}
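
// Minimal usage sketch (hypothetical caller): probing any-branch sampling
// support before adding events:
//   PerfEvents events;
//   if (!events.SetBranchSampleType(PERF_SAMPLE_BRANCH_ANY)) {
//       printf("branch sampling not supported\n");
//   }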

bool PerfEvents::AddDefaultEvent(const perf_type_id type)
{
    auto it = DEFAULT_TYPE_CONFIGS.find(type);
    if (it != DEFAULT_TYPE_CONFIGS.end()) {
        for (auto config : it->second) {
            AddEvent(type, config);
        }
    }
    return true;
}

bool PerfEvents::AddOffCpuEvent()
{
    std::string eventName = "sched:sched_switch";
    if (eventSpaceType_ == EventSpaceType::USER) {
        eventName += ":u";
    } else if (eventSpaceType_ == EventSpaceType::KERNEL) {
        eventName += ":k";
    }
    return AddEvent(eventName);
}

bool PerfEvents::AddEvents(const std::vector<std::string> &eventStrings, const bool group)
{
    bool followGroup = false;
    HLOGV(" %s", VectorToString(eventStrings).c_str());

    for (std::string eventString : eventStrings) {
        if (!AddEvent(eventString, followGroup)) {
            return false;
        }
        // this is a group request: follow-up events need to join the previous group
        if (group) {
            followGroup = true;
        }
    }
    return true;
}

// event name can have :k or :u suffix
// tracepoint event name is like sched:sched_switch
// clang-format off
bool PerfEvents::ParseEventName(const std::string &nameStr,
    std::string &name, bool &excludeUser, bool &excludeKernel, bool &isTracePoint)
// clang-format on
{
    name = nameStr;
    excludeUser = false;
    excludeKernel = false;
    isTracePoint = false;
    if (nameStr.find(":") != std::string::npos) {
        static constexpr size_t maxNumberTokensNoTracePoint = 2;
        static constexpr size_t maxNumberTokensTracePoint = 3;
        std::vector<std::string> eventTokens = StringSplit(nameStr, ":");
        if (eventTokens.size() == maxNumberTokensTracePoint) {
            // tracepoint event with :u or :k
            if (eventTokens.back() == "k") {
                excludeUser = true;
                HLOGV("kernelOnly event");
            } else if (eventTokens.back() == "u") {
                excludeKernel = true;
                HLOGV("userOnly event");
            } else {
                HLOGV("unknown event name %s", nameStr.c_str());
                return false;
            }
            name = eventTokens[0] + ":" + eventTokens[1];
            isTracePoint = true;
        } else if (eventTokens.size() == maxNumberTokensNoTracePoint) {
            name = eventTokens[0];
            if (eventTokens.back() == "k") {
                excludeUser = true;
                HLOGV("kernelOnly event");
            } else if (eventTokens.back() == "u") {
                excludeKernel = true;
                HLOGV("userOnly event");
            } else {
                name = nameStr;
                isTracePoint = true;
                HLOGV("tracepoint event is in form of xx:xxx");
            }
        } else {
            printf("unknown ':' format:'%s'\n", nameStr.c_str());
            return false;
        }
        if (reportCallBack_) {
            if ((eventTokens[0] == "sw-task-clock" || eventTokens[0] == "sw-cpu-clock") &&
                (excludeUser || excludeKernel)) {
                printf(
                    "event type %s with modifier u or modifier k is not supported by the kernel.\n",
                    eventTokens[0].c_str());
                return false;
            }
        }
    }
    return true;
}
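
// Parse examples following the suffix rules above (illustrative names):
//   "hw-cpu-cycles"        -> name "hw-cpu-cycles", no exclusion, not a tracepoint
//   "hw-cpu-cycles:u"      -> name "hw-cpu-cycles", excludeKernel = true
//   "sched:sched_switch"   -> name unchanged, isTracePoint = true
//   "sched:sched_switch:k" -> name "sched:sched_switch", excludeUser = true, tracepoint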

bool PerfEvents::AddEvent(const std::string &eventString, const bool followGroup)
{
    std::string eventName;
    bool excludeUser = false;
    bool excludeKernel = false;
    bool isTracePointEvent = false;
    if (!ParseEventName(eventString, eventName, excludeUser, excludeKernel, isTracePointEvent)) {
        return false;
    }
    if (excludeUser) {
        eventSpaceType_ |= EventSpaceType::KERNEL;
    } else if (excludeKernel) {
        eventSpaceType_ |= EventSpaceType::USER;
    } else {
        eventSpaceType_ |= EventSpaceType::USER_KERNEL;
    }

    // look up the parsed name and dispatch to the matching event type
    if (isTracePointEvent) {
        if (traceConfigTable.empty()) {
            LoadTracepointEventTypesFromSystem();
        }
        for (auto traceType : traceConfigTable) {
            if (traceType.second == eventName) {
                return AddEvent(PERF_TYPE_TRACEPOINT, traceType.first, excludeUser, excludeKernel,
                                followGroup);
            }
        }
    } else {
        if (eventName == "arm_spe_0") {
            u32 speType = GetSpeType();
            if (speType == UINT_MAX) {
                HLOGE("Failed to get SPE type.");
                return false;
            }
            return AddSpeEvent(speType);
        }
        if (StringStartsWith(eventName, "0x")
            && eventName.length() <= MAX_HEX_EVENT_NAME_LENGTH && IsHexDigits(eventName)) {
            return AddEvent(PERF_TYPE_RAW, std::stoull(eventName, nullptr, NUMBER_FORMAT_HEX_BASE),
                            excludeUser, excludeKernel, followGroup);
        } else {
            auto [find, typeId, configId] = GetStaticConfigId(eventName);
            if (find) {
                return AddEvent(typeId, configId, excludeUser, excludeKernel, followGroup);
            }
        }
    }

    printf("%s event is not supported by the kernel.\n", eventName.c_str());
    return false;
}

bool PerfEvents::AddSpeEvent(const u32 type, const bool followGroup)
{
    EventGroupItem &eventGroupItem = followGroup ? eventGroupItem_.back() :
                                                   eventGroupItem_.emplace_back();
    EventItem &eventItem = eventGroupItem.eventItems.emplace_back();

    if (memset_s(&eventItem.attr, sizeof(perf_event_attr), 0, sizeof(perf_event_attr)) != EOK) {
        HLOGE("memset_s failed in PerfEvents::AddSpeEvent");
        return false;
    }
    eventItem.attr.type = type;
    eventItem.attr.sample_period = MULTIPLE_SIZE;
    eventItem.attr.size = sizeof(perf_event_attr);
    eventItem.attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING | PERF_FORMAT_ID;
    eventItem.attr.inherit = (inherit_ ? 1 : 0);
    eventItem.attr.sample_type = SAMPLE_ID | PERF_SAMPLE_IP;
    eventItem.attr.sample_id_all = 1;
    eventItem.attr.disabled = 1;
    eventItem.attr.config = 0x700010007; // 0x700010007 : enable all
    if (config_ != 0) {
        eventItem.attr.config = config_;
    }
    if (config1_ != 0) {
        eventItem.attr.config1 = config1_;
    }
    if (config2_ != 0) {
        eventItem.attr.config2 = config2_;
    }
    HLOGD("config_ is 0x%" PRIx64 ", config1_ is 0x%" PRIx64 ", config2_ is 0x%" PRIx64 "",
          config_, config1_, config2_);
    return true;
}

void PerfEvents::SetConfig(std::map<const std::string, uint64_t> &speOptMaps)
{
    constexpr uint tsOffset = 0;
    constexpr uint paOffset = 1;
    constexpr uint pctOffset = 2;
    constexpr uint jitterOffset = 16;
    constexpr uint branchOffset = 32;
    constexpr uint loadOffset = 33;
    constexpr uint storeOffset = 34;
    config_ |= (speOptMaps["ts_enable"] & 0x1) << tsOffset;
    config_ |= (speOptMaps["pa_enable"] & 0x1) << paOffset;
    config_ |= (speOptMaps["pct_enable"] & 0x1) << pctOffset;
    config_ |= (speOptMaps["jitter"] & 0x1) << jitterOffset;
    config_ |= (speOptMaps["branch_filter"] & 0x1) << branchOffset;
    config_ |= (speOptMaps["load_filter"] & 0x1) << loadOffset;
    config_ |= (speOptMaps["store_filter"] & 0x1) << storeOffset;
    config1_ |= speOptMaps["event_filter"];
    config2_ |= speOptMaps["min_latency"] & 0xfff;
}
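
// Bit layout assembled above (mirrors the offsets used in SetConfig; shown
// here for reference):
//   config  bit 0  ts_enable     bit 1  pa_enable     bit 2  pct_enable
//           bit 16 jitter        bit 32 branch_filter
//           bit 33 load_filter   bit 34 store_filter
//   config1 event_filter mask    config2 min_latency (low 12 bits)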

bool PerfEvents::AddEvent(const perf_type_id type, const __u64 config, const bool excludeUser,
                          const bool excludeKernel, const bool followGroup)
{
    HLOG_ASSERT(!excludeUser || !excludeKernel);
    CHECK_TRUE(!followGroup || !eventGroupItem_.empty(), false, 1, "no group leader created before");
    // found the event name
    CHECK_TRUE(IsEventSupport(type, config), false, 0, "");
    HLOGV("type %d config %llu excludeUser %d excludeKernel %d followGroup %d", type, config,
          excludeUser, excludeKernel, followGroup);

    // reuse the last group when following, otherwise start a new group
    EventGroupItem &eventGroupItem = followGroup ? eventGroupItem_.back()
                                                 : eventGroupItem_.emplace_back();
    // always create a new event item
    EventItem &eventItem = eventGroupItem.eventItems.emplace_back();

    eventItem.typeName = GetTypeName(type);
    if (type == PERF_TYPE_TRACEPOINT) {
        eventItem.configName = GetTraceConfigName(config);
    } else {
        eventItem.configName = GetStaticConfigName(type, config);
    }

    // attr
    if (memset_s(&eventItem.attr, sizeof(perf_event_attr), 0, sizeof(perf_event_attr)) != EOK) {
        HLOGE("memset_s failed in PerfEvents::AddEvent");
        return false;
    }
    eventItem.attr.size = sizeof(perf_event_attr);
    eventItem.attr.type = type;
    eventItem.attr.config = config;
    eventItem.attr.disabled = 1;
    eventItem.attr.read_format =
        PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING | PERF_FORMAT_ID;

    eventItem.attr.inherit = (inherit_ ? 1 : 0);
    eventItem.attr.exclude_kernel = excludeKernel;
    eventItem.attr.exclude_user = excludeUser;

    // we also need mmap for record
    if (recordCallBack_) {
        if (samplePeriod_ > 0) {
            eventItem.attr.freq = 0;
            eventItem.attr.sample_freq = 0;
            eventItem.attr.sample_period = samplePeriod_;
        } else if (sampleFreq_ > 0) {
            eventItem.attr.freq = 1;
            eventItem.attr.sample_freq = sampleFreq_;
        } else {
            if (type == PERF_TYPE_TRACEPOINT) {
                eventItem.attr.freq = 0;
                eventItem.attr.sample_period = DEFAULT_SAMPLE_PERIOD;
            } else {
                eventItem.attr.freq = 1;
                eventItem.attr.sample_freq = DEFAULT_SAMPLE_FREQUNCY;
            }
        }

        eventItem.attr.watermark = 1;
        eventItem.attr.wakeup_watermark = (mmapPages_ * pageSize_) >> 1;
        static constexpr unsigned int maxWakeupMark = 1024 * 1024;
        if (eventItem.attr.wakeup_watermark > maxWakeupMark) {
            eventItem.attr.wakeup_watermark = maxWakeupMark;
        }

        // for a group of events, only enable comm/mmap on the first event
        if (!followGroup) {
            eventItem.attr.comm = 1;
            eventItem.attr.mmap = 1;
            eventItem.attr.mmap2 = 1;
            eventItem.attr.mmap_data = 1;
        }

        if (sampleStackType_ == SampleStackType::DWARF) {
            eventItem.attr.sample_type = SAMPLE_TYPE | PERF_SAMPLE_CALLCHAIN |
                                         PERF_SAMPLE_STACK_USER | PERF_SAMPLE_REGS_USER;
            eventItem.attr.exclude_callchain_user = 1;
            eventItem.attr.sample_regs_user = GetSupportedRegMask(GetDeviceArch());
            eventItem.attr.sample_stack_user = dwarfSampleStackSize_;
        } else if (sampleStackType_ == SampleStackType::FP) {
            eventItem.attr.sample_type = SAMPLE_TYPE | PERF_SAMPLE_CALLCHAIN;
        } else {
            eventItem.attr.sample_type = SAMPLE_TYPE;
        }

        if (isHM_) {
            eventItem.attr.sample_type |= PERF_SAMPLE_SERVER_PID;
        }
    }

    // set clock id
    if (clockId_ != -1) {
        eventItem.attr.use_clockid = 1;
        eventItem.attr.clockid = clockId_;
    }
    if (branchSampleType_ != 0) {
        eventItem.attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
        eventItem.attr.branch_sample_type = branchSampleType_;
    }

    HLOGV("Add Event: '%s':'%s' %s %s %s", eventItem.typeName.c_str(), eventItem.configName.c_str(),
          excludeUser ? "excludeUser" : "", excludeKernel ? "excludeKernel" : "",
          followGroup ? "" : "group leader");

    return true;
}

std::unique_ptr<perf_event_attr> PerfEvents::CreateDefaultAttr(const perf_type_id type, const __u64 config)
{
    std::unique_ptr<perf_event_attr> attr = std::make_unique<perf_event_attr>();
    if (memset_s(attr.get(), sizeof(perf_event_attr), 0, sizeof(perf_event_attr)) != EOK) {
        HLOGE("memset_s failed in PerfEvents::CreateDefaultAttr");
        return nullptr;
    }
    attr->size = sizeof(perf_event_attr);
    attr->type = type;
    attr->config = config;
    attr->disabled = 1;
    return attr;
}

// should move to upper caller
static struct sigaction g_oldSig {
};
static bool CaptureSig()
{
    HLOGD("capture Ctrl + C to end sampling gracefully");
    struct sigaction sig {
    };

    sig.sa_handler = [](int sig) {
        printf("\n Ctrl + C detected.\n");
        g_trackRunning = false;
    };

    sig.sa_flags = 0;
    if (sigaction(SIGINT, &sig, &g_oldSig) < 0) {
        perror("Fail to call sigaction for SIGINT");
        return false;
    }
    return true;
}

static void RecoverCaptureSig()
{
    if (sigaction(SIGINT, &g_oldSig, nullptr) < 0) {
        perror("Fail to call sigaction for SIGINT");
    }
}

// split into two parts
// because WriteAttrAndId needs the fd ids before tracking starts
bool PerfEvents::PrepareTracking(void)
{
    // 1. prepare cpu pid
    CHECK_TRUE(PrepareFdEvents(), false, 1, "PrepareFdEvents() failed");

    // 2. create events
    CHECK_TRUE(CreateFdEvents(), false, 1, "CreateFdEvents() failed");

    HLOGV("success");
    prepared_ = true;
    return true;
}

void PerfEvents::ExitReadRecordBufThread()
{
    if (isLowPriorityThread_) {
        if (setpriority(PRIO_PROCESS, gettid(), 0) != 0) {
            HLOGW("failed to restore priority of the record reading thread");
        }
    }
    if (readRecordBufThread_.joinable()) {
        {
            std::lock_guard<std::mutex> lk(mtxRrecordBuf_);
            readRecordThreadRunning_ = false;
            __sync_synchronize();
            cvRecordBuf_.notify_one();
        }
        readRecordBufThread_.join();
    }
}

bool PerfEvents::PrepareRecordThread()
{
    try {
        recordBuf_ = std::make_unique<RingBuffer>(CalcBufferSize());
    } catch (const std::exception &e) {
        printf("create record buffer(size %zu) failed: %s\n", CalcBufferSize(), e.what());
        HIPERF_HILOGI(MODULE_DEFAULT, "create record buffer failed: %{public}s", e.what());
        return false;
    }
    readRecordThreadRunning_ = true;
    readRecordBufThread_ = std::thread(&PerfEvents::ReadRecordFromBuf, this);
    if (backtrack_) {
        std::thread updateTimeThread(&PerfEvents::UpdateCurrentTime);
        updateTimeThread.detach();
    }

    rlimit rlim;
    int result = getrlimit(RLIMIT_NICE, &rlim);
    const rlim_t lowPriority = 40;
    if (result == 0 && rlim.rlim_cur == lowPriority) {
        const int highPriority = -20;
        result = setpriority(PRIO_PROCESS, gettid(), highPriority);
        if (result != 0) {
            HLOGW("failed to raise priority of the record reading thread");
        } else {
            isLowPriorityThread_ = true;
        }
    }

    return true;
}

void PerfEvents::WaitRecordThread()
{
    printf("Process and Saving data...\n");
    ExitReadRecordBufThread();

    const auto usedTimeMsTick = duration_cast<milliseconds>(steady_clock::now() - trackingEndTime_);
    if (verboseReport_) {
        printf("Record Process Completed (wait %" PRIu64 " ms)\n", (uint64_t)usedTimeMsTick.count());
    }
    HLOGV("Record Process Completed (wait %" PRIu64 " ms)\n", (uint64_t)usedTimeMsTick.count());
#ifdef HIPERF_DEBUG_TIME
    printf("%zu record processed, used %0.3f ms(%4.2f us/record)\n", recordEventCount_,
           recordCallBackTime_.count() / MS_DURATION,
           recordCallBackTime_.count() / static_cast<double>(recordEventCount_));
    printf("total wait sleep time %0.3f ms.\n", recordSleepTime_.count() / MS_DURATION);
    printf("read from kernel time %0.3f ms.\n", recordKernelReadTime_.count() / MS_DURATION);
#endif
}

bool PerfEvents::StartTracking(const bool immediately)
{
    if (!prepared_) {
        HLOGD("not prepared");
        return false;
    }

    if (recordCallBack_ && !PrepareRecordThread()) {
        HLOGW("PrepareRecordThread failed.");
        return false;
    }

    HLOGD("step: 1. enable event");
    HIPERF_HILOGI(MODULE_DEFAULT, "StartTracking step: 1. enable event");
    trackingStartTime_ = steady_clock::now();
    if (immediately) {
        if (!EnableTracking()) {
            HLOGE("PerfEvents::EnableTracking() failed");
            return false;
        }
        printf("Profiling duration is %.3f seconds.\n", float(timeOut_.count()) / THOUSANDS);
        printf("Start Profiling...\n");
    }

    g_trackRunning = true;
    if (!CaptureSig()) {
        HLOGE("CaptureSig() failed");
        g_trackRunning = false;
        ExitReadRecordBufThread();
        return false;
    }

    HLOGD("step: 2. thread loop");
    HIPERF_HILOGI(MODULE_DEFAULT, "StartTracking step: 2. thread loop");
    if (recordCallBack_) {
        RecordLoop();
    } else {
        StatLoop();
    }

    HLOGD("step: 3. disable event");
    HIPERF_HILOGI(MODULE_DEFAULT, "StartTracking step: 3. disable event");
    if (!PerfEventsEnable(false)) {
        HLOGE("PerfEvents::PerfEventsEnable() failed");
    }
    if (recordCallBack_) {
        // read remaining samples after disabling events
        ReadRecordsFromMmaps();
    }
    trackingEndTime_ = steady_clock::now();

    RecoverCaptureSig();

    HLOGD("step: 4. wait record thread");
    HIPERF_HILOGI(MODULE_DEFAULT, "StartTracking step: 4. wait record thread");
    if (recordCallBack_) {
        WaitRecordThread();
    }

    HLOGD("step: 5. exit");
    HIPERF_HILOGI(MODULE_DEFAULT, "StartTracking step: 5. exit");
    return true;
}

bool PerfEvents::StopTracking(void)
{
    if (g_trackRunning) {
        printf("someone called StopTracking\n");
        HLOGI("someone called StopTracking");
        HIPERF_HILOGI(MODULE_DEFAULT, "someone called StopTracking");
        g_trackRunning = false;
        if (trackedCommand_) {
            if (trackedCommand_->GetState() == TrackedCommand::State::COMMAND_STARTED) {
                trackedCommand_->Stop();
            }
        }
        CHECK_TRUE(PerfEventsEnable(false), false, 1, "StopTracking : PerfEventsEnable(false) failed");
    }
    return true;
}

bool PerfEvents::PauseTracking(void)
{
    CHECK_TRUE(startedTracking_, false, 0, "");
    HIPERF_HILOGI(MODULE_DEFAULT, "someone called PauseTracking");
    return PerfEventsEnable(false);
}

bool PerfEvents::ResumeTracking(void)
{
    CHECK_TRUE(startedTracking_, false, 0, "");
    HIPERF_HILOGI(MODULE_DEFAULT, "someone called ResumeTracking");
    return PerfEventsEnable(true);
}

bool PerfEvents::OutputTracking()
{
    if (!startedTracking_) {
        HIPERF_HILOGI(MODULE_DEFAULT, "OutputTracking failed, tracking not started...");
        return false;
    }

    if (IsOutputTracking()) {
        HIPERF_HILOGI(MODULE_DEFAULT, "output in progress");
        return true;
    }

    outputEndTime_ = currentTimeSecond_.load();
    outputTracking_ = true;
    return true;
}

bool PerfEvents::EnableTracking()
{
    CHECK_TRUE(!startedTracking_, true, 0, "");
    CHECK_TRUE(PerfEventsEnable(true), false, 1, "PerfEvents::PerfEventsEnable() failed");

    if (trackedCommand_) {
        // start the tracked command
        if (trackedCommand_->GetState() == TrackedCommand::State::COMMAND_WAITING) {
            if (!trackedCommand_->StartCommand()) {
                int wstatus;
                if (!trackedCommand_->WaitCommand(wstatus)) {
                    trackedCommand_->Stop();
                }
                std::string commandName = trackedCommand_->GetCommandName();
                printf("failed to execute command: %zu: %s\n", commandName.size(), commandName.c_str());
                return false;
            }
        } else if (trackedCommand_->GetState() != TrackedCommand::State::COMMAND_STARTED) {
            return false;
        }
    }
    startedTracking_ = true;
    return true;
}

bool PerfEvents::IsTrackRunning()
{
    return g_trackRunning;
}

bool PerfEvents::IsOutputTracking()
{
    return outputTracking_;
}

void PerfEvents::SetOutputTrackingStatus(const bool status)
{
    outputTracking_ = status;
}

void PerfEvents::SetSystemTarget(const bool systemTarget)
{
    systemTarget_ = systemTarget;
}

void PerfEvents::SetCpu(const std::vector<pid_t> cpus)
{
    cpus_ = cpus;
}

void PerfEvents::SetPid(const std::vector<pid_t> pids)
{
    pids_ = pids;
}

void PerfEvents::SetTimeOut(const float timeOut)
{
    if (timeOut > 0) {
        timeOut_ = milliseconds(static_cast<int>(timeOut * THOUSANDS));
    }
}

void PerfEvents::SetTimeReport(int timeReport)
{
    static constexpr int minMsReportInterval = 10;
    if (timeReport < minMsReportInterval && timeReport != 0) {
        timeReport = minMsReportInterval;
        printf("time report min value is %d.\n", timeReport);
    }

    timeReport_ = milliseconds(timeReport);
}

std::map<__u64, std::string> PerfEvents::GetSupportEvents(const perf_type_id type)
{
    if (type == PERF_TYPE_TRACEPOINT) {
        LoadTracepointEventTypesFromSystem();
    }

    std::map<__u64, std::string> eventConfigs;
    auto configTable = TYPE_CONFIGS.find(type);
    if (configTable != TYPE_CONFIGS.end()) {
        auto configs = configTable->second;
        for (auto config : configs) {
            if (type == PERF_TYPE_TRACEPOINT || IsEventSupport(type, (__u64)config.first)) {
                eventConfigs.insert(config);
            } else {
                HLOGD("'%s' not supported", config.second.c_str());
            }
        }
    }
    return eventConfigs;
}

void PerfEvents::LoadTracepointEventTypesFromSystem()
{
    if (traceConfigTable.empty()) {
        std::string basePath {"/sys/kernel/tracing/events"};
        if (access(basePath.c_str(), R_OK) != 0) {
            basePath = "/sys/kernel/debug/tracing/events";
        }
        for (const auto &eventName : GetSubDirs(basePath)) {
            std::string eventPath = basePath + "/" + eventName;
            for (const auto &concreteEvent : GetSubDirs(eventPath)) {
                std::string idPath = eventPath + "/" + concreteEvent + "/id";
                {
                    std::string resolvedPath = CanonicalizeSpecPath(idPath.c_str());
                    std::ifstream ifs {resolvedPath};
                    // clang-format off
                    const std::string idStr = {
                        std::istream_iterator<char>(ifs),
                        std::istream_iterator<char>()
                    };
                    // clang-format on
                    __u64 id {0};
                    try {
                        id = std::stoul(idStr, nullptr);
                    } catch (...) {
                        continue;
                    }
                    if (isHM_ && id < MIN_HM_TRACEPOINT_EVENT_ID) {
                        continue;
                    }
                    auto typeConfigs = TYPE_CONFIGS.find(PERF_TYPE_TRACEPOINT);
                    HLOG_ASSERT(typeConfigs != TYPE_CONFIGS.end());
                    auto configPair = typeConfigs->second.insert(
                        std::make_pair(id, eventName + ":" + concreteEvent));
                    traceConfigTable.insert(std::make_pair(id, eventName + ":" + concreteEvent));
                    ConfigTable::iterator it = configPair.first;
                    HLOGV("TYPE_CONFIGS add %llu:%s in %zu", it->first, it->second.c_str(),
                          typeConfigs->second.size());
                }
            }
        }
    }
}

void PerfEvents::SetPerCpu(const bool perCpu)
{
    perCpu_ = perCpu;
}

void PerfEvents::SetPerThread(const bool perThread)
{
    perThread_ = perThread;
}

void PerfEvents::SetVerboseReport(const bool verboseReport)
{
    verboseReport_ = verboseReport;
}

void PerfEvents::SetSampleFrequency(const unsigned int frequency)
{
    if (frequency > 0) {
        sampleFreq_ = frequency;
    }
    int maxRate = 0;
    CHECK_TRUE(ReadIntFromProcFile("/proc/sys/kernel/perf_event_max_sample_rate", maxRate),
               NO_RETVAL, LOG_TYPE_PRINTF,
               "read perf_event_max_sample_rate fail.\n");
    if (sampleFreq_ > static_cast<unsigned int>(maxRate)) {
        static bool printFlag = false;
        sampleFreq_ = static_cast<unsigned int>(maxRate);
        if (!printFlag) {
            printf("Adjust sampling frequency to maximum allowed frequency %d.\n", maxRate);
            printFlag = true;
        }
    }
}

void PerfEvents::SetSamplePeriod(const unsigned int period)
{
    if (period > 0) {
        samplePeriod_ = period;
    }
}

void PerfEvents::SetBackTrack(const bool backtrack)
{
    backtrack_ = backtrack;
}

void PerfEvents::SetBackTrackTime(const uint64_t backtrackTime)
{
    backtrackTime_ = backtrackTime;
}

void PerfEvents::SetMmapPages(const size_t mmapPages)
{
    mmapPages_ = mmapPages;
}

void PerfEvents::SetSampleStackType(const SampleStackType type)
{
    sampleStackType_ = type;
}

void PerfEvents::SetDwarfSampleStackSize(const uint32_t stackSize)
{
    HLOGD("request stack size is %u", stackSize);
    dwarfSampleStackSize_ = stackSize;
}

bool PerfEvents::PerfEventsEnable(const bool enable)
{
    HLOGV("%s", std::to_string(enable).c_str());
    for (const auto &eventGroupItem : eventGroupItem_) {
        for (const auto &eventItem : eventGroupItem.eventItems) {
            for (const auto &fdItem : eventItem.fdItems) {
                int result =
                    ioctl(fdItem.fd, enable ? PERF_EVENT_IOC_ENABLE : PERF_EVENT_IOC_DISABLE, 0);
                if (result < 0) {
                    printf("Cannot '%s' perf fd! type config name: '%s:%s'\n",
                           enable ? "enable" : "disable", eventItem.typeName.c_str(),
                           eventItem.configName.c_str());
                    return false;
                }
            }
        }
    }
    return true;
}

void PerfEvents::SetHM(const bool isHM)
{
    isHM_ = isHM;
}

void PerfEvents::SetStatCallBack(const StatCallBack reportCallBack)
{
    reportCallBack_ = reportCallBack;
}

void PerfEvents::SetStatReportFd(FILE* reportPtr)
{
    reportPtr_ = reportPtr;
}

void PerfEvents::SetRecordCallBack(const RecordCallBack recordCallBack)
{
    recordCallBack_ = recordCallBack;
}

inline void PerfEvents::PutAllCpus()
{
    int cpuConfigs = sysconf(_SC_NPROCESSORS_CONF);
    for (int i = 0; i < cpuConfigs; i++) {
        cpus_.push_back(i); // put all cpus
    }
}

bool PerfEvents::PrepareFdEvents(void)
{
    /*
    https://man7.org/linux/man-pages/man2/perf_event_open.2.html
    pid == 0 and cpu == -1
            This measures the calling process/thread on any CPU.

    pid == 0 and cpu >= 0
            This measures the calling process/thread only when running
            on the specified CPU.

    pid > 0 and cpu == -1
            This measures the specified process/thread on any CPU.

    pid > 0 and cpu >= 0
            This measures the specified process/thread only when
            running on the specified CPU.

    pid == -1 and cpu >= 0
            This measures all processes/threads on the specified CPU.
            This requires CAP_PERFMON (since Linux 5.8) or
            CAP_SYS_ADMIN capability or a
            /proc/sys/kernel/perf_event_paranoid value of less than 1.

    pid == -1 and cpu == -1
            This setting is invalid and will return an error.
    */
    if (systemTarget_) {
        pids_.clear();
        pids_.push_back(-1);
    } else {
        if (trackedCommand_) {
            pids_.push_back(trackedCommand_->GetChildPid());
        }
        if (pids_.empty()) {
            pids_.push_back(0); // no pid means use 0 as self pid
        }
    }
    if (perCpu_ || perThread_) {
        cpus_.clear();
        PutAllCpus();
    }
    if (cpus_.empty()) {
        PutAllCpus();
    }

    // print info telling the user which cpus and processes were selected.
    if (pids_.size() == 1 && pids_[0] == -1) {
        HLOGI("target process: system scope \n");
    } else {
        HLOGI("target process: %zu (%s)\n", pids_.size(),
              (pids_[0] == 0) ? std::to_string(gettid()).c_str() : VectorToString(pids_).c_str());
    }
    if (cpus_.size() == 1 && cpus_[0] == -1) {
        HLOGI("target cpus: %ld \n", sysconf(_SC_NPROCESSORS_CONF));
    } else {
        HLOGI("target cpus: %zu / %ld (%s)\n", cpus_.size(), sysconf(_SC_NPROCESSORS_CONF),
              VectorToString(cpus_).c_str());
    }

    return true;
}

bool PerfEvents::CreateFdEvents(void)
{
    // there must be at least one event, or creation will fail
    CHECK_TRUE(!eventGroupItem_.empty(), false, LOG_TYPE_PRINTF, "no event selected.\n");

    // create each fd for the cpus and processes the user selected
    /*
    https://man7.org/linux/man-pages/man2/perf_event_open.2.html

    (A single event on its own is created with group_fd = -1 and is
    considered to be a group with only 1 member.)
    */
    // Even if there is only one event, it is counted as a group.

    uint fdNumber = 0;
    uint eventNumber = 0;
    uint groupNumber = 0;
    for (auto &eventGroupItem : eventGroupItem_) {
        /*
        How the groups are configured:
        Suppose we have 2 events, 2 pids, and 3 cpus.
        Per the perf API, the fds of one group must share the same pid and
        cpu; only the event may differ. In other words, binding E1 and E2
        into the same group can only look like this:

        event E1 pid P1 cpu C1 [Group 1]
        event E1 pid P1 cpu C2 [Group 2]
        event E1 pid P1 cpu C3 [Group 3]

        event E1 pid P2 cpu C1 [Group 4]
        event E1 pid P2 cpu C2 [Group 5]
        event E1 pid P2 cpu C3 [Group 6]

        event E2 pid P1 cpu C1 [Group 1]
        event E2 pid P1 cpu C2 [Group 2]
        event E2 pid P1 cpu C3 [Group 3]

        event E2 pid P2 cpu C1 [Group 4]
        event E2 pid P2 cpu C2 [Group 5]
        event E2 pid P2 cpu C3 [Group 6]
        */
        HLOGV("group %2u. eventGroupItem leader: '%s':", groupNumber++,
              eventGroupItem.eventItems[0].configName.c_str());

        int groupFdCache[cpus_.size()][pids_.size()];
        for (size_t i = 0; i < cpus_.size(); i++) {     // each cpu
            for (size_t j = 0; j < pids_.size(); j++) { // each pid
                // The leader is created first, with group_fd = -1.
                groupFdCache[i][j] = -1;
            }
        }

        uint eventIndex = 0;
        for (auto &eventItem : eventGroupItem.eventItems) {
            HLOGV(" - event %2u. eventName: '%s:%s'", eventIndex++, eventItem.typeName.c_str(),
                  eventItem.configName.c_str());

            for (size_t icpu = 0; icpu < cpus_.size(); icpu++) {     // each cpu
                for (size_t ipid = 0; ipid < pids_.size(); ipid++) { // each pid
                    // one fd event group must share the same cpu and pid config
                    // (only the event can be different)
                    // clang-format off
                    UniqueFd fd = Open(eventItem.attr, pids_[ipid], cpus_[icpu],
                                       groupFdCache[icpu][ipid], 0);
                    // clang-format on
                    if (fd < 0) {
                        if (errno == ESRCH) {
                            if (verboseReport_) {
                                printf("pid %d does not exist.\n", pids_[ipid]);
                            }
                            HLOGE("pid %d does not exist.\n", pids_[ipid]);
                            HIPERF_HILOGE(MODULE_DEFAULT, "[CreateFdEvents] pid %{public}d does not exist.",
                                          pids_[ipid]);
                            continue;
                        } else {
                            // clang-format off
                            if (verboseReport_) {
                                char errInfo[ERRINFOLEN] = { 0 };
                                strerror_r(errno, errInfo, ERRINFOLEN);
                                printf("%s event is not supported by the kernel on cpu %d. reason: %d:%s\n",
                                       eventItem.configName.c_str(), cpus_[icpu], errno, errInfo);
                            }
                            char errInfo[ERRINFOLEN] = { 0 };
                            strerror_r(errno, errInfo, ERRINFOLEN);
                            HLOGE("%s event is not supported by the kernel on cpu %d. reason: %d:%s\n",
                                  eventItem.configName.c_str(), cpus_[icpu], errno, errInfo);
                            // clang-format on
                            break; // jump to next cpu
                        }
                    }
                    // after open succeeded, fill in the result
                    // make a new FdItem
                    FdItem &fdItem = eventItem.fdItems.emplace_back();
                    fdItem.fd = std::move(fd);
                    fdItem.cpu = cpus_[icpu];
                    fdItem.pid = pids_[ipid];
                    fdNumber++;

                    // if sampling, mmap the ring buffer
                    bool createMmapSucc = true;
                    if (recordCallBack_) {
                        createMmapSucc = isSpe_ ?
                            CreateSpeMmap(fdItem, eventItem.attr) : CreateMmap(fdItem, eventItem.attr);
                    }
                    if (!createMmapSucc) {
                        printf("create mmap fail\n");
                        HIPERF_HILOGI(MODULE_DEFAULT, "create mmap fail");
                        return false;
                    }
                    // update group leader
                    int groupFdCacheNum = groupFdCache[icpu][ipid];
                    if (groupFdCacheNum == -1) {
                        groupFdCache[icpu][ipid] = fdItem.fd.Get();
                    }
                }
            }
            eventNumber++;
        }
    }

    CHECK_TRUE(fdNumber != 0, false, 1, "opened %u fd for %u events", fdNumber, eventNumber);

    HLOGD("will try read %u events from %u fd (%zu groups):", eventNumber, fdNumber,
          eventGroupItem_.size());

    return true;
}

bool PerfEvents::StatReport(const __u64 &durationInSec)
{
    read_format_no_group readNoGroupValue;

    // only need to read when a report is needed
    HLOGM("eventGroupItem_:%zu", eventGroupItem_.size());
    __u64 groupId = 0;
    // clear countEvents data
    countEvents_.clear();
    for (const auto &eventGroupItem : eventGroupItem_) {
        HLOGM("eventItems:%zu", eventGroupItem.eventItems.size());
        groupId++;
        for (const auto &eventItem : eventGroupItem.eventItems) {
            // count event info together (every cpu, every pid)
            std::string configName = "";
            if (eventItem.attr.exclude_kernel) {
                configName = eventItem.configName + ":u";
            } else if (eventItem.attr.exclude_user) {
                configName = eventItem.configName + ":k";
            } else {
                configName = eventItem.configName;
            }
            if (countEvents_.count(configName) == 0) {
                auto countEvent = std::make_unique<CountEvent>(CountEvent {});
                countEvents_[configName] = std::move(countEvent);
                countEvents_[configName]->userOnly = eventItem.attr.exclude_kernel;
                countEvents_[configName]->kernelOnly = eventItem.attr.exclude_user;
            }
            const std::unique_ptr<CountEvent> &countEvent = countEvents_[configName];
            HLOGM("eventItem.fdItems:%zu", eventItem.fdItems.size());
            for (const auto &fditem : eventItem.fdItems) {
                if (read(fditem.fd, &readNoGroupValue, sizeof(readNoGroupValue)) > 0) {
                    countEvent->eventCount += readNoGroupValue.value;
                    countEvent->timeEnabled += readNoGroupValue.timeEnabled;
                    countEvent->timeRunning += readNoGroupValue.timeRunning;
                    countEvent->id = groupId;
                    if (durationInSec != 0) {
                        countEvent->usedCpus = (countEvent->eventCount / 1e9) / (durationInSec / THOUSANDS);
                    }
                    if (verboseReport_) {
                        printf("%s id:%llu(c%d:p%d) timeEnabled:%llu timeRunning:%llu value:%llu\n",
                               eventItem.configName.c_str(), readNoGroupValue.id, fditem.cpu, fditem.pid,
                               readNoGroupValue.timeEnabled, readNoGroupValue.timeRunning, readNoGroupValue.value);
                    }
                    if ((perCpu_ || perThread_) && readNoGroupValue.value) {
                        countEvent->summaries.emplace_back(fditem.cpu, fditem.pid, readNoGroupValue.value,
                                                           readNoGroupValue.timeEnabled, readNoGroupValue.timeRunning);
                    }
                } else {
                    printf("read failed from event '%s'\n", eventItem.configName.c_str());
                }
            }
        }
    }

    reportCallBack_(countEvents_, reportPtr_);

    return true;
}

bool PerfEvents::CreateSpeMmap(const FdItem &item, const perf_event_attr &attr)
{
    auto it = cpuMmap_.find(item.cpu);
    if (it == cpuMmap_.end()) {
        void *rbuf = mmap(nullptr, (1 + auxMmapPages_) * pageSize_, (PROT_READ | PROT_WRITE), MAP_SHARED,
                          item.fd.Get(), 0);
        CHECK_TRUE(rbuf != MMAP_FAILED, false, 1, "");
        void *auxRbuf = mmap(nullptr, auxMmapPages_ * pageSize_, (PROT_READ | PROT_WRITE), MAP_SHARED,
                             item.fd.Get(), 0);
        MmapFd mmapItem;
        mmapItem.fd = item.fd.Get();
        mmapItem.mmapPage = reinterpret_cast<perf_event_mmap_page *>(rbuf);
        mmapItem.buf = reinterpret_cast<uint8_t *>(rbuf) + pageSize_;
        mmapItem.auxBuf = auxRbuf;
        mmapItem.bufSize = auxMmapPages_ * pageSize_;
        mmapItem.auxBufSize = auxMmapPages_ * pageSize_;
        mmapItem.attr = &attr;
        mmapItem.tid_ = item.pid;
        mmapItem.cpu = item.cpu;
        cpuMmap_[item.cpu] = mmapItem;
        pollFds_.emplace_back(pollfd {mmapItem.fd, POLLIN, 0});
    } else {
        const MmapFd &mmapItem = it->second;
        int rc = ioctl(item.fd.Get(), PERF_EVENT_IOC_SET_OUTPUT, mmapItem.fd);
        if (rc != 0) {
            HLOGEP("ioctl PERF_EVENT_IOC_SET_OUTPUT (%d -> %d) ", item.fd.Get(), mmapItem.fd);
            perror("failed to share mapped buffer\n");
            return false;
        }
    }
    return true;
}

bool PerfEvents::CreateMmap(const FdItem &item, const perf_event_attr &attr)
{
    auto it = cpuMmap_.find(item.cpu);
    if (it == cpuMmap_.end()) {
        void *rbuf = mmap(nullptr, (1 + mmapPages_) * pageSize_, PROT_READ | PROT_WRITE, MAP_SHARED,
                          item.fd.Get(), 0);
        if (rbuf == MMAP_FAILED) {
            char errInfo[ERRINFOLEN] = {0};
            strerror_r(errno, errInfo, ERRINFOLEN);
            HLOGE("errno:%d, errstr:%s", errno, errInfo);
            perror("Fail to call mmap \n");
            HIPERF_HILOGE(MODULE_DEFAULT, "[CreateMmap] Fail to call mmap. errno:%{public}d, errstr:%{public}s",
                          errno, errInfo);
            return false;
        }
        MmapFd mmapItem;
        mmapItem.fd = item.fd.Get();
        mmapItem.mmapPage = reinterpret_cast<perf_event_mmap_page *>(rbuf);
        mmapItem.buf = reinterpret_cast<uint8_t *>(rbuf) + pageSize_;
        mmapItem.bufSize = mmapPages_ * pageSize_;
        mmapItem.attr = &attr;
        mmapItem.posCallChain = GetCallChainPosInSampleRecord(attr);

        cpuMmap_[item.cpu] = mmapItem;
        pollFds_.emplace_back(pollfd {mmapItem.fd, POLLIN, 0});
        HLOGD("CreateMmap success cpu %d fd %d mmapPages_ %zu", item.cpu, mmapItem.fd, mmapPages_);
    } else {
        const MmapFd &mmapItem = it->second;
        int rc = ioctl(item.fd.Get(), PERF_EVENT_IOC_SET_OUTPUT, mmapItem.fd);
        if (rc != 0) {
            HLOGEP("ioctl PERF_EVENT_IOC_SET_OUTPUT (%d -> %d) ", item.fd.Get(), mmapItem.fd);
            perror("failed to share mapped buffer\n");
            return false;
        }
    }
    return true;
}

std::vector<AttrWithId> PerfEvents::GetAttrWithId() const
{
    std::vector<AttrWithId> result;
    HLOGV("eventGroupItem_ %zu :", eventGroupItem_.size());

    for (const auto &eventGroupItem : eventGroupItem_) {
        HLOGV(" eventItems %zu eventItems:", eventGroupItem.eventItems.size());
        for (const auto &eventItem : eventGroupItem.eventItems) {
            AttrWithId attrId;
            attrId.attr = eventItem.attr;
            attrId.name = eventItem.configName;
            HLOGV(" fdItems %zu fdItems:", eventItem.fdItems.size());
            for (const auto &fdItem : eventItem.fdItems) {
                auto &id = attrId.ids.emplace_back(fdItem.GetPrefId());
                HLOGV(" eventItem.fdItems GetPrefId %" PRIu64 "", id);
            }
            result.emplace_back(attrId);
        }
    }
    return result;
}

#ifdef CONFIG_HAS_CCM
void PerfEvents::GetBufferSizeCfg(size_t &maxBufferSize, size_t &minBufferSize)
{
    size_t tmpMaxBufferSize = 0;
    size_t tmpMinBufferSize = 0;
    if (GetCfgValue(PRODUCT_CONFIG_PATH, CFG_MAX_BUFFER_SIZE, tmpMaxBufferSize)) {
        if (!CheckOutOfRange(tmpMaxBufferSize, BUFFER_LOW_LEVEL, MAX_BUFFER_SIZE_LARGE)) {
            maxBufferSize = tmpMaxBufferSize;
            HIPERF_HILOGI(MODULE_DEFAULT, "GetCfgValue %{public}s: %{public}zu", CFG_MAX_BUFFER_SIZE, maxBufferSize);
        } else {
            HIPERF_HILOGE(MODULE_DEFAULT, "GetCfgValue %{public}s failed, %{public}zu out of range",
                          CFG_MAX_BUFFER_SIZE, tmpMaxBufferSize);
        }
    }
    if (GetCfgValue(PRODUCT_CONFIG_PATH, CFG_MIN_BUFFER_SIZE, tmpMinBufferSize)) {
        if (!CheckOutOfRange(tmpMinBufferSize, BUFFER_LOW_LEVEL, MAX_BUFFER_SIZE_LARGE)) {
            minBufferSize = tmpMinBufferSize;
            HIPERF_HILOGI(MODULE_DEFAULT, "GetCfgValue %{public}s: %{public}zu", CFG_MIN_BUFFER_SIZE, minBufferSize);
        } else {
            HIPERF_HILOGE(MODULE_DEFAULT, "GetCfgValue %{public}s failed, %{public}zu out of range",
                          CFG_MIN_BUFFER_SIZE, tmpMinBufferSize);
        }
    }
}
#endif

size_t PerfEvents::CalcBufferSize()
{
    size_t maxBufferSize;
    if (LittleMemory()) {
        maxBufferSize = MAX_BUFFER_SIZE_LITTLE;
    } else {
        maxBufferSize = MAX_BUFFER_SIZE_LARGE;
    }
    size_t minBufferSize = MIN_BUFFER_SIZE;
#ifdef CONFIG_HAS_CCM
    GetBufferSizeCfg(maxBufferSize, minBufferSize);
#endif

    size_t bufferSize = maxBufferSize;
    if (backtrack_ || !systemTarget_) {
        // assume the ring buffer is 4 times the total mmap size
        static constexpr int TIMES = 4;
        bufferSize = cpuMmap_.size() * mmapPages_ * pageSize_ * TIMES;
        if (bufferSize < minBufferSize) {
            bufferSize = minBufferSize;
        } else if (bufferSize > maxBufferSize) {
            bufferSize = maxBufferSize;
        }
    }
    HLOGD("CalcBufferSize return %zu", bufferSize);
    return bufferSize;
}
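
// Illustrative arithmetic (hypothetical numbers): with 8 mapped cpus,
// mmapPages_ = 1024 and a 4 KiB page, the non-system-target case asks for
// 8 * 1024 * 4096 * 4 = 128 MiB, which is then clamped into
// [minBufferSize, maxBufferSize].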

inline bool PerfEvents::IsRecordInMmap(const int timeout)
{
    HLOGV("enter");
    if (pollFds_.size() > 0) {
        static uint32_t pollFailCount = 0;
        if (poll(static_cast<struct pollfd*>(pollFds_.data()), pollFds_.size(), timeout) <= 0) {
            // timed out; try again
            if (++pollFailCount >= POLL_FAIL_COUNT_THRESHOLD) {
                pollFailCount = 0;
                HIPERF_HILOGW(MODULE_DEFAULT, "mmap has no data for the past 5s");
            }
            return false;
        } else {
            pollFailCount = 0;
        }
    }
    HLOGV("poll record from mmap");
    return true;
}
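
// Note: the "past 5s" wording assumes the caller polls with a timeout of
// about 500 ms, so POLL_FAIL_COUNT_THRESHOLD (10) consecutive empty polls
// span roughly 5 seconds; with a different timeout the message is only
// approximate.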

static bool CompareRecordTime(const PerfEvents::MmapFd *left, const PerfEvents::MmapFd *right)
{
    return left->timestamp > right->timestamp;
}

void PerfEvents::ReadRecordsFromMmaps()
{
#ifdef HIPERF_DEBUG_TIME
    const auto readKernelStartTime = steady_clock::now();
#endif
    // get readable mmaps at this time
    for (auto &it : cpuMmap_) {
        ssize_t dataSize = it.second.mmapPage->data_head - it.second.mmapPage->data_tail;
        __sync_synchronize(); // this is the same as rmb in gcc, after reading mmapPage->data_head
        if (dataSize <= 0) {
            continue;
        }
        it.second.dataSize = dataSize;
        MmapRecordHeap_.push_back(&(it.second));
    }
    if (MmapRecordHeap_.empty()) {
        return;
    }
    bool enableFlag = false;
    if (MmapRecordHeap_.size() > 1) {
        for (const auto &it : MmapRecordHeap_) {
            GetRecordFromMmap(*it);
        }
        std::make_heap(MmapRecordHeap_.begin(), MmapRecordHeap_.end(), CompareRecordTime);

        size_t heapSize = MmapRecordHeap_.size();
        while (heapSize > 1) {
            std::pop_heap(MmapRecordHeap_.begin(), MmapRecordHeap_.begin() + heapSize,
                          CompareRecordTime);
            bool auxEvent = false;
            u32 pid = 0;
            u32 tid = 0;
            u64 auxOffset = 0;
            u64 auxSize = 0;
            MoveRecordToBuf(*MmapRecordHeap_[heapSize - 1], auxEvent, auxOffset, auxSize, pid, tid);
            if (isSpe_ && auxEvent) {
                ReadRecordsFromSpeMmaps(*MmapRecordHeap_[heapSize - 1], auxOffset, auxSize, pid, tid);
                enableFlag = true;
            }
            if (GetRecordFromMmap(*MmapRecordHeap_[heapSize - 1])) {
                std::push_heap(MmapRecordHeap_.begin(), MmapRecordHeap_.begin() + heapSize,
                               CompareRecordTime);
            } else {
                heapSize--;
            }
        }
    }

    while (GetRecordFromMmap(*MmapRecordHeap_.front())) {
        bool auxEvent = false;
        u32 pid = 0;
        u32 tid = 0;
        u64 auxOffset = 0;
        u64 auxSize = 0;
        MoveRecordToBuf(*MmapRecordHeap_.front(), auxEvent, auxOffset, auxSize, pid, tid);
        if (isSpe_ && auxEvent) {
            ReadRecordsFromSpeMmaps(*MmapRecordHeap_.front(), auxOffset, auxSize, pid, tid);
            enableFlag = true;
        }
    }
    if (isSpe_ && enableFlag) {
        PerfEventsEnable(false);
        PerfEventsEnable(true);
    }
    MmapRecordHeap_.clear();
    {
        std::lock_guard<std::mutex> lk(mtxRrecordBuf_);
        recordBufReady_ = true;
    }
    cvRecordBuf_.notify_one();
#ifdef HIPERF_DEBUG_TIME
    recordKernelReadTime_ += duration_cast<milliseconds>(steady_clock::now() - readKernelStartTime);
#endif
}

bool PerfEvents::GetRecordFromMmap(MmapFd &mmap)
{
    if (mmap.dataSize <= 0) {
        return false;
    }

    GetRecordFieldFromMmap(mmap, &(mmap.header), mmap.mmapPage->data_tail, sizeof(mmap.header));
    if (mmap.header.type != PERF_RECORD_SAMPLE) {
        mmap.timestamp = 0;
        return true;
    }
    // in PERF_RECORD_SAMPLE : header + u64 sample_id + u64 ip + u32 pid + u32 tid + u64 time
    constexpr size_t timePos = sizeof(perf_event_header) + sizeof(uint64_t) + sizeof(uint64_t) +
                               sizeof(uint32_t) + sizeof(uint32_t);
    GetRecordFieldFromMmap(mmap, &(mmap.timestamp), mmap.mmapPage->data_tail + timePos,
                           sizeof(mmap.timestamp));
    return true;
}

void PerfEvents::GetRecordFieldFromMmap(MmapFd &mmap, void *dest, size_t pos, size_t size)
{
    CHECK_TRUE(mmap.bufSize != 0, NO_RETVAL, 0, "");
    pos = pos % mmap.bufSize;
    size_t tailSize = mmap.bufSize - pos;
    size_t copySize = std::min(size, tailSize);
    if (memcpy_s(dest, copySize, mmap.buf + pos, copySize) != 0) {
        HLOGEP("memcpy_s mmap.buf + pos to dest failed. size %zd", copySize);
    }
    if (copySize < size) {
        size -= copySize;
        if (memcpy_s(static_cast<uint8_t *>(dest) + copySize, size, mmap.buf, size) != 0) {
            HLOGEP("memcpy_s mmap.buf to dest failed. size %zd", size);
        }
    }
}

size_t PerfEvents::GetCallChainPosInSampleRecord(const perf_event_attr &attr)
{
    // reference struct PerfRecordSampleData
    int fixedFieldNumber = __builtin_popcountll(
        attr.sample_type & (PERF_SAMPLE_IDENTIFIER | PERF_SAMPLE_IP | PERF_SAMPLE_TID |
                            PERF_SAMPLE_TIME | PERF_SAMPLE_ADDR | PERF_SAMPLE_ID |
                            PERF_SAMPLE_STREAM_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD));
    size_t pos = sizeof(perf_event_header) + sizeof(uint64_t) * fixedFieldNumber;
    if (attr.sample_type & PERF_SAMPLE_READ) {
        pos += sizeof(read_format);
    }
    return pos;
}
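
// Worked example (illustrative): if sample_type contains exactly
// PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_TIME from the fixed set and
// PERF_SAMPLE_READ is not set, fixedFieldNumber is 3 and the callchain starts
// at sizeof(perf_event_header) + 3 * sizeof(uint64_t) = 8 + 24 = 32 bytes.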

size_t PerfEvents::GetStackSizePosInSampleRecord(MmapFd &mmap)
{
    size_t pos = mmap.posCallChain;
    if (mmap.attr->sample_type & PERF_SAMPLE_CALLCHAIN) {
        uint64_t nr = 0;
        GetRecordFieldFromMmap(mmap, &nr, mmap.mmapPage->data_tail + pos, sizeof(nr));
        pos += (sizeof(nr) + nr * sizeof(uint64_t));
    }
    if (mmap.attr->sample_type & PERF_SAMPLE_RAW) {
        uint32_t raw_size = 0;
        GetRecordFieldFromMmap(mmap, &raw_size, mmap.mmapPage->data_tail + pos, sizeof(raw_size));
        pos += (sizeof(raw_size) + raw_size);
    }
    if (mmap.attr->sample_type & PERF_SAMPLE_BRANCH_STACK) {
        uint64_t bnr = 0;
        GetRecordFieldFromMmap(mmap, &bnr, mmap.mmapPage->data_tail + pos, sizeof(bnr));
        pos += (sizeof(bnr) + bnr * sizeof(PerfBranchEntry));
    }
    if (mmap.attr->sample_type & PERF_SAMPLE_REGS_USER) {
        uint64_t user_abi = 0;
        GetRecordFieldFromMmap(mmap, &user_abi, mmap.mmapPage->data_tail + pos, sizeof(user_abi));
        pos += sizeof(user_abi);
        if (user_abi > 0) {
            uint64_t reg_nr = __builtin_popcountll(mmap.attr->sample_regs_user);
            pos += reg_nr * sizeof(uint64_t);
        }
    }
    if (mmap.attr->sample_type & PERF_SAMPLE_SERVER_PID) {
        uint64_t server_nr = 0;
        GetRecordFieldFromMmap(mmap, &server_nr, mmap.mmapPage->data_tail + pos, sizeof(server_nr));
        pos += (sizeof(server_nr) + server_nr * sizeof(uint64_t));
    }
    return pos;
}

bool PerfEvents::CutStackAndMove(MmapFd &mmap)
{
    constexpr uint32_t alignSize = 64;
    if (!(mmap.attr->sample_type & PERF_SAMPLE_STACK_USER)) {
        return false;
    }
    size_t stackSizePos = GetStackSizePosInSampleRecord(mmap);
    uint64_t stackSize = 0;
    GetRecordFieldFromMmap(mmap, &stackSize, mmap.mmapPage->data_tail + stackSizePos,
                           sizeof(stackSize));
    if (stackSize == 0) {
        return false;
    }
    size_t dynSizePos = stackSizePos + sizeof(uint64_t) + stackSize;
    uint64_t dynSize = 0;
    GetRecordFieldFromMmap(mmap, &dynSize, mmap.mmapPage->data_tail + dynSizePos, sizeof(dynSize));
    uint64_t newStackSize = std::min((dynSize + alignSize - 1) &
                                     (~(alignSize >= 1 ? alignSize - 1 : 0)), stackSize);
    if (newStackSize >= stackSize) {
        return false;
    }
    HLOGM("stackSize %" PRIx64 " dynSize %" PRIx64 " newStackSize %" PRIx64 "\n", stackSize, dynSize, newStackSize);
    // move and cut stack_data
    // mmap: |<+++copy1+++>|<++++++copy2++++++>|<---------------cut--------------->|<+++copy3+++>|
    //       ^             ^                   ^                                   ^
    //       new_header    stackSizePos        <stackSize-dynSize>                 dynSizePos
    uint16_t recordSize = mmap.header.size;
    mmap.header.size -= stackSize - newStackSize; // reduce the stack size
    uint8_t *buf = recordBuf_->AllocForWrite(mmap.header.size);
    // copy1: new_header
    CHECK_TRUE(buf != nullptr, false, 0, "");
    if (memcpy_s(buf, sizeof(perf_event_header), &(mmap.header), sizeof(perf_event_header)) != 0) {
        HLOGEP("memcpy_s mmap.header to buf failed. size %zd", sizeof(perf_event_header));
    }
    size_t copyPos = sizeof(perf_event_header);
    size_t copySize = stackSizePos - sizeof(perf_event_header) + sizeof(stackSize) + newStackSize;
    // copy2: copy stack_size, data[stack_size]
    GetRecordFieldFromMmap(mmap, buf + copyPos, mmap.mmapPage->data_tail + copyPos, copySize);
    copyPos += copySize;
    // copy3: copy dyn_size
    GetRecordFieldFromMmap(mmap, buf + copyPos, mmap.mmapPage->data_tail + dynSizePos,
                           recordSize - dynSizePos);
    // update stack_size
    if (memcpy_s(buf + stackSizePos, sizeof(stackSize), &(newStackSize), sizeof(newStackSize)) != 0) {
        HLOGEP("memcpy_s newStack to buf stackSizePos failed. size %zd", sizeof(newStackSize));
    }
    recordBuf_->EndWrite();
    __sync_synchronize();
    mmap.mmapPage->data_tail += recordSize;
    mmap.dataSize -= recordSize;
    return true;
}
1680
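// Move one record from the kernel mmap page into recordBuf_. Samples may be
// stack-cut first; PERF_RECORD_LOST records only update the lost counter;
// PERF_RECORD_AUX records additionally report their aux-area offset/size and
// the originating pid/tid to the caller. data_tail is advanced in every path
// so the kernel can reuse the space.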
void PerfEvents::MoveRecordToBuf(MmapFd &mmap, bool &isAuxEvent, u64 &auxOffset, u64 &auxSize, u32 &pid, u32 &tid)
{
    uint8_t *buf = nullptr;
    if (mmap.header.type == PERF_RECORD_SAMPLE) {
        if (recordBuf_->GetFreeSize() <= BUFFER_CRITICAL_LEVEL) {
            lostSamples_++;
            HLOGD("BUFFER_CRITICAL_LEVEL: lost sample record");
            goto RETURN;
        }
        if (CutStackAndMove(mmap)) {
            return;
        }
    } else if (mmap.header.type == PERF_RECORD_LOST) {
        // in PERF_RECORD_LOST : header + u64 id + u64 lost
        constexpr size_t lostPos = sizeof(perf_event_header) + sizeof(uint64_t);
        uint64_t lost = 0;
        GetRecordFieldFromMmap(mmap, &lost, mmap.mmapPage->data_tail + lostPos, sizeof(lost));
        lostSamples_ += lost;
        HLOGD("PERF_RECORD_LOST: lost sample record");
        goto RETURN;
    }
    if (mmap.header.type == PERF_RECORD_AUX) {
        isAuxEvent = true;
        // in AUX : header + u64 aux_offset + u64 aux_size + u64 flags + sample_id
        uint64_t auxOffsetPos = sizeof(perf_event_header);
        uint64_t auxSizePos = sizeof(perf_event_header) + sizeof(uint64_t);
        uint64_t pidPos = auxSizePos + sizeof(uint64_t) * 2; // 2 : skip aux_size and flags
        uint64_t tidPos = pidPos + sizeof(uint32_t);
        GetRecordFieldFromMmap(mmap, &auxOffset, mmap.mmapPage->data_tail + auxOffsetPos, sizeof(auxOffset));
        GetRecordFieldFromMmap(mmap, &auxSize, mmap.mmapPage->data_tail + auxSizePos, sizeof(auxSize));
        GetRecordFieldFromMmap(mmap, &pid, mmap.mmapPage->data_tail + pidPos, sizeof(pid));
        GetRecordFieldFromMmap(mmap, &tid, mmap.mmapPage->data_tail + tidPos, sizeof(tid));
    }

    if ((buf = recordBuf_->AllocForWrite(mmap.header.size)) == nullptr) {
        // this record type must be non-sample
        lostNonSamples_++;
        HLOGD("alloc buffer failed: lost non-sample record");
        goto RETURN;
    }

    GetRecordFieldFromMmap(mmap, buf, mmap.mmapPage->data_tail, mmap.header.size);
    recordBuf_->EndWrite();
RETURN:
    __sync_synchronize();
    mmap.mmapPage->data_tail += mmap.header.size;
    mmap.dataSize -= mmap.header.size;
}

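// Block until the producer signals new data (recordBufReady_) or the read
// thread is asked to stop.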
inline void PerfEvents::WaitDataFromRingBuffer()
{
    std::unique_lock<std::mutex> lock(mtxRrecordBuf_);
    cvRecordBuf_.wait(lock, [this] {
        if (recordBufReady_) {
            recordBufReady_ = false;
            return true;
        }
        return !readRecordThreadRunning_;
    });
}

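// Parse one raw record from recordBuf_ and hand it to recordCallBack_. In
// backtrack mode a sample may be deferred instead (IsSkipRecordForBacktrack
// returns true); the record is then left unread in the buffer and false is
// returned so the caller stops draining.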
inline bool PerfEvents::ProcessRecord(const perf_event_attr* attr, uint8_t* data)
{
    uint32_t* type = reinterpret_cast<uint32_t *>(data);
#ifdef HIPERF_DEBUG_TIME
    const auto readingStartTime_ = steady_clock::now();
#endif
#if !HIDEBUG_SKIP_CALLBACK
    PerfEventRecord& record = PerfEventRecordFactory::GetPerfEventRecord(*type, data, *attr);
    if (backtrack_ && readRecordThreadRunning_ && record.GetType() == PERF_RECORD_SAMPLE) {
        const PerfRecordSample& sample = static_cast<const PerfRecordSample&>(record);
        if (IsSkipRecordForBacktrack(sample)) {
            return false;
        }
    }

    recordCallBack_(record);
#endif
    recordEventCount_++;
#ifdef HIPERF_DEBUG_TIME
    recordCallBackTime_ += duration_cast<milliseconds>(steady_clock::now() - readingStartTime_);
#endif
    recordBuf_->EndRead();
    return true;
}

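// Consumer thread: drain records from recordBuf_ whenever the producer wakes
// us, then flush whatever is left once the thread is asked to stop.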
void PerfEvents::ReadRecordFromBuf()
{
    const perf_event_attr *attr = GetDefaultAttr();
    uint8_t *p = nullptr;

    while (readRecordThreadRunning_) {
        WaitDataFromRingBuffer();
        bool output = outputTracking_;
        while ((p = recordBuf_->GetReadData()) != nullptr) {
            if (!ProcessRecord(attr, p)) {
                break;
            }
        }
        if (backtrack_ && output) {
            outputTracking_ = false;
            outputEndTime_ = 0;
        }
    }
    HLOGD("exit because track stopped");

    // read the data left over in buffer
    while ((p = recordBuf_->GetReadData()) != nullptr) {
        ProcessRecord(attr, p);
    }
    HLOGD("read all records from buffer");
}

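// Return true once every tracked target is gone: for a tracked command, when
// the child has been waited on; for a pid list, when no /proc/<pid> directory
// remains. System-wide tracking never exits this way.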
bool PerfEvents::HaveTargetsExit(const std::chrono::steady_clock::time_point &startTime)
{
    if (systemTarget_) {
        return false;
    }
    if (trackedCommand_) {
        if (trackedCommand_->GetState() < TrackedCommand::State::COMMAND_STARTED) {
            return false; // not started yet
        }
        int wstatus;
        if (trackedCommand_->WaitCommand(wstatus)) {
            milliseconds usedMsTick = duration_cast<milliseconds>(steady_clock::now() - startTime);
            printf("tracked command(%s) has exited (total %" PRIu64 " ms)\n",
                   trackedCommand_->GetCommandName().c_str(), (uint64_t)usedMsTick.count());
            return true;
        }
        return false;
    }

    for (auto it = pids_.begin(); it != pids_.end();) {
        if (IsDir("/proc/" + std::to_string(*it))) {
            it++;
        } else {
            it = pids_.erase(it);
        }
    }
    if (pids_.empty()) {
        milliseconds usedMsTick = duration_cast<milliseconds>(steady_clock::now() - startTime);
        printf("tracked processes have exited (total %" PRIu64 " ms)\n", (uint64_t)usedMsTick.count());
        return true;
    }
    return false;
}

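// Main sampling loop: once a second check whether the targets have exited,
// honor the timeout (unless backtracking), and otherwise poll the mmap fds
// and drain any ready records.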
void PerfEvents::RecordLoop()
{
    // calc the time
    const auto startTime = steady_clock::now();
    const auto endTime = startTime + timeOut_;
    milliseconds usedTimeMsTick {};
    int count = 1;

    while (g_trackRunning) {
        // time check point
        const auto thisTime = steady_clock::now();
        usedTimeMsTick = duration_cast<milliseconds>(thisTime - startTime);
        if ((uint64_t)usedTimeMsTick.count() > (uint64_t)(count * THOUSANDS)) {
            if (HaveTargetsExit(startTime)) {
                break;
            }
            ++count;
        }

        if (!backtrack_ && thisTime >= endTime) {
            printf("Timeout exit (total %" PRIu64 " ms)\n", (uint64_t)usedTimeMsTick.count());
            if (trackedCommand_) {
                trackedCommand_->Stop();
            }
            break;
        }

        int timeLeft = duration_cast<milliseconds>(endTime - thisTime).count();
        if (IsRecordInMmap(std::min(timeLeft, pollTimeOut_))) {
            ReadRecordsFromMmaps();
        }
    }

    if (!g_trackRunning) {
        // for user interrupt situation, print time statistics
        usedTimeMsTick = duration_cast<milliseconds>(steady_clock::now() - startTime);
        printf("User interrupt exit (total %" PRIu64 " ms)\n", (uint64_t)usedTimeMsTick.count());
    }
}

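// Counting loop for the stat command: print an intermediate report every
// timeReport_ interval, stop on target exit or timeout, and sleep 100 ms
// while plenty of time remains (200 us otherwise) to keep the loop cheap.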
void PerfEvents::StatLoop()
{
    // calc the time
    const auto startTime = steady_clock::now();
    const auto endTime = startTime + timeOut_;
    auto nextReportTime = startTime + timeReport_;
    milliseconds usedTimeMsTick {};
    __u64 durationInSec = 0;
    int64_t thresholdTimeInMs = 2 * HUNDREDS;

    while (g_trackRunning) {
        // time check point
        const auto thisTime = steady_clock::now();
        if (timeReport_ != milliseconds::zero()) {
            // stat cmd: print an intermediate report every timeReport_ interval
            if (thisTime >= nextReportTime) {
                usedTimeMsTick = duration_cast<milliseconds>(thisTime - startTime);
                durationInSec = usedTimeMsTick.count();
                auto leftTimeMsTick = duration_cast<milliseconds>(endTime - thisTime);
                if (reportPtr_ == nullptr) {
                    printf("\nReport at %" PRIu64 " ms (%" PRIu64 " ms left):\n",
                           static_cast<uint64_t>(usedTimeMsTick.count()),
                           static_cast<uint64_t>(leftTimeMsTick.count()));
                } else {
                    fprintf(reportPtr_, "\nReport at %" PRIu64 " ms (%" PRIu64 " ms left):\n",
                            static_cast<uint64_t>(usedTimeMsTick.count()),
                            static_cast<uint64_t>(leftTimeMsTick.count()));
                }
                nextReportTime += timeReport_;
                StatReport(durationInSec);
            }
        }

        if (HaveTargetsExit(startTime)) {
            break;
        }

        if (thisTime >= endTime) {
            usedTimeMsTick = duration_cast<milliseconds>(thisTime - startTime);
            durationInSec = usedTimeMsTick.count();
            if (reportPtr_ == nullptr) {
                printf("Timeout exit (total %" PRIu64 " ms)\n", static_cast<uint64_t>(usedTimeMsTick.count()));
            } else {
                fprintf(reportPtr_, "Timeout exit (total %" PRIu64 " ms)\n",
                        static_cast<uint64_t>(usedTimeMsTick.count()));
            }
            if (trackedCommand_) {
                trackedCommand_->Stop();
            }
            break;
        }

        // if more than 200 ms are left, sleep 100 ms; otherwise sleep 200 us
        uint64_t defaultSleepUs = 2 * HUNDREDS; // 200us
        if (timeReport_ == milliseconds::zero()
            && (timeOut_.count() * THOUSANDS) > thresholdTimeInMs) {
            milliseconds leftTimeMsTmp = duration_cast<milliseconds>(endTime - thisTime);
            if (leftTimeMsTmp.count() > thresholdTimeInMs) {
                defaultSleepUs = HUNDREDS * THOUSANDS; // 100ms
            }
        }
        std::this_thread::sleep_for(microseconds(defaultSleepUs));
    }

    if (!g_trackRunning) {
        // for user interrupt situation, print time statistics
        usedTimeMsTick = duration_cast<milliseconds>(steady_clock::now() - startTime);
        printf("User interrupt exit (total %" PRIu64 " ms)\n", static_cast<uint64_t>(usedTimeMsTick.count()));
    }

    if (timeReport_ == milliseconds::zero()) {
        StatReport(durationInSec);
    }
}

const std::string PerfEvents::GetTypeName(const perf_type_id type_id)
{
    auto it = PERF_TYPES.find(type_id);
    if (it != PERF_TYPES.end()) {
        return it->second;
    }
    return "<not found>";
}

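// Timer thread: refresh currentTimeSecond_ every UPDATE_TIME_INTERVAL (10 ms)
// so hot paths (e.g. the backtrack check below) can compare sample timestamps
// against "now" without calling clock_gettime themselves.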
void PerfEvents::UpdateCurrentTime()
{
    pthread_setname_np(pthread_self(), "timer_thread");
    while (updateTimeThreadRunning_) {
        struct timespec ts = {0};
        if (clock_gettime(CLOCK_MONOTONIC, &ts) != -1) {
            currentTimeSecond_.store(static_cast<uint64_t>(ts.tv_sec));
        }

        std::this_thread::sleep_for(std::chrono::milliseconds(UPDATE_TIME_INTERVAL));
    }
}

// Check whether this record should be skipped (kept in the ring buffer for
// later output); this function may only be called in backtrack mode.
bool PerfEvents::IsSkipRecordForBacktrack(const PerfRecordSample& sample)
{
    if (outputTracking_) {
        // while outputting records, only skip those later than the end time
        if (sample.GetTime() / NANO_SECONDS_PER_SECOND > outputEndTime_) {
            outputTracking_ = false;
            outputEndTime_ = 0;
            return true;
        }
        return false;
    }

    // only keep recent records that fall within the backtrack window
    if ((currentTimeSecond_.load() - sample.GetTime() / NANO_SECONDS_PER_SECOND) > backtrackTime_) {
        return false;
    }
    return true;
}
} // namespace HiPerf
} // namespace Developtools
} // namespace OHOS