/*
 * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "perf_events.h"

#include <cassert>
#include <cinttypes>
#include <csignal>
#include <cstdint>
#include <cstdlib>
#include <iostream>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/resource.h>
#include <sys/syscall.h>
#include <unistd.h>
#if defined(CONFIG_HAS_SYSPARA)
#include <parameters.h>
#endif

#include "spe_decoder.h"
#include "debug_logger.h"
#include "hiperf_hilog.h"
#include "register.h"
#include "subcommand_dump.h"
#include "symbols_file.h"
#include "utilities.h"

using namespace std::chrono;
namespace OHOS {
namespace Developtools {
namespace HiPerf {
bool PerfEvents::updateTimeThreadRunning_ = true;
std::atomic<uint64_t> PerfEvents::currentTimeSecond_ = 0;
static std::atomic_bool g_trackRunning = false;
static constexpr int32_t UPDATE_TIME_INTERVAL = 10; // 10ms
static constexpr uint64_t NANO_SECONDS_PER_SECOND = 1000000000;
static constexpr uint32_t POLL_FAIL_COUNT_THRESHOLD = 10;

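// Thin wrapper over the perf_event_open(2) syscall; dumps the attr when the
// open fails. Illustrative call (hypothetical values): Open(attr, 0, -1, -1, 0)
// opens a counter for the calling process on any CPU with no group leader.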
OHOS::UniqueFd PerfEvents::Open(perf_event_attr &attr, pid_t pid, int cpu, int groupFd,
                                unsigned long flags)
{
    OHOS::UniqueFd fd = UniqueFd(syscall(__NR_perf_event_open, &attr, pid, cpu, groupFd, flags));
    if (fd < 0) {
        HLOGEP("syscall perf_event_open failed.");
        // dump when open failed.
        SubCommandDump::DumpPrintEventAttr(attr, std::numeric_limits<int>::min());
    }
    HLOGV("perf_event_open: got fd %d for pid %d cpu %d group %d flags %lu", fd.Get(), pid, cpu, groupFd, flags);
    return fd;
}

void PerfEvents::SpeReadData(void *dataPage, u64 *dataTail, uint8_t *buf, u32 size)
{
    void *src = nullptr;
    u32 left = 0;
    u32 offset = static_cast<u32>(*dataTail);
    u32 copySize;
    u32 traceSize = size;
    CHECK_TRUE(size > (auxMmapPages_ * pageSize_ + sizeof(struct PerfRecordAuxtraceData)),
               NO_RETVAL, 1, "buf size invalid");
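    // The aux area is a ring buffer of auxMmapPages_ pages: wrap the read
    // offset, copy up to the end of the buffer, then continue from the start
    // until the requested size has been drained.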
    while (traceSize > 0) {
        offset = CALC_OFFSET(offset, auxMmapPages_ * pageSize_);
        left = static_cast<u32>(auxMmapPages_ * pageSize_ - offset);
        copySize = std::min(traceSize, left);
        src = PTR_ADD(dataPage, offset);
        if (memcpy_s(buf, left, src, copySize) != 0) {
            HLOGV("SpeReadData memcpy_s failed.");
        }

        traceSize -= copySize;
        offset += copySize;
        buf = reinterpret_cast<uint8_t *>(PTR_ADD(buf, copySize));
    }

    *dataTail += size;
}

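// Build a quasi-unique reference id for an auxtrace record from the monotonic
// raw clock (seconds XOR nanoseconds).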
static u64 arm_spe_reference()
{
    struct timespec ts;
    clock_gettime(CLOCK_MONOTONIC_RAW, &ts);
    return static_cast<uint64_t>(ts.tv_sec) ^ static_cast<uint64_t>(ts.tv_nsec);
}

void PerfEvents::ReadRecordsFromSpeMmaps(MmapFd& mmapFd, u64 auxOffset, u64 auxSize, u32 pid, u32 tid)
{
    if (mmapFd.mmapPage == nullptr || mmapFd.auxBuf == nullptr) {
        printf("ReadRecordsFromSpeMmaps mmapFd.mmapPage or mmapFd.auxBuf is nullptr, mmapFd.fd: %d\n", mmapFd.fd);
        return;
    }
    perf_event_mmap_page *userPage = reinterpret_cast<perf_event_mmap_page *>(mmapFd.mmapPage);
    void *auxPage = mmapFd.auxBuf;
    userPage->aux_tail = auxOffset - auxSize;
    u64 auxHead = userPage->aux_head;
    u64 auxTail = userPage->aux_tail;
    HLOGD("mmap cpu %d, aux_head: %llu, aux_tail:%llu, auxOffset:%llu, auxSize:%llu",
          mmapFd.cpu, auxHead, auxTail, auxOffset, auxSize);
    if (auxHead <= auxTail) {
        return;
    }
    if (auxSize > auxMmapPages_ * pageSize_) {
        userPage->aux_tail += auxSize;
        return;
    }

    int cpu = mmapFd.cpu;
    __sync_synchronize();
    PerfRecordAuxtrace auxtraceRecord = PerfRecordAuxtrace(auxSize, auxTail,
        arm_spe_reference(), cpu, tid, cpu, pid);
    static std::vector<u8> vbuf(RECORD_SIZE_LIMIT);
    uint8_t *buf;
    if ((buf = recordBuf_->AllocForWrite(auxtraceRecord.header_.size + auxSize)) == nullptr) {
        HLOGD("alloc buffer failed: PerfRecordAuxtrace record, readSize: %llu", auxSize);
        return;
    }
    auxtraceRecord.GetBinary1(vbuf);
    if (memcpy_s(buf, auxtraceRecord.header_.size, vbuf.data(), auxtraceRecord.header_.size) != 0) {
        HLOGE("memcpy_s return failed");
        return;
    }
    buf += auxtraceRecord.header_.size;

    while (auxSize > 0) {
        u64 readSize = pageSize_;
        if (auxSize < pageSize_) {
            readSize = auxSize;
        }
        __sync_synchronize();
        SpeReadData(auxPage, &auxTail, buf, readSize);
        __sync_synchronize();
        userPage->aux_tail += readSize;
        auxTail = userPage->aux_tail;
        buf += readSize;
        auxSize -= readSize;
    }
    recordBuf_->EndWrite();
}

u32 GetSpeType()
{
    FILE *fd;
    u32 speType;

    fd = fopen("/sys/devices/arm_spe_0/type", "r");
    if (fd == nullptr) {
        HLOGV("open sysfs file failed");
        return -1;
    }
    if (fscanf_s(fd, "%u", &speType) <= 0) {
        HLOGV("fscanf_s file failed");
        (void)fclose(fd);
        return -1;
    }

    (void)fclose(fd);
    return speType;
}

PerfEvents::PerfEvents() : timeOut_(DEFAULT_TIMEOUT * THOUSANDS), timeReport_(0)
{
    pageSize_ = sysconf(_SC_PAGESIZE);
    HLOGI("BuildArch %s", GetArchName(BUILD_ARCH_TYPE).c_str());
}

PerfEvents::~PerfEvents()
{
    // close mmap
    for (auto it = cpuMmap_.begin(); it != cpuMmap_.end();) {
        const MmapFd &mmapItem = it->second;
        if (!isSpe_) {
            if (munmap(mmapItem.mmapPage, (1 + mmapPages_) * pageSize_) == -1) {
                HLOGW("munmap failed.");
            }
        } else {
            if (munmap(mmapItem.mmapPage, (1 + auxMmapPages_) * pageSize_) == -1) {
                HLOGW("munmap failed.");
            }
            if (munmap(mmapItem.auxBuf, auxMmapPages_ * pageSize_) == -1) {
                HLOGW("munmap failed.");
            }
        }
        it = cpuMmap_.erase(it);
    }

    ExitReadRecordBufThread();
}

bool PerfEvents::IsEventSupport(perf_type_id type, __u64 config)
{
    std::unique_ptr<perf_event_attr> attr = PerfEvents::CreateDefaultAttr(type, config);
    CHECK_TRUE(attr == nullptr, false, 1, "attr is nullptr");
    UniqueFd fd = Open(*attr.get());
    if (fd < 0) {
        printf("event not support %s\n", GetStaticConfigName(type, config).c_str());
        return false;
    }
    return true;
}

bool PerfEvents::IsEventAttrSupport(perf_event_attr &attr)
{
    UniqueFd fd = Open(attr);
    if (fd < 0) {
        return false;
    }
    return true;
}

bool PerfEvents::SetBranchSampleType(uint64_t value)
{
    if (value != 0) {
        // the cpu-cycles event must be supported
        std::unique_ptr<perf_event_attr> attr =
            PerfEvents::CreateDefaultAttr(PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES);
        CHECK_TRUE(attr == nullptr, false, 0, "");
        attr->sample_type |= PERF_SAMPLE_BRANCH_STACK;
        attr->branch_sample_type = value;
        if (!IsEventAttrSupport(*attr.get())) {
            return false;
        }
    }
    branchSampleType_ = value;
    return true;
}

bool PerfEvents::AddDefaultEvent(perf_type_id type)
{
    auto it = DEFAULT_TYPE_CONFIGS.find(type);
    if (it != DEFAULT_TYPE_CONFIGS.end()) {
        for (auto config : it->second) {
            AddEvent(type, config);
        }
    }
    return true;
}

bool PerfEvents::AddOffCpuEvent()
{
    std::string eventName = "sched:sched_switch";
    if (eventSpaceType_ == EventSpaceType::USER) {
        eventName += ":u";
    } else if (eventSpaceType_ == EventSpaceType::KERNEL) {
        eventName += ":k";
    }
    return AddEvent(eventName);
}

bool PerfEvents::AddEvents(const std::vector<std::string> &eventStrings, bool group)
{
    bool followGroup = false;
    HLOGV(" %s", VectorToString(eventStrings).c_str());

    for (std::string eventString : eventStrings) {
        if (!AddEvent(eventString, followGroup)) {
            return false;
        }
        // this is a group request: follow-up events need to join the previous group
        if (group) {
            followGroup = true;
        }
    }
    return true;
}

// event name can have :k or :u suffix
// tracepoint event name is like sched:sched_switch
// clang-format off
bool PerfEvents::ParseEventName(const std::string &nameStr,
    std::string &name, bool &excludeUser, bool &excludeKernel, bool &isTracePoint)
// clang-format on
{
    name = nameStr;
    excludeUser = false;
    excludeKernel = false;
    isTracePoint = false;
    if (nameStr.find(":") != std::string::npos) {
        static constexpr size_t maxNumberTokensNoTracePoint = 2;
        static constexpr size_t maxNumberTokensTracePoint = 3;
        std::vector<std::string> eventTokens = StringSplit(nameStr, ":");
        if (eventTokens.size() == maxNumberTokensTracePoint) {
            // tracepoint event with :u or :k
            if (eventTokens.back() == "k") {
                excludeUser = true;
                HLOGV("kernelOnly event");
            } else if (eventTokens.back() == "u") {
                excludeKernel = true;
                HLOGV("userOnly event");
            } else {
                HLOGV("unknown event name %s", nameStr.c_str());
                return false;
            }
            name = eventTokens[0] + ":" + eventTokens[1];
            isTracePoint = true;
        } else if (eventTokens.size() == maxNumberTokensNoTracePoint) {
            name = eventTokens[0];
            if (eventTokens.back() == "k") {
                excludeUser = true;
                HLOGV("kernelOnly event");
            } else if (eventTokens.back() == "u") {
                excludeKernel = true;
                HLOGV("userOnly event");
            } else {
                name = nameStr;
                isTracePoint = true;
                HLOGV("tracepoint event is in form of xx:xxx");
            }
        } else {
            printf("unknown ':' format:'%s'\n", nameStr.c_str());
            return false;
        }
        if (reportCallBack_) {
            if ((eventTokens[0] == "sw-task-clock" || eventTokens[0] == "sw-cpu-clock") &&
                (excludeUser || excludeKernel)) {
                printf(
                    "event type %s with modifier u and modifier k is not supported by the kernel.\n",
                    eventTokens[0].c_str());
                return false;
            }
        }
    }
    return true;
}

bool PerfEvents::AddEvent(const std::string &eventString, bool followGroup)
{
    std::string eventName;
    bool excludeUser = false;
    bool excludeKernel = false;
    bool isTracePointEvent = false;
    if (!ParseEventName(eventString, eventName, excludeUser, excludeKernel, isTracePointEvent)) {
        return false;
    }
    if (excludeUser) {
        eventSpaceType_ |= EventSpaceType::KERNEL;
    } else if (excludeKernel) {
        eventSpaceType_ |= EventSpaceType::USER;
    } else {
        eventSpaceType_ |= EventSpaceType::USER_KERNEL;
    }

    // look up the parsed event name
    if (isTracePointEvent) {
        if (traceConfigTable.empty()) {
            LoadTracepointEventTypesFromSystem();
        }
        for (auto traceType : traceConfigTable) {
            if (traceType.second == eventName) {
                return AddEvent(PERF_TYPE_TRACEPOINT, traceType.first, excludeUser, excludeKernel,
                                followGroup);
            }
        }
    } else {
        if (eventName == "arm_spe_0") {
            u32 speType = GetSpeType();
            return AddSpeEvent(speType);
        }
        if (StringStartsWith(eventName, "0x")
            && eventName.length() <= MAX_HEX_EVENT_NAME_LENGTH && IsHexDigits(eventName)) {
            return AddEvent(PERF_TYPE_RAW, std::stoull(eventName, nullptr, NUMBER_FORMAT_HEX_BASE),
                            excludeUser, excludeKernel, followGroup);
        } else {
            auto [find, typeId, configId] = GetStaticConfigId(eventName);
            if (find) {
                return AddEvent(typeId, configId, excludeUser, excludeKernel, followGroup);
            }
        }
    }

    printf("%s event is not supported by the kernel.\n", eventName.c_str());
    return false;
}

bool PerfEvents::AddSpeEvent(u32 type, bool followGroup)
{
    EventGroupItem &eventGroupItem = followGroup ? eventGroupItem_.back() :
                                                   eventGroupItem_.emplace_back();
    EventItem &eventItem = eventGroupItem.eventItems.emplace_back();

    if (memset_s(&eventItem.attr, sizeof(perf_event_attr), 0, sizeof(perf_event_attr)) != EOK) {
        HLOGE("memset_s failed in PerfEvents::AddSpeEvent");
        return false;
    }
    eventItem.attr.type = type;
    eventItem.attr.sample_period = MULTIPLE_SIZE;
    eventItem.attr.size = sizeof(perf_event_attr);
    eventItem.attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING | PERF_FORMAT_ID;
    eventItem.attr.inherit = (inherit_ ? 1 : 0);
    eventItem.attr.sample_type = SAMPLE_ID | PERF_SAMPLE_IP;
    eventItem.attr.sample_id_all = 1;
    eventItem.attr.disabled = 1;
    eventItem.attr.config = 0x700010007; // 0x700010007 : enable all
    if (config_ != 0) {
        eventItem.attr.config = config_;
    }
    if (config1_ != 0) {
        eventItem.attr.config1 = config1_;
    }
    if (config2_ != 0) {
        eventItem.attr.config2 = config2_;
    }
    HLOGD("config_ is 0x%" PRIx64 ", config1_ is 0x%" PRIx64 ", config2_ is 0x%" PRIx64 "",
          config_, config1_, config2_);
    return true;
}

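// Pack the SPE option map into the perf_event_attr config words. The bit
// offsets below follow the arm_spe PMU format as used here: config holds the
// ts/pa/pct enables, jitter, and the branch/load/store filters; config1 holds
// the event filter mask; config2 holds the minimum latency (12 bits).
// Illustrative input (hypothetical): {"ts_enable", 1}, {"load_filter", 1},
// {"min_latency", 0x40} sets config bits 0 and 33 and makes config2 = 0x40.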
void PerfEvents::SetConfig(std::map<const std::string, uint64_t> &speOptMaps)
{
    constexpr uint tsOffset = 0;
    constexpr uint paOffset = 1;
    constexpr uint pctOffset = 2;
    constexpr uint jitterOffset = 16;
    constexpr uint branchOffset = 32;
    constexpr uint loadOffset = 33;
    constexpr uint storeOffset = 34;
    config_ |= (speOptMaps["ts_enable"] & 0x1) << tsOffset;
    config_ |= (speOptMaps["pa_enable"] & 0x1) << paOffset;
    config_ |= (speOptMaps["pct_enable"] & 0x1) << pctOffset;
    config_ |= (speOptMaps["jitter"] & 0x1) << jitterOffset;
    config_ |= (speOptMaps["branch_filter"] & 0x1) << branchOffset;
    config_ |= (speOptMaps["load_filter"] & 0x1) << loadOffset;
    config_ |= (speOptMaps["store_filter"] & 0x1) << storeOffset;
    config1_ |= speOptMaps["event_filter"];
    config2_ |= speOptMaps["min_latency"] & 0xfff;
}

bool PerfEvents::AddEvent(perf_type_id type, __u64 config, bool excludeUser, bool excludeKernel,
                          bool followGroup)
{
    HLOG_ASSERT(!excludeUser || !excludeKernel);
    CHECK_TRUE(followGroup && eventGroupItem_.empty(), false, 1, "no group leader created before");
    // the event must be supported by the kernel
    CHECK_TRUE(!IsEventSupport(type, config), false, 0, "");
    HLOGV("type %d config %llu excludeUser %d excludeKernel %d followGroup %d", type, config,
          excludeUser, excludeKernel, followGroup);

    // reuse the last group when following, otherwise start a new group
    EventGroupItem &eventGroupItem = followGroup ? eventGroupItem_.back()
                                                 : eventGroupItem_.emplace_back();
    // always a new item
    EventItem &eventItem = eventGroupItem.eventItems.emplace_back();

    eventItem.typeName = GetTypeName(type);
    if (type == PERF_TYPE_TRACEPOINT) {
        eventItem.configName = GetTraceConfigName(config);
    } else {
        eventItem.configName = GetStaticConfigName(type, config);
    }

    // attr
    if (memset_s(&eventItem.attr, sizeof(perf_event_attr), 0, sizeof(perf_event_attr)) != EOK) {
        HLOGE("memset_s failed in PerfEvents::AddEvent");
        return false;
    }
    eventItem.attr.size = sizeof(perf_event_attr);
    eventItem.attr.type = type;
    eventItem.attr.config = config;
    eventItem.attr.disabled = 1;
    eventItem.attr.read_format =
        PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING | PERF_FORMAT_ID;

    eventItem.attr.inherit = (inherit_ ? 1 : 0);
    eventItem.attr.exclude_kernel = excludeKernel;
    eventItem.attr.exclude_user = excludeUser;

    // we also need mmap for record
    if (recordCallBack_) {
        if (samplePeriod_ > 0) {
            eventItem.attr.freq = 0;
            eventItem.attr.sample_freq = 0;
            eventItem.attr.sample_period = samplePeriod_;
        } else if (sampleFreq_ > 0) {
            eventItem.attr.freq = 1;
            eventItem.attr.sample_freq = sampleFreq_;
        } else {
            if (type == PERF_TYPE_TRACEPOINT) {
                eventItem.attr.freq = 0;
                eventItem.attr.sample_period = DEFAULT_SAMPLE_PERIOD;
            } else {
                eventItem.attr.freq = 1;
                eventItem.attr.sample_freq = DEFAULT_SAMPLE_FREQUNCY;
            }
        }

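        // wake the reader when half of the ring buffer is filled, capped at
        // 1 MiB so large mappings do not delay wakeups too long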
        eventItem.attr.watermark = 1;
        eventItem.attr.wakeup_watermark = (mmapPages_ * pageSize_) >> 1;
        static constexpr unsigned int maxWakeupMark = 1024 * 1024;
        if (eventItem.attr.wakeup_watermark > maxWakeupMark) {
            eventItem.attr.wakeup_watermark = maxWakeupMark;
        }

        // for a group of events, only enable comm/mmap on the first event
        if (!followGroup) {
            eventItem.attr.comm = 1;
            eventItem.attr.mmap = 1;
            eventItem.attr.mmap2 = 1;
            eventItem.attr.mmap_data = 1;
        }

        if (sampleStackType_ == SampleStackType::DWARF) {
            eventItem.attr.sample_type = SAMPLE_TYPE | PERF_SAMPLE_CALLCHAIN |
                                         PERF_SAMPLE_STACK_USER | PERF_SAMPLE_REGS_USER;
            eventItem.attr.exclude_callchain_user = 1;
            eventItem.attr.sample_regs_user = GetSupportedRegMask(GetDeviceArch());
            eventItem.attr.sample_stack_user = dwarfSampleStackSize_;
        } else if (sampleStackType_ == SampleStackType::FP) {
            eventItem.attr.sample_type = SAMPLE_TYPE | PERF_SAMPLE_CALLCHAIN;
        } else {
            eventItem.attr.sample_type = SAMPLE_TYPE;
        }

        if (isHM_) {
            eventItem.attr.sample_type |= PERF_SAMPLE_SERVER_PID;
        }
    }

    // set clock id
    if (clockId_ != -1) {
        eventItem.attr.use_clockid = 1;
        eventItem.attr.clockid = clockId_;
    }
    if (branchSampleType_ != 0) {
        eventItem.attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
        eventItem.attr.branch_sample_type = branchSampleType_;
    }

    HLOGV("Add Event: '%s':'%s' %s %s %s", eventItem.typeName.c_str(), eventItem.configName.c_str(),
          excludeUser ? "excludeUser" : "", excludeKernel ? "excludeKernel" : "",
          followGroup ? "" : "group leader");

    return true;
}

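// Build a minimal disabled perf_event_attr for probing event support.
// Illustrative probe (hypothetical): passing the attr returned by
// CreateDefaultAttr(PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES) to
// IsEventAttrSupport checks whether the kernel can open a cpu-cycles counter.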
std::unique_ptr<perf_event_attr> PerfEvents::CreateDefaultAttr(perf_type_id type, __u64 config)
{
    std::unique_ptr<perf_event_attr> attr = std::make_unique<perf_event_attr>();
    if (memset_s(attr.get(), sizeof(perf_event_attr), 0, sizeof(perf_event_attr)) != EOK) {
        HLOGE("memset_s failed in PerfEvents::CreateDefaultAttr");
        return nullptr;
    }
    attr->size = sizeof(perf_event_attr);
    attr->type = type;
    attr->config = config;
    attr->disabled = 1;
    return attr;
}

// should move to upper caller
static struct sigaction g_oldSig {
};
static bool CaptureSig()
{
    HLOGD("capture Ctrl + C to end sampling gracefully");
    struct sigaction sig {
    };

    sig.sa_handler = [](int sig) {
        printf("\n Ctrl + C detected.\n");
        g_trackRunning = false;
    };

    sig.sa_flags = 0;
    if (sigaction(SIGINT, &sig, &g_oldSig) < 0) {
        perror("Fail to call sigaction for SIGINT");
        return false;
    }
    return true;
}

static void RecoverCaptureSig()
{
    if (sigaction(SIGINT, &g_oldSig, nullptr) < 0) {
        perror("Fail to call sigaction for SIGINT");
    }
}

// split into two parts because WriteAttrAndId needs the fd ids before tracking starts
bool PerfEvents::PrepareTracking(void)
{
    // 1. prepare cpu pid
    CHECK_TRUE(!PrepareFdEvents(), false, 1, "PrepareFdEvents() failed");

    // 2. create events
    CHECK_TRUE(!CreateFdEvents(), false, 1, "CreateFdEvents() failed");

    HLOGV("success");
    prepared_ = true;
    return true;
}

void PerfEvents::ExitReadRecordBufThread()
{
    if (isLowPriorityThread_) {
        if (setpriority(PRIO_PROCESS, gettid(), 0) != 0) {
            HLOGW("failed to restore the priority of the record reader thread");
        }
    }
    if (readRecordBufThread_.joinable()) {
        {
            std::lock_guard<std::mutex> lk(mtxRrecordBuf_);
            readRecordThreadRunning_ = false;
            __sync_synchronize();
            cvRecordBuf_.notify_one();
        }
        readRecordBufThread_.join();
    }
}

bool PerfEvents::PrepareRecordThread()
{
    try {
        recordBuf_ = std::make_unique<RingBuffer>(CalcBufferSize());
    } catch (const std::exception &e) {
        printf("create record buffer(size %zu) failed: %s\n", CalcBufferSize(), e.what());
        HIPERF_HILOGI(MODULE_DEFAULT, "create record buffer failed: %{public}s", e.what());
        return false;
    }
    readRecordThreadRunning_ = true;
    readRecordBufThread_ = std::thread(&PerfEvents::ReadRecordFromBuf, this);
    if (backtrack_) {
        std::thread updateTimeThread(&PerfEvents::UpdateCurrentTime);
        updateTimeThread.detach();
    }

    rlimit rlim;
    int result = getrlimit(RLIMIT_NICE, &rlim);
    const rlim_t lowPriority = 40; // RLIMIT_NICE of 40 permits a nice value down to -20
    if (result == 0 && rlim.rlim_cur == lowPriority) {
        const int highPriority = -20;
        result = setpriority(PRIO_PROCESS, gettid(), highPriority);
        if (result != 0) {
            HLOGW("failed to raise the priority of the record reader thread");
        } else {
            isLowPriorityThread_ = true;
        }
    }

    return true;
}

void PerfEvents::WaitRecordThread()
{
    printf("Process and Saving data...\n");
    ExitReadRecordBufThread();

    const auto usedTimeMsTick = duration_cast<milliseconds>(steady_clock::now() - trackingEndTime_);
    if (verboseReport_) {
        printf("Record Process Completed (wait %" PRIu64 " ms)\n", (uint64_t)usedTimeMsTick.count());
    }
    HLOGV("Record Process Completed (wait %" PRIu64 " ms)\n", (uint64_t)usedTimeMsTick.count());
#ifdef HIPERF_DEBUG_TIME
    printf("%zu record processed, used %0.3f ms(%4.2f us/record)\n", recordEventCount_,
           recordCallBackTime_.count() / MS_DURATION,
           recordCallBackTime_.count() / static_cast<double>(recordEventCount_));
    printf("total wait sleep time %0.3f ms.\n", recordSleepTime_.count() / MS_DURATION);
    printf("read from kernel time %0.3f ms.\n", recordKernelReadTime_.count() / MS_DURATION);
#endif
}

bool PerfEvents::StartTracking(bool immediately)
{
    if (!prepared_) {
        HLOGD("not prepared");
        return false;
    }

    if (recordCallBack_) {
        if (!PrepareRecordThread()) {
            HLOGW("PrepareRecordThread failed.");
            return false;
        }
    }

    HLOGD("step: 1. enable event");
    HIPERF_HILOGI(MODULE_DEFAULT, "StartTracking step: 1. enable event");
    trackingStartTime_ = steady_clock::now();
    if (immediately) {
        if (!EnableTracking()) {
            HLOGE("PerfEvents::EnableTracking() failed");
            return false;
        }
        printf("Profiling duration is %.3f seconds.\n", float(timeOut_.count()) / THOUSANDS);
        printf("Start Profiling...\n");
    }

    g_trackRunning = true;
    if (!CaptureSig()) {
        HLOGE("CaptureSig() failed");
        g_trackRunning = false;
        ExitReadRecordBufThread();
        return false;
    }

    HLOGD("step: 2. thread loop");
    HIPERF_HILOGI(MODULE_DEFAULT, "StartTracking step: 2. thread loop");
    if (recordCallBack_) {
        RecordLoop();
    } else {
        StatLoop();
    }

    HLOGD("step: 3. disable event");
    HIPERF_HILOGI(MODULE_DEFAULT, "StartTracking step: 3. disable event");
    if (!PerfEventsEnable(false)) {
        HLOGE("PerfEvents::PerfEventsEnable() failed");
    }
    if (recordCallBack_) {
        // read the samples left over after disabling the events
        ReadRecordsFromMmaps();
    }
    trackingEndTime_ = steady_clock::now();

    RecoverCaptureSig();

    HLOGD("step: 4. wait record thread");
    HIPERF_HILOGI(MODULE_DEFAULT, "StartTracking step: 4. wait record thread");
    if (recordCallBack_) {
        WaitRecordThread();
    }

    HLOGD("step: 5. exit");
    HIPERF_HILOGI(MODULE_DEFAULT, "StartTracking step: 5. exit");
    return true;
}

bool PerfEvents::StopTracking(void)
{
    if (g_trackRunning) {
        printf("someone called StopTracking\n");
        HIPERF_HILOGI(MODULE_DEFAULT, "someone called StopTracking");
        g_trackRunning = false;
        if (trackedCommand_) {
            if (trackedCommand_->GetState() == TrackedCommand::State::COMMAND_STARTED) {
                trackedCommand_->Stop();
            }
        }
        CHECK_TRUE(!PerfEventsEnable(false), false, 1, "StopTracking : PerfEventsEnable(false) failed");
    }
    return true;
}

bool PerfEvents::PauseTracking(void)
{
    CHECK_TRUE(!startedTracking_, false, 0, "");
    HIPERF_HILOGI(MODULE_DEFAULT, "someone called PauseTracking");
    return PerfEventsEnable(false);
}

bool PerfEvents::ResumeTracking(void)
{
    CHECK_TRUE(!startedTracking_, false, 0, "");
    HIPERF_HILOGI(MODULE_DEFAULT, "someone called ResumeTracking");
    return PerfEventsEnable(true);
}

bool PerfEvents::OutputTracking()
{
    if (!startedTracking_) {
        HIPERF_HILOGI(MODULE_DEFAULT, "OutputTracking failed, tracking not started...");
        return false;
    }

    if (IsOutputTracking()) {
        HIPERF_HILOGI(MODULE_DEFAULT, "output in progress");
        return true;
    }

    outputEndTime_ = currentTimeSecond_.load();
    outputTracking_ = true;
    return true;
}

bool PerfEvents::EnableTracking()
{
    CHECK_TRUE(startedTracking_, true, 0, "");
    CHECK_TRUE(!PerfEventsEnable(true), false, 1, "PerfEvents::PerfEventsEnable() failed");

    if (trackedCommand_) {
        // start the tracked command
        if (trackedCommand_->GetState() == TrackedCommand::State::COMMAND_WAITING) {
            if (!trackedCommand_->StartCommand()) {
                int wstatus;
                if (!trackedCommand_->WaitCommand(wstatus)) {
                    trackedCommand_->Stop();
                }
                std::string commandName = trackedCommand_->GetCommandName();
                printf("failed to execute command: %zu: %s\n", commandName.size(), commandName.c_str());
                return false;
            }
        } else if (trackedCommand_->GetState() != TrackedCommand::State::COMMAND_STARTED) {
            return false;
        }
    }
    startedTracking_ = true;
    return true;
}

bool PerfEvents::IsTrackRunning()
{
    return g_trackRunning;
}

bool PerfEvents::IsOutputTracking()
{
    return outputTracking_;
}

void PerfEvents::SetOutputTrackingStatus(bool status)
{
    outputTracking_ = status;
}

void PerfEvents::SetSystemTarget(bool systemTarget)
{
    systemTarget_ = systemTarget;
}

void PerfEvents::SetCpu(std::vector<pid_t> cpus)
{
    cpus_ = cpus;
}

void PerfEvents::SetPid(std::vector<pid_t> pids)
{
    pids_ = pids;
}

void PerfEvents::SetTimeOut(float timeOut)
{
    if (timeOut > 0) {
        timeOut_ = milliseconds(static_cast<int>(timeOut * THOUSANDS));
    }
}

void PerfEvents::SetTimeReport(int timeReport)
{
    static constexpr int minMsReportInterval = 10;
    if (timeReport < minMsReportInterval && timeReport != 0) {
        timeReport = minMsReportInterval;
        printf("time report min value is %d.\n", timeReport);
    }

    timeReport_ = milliseconds(timeReport);
}

std::map<__u64, std::string> PerfEvents::GetSupportEvents(perf_type_id type)
{
    if (type == PERF_TYPE_TRACEPOINT) {
        LoadTracepointEventTypesFromSystem();
    }

    std::map<__u64, std::string> eventConfigs;
    auto configTable = TYPE_CONFIGS.find(type);
    if (configTable != TYPE_CONFIGS.end()) {
        auto configs = configTable->second;
        for (auto config : configs) {
            if (type == PERF_TYPE_TRACEPOINT || IsEventSupport(type, (__u64)config.first)) {
                eventConfigs.insert(config);
            } else {
                HLOGD("'%s' not supported", config.second.c_str());
            }
        }
    }
    return eventConfigs;
}

void PerfEvents::LoadTracepointEventTypesFromSystem()
{
    if (traceConfigTable.empty()) {
        std::string basePath {"/sys/kernel/tracing/events"};
        if (access(basePath.c_str(), R_OK) != 0) {
            basePath = "/sys/kernel/debug/tracing/events";
        }
        for (const auto &eventName : GetSubDirs(basePath)) {
            std::string eventPath = basePath + "/" + eventName;
            for (const auto &concreteEvent : GetSubDirs(eventPath)) {
                std::string idPath = eventPath + "/" + concreteEvent + "/id";
                {
                    std::string resolvedPath = CanonicalizeSpecPath(idPath.c_str());
                    std::ifstream ifs {resolvedPath};
                    // clang-format off
                    const std::string idStr = {
                        std::istream_iterator<char>(ifs),
                        std::istream_iterator<char>()
                    };
                    // clang-format on
                    __u64 id {0};
                    try {
                        id = std::stoul(idStr, nullptr);
                    } catch (...) {
                        continue;
                    }
                    if (isHM_ && id < MIN_HM_TRACEPOINT_EVENT_ID) {
                        continue;
                    }
                    auto typeConfigs = TYPE_CONFIGS.find(PERF_TYPE_TRACEPOINT);
                    HLOG_ASSERT(typeConfigs != TYPE_CONFIGS.end());
                    auto configPair = typeConfigs->second.insert(
                        std::make_pair(id, eventName + ":" + concreteEvent));
                    traceConfigTable.insert(std::make_pair(id, eventName + ":" + concreteEvent));
                    ConfigTable::iterator it = configPair.first;
                    HLOGV("TYPE_CONFIGS add %llu:%s in %zu", it->first, it->second.c_str(),
                          typeConfigs->second.size());
                }
            }
        }
    }
}

void PerfEvents::SetPerCpu(bool perCpu)
{
    perCpu_ = perCpu;
}

void PerfEvents::SetPerThread(bool perThread)
{
    perThread_ = perThread;
}

void PerfEvents::SetVerboseReport(bool verboseReport)
{
    verboseReport_ = verboseReport;
}

void PerfEvents::SetSampleFrequency(unsigned int frequency)
{
    if (frequency > 0) {
        sampleFreq_ = frequency;
    }
    int maxRate = 0;
    CHECK_TRUE(!ReadIntFromProcFile("/proc/sys/kernel/perf_event_max_sample_rate", maxRate),
               NO_RETVAL, LOG_TYPE_PRINTF,
               "read perf_event_max_sample_rate fail.\n");
    if (sampleFreq_ > static_cast<unsigned int>(maxRate)) {
        static bool printFlag = false;
        sampleFreq_ = static_cast<unsigned int>(maxRate);
        if (!printFlag) {
            printf("Adjust sampling frequency to maximum allowed frequency %d.\n", maxRate);
            printFlag = true;
        }
    }
}

void PerfEvents::SetSamplePeriod(unsigned int period)
{
    if (period > 0) {
        samplePeriod_ = period;
    }
}

void PerfEvents::SetBackTrack(bool backtrack)
{
    backtrack_ = backtrack;
}

void PerfEvents::SetBackTrackTime(uint64_t backtrackTime)
{
    backtrackTime_ = backtrackTime;
}

void PerfEvents::SetMmapPages(size_t mmapPages)
{
    mmapPages_ = mmapPages;
}

void PerfEvents::SetSampleStackType(SampleStackType type)
{
    sampleStackType_ = type;
}

void PerfEvents::SetDwarfSampleStackSize(uint32_t stackSize)
{
    HLOGD("request stack size is %u", stackSize);
    dwarfSampleStackSize_ = stackSize;
}

bool PerfEvents::PerfEventsEnable(bool enable)
{
    HLOGV("%s", std::to_string(enable).c_str());
    for (const auto &eventGroupItem : eventGroupItem_) {
        for (const auto &eventItem : eventGroupItem.eventItems) {
            for (const auto &fdItem : eventItem.fdItems) {
                int result =
                    ioctl(fdItem.fd, enable ? PERF_EVENT_IOC_ENABLE : PERF_EVENT_IOC_DISABLE, 0);
                if (result < 0) {
                    printf("Cannot '%s' perf fd! type config name: '%s:%s'\n",
                           enable ? "enable" : "disable", eventItem.typeName.c_str(),
                           eventItem.configName.c_str());
                    return false;
                }
            }
        }
    }
    return true;
}

void PerfEvents::SetHM(bool isHM)
{
    isHM_ = isHM;
}

void PerfEvents::SetStatCallBack(StatCallBack reportCallBack)
{
    reportCallBack_ = reportCallBack;
}

void PerfEvents::SetRecordCallBack(RecordCallBack recordCallBack)
{
    recordCallBack_ = recordCallBack;
}

inline void PerfEvents::PutAllCpus()
{
    int cpuConfigs = sysconf(_SC_NPROCESSORS_CONF);
    for (int i = 0; i < cpuConfigs; i++) {
        cpus_.push_back(i); // put all cpus
    }
}

bool PerfEvents::PrepareFdEvents(void)
{
    /*
    https://man7.org/linux/man-pages/man2/perf_event_open.2.html
    pid == 0 and cpu == -1
            This measures the calling process/thread on any CPU.

    pid == 0 and cpu >= 0
            This measures the calling process/thread only when running
            on the specified CPU.

    pid > 0 and cpu == -1
            This measures the specified process/thread on any CPU.

    pid > 0 and cpu >= 0
            This measures the specified process/thread only when
            running on the specified CPU.

    pid == -1 and cpu >= 0
            This measures all processes/threads on the specified CPU.
            This requires CAP_PERFMON (since Linux 5.8) or
            CAP_SYS_ADMIN capability or a
            /proc/sys/kernel/perf_event_paranoid value of less than 1.

    pid == -1 and cpu == -1
            This setting is invalid and will return an error.
    */
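    // Applying those rules here: system-wide profiling uses pid == -1 with one
    // fd per CPU; otherwise the tracked command's pid (or 0 for self) is
    // paired with every online CPU.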
    if (systemTarget_) {
        pids_.clear();
        pids_.push_back(-1);
    } else {
        if (trackedCommand_) {
            pids_.push_back(trackedCommand_->GetChildPid());
        }
        if (pids_.empty()) {
            pids_.push_back(0); // no pid means use 0 as self pid
        }
    }
    if (perCpu_ || perThread_) {
        cpus_.clear();
        PutAllCpus();
    }
    if (cpus_.empty()) {
        PutAllCpus();
    }

    // print info to tell the user which cpus and processes we selected.
    if (pids_.size() == 1 && pids_[0] == -1) {
        HLOGI("target process: system scope \n");
    } else {
        HLOGI("target process: %zu (%s)\n", pids_.size(),
              (pids_[0] == 0) ? std::to_string(gettid()).c_str() : VectorToString(pids_).c_str());
    }
    if (cpus_.size() == 1 && cpus_[0] == -1) {
        HLOGI("target cpus: %ld \n", sysconf(_SC_NPROCESSORS_CONF));
    } else {
        HLOGI("target cpus: %zu / %ld (%s)\n", cpus_.size(), sysconf(_SC_NPROCESSORS_CONF),
              VectorToString(cpus_).c_str());
    }

    return true;
}

bool PerfEvents::CreateFdEvents(void)
{
    // there must be some events, or this will fail
    CHECK_TRUE(eventGroupItem_.empty(), false, LOG_TYPE_PRINTF, "no event select.\n");

    // create each fd by the cpu and process the user selected
    /*
    https://man7.org/linux/man-pages/man2/perf_event_open.2.html

    (A single event on its own is created with group_fd = -1 and is
    considered to be a group with only 1 member.)
    */
    // Even if there is only one event, it is counted as a group.

    uint fdNumber = 0;
    uint eventNumber = 0;
    uint groupNumber = 0;
    for (auto &eventGroupItem : eventGroupItem_) {
        /*
        Explain what the configuration of a group is:
        Suppose we have 2 events, 2 PIDs, and 3 CPU settings.
        According to verification,
        a group's fds require the pid and the cpu to be the same; the only difference is the event.
        In other words, if you want to bind E1 and E2 to the same group,
        it can only be like this:

        event E1 pid P1 cpu C1 [Group 1]
        event E1 pid P1 cpu C2 [Group 2]
        event E1 pid P1 cpu C3 [Group 3]

        event E1 pid P2 cpu C1 [Group 4]
        event E1 pid P2 cpu C2 [Group 5]
        event E1 pid P2 cpu C3 [Group 6]

        event E2 pid P1 cpu C1 [Group 1]
        event E2 pid P1 cpu C2 [Group 2]
        event E2 pid P1 cpu C3 [Group 3]

        event E2 pid P2 cpu C1 [Group 4]
        event E2 pid P2 cpu C2 [Group 5]
        event E2 pid P2 cpu C3 [Group 6]
        */
        HLOGV("group %2u. eventGroupItem leader: '%s':", groupNumber++,
              eventGroupItem.eventItems[0].configName.c_str());

        int groupFdCache[cpus_.size()][pids_.size()];
        for (size_t i = 0; i < cpus_.size(); i++) {     // each cpu
            for (size_t j = 0; j < pids_.size(); j++) { // each pid
                // The leader is created first, with group_fd = -1.
                groupFdCache[i][j] = -1;
            }
        }

        uint eventIndex = 0;
        for (auto &eventItem : eventGroupItem.eventItems) {
            HLOGV(" - event %2u. eventName: '%s:%s'", eventIndex++, eventItem.typeName.c_str(),
                  eventItem.configName.c_str());

            for (size_t icpu = 0; icpu < cpus_.size(); icpu++) {     // each cpu
                for (size_t ipid = 0; ipid < pids_.size(); ipid++) { // each pid
                    // one fd event group must match same cpu and same pid config (event can be
                    // different)
                    // clang-format off
                    UniqueFd fd = Open(eventItem.attr, pids_[ipid], cpus_[icpu],
                                       groupFdCache[icpu][ipid], 0);
                    // clang-format on
                    if (fd < 0) {
                        if (errno == ESRCH) {
                            if (verboseReport_) {
                                printf("pid %d does not exist.\n", pids_[ipid]);
                            }
                            HLOGE("pid %d does not exist.\n", pids_[ipid]);
                            continue;
                        } else {
                            // clang-format off
                            if (verboseReport_) {
                                char errInfo[ERRINFOLEN] = { 0 };
                                strerror_r(errno, errInfo, ERRINFOLEN);
                                printf("%s event is not supported by the kernel on cpu %d. reason: %d:%s\n",
                                       eventItem.configName.c_str(), cpus_[icpu], errno, errInfo);
                            }
                            char errInfo[ERRINFOLEN] = { 0 };
                            strerror_r(errno, errInfo, ERRINFOLEN);
                            HLOGE("%s event is not supported by the kernel on cpu %d. reason: %d:%s\n",
                                  eventItem.configName.c_str(), cpus_[icpu], errno, errInfo);
                            // clang-format on
                            break; // jump to next cpu
                        }
                    }
                    // after open succeeded, fill the result
                    // make a new FdItem
                    FdItem &fdItem = eventItem.fdItems.emplace_back();
                    fdItem.fd = std::move(fd);
                    fdItem.cpu = cpus_[icpu];
                    fdItem.pid = pids_[ipid];
                    fdNumber++;

                    // if sampling, mmap the ring buffer
                    bool createMmapSucc = true;
                    if (recordCallBack_) {
                        createMmapSucc = isSpe_ ?
                            CreateSpeMmap(fdItem, eventItem.attr) : CreateMmap(fdItem, eventItem.attr);
                    }
                    if (!createMmapSucc) {
                        printf("create mmap fail\n");
                        HIPERF_HILOGI(MODULE_DEFAULT, "create mmap fail");
                        return false;
                    }
                    // update group leader
                    int groupFdCacheNum = groupFdCache[icpu][ipid];
                    if (groupFdCacheNum == -1) {
                        groupFdCache[icpu][ipid] = fdItem.fd.Get();
                    }
                }
            }
            eventNumber++;
        }
    }

    CHECK_TRUE(fdNumber == 0, false, 1, "open %u fd for %u events", fdNumber, eventNumber);

    HLOGD("will try read %u events from %u fd (%zu groups):", eventNumber, fdNumber,
          eventGroupItem_.size());

    return true;
}

bool PerfEvents::StatReport(const __u64 &durationInSec)
{
    read_format_no_group readNoGroupValue;

    // only need to read when a report is needed
    HLOGM("eventGroupItem_:%zu", eventGroupItem_.size());
    __u64 groupId = 0;
    // clear countEvents data
    countEvents_.clear();
    for (const auto &eventGroupItem : eventGroupItem_) {
        HLOGM("eventItems:%zu", eventGroupItem.eventItems.size());
        groupId++;
        for (const auto &eventItem : eventGroupItem.eventItems) {
            // count event info together (every cpu, every pid)
            std::string configName = "";
            if (eventItem.attr.exclude_kernel) {
                configName = eventItem.configName + ":u";
            } else if (eventItem.attr.exclude_user) {
                configName = eventItem.configName + ":k";
            } else {
                configName = eventItem.configName;
            }
            if (countEvents_.count(configName) == 0) {
                auto countEvent = std::make_unique<CountEvent>(CountEvent {});
                countEvents_[configName] = std::move(countEvent);
                countEvents_[configName]->userOnly = eventItem.attr.exclude_kernel;
                countEvents_[configName]->kernelOnly = eventItem.attr.exclude_user;
            }
            const std::unique_ptr<CountEvent> &countEvent = countEvents_[configName];
            HLOGM("eventItem.fdItems:%zu", eventItem.fdItems.size());
            for (const auto &fditem : eventItem.fdItems) {
                if (read(fditem.fd, &readNoGroupValue, sizeof(readNoGroupValue)) > 0) {
                    countEvent->eventCount += readNoGroupValue.value;
                    countEvent->timeEnabled += readNoGroupValue.timeEnabled;
                    countEvent->timeRunning += readNoGroupValue.timeRunning;
                    countEvent->id = groupId;
                    if (durationInSec != 0) {
                        countEvent->usedCpus = (countEvent->eventCount / 1e9) / (durationInSec / THOUSANDS);
                    }
                    if (verboseReport_) {
                        printf("%s id:%llu(c%d:p%d) timeEnabled:%llu timeRunning:%llu value:%llu\n",
                               eventItem.configName.c_str(), readNoGroupValue.id, fditem.cpu, fditem.pid,
                               readNoGroupValue.timeEnabled, readNoGroupValue.timeRunning, readNoGroupValue.value);
                    }
                    if ((perCpu_ || perThread_) && readNoGroupValue.value) {
                        countEvent->summaries.emplace_back(fditem.cpu, fditem.pid, readNoGroupValue.value,
                            readNoGroupValue.timeEnabled, readNoGroupValue.timeRunning);
                    }
                } else {
                    printf("read failed from event '%s'\n", eventItem.configName.c_str());
                }
            }
        }
    }

    reportCallBack_(countEvents_);

    return true;
}

bool PerfEvents::CreateSpeMmap(const FdItem &item, const perf_event_attr &attr)
{
    auto it = cpuMmap_.find(item.cpu);
    if (it == cpuMmap_.end()) {
        void *rbuf = mmap(nullptr, (1 + auxMmapPages_) * pageSize_, (PROT_READ | PROT_WRITE), MAP_SHARED,
                          item.fd.Get(), 0);
        CHECK_TRUE(rbuf == MMAP_FAILED, false, 1, "");
        void *auxRbuf = mmap(nullptr, auxMmapPages_ * pageSize_, (PROT_READ | PROT_WRITE), MAP_SHARED,
                             item.fd.Get(), 0);
        MmapFd mmapItem;
        mmapItem.fd = item.fd.Get();
        mmapItem.mmapPage = reinterpret_cast<perf_event_mmap_page *>(rbuf);
        mmapItem.buf = reinterpret_cast<uint8_t *>(rbuf) + pageSize_;
        mmapItem.auxBuf = auxRbuf;
        mmapItem.bufSize = auxMmapPages_ * pageSize_;
        mmapItem.auxBufSize = auxMmapPages_ * pageSize_;
        mmapItem.attr = &attr;
        mmapItem.tid_ = item.pid;
        mmapItem.cpu = item.cpu;
        cpuMmap_[item.cpu] = mmapItem;
        pollFds_.emplace_back(pollfd {mmapItem.fd, POLLIN, 0});
    } else {
        const MmapFd &mmapItem = it->second;
        int rc = ioctl(item.fd.Get(), PERF_EVENT_IOC_SET_OUTPUT, mmapItem.fd);
        if (rc != 0) {
            HLOGEP("ioctl PERF_EVENT_IOC_SET_OUTPUT (%d -> %d) ", item.fd.Get(), mmapItem.fd);
            perror("failed to share mapped buffer\n");
            return false;
        }
    }
    return true;
}

bool PerfEvents::CreateMmap(const FdItem &item, const perf_event_attr &attr)
{
    auto it = cpuMmap_.find(item.cpu);
    if (it == cpuMmap_.end()) {
        // one metadata page plus mmapPages_ data pages per cpu
        void *rbuf = mmap(nullptr, (1 + mmapPages_) * pageSize_, PROT_READ | PROT_WRITE, MAP_SHARED,
                          item.fd.Get(), 0);
        if (rbuf == MMAP_FAILED) {
            char errInfo[ERRINFOLEN] = {0};
            strerror_r(errno, errInfo, ERRINFOLEN);
            HLOGE("mmap failed. errno:%d, errstr:%s", errno, errInfo);
            perror("Fail to call mmap");
            return false;
        }
        MmapFd mmapItem;
        mmapItem.fd = item.fd.Get();
        mmapItem.mmapPage = reinterpret_cast<perf_event_mmap_page *>(rbuf);
        mmapItem.buf = reinterpret_cast<uint8_t *>(rbuf) + pageSize_;
        mmapItem.bufSize = mmapPages_ * pageSize_;
        mmapItem.attr = &attr;
        mmapItem.posCallChain = GetCallChainPosInSampleRecord(attr);

        cpuMmap_[item.cpu] = mmapItem;
        pollFds_.emplace_back(pollfd {mmapItem.fd, POLLIN, 0});
        HLOGD("CreateMmap success cpu %d fd %d", item.cpu, mmapItem.fd);
    } else {
        const MmapFd &mmapItem = it->second;
        int rc = ioctl(item.fd.Get(), PERF_EVENT_IOC_SET_OUTPUT, mmapItem.fd);
        if (rc != 0) {
            HLOGEP("ioctl PERF_EVENT_IOC_SET_OUTPUT (%d -> %d) ", item.fd.Get(), mmapItem.fd);
            perror("failed to share mapped buffer\n");
            return false;
        }
    }
    return true;
}

std::vector<AttrWithId> PerfEvents::GetAttrWithId() const
{
    std::vector<AttrWithId> result;
    HLOGV("eventGroupItem_ %zu :", eventGroupItem_.size());

    for (const auto &eventGroupItem : eventGroupItem_) {
        HLOGV(" eventItems %zu eventItems:", eventGroupItem.eventItems.size());
        for (const auto &eventItem : eventGroupItem.eventItems) {
            AttrWithId attrId;
            attrId.attr = eventItem.attr;
            attrId.name = eventItem.configName;
            HLOGV(" fdItems %zu fdItems:", eventItem.fdItems.size());
            for (const auto &fdItem : eventItem.fdItems) {
                auto &id = attrId.ids.emplace_back(fdItem.GetPrefId());
                HLOGV(" eventItem.fdItems GetPrefId %" PRIu64 "", id);
            }
            result.emplace_back(attrId);
        }
    }
    return result;
}

size_t PerfEvents::CalcBufferSize()
{
    size_t maxBufferSize;
    if (LittleMemory()) {
        maxBufferSize = MAX_BUFFER_SIZE_LITTLE;
    } else {
        maxBufferSize = MAX_BUFFER_SIZE_LARGE;
    }

    size_t bufferSize = maxBufferSize;
    if (backtrack_ || !systemTarget_) {
        // suppose the ring buffer is 4 times as large as the mmaps
        static constexpr int TIMES = 4;
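        // e.g. (hypothetical numbers) 8 cpu mmaps of 256 pages x 4 KiB give
        // 8 * 256 * 4096 * 4 = 32 MiB, then clamped to [MIN_BUFFER_SIZE, maxBufferSize]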
        bufferSize = cpuMmap_.size() * mmapPages_ * pageSize_ * TIMES;
        if (bufferSize < MIN_BUFFER_SIZE) {
            bufferSize = MIN_BUFFER_SIZE;
        } else if (bufferSize > maxBufferSize) {
            bufferSize = maxBufferSize;
        }
    }
    HLOGD("CalcBufferSize return %zu", bufferSize);
    return bufferSize;
}

inline bool PerfEvents::IsRecordInMmap(int timeout)
{
    HLOGV("enter");
    if (pollFds_.size() > 0) {
        static uint32_t pollFailCount = 0;
        if (poll(static_cast<struct pollfd*>(pollFds_.data()), pollFds_.size(), timeout) <= 0) {
            // timed out, try again
            if (++pollFailCount >= POLL_FAIL_COUNT_THRESHOLD) {
                pollFailCount = 0;
                HIPERF_HILOGW(MODULE_DEFAULT, "mmap has had no data for the past 5s");
            }
            return false;
        } else {
            pollFailCount = 0;
        }
    }
    HLOGV("poll record from mmap");
    return true;
}

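// Order mmaps by record timestamp. With this "greater-than" comparator,
// std::make_heap builds a min-heap, so the mmap holding the oldest record is
// always at the front and records are merged in time order.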
static bool CompareRecordTime(const PerfEvents::MmapFd *left, const PerfEvents::MmapFd *right)
{
    return left->timestamp > right->timestamp;
}

void PerfEvents::ReadRecordsFromMmaps()
{
#ifdef HIPERF_DEBUG_TIME
    const auto readKernelStartTime = steady_clock::now();
#endif
    // get the readable mmaps at this time
    for (auto &it : cpuMmap_) {
        ssize_t dataSize = it.second.mmapPage->data_head - it.second.mmapPage->data_tail;
        __sync_synchronize(); // this is the same as rmb in gcc, after reading mmapPage->data_head
        if (dataSize <= 0) {
            continue;
        }
        it.second.dataSize = dataSize;
        MmapRecordHeap_.push_back(&(it.second));
    }
    if (MmapRecordHeap_.empty()) {
        return;
    }
    bool enableFlag = false;
    if (MmapRecordHeap_.size() > 1) {
        for (const auto &it : MmapRecordHeap_) {
            GetRecordFromMmap(*it);
        }
        std::make_heap(MmapRecordHeap_.begin(), MmapRecordHeap_.end(), CompareRecordTime);

        size_t heapSize = MmapRecordHeap_.size();
        while (heapSize > 1) {
            std::pop_heap(MmapRecordHeap_.begin(), MmapRecordHeap_.begin() + heapSize,
                          CompareRecordTime);
            bool auxEvent = false;
            u32 pid = 0;
            u32 tid = 0;
            u64 auxOffset = 0;
            u64 auxSize = 0;
            MoveRecordToBuf(*MmapRecordHeap_[heapSize - 1], auxEvent, auxOffset, auxSize, pid, tid);
            if (isSpe_ && auxEvent) {
                ReadRecordsFromSpeMmaps(*MmapRecordHeap_[heapSize - 1], auxOffset, auxSize, pid, tid);
                enableFlag = true;
            }
            if (GetRecordFromMmap(*MmapRecordHeap_[heapSize - 1])) {
                std::push_heap(MmapRecordHeap_.begin(), MmapRecordHeap_.begin() + heapSize,
                               CompareRecordTime);
            } else {
                heapSize--;
            }
        }
    }

    while (GetRecordFromMmap(*MmapRecordHeap_.front())) {
        bool auxEvent = false;
        u32 pid = 0;
        u32 tid = 0;
        u64 auxOffset = 0;
        u64 auxSize = 0;
        MoveRecordToBuf(*MmapRecordHeap_.front(), auxEvent, auxOffset, auxSize, pid, tid);
        if (isSpe_ && auxEvent) {
            ReadRecordsFromSpeMmaps(*MmapRecordHeap_.front(), auxOffset, auxSize, pid, tid);
            enableFlag = true;
        }
    }
    if (isSpe_ && enableFlag) {
        PerfEventsEnable(false);
        PerfEventsEnable(true);
    }
    MmapRecordHeap_.clear();
    {
        std::lock_guard<std::mutex> lk(mtxRrecordBuf_);
        recordBufReady_ = true;
    }
    cvRecordBuf_.notify_one();
#ifdef HIPERF_DEBUG_TIME
    recordKernelReadTime_ += duration_cast<milliseconds>(steady_clock::now() - readKernelStartTime);
#endif
}

bool PerfEvents::GetRecordFromMmap(MmapFd &mmap)
{
    if (mmap.dataSize <= 0) {
        return false;
    }

    GetRecordFieldFromMmap(mmap, &(mmap.header), mmap.mmapPage->data_tail, sizeof(mmap.header));
    if (mmap.header.type != PERF_RECORD_SAMPLE) {
        mmap.timestamp = 0;
        return true;
    }
    // in PERF_RECORD_SAMPLE : header + u64 sample_id + u64 ip + u32 pid + u32 tid + u64 time
    constexpr size_t timePos = sizeof(perf_event_header) + sizeof(uint64_t) + sizeof(uint64_t) +
                               sizeof(uint32_t) + sizeof(uint32_t);
    GetRecordFieldFromMmap(mmap, &(mmap.timestamp), mmap.mmapPage->data_tail + timePos,
                           sizeof(mmap.timestamp));
    return true;
}

void PerfEvents::GetRecordFieldFromMmap(MmapFd &mmap, void *dest, size_t pos, size_t size)
{
    CHECK_TRUE(mmap.bufSize == 0, NO_RETVAL, 0, "");
    pos = pos % mmap.bufSize;
    size_t tailSize = mmap.bufSize - pos;
    size_t copySize = std::min(size, tailSize);
    if (memcpy_s(dest, copySize, mmap.buf + pos, copySize) != 0) {
        HLOGEP("memcpy_s %p to %p failed. size %zd", mmap.buf + pos, dest, copySize);
    }
    if (copySize < size) {
        // the field wraps around the end of the ring buffer: copy the rest from the start
        size -= copySize;
        if (memcpy_s(static_cast<uint8_t *>(dest) + copySize, size, mmap.buf, size) != 0) {
            HLOGEP("GetRecordFieldFromMmap: memcpy_s mmap.buf to dest failed. size %zd", size);
        }
    }
}

size_t PerfEvents::GetCallChainPosInSampleRecord(const perf_event_attr &attr)
{
    // reference struct PerfRecordSampleData
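    // Each fixed field present in sample_type occupies one u64 in the record
    // body (TID packs pid+tid into a single u64). Worked example (hypothetical
    // sample_type): IDENTIFIER|IP|TID|TIME gives 4 fixed fields, so the
    // callchain starts at sizeof(perf_event_header) + 4 * 8 = 40 bytes.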
    int fixedFieldNumber = __builtin_popcountll(
        attr.sample_type & (PERF_SAMPLE_IDENTIFIER | PERF_SAMPLE_IP | PERF_SAMPLE_TID |
                            PERF_SAMPLE_TIME | PERF_SAMPLE_ADDR | PERF_SAMPLE_ID |
                            PERF_SAMPLE_STREAM_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD));
    size_t pos = sizeof(perf_event_header) + sizeof(uint64_t) * fixedFieldNumber;
    if (attr.sample_type & PERF_SAMPLE_READ) {
        pos += sizeof(read_format);
    }
    return pos;
}

size_t PerfEvents::GetStackSizePosInSampleRecord(MmapFd &mmap)
{
    size_t pos = mmap.posCallChain;
    if (mmap.attr->sample_type & PERF_SAMPLE_CALLCHAIN) {
        uint64_t nr = 0;
        GetRecordFieldFromMmap(mmap, &nr, mmap.mmapPage->data_tail + pos, sizeof(nr));
        pos += (sizeof(nr) + nr * sizeof(uint64_t));
    }
    if (mmap.attr->sample_type & PERF_SAMPLE_RAW) {
        uint32_t raw_size = 0;
        GetRecordFieldFromMmap(mmap, &raw_size, mmap.mmapPage->data_tail + pos, sizeof(raw_size));
        pos += (sizeof(raw_size) + raw_size);
    }
    if (mmap.attr->sample_type & PERF_SAMPLE_BRANCH_STACK) {
        uint64_t bnr = 0;
        GetRecordFieldFromMmap(mmap, &bnr, mmap.mmapPage->data_tail + pos, sizeof(bnr));
        pos += (sizeof(bnr) + bnr * sizeof(PerfBranchEntry));
    }
    if (mmap.attr->sample_type & PERF_SAMPLE_REGS_USER) {
        uint64_t user_abi = 0;
        GetRecordFieldFromMmap(mmap, &user_abi, mmap.mmapPage->data_tail + pos, sizeof(user_abi));
        pos += sizeof(user_abi);
        if (user_abi > 0) {
            uint64_t reg_nr = __builtin_popcountll(mmap.attr->sample_regs_user);
            pos += reg_nr * sizeof(uint64_t);
        }
    }
    if (mmap.attr->sample_type & PERF_SAMPLE_SERVER_PID) {
        uint64_t server_nr = 0;
        GetRecordFieldFromMmap(mmap, &server_nr, mmap.mmapPage->data_tail + pos, sizeof(server_nr));
        pos += (sizeof(server_nr) + server_nr * sizeof(uint64_t));
    }
    return pos;
}

bool PerfEvents::CutStackAndMove(MmapFd &mmap)
{
    constexpr uint32_t alignSize = 64;
    if (!(mmap.attr->sample_type & PERF_SAMPLE_STACK_USER)) {
        return false;
    }
    size_t stackSizePos = GetStackSizePosInSampleRecord(mmap);
    uint64_t stackSize = 0;
    GetRecordFieldFromMmap(mmap, &stackSize, mmap.mmapPage->data_tail + stackSizePos,
                           sizeof(stackSize));
    if (stackSize == 0) {
        return false;
    }
    size_t dynSizePos = stackSizePos + sizeof(uint64_t) + stackSize;
    uint64_t dynSize = 0;
    GetRecordFieldFromMmap(mmap, &dynSize, mmap.mmapPage->data_tail + dynSizePos, sizeof(dynSize));
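    // keep only the used part of the stack: round dynSize up to the next
    // 64-byte boundary, but never beyond the originally recorded stackSize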
    uint64_t newStackSize = std::min((dynSize + alignSize - 1) &
                                     (~(alignSize >= 1 ? alignSize - 1 : 0)), stackSize);
    if (newStackSize >= stackSize) {
        return false;
    }
    HLOGM("stackSize %" PRIx64 " dynSize %" PRIx64 " newStackSize %" PRIx64 "\n", stackSize, dynSize, newStackSize);
    // move and cut stack_data
    // mmap: |<+++copy1+++>|<++++++copy2++++++>|<---------------cut--------------->|<+++copy3+++>|
    //       ^             ^                   ^                                   ^
    //       new_header    stackSizePos        <stackSize-dynSize>                 dynSizePos
    uint16_t recordSize = mmap.header.size;
    mmap.header.size -= stackSize - newStackSize; // reduce the stack size
    uint8_t *buf = recordBuf_->AllocForWrite(mmap.header.size);
    // copy1: new_header
    CHECK_TRUE(buf == nullptr, false, 0, "");
    if (memcpy_s(buf, sizeof(perf_event_header), &(mmap.header), sizeof(perf_event_header)) != 0) {
        HLOGEP("memcpy_s %p to %p failed. size %zd", &(mmap.header), buf,
               sizeof(perf_event_header));
    }
    size_t copyPos = sizeof(perf_event_header);
    size_t copySize = stackSizePos - sizeof(perf_event_header) + sizeof(stackSize) + newStackSize;
    // copy2: copy stack_size, data[stack_size]
    GetRecordFieldFromMmap(mmap, buf + copyPos, mmap.mmapPage->data_tail + copyPos, copySize);
    copyPos += copySize;
    // copy3: copy dyn_size
    GetRecordFieldFromMmap(mmap, buf + copyPos, mmap.mmapPage->data_tail + dynSizePos,
                           recordSize - dynSizePos);
    // update stack_size
    if (memcpy_s(buf + stackSizePos, sizeof(stackSize), &(newStackSize), sizeof(newStackSize)) != 0) {
        HLOGEP("CutStackAndMove: memcpy_s newStackSize to buf stackSizePos failed. size %zd", sizeof(newStackSize));
    }
    recordBuf_->EndWrite();
    __sync_synchronize();
    mmap.mmapPage->data_tail += recordSize;
    mmap.dataSize -= recordSize;
    return true;
}

void PerfEvents::MoveRecordToBuf(MmapFd &mmap, bool &isAuxEvent, u64 &auxOffset, u64 &auxSize, u32 &pid, u32 &tid)
{
    uint8_t *buf = nullptr;
    if (mmap.header.type == PERF_RECORD_SAMPLE) {
        if (recordBuf_->GetFreeSize() <= BUFFER_CRITICAL_LEVEL) {
            lostSamples_++;
            HLOGD("BUFFER_CRITICAL_LEVEL: lost sample record");
            goto RETURN;
        }
        if (CutStackAndMove(mmap)) {
            return;
        }
    } else if (mmap.header.type == PERF_RECORD_LOST) {
        // PERF_RECORD_LOST layout: header + u64 id + u64 lost
        constexpr size_t lostPos = sizeof(perf_event_header) + sizeof(uint64_t);
        uint64_t lost = 0;
        GetRecordFieldFromMmap(mmap, &lost, mmap.mmapPage->data_tail + lostPos, sizeof(lost));
        lostSamples_ += lost;
        HLOGD("PERF_RECORD_LOST: kernel reported %" PRIu64 " lost samples", lost);
        goto RETURN;
    }
    if (mmap.header.type == PERF_RECORD_AUX) {
        isAuxEvent = true;
        // PERF_RECORD_AUX layout: header + u64 aux_offset + u64 aux_size + u64 flags + sample_id
        uint64_t auxOffsetPos = sizeof(perf_event_header);
        uint64_t auxSizePos = sizeof(perf_event_header) + sizeof(uint64_t);
        uint64_t pidPos = auxSizePos + sizeof(uint64_t) * 2; // skip aux_size and flags to reach sample_id
        uint64_t tidPos = pidPos + sizeof(uint32_t);
        GetRecordFieldFromMmap(mmap, &auxOffset, mmap.mmapPage->data_tail + auxOffsetPos, sizeof(auxOffset));
        GetRecordFieldFromMmap(mmap, &auxSize, mmap.mmapPage->data_tail + auxSizePos, sizeof(auxSize));
        GetRecordFieldFromMmap(mmap, &pid, mmap.mmapPage->data_tail + pidPos, sizeof(pid));
        GetRecordFieldFromMmap(mmap, &tid, mmap.mmapPage->data_tail + tidPos, sizeof(tid));
    }

    if ((buf = recordBuf_->AllocForWrite(mmap.header.size)) == nullptr) {
        // this record type must be non-sample
        lostNonSamples_++;
        HLOGD("alloc buffer failed: lost non-sample record");
        goto RETURN;
    }

    GetRecordFieldFromMmap(mmap, buf, mmap.mmapPage->data_tail, mmap.header.size);
    recordBuf_->EndWrite();
RETURN:
    __sync_synchronize();
    mmap.mmapPage->data_tail += mmap.header.size;
    mmap.dataSize -= mmap.header.size;
}
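
// Wire layouts consumed by MoveRecordToBuf (see linux/perf_event.h):
//   PERF_RECORD_LOST : { struct perf_event_header header; u64 id; u64 lost; }
//   PERF_RECORD_AUX  : { struct perf_event_header header; u64 aux_offset;
//                        u64 aux_size; u64 flags; /* sample_id follows */ }
// pidPos/tidPos above assume PERF_SAMPLE_TID is set, so the trailing
// sample_id block starts with u32 pid, u32 tid.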

inline void PerfEvents::WaitDataFromRingBuffer()
{
    std::unique_lock<std::mutex> lock(mtxRrecordBuf_);
    cvRecordBuf_.wait(lock, [this] {
        if (recordBufReady_) {
            recordBufReady_ = false;
            return true;
        }
        return !readRecordThreadRunning_;
    });
}
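
// The producer side of this handshake (a sketch; presumably the wake-up
// happens once ReadRecordsFromMmaps() has filled recordBuf_) looks like:
//   {
//       std::lock_guard<std::mutex> lk(mtxRrecordBuf_);
//       recordBufReady_ = true;
//   }
//   cvRecordBuf_.notify_one();
// Setting the flag under the mutex keeps the wait predicate race-free.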

inline bool PerfEvents::ProcessRecord(const perf_event_attr* attr, uint8_t* data)
{
    uint32_t* type = reinterpret_cast<uint32_t *>(data);
#ifdef HIPERF_DEBUG_TIME
    const auto readingStartTime = steady_clock::now();
#endif
#if !HIDEBUG_SKIP_CALLBACK
    PerfEventRecord& record = PerfEventRecordFactory::GetPerfEventRecord(*type, data, *attr);
    if (backtrack_ && readRecordThreadRunning_ && record.GetType() == PERF_RECORD_SAMPLE) {
        const PerfRecordSample& sample = static_cast<const PerfRecordSample&>(record);
        if (IsSkipRecordForBacktrack(sample)) {
            // no EndRead(): keep the record in the ring buffer and stop this read pass
            return false;
        }
    }

    recordCallBack_(record);
#endif
    recordEventCount_++;
#ifdef HIPERF_DEBUG_TIME
    recordCallBackTime_ += duration_cast<milliseconds>(steady_clock::now() - readingStartTime);
#endif
    recordBuf_->EndRead();
    return true;
}

void PerfEvents::ReadRecordFromBuf()
{
    const perf_event_attr *attr = GetDefaultAttr();
    uint8_t *p = nullptr;

    while (readRecordThreadRunning_) {
        WaitDataFromRingBuffer();
        bool output = outputTracking_;
        while ((p = recordBuf_->GetReadData()) != nullptr) {
            if (!ProcessRecord(attr, p)) {
                break;
            }
        }
        if (backtrack_ && output) {
            outputTracking_ = false;
            outputEndTime_ = 0;
        }
    }
    HLOGD("exit because read thread stopped");

    // read the data left over in the buffer
    while ((p = recordBuf_->GetReadData()) != nullptr) {
        ProcessRecord(attr, p);
    }
    HLOGD("read all records from buffer");
}
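
// Shutdown contract (a sketch of the intended ordering, assuming the stop
// path clears readRecordThreadRunning_ before notifying cvRecordBuf_): the
// wait above returns, the main loop exits, and the trailing drain loop
// flushes every record the producer completed before the flag flipped, so
// stopping never discards data already committed to recordBuf_.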

bool PerfEvents::HaveTargetsExit(const std::chrono::steady_clock::time_point &startTime)
{
    if (systemTarget_) {
        return false;
    }
    if (trackedCommand_) {
        if (trackedCommand_->GetState() < TrackedCommand::State::COMMAND_STARTED) {
            return false; // not started yet
        }
        int wstatus;
        if (trackedCommand_->WaitCommand(wstatus)) {
            milliseconds usedMsTick = duration_cast<milliseconds>(steady_clock::now() - startTime);
            printf("tracked command(%s) has exited (total %" PRIu64 " ms)\n",
                   trackedCommand_->GetCommandName().c_str(), (uint64_t)usedMsTick.count());
            return true;
        }
        return false;
    }

    for (auto it = pids_.begin(); it != pids_.end();) {
        if (IsDir("/proc/" + std::to_string(*it))) {
            it++;
        } else {
            it = pids_.erase(it);
        }
    }
    if (pids_.empty()) {
        milliseconds usedMsTick = duration_cast<milliseconds>(steady_clock::now() - startTime);
        printf("tracked processes have exited (total %" PRIu64 " ms)\n", (uint64_t)usedMsTick.count());
        return true;
    }
    return false;
}
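
// Liveness here is simply "/proc/<pid> still exists": for, say, pid 1234 the
// check is IsDir("/proc/1234"). erase() returns the iterator to the next
// element, which is why the loop advances manually only on the alive branch.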

void PerfEvents::RecordLoop()
{
    // track the elapsed time
    const auto startTime = steady_clock::now();
    const auto endTime = startTime + timeOut_;
    milliseconds usedTimeMsTick {};
    int count = 1;

    while (g_trackRunning) {
        // time check point
        const auto thisTime = steady_clock::now();
        usedTimeMsTick = duration_cast<milliseconds>(thisTime - startTime);
        if ((uint64_t)usedTimeMsTick.count() > (uint64_t)(count * THOUSANDS)) {
            if (HaveTargetsExit(startTime)) {
                break;
            }
            ++count;
        }

        if (!backtrack_ && thisTime >= endTime) {
            printf("Timeout exit (total %" PRIu64 " ms)\n", (uint64_t)usedTimeMsTick.count());
            if (trackedCommand_) {
                trackedCommand_->Stop();
            }
            break;
        }

        int timeLeft = static_cast<int>(duration_cast<milliseconds>(endTime - thisTime).count());
        if (IsRecordInMmap(std::min(timeLeft, pollTimeOut_))) {
            ReadRecordsFromMmaps();
        }
    }

    if (!g_trackRunning) {
        // user interrupt: print the time statistics
        usedTimeMsTick = duration_cast<milliseconds>(steady_clock::now() - startTime);
        printf("User interrupt exit (total %" PRIu64 " ms)\n", (uint64_t)usedTimeMsTick.count());
    }
}
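
// Poll cadence example (assuming pollTimeOut_ = 500 ms, a value chosen here
// purely for illustration): IsRecordInMmap() blocks for at most
// min(timeLeft, 500) ms, so even with no incoming perf data the loop
// re-checks g_trackRunning, the timeout, and target exit about twice a second.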

void PerfEvents::StatLoop()
{
    // track the elapsed time
    const auto startTime = steady_clock::now();
    const auto endTime = startTime + timeOut_;
    auto nextReportTime = startTime + timeReport_;
    milliseconds usedTimeMsTick {};
    __u64 durationInMs = 0;
    int64_t thresholdTimeInMs = 2 * HUNDREDS;

    while (g_trackRunning) {
        // time check point
        const auto thisTime = steady_clock::now();
        if (timeReport_ != milliseconds::zero()) {
            // periodic report for the stat command
            if (thisTime >= nextReportTime) {
                usedTimeMsTick = duration_cast<milliseconds>(thisTime - startTime);
                durationInMs = usedTimeMsTick.count();
                auto leftTimeMsTick = duration_cast<milliseconds>(endTime - thisTime);
                printf("\nReport at %" PRIu64 " ms (%" PRIu64 " ms left):\n",
                       (uint64_t)usedTimeMsTick.count(), (uint64_t)leftTimeMsTick.count());
                nextReportTime += timeReport_;
                StatReport(durationInMs);
            }
        }

        if (HaveTargetsExit(startTime)) {
            break;
        }

        if (thisTime >= endTime) {
            usedTimeMsTick = duration_cast<milliseconds>(thisTime - startTime);
            durationInMs = usedTimeMsTick.count();
            printf("Timeout exit (total %" PRIu64 " ms)\n", (uint64_t)usedTimeMsTick.count());
            if (trackedCommand_) {
                trackedCommand_->Stop();
            }
            break;
        }

        // if more than 200 ms remain, sleep 100 ms; otherwise sleep 200 us
        uint64_t defaultSleepUs = 2 * HUNDREDS; // 200 us
        if (timeReport_ == milliseconds::zero()
            && (timeOut_.count() * THOUSANDS) > thresholdTimeInMs) {
            milliseconds leftTimeMsTmp = duration_cast<milliseconds>(endTime - thisTime);
            if (leftTimeMsTmp.count() > thresholdTimeInMs) {
                defaultSleepUs = HUNDREDS * THOUSANDS; // 100 ms
            }
        }
        std::this_thread::sleep_for(microseconds(defaultSleepUs));
    }

    if (!g_trackRunning) {
        // user interrupt: print the time statistics
        usedTimeMsTick = duration_cast<milliseconds>(steady_clock::now() - startTime);
        printf("User interrupt exit (total %" PRIu64 " ms)\n", (uint64_t)usedTimeMsTick.count());
    }

    if (timeReport_ == milliseconds::zero()) {
        StatReport(durationInMs);
    }
}
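
// Report cadence, grounded in the branches above:
//   timeReport_ = 3000 ms, timeOut_ = 10 s -> interim StatReport() near
//   t = 3/6/9 s, then the timeout branch exits without a trailing report;
//   timeReport_ = 0, timeOut_ = 10 s -> no interim reports, a single
//   StatReport() after the loop ends (the final block above).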

const std::string PerfEvents::GetTypeName(perf_type_id type_id)
{
    auto it = PERF_TYPES.find(type_id);
    if (it != PERF_TYPES.end()) {
        return it->second;
    } else {
        return "<not found>";
    }
}

void PerfEvents::UpdateCurrentTime()
{
    pthread_setname_np(pthread_self(), "timer_thread");
    while (updateTimeThreadRunning_) {
        struct timespec ts = {0};
        if (clock_gettime(CLOCK_MONOTONIC, &ts) != -1) {
            currentTimeSecond_.store(static_cast<uint64_t>(ts.tv_sec));
        }

        std::this_thread::sleep_for(std::chrono::milliseconds(UPDATE_TIME_INTERVAL));
    }
}
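
// Rationale: IsSkipRecordForBacktrack() runs on every sample, so it reads
// the cached currentTimeSecond_ (refreshed every UPDATE_TIME_INTERVAL = 10 ms
// by this thread) instead of paying a clock_gettime() syscall per record.
// One-second granularity is sufficient because backtrackTime_ and the sample
// timestamps are compared in whole seconds.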

// Check whether this record should be skipped (kept in the ring buffer for
// later output) rather than processed now; must only be called in backtrack mode.
bool PerfEvents::IsSkipRecordForBacktrack(const PerfRecordSample& sample)
{
    if (outputTracking_) {
        // while outputting records, only skip those stamped later than the end time
        if (sample.GetTime() / NANO_SECONDS_PER_SECOND > outputEndTime_) {
            outputTracking_ = false;
            outputEndTime_ = 0;
            return true;
        }
        return false;
    }

    // only keep records that fall within the backtrack window
    if ((currentTimeSecond_.load() - sample.GetTime() / NANO_SECONDS_PER_SECOND) > backtrackTime_) {
        return false;
    }
    return true;
}
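
// Worked example, assuming backtrackTime_ = 30 (seconds) and
// currentTimeSecond_ = 1000: a sample stamped at t = 985 s is inside the
// window, so the function returns true and the record stays buffered for a
// later output pass; one stamped at t = 960 s is 40 s old, falls outside the
// window, and is handed to the record callback immediately (returns false).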
} // namespace HiPerf
} // namespace Developtools
} // namespace OHOS