1 /*
2 * Copyright (c) 2024 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "thread_sampler.h"
17
18 #include <atomic>
19 #include <condition_variable>
20 #include <memory>
21 #include <queue>
22 #include <set>
23 #include <string>
24
25 #include <sys/mman.h>
26 #include <sys/prctl.h>
27 #include <syscall.h>
28 #include <csignal>
29
30 #include "unwinder.h"
31 #include "dfx_regs.h"
32 #include "dfx_elf.h"
33 #include "dfx_frame_formatter.h"
34 #include "sample_stack_printer.h"
35 #include "thread_sampler_utils.h"
36 #include "file_ex.h"
37
38 namespace OHOS {
39 namespace HiviewDFX {
40
ThreadSamplerSignalHandler(int sig,siginfo_t * si,void * context)41 void ThreadSampler::ThreadSamplerSignalHandler(int sig, siginfo_t* si, void* context)
42 {
43 #if defined(__aarch64__)
44 ThreadSampler::GetInstance().WriteContext(context);
45 #endif
46 }
47
ThreadSampler()48 ThreadSampler::ThreadSampler()
49 {
50 XCOLLIE_LOGI("Create ThreadSampler.\n");
51 }
52
~ThreadSampler()53 ThreadSampler::~ThreadSampler()
54 {
55 XCOLLIE_LOGI("Destroy ThreadSampler.\n");
56 }
57
FindUnwindTable(uintptr_t pc,UnwindTableInfo & outTableInfo,void * arg)58 int ThreadSampler::FindUnwindTable(uintptr_t pc, UnwindTableInfo& outTableInfo, void *arg)
59 {
60 UnwindInfo* unwindInfo = static_cast<UnwindInfo *>(arg);
61 if (unwindInfo == nullptr) {
62 XCOLLIE_LOGE("invalid FindUnwindTable param\n");
63 return -1;
64 }
65
66 std::shared_ptr<DfxMap> map;
67 if (unwindInfo->maps->FindMapByAddr(pc, map)) {
68 if (map == nullptr) {
69 XCOLLIE_LOGE("FindUnwindTable: map is nullptr\n");
70 return -1;
71 }
72 auto elf = map->GetElf(getpid());
73 if (elf != nullptr) {
74 return elf->FindUnwindTableInfo(pc, map, outTableInfo);
75 }
76 }
77 return -1;
78 }
79
AccessMem(uintptr_t addr,uintptr_t * val,void * arg)80 int ThreadSampler::AccessMem(uintptr_t addr, uintptr_t *val, void *arg)
81 {
82 UnwindInfo* unwindInfo = static_cast<UnwindInfo *>(arg);
83 if (unwindInfo == nullptr || addr + sizeof(uintptr_t) < addr) {
84 XCOLLIE_LOGE("invalid AccessMem param\n");
85 return -1;
86 }
87
88 *val = 0;
89 if (addr < unwindInfo->context->sp ||
90 addr + sizeof(uintptr_t) >= unwindInfo->context->sp + STACK_BUFFER_SIZE) {
91 return ThreadSampler::GetInstance().AccessElfMem(addr, val);
92 } else {
93 size_t stackOffset = addr - unwindInfo->context->sp;
94 if (stackOffset >= STACK_BUFFER_SIZE) {
95 XCOLLIE_LOGE("limit stack\n");
96 return -1;
97 }
98 *val = *(reinterpret_cast<uintptr_t *>(&unwindInfo->context->buffer[stackOffset]));
99 }
100 return 0;
101 }
102
GetMapByPc(uintptr_t pc,std::shared_ptr<DfxMap> & map,void * arg)103 int ThreadSampler::GetMapByPc(uintptr_t pc, std::shared_ptr<DfxMap>& map, void *arg)
104 {
105 UnwindInfo* unwindInfo = static_cast<UnwindInfo *>(arg);
106 if (unwindInfo == nullptr) {
107 XCOLLIE_LOGE("invalid GetMapByPc param\n");
108 return -1;
109 }
110
111 return unwindInfo->maps->FindMapByAddr(pc, map) ? 0 : -1;
112 }
113
Init(int collectStackCount)114 bool ThreadSampler::Init(int collectStackCount)
115 {
116 if (init_) {
117 return true;
118 }
119
120 if (!InitRecordBuffer()) {
121 XCOLLIE_LOGE("Failed to InitRecordBuffer\n");
122 Deinit();
123 return false;
124 }
125
126 if (!InitUnwinder()) {
127 XCOLLIE_LOGE("Failed to InitUnwinder\n");
128 Deinit();
129 return false;
130 }
131
132 pid_ = getprocpid();
133 if (!InitUniqueStackTable()) {
134 XCOLLIE_LOGE("Failed to InitUniqueStackTable\n");
135 Deinit();
136 return false;
137 }
138
139 if (!InstallSignalHandler()) {
140 XCOLLIE_LOGE("Failed to InstallSignalHandler\n");
141 Deinit();
142 return false;
143 }
144
145 if (collectStackCount <= 0) {
146 XCOLLIE_LOGE("Invalid collectStackCount\n");
147 Deinit();
148 return false;
149 }
150 stackIdCount_.reserve(collectStackCount);
151
152 init_ = true;
153 return true;
154 }
155
InitRecordBuffer()156 bool ThreadSampler::InitRecordBuffer()
157 {
158 if (mmapStart_ != MAP_FAILED) {
159 return true;
160 }
161 // create buffer
162 bufferSize_ = SAMPLER_MAX_BUFFER_SZ * sizeof(struct ThreadUnwindContext);
163 mmapStart_ = mmap(nullptr, bufferSize_,
164 PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
165 if (mmapStart_ == MAP_FAILED) {
166 XCOLLIE_LOGE("Failed to create buffer for thread sampler!(%{public}d)\n", errno);
167 return false;
168 }
169
170 prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, mmapStart_, bufferSize_, "sampler_buf");
171 return true;
172 }
173
ReleaseRecordBuffer()174 void ThreadSampler::ReleaseRecordBuffer()
175 {
176 if (mmapStart_ == MAP_FAILED) {
177 return;
178 }
179 // release buffer
180 if (munmap(mmapStart_, bufferSize_) != 0) {
181 XCOLLIE_LOGE("Failed to release buffer!(%{public}d)\n", errno);
182 return;
183 }
184 mmapStart_ = MAP_FAILED;
185 }
186
InitUnwinder()187 bool ThreadSampler::InitUnwinder()
188 {
189 accessors_ = std::make_shared<OHOS::HiviewDFX::UnwindAccessors>();
190 accessors_->AccessReg = nullptr;
191 accessors_->AccessMem = &ThreadSampler::AccessMem;
192 accessors_->GetMapByPc = &ThreadSampler::GetMapByPc;
193 accessors_->FindUnwindTable = &ThreadSampler::FindUnwindTable;
194 unwinder_ = std::make_shared<Unwinder>(accessors_, true);
195
196 maps_ = DfxMaps::Create();
197 if (maps_ == nullptr) {
198 XCOLLIE_LOGE("maps is nullptr\n");
199 return false;
200 }
201 if (!maps_->GetStackRange(stackBegin_, stackEnd_)) {
202 XCOLLIE_LOGE("Failed to get stack range\n");
203 return false;
204 }
205 return true;
206 }
207
InitUniqueStackTable()208 bool ThreadSampler::InitUniqueStackTable()
209 {
210 uniqueStackTable_ = std::make_unique<UniqueStackTable>(pid_, uniqueStackTableSize_);
211 if (!uniqueStackTable_->Init()) {
212 XCOLLIE_LOGE("Failed to init unique_table\n");
213 return false;
214 }
215 void* uniqueTableBufMMap = reinterpret_cast<void*>(uniqueStackTable_->GetHeadNode());
216 prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, uniqueTableBufMMap, uniqueStackTableSize_, uniTableMMapName_.c_str());
217 return true;
218 }
219
DeinitUniqueStackTable()220 void ThreadSampler::DeinitUniqueStackTable()
221 {
222 uniqueStackTable_.reset();
223 }
224
DestroyUnwinder()225 void ThreadSampler::DestroyUnwinder()
226 {
227 maps_.reset();
228 unwinder_.reset();
229 accessors_.reset();
230 }
231
InstallSignalHandler()232 bool ThreadSampler::InstallSignalHandler()
233 {
234 struct sigaction action {};
235 sigfillset(&action.sa_mask);
236 action.sa_sigaction = ThreadSampler::ThreadSamplerSignalHandler;
237 action.sa_flags = SA_RESTART | SA_SIGINFO;
238 if (sigaction(MUSL_SIGNAL_SAMPLE_STACK, &action, nullptr) != 0) {
239 XCOLLIE_LOGE("Failed to register signal(%{public}d:%{public}d)", MUSL_SIGNAL_SAMPLE_STACK, errno);
240 return false;
241 }
242 return true;
243 }
244
UninstallSignalHandler()245 void ThreadSampler::UninstallSignalHandler()
246 {
247 if (signal(MUSL_SIGNAL_SAMPLE_STACK, SIG_IGN) == SIG_ERR) {
248 XCOLLIE_LOGE("Failed to unregister signal(%{public}d)", MUSL_SIGNAL_SAMPLE_STACK);
249 }
250 }
251
AccessElfMem(uintptr_t addr,uintptr_t * val)252 int ThreadSampler::AccessElfMem(uintptr_t addr, uintptr_t *val)
253 {
254 std::shared_ptr<DfxMap> map;
255 if (maps_->FindMapByAddr(addr, map)) {
256 if (map == nullptr) {
257 XCOLLIE_LOGE("AccessElfMem: map is nullptr\n");
258 return -1;
259 }
260 auto elf = map->GetElf(getpid());
261 if (elf != nullptr) {
262 uint64_t foff = addr - map->begin + map->offset - elf->GetBaseOffset();
263 if (elf->Read(foff, val, sizeof(uintptr_t))) {
264 return 0;
265 }
266 }
267 }
268 return -1;
269 }
270
GetReadContext()271 ThreadUnwindContext* ThreadSampler::GetReadContext()
272 {
273 ThreadUnwindContext* contextArray = static_cast<ThreadUnwindContext*>(mmapStart_);
274 int32_t index = readIndex_;
275 if (contextArray[index].requestTime == 0 || contextArray[index].snapshotTime == 0) {
276 return nullptr;
277 }
278
279 ThreadUnwindContext* ret = &contextArray[index];
280 readIndex_ = (index + 1) % SAMPLER_MAX_BUFFER_SZ;
281 return ret;
282 }
283
GetWriteContext()284 ThreadUnwindContext* ThreadSampler::GetWriteContext()
285 {
286 ThreadUnwindContext* contextArray = static_cast<ThreadUnwindContext*>(mmapStart_);
287 int32_t index = writeIndex_;
288 if (contextArray[index].requestTime > 0 &&
289 (contextArray[index].snapshotTime == 0 || contextArray[index].processTime == 0)) {
290 return nullptr;
291 }
292 return &contextArray[index];
293 }
294
WriteContext(void * context)295 __attribute__((no_sanitize("address"), no_sanitize("hwaddress"))) void ThreadSampler::WriteContext(void* context)
296 {
297 #if defined(__aarch64__)
298 if (!init_) {
299 return;
300 }
301 #if defined(CONSUME_STATISTICS)
302 uint64_t begin = GetCurrentTimeNanoseconds();
303 #endif
304 ThreadUnwindContext* contextArray = static_cast<ThreadUnwindContext*>(mmapStart_);
305 int32_t index = writeIndex_;
306 #if defined(CONSUME_STATISTICS)
307 signalTimeCost_ += begin - contextArray[index].requestTime;
308 #endif
309
310 // current buffer has not been processed, stop copy
311 if (contextArray[index].snapshotTime > 0 && contextArray[index].processTime == 0) {
312 return;
313 }
314
315 contextArray[index].fp = static_cast<ucontext_t*>(context)->uc_mcontext.regs[RegsEnumArm64::REG_FP];
316 contextArray[index].lr = static_cast<ucontext_t*>(context)->uc_mcontext.regs[RegsEnumArm64::REG_LR];
317 contextArray[index].sp = static_cast<ucontext_t*>(context)->uc_mcontext.sp;
318 contextArray[index].pc = static_cast<ucontext_t*>(context)->uc_mcontext.pc;
319 if (contextArray[index].sp < stackBegin_ ||
320 contextArray[index].sp >= stackEnd_) {
321 return;
322 }
323
324 uintptr_t curStackSz = stackEnd_ - contextArray[index].sp;
325 uintptr_t cpySz = curStackSz > STACK_BUFFER_SIZE ? STACK_BUFFER_SIZE : curStackSz;
326
327 for (uintptr_t pos = 0; pos < cpySz; pos++) {
328 reinterpret_cast<char*>(contextArray[index].buffer)[pos] =
329 reinterpret_cast<const char*>(contextArray[index].sp)[pos];
330 }
331
332 uint64_t end = GetCurrentTimeNanoseconds();
333 contextArray[index].processTime = 0;
334 contextArray[index].snapshotTime = end;
335 writeIndex_ = (index + 1) % SAMPLER_MAX_BUFFER_SZ;
336
337 #if defined(CONSUME_STATISTICS)
338 copyStackCount_++;
339 copyStackTimeCost_ += end - begin;
340 #endif
341 #endif // #if defined(__aarch64__)
342 }
343
SendSampleRequest()344 void ThreadSampler::SendSampleRequest()
345 {
346 ThreadUnwindContext* ptr = GetWriteContext();
347 if (ptr == nullptr) {
348 return;
349 }
350
351 uint64_t ts = GetCurrentTimeNanoseconds();
352
353 ptr->requestTime = ts;
354 siginfo_t si {0};
355 si.si_signo = MUSL_SIGNAL_SAMPLE_STACK;
356 si.si_errno = 0;
357 si.si_code = -1;
358 if (syscall(SYS_rt_tgsigqueueinfo, pid_, pid_, si.si_signo, &si) != 0) {
359 XCOLLIE_LOGE("Failed to queue signal(%{public}d) to %{public}d, errno(%{public}d).\n",
360 si.si_signo, pid_, errno);
361 return;
362 }
363 #if defined (CONSUME_STATISTICS)
364 requestCount_++;
365 #endif
366 }
367
ProcessStackBuffer()368 void ThreadSampler::ProcessStackBuffer()
369 {
370 #if defined(__aarch64__)
371 if (!init_) {
372 XCOLLIE_LOGE("sampler has not initialized.\n");
373 return;
374 }
375 while (true) {
376 ThreadUnwindContext* context = GetReadContext();
377 if (context == nullptr) {
378 break;
379 }
380
381 struct TimeAndFrames taf;
382 taf.requestTime = context->requestTime;
383 taf.snapshotTime = context->snapshotTime;
384
385 #if defined(CONSUME_STATISTICS)
386 uint64_t unwindStart = GetCurrentTimeNanoseconds();
387 #endif
388 UnwindInfo unwindInfo = {
389 .context = context,
390 .maps = maps_.get(),
391 };
392
393 DoUnwind(context, unwinder_, unwindInfo);
394 #if defined(CONSUME_STATISTICS)
395 uint64_t unwindEnd = GetCurrentTimeNanoseconds();
396 #endif
397 /* for print full stack */
398 auto frames = unwinder_->GetFrames();
399 taf.frameList = frames;
400 timeAndFrameList_.emplace_back(taf);
401 /* for print tree format stack */
402 auto pcs = unwinder_->GetPcs();
403 uint64_t stackId = 0;
404 auto stackIdPtr = reinterpret_cast<OHOS::HiviewDFX::StackId*>(&stackId);
405 uniqueStackTable_->PutPcsInTable(stackIdPtr, pcs.data(), pcs.size());
406 PutStackId(stackIdCount_, stackId);
407
408 uint64_t ts = GetCurrentTimeNanoseconds();
409
410 #if defined(CONSUME_STATISTICS)
411 processTimeCost_ += ts - unwindStart;
412 processCount_++;
413 unwindCount_++;
414 unwindTimeCost_ += unwindEnd - unwindStart;
415 #endif //#if defined(CONSUME_STATISTICS)
416 context->requestTime = 0;
417 context->snapshotTime = 0;
418 context->processTime = ts;
419 }
420 #endif // #if defined(__aarch64__)
421 }
422
Sample()423 int32_t ThreadSampler::Sample()
424 {
425 if (!init_) {
426 XCOLLIE_LOGE("sampler has not initialized.\n");
427 return -1;
428 }
429 #if defined(CONSUME_STATISTICS)
430 sampleCount_++;
431 #endif
432 SendSampleRequest();
433 ProcessStackBuffer();
434 return 0;
435 }
436
ResetConsumeInfo()437 void ThreadSampler::ResetConsumeInfo()
438 {
439 #if defined(CONSUME_STATISTICS)
440 sampleCount_ = 0;
441 requestCount_ = 0;
442 copyStackCount_ = 0;
443 copyStackTimeCost_ = 0;
444 processTimeCost_ = 0;
445 processCount_ = 0;
446 unwindCount_ = 0;
447 unwindTimeCost_ = 0;
448 signalTimeCost_ = 0;
449 #endif // #if defined(CONSUME_STATISTICS)
450 }
451
CollectStack(std::string & stack,bool treeFormat)452 bool ThreadSampler::CollectStack(std::string& stack, bool treeFormat)
453 {
454 ProcessStackBuffer();
455
456 if (!init_) {
457 XCOLLIE_LOGE("sampler has not initialized.\n");
458 }
459
460 stack.clear();
461 if (timeAndFrameList_.empty() && stackIdCount_.empty()) {
462 if (!LoadStringFromFile("/proc/self/wchan", stack)) {
463 XCOLLIE_LOGE("read file failed.\n");
464 }
465 if (stack.empty()) {
466 stack += "empty";
467 }
468 stack += "\n";
469 #if defined(CONSUME_STATISTICS)
470 ResetConsumeInfo();
471 #endif
472 return false;
473 }
474
475 #if defined(CONSUME_STATISTICS)
476 uint64_t collectStart = GetCurrentTimeNanoseconds();
477 #endif
478 auto printer = std::make_unique<SampleStackPrinter>(unwinder_, maps_);
479 if (!treeFormat) {
480 stack = printer->GetFullStack(timeAndFrameList_);
481 } else {
482 stack = printer->GetTreeStack(stackIdCount_, uniqueStackTable_);
483 }
484 timeAndFrameList_.clear();
485 stackIdCount_.clear();
486
487 #if defined(CONSUME_STATISTICS)
488 uint64_t collectEnd = GetCurrentTimeNanoseconds();
489 uint64_t elapse = collectEnd - collectStart;
490 XCOLLIE_LOGI("Sample count:%{public}llu\nRequest count:%{public}llu\n\
491 Snapshot count:%{public}llu\nAverage copy stack time:%{public}llu ns\n",
492 (unsigned long long)sampleCount_, (unsigned long long)requestCount_,
493 (unsigned long long)copyStackCount_, (unsigned long long)copyStackTimeCost_ / copyStackCount_);
494 XCOLLIE_LOGI("Average process time:%{public}llu ns\n", (unsigned long long)processTimeCost_/processCount_);
495 XCOLLIE_LOGI("Average unwind time:%{public}llu ns\n", (unsigned long long)unwindTimeCost_/unwindCount_);
496 XCOLLIE_LOGI("FormatStack time:%{public}llu ns\n", (unsigned long long)elapse);
497 ResetConsumeInfo();
498 #endif
499 return true;
500 }
501
Deinit()502 void ThreadSampler::Deinit()
503 {
504 DeinitUniqueStackTable();
505 DestroyUnwinder();
506 ReleaseRecordBuffer();
507 UninstallSignalHandler();
508 uniqueStackTableSize_ = DEFAULT_UNIQUE_STACK_TABLE_SIZE;
509 uniTableMMapName_ = "hicollie_buf";
510 init_ = false;
511 }
512 } // end of namespace HiviewDFX
513 } // end of namespace OHOS
514