1 /*
2 * Copyright (c) 2024 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "thread_sampler.h"
17
18 #include <atomic>
19 #include <condition_variable>
20 #include <csignal>
21 #include <memory>
22 #include <queue>
23 #include <set>
24 #include <string>
25
26 #include <sys/mman.h>
27 #include <sys/prctl.h>
28 #include <syscall.h>
29
30 #include "dfx_elf.h"
31 #include "dfx_frame_formatter.h"
32 #include "dfx_regs.h"
33 #include "file_ex.h"
34 #include "thread_sampler_utils.h"
35 #include "unwinder.h"
36
37 #define NO_SANITIZER __attribute__((no_sanitize("address"), no_sanitize("hwaddress")))
38
39 namespace OHOS {
40 namespace HiviewDFX {
ThreadSamplerSignalHandler(int sig,siginfo_t * si,void * context)41 void ThreadSampler::ThreadSamplerSignalHandler(int sig, siginfo_t* si, void* context)
42 {
43 #if defined(__aarch64__) || defined(__loongarch_lp64)
44 int preErrno = errno;
45 ThreadSampler::GetInstance().WriteContext(context);
46 errno = preErrno;
47 #endif
48 }
49
ThreadSampler()50 ThreadSampler::ThreadSampler()
51 {
52 XCOLLIE_LOGI("Create ThreadSampler.\n");
53 }
54
~ThreadSampler()55 ThreadSampler::~ThreadSampler()
56 {
57 XCOLLIE_LOGI("Destroy ThreadSampler.\n");
58 }
59
FindUnwindTable(uintptr_t pc,UnwindTableInfo & outTableInfo,void * arg)60 int ThreadSampler::FindUnwindTable(uintptr_t pc, UnwindTableInfo& outTableInfo, void* arg)
61 {
62 UnwindInfo* unwindInfo = static_cast<UnwindInfo*>(arg);
63 if (unwindInfo == nullptr) {
64 XCOLLIE_LOGE("invalid FindUnwindTable param\n");
65 return -1;
66 }
67
68 std::shared_ptr<DfxMap> map;
69 if (unwindInfo->maps->FindMapByAddr(pc, map)) {
70 if (map == nullptr) {
71 XCOLLIE_LOGE("FindUnwindTable: map is nullptr\n");
72 return -1;
73 }
74 auto elf = map->GetElf(getpid());
75 if (elf != nullptr) {
76 return elf->FindUnwindTableInfo(pc, map, outTableInfo);
77 }
78 }
79 return -1;
80 }
81
AccessMem(uintptr_t addr,uintptr_t * val,void * arg)82 int ThreadSampler::AccessMem(uintptr_t addr, uintptr_t* val, void* arg)
83 {
84 UnwindInfo* unwindInfo = static_cast<UnwindInfo*>(arg);
85 if (unwindInfo == nullptr || addr + sizeof(uintptr_t) < addr) {
86 XCOLLIE_LOGE("invalid AccessMem param\n");
87 return -1;
88 }
89
90 *val = 0;
91 if (addr < unwindInfo->context->sp || addr + sizeof(uintptr_t) >= unwindInfo->context->sp + STACK_BUFFER_SIZE) {
92 return ThreadSampler::GetInstance().AccessElfMem(addr, val);
93 } else {
94 size_t stackOffset = addr - unwindInfo->context->sp;
95 if (stackOffset >= STACK_BUFFER_SIZE) {
96 XCOLLIE_LOGE("limit stack\n");
97 return -1;
98 }
99 *val = *(reinterpret_cast<uintptr_t*>(&unwindInfo->context->buffer[stackOffset]));
100 }
101 return 0;
102 }
103
GetMapByPc(uintptr_t pc,std::shared_ptr<DfxMap> & map,void * arg)104 int ThreadSampler::GetMapByPc(uintptr_t pc, std::shared_ptr<DfxMap>& map, void* arg)
105 {
106 UnwindInfo* unwindInfo = static_cast<UnwindInfo*>(arg);
107 if (unwindInfo == nullptr) {
108 XCOLLIE_LOGE("invalid GetMapByPc param\n");
109 return -1;
110 }
111
112 return unwindInfo->maps->FindMapByAddr(pc, map) ? 0 : -1;
113 }
114
Init(int collectStackCount)115 bool ThreadSampler::Init(int collectStackCount)
116 {
117 if (init_) {
118 return true;
119 }
120
121 if (!InitRecordBuffer()) {
122 XCOLLIE_LOGE("Failed to InitRecordBuffer\n");
123 Deinit();
124 return false;
125 }
126
127 if (!InitUnwinder()) {
128 XCOLLIE_LOGE("Failed to InitUnwinder\n");
129 Deinit();
130 return false;
131 }
132
133 pid_ = getprocpid();
134 if (!InitStackPrinter()) {
135 XCOLLIE_LOGE("Failed to InitUniqueStackTable\n");
136 Deinit();
137 return false;
138 }
139
140 if (collectStackCount <= 0) {
141 XCOLLIE_LOGE("Invalid collectStackCount\n");
142 Deinit();
143 return false;
144 }
145
146 init_ = true;
147 return true;
148 }
149
InitRecordBuffer()150 bool ThreadSampler::InitRecordBuffer()
151 {
152 if (mmapStart_ != MAP_FAILED) {
153 return true;
154 }
155 // create buffer
156 bufferSize_ = SAMPLER_MAX_BUFFER_SZ * sizeof(struct ThreadUnwindContext);
157 mmapStart_ = mmap(nullptr, bufferSize_, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
158 if (mmapStart_ == MAP_FAILED) {
159 XCOLLIE_LOGE("Failed to create buffer for thread sampler!(%{public}d)\n", errno);
160 return false;
161 }
162
163 prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, mmapStart_, bufferSize_, "sampler_buf");
164 return true;
165 }
166
ReleaseRecordBuffer()167 void ThreadSampler::ReleaseRecordBuffer()
168 {
169 if (mmapStart_ == MAP_FAILED) {
170 return;
171 }
172 // release buffer
173 if (munmap(mmapStart_, bufferSize_) != 0) {
174 XCOLLIE_LOGE("Failed to release buffer!(%{public}d)\n", errno);
175 return;
176 }
177 mmapStart_ = MAP_FAILED;
178 }
179
InitUnwinder()180 bool ThreadSampler::InitUnwinder()
181 {
182 accessors_ = std::make_shared<OHOS::HiviewDFX::UnwindAccessors>();
183 accessors_->AccessReg = nullptr;
184 accessors_->AccessMem = &ThreadSampler::AccessMem;
185 accessors_->GetMapByPc = &ThreadSampler::GetMapByPc;
186 accessors_->FindUnwindTable = &ThreadSampler::FindUnwindTable;
187 unwinder_ = std::make_shared<Unwinder>(accessors_, true);
188 unwinder_->EnableFillFrames(true);
189
190 maps_ = DfxMaps::Create();
191 if (maps_ == nullptr) {
192 XCOLLIE_LOGE("maps is nullptr\n");
193 return false;
194 }
195 if (!maps_->GetStackRange(stackBegin_, stackEnd_)) {
196 XCOLLIE_LOGE("Failed to get stack range\n");
197 return false;
198 }
199 return true;
200 }
201
InitStackPrinter()202 bool ThreadSampler::InitStackPrinter()
203 {
204 if (stackPrinter_ != nullptr) {
205 return true;
206 }
207 stackPrinter_ = std::make_unique<StackPrinter>();
208 stackPrinter_->SetUnwindInfo(unwinder_, maps_);
209 if (!stackPrinter_->InitUniqueTable(pid_, uniqueStackTableSize_, uniTableMMapName_)) {
210 XCOLLIE_LOGE("Failed to init unique_table\n");
211 return false;
212 }
213 return true;
214 }
215
DestroyUnwinder()216 void ThreadSampler::DestroyUnwinder()
217 {
218 maps_.reset();
219 unwinder_.reset();
220 accessors_.reset();
221 }
222
AccessElfMem(uintptr_t addr,uintptr_t * val)223 int ThreadSampler::AccessElfMem(uintptr_t addr, uintptr_t* val)
224 {
225 std::shared_ptr<DfxMap> map;
226 if (maps_->FindMapByAddr(addr, map)) {
227 if (map == nullptr) {
228 XCOLLIE_LOGE("AccessElfMem: map is nullptr\n");
229 return -1;
230 }
231 auto elf = map->GetElf(getpid());
232 if (elf != nullptr) {
233 uint64_t foff = addr - map->begin + map->offset - elf->GetBaseOffset();
234 if (elf->Read(foff, val, sizeof(uintptr_t))) {
235 return 0;
236 }
237 }
238 }
239 return -1;
240 }
241
GetReadContext()242 ThreadUnwindContext* ThreadSampler::GetReadContext()
243 {
244 if (mmapStart_ == MAP_FAILED) {
245 return nullptr;
246 }
247 ThreadUnwindContext* contextArray = static_cast<ThreadUnwindContext*>(mmapStart_);
248 int32_t index = readIndex_;
249 if (contextArray[index].requestTime == 0 || contextArray[index].snapshotTime == 0) {
250 return nullptr;
251 }
252
253 ThreadUnwindContext* ret = &contextArray[index];
254 readIndex_ = (index + 1) % SAMPLER_MAX_BUFFER_SZ;
255 return ret;
256 }
257
GetWriteContext()258 ThreadUnwindContext* ThreadSampler::GetWriteContext()
259 {
260 if (mmapStart_ == MAP_FAILED) {
261 return nullptr;
262 }
263 ThreadUnwindContext* contextArray = static_cast<ThreadUnwindContext*>(mmapStart_);
264 int32_t index = writeIndex_;
265 if (contextArray[index].requestTime > 0 &&
266 (contextArray[index].snapshotTime == 0 || contextArray[index].processTime == 0)) {
267 return nullptr;
268 }
269 return &contextArray[index];
270 }
271
WriteContext(void * context)272 NO_SANITIZER void ThreadSampler::WriteContext(void* context)
273 {
274 #if defined(__aarch64__) || defined(__loongarch_lp64)
275 if (!init_) {
276 return;
277 }
278 #if defined(CONSUME_STATISTICS)
279 uint64_t begin = GetCurrentTimeNanoseconds();
280 #endif
281 if (mmapStart_ == MAP_FAILED) {
282 return;
283 }
284 ThreadUnwindContext* contextArray = static_cast<ThreadUnwindContext*>(mmapStart_);
285 int32_t index = writeIndex_;
286 #if defined(CONSUME_STATISTICS)
287 signalTimeCost_ += begin - contextArray[index].requestTime;
288 #endif
289 // current buffer has not been processed, stop copy
290 if (contextArray[index].snapshotTime > 0 && contextArray[index].processTime == 0) {
291 return;
292 }
293 #if defined(__aarch64__)
294 contextArray[index].fp = static_cast<ucontext_t*>(context)->uc_mcontext.regs[RegsEnumArm64::REG_FP];
295 contextArray[index].lr = static_cast<ucontext_t*>(context)->uc_mcontext.regs[RegsEnumArm64::REG_LR];
296 contextArray[index].sp = static_cast<ucontext_t*>(context)->uc_mcontext.sp;
297 contextArray[index].pc = static_cast<ucontext_t*>(context)->uc_mcontext.pc;
298 #elif defined(__loongarch_lp64)
299 contextArray[index].fp = static_cast<ucontext_t*>(context)->uc_mcontext.__gregs[RegsEnumLoongArch64::REG_FP];
300 contextArray[index].lr =
301 static_cast<ucontext_t*>(context)->uc_mcontext.__gregs[RegsEnumLoongArch64::REG_LOONGARCH64_R1];
302 contextArray[index].sp = static_cast<ucontext_t*>(context)->uc_mcontext.__gregs[RegsEnumLoongArch64::REG_SP];
303 contextArray[index].pc = static_cast<ucontext_t*>(context)->uc_mcontext.__pc;
304 #endif
305 if (contextArray[index].sp < stackBegin_ || contextArray[index].sp >= stackEnd_) {
306 return;
307 }
308 uintptr_t curStackSz = stackEnd_ - contextArray[index].sp;
309 uintptr_t cpySz = curStackSz > STACK_BUFFER_SIZE ? STACK_BUFFER_SIZE : curStackSz;
310 for (uintptr_t pos = 0; pos < cpySz; pos++) {
311 reinterpret_cast<char*>(contextArray[index].buffer)[pos] =
312 reinterpret_cast<const char*>(contextArray[index].sp)[pos];
313 }
314 writeIndex_ = (index + 1) % SAMPLER_MAX_BUFFER_SZ;
315 uint64_t end = GetCurrentTimeNanoseconds();
316 contextArray[index].processTime.store(0, std::memory_order_relaxed);
317 contextArray[index].snapshotTime.store(end, std::memory_order_release);
318 #if defined(CONSUME_STATISTICS)
319 copyStackCount_++;
320 copyStackTimeCost_ += end - begin;
321 #endif
322 #endif // #if defined(__aarch64__) || defined(__loongarch_lp64)
323 }
324
SendSampleRequest()325 void ThreadSampler::SendSampleRequest()
326 {
327 ThreadUnwindContext* ptr = GetWriteContext();
328 if (ptr == nullptr) {
329 return;
330 }
331
332 uint64_t ts = GetCurrentTimeNanoseconds();
333
334 ptr->requestTime = ts;
335 siginfo_t si {0};
336 si.si_signo = MUSL_SIGNAL_SAMPLE_STACK;
337 si.si_errno = 0;
338 si.si_code = -1;
339 if (syscall(SYS_rt_tgsigqueueinfo, pid_, pid_, si.si_signo, &si) != 0) {
340 XCOLLIE_LOGE("Failed to queue signal(%{public}d) to %{public}d, errno(%{public}d).\n", si.si_signo, pid_,
341 errno);
342 return;
343 }
344 #if defined(CONSUME_STATISTICS)
345 requestCount_++;
346 #endif
347 }
348
ProcessStackBuffer()349 void ThreadSampler::ProcessStackBuffer()
350 {
351 #if defined(__aarch64__) || defined(__loongarch_lp64)
352 if (!init_) {
353 XCOLLIE_LOGE("sampler has not initialized.\n");
354 return;
355 }
356 while (true) {
357 ThreadUnwindContext* context = GetReadContext();
358 if (context == nullptr) {
359 break;
360 }
361
362 UnwindInfo unwindInfo = {
363 .context = context,
364 .maps = maps_.get(),
365 };
366
367 struct TimeStampedPcs p;
368 p.snapshotTime = unwindInfo.context->snapshotTime;
369
370 #if defined(CONSUME_STATISTICS)
371 uint64_t unwindStart = GetCurrentTimeNanoseconds();
372 #endif
373 DoUnwind(unwinder_, unwindInfo);
374 #if defined(CONSUME_STATISTICS)
375 uint64_t unwindEnd = GetCurrentTimeNanoseconds();
376 #endif
377 auto pcs = unwinder_->GetPcs();
378 /* for print full stack */
379 p.pcVec = pcs;
380 timeStampedPcsList_.emplace_back(p);
381 /* for print tree format stack */
382 stackPrinter_->PutPcsInTable(pcs, pid_, unwindInfo.context->snapshotTime);
383
384 uint64_t ts = GetCurrentTimeNanoseconds();
385
386 #if defined(CONSUME_STATISTICS)
387 processTimeCost_ += ts - unwindStart;
388 processCount_++;
389 unwindCount_++;
390 unwindTimeCost_ += unwindEnd - unwindStart;
391 #endif //#if defined(CONSUME_STATISTICS)
392 context->requestTime.store(0, std::memory_order_release);
393 context->snapshotTime.store(0, std::memory_order_release);
394 context->processTime.store(ts, std::memory_order_release);
395 }
396 #endif // #if defined(__aarch64__) || defined(__loongarch_lp64)
397 }
398
Sample()399 int32_t ThreadSampler::Sample()
400 {
401 if (!init_) {
402 XCOLLIE_LOGE("sampler has not initialized.\n");
403 return -1;
404 }
405 #if defined(CONSUME_STATISTICS)
406 sampleCount_++;
407 #endif
408 SendSampleRequest();
409 ProcessStackBuffer();
410 return 0;
411 }
412
ResetConsumeInfo()413 void ThreadSampler::ResetConsumeInfo()
414 {
415 #if defined(CONSUME_STATISTICS)
416 sampleCount_ = 0;
417 requestCount_ = 0;
418 copyStackCount_ = 0;
419 copyStackTimeCost_ = 0;
420 processTimeCost_ = 0;
421 processCount_ = 0;
422 unwindCount_ = 0;
423 unwindTimeCost_ = 0;
424 signalTimeCost_ = 0;
425 #endif // #if defined(CONSUME_STATISTICS)
426 }
427
CollectStack(std::string & stack,bool treeFormat)428 bool ThreadSampler::CollectStack(std::string& stack, bool treeFormat)
429 {
430 ProcessStackBuffer();
431
432 if (!init_) {
433 XCOLLIE_LOGE("sampler has not initialized.\n");
434 }
435
436 stack.clear();
437 heaviestStack_.clear();
438 if (timeStampedPcsList_.empty()) {
439 stack += "/proc/self/wchan: \n";
440 std::string fileStr = "";
441 if (!LoadStringFromFile("/proc/self/wchan", fileStr)) {
442 XCOLLIE_LOGE("read file failed.\n");
443 }
444 stack += fileStr;
445 stack += "\n";
446 #if defined(CONSUME_STATISTICS)
447 ResetConsumeInfo();
448 #endif
449 return false;
450 }
451
452 #if defined(CONSUME_STATISTICS)
453 uint64_t collectStart = GetCurrentTimeNanoseconds();
454 #endif
455 if (!treeFormat) {
456 stack = stackPrinter_->GetFullStack(timeStampedPcsList_);
457 } else {
458 stack = stackPrinter_->GetTreeStack(pid_);
459 heaviestStack_ = stackPrinter_->GetHeaviestStack(pid_);
460 }
461 timeStampedPcsList_.clear();
462
463 #if defined(CONSUME_STATISTICS)
464 uint64_t collectEnd = GetCurrentTimeNanoseconds();
465 uint64_t elapse = collectEnd - collectStart;
466 XCOLLIE_LOGI(
467 "Sample count:%{public}llu\nRequest count:%{public}llu\n\
468 Snapshot count:%{public}llu\nAverage copy stack time:%{public}llu ns\n",
469 (unsigned long long)sampleCount_, (unsigned long long)requestCount_, (unsigned long long)copyStackCount_,
470 (unsigned long long)copyStackTimeCost_ / copyStackCount_);
471 XCOLLIE_LOGI("Average process time:%{public}llu ns\n", (unsigned long long)processTimeCost_ / processCount_);
472 XCOLLIE_LOGI("Average unwind time:%{public}llu ns\n", (unsigned long long)unwindTimeCost_ / unwindCount_);
473 XCOLLIE_LOGI("FormatStack time:%{public}llu ns\n", (unsigned long long)elapse);
474 ResetConsumeInfo();
475 #endif
476 return true;
477 }
478
GetHeaviestStack() const479 std::string ThreadSampler::GetHeaviestStack() const
480 {
481 return heaviestStack_;
482 }
483
Deinit()484 bool ThreadSampler::Deinit()
485 {
486 stackPrinter_.reset();
487 DestroyUnwinder();
488 ReleaseRecordBuffer();
489 init_ = false;
490 return !init_;
491 }
492 } // end of namespace HiviewDFX
493 } // end of namespace OHOS
494