1 /*
2 * Copyright (c) 2021-2024 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "dfx_dump_catcher.h"
17
18 #include <atomic>
19 #include <cerrno>
20 #include <memory>
21 #include <thread>
22 #include <vector>
23
24 #include <dlfcn.h>
25 #include <poll.h>
26 #include <sys/syscall.h>
27 #include <sys/types.h>
28 #include <securec.h>
29 #include <strings.h>
30
31 #include "backtrace_local.h"
32 #include "dfx_define.h"
33 #include "dfx_dump_res.h"
34 #include "dfx_kernel_stack.h"
35 #include "dfx_log.h"
36 #include "dfx_trace_dlsym.h"
37 #include "dfx_util.h"
38 #include "elapsed_time.h"
39 #include "faultloggerd_client.h"
40 #include "dfx_socket_request.h"
41 #include "file_ex.h"
42 #include "procinfo.h"
43
44 namespace OHOS {
45 namespace HiviewDFX {
namespace {
// Re-point the log domain and tag at this component, but only when the macros already exist.
#ifdef LOG_DOMAIN
#undef LOG_DOMAIN
#define LOG_DOMAIN 0xD002D11
#endif

#ifdef LOG_TAG
#undef LOG_TAG
#define LOG_TAG "DfxDumpCatcher"
#endif

// Tunables and identifiers.
static const int DUMP_CATCHE_WORK_TIME_S = 60;
static const std::string DFXDUMPCATCHER_TAG = "DfxDumpCatcher";
static constexpr int WAIT_GET_KERNEL_STACK_TIMEOUT = 1000; // 1000 : time out 1000 ms
static constexpr uint32_t HIVIEW_UID = 1201;
static constexpr uint32_t FOUNDATION_UID = 5523;

// State shared between the caller and the detached kernel stack collecting thread.
static std::string g_kernelStackInfo;
static std::atomic_bool g_asyncThreadRunning;
static int32_t g_kernelStackRet = -1; // -1 : incomplete kernel stack dump
static pid_t g_kernelStackPid = 0;
static std::condition_variable g_cv;
static std::mutex g_kernelStackMutex;

// Outcome of polling the dump pipes.
enum DfxDumpPollRes : int32_t {
    DUMP_POLL_INIT = -1,
    DUMP_POLL_OK = 0,
    DUMP_POLL_FD = 1,      // a pipe fd was invalid
    DUMP_POLL_FAILED = 2,  // poll() reported an error other than EINTR
    DUMP_POLL_TIMEOUT = 3, // the time budget ran out
    DUMP_POLL_RETURN = 4,  // event handling asked the poll loop to stop
};

// Result value stored in the statistics report.
enum DfxDumpStatRes : int32_t {
    DUMP_RES_NO_KERNELSTACK = -2,   // dump failed and no kernel stack text is available
    DUMP_RES_WITH_KERNELSTACK = -1, // dump failed but kernel stack text is available
    DUMP_RES_WITH_USERSTACK = 0,    // dump succeeded
};
}
83
IsLinuxKernel()84 static bool IsLinuxKernel()
85 {
86 static bool isLinux = [] {
87 std::string content;
88 LoadStringFromFile("/proc/version", content);
89 if (content.empty()) {
90 return true;
91 }
92 if (content.find("Linux") != std::string::npos) {
93 return true;
94 }
95 return false;
96 }();
97 return isLinux;
98 }
99
InitKernelStackInfo()100 static void InitKernelStackInfo()
101 {
102 g_kernelStackInfo.clear();
103 g_kernelStackRet = -1;
104 g_kernelStackPid = 0;
105 }
106
DoDumpCurrTid(const size_t skipFrameNum,std::string & msg,size_t maxFrameNums)107 bool DfxDumpCatcher::DoDumpCurrTid(const size_t skipFrameNum, std::string& msg, size_t maxFrameNums)
108 {
109 bool ret = false;
110
111 ret = GetBacktrace(msg, skipFrameNum + 1, false, maxFrameNums);
112 if (!ret) {
113 int currTid = gettid();
114 msg.append("Failed to dump curr thread:" + std::to_string(currTid) + ".\n");
115 }
116 DFXLOGD("%{public}s :: DoDumpCurrTid :: return %{public}d.", DFXDUMPCATCHER_TAG.c_str(), ret);
117 return ret;
118 }
119
DoDumpLocalTid(const int tid,std::string & msg,size_t maxFrameNums)120 bool DfxDumpCatcher::DoDumpLocalTid(const int tid, std::string& msg, size_t maxFrameNums)
121 {
122 bool ret = false;
123 if (tid <= 0) {
124 DFXLOGE("%{public}s :: DoDumpLocalTid :: return false as param error.", DFXDUMPCATCHER_TAG.c_str());
125 return ret;
126 }
127 ret = GetBacktraceStringByTid(msg, tid, 0, false, maxFrameNums);
128 if (!ret) {
129 msg.append("Failed to dump thread:" + std::to_string(tid) + ".\n");
130 }
131 DFXLOGD("%{public}s :: DoDumpLocalTid :: return %{public}d.", DFXDUMPCATCHER_TAG.c_str(), ret);
132 return ret;
133 }
134
DoDumpLocalPid(int pid,std::string & msg,size_t maxFrameNums)135 bool DfxDumpCatcher::DoDumpLocalPid(int pid, std::string& msg, size_t maxFrameNums)
136 {
137 bool ret = false;
138 if (pid <= 0) {
139 DFXLOGE("%{public}s :: DoDumpLocalPid :: return false as param error.", DFXDUMPCATCHER_TAG.c_str());
140 return ret;
141 }
142 size_t skipFramNum = 5; // 5: skip 5 frame
143
144 msg = GetStacktraceHeader();
145 std::function<bool(int)> func = [&](int tid) {
146 if (tid <= 0) {
147 return false;
148 }
149 std::string threadMsg;
150 if (tid == gettid()) {
151 ret = DoDumpCurrTid(skipFramNum, threadMsg, maxFrameNums);
152 } else {
153 ret = DoDumpLocalTid(tid, threadMsg, maxFrameNums);
154 }
155 msg += threadMsg;
156 return ret;
157 };
158 std::vector<int> tids;
159 ret = GetTidsByPidWithFunc(getpid(), tids, func);
160 DFXLOGD("%{public}s :: DoDumpLocalPid :: return %{public}d.", DFXDUMPCATCHER_TAG.c_str(), ret);
161 return ret;
162 }
163
DoDumpRemoteLocked(int pid,int tid,std::string & msg,bool isJson,int timeout)164 int32_t DfxDumpCatcher::DoDumpRemoteLocked(int pid, int tid, std::string& msg, bool isJson, int timeout)
165 {
166 return DoDumpCatchRemote(pid, tid, msg, isJson, timeout);
167 }
168
DoDumpLocalLocked(int pid,int tid,std::string & msg,size_t maxFrameNums)169 bool DfxDumpCatcher::DoDumpLocalLocked(int pid, int tid, std::string& msg, size_t maxFrameNums)
170 {
171 bool ret = false;
172 if (tid == gettid()) {
173 size_t skipFramNum = 4; // 4: skip 4 frame
174 ret = DoDumpCurrTid(skipFramNum, msg, maxFrameNums);
175 } else if (tid == 0) {
176 ret = DoDumpLocalPid(pid, msg, maxFrameNums);
177 } else {
178 if (!IsThreadInPid(pid, tid)) {
179 msg.append("tid(" + std::to_string(tid) + ") is not in pid(" + std::to_string(pid) + ").\n");
180 } else {
181 ret = DoDumpLocalTid(tid, msg, maxFrameNums);
182 }
183 }
184
185 DFXLOGD("%{public}s :: DoDumpLocal :: ret(%{public}d).", DFXDUMPCATCHER_TAG.c_str(), ret);
186 return ret;
187 }
188
ReportDumpCatcherStats(int32_t pid,uint64_t requestTime,int32_t ret,void * retAddr)189 static void ReportDumpCatcherStats(int32_t pid,
190 uint64_t requestTime, int32_t ret, void* retAddr)
191 {
192 std::vector<uint8_t> buf(sizeof(struct FaultLoggerdStatsRequest), 0);
193 auto stat = reinterpret_cast<struct FaultLoggerdStatsRequest*>(buf.data());
194 stat->type = DUMP_CATCHER;
195 stat->pid = pid;
196 stat->requestTime = requestTime;
197 stat->dumpCatcherFinishTime = GetTimeMilliSeconds();
198 stat->result = (ret == DUMPCATCH_ESUCCESS) ? DUMP_RES_WITH_USERSTACK : DUMP_RES_WITH_KERNELSTACK;
199 if ((ret != DUMPCATCH_ESUCCESS) && g_kernelStackInfo.empty()) {
200 stat->result = DUMP_RES_NO_KERNELSTACK;
201 }
202 size_t copyLen;
203 std::string processName;
204 ReadProcessName(pid, processName);
205 copyLen = std::min(sizeof(stat->targetProcess) - 1, processName.size());
206 if (memcpy_s(stat->targetProcess, sizeof(stat->targetProcess) - 1, processName.c_str(), copyLen) != 0) {
207 DFXLOGE("%{public}s::Failed to copy target process", DFXDUMPCATCHER_TAG.c_str());
208 return;
209 }
210
211 if (ret != DUMPCATCH_ESUCCESS) {
212 std::string summary = DfxDumpCatchError::ToString(ret);
213 copyLen = std::min(sizeof(stat->summary) - 1, summary.size());
214 if (memcpy_s(stat->summary, sizeof(stat->summary) - 1, summary.c_str(), copyLen) != 0) {
215 DFXLOGE("%{public}s::Failed to copy dumpcatcher summary", DFXDUMPCATCHER_TAG.c_str());
216 return;
217 }
218 }
219
220 Dl_info info;
221 if (dladdr(retAddr, &info) != 0) {
222 copyLen = std::min(sizeof(stat->callerElf) - 1, strlen(info.dli_fname));
223 if (memcpy_s(stat->callerElf, sizeof(stat->callerElf) - 1, info.dli_fname, copyLen) != 0) {
224 DFXLOGE("%{public}s::Failed to copy caller elf info", DFXDUMPCATCHER_TAG.c_str());
225 return;
226 }
227 stat->offset = reinterpret_cast<uintptr_t>(retAddr) - reinterpret_cast<uintptr_t>(info.dli_fbase);
228 }
229
230 std::string cmdline;
231 if (OHOS::LoadStringFromFile("/proc/self/cmdline", cmdline)) {
232 copyLen = std::min(sizeof(stat->callerProcess) - 1, cmdline.size());
233 if (memcpy_s(stat->callerProcess, sizeof(stat->callerProcess) - 1,
234 cmdline.c_str(), copyLen) != 0) {
235 DFXLOGE("%{public}s::Failed to copy caller cmdline", DFXDUMPCATCHER_TAG.c_str());
236 return;
237 }
238 }
239
240 ReportDumpStats(stat);
241 }
242
// Checks whether the bit for `signal` is set in a hexadecimal signal mask found in `content`.
//   content - text to search, e.g. the contents of a /proc/<pid>/task/<tid>/status file
//   filed   - field name that precedes the mask, e.g. "SigBlk"
//   signal  - 1-based signal number; values outside [1, 64] are reported as "not set"
// The mask is expected two characters after the field name and is read as up to 16 hex digits,
// as in "SigBlk: 0000000000000000". Returns false when the field is missing or no mask follows it.
static bool IsBitOn(const std::string& content, const std::string& filed, int signal)
{
    constexpr int maxSignal = 64;        // the mask is parsed into 64 bits
    constexpr size_t separatorLen = 2;   // characters between the field name and the mask
    constexpr size_t maskHexLen = 16;    // 16 hex digits == 64 bits
    if (signal <= 0 || signal > maxSignal) {
        // Shifting by a negative amount or by 64 or more is undefined behaviour.
        return false;
    }
    const size_t fieldPos = content.find(filed);
    if (fieldPos == std::string::npos) {
        return false;
    }
    //SigBlk: 0000000000000000
    const size_t maskPos = fieldPos + filed.size() + separatorLen;
    if (maskPos >= content.size()) {
        // Truncated content: substr() would throw std::out_of_range past the end.
        return false;
    }
    const std::string num = content.substr(maskPos, maskHexLen);
    // strtoull keeps all 64 bits even where unsigned long is only 32 bits wide.
    const uint64_t hexValue = strtoull(num.c_str(), nullptr, 16);
    const uint64_t mask = 1ULL << (signal - 1);

    return (hexValue & mask) != 0;
}
255
IsSignalBlocked(int pid,int32_t & ret)256 static bool IsSignalBlocked(int pid, int32_t& ret)
257 {
258 std::vector<int> tids;
259 std::vector<int> nstids;
260 GetTidsByPid(pid, tids, nstids);
261 std::string threadName;
262 std::string content;
263 int targetTid = -1;
264 for (size_t i = 0; i < tids.size(); ++i) {
265 ReadThreadNameByPidAndTid(pid, tids[i], threadName);
266 if (threadName == "OS_DfxWatchdog") {
267 targetTid = tids[i];
268 break;
269 }
270 }
271 if (targetTid != -1) {
272 std::string threadStatusPath = StringPrintf("/proc/%d/task/%d/status", pid, targetTid);
273 if (!LoadStringFromFile(threadStatusPath, content) || content.empty()) {
274 DFXLOGE("the pid(%{public}d)thread(%{public}d) read status fail, errno(%{public}d)", pid, targetTid, errno);
275 ret = DUMPCATCH_UNKNOWN;
276 return true;
277 }
278
279 if (IsBitOn(content, "SigBlk", SIGDUMP) || IsBitOn(content, "SigIgn", SIGDUMP)) {
280 DFXLOGI("the pid(%{public}d)thread(%{public}d) signal has been blocked by target process", pid, targetTid);
281 ret = DUMPCATCH_TIMEOUT_SIGNAL_BLOCK;
282 return true;
283 }
284 }
285 return false;
286 }
287
IsFrozen(int pid,int32_t & ret)288 static bool IsFrozen(int pid, int32_t& ret)
289 {
290 std::string content;
291 std::string cgroupPath = StringPrintf("/proc/%d/cgroup", pid);
292 if (!LoadStringFromFile(cgroupPath, content)) {
293 DFXLOGE("the pid (%{public}d) read cgroup fail, errno (%{public}d)", pid, errno);
294 ret = DUMPCATCH_UNKNOWN;
295 return true;
296 }
297
298 if (content.find("Frozen") != std::string::npos) {
299 DFXLOGI("the pid (%{public}d) has been frozen", pid);
300 ret = DUMPCATCH_TIMEOUT_KERNEL_FROZEN;
301 return true;
302 }
303 return false;
304 }
305
AnalyzeTimeoutReason(int pid,int32_t & ret)306 static void AnalyzeTimeoutReason(int pid, int32_t& ret)
307 {
308 std::string statusPath = StringPrintf("/proc/%d/status", pid);
309 if (access(statusPath.c_str(), F_OK) != 0) {
310 DFXLOGI("the pid (%{public}d) process exit during the dump, errno (%{public}d)", pid, errno);
311 ret = DUMPCATCH_TIMEOUT_PROCESS_KILLED;
312 return;
313 }
314
315 if (IsSignalBlocked(pid, ret)) {
316 return;
317 }
318
319 if (IsFrozen(pid, ret)) {
320 return;
321 }
322
323 DFXLOGI("the pid (%{public}d) dump slow", pid);
324 ret = DUMPCATCH_TIMEOUT_DUMP_SLOW;
325 }
326
DealWithPollRet(int pollRet,int pid,int32_t & ret,std::string & msg)327 void DfxDumpCatcher::DealWithPollRet(int pollRet, int pid, int32_t& ret, std::string& msg)
328 {
329 if (pollRet == DUMP_POLL_OK) {
330 ret = DUMPCATCH_ESUCCESS;
331 return;
332 }
333 if (g_kernelStackPid != pid) {
334 AsyncGetAllTidKernelStack(pid, WAIT_GET_KERNEL_STACK_TIMEOUT);
335 }
336 msg.append(halfProcStatus_);
337 msg.append(halfProcWchan_);
338 switch (pollRet) {
339 case DUMP_POLL_FD:
340 ret = DUMPCATCH_EFD;
341 break;
342 case DUMP_POLL_FAILED:
343 ret = DUMPCATCH_EPOLL;
344 break;
345 case DUMP_POLL_TIMEOUT:
346 AnalyzeTimeoutReason(pid, ret);
347 break;
348 case DUMP_POLL_RETURN:
349 if (msg.find("ptrace attach thread failed") != std::string::npos) {
350 ret = DUMPCATCH_DUMP_EPTRACE;
351 } else if (msg.find("stop unwinding") != std::string::npos) {
352 ret = DUMPCATCH_DUMP_EUNWIND;
353 } else if (msg.find("mapinfo is not exist") != std::string::npos) {
354 ret = DUMPCATCH_DUMP_EMAP;
355 } else {
356 ret = DUMPCATCH_UNKNOWN;
357 }
358 break;
359 default:
360 ret = DUMPCATCH_UNKNOWN;
361 break;
362 }
363 }
364
DealWithSdkDumpRet(int sdkdumpRet,int pid,int32_t & ret,std::string & msg)365 void DfxDumpCatcher::DealWithSdkDumpRet(int sdkdumpRet, int pid, int32_t& ret, std::string& msg)
366 {
367 uint32_t uid = getuid();
368 if (sdkdumpRet == ResponseCode::SDK_DUMP_REPEAT) {
369 AsyncGetAllTidKernelStack(pid, WAIT_GET_KERNEL_STACK_TIMEOUT);
370 msg.append("Result: pid(" + std::to_string(pid) + ") process is dumping.\n");
371 ret = DUMPCATCH_IS_DUMPING;
372 } else if (sdkdumpRet == ResponseCode::REQUEST_REJECT) {
373 msg.append("Result: pid(" + std::to_string(pid) + ") process check permission error.\n");
374 ret = DUMPCATCH_EPERMISSION;
375 } else if (sdkdumpRet == ResponseCode::SDK_DUMP_NOPROC) {
376 msg.append("Result: pid(" + std::to_string(pid) + ") process has exited.\n");
377 ret = DUMPCATCH_NO_PROCESS;
378 } else if (sdkdumpRet == ResponseCode::SDK_PROCESS_CRASHED) {
379 msg.append("Result: pid(" + std::to_string(pid) + ") process has been crashed.\n");
380 ret = DUMPCATCH_HAS_CRASHED;
381 } else if (sdkdumpRet == ResponseCode::CONNECT_FAILED) {
382 if (uid == HIVIEW_UID || uid == FOUNDATION_UID) {
383 AsyncGetAllTidKernelStack(pid, WAIT_GET_KERNEL_STACK_TIMEOUT);
384 }
385 msg.append("Result: pid(" + std::to_string(pid) + ") process fail to conntect faultloggerd.\n");
386 ret = DUMPCATCH_ECONNECT;
387 } else if (sdkdumpRet == ResponseCode::SEND_DATA_FAILED) {
388 if (uid == HIVIEW_UID || uid == FOUNDATION_UID) {
389 AsyncGetAllTidKernelStack(pid, WAIT_GET_KERNEL_STACK_TIMEOUT);
390 }
391 msg.append("Result: pid(" + std::to_string(pid) + ") process fail to write to faultloggerd.\n");
392 ret = DUMPCATCH_EWRITE;
393 }
394 DFXLOGW("%{public}s :: %{public}s :: %{public}s", DFXDUMPCATCHER_TAG.c_str(), __func__, msg.c_str());
395 }
396
DealWithDumpCatchRet(int pid,int32_t & ret,std::string & msg)397 static std::pair<int, std::string> DealWithDumpCatchRet(int pid, int32_t& ret, std::string& msg)
398 {
399 int result = ret == 0 ? 0 : -1;
400 std::string reason;
401 if (result == 0) {
402 reason = "Reason:" + DfxDumpCatchError::ToString(ret) + "\n";
403 } else {
404 reason = "Reason:\nnormal stack:" + DfxDumpCatchError::ToString(ret) + "\n";
405 }
406 if (result != 0) {
407 if (pid == g_kernelStackPid && !g_asyncThreadRunning) {
408 msg.append(g_kernelStackInfo);
409 result = 1;
410 InitKernelStackInfo();
411 } else if (g_kernelStackRet != -1) {
412 ret = g_kernelStackRet;
413 reason += "kernel stack:" + DfxDumpCatchError::ToString(ret) + "\n";
414 g_kernelStackRet = -1;
415 } else if (g_kernelStackRet == -1) {
416 reason += "kernel stack:" + DfxDumpCatchError::ToString(DUMPCATCH_KERNELSTACK_NONEED) + "\n";
417 }
418 }
419 std::string toFind = "Result:";
420 size_t startPos = msg.find(toFind);
421 if (startPos != std::string::npos) {
422 size_t endPos = msg.find("\n", startPos);
423 if (endPos != std::string::npos) {
424 msg.erase(startPos, endPos - startPos + 1);
425 }
426 }
427 return std::make_pair(result, reason);
428 }
429
DumpCatchWithTimeout(int pid,std::string & msg,int timeout,int tid,bool isJson)430 std::pair<int, std::string> DfxDumpCatcher::DumpCatchWithTimeout(int pid, std::string& msg, int timeout,
431 int tid, bool isJson)
432 {
433 DfxEnableTraceDlsym(true);
434 ElapsedTime counter;
435 uint64_t requestTime = GetTimeMilliSeconds();
436 int32_t dumpcatchErrno = DUMPCATCH_UNKNOWN;
437 bool reportStat = false;
438 do {
439 if (pid <= 0 || tid <0 || timeout <= WAIT_GET_KERNEL_STACK_TIMEOUT) {
440 DFXLOGE("DumpCatchWithTimeout:: param error.");
441 dumpcatchErrno = DUMPCATCH_EPARAM;
442 break;
443 }
444 if (!IsLinuxKernel()) {
445 std::string statusPath = StringPrintf("/proc/%d/status", pid);
446 if (access(statusPath.c_str(), F_OK) != 0 && errno != EACCES) {
447 DFXLOGE("DumpCatchWithTimeout:: the pid(%{public}d) process has exited, errno(%{public}d)", pid, errno);
448 msg.append("Result: pid(" + std::to_string(pid) + ") process has exited.\n");
449 dumpcatchErrno = DUMPCATCH_NO_PROCESS;
450 break;
451 }
452 }
453 std::unique_lock<std::mutex> lck(mutex_);
454 int currentPid = getpid();
455 if (pid == currentPid) {
456 DFXLOGE("DumpCatchWithTimeout:: param error (don't support dumpcatch self)");
457 dumpcatchErrno = DUMPCATCH_EPARAM;
458 break;
459 } else {
460 DFXLOGI("Receive DumpCatch request for cPid:(%{public}d), pid(%{public}d)", currentPid, pid);
461 dumpcatchErrno = DoDumpRemoteLocked(pid, tid, msg, isJson, timeout);
462 reportStat = true;
463 }
464 } while (false);
465
466 auto result = DealWithDumpCatchRet(pid, dumpcatchErrno, msg);
467 if (reportStat) {
468 void* retAddr = __builtin_return_address(0);
469 ReportDumpCatcherStats(pid, requestTime, dumpcatchErrno, retAddr);
470 }
471
472 DFXLOGI("dump_catch : pid = %{public}d, elapsed time = %{public}" PRId64 " ms, " \
473 "msgLength = %{public}zu, ret = %{public}d\n%{public}s",
474 pid, counter.Elapsed<std::chrono::milliseconds>(), msg.size(), result.first, result.second.c_str());
475 DfxEnableTraceDlsym(false);
476 return result;
477 }
478
DumpCatchProcess(int pid,std::string & msg,size_t maxFrameNums,bool isJson)479 int DfxDumpCatcher::DumpCatchProcess(int pid, std::string& msg, size_t maxFrameNums, bool isJson)
480 {
481 if (DumpCatch(pid, 0, msg, maxFrameNums, isJson)) {
482 return 0;
483 }
484 if (pid == g_kernelStackPid && !g_asyncThreadRunning) {
485 msg.append(g_kernelStackInfo);
486 InitKernelStackInfo();
487 return 1;
488 }
489 g_kernelStackRet = -1;
490 return -1;
491 }
492
DumpCatch(int pid,int tid,std::string & msg,size_t maxFrameNums,bool isJson)493 bool DfxDumpCatcher::DumpCatch(int pid, int tid, std::string& msg, size_t maxFrameNums, bool isJson)
494 {
495 bool ret = false;
496 if (pid <= 0 || tid < 0) {
497 DFXLOGE("%{public}s :: dump_catch :: param error.", DFXDUMPCATCHER_TAG.c_str());
498 return ret;
499 }
500 if (!IsLinuxKernel()) {
501 std::string statusPath = StringPrintf("/proc/%d/status", pid);
502 DFXLOGI("DumpCatch:: access pid(%{public}d) status", pid);
503 if (access(statusPath.c_str(), F_OK) != 0 && errno != EACCES) {
504 DFXLOGE("DumpCatch:: the pid(%{public}d) process has exited, errno(%{public}d)", pid, errno);
505 msg.append("Result: pid(" + std::to_string(pid) + ") process has exited.\n");
506 return ret;
507 }
508 }
509 DfxEnableTraceDlsym(true);
510 ElapsedTime counter;
511 std::unique_lock<std::mutex> lck(mutex_);
512 int currentPid = getpid();
513 uint64_t requestTime = GetTimeMilliSeconds();
514 DFXLOGI("Receive DumpCatch request for cPid:(%{public}d), pid(%{public}d), " \
515 "tid:(%{public}d).", currentPid, pid, tid);
516 if (pid == currentPid) {
517 ret = DoDumpLocalLocked(pid, tid, msg, maxFrameNums);
518 } else {
519 if (maxFrameNums != DEFAULT_MAX_FRAME_NUM) {
520 DFXLOGI("%{public}s :: dump_catch :: maxFrameNums does not support setting " \
521 "when pid is not equal to caller pid", DFXDUMPCATCHER_TAG.c_str());
522 }
523 int timeout = (tid == 0 ? 3 : 10) * 1000; // when tid not zero, timeout is 10s
524 int32_t res = DoDumpRemoteLocked(pid, tid, msg, isJson, timeout);
525 if (res != DUMPCATCH_ESUCCESS && g_kernelStackRet != DUMPCATCH_ESUCCESS && g_kernelStackRet != -1) {
526 res = g_kernelStackRet;
527 }
528 void* retAddr = __builtin_return_address(0);
529 ReportDumpCatcherStats(pid, requestTime, res, retAddr);
530 ret = res == DUMPCATCH_ESUCCESS;
531 }
532
533 DFXLOGI("dump_catch : pid = %{public}d, elapsed time = %{public}" PRId64 " ms, ret = %{public}d, " \
534 "msgLength = %{public}zu",
535 pid, counter.Elapsed<std::chrono::milliseconds>(), ret, msg.size());
536 DfxEnableTraceDlsym(false);
537 return ret;
538 }
539
DumpCatchFd(int pid,int tid,std::string & msg,int fd,size_t maxFrameNums)540 bool DfxDumpCatcher::DumpCatchFd(int pid, int tid, std::string& msg, int fd, size_t maxFrameNums)
541 {
542 bool ret = false;
543 ret = DumpCatch(pid, tid, msg, maxFrameNums);
544 if (fd > 0) {
545 ret = OHOS_TEMP_FAILURE_RETRY(write(fd, msg.c_str(), msg.length()));
546 }
547 return ret;
548 }
549
DoDumpCatchRemote(int pid,int tid,std::string & msg,bool isJson,int timeout)550 int32_t DfxDumpCatcher::DoDumpCatchRemote(int pid, int tid, std::string& msg, bool isJson, int timeout)
551 {
552 DFX_TRACE_SCOPED_DLSYM("DoDumpCatchRemote");
553 int32_t ret = DUMPCATCH_UNKNOWN;
554 if (pid <= 0 || tid < 0 || timeout <= WAIT_GET_KERNEL_STACK_TIMEOUT) {
555 msg.append("Result: pid(" + std::to_string(pid) + ") param error.\n");
556 DFXLOGW("%{public}s :: %{public}s :: %{public}s", DFXDUMPCATCHER_TAG.c_str(), __func__, msg.c_str());
557 return DUMPCATCH_EPARAM;
558 }
559 pid_ = pid;
560 int pipeReadFd[] = { -1, -1 };
561 uint64_t sdkDumpStartTime = GetAbsTimeMilliSeconds();
562 int sdkdumpRet = RequestSdkDump(pid, tid, pipeReadFd, isJson, timeout);
563 if (sdkdumpRet != ResponseCode::REQUEST_SUCCESS) {
564 DealWithSdkDumpRet(sdkdumpRet, pid, ret, msg);
565 return ret;
566 }
567 // timeout sub the cost time of sdkdump
568 timeout -= static_cast<int>(GetAbsTimeMilliSeconds() - sdkDumpStartTime);
569 int pollRet = DoDumpRemotePid(pid, msg, pipeReadFd, isJson, timeout);
570 DealWithPollRet(pollRet, pid, ret, msg);
571 DFXLOGI("%{public}s :: %{public}s :: pid(%{public}d) ret: %{public}d", DFXDUMPCATCHER_TAG.c_str(),
572 __func__, pid, ret);
573 return ret;
574 }
575
DoDumpRemotePid(int pid,std::string & msg,int (& pipeReadFd)[2],bool isJson,int32_t timeout)576 int DfxDumpCatcher::DoDumpRemotePid(int pid, std::string& msg, int (&pipeReadFd)[2], bool isJson, int32_t timeout)
577 {
578 DFX_TRACE_SCOPED_DLSYM("DoDumpRemotePid");
579 if (timeout <= 0) {
580 DFXLOGW("timeout less than 0, try to get kernel stack and return directly!");
581 AsyncGetAllTidKernelStack(pid, WAIT_GET_KERNEL_STACK_TIMEOUT);
582 RequestDelPipeFd(pid);
583 CloseFd(pipeReadFd[PIPE_BUF_INDEX]);
584 CloseFd(pipeReadFd[PIPE_RES_INDEX]);
585 return DUMP_POLL_TIMEOUT;
586 } else if (timeout < 1000) { // 1000 : one thousand milliseconds
587 DFXLOGW("timeout less than 1 seconds, get kernel stack directly!");
588 AsyncGetAllTidKernelStack(pid);
589 }
590 int ret = DoDumpRemotePoll(timeout, msg, pipeReadFd, isJson);
591 // request close fds in faultloggerd
592 RequestDelPipeFd(pid);
593 CloseFd(pipeReadFd[PIPE_BUF_INDEX]);
594 CloseFd(pipeReadFd[PIPE_RES_INDEX]);
595 DFXLOGI("%{public}s :: %{public}s :: pid(%{public}d) poll ret: %{public}d",
596 DFXDUMPCATCHER_TAG.c_str(), __func__, pid, ret);
597 return ret;
598 }
599
KernelRet2DumpcatchRet(int32_t ret)600 static int32_t KernelRet2DumpcatchRet(int32_t ret)
601 {
602 switch (ret) {
603 case KERNELSTACK_ECREATE:
604 return DUMPCATCH_KERNELSTACK_ECREATE;
605 case KERNELSTACK_EOPEN:
606 return DUMPCATCH_KERNELSTACK_EOPEN;
607 case KERNELSTACK_EIOCTL:
608 return DUMPCATCH_KERNELSTACK_EIOCTL;
609 default:
610 return DUMPCATCH_UNKNOWN;
611 }
612 }
613
CollectKernelStack(pid_t pid,int waitMilliSeconds)614 void DfxDumpCatcher::CollectKernelStack(pid_t pid, int waitMilliSeconds)
615 {
616 ElapsedTime timer;
617 std::string kernelStackInfo;
618 int32_t kernelRet = 0;
619 auto finishCollect = [waitMilliSeconds]() {
620 if (waitMilliSeconds > 0) {
621 std::unique_lock<std::mutex> lock(g_kernelStackMutex);
622 g_asyncThreadRunning = false;
623 lock.unlock();
624 g_cv.notify_all();
625 } else {
626 g_asyncThreadRunning = false;
627 }
628 };
629 std::string statusPath = StringPrintf("/proc/%d/status", pid);
630 if (access(statusPath.c_str(), F_OK) != 0) {
631 DFXLOGW("No process(%{public}d) status file exist!", pid);
632 finishCollect();
633 return;
634 }
635
636 std::function<bool(int)> func = [&](int tid) {
637 if (tid <= 0) {
638 return false;
639 }
640 std::string tidKernelStackInfo;
641 int32_t ret = DfxGetKernelStack(tid, tidKernelStackInfo);
642 if (ret == 0) {
643 kernelStackInfo.append(tidKernelStackInfo);
644 } else if (kernelRet == 0) {
645 kernelRet = ret;
646 }
647 return true;
648 };
649 std::vector<int> tids;
650 MAYBE_UNUSED bool ret = GetTidsByPidWithFunc(pid, tids, func);
651 if (kernelStackInfo.empty()) {
652 DFXLOGE("Process(%{public}d) collect kernel stack fail!", pid);
653 g_kernelStackRet = KernelRet2DumpcatchRet(kernelRet);
654 finishCollect();
655 return;
656 }
657 g_kernelStackPid = pid;
658 g_kernelStackInfo = kernelStackInfo;
659 g_kernelStackRet = 0;
660 finishCollect();
661 DFXLOGI("finish collect all tid info for pid(%{public}d) time(%{public}" PRId64 ")ms", pid,
662 timer.Elapsed<std::chrono::milliseconds>());
663 }
664
AsyncGetAllTidKernelStack(pid_t pid,int waitMilliSeconds)665 void DfxDumpCatcher::AsyncGetAllTidKernelStack(pid_t pid, int waitMilliSeconds)
666 {
667 ReadProcessStatus(halfProcStatus_, pid);
668 if (IsLinuxKernel()) {
669 ReadProcessWchan(halfProcWchan_, pid, false, true);
670 }
671 if (g_asyncThreadRunning) {
672 DFXLOGI("pid(%{public}d) get kernel stack thread is running, not get pid(%{public}d)", g_kernelStackPid, pid);
673 return;
674 }
675 g_asyncThreadRunning = true;
676 InitKernelStackInfo();
677 auto func = [pid, waitMilliSeconds] {
678 CollectKernelStack(pid, waitMilliSeconds);
679 };
680 if (waitMilliSeconds > 0) {
681 std::unique_lock<std::mutex> lock(g_kernelStackMutex);
682 std::thread kernelStackTask(func);
683 kernelStackTask.detach();
684 g_cv.wait_for(lock, std::chrono::milliseconds(WAIT_GET_KERNEL_STACK_TIMEOUT),
685 [] {return !g_asyncThreadRunning;});
686 } else {
687 std::thread kernelStackTask(func);
688 kernelStackTask.detach();
689 }
690 }
691
HandlePollError(const uint64_t endTime,int & remainTime,bool & collectAllTidStack,std::string & resMsg,int & ret)692 bool DfxDumpCatcher::HandlePollError(const uint64_t endTime, int& remainTime,
693 bool& collectAllTidStack, std::string& resMsg, int& ret)
694 {
695 if (errno == EINTR) {
696 uint64_t now = GetAbsTimeMilliSeconds();
697 if (now >= endTime) {
698 ret = DUMP_POLL_TIMEOUT;
699 resMsg.append("Result: poll timeout.\n");
700 return false;
701 }
702 if (!collectAllTidStack && (remainTime == DUMPCATCHER_REMOTE_P90_TIMEOUT)) {
703 AsyncGetAllTidKernelStack(pid_);
704 collectAllTidStack = true;
705 }
706 remainTime = static_cast<int>(endTime - now);
707 return true;
708 }
709 ret = DUMP_POLL_FAILED;
710 resMsg.append("Result: poll error, errno(" + std::to_string(errno) + ")\n");
711 return false;
712 }
713
HandlePollTimeout(const int timeout,int & remainTime,bool & collectAllTidStack,std::string & resMsg,int & ret)714 bool DfxDumpCatcher::HandlePollTimeout(const int timeout, int& remainTime,
715 bool& collectAllTidStack, std::string& resMsg, int& ret)
716 {
717 if (!collectAllTidStack && (remainTime == DUMPCATCHER_REMOTE_P90_TIMEOUT)) {
718 AsyncGetAllTidKernelStack(pid_);
719 remainTime = timeout - DUMPCATCHER_REMOTE_P90_TIMEOUT;
720 collectAllTidStack = true;
721 return true;
722 }
723 ret = DUMP_POLL_TIMEOUT;
724 resMsg.append("Result: poll timeout.\n");
725 return false;
726 }
727
HandlePollEvents(std::pair<int,std::string> & bufState,std::pair<int,std::string> & resState,const struct pollfd (& readFds)[2],bool & bPipeConnect,bool & res)728 bool DfxDumpCatcher::HandlePollEvents(std::pair<int, std::string>& bufState, std::pair<int, std::string>& resState,
729 const struct pollfd (&readFds)[2], bool& bPipeConnect, bool& res)
730 {
731 bool bufRet = true;
732 bool resRet = false;
733 bool eventRet = true;
734 for (auto& readFd : readFds) {
735 if (!bPipeConnect && (static_cast<uint32_t>(readFd.revents) & POLLIN)) {
736 bPipeConnect = true;
737 }
738
739 if (bPipeConnect &&
740 ((static_cast<uint32_t>(readFd.revents) & POLLERR) || (static_cast<uint32_t>(readFd.revents) & POLLHUP))) {
741 eventRet = false;
742 resState.second.append("Result: poll events error.\n");
743 break;
744 }
745
746 if ((static_cast<uint32_t>(readFd.revents) & POLLIN) != POLLIN) {
747 continue;
748 }
749
750 if (readFd.fd == bufState.first) {
751 bufRet = DoReadBuf(bufState.first, bufState.second);
752 } else if (readFd.fd == resState.first) {
753 resRet = DoReadRes(resState.first, res, resState.second);
754 }
755 }
756
757 if ((eventRet == false) || (bufRet == false) || (resRet == true)) {
758 DFXLOGI("%{public}s :: %{public}s :: eventRet(%{public}d) bufRet: %{public}d resRet: %{public}d",
759 DFXDUMPCATCHER_TAG.c_str(), __func__, eventRet, bufRet, resRet);
760 return false;
761 }
762 return true;
763 }
764
DumpRemotePoll(const int timeout,std::pair<int,std::string> & bufState,std::pair<int,std::string> & resState)765 std::pair<bool, int> DfxDumpCatcher::DumpRemotePoll(const int timeout,
766 std::pair<int, std::string>& bufState, std::pair<int, std::string>& resState)
767 {
768 int ret = DUMP_POLL_INIT;
769 bool res = false;
770 struct pollfd readFds[2];
771 (void)memset_s(readFds, sizeof(readFds), 0, sizeof(readFds));
772 readFds[0].fd = bufState.first;
773 readFds[0].events = POLLIN;
774 readFds[1].fd = resState.first;
775 readFds[1].events = POLLIN;
776 int fdsSize = sizeof(readFds) / sizeof(readFds[0]);
777 bool bPipeConnect = false;
778 int remainTime = DUMPCATCHER_REMOTE_P90_TIMEOUT < timeout ? DUMPCATCHER_REMOTE_P90_TIMEOUT : timeout;
779 bool collectAllTidStack = false;
780 uint64_t startTime = GetAbsTimeMilliSeconds();
781 uint64_t endTime = startTime + static_cast<uint64_t>(timeout);
782 bool isContinue = true;
783 do {
784 int pollRet = poll(readFds, fdsSize, remainTime);
785 if (pollRet < 0) {
786 isContinue = HandlePollError(endTime, remainTime, collectAllTidStack, resState.second, ret);
787 continue;
788 } else if (pollRet == 0) {
789 isContinue = HandlePollTimeout(timeout, remainTime, collectAllTidStack, resState.second, ret);
790 continue;
791 }
792 if (!HandlePollEvents(bufState, resState, readFds, bPipeConnect, res)) {
793 ret = DUMP_POLL_RETURN;
794 break;
795 }
796 uint64_t now = GetAbsTimeMilliSeconds();
797 if (now >= endTime) {
798 ret = DUMP_POLL_TIMEOUT;
799 resState.second.append("Result: poll timeout.\n");
800 break;
801 }
802 remainTime = static_cast<int>(endTime - now);
803 } while (isContinue);
804 return std::make_pair(res, ret);
805 }
806
DoDumpRemotePoll(int timeout,std::string & msg,const int (& pipeReadFd)[2],bool isJson)807 int DfxDumpCatcher::DoDumpRemotePoll(int timeout, std::string& msg, const int (&pipeReadFd)[2], bool isJson)
808 {
809 DFX_TRACE_SCOPED_DLSYM("DoDumpRemotePoll");
810 if (pipeReadFd[PIPE_BUF_INDEX] < 0 || pipeReadFd[PIPE_RES_INDEX] < 0) {
811 if (!isJson) {
812 msg = "Result: bufFd or resFd < 0.\n";
813 }
814 DFXLOGE("invalid bufFd or resFd");
815 return DUMP_POLL_FD;
816 }
817 std::pair<int, std::string> bufState = std::make_pair(pipeReadFd[PIPE_BUF_INDEX], "");
818 std::pair<int, std::string> resState = std::make_pair(pipeReadFd[PIPE_RES_INDEX], "");
819 std::pair<bool, int> result = DumpRemotePoll(timeout, bufState, resState);
820
821 DFXLOGI("%{public}s :: %{public}s :: %{public}s", DFXDUMPCATCHER_TAG.c_str(), __func__, resState.second.c_str());
822 msg = isJson && result.first ? bufState.second : (resState.second + bufState.second);
823 return result.first ? DUMP_POLL_OK : result.second;
824 }
825
DoReadBuf(int fd,std::string & msg)826 bool DfxDumpCatcher::DoReadBuf(int fd, std::string& msg)
827 {
828 bool ret = false;
829 char *buffer = new char[MAX_PIPE_SIZE];
830 do {
831 ssize_t nread = OHOS_TEMP_FAILURE_RETRY(read(fd, buffer, MAX_PIPE_SIZE));
832 if (nread <= 0) {
833 DFXLOGW("%{public}s :: %{public}s :: read error", DFXDUMPCATCHER_TAG.c_str(), __func__);
834 break;
835 }
836 DFXLOGD("%{public}s :: %{public}s :: nread: %{public}zu", DFXDUMPCATCHER_TAG.c_str(), __func__, nread);
837 ret = true;
838 msg.append(buffer);
839 } while (false);
840 delete []buffer;
841 return ret;
842 }
843
DoReadRes(int fd,bool & ret,std::string & msg)844 bool DfxDumpCatcher::DoReadRes(int fd, bool& ret, std::string& msg)
845 {
846 int32_t res = DumpErrorCode::DUMP_ESUCCESS;
847 ssize_t nread = OHOS_TEMP_FAILURE_RETRY(read(fd, &res, sizeof(res)));
848 if (nread <= 0 || nread != sizeof(res)) {
849 DFXLOGW("%{public}s :: %{public}s :: read error", DFXDUMPCATCHER_TAG.c_str(), __func__);
850 return false;
851 }
852 if (res == DumpErrorCode::DUMP_ESUCCESS) {
853 ret = true;
854 }
855 msg.append("Result: " + DfxDumpRes::ToString(res) + "\n");
856 return true;
857 }
858
DumpCatchMultiPid(const std::vector<int> pidV,std::string & msg)859 bool DfxDumpCatcher::DumpCatchMultiPid(const std::vector<int> pidV, std::string& msg)
860 {
861 bool ret = false;
862 int pidSize = (int)pidV.size();
863 if (pidSize <= 0) {
864 DFXLOGE("%{public}s :: %{public}s :: param error, pidSize(%{public}d).",
865 DFXDUMPCATCHER_TAG.c_str(), __func__, pidSize);
866 return ret;
867 }
868
869 std::unique_lock<std::mutex> lck(mutex_);
870 int currentPid = getpid();
871 int currentTid = gettid();
872 DFXLOGD("%{public}s :: %{public}s :: cPid(%{public}d), cTid(%{public}d), pidSize(%{public}d).",
873 DFXDUMPCATCHER_TAG.c_str(), \
874 __func__, currentPid, currentTid, pidSize);
875
876 time_t startTime = time(nullptr);
877 if (startTime > 0) {
878 DFXLOGD("%{public}s :: %{public}s :: startTime(%{public}" PRId64 ").",
879 DFXDUMPCATCHER_TAG.c_str(), __func__, startTime);
880 }
881
882 for (int i = 0; i < pidSize; i++) {
883 int pid = pidV[i];
884 std::string pidStr;
885 bool ret = DoDumpRemoteLocked(pid, 0, pidStr) == DUMPCATCH_ESUCCESS;
886 if (ret) {
887 msg.append(pidStr + "\n");
888 } else {
889 msg.append("Failed to dump process:" + std::to_string(pid));
890 }
891
892 time_t currentTime = time(nullptr);
893 if (currentTime > 0) {
894 DFXLOGD("%{public}s :: %{public}s :: startTime(%{public}" PRId64 "), currentTime(%{public}" PRId64 ").",
895 DFXDUMPCATCHER_TAG.c_str(), \
896 __func__, startTime, currentTime);
897 if (currentTime > startTime + DUMP_CATCHE_WORK_TIME_S) {
898 break;
899 }
900 }
901 }
902
903 DFXLOGD("%{public}s :: %{public}s :: msg(%{public}s).", DFXDUMPCATCHER_TAG.c_str(), __func__, msg.c_str());
904 if (msg.find("Tid:") != std::string::npos) {
905 ret = true;
906 }
907 return ret;
908 }
909 } // namespace HiviewDFX
910 } // namespace OHOS
911