• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021-2024 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "dfx_dump_catcher.h"
17 
#include <atomic>
#include <cerrno>
#include <cstdlib>
#include <memory>
#include <thread>
#include <vector>
23 
24 #include <dlfcn.h>
25 #include <poll.h>
26 #include <sys/syscall.h>
27 #include <sys/types.h>
28 #include <securec.h>
29 #include <strings.h>
30 
31 #include "backtrace_local.h"
32 #include "dfx_define.h"
33 #include "dfx_dump_res.h"
34 #include "dfx_kernel_stack.h"
35 #include "dfx_log.h"
36 #include "dfx_trace_dlsym.h"
37 #include "dfx_util.h"
38 #include "elapsed_time.h"
39 #include "faultloggerd_client.h"
40 #include "dfx_socket_request.h"
41 #include "file_ex.h"
42 #include "procinfo.h"
43 
44 namespace OHOS {
45 namespace HiviewDFX {
namespace {
#ifdef LOG_DOMAIN
#undef LOG_DOMAIN
#define LOG_DOMAIN 0xD002D11
#endif

#ifdef LOG_TAG
#undef LOG_TAG
#define LOG_TAG "DfxDumpCatcher"
#endif

// File-local constants.
constexpr int DUMP_CATCHE_WORK_TIME_S = 60;
const std::string DFXDUMPCATCHER_TAG{"DfxDumpCatcher"}; // prefix used in this file's log lines
constexpr int WAIT_GET_KERNEL_STACK_TIMEOUT = 1000; // 1000 : time out 1000 ms
constexpr uint32_t HIVIEW_UID = 1201;
constexpr uint32_t FOUNDATION_UID = 5523;

// State shared between the requesting thread and the detached kernel stack collector thread.
std::string g_kernelStackInfo;              // text of the last collected kernel stacks
std::atomic_bool g_asyncThreadRunning{false}; // true while a collector thread is in flight
int32_t g_kernelStackRet = -1;              // -1 : incomplete kernel stack dump
pid_t g_kernelStackPid = 0;                 // pid the collected kernel stacks belong to
std::condition_variable g_cv;               // notified when a waited-for collection finishes
std::mutex g_kernelStackMutex;              // mutex paired with g_cv

// Outcome of polling the dump pipes.
enum DfxDumpPollRes : int32_t {
    DUMP_POLL_INIT = -1,
    DUMP_POLL_OK = 0,
    DUMP_POLL_FD = 1,
    DUMP_POLL_FAILED = 2,
    DUMP_POLL_TIMEOUT = 3,
    DUMP_POLL_RETURN = 4,
};

// Result category written into the dump catcher statistics.
enum DfxDumpStatRes : int32_t {
    DUMP_RES_NO_KERNELSTACK = -2,
    DUMP_RES_WITH_KERNELSTACK = -1,
    DUMP_RES_WITH_USERSTACK = 0,
};
}
83 
IsLinuxKernel()84 static bool IsLinuxKernel()
85 {
86     static bool isLinux = [] {
87         std::string content;
88         LoadStringFromFile("/proc/version", content);
89         if (content.empty()) {
90             return true;
91         }
92         if (content.find("Linux") != std::string::npos) {
93             return true;
94         }
95         return false;
96     }();
97     return isLinux;
98 }
99 
InitKernelStackInfo()100 static void InitKernelStackInfo()
101 {
102     g_kernelStackInfo.clear();
103     g_kernelStackRet = -1;
104     g_kernelStackPid = 0;
105 }
106 
DoDumpCurrTid(const size_t skipFrameNum,std::string & msg,size_t maxFrameNums)107 bool DfxDumpCatcher::DoDumpCurrTid(const size_t skipFrameNum, std::string& msg, size_t maxFrameNums)
108 {
109     bool ret = false;
110 
111     ret = GetBacktrace(msg, skipFrameNum + 1, false, maxFrameNums);
112     if (!ret) {
113         int currTid = gettid();
114         msg.append("Failed to dump curr thread:" + std::to_string(currTid) + ".\n");
115     }
116     DFXLOGD("%{public}s :: DoDumpCurrTid :: return %{public}d.", DFXDUMPCATCHER_TAG.c_str(), ret);
117     return ret;
118 }
119 
DoDumpLocalTid(const int tid,std::string & msg,size_t maxFrameNums)120 bool DfxDumpCatcher::DoDumpLocalTid(const int tid, std::string& msg, size_t maxFrameNums)
121 {
122     bool ret = false;
123     if (tid <= 0) {
124         DFXLOGE("%{public}s :: DoDumpLocalTid :: return false as param error.", DFXDUMPCATCHER_TAG.c_str());
125         return ret;
126     }
127     ret = GetBacktraceStringByTid(msg, tid, 0, false, maxFrameNums);
128     if (!ret) {
129         msg.append("Failed to dump thread:" + std::to_string(tid) + ".\n");
130     }
131     DFXLOGD("%{public}s :: DoDumpLocalTid :: return %{public}d.", DFXDUMPCATCHER_TAG.c_str(), ret);
132     return ret;
133 }
134 
DoDumpLocalPid(int pid,std::string & msg,size_t maxFrameNums)135 bool DfxDumpCatcher::DoDumpLocalPid(int pid, std::string& msg, size_t maxFrameNums)
136 {
137     bool ret = false;
138     if (pid <= 0) {
139         DFXLOGE("%{public}s :: DoDumpLocalPid :: return false as param error.", DFXDUMPCATCHER_TAG.c_str());
140         return ret;
141     }
142     size_t skipFramNum = 5; // 5: skip 5 frame
143 
144     msg = GetStacktraceHeader();
145     std::function<bool(int)> func = [&](int tid) {
146         if (tid <= 0) {
147             return false;
148         }
149         std::string threadMsg;
150         if (tid == gettid()) {
151             ret = DoDumpCurrTid(skipFramNum, threadMsg, maxFrameNums);
152         } else {
153             ret = DoDumpLocalTid(tid, threadMsg, maxFrameNums);
154         }
155         msg += threadMsg;
156         return ret;
157     };
158     std::vector<int> tids;
159     ret = GetTidsByPidWithFunc(getpid(), tids, func);
160     DFXLOGD("%{public}s :: DoDumpLocalPid :: return %{public}d.", DFXDUMPCATCHER_TAG.c_str(), ret);
161     return ret;
162 }
163 
DoDumpRemoteLocked(int pid,int tid,std::string & msg,bool isJson,int timeout)164 int32_t DfxDumpCatcher::DoDumpRemoteLocked(int pid, int tid, std::string& msg, bool isJson, int timeout)
165 {
166     return DoDumpCatchRemote(pid, tid, msg, isJson, timeout);
167 }
168 
DoDumpLocalLocked(int pid,int tid,std::string & msg,size_t maxFrameNums)169 bool DfxDumpCatcher::DoDumpLocalLocked(int pid, int tid, std::string& msg, size_t maxFrameNums)
170 {
171     bool ret = false;
172     if (tid == gettid()) {
173         size_t skipFramNum = 4; // 4: skip 4 frame
174         ret = DoDumpCurrTid(skipFramNum, msg, maxFrameNums);
175     } else if (tid == 0) {
176         ret = DoDumpLocalPid(pid, msg, maxFrameNums);
177     } else {
178         if (!IsThreadInPid(pid, tid)) {
179             msg.append("tid(" + std::to_string(tid) + ") is not in pid(" + std::to_string(pid) + ").\n");
180         } else {
181             ret = DoDumpLocalTid(tid, msg, maxFrameNums);
182         }
183     }
184 
185     DFXLOGD("%{public}s :: DoDumpLocal :: ret(%{public}d).", DFXDUMPCATCHER_TAG.c_str(), ret);
186     return ret;
187 }
188 
ReportDumpCatcherStats(int32_t pid,uint64_t requestTime,int32_t ret,void * retAddr)189 static void ReportDumpCatcherStats(int32_t pid,
190     uint64_t requestTime, int32_t ret, void* retAddr)
191 {
192     std::vector<uint8_t> buf(sizeof(struct FaultLoggerdStatsRequest), 0);
193     auto stat = reinterpret_cast<struct FaultLoggerdStatsRequest*>(buf.data());
194     stat->type = DUMP_CATCHER;
195     stat->pid = pid;
196     stat->requestTime = requestTime;
197     stat->dumpCatcherFinishTime = GetTimeMilliSeconds();
198     stat->result = (ret == DUMPCATCH_ESUCCESS) ? DUMP_RES_WITH_USERSTACK : DUMP_RES_WITH_KERNELSTACK;
199     if ((ret != DUMPCATCH_ESUCCESS) && g_kernelStackInfo.empty()) {
200         stat->result = DUMP_RES_NO_KERNELSTACK;
201     }
202     size_t copyLen;
203     std::string processName;
204     ReadProcessName(pid, processName);
205     copyLen = std::min(sizeof(stat->targetProcess) - 1, processName.size());
206     if (memcpy_s(stat->targetProcess, sizeof(stat->targetProcess) - 1, processName.c_str(), copyLen) != 0) {
207         DFXLOGE("%{public}s::Failed to copy target process", DFXDUMPCATCHER_TAG.c_str());
208         return;
209     }
210 
211     if (ret != DUMPCATCH_ESUCCESS) {
212         std::string summary = DfxDumpCatchError::ToString(ret);
213         copyLen = std::min(sizeof(stat->summary) - 1, summary.size());
214         if (memcpy_s(stat->summary, sizeof(stat->summary) - 1, summary.c_str(), copyLen) != 0) {
215             DFXLOGE("%{public}s::Failed to copy dumpcatcher summary", DFXDUMPCATCHER_TAG.c_str());
216             return;
217         }
218     }
219 
220     Dl_info info;
221     if (dladdr(retAddr, &info) != 0) {
222         copyLen = std::min(sizeof(stat->callerElf) - 1, strlen(info.dli_fname));
223         if (memcpy_s(stat->callerElf, sizeof(stat->callerElf) - 1, info.dli_fname, copyLen) != 0) {
224             DFXLOGE("%{public}s::Failed to copy caller elf info", DFXDUMPCATCHER_TAG.c_str());
225             return;
226         }
227         stat->offset = reinterpret_cast<uintptr_t>(retAddr) - reinterpret_cast<uintptr_t>(info.dli_fbase);
228     }
229 
230     std::string cmdline;
231     if (OHOS::LoadStringFromFile("/proc/self/cmdline", cmdline)) {
232         copyLen = std::min(sizeof(stat->callerProcess) - 1, cmdline.size());
233         if (memcpy_s(stat->callerProcess, sizeof(stat->callerProcess) - 1,
234             cmdline.c_str(), copyLen) != 0) {
235             DFXLOGE("%{public}s::Failed to copy caller cmdline", DFXDUMPCATCHER_TAG.c_str());
236             return;
237         }
238     }
239 
240     ReportDumpStats(stat);
241 }
242 
/**
 * Checks whether the bit for `signal` is set in a hexadecimal signal mask taken from
 * /proc status text, e.g. a line such as "SigBlk:\t0000000000000000".
 *
 * @param content text to search (typically the content of /proc/<pid>/task/<tid>/status)
 * @param filed   name of the field holding the mask, e.g. "SigBlk" or "SigIgn"
 * @param signal  1-based signal number; bit (signal - 1) of the mask is tested
 * @return true only when the field is present, a mask follows it and the bit is set;
 *         false for a missing/truncated field or a signal outside 1..64
 */
static bool IsBitOn(const std::string& content, const std::string& filed, int signal)
{
    constexpr int maxSignal = 64;     // the mask is a 64-bit value
    constexpr size_t maskHexLen = 16; // 16 hex digits == 64 bits
    if (signal <= 0 || signal > maxSignal) {
        // Shifting by a negative amount or by >= 64 would be undefined behaviour.
        return false;
    }
    const size_t fieldPos = content.find(filed);
    if (fieldPos == std::string::npos) {
        return false;
    }
    //SigBlk:   0000000000000000
    // Skip the separator (':' followed by blanks) instead of assuming it is exactly two characters.
    size_t valuePos = fieldPos + filed.size();
    while (valuePos < content.size() &&
        (content[valuePos] == ':' || content[valuePos] == ' ' || content[valuePos] == '\t')) {
        ++valuePos;
    }
    if (valuePos >= content.size()) {
        // Field name at the very end of the text: nothing to parse (substr would throw otherwise).
        return false;
    }
    const std::string num = content.substr(valuePos, maskHexLen);
    // strtoull: unsigned long is only 32 bits wide on some targets and cannot hold the whole mask.
    const uint64_t hexValue = strtoull(num.c_str(), nullptr, 16);
    const uint64_t mask = 1ULL << (signal - 1);

    return (hexValue & mask) != 0;
}
255 
IsSignalBlocked(int pid,int32_t & ret)256 static bool IsSignalBlocked(int pid, int32_t& ret)
257 {
258     std::vector<int> tids;
259     std::vector<int> nstids;
260     GetTidsByPid(pid, tids, nstids);
261     std::string threadName;
262     std::string content;
263     int targetTid = -1;
264     for (size_t i = 0; i < tids.size(); ++i) {
265         ReadThreadNameByPidAndTid(pid, tids[i], threadName);
266         if (threadName == "OS_DfxWatchdog") {
267             targetTid = tids[i];
268             break;
269         }
270     }
271     if (targetTid != -1) {
272         std::string threadStatusPath = StringPrintf("/proc/%d/task/%d/status", pid, targetTid);
273         if (!LoadStringFromFile(threadStatusPath, content) || content.empty()) {
274             DFXLOGE("the pid(%{public}d)thread(%{public}d) read status fail, errno(%{public}d)", pid, targetTid, errno);
275             ret = DUMPCATCH_UNKNOWN;
276             return true;
277         }
278 
279         if (IsBitOn(content, "SigBlk", SIGDUMP) || IsBitOn(content, "SigIgn", SIGDUMP)) {
280             DFXLOGI("the pid(%{public}d)thread(%{public}d) signal has been blocked by target process", pid, targetTid);
281             ret = DUMPCATCH_TIMEOUT_SIGNAL_BLOCK;
282             return true;
283         }
284     }
285     return false;
286 }
287 
IsFrozen(int pid,int32_t & ret)288 static bool IsFrozen(int pid, int32_t& ret)
289 {
290     std::string content;
291     std::string cgroupPath = StringPrintf("/proc/%d/cgroup", pid);
292     if (!LoadStringFromFile(cgroupPath, content)) {
293         DFXLOGE("the pid (%{public}d) read cgroup fail, errno (%{public}d)", pid, errno);
294         ret = DUMPCATCH_UNKNOWN;
295         return true;
296     }
297 
298     if (content.find("Frozen") != std::string::npos) {
299         DFXLOGI("the pid (%{public}d) has been frozen", pid);
300         ret = DUMPCATCH_TIMEOUT_KERNEL_FROZEN;
301         return true;
302     }
303     return false;
304 }
305 
AnalyzeTimeoutReason(int pid,int32_t & ret)306 static void AnalyzeTimeoutReason(int pid, int32_t& ret)
307 {
308     std::string statusPath = StringPrintf("/proc/%d/status", pid);
309     if (access(statusPath.c_str(), F_OK) != 0) {
310         DFXLOGI("the pid (%{public}d) process exit during the dump, errno (%{public}d)", pid, errno);
311         ret = DUMPCATCH_TIMEOUT_PROCESS_KILLED;
312         return;
313     }
314 
315     if (IsSignalBlocked(pid, ret)) {
316         return;
317     }
318 
319     if (IsFrozen(pid, ret)) {
320         return;
321     }
322 
323     DFXLOGI("the pid (%{public}d) dump slow", pid);
324     ret = DUMPCATCH_TIMEOUT_DUMP_SLOW;
325 }
326 
DealWithPollRet(int pollRet,int pid,int32_t & ret,std::string & msg)327 void DfxDumpCatcher::DealWithPollRet(int pollRet, int pid, int32_t& ret, std::string& msg)
328 {
329     if (pollRet == DUMP_POLL_OK) {
330         ret = DUMPCATCH_ESUCCESS;
331         return;
332     }
333     if (g_kernelStackPid != pid) {
334         AsyncGetAllTidKernelStack(pid, WAIT_GET_KERNEL_STACK_TIMEOUT);
335     }
336     msg.append(halfProcStatus_);
337     msg.append(halfProcWchan_);
338     switch (pollRet) {
339         case DUMP_POLL_FD:
340             ret = DUMPCATCH_EFD;
341             break;
342         case DUMP_POLL_FAILED:
343             ret = DUMPCATCH_EPOLL;
344             break;
345         case DUMP_POLL_TIMEOUT:
346             AnalyzeTimeoutReason(pid, ret);
347             break;
348         case DUMP_POLL_RETURN:
349             if (msg.find("ptrace attach thread failed") != std::string::npos) {
350                 ret = DUMPCATCH_DUMP_EPTRACE;
351             } else if (msg.find("stop unwinding") != std::string::npos) {
352                 ret = DUMPCATCH_DUMP_EUNWIND;
353             } else if (msg.find("mapinfo is not exist") != std::string::npos) {
354                 ret = DUMPCATCH_DUMP_EMAP;
355             } else {
356                 ret = DUMPCATCH_UNKNOWN;
357             }
358             break;
359         default:
360             ret = DUMPCATCH_UNKNOWN;
361             break;
362     }
363 }
364 
DealWithSdkDumpRet(int sdkdumpRet,int pid,int32_t & ret,std::string & msg)365 void DfxDumpCatcher::DealWithSdkDumpRet(int sdkdumpRet, int pid, int32_t& ret, std::string& msg)
366 {
367     uint32_t uid = getuid();
368     if (sdkdumpRet == ResponseCode::SDK_DUMP_REPEAT) {
369         AsyncGetAllTidKernelStack(pid, WAIT_GET_KERNEL_STACK_TIMEOUT);
370         msg.append("Result: pid(" + std::to_string(pid) + ") process is dumping.\n");
371         ret = DUMPCATCH_IS_DUMPING;
372     } else if (sdkdumpRet == ResponseCode::REQUEST_REJECT) {
373         msg.append("Result: pid(" + std::to_string(pid) + ") process check permission error.\n");
374         ret = DUMPCATCH_EPERMISSION;
375     } else if (sdkdumpRet == ResponseCode::SDK_DUMP_NOPROC) {
376         msg.append("Result: pid(" + std::to_string(pid) + ") process has exited.\n");
377         ret = DUMPCATCH_NO_PROCESS;
378     } else if (sdkdumpRet == ResponseCode::SDK_PROCESS_CRASHED) {
379         msg.append("Result: pid(" + std::to_string(pid) + ") process has been crashed.\n");
380         ret = DUMPCATCH_HAS_CRASHED;
381     } else if (sdkdumpRet == ResponseCode::CONNECT_FAILED) {
382         if (uid == HIVIEW_UID || uid == FOUNDATION_UID) {
383             AsyncGetAllTidKernelStack(pid, WAIT_GET_KERNEL_STACK_TIMEOUT);
384         }
385         msg.append("Result: pid(" + std::to_string(pid) + ") process fail to conntect faultloggerd.\n");
386         ret = DUMPCATCH_ECONNECT;
387     } else if (sdkdumpRet == ResponseCode::SEND_DATA_FAILED) {
388         if (uid == HIVIEW_UID || uid == FOUNDATION_UID) {
389             AsyncGetAllTidKernelStack(pid, WAIT_GET_KERNEL_STACK_TIMEOUT);
390         }
391         msg.append("Result: pid(" + std::to_string(pid) + ") process fail to write to faultloggerd.\n");
392         ret = DUMPCATCH_EWRITE;
393     }
394     DFXLOGW("%{public}s :: %{public}s :: %{public}s", DFXDUMPCATCHER_TAG.c_str(), __func__, msg.c_str());
395 }
396 
DealWithDumpCatchRet(int pid,int32_t & ret,std::string & msg)397 static std::pair<int, std::string> DealWithDumpCatchRet(int pid, int32_t& ret, std::string& msg)
398 {
399     int result = ret == 0 ? 0 : -1;
400     std::string reason;
401     if (result == 0) {
402         reason = "Reason:" + DfxDumpCatchError::ToString(ret) + "\n";
403     } else {
404         reason = "Reason:\nnormal stack:" + DfxDumpCatchError::ToString(ret) + "\n";
405     }
406     if (result != 0) {
407         if (pid == g_kernelStackPid && !g_asyncThreadRunning) {
408             msg.append(g_kernelStackInfo);
409             result = 1;
410             InitKernelStackInfo();
411         } else if (g_kernelStackRet != -1) {
412             ret = g_kernelStackRet;
413             reason += "kernel stack:" + DfxDumpCatchError::ToString(ret) + "\n";
414             g_kernelStackRet = -1;
415         } else if (g_kernelStackRet == -1) {
416             reason += "kernel stack:" + DfxDumpCatchError::ToString(DUMPCATCH_KERNELSTACK_NONEED) + "\n";
417         }
418     }
419     std::string toFind = "Result:";
420     size_t startPos = msg.find(toFind);
421     if (startPos != std::string::npos) {
422         size_t endPos = msg.find("\n", startPos);
423         if (endPos != std::string::npos) {
424             msg.erase(startPos, endPos - startPos + 1);
425         }
426     }
427     return std::make_pair(result, reason);
428 }
429 
DumpCatchWithTimeout(int pid,std::string & msg,int timeout,int tid,bool isJson)430 std::pair<int, std::string> DfxDumpCatcher::DumpCatchWithTimeout(int pid, std::string& msg, int timeout,
431     int tid, bool isJson)
432 {
433     DfxEnableTraceDlsym(true);
434     ElapsedTime counter;
435     uint64_t requestTime = GetTimeMilliSeconds();
436     int32_t dumpcatchErrno = DUMPCATCH_UNKNOWN;
437     bool reportStat = false;
438     do {
439         if (pid <= 0 || tid <0 || timeout <= WAIT_GET_KERNEL_STACK_TIMEOUT) {
440             DFXLOGE("DumpCatchWithTimeout:: param error.");
441             dumpcatchErrno = DUMPCATCH_EPARAM;
442             break;
443         }
444         if (!IsLinuxKernel()) {
445             std::string statusPath = StringPrintf("/proc/%d/status", pid);
446             if (access(statusPath.c_str(), F_OK) != 0 && errno != EACCES) {
447                 DFXLOGE("DumpCatchWithTimeout:: the pid(%{public}d) process has exited, errno(%{public}d)", pid, errno);
448                 msg.append("Result: pid(" + std::to_string(pid) + ") process has exited.\n");
449                 dumpcatchErrno = DUMPCATCH_NO_PROCESS;
450                 break;
451             }
452         }
453         std::unique_lock<std::mutex> lck(mutex_);
454         int currentPid = getpid();
455         if (pid == currentPid) {
456             DFXLOGE("DumpCatchWithTimeout:: param error (don't support dumpcatch self)");
457             dumpcatchErrno = DUMPCATCH_EPARAM;
458             break;
459         } else {
460             DFXLOGI("Receive DumpCatch request for cPid:(%{public}d), pid(%{public}d)", currentPid, pid);
461             dumpcatchErrno = DoDumpRemoteLocked(pid, tid, msg, isJson, timeout);
462             reportStat = true;
463         }
464     } while (false);
465 
466     auto result = DealWithDumpCatchRet(pid, dumpcatchErrno, msg);
467     if (reportStat) {
468         void* retAddr = __builtin_return_address(0);
469         ReportDumpCatcherStats(pid, requestTime, dumpcatchErrno, retAddr);
470     }
471 
472     DFXLOGI("dump_catch : pid = %{public}d, elapsed time = %{public}" PRId64 " ms, " \
473         "msgLength = %{public}zu, ret = %{public}d\n%{public}s",
474         pid, counter.Elapsed<std::chrono::milliseconds>(), msg.size(), result.first, result.second.c_str());
475     DfxEnableTraceDlsym(false);
476     return result;
477 }
478 
DumpCatchProcess(int pid,std::string & msg,size_t maxFrameNums,bool isJson)479 int DfxDumpCatcher::DumpCatchProcess(int pid, std::string& msg, size_t maxFrameNums, bool isJson)
480 {
481     if (DumpCatch(pid, 0, msg, maxFrameNums, isJson)) {
482         return 0;
483     }
484     if (pid == g_kernelStackPid && !g_asyncThreadRunning) {
485         msg.append(g_kernelStackInfo);
486         InitKernelStackInfo();
487         return 1;
488     }
489     g_kernelStackRet = -1;
490     return -1;
491 }
492 
DumpCatch(int pid,int tid,std::string & msg,size_t maxFrameNums,bool isJson)493 bool DfxDumpCatcher::DumpCatch(int pid, int tid, std::string& msg, size_t maxFrameNums, bool isJson)
494 {
495     bool ret = false;
496     if (pid <= 0 || tid < 0) {
497         DFXLOGE("%{public}s :: dump_catch :: param error.", DFXDUMPCATCHER_TAG.c_str());
498         return ret;
499     }
500     if (!IsLinuxKernel()) {
501         std::string statusPath = StringPrintf("/proc/%d/status", pid);
502         DFXLOGI("DumpCatch:: access pid(%{public}d) status", pid);
503         if (access(statusPath.c_str(), F_OK) != 0 && errno != EACCES) {
504             DFXLOGE("DumpCatch:: the pid(%{public}d) process has exited, errno(%{public}d)", pid, errno);
505             msg.append("Result: pid(" + std::to_string(pid) + ") process has exited.\n");
506             return ret;
507         }
508     }
509     DfxEnableTraceDlsym(true);
510     ElapsedTime counter;
511     std::unique_lock<std::mutex> lck(mutex_);
512     int currentPid = getpid();
513     uint64_t requestTime = GetTimeMilliSeconds();
514     DFXLOGI("Receive DumpCatch request for cPid:(%{public}d), pid(%{public}d), " \
515         "tid:(%{public}d).", currentPid, pid, tid);
516     if (pid == currentPid) {
517         ret = DoDumpLocalLocked(pid, tid, msg, maxFrameNums);
518     } else {
519         if (maxFrameNums != DEFAULT_MAX_FRAME_NUM) {
520             DFXLOGI("%{public}s :: dump_catch :: maxFrameNums does not support setting " \
521                 "when pid is not equal to caller pid", DFXDUMPCATCHER_TAG.c_str());
522         }
523         int timeout = (tid == 0 ? 3 : 10) * 1000; // when tid not zero, timeout is 10s
524         int32_t res = DoDumpRemoteLocked(pid, tid, msg, isJson, timeout);
525         if (res != DUMPCATCH_ESUCCESS && g_kernelStackRet != DUMPCATCH_ESUCCESS && g_kernelStackRet != -1) {
526             res = g_kernelStackRet;
527         }
528         void* retAddr = __builtin_return_address(0);
529         ReportDumpCatcherStats(pid, requestTime, res, retAddr);
530         ret = res == DUMPCATCH_ESUCCESS;
531     }
532 
533     DFXLOGI("dump_catch : pid = %{public}d, elapsed time = %{public}" PRId64 " ms, ret = %{public}d, " \
534         "msgLength = %{public}zu",
535         pid, counter.Elapsed<std::chrono::milliseconds>(), ret, msg.size());
536     DfxEnableTraceDlsym(false);
537     return ret;
538 }
539 
DumpCatchFd(int pid,int tid,std::string & msg,int fd,size_t maxFrameNums)540 bool DfxDumpCatcher::DumpCatchFd(int pid, int tid, std::string& msg, int fd, size_t maxFrameNums)
541 {
542     bool ret = false;
543     ret = DumpCatch(pid, tid, msg, maxFrameNums);
544     if (fd > 0) {
545         ret = OHOS_TEMP_FAILURE_RETRY(write(fd, msg.c_str(), msg.length()));
546     }
547     return ret;
548 }
549 
DoDumpCatchRemote(int pid,int tid,std::string & msg,bool isJson,int timeout)550 int32_t DfxDumpCatcher::DoDumpCatchRemote(int pid, int tid, std::string& msg, bool isJson, int timeout)
551 {
552     DFX_TRACE_SCOPED_DLSYM("DoDumpCatchRemote");
553     int32_t ret = DUMPCATCH_UNKNOWN;
554     if (pid <= 0 || tid < 0 || timeout <= WAIT_GET_KERNEL_STACK_TIMEOUT) {
555         msg.append("Result: pid(" + std::to_string(pid) + ") param error.\n");
556         DFXLOGW("%{public}s :: %{public}s :: %{public}s", DFXDUMPCATCHER_TAG.c_str(), __func__, msg.c_str());
557         return DUMPCATCH_EPARAM;
558     }
559     pid_ = pid;
560     int pipeReadFd[] = { -1, -1 };
561     uint64_t sdkDumpStartTime = GetAbsTimeMilliSeconds();
562     int sdkdumpRet = RequestSdkDump(pid, tid, pipeReadFd, isJson, timeout);
563     if (sdkdumpRet != ResponseCode::REQUEST_SUCCESS) {
564         DealWithSdkDumpRet(sdkdumpRet, pid, ret, msg);
565         return ret;
566     }
567     // timeout sub the cost time of sdkdump
568     timeout -= static_cast<int>(GetAbsTimeMilliSeconds() - sdkDumpStartTime);
569     int pollRet = DoDumpRemotePid(pid, msg, pipeReadFd, isJson, timeout);
570     DealWithPollRet(pollRet, pid, ret, msg);
571     DFXLOGI("%{public}s :: %{public}s :: pid(%{public}d) ret: %{public}d", DFXDUMPCATCHER_TAG.c_str(),
572         __func__, pid, ret);
573     return ret;
574 }
575 
DoDumpRemotePid(int pid,std::string & msg,int (& pipeReadFd)[2],bool isJson,int32_t timeout)576 int DfxDumpCatcher::DoDumpRemotePid(int pid, std::string& msg, int (&pipeReadFd)[2], bool isJson, int32_t timeout)
577 {
578     DFX_TRACE_SCOPED_DLSYM("DoDumpRemotePid");
579     if (timeout <= 0) {
580         DFXLOGW("timeout less than 0, try to get kernel stack and return directly!");
581         AsyncGetAllTidKernelStack(pid, WAIT_GET_KERNEL_STACK_TIMEOUT);
582         RequestDelPipeFd(pid);
583         CloseFd(pipeReadFd[PIPE_BUF_INDEX]);
584         CloseFd(pipeReadFd[PIPE_RES_INDEX]);
585         return DUMP_POLL_TIMEOUT;
586     } else if (timeout < 1000) { // 1000 : one thousand milliseconds
587         DFXLOGW("timeout less than 1 seconds, get kernel stack directly!");
588         AsyncGetAllTidKernelStack(pid);
589     }
590     int ret = DoDumpRemotePoll(timeout, msg, pipeReadFd, isJson);
591     // request close fds in faultloggerd
592     RequestDelPipeFd(pid);
593     CloseFd(pipeReadFd[PIPE_BUF_INDEX]);
594     CloseFd(pipeReadFd[PIPE_RES_INDEX]);
595     DFXLOGI("%{public}s :: %{public}s :: pid(%{public}d) poll ret: %{public}d",
596         DFXDUMPCATCHER_TAG.c_str(), __func__, pid, ret);
597     return ret;
598 }
599 
KernelRet2DumpcatchRet(int32_t ret)600 static int32_t KernelRet2DumpcatchRet(int32_t ret)
601 {
602     switch (ret) {
603         case KERNELSTACK_ECREATE:
604             return DUMPCATCH_KERNELSTACK_ECREATE;
605         case KERNELSTACK_EOPEN:
606             return DUMPCATCH_KERNELSTACK_EOPEN;
607         case KERNELSTACK_EIOCTL:
608             return DUMPCATCH_KERNELSTACK_EIOCTL;
609         default:
610             return DUMPCATCH_UNKNOWN;
611     }
612 }
613 
CollectKernelStack(pid_t pid,int waitMilliSeconds)614 void DfxDumpCatcher::CollectKernelStack(pid_t pid, int waitMilliSeconds)
615 {
616     ElapsedTime timer;
617     std::string kernelStackInfo;
618     int32_t kernelRet = 0;
619     auto finishCollect = [waitMilliSeconds]() {
620         if (waitMilliSeconds > 0) {
621             std::unique_lock<std::mutex> lock(g_kernelStackMutex);
622             g_asyncThreadRunning = false;
623             lock.unlock();
624             g_cv.notify_all();
625         } else {
626             g_asyncThreadRunning = false;
627         }
628     };
629     std::string statusPath = StringPrintf("/proc/%d/status", pid);
630     if (access(statusPath.c_str(), F_OK) != 0) {
631         DFXLOGW("No process(%{public}d) status file exist!", pid);
632         finishCollect();
633         return;
634     }
635 
636     std::function<bool(int)> func = [&](int tid) {
637         if (tid <= 0) {
638             return false;
639         }
640         std::string tidKernelStackInfo;
641         int32_t ret = DfxGetKernelStack(tid, tidKernelStackInfo);
642         if (ret == 0) {
643             kernelStackInfo.append(tidKernelStackInfo);
644         } else if (kernelRet == 0) {
645             kernelRet = ret;
646         }
647         return true;
648     };
649     std::vector<int> tids;
650     MAYBE_UNUSED bool ret = GetTidsByPidWithFunc(pid, tids, func);
651     if (kernelStackInfo.empty()) {
652         DFXLOGE("Process(%{public}d) collect kernel stack fail!", pid);
653         g_kernelStackRet = KernelRet2DumpcatchRet(kernelRet);
654         finishCollect();
655         return;
656     }
657     g_kernelStackPid = pid;
658     g_kernelStackInfo = kernelStackInfo;
659     g_kernelStackRet = 0;
660     finishCollect();
661     DFXLOGI("finish collect all tid info for pid(%{public}d) time(%{public}" PRId64 ")ms", pid,
662         timer.Elapsed<std::chrono::milliseconds>());
663 }
664 
AsyncGetAllTidKernelStack(pid_t pid,int waitMilliSeconds)665 void DfxDumpCatcher::AsyncGetAllTidKernelStack(pid_t pid, int waitMilliSeconds)
666 {
667     ReadProcessStatus(halfProcStatus_, pid);
668     if (IsLinuxKernel()) {
669         ReadProcessWchan(halfProcWchan_, pid, false, true);
670     }
671     if (g_asyncThreadRunning) {
672         DFXLOGI("pid(%{public}d) get kernel stack thread is running, not get pid(%{public}d)", g_kernelStackPid, pid);
673         return;
674     }
675     g_asyncThreadRunning = true;
676     InitKernelStackInfo();
677     auto func = [pid, waitMilliSeconds] {
678         CollectKernelStack(pid, waitMilliSeconds);
679     };
680     if (waitMilliSeconds > 0) {
681         std::unique_lock<std::mutex> lock(g_kernelStackMutex);
682         std::thread kernelStackTask(func);
683         kernelStackTask.detach();
684         g_cv.wait_for(lock, std::chrono::milliseconds(WAIT_GET_KERNEL_STACK_TIMEOUT),
685             [] {return !g_asyncThreadRunning;});
686     } else {
687         std::thread kernelStackTask(func);
688         kernelStackTask.detach();
689     }
690 }
691 
HandlePollError(const uint64_t endTime,int & remainTime,bool & collectAllTidStack,std::string & resMsg,int & ret)692 bool DfxDumpCatcher::HandlePollError(const uint64_t endTime, int& remainTime,
693     bool& collectAllTidStack, std::string& resMsg, int& ret)
694 {
695     if (errno == EINTR) {
696         uint64_t now = GetAbsTimeMilliSeconds();
697         if (now >= endTime) {
698             ret = DUMP_POLL_TIMEOUT;
699             resMsg.append("Result: poll timeout.\n");
700             return false;
701         }
702         if (!collectAllTidStack && (remainTime == DUMPCATCHER_REMOTE_P90_TIMEOUT)) {
703             AsyncGetAllTidKernelStack(pid_);
704             collectAllTidStack = true;
705         }
706         remainTime = static_cast<int>(endTime - now);
707         return true;
708     }
709     ret = DUMP_POLL_FAILED;
710     resMsg.append("Result: poll error, errno(" + std::to_string(errno) + ")\n");
711     return false;
712 }
713 
HandlePollTimeout(const int timeout,int & remainTime,bool & collectAllTidStack,std::string & resMsg,int & ret)714 bool DfxDumpCatcher::HandlePollTimeout(const int timeout, int& remainTime,
715     bool& collectAllTidStack, std::string& resMsg, int& ret)
716 {
717     if (!collectAllTidStack && (remainTime == DUMPCATCHER_REMOTE_P90_TIMEOUT)) {
718         AsyncGetAllTidKernelStack(pid_);
719         remainTime = timeout - DUMPCATCHER_REMOTE_P90_TIMEOUT;
720         collectAllTidStack = true;
721         return true;
722     }
723     ret = DUMP_POLL_TIMEOUT;
724     resMsg.append("Result: poll timeout.\n");
725     return false;
726 }
727 
HandlePollEvents(std::pair<int,std::string> & bufState,std::pair<int,std::string> & resState,const struct pollfd (& readFds)[2],bool & bPipeConnect,bool & res)728 bool DfxDumpCatcher::HandlePollEvents(std::pair<int, std::string>& bufState, std::pair<int, std::string>& resState,
729     const struct pollfd (&readFds)[2], bool& bPipeConnect, bool& res)
730 {
731     bool bufRet = true;
732     bool resRet = false;
733     bool eventRet = true;
734     for (auto& readFd : readFds) {
735         if (!bPipeConnect && (static_cast<uint32_t>(readFd.revents) & POLLIN)) {
736             bPipeConnect = true;
737         }
738 
739         if (bPipeConnect &&
740             ((static_cast<uint32_t>(readFd.revents) & POLLERR) || (static_cast<uint32_t>(readFd.revents) & POLLHUP))) {
741             eventRet = false;
742             resState.second.append("Result: poll events error.\n");
743             break;
744         }
745 
746         if ((static_cast<uint32_t>(readFd.revents) & POLLIN) != POLLIN) {
747             continue;
748         }
749 
750         if (readFd.fd == bufState.first) {
751             bufRet = DoReadBuf(bufState.first, bufState.second);
752         } else if (readFd.fd == resState.first) {
753             resRet = DoReadRes(resState.first, res, resState.second);
754         }
755     }
756 
757     if ((eventRet == false) || (bufRet == false) || (resRet == true)) {
758         DFXLOGI("%{public}s :: %{public}s :: eventRet(%{public}d) bufRet: %{public}d resRet: %{public}d",
759             DFXDUMPCATCHER_TAG.c_str(), __func__, eventRet, bufRet, resRet);
760         return false;
761     }
762     return true;
763 }
764 
DumpRemotePoll(const int timeout,std::pair<int,std::string> & bufState,std::pair<int,std::string> & resState)765 std::pair<bool, int> DfxDumpCatcher::DumpRemotePoll(const int timeout,
766     std::pair<int, std::string>& bufState, std::pair<int, std::string>& resState)
767 {
768     int ret = DUMP_POLL_INIT;
769     bool res = false;
770     struct pollfd readFds[2];
771     (void)memset_s(readFds, sizeof(readFds), 0, sizeof(readFds));
772     readFds[0].fd = bufState.first;
773     readFds[0].events = POLLIN;
774     readFds[1].fd = resState.first;
775     readFds[1].events = POLLIN;
776     int fdsSize = sizeof(readFds) / sizeof(readFds[0]);
777     bool bPipeConnect = false;
778     int remainTime = DUMPCATCHER_REMOTE_P90_TIMEOUT < timeout ? DUMPCATCHER_REMOTE_P90_TIMEOUT : timeout;
779     bool collectAllTidStack = false;
780     uint64_t startTime = GetAbsTimeMilliSeconds();
781     uint64_t endTime = startTime + static_cast<uint64_t>(timeout);
782     bool isContinue = true;
783     do {
784         int pollRet = poll(readFds, fdsSize, remainTime);
785         if (pollRet < 0) {
786             isContinue = HandlePollError(endTime, remainTime, collectAllTidStack, resState.second, ret);
787             continue;
788         } else if (pollRet == 0) {
789             isContinue = HandlePollTimeout(timeout, remainTime, collectAllTidStack, resState.second, ret);
790             continue;
791         }
792         if (!HandlePollEvents(bufState, resState, readFds, bPipeConnect, res)) {
793             ret = DUMP_POLL_RETURN;
794             break;
795         }
796         uint64_t now = GetAbsTimeMilliSeconds();
797         if (now >= endTime) {
798             ret = DUMP_POLL_TIMEOUT;
799             resState.second.append("Result: poll timeout.\n");
800             break;
801         }
802         remainTime = static_cast<int>(endTime - now);
803     } while (isContinue);
804     return std::make_pair(res, ret);
805 }
806 
DoDumpRemotePoll(int timeout,std::string & msg,const int (& pipeReadFd)[2],bool isJson)807 int DfxDumpCatcher::DoDumpRemotePoll(int timeout, std::string& msg, const int (&pipeReadFd)[2], bool isJson)
808 {
809     DFX_TRACE_SCOPED_DLSYM("DoDumpRemotePoll");
810     if (pipeReadFd[PIPE_BUF_INDEX] < 0 || pipeReadFd[PIPE_RES_INDEX] < 0) {
811         if (!isJson) {
812             msg = "Result: bufFd or resFd < 0.\n";
813         }
814         DFXLOGE("invalid bufFd or resFd");
815         return DUMP_POLL_FD;
816     }
817     std::pair<int, std::string> bufState = std::make_pair(pipeReadFd[PIPE_BUF_INDEX], "");
818     std::pair<int, std::string> resState = std::make_pair(pipeReadFd[PIPE_RES_INDEX], "");
819     std::pair<bool, int> result = DumpRemotePoll(timeout, bufState, resState);
820 
821     DFXLOGI("%{public}s :: %{public}s :: %{public}s", DFXDUMPCATCHER_TAG.c_str(), __func__, resState.second.c_str());
822     msg = isJson && result.first ? bufState.second : (resState.second + bufState.second);
823     return result.first ? DUMP_POLL_OK : result.second;
824 }
825 
DoReadBuf(int fd,std::string & msg)826 bool DfxDumpCatcher::DoReadBuf(int fd, std::string& msg)
827 {
828     bool ret = false;
829     char *buffer = new char[MAX_PIPE_SIZE];
830     do {
831         ssize_t nread = OHOS_TEMP_FAILURE_RETRY(read(fd, buffer, MAX_PIPE_SIZE));
832         if (nread <= 0) {
833             DFXLOGW("%{public}s :: %{public}s :: read error", DFXDUMPCATCHER_TAG.c_str(), __func__);
834             break;
835         }
836         DFXLOGD("%{public}s :: %{public}s :: nread: %{public}zu", DFXDUMPCATCHER_TAG.c_str(), __func__, nread);
837         ret = true;
838         msg.append(buffer);
839     } while (false);
840     delete []buffer;
841     return ret;
842 }
843 
DoReadRes(int fd,bool & ret,std::string & msg)844 bool DfxDumpCatcher::DoReadRes(int fd, bool& ret, std::string& msg)
845 {
846     int32_t res = DumpErrorCode::DUMP_ESUCCESS;
847     ssize_t nread = OHOS_TEMP_FAILURE_RETRY(read(fd, &res, sizeof(res)));
848     if (nread <= 0 || nread != sizeof(res)) {
849         DFXLOGW("%{public}s :: %{public}s :: read error", DFXDUMPCATCHER_TAG.c_str(), __func__);
850         return false;
851     }
852     if (res == DumpErrorCode::DUMP_ESUCCESS) {
853         ret = true;
854     }
855     msg.append("Result: " + DfxDumpRes::ToString(res) + "\n");
856     return true;
857 }
858 
DumpCatchMultiPid(const std::vector<int> pidV,std::string & msg)859 bool DfxDumpCatcher::DumpCatchMultiPid(const std::vector<int> pidV, std::string& msg)
860 {
861     bool ret = false;
862     int pidSize = (int)pidV.size();
863     if (pidSize <= 0) {
864         DFXLOGE("%{public}s :: %{public}s :: param error, pidSize(%{public}d).",
865             DFXDUMPCATCHER_TAG.c_str(), __func__, pidSize);
866         return ret;
867     }
868 
869     std::unique_lock<std::mutex> lck(mutex_);
870     int currentPid = getpid();
871     int currentTid = gettid();
872     DFXLOGD("%{public}s :: %{public}s :: cPid(%{public}d), cTid(%{public}d), pidSize(%{public}d).",
873         DFXDUMPCATCHER_TAG.c_str(), \
874         __func__, currentPid, currentTid, pidSize);
875 
876     time_t startTime = time(nullptr);
877     if (startTime > 0) {
878         DFXLOGD("%{public}s :: %{public}s :: startTime(%{public}" PRId64 ").",
879             DFXDUMPCATCHER_TAG.c_str(), __func__, startTime);
880     }
881 
882     for (int i = 0; i < pidSize; i++) {
883         int pid = pidV[i];
884         std::string pidStr;
885         bool ret = DoDumpRemoteLocked(pid, 0, pidStr) == DUMPCATCH_ESUCCESS;
886         if (ret) {
887             msg.append(pidStr + "\n");
888         } else {
889             msg.append("Failed to dump process:" + std::to_string(pid));
890         }
891 
892         time_t currentTime = time(nullptr);
893         if (currentTime > 0) {
894             DFXLOGD("%{public}s :: %{public}s :: startTime(%{public}" PRId64 "), currentTime(%{public}" PRId64 ").",
895                 DFXDUMPCATCHER_TAG.c_str(), \
896                 __func__, startTime, currentTime);
897             if (currentTime > startTime + DUMP_CATCHE_WORK_TIME_S) {
898                 break;
899             }
900         }
901     }
902 
903     DFXLOGD("%{public}s :: %{public}s :: msg(%{public}s).", DFXDUMPCATCHER_TAG.c_str(), __func__, msg.c_str());
904     if (msg.find("Tid:") != std::string::npos) {
905         ret = true;
906     }
907     return ret;
908 }
909 } // namespace HiviewDFX
910 } // namespace OHOS
911