• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021-2024 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "dfx_dump_catcher.h"
17 
18 #include <atomic>
19 #include <cerrno>
20 #include <condition_variable>
21 #include <mutex>
22 #include <thread>
23 #include <vector>
24 
25 #include <dlfcn.h>
26 #include <poll.h>
27 #include <securec.h>
28 #include <unistd.h>
29 #include <sys/syscall.h>
30 #include <sys/types.h>
31 
32 #include "backtrace_local.h"
33 #include "dfx_define.h"
34 #include "dfx_dump_catcher_errno.h"
35 #include "dfx_dump_catcher_slow_policy.h"
36 #include "dfx_dump_res.h"
37 #include "dfx_log.h"
38 #include "dfx_socket_request.h"
39 #include "dfx_trace_dlsym.h"
40 #include "dfx_util.h"
41 #include "elapsed_time.h"
42 #include "faultloggerd_client.h"
43 #include "file_ex.h"
44 #include "kernel_stack_async_collector.h"
45 #include "procinfo.h"
46 #include "smart_fd.h"
47 #include "string_printf.h"
48 
49 namespace OHOS {
50 namespace HiviewDFX {
namespace {
// Redirect the log domain to the dumpcatcher value. The redefinition only happens
// when a LOG_DOMAIN macro is already defined at this point.
#ifdef LOG_DOMAIN
#undef LOG_DOMAIN
#define LOG_DOMAIN 0xD002D11
#endif

// Same treatment for the log tag.
#ifdef LOG_TAG
#undef LOG_TAG
#define LOG_TAG "DfxDumpCatcher"
#endif

// NOTE(review): not referenced in the visible part of this file; the _S suffix suggests seconds - confirm.
static constexpr int DUMP_CATCHE_WORK_TIME_S = 60;
// Budget handed to the kernel stack collector, in milliseconds (1000 ms).
static constexpr int WAIT_GET_KERNEL_STACK_TIMEOUT = 1000;
// Caller uids for which a kernel stack is still collected after a faultloggerd request failure.
static constexpr uint32_t HIVIEW_UID = 1201;
static constexpr uint32_t FOUNDATION_UID = 5523;

// Result classes written into the dump statistics record.
enum DfxDumpStatRes : int32_t {
    DUMP_RES_NO_KERNELSTACK = -2,
    DUMP_RES_WITH_KERNELSTACK = -1,
    DUMP_RES_WITH_USERSTACK = 0,
    DUMP_RES_WITH_USERSTACK_NO_PARSE_SYMBOL = 1,
    DUMP_RES_WITH_USERSTACK_PARSE_SYMBOL_TIMEOUT = 2,
};
} // namespace
74 
75 struct DumpCatcherPipeData {
DumpCatcherPipeDataOHOS::HiviewDFX::DumpCatcherPipeData76     DumpCatcherPipeData(int32_t pid, int32_t bufPipe, int32_t resPipe) : pid(pid), bufFd(bufPipe), resFd(resPipe) {}
~DumpCatcherPipeDataOHOS::HiviewDFX::DumpCatcherPipeData77     ~DumpCatcherPipeData()
78     {
79         // request close fds in faultloggerd
80         RequestDelPipeFd(pid);
81     }
82     DumpCatcherPipeData(const DumpCatcherPipeData&) = delete;
83     DumpCatcherPipeData& operator=(const DumpCatcherPipeData&) = delete;
84 
85     int pid{-1};
86     SmartFd bufFd;
87     SmartFd resFd;
88     std::string bufMsg = "";
89     std::string resMsg = "";
90 } ;
91 
IsLinuxKernel()92 static bool IsLinuxKernel()
93 {
94     static bool isLinux = [] {
95         std::string content;
96         LoadStringFromFile("/proc/version", content);
97         if (content.empty()) {
98             return true;
99         }
100         if (content.find("Linux") != std::string::npos) {
101             return true;
102         }
103         return false;
104     }();
105     return isLinux;
106 }
107 
108 class DfxDumpCatcher::Impl {
109 public:
110     bool DumpCatch(int pid, int tid, std::string& msg, size_t maxFrameNums, bool isJson);
111     bool DumpCatchFd(int pid, int tid, std::string& msg, int fd, size_t maxFrameNums);
112     bool DumpCatchMultiPid(const std::vector<int> &pids, std::string& msg);
113     std::pair<int, std::string> DumpCatchWithTimeout(int pid, std::string& msg, int timeout, int tid, bool isJson);
114 private:
115     bool DoDumpCurrTid(const size_t skipFrameNum, std::string& msg, size_t maxFrameNums);
116     bool DoDumpLocalTid(const int tid, std::string& msg, size_t maxFrameNums);
117     bool DoDumpLocalPid(int pid, std::string& msg, size_t maxFrameNums);
118     bool DoDumpLocalLocked(int pid, int tid, std::string& msg, size_t maxFrameNums);
119     int32_t DoDumpRemoteLocked(int pid, int tid, std::string& msg, bool isJson = false,
120         int timeout = DUMPCATCHER_REMOTE_TIMEOUT);
121     int32_t DoDumpCatchRemote(int pid, int tid, std::string& msg, bool isJson = false,
122         int timeout = DUMPCATCHER_REMOTE_TIMEOUT);
123     int DoDumpRemotePid(int pid, std::string& msg, DumpCatcherPipeData& pipeData,
124         bool isJson = false, int32_t timeout = DUMPCATCHER_REMOTE_TIMEOUT);
125     bool HandlePollError(int pid, const uint64_t endTime, int& remainTime, int& pollRet, std::string& resMsg);
126     bool HandlePollTimeout(int pid, const int timeout, int& remainTime, int& pollRet, std::string& resMsg);
127     bool HandlePollEvents(int pid, const struct pollfd (&readFds)[2],
128         bool& bPipeConnect, int& pollRet, DumpCatcherPipeData& pipeData);
129     int DumpRemotePoll(int pid, const int timeout, DumpCatcherPipeData& pipeData);
130     int DoDumpRemotePoll(int pid, int timeout, std::string& msg, DumpCatcherPipeData& pipeData, bool isJson = false);
131     bool DoReadBuf(DumpCatcherPipeData& pipeData);
132     bool DoReadRes(int& pollRet, DumpCatcherPipeData& pipeData);
133     void DealAfterPollFail(int pid, std::string& msg);
134     void DealWithPollRet(int pollRet, int pid, int32_t& ret, std::string& msg);
135     void DealWithSdkDumpRet(int sdkdumpRet, int pid, int32_t& ret, std::string& msg);
136     std::pair<int, std::string> DealWithDumpCatchRet(int pid, int32_t& ret, std::string& msg);
137     void ReportDumpCatcherStats(int32_t pid, uint64_t requestTime, int32_t ret, void* retAddr);
138 
139     static int32_t KernelRet2DumpcatchRet(int32_t ret);
140     static const int DUMPCATCHER_REMOTE_P90_TIMEOUT = 1000;
141     static const int DUMPCATCHER_REMOTE_TIMEOUT = 10000;
142 
143     std::mutex mutex_;
144     bool notifyCollect_ = false;
145     KernelStackAsyncCollector stackKit_;
146     KernelStackAsyncCollector::KernelResult stack_;
147 };
148 
DfxDumpCatcher()149 DfxDumpCatcher::DfxDumpCatcher() : impl_(std::make_shared<Impl>())
150 {}
151 
DumpCatch(int pid,int tid,std::string & msg,size_t maxFrameNums,bool isJson)152 bool DfxDumpCatcher::DumpCatch(int pid, int tid, std::string& msg, size_t maxFrameNums, bool isJson)
153 {
154     return impl_->DumpCatch(pid, tid, msg, maxFrameNums, isJson);
155 }
156 
DumpCatchFd(int pid,int tid,std::string & msg,int fd,size_t maxFrameNums)157 bool DfxDumpCatcher::DumpCatchFd(int pid, int tid, std::string& msg, int fd, size_t maxFrameNums)
158 {
159     return impl_->DumpCatchFd(pid, tid, msg, fd, maxFrameNums);
160 }
161 
DumpCatchMultiPid(const std::vector<int> & pids,std::string & msg)162 bool DfxDumpCatcher::DumpCatchMultiPid(const std::vector<int> &pids, std::string& msg)
163 {
164     return impl_->DumpCatchMultiPid(pids, msg);
165 }
166 
DumpCatchWithTimeout(int pid,std::string & msg,int timeout,int tid,bool isJson)167 std::pair<int, std::string> DfxDumpCatcher::DumpCatchWithTimeout(int pid, std::string& msg,
168     int timeout, int tid, bool isJson)
169 {
170     return impl_->DumpCatchWithTimeout(pid, msg, timeout, tid, isJson);
171 }
172 
DoDumpCurrTid(const size_t skipFrameNum,std::string & msg,size_t maxFrameNums)173 bool DfxDumpCatcher::Impl::DoDumpCurrTid(const size_t skipFrameNum, std::string& msg, size_t maxFrameNums)
174 {
175     bool ret = false;
176 
177     ret = GetBacktrace(msg, skipFrameNum + 1, false, maxFrameNums);
178     if (!ret) {
179         int currTid = gettid();
180         msg.append("Failed to dump curr thread:" + std::to_string(currTid) + ".\n");
181     }
182     DFXLOGD("DoDumpCurrTid :: return %{public}d.", ret);
183     return ret;
184 }
185 
DoDumpLocalTid(const int tid,std::string & msg,size_t maxFrameNums)186 bool DfxDumpCatcher::Impl::DoDumpLocalTid(const int tid, std::string& msg, size_t maxFrameNums)
187 {
188     bool ret = false;
189     if (tid <= 0) {
190         DFXLOGE("DoDumpLocalTid :: return false as param error.");
191         return ret;
192     }
193     ret = GetBacktraceStringByTid(msg, tid, 0, false, maxFrameNums);
194     if (!ret) {
195         msg.append("Failed to dump thread:" + std::to_string(tid) + ".\n");
196     }
197     DFXLOGD("DoDumpLocalTid :: return %{public}d.", ret);
198     return ret;
199 }
200 
DoDumpLocalPid(int pid,std::string & msg,size_t maxFrameNums)201 bool DfxDumpCatcher::Impl::DoDumpLocalPid(int pid, std::string& msg, size_t maxFrameNums)
202 {
203     bool ret = false;
204     if (pid <= 0) {
205         DFXLOGE("DoDumpLocalPid :: return false as param error.");
206         return ret;
207     }
208     size_t skipFramNum = 5; // 5: skip 5 frame
209 
210     msg = GetStacktraceHeader();
211     std::function<bool(int)> func = [&](int tid) {
212         if (tid <= 0) {
213             return false;
214         }
215         std::string threadMsg;
216         if (tid == gettid()) {
217             ret = DoDumpCurrTid(skipFramNum, threadMsg, maxFrameNums);
218         } else {
219             ret = DoDumpLocalTid(tid, threadMsg, maxFrameNums);
220         }
221         msg += threadMsg;
222         return ret;
223     };
224     std::vector<int> tids;
225     ret = GetTidsByPidWithFunc(getpid(), tids, func);
226     DFXLOGD("DoDumpLocalPid :: return %{public}d.", ret);
227     return ret;
228 }
229 
DoDumpRemoteLocked(int pid,int tid,std::string & msg,bool isJson,int timeout)230 int32_t DfxDumpCatcher::Impl::DoDumpRemoteLocked(int pid, int tid, std::string& msg, bool isJson, int timeout)
231 {
232     return DoDumpCatchRemote(pid, tid, msg, isJson, timeout);
233 }
234 
DoDumpLocalLocked(int pid,int tid,std::string & msg,size_t maxFrameNums)235 bool DfxDumpCatcher::Impl::DoDumpLocalLocked(int pid, int tid, std::string& msg, size_t maxFrameNums)
236 {
237     bool ret = false;
238     if (tid == gettid()) {
239         size_t skipFramNum = 4; // 4: skip 4 frame
240         ret = DoDumpCurrTid(skipFramNum, msg, maxFrameNums);
241     } else if (tid == 0) {
242         ret = DoDumpLocalPid(pid, msg, maxFrameNums);
243     } else {
244         if (!IsThreadInPid(pid, tid)) {
245             msg.append("tid(" + std::to_string(tid) + ") is not in pid(" + std::to_string(pid) + ").\n");
246         } else {
247             ret = DoDumpLocalTid(tid, msg, maxFrameNums);
248         }
249     }
250 
251     DFXLOGD("DoDumpLocal :: ret(%{public}d).", ret);
252     return ret;
253 }
254 
ConvertDumpResultToDumpStats(int32_t dumpRes)255 static int32_t ConvertDumpResultToDumpStats(int32_t dumpRes)
256 {
257     int32_t stats = DUMP_RES_WITH_KERNELSTACK;
258     switch (dumpRes) {
259         case DUMPCATCH_ESUCCESS:
260             stats = DUMP_RES_WITH_USERSTACK;
261             break;
262         case DUMPCATCH_DUMP_ESYMBOL_NO_PARSE:
263             stats = DUMP_RES_WITH_USERSTACK_NO_PARSE_SYMBOL;
264             break;
265         case DUMPCATCH_DUMP_ESYMBOL_PARSE_TIMEOUT:
266             stats = DUMP_RES_WITH_USERSTACK_PARSE_SYMBOL_TIMEOUT;
267             break;
268         default:
269             break;
270     }
271     return stats;
272 }
273 
ReportDumpCatcherStats(int32_t pid,uint64_t requestTime,int32_t ret,void * retAddr)274 void DfxDumpCatcher::Impl::ReportDumpCatcherStats(int32_t pid,
275     uint64_t requestTime, int32_t ret, void* retAddr)
276 {
277     std::vector<uint8_t> buf(sizeof(struct FaultLoggerdStatsRequest), 0);
278     auto stat = reinterpret_cast<struct FaultLoggerdStatsRequest*>(buf.data());
279     stat->type = DUMP_CATCHER;
280     stat->pid = pid;
281     stat->requestTime = requestTime;
282     stat->dumpCatcherFinishTime = GetTimeMilliSeconds();
283     stat->result = ConvertDumpResultToDumpStats(ret);
284     if ((stat->result == DUMP_RES_WITH_KERNELSTACK) && stack_.msg.empty()) {
285         stat->result = DUMP_RES_NO_KERNELSTACK;
286     }
287     stat->targetProcessThreadCount = stack_.threadCount;
288     size_t copyLen;
289     std::string processName;
290     ReadProcessName(pid, processName);
291     copyLen = std::min(sizeof(stat->targetProcess) - 1, processName.size());
292     if (memcpy_s(stat->targetProcess, sizeof(stat->targetProcess) - 1, processName.c_str(), copyLen) != 0) {
293         DFXLOGE("Failed to copy target process");
294         return;
295     }
296 
297     if (ret != DUMPCATCH_ESUCCESS) {
298         std::string summary = DfxDumpCatchError::ToString(ret);
299         copyLen = std::min(sizeof(stat->summary) - 1, summary.size());
300         if (memcpy_s(stat->summary, sizeof(stat->summary) - 1, summary.c_str(), copyLen) != 0) {
301             DFXLOGE("Failed to copy dumpcatcher summary");
302             return;
303         }
304     }
305 
306     Dl_info info;
307     if (dladdr(retAddr, &info) != 0) {
308         copyLen = std::min(sizeof(stat->callerElf) - 1, strlen(info.dli_fname));
309         if (memcpy_s(stat->callerElf, sizeof(stat->callerElf) - 1, info.dli_fname, copyLen) != 0) {
310             DFXLOGE("Failed to copy caller elf info");
311             return;
312         }
313         stat->offset = reinterpret_cast<uintptr_t>(retAddr) - reinterpret_cast<uintptr_t>(info.dli_fbase);
314     }
315 
316     std::string cmdline;
317     if (OHOS::LoadStringFromFile("/proc/self/cmdline", cmdline)) {
318         copyLen = std::min(sizeof(stat->callerProcess) - 1, cmdline.size());
319         if (memcpy_s(stat->callerProcess, sizeof(stat->callerProcess) - 1,
320             cmdline.c_str(), copyLen) != 0) {
321             DFXLOGE("Failed to copy caller cmdline");
322             return;
323         }
324     }
325     ReportDumpStats(stat);
326 }
327 
// Tests whether the bit belonging to `signal` (bit index signal - 1) is set in the hexadecimal
// mask that follows `filed` in /proc status text, e.g. "SigBlk:\t0000000400000000".
//   content - text to search (contents of a status file)
//   filed   - field name without the trailing ':' (e.g. "SigBlk")
//   signal  - 1-based signal number; values outside 1..64 can never be set and yield false
// Returns false when the field is missing or the line is cut off before the mask.
static bool IsBitOn(const std::string& content, const std::string& filed, int signal)
{
    constexpr int maxSignal = 64;      // the mask is at most 16 hex digits, i.e. 64 bits
    constexpr size_t separatorLen = 2; // the ':' and the single separator character after the field name
    constexpr size_t maskHexLen = 16;

    // Shifting by a negative amount or by >= 64 is undefined behaviour; reject such signals up front.
    if (signal < 1 || signal > maxSignal) {
        return false;
    }
    const size_t fieldPos = content.find(filed);
    if (fieldPos == std::string::npos) {
        return false;
    }
    //SigBlk:   0000000000000000
    const size_t maskPos = fieldPos + filed.size() + separatorLen;
    if (maskPos >= content.size()) {
        // Truncated text: substr() would throw std::out_of_range past the end.
        return false;
    }
    const std::string num = content.substr(maskPos, maskHexLen);
    // strtoull keeps all 64 bits even where unsigned long is only 32 bits wide.
    const uint64_t hexValue = strtoull(num.c_str(), nullptr, 16);
    const uint64_t mask = 1ULL << (signal - 1);

    return (hexValue & mask) != 0;
}
340 
IsSignalBlocked(int pid,int32_t & ret)341 static bool IsSignalBlocked(int pid, int32_t& ret)
342 {
343     std::vector<int> tids;
344     std::vector<int> nstids;
345     GetTidsByPid(pid, tids, nstids);
346     std::string threadName;
347     std::string content;
348     int targetTid = -1;
349     for (size_t i = 0; i < tids.size(); ++i) {
350         ReadThreadNameByPidAndTid(pid, tids[i], threadName);
351         if (threadName == "OS_DfxWatchdog") {
352             targetTid = tids[i];
353             break;
354         }
355     }
356     if (targetTid != -1) {
357         std::string threadStatusPath = StringPrintf("/proc/%d/task/%d/status", pid, targetTid);
358         if (!LoadStringFromFile(threadStatusPath, content) || content.empty()) {
359             DFXLOGE("the pid(%{public}d)thread(%{public}d) read status fail, errno(%{public}d)", pid, targetTid, errno);
360             ret = DUMPCATCH_TIMEOUT_PARSE_FAIL_READ_ESTATUS;
361             return true;
362         }
363 
364         if (IsBitOn(content, "SigBlk", SIGDUMP) || IsBitOn(content, "SigIgn", SIGDUMP)) {
365             DFXLOGI("the pid(%{public}d)thread(%{public}d) signal has been blocked by target process", pid, targetTid);
366             ret = DUMPCATCH_TIMEOUT_SIGNAL_BLOCK;
367             return true;
368         }
369     }
370     return false;
371 }
372 
IsFrozen(int pid,int32_t & ret)373 static bool IsFrozen(int pid, int32_t& ret)
374 {
375     std::string content;
376     std::string cgroupPath = StringPrintf("/proc/%d/cgroup", pid);
377     if (!LoadStringFromFile(cgroupPath, content)) {
378         DFXLOGE("the pid (%{public}d) read cgroup fail, errno (%{public}d)", pid, errno);
379         ret = DUMPCATCH_TIMEOUT_PARSE_FAIL_READ_ECGROUP;
380         return true;
381     }
382 
383     if (content.find("Frozen") != std::string::npos) {
384         DFXLOGI("the pid (%{public}d) has been frozen", pid);
385         ret = DUMPCATCH_TIMEOUT_KERNEL_FROZEN;
386         return true;
387     }
388     return false;
389 }
390 
AnalyzeTimeoutReason(int pid,int32_t & ret)391 static void AnalyzeTimeoutReason(int pid, int32_t& ret)
392 {
393     std::string statusPath = StringPrintf("/proc/%d/status", pid);
394     if (access(statusPath.c_str(), F_OK) != 0) {
395         DFXLOGI("the pid (%{public}d) process exit during the dump, errno (%{public}d)", pid, errno);
396         ret = DUMPCATCH_TIMEOUT_PROCESS_KILLED;
397         return;
398     }
399 
400     if (IsSignalBlocked(pid, ret)) {
401         return;
402     }
403 
404     if (IsFrozen(pid, ret)) {
405         return;
406     }
407 
408     DFXLOGI("the pid (%{public}d) dump slow", pid);
409     ret = DUMPCATCH_TIMEOUT_DUMP_SLOW;
410 }
411 
DealAfterPollFail(int pid,std::string & msg)412 void DfxDumpCatcher::Impl::DealAfterPollFail(int pid, std::string& msg)
413 {
414     // get result
415     if (notifyCollect_) {
416         stack_ = stackKit_.GetCollectedStackResult();
417     } else {
418         stack_ = stackKit_.GetProcessStackWithTimeout(pid, WAIT_GET_KERNEL_STACK_TIMEOUT);
419     }
420 
421     std::string halfProcStatus;
422     std::string halfProcWchan;
423     ReadProcessStatus(halfProcStatus, pid);
424     if (IsLinuxKernel()) {
425         ReadProcessWchan(halfProcWchan, pid, false, true);
426     }
427     msg.append(std::move(halfProcStatus));
428     msg.append(std::move(halfProcWchan));
429 }
430 
DealWithPollRet(int pollRet,int pid,int32_t & ret,std::string & msg)431 void DfxDumpCatcher::Impl::DealWithPollRet(int pollRet, int pid, int32_t& ret, std::string& msg)
432 {
433     bool isPollFail = true;
434 
435     switch (pollRet) {
436         case DUMP_POLL_OK:
437             ret = DUMPCATCH_ESUCCESS;
438             isPollFail = false;
439             break;
440         case DUMP_POLL_NO_PARSE_SYMBOL:
441             ret = DUMPCATCH_DUMP_ESYMBOL_NO_PARSE;
442             isPollFail = false;
443             break;
444         case DUMP_POLL_PARSE_SYMBOL_TIMEOUT:
445             ret = DUMPCATCH_DUMP_ESYMBOL_PARSE_TIMEOUT;
446             isPollFail = false;
447             break;
448         case DUMP_POLL_FD:
449             ret = DUMPCATCH_EFD;
450             break;
451         case DUMP_POLL_FAILED:
452             ret = DUMPCATCH_EPOLL;
453             break;
454         case DUMP_POLL_TIMEOUT:
455             AnalyzeTimeoutReason(pid, ret);
456             if (ret == DUMPCATCH_TIMEOUT_DUMP_SLOW) {
457                 DfxDumpCatcherSlowPolicy::GetInstance().SetDumpCatcherSlowStat(pid);
458             }
459             break;
460         case DUMP_POLL_RETURN:
461             if (msg.find("ptrace attach thread failed") != std::string::npos) {
462                 ret = DUMPCATCH_DUMP_EPTRACE;
463             } else if (msg.find("stop unwinding") != std::string::npos) {
464                 ret = DUMPCATCH_DUMP_EUNWIND;
465             } else if (msg.find("mapinfo is not exist") != std::string::npos) {
466                 ret = DUMPCATCH_DUMP_EMAP;
467             } else {
468                 ret = DUMPCATCH_DUMP_ERROR;
469             }
470             break;
471         default:
472             ret = DUMPCATCH_UNKNOWN;
473             break;
474     }
475 
476     if (isPollFail) {
477         DealAfterPollFail(pid, msg);
478     }
479 }
480 
DealWithSdkDumpRet(int sdkdumpRet,int pid,int32_t & ret,std::string & msg)481 void DfxDumpCatcher::Impl::DealWithSdkDumpRet(int sdkdumpRet, int pid, int32_t& ret, std::string& msg)
482 {
483     uint32_t uid = getuid();
484     if (sdkdumpRet == ResponseCode::SDK_DUMP_REPEAT) {
485         stack_ = stackKit_.GetProcessStackWithTimeout(pid, WAIT_GET_KERNEL_STACK_TIMEOUT);
486         msg.append("Result: pid(" + std::to_string(pid) + ") process is dumping.\n");
487         ret = DUMPCATCH_IS_DUMPING;
488     } else if (sdkdumpRet == ResponseCode::REQUEST_REJECT) {
489         msg.append("Result: pid(" + std::to_string(pid) + ") process check permission error.\n");
490         ret = DUMPCATCH_EPERMISSION;
491     } else if (sdkdumpRet == ResponseCode::SDK_DUMP_NOPROC) {
492         msg.append("Result: pid(" + std::to_string(pid) + ") process has exited.\n");
493         ret = DUMPCATCH_NO_PROCESS;
494     } else if (sdkdumpRet == ResponseCode::SDK_PROCESS_CRASHED) {
495         msg.append("Result: pid(" + std::to_string(pid) + ") process has been crashed.\n");
496         ret = DUMPCATCH_HAS_CRASHED;
497     } else if (sdkdumpRet == ResponseCode::CONNECT_FAILED) {
498         if (uid == HIVIEW_UID || uid == FOUNDATION_UID) {
499             stack_ = stackKit_.GetProcessStackWithTimeout(pid, WAIT_GET_KERNEL_STACK_TIMEOUT);
500         }
501         msg.append("Result: pid(" + std::to_string(pid) + ") process fail to conntect faultloggerd.\n");
502         ret = DUMPCATCH_ECONNECT;
503     } else if (sdkdumpRet == ResponseCode::SEND_DATA_FAILED) {
504         if (uid == HIVIEW_UID || uid == FOUNDATION_UID) {
505             stack_ = stackKit_.GetProcessStackWithTimeout(pid, WAIT_GET_KERNEL_STACK_TIMEOUT);
506         }
507         msg.append("Result: pid(" + std::to_string(pid) + ") process fail to write to faultloggerd.\n");
508         ret = DUMPCATCH_EWRITE;
509     } else {
510         if (uid == HIVIEW_UID || uid == FOUNDATION_UID) {
511             stack_ = stackKit_.GetProcessStackWithTimeout(pid, WAIT_GET_KERNEL_STACK_TIMEOUT);
512         }
513         msg.append("Result: pid(" + std::to_string(pid) + ") faultloggerd maybe exception occurred.\n");
514         ret = DUMPCATCH_EFAULTLOGGERD;
515     }
516     DFXLOGW("%{public}s :: %{public}s", __func__, msg.c_str());
517 }
518 
DealWithDumpCatchRet(int pid,int32_t & ret,std::string & msg)519 std::pair<int, std::string> DfxDumpCatcher::Impl::DealWithDumpCatchRet(int pid, int32_t& ret, std::string& msg)
520 {
521     int result = ret == 0 ? 0 : -1;
522     std::string reason;
523     if (result == 0) {
524         reason = "Reason:" + DfxDumpCatchError::ToString(ret) + "\n";
525     } else if (ret == DUMPCATCH_DUMP_ESYMBOL_NO_PARSE || ret == DUMPCATCH_DUMP_ESYMBOL_PARSE_TIMEOUT) {
526         reason = "Reason:" + DfxDumpCatchError::ToString(ret) + "\n";
527         result = 0;
528     } else {
529         reason = "Reason:\nnormal stack:" + DfxDumpCatchError::ToString(ret) + "\n";
530         if (stack_.errorCode != KernelStackAsyncCollector::STACK_SUCCESS) {
531             ret = KernelRet2DumpcatchRet(stack_.errorCode);
532             reason += "kernel stack:" + DfxDumpCatchError::ToString(ret) + "\n";
533         } else if (!stack_.msg.empty()) {
534             msg.append(stack_.msg);
535             result = 1;
536         } else {
537             reason += "kernel stack:" + DfxDumpCatchError::ToString(DUMPCATCH_KERNELSTACK_NONEED) + "\n";
538         }
539     }
540 
541     return std::make_pair(result, reason);
542 }
543 
DumpCatchWithTimeout(int pid,std::string & msg,int timeout,int tid,bool isJson)544 std::pair<int, std::string> DfxDumpCatcher::Impl::DumpCatchWithTimeout(int pid, std::string& msg, int timeout,
545                                                                        int tid, bool isJson)
546 {
547     DfxEnableTraceDlsym(true);
548     ElapsedTime counter;
549     uint64_t requestTime = GetTimeMilliSeconds();
550     int32_t dumpcatchErrno = DUMPCATCH_UNKNOWN;
551     bool reportStat = false;
552     do {
553         if (pid <= 0 || tid <0 || timeout <= WAIT_GET_KERNEL_STACK_TIMEOUT) {
554             DFXLOGE("DumpCatchWithTimeout:: param error.");
555             dumpcatchErrno = DUMPCATCH_EPARAM;
556             break;
557         }
558         if (!IsLinuxKernel()) {
559             std::string statusPath = StringPrintf("/proc/%d/status", pid);
560             if (access(statusPath.c_str(), F_OK) != 0 && errno != EACCES) {
561                 DFXLOGE("DumpCatchWithTimeout:: the pid(%{public}d) process has exited, errno(%{public}d)", pid, errno);
562                 msg.append("Result: pid(" + std::to_string(pid) + ") process has exited.\n");
563                 dumpcatchErrno = DUMPCATCH_NO_PROCESS;
564                 break;
565             }
566         }
567         std::unique_lock<std::mutex> lck(mutex_);
568         int currentPid = getpid();
569         if (pid == currentPid) {
570             bool ret = DoDumpLocalLocked(pid, tid, msg, DEFAULT_MAX_FRAME_NUM);
571             dumpcatchErrno = ret ? DUMPCATCH_ESUCCESS : DUMPCATCH_DUMP_SELF_FAIL;
572         } else {
573             DFXLOGI("Receive DumpCatch request for cPid:(%{public}d), pid(%{public}d)", currentPid, pid);
574             dumpcatchErrno = DoDumpRemoteLocked(pid, tid, msg, isJson, timeout);
575             reportStat = true;
576         }
577     } while (false);
578 
579     auto result = DealWithDumpCatchRet(pid, dumpcatchErrno, msg);
580     if (reportStat) {
581         void* retAddr = __builtin_return_address(0);
582         ReportDumpCatcherStats(pid, requestTime, dumpcatchErrno, retAddr);
583     }
584 
585     DFXLOGI("dump_catch : pid = %{public}d, elapsed time = %{public}" PRId64 " ms, " \
586         "msgLength = %{public}zu, ret = %{public}d\n%{public}s",
587         pid, counter.Elapsed<std::chrono::milliseconds>(), msg.size(), result.first, result.second.c_str());
588     DfxEnableTraceDlsym(false);
589     return result;
590 }
591 
DumpCatch(int pid,int tid,std::string & msg,size_t maxFrameNums,bool isJson)592 bool DfxDumpCatcher::Impl::DumpCatch(int pid, int tid, std::string& msg, size_t maxFrameNums, bool isJson)
593 {
594     bool ret = false;
595     if (pid <= 0 || tid < 0) {
596         DFXLOGE("dump_catch :: param error.");
597         return ret;
598     }
599     if (!IsLinuxKernel()) {
600         std::string statusPath = StringPrintf("/proc/%d/status", pid);
601         DFXLOGI("DumpCatch:: access pid(%{public}d) status", pid);
602         if (access(statusPath.c_str(), F_OK) != 0 && errno != EACCES) {
603             DFXLOGE("DumpCatch:: the pid(%{public}d) process has exited, errno(%{public}d)", pid, errno);
604             msg.append("Result: pid(" + std::to_string(pid) + ") process has exited.\n");
605             return ret;
606         }
607     }
608     DfxEnableTraceDlsym(true);
609     ElapsedTime counter;
610     std::unique_lock<std::mutex> lck(mutex_);
611     stack_ = {};
612     notifyCollect_ = false;
613     int currentPid = getpid();
614     uint64_t requestTime = GetTimeMilliSeconds();
615     DFXLOGI("Receive DumpCatch request for cPid:(%{public}d), pid(%{public}d), " \
616         "tid:(%{public}d).", currentPid, pid, tid);
617     if (pid == currentPid) {
618         ret = DoDumpLocalLocked(pid, tid, msg, maxFrameNums);
619     } else {
620         if (maxFrameNums != DEFAULT_MAX_FRAME_NUM) {
621             DFXLOGI("dump_catch :: maxFrameNums does not support setting " \
622                 "when pid is not equal to caller pid");
623         }
624         int timeout = (tid == 0 ? 3 : 10) * 1000; // when tid not zero, timeout is 10s
625         int32_t res = DoDumpRemoteLocked(pid, tid, msg, isJson, timeout);
626         if (res == DUMPCATCH_ESUCCESS ||
627             res == DUMPCATCH_DUMP_ESYMBOL_NO_PARSE ||
628             res == DUMPCATCH_DUMP_ESYMBOL_PARSE_TIMEOUT) {
629             ret = true;
630         }
631         if (!ret && stack_.errorCode != KernelStackAsyncCollector::STACK_SUCCESS) {
632             res = KernelRet2DumpcatchRet(stack_.errorCode);
633         }
634         void* retAddr = __builtin_return_address(0);
635         ReportDumpCatcherStats(pid, requestTime, res, retAddr);
636     }
637     DFXLOGI("dump_catch : pid = %{public}d, elapsed time = %{public}" PRId64 " ms, ret = %{public}d, " \
638         "msgLength = %{public}zu",
639         pid, counter.Elapsed<std::chrono::milliseconds>(), ret, msg.size());
640     DfxEnableTraceDlsym(false);
641     return ret;
642 }
643 
DumpCatchFd(int pid,int tid,std::string & msg,int fd,size_t maxFrameNums)644 bool DfxDumpCatcher::Impl::DumpCatchFd(int pid, int tid, std::string& msg, int fd, size_t maxFrameNums)
645 {
646     bool ret = false;
647     ret = DumpCatch(pid, tid, msg, maxFrameNums, false);
648     if (fd > 0) {
649         ret = OHOS_TEMP_FAILURE_RETRY(write(fd, msg.c_str(), msg.length()));
650     }
651     return ret;
652 }
653 
DoDumpCatchRemote(int pid,int tid,std::string & msg,bool isJson,int timeout)654 int32_t DfxDumpCatcher::Impl::DoDumpCatchRemote(int pid, int tid, std::string& msg, bool isJson, int timeout)
655 {
656     DFX_TRACE_SCOPED_DLSYM("DoDumpCatchRemote");
657     int32_t ret = DUMPCATCH_UNKNOWN;
658 
659     if (pid <= 0 || tid < 0 || timeout <= WAIT_GET_KERNEL_STACK_TIMEOUT) {
660         msg.append("Result: pid(" + std::to_string(pid) + ") param error.\n");
661         DFXLOGW("%{public}s :: %{public}s", __func__, msg.c_str());
662         return DUMPCATCH_EPARAM;
663     }
664 
665     if (DfxDumpCatcherSlowPolicy::GetInstance().IsDumpCatcherInSlowPeriod(pid)) {
666         DFXLOGW("dumpcatch in slow period, return pid (%{public}d) kernel stack directly!", pid);
667         msg.append("Result: pid(" + std::to_string(pid) + ") last dump slow, return kernel stack directly.\n");
668         stack_ = stackKit_.GetProcessStackWithTimeout(pid, WAIT_GET_KERNEL_STACK_TIMEOUT);
669         return DUMPCATCH_TIMEOUT_DUMP_IN_SLOWPERIOD;
670     }
671 
672     int pipeReadFd[] = { -1, -1 };
673     uint64_t sdkDumpStartTime = GetAbsTimeMilliSeconds();
674     int sdkdumpRet = RequestSdkDump(pid, tid, pipeReadFd, isJson, timeout);
675     if (sdkdumpRet != ResponseCode::REQUEST_SUCCESS) {
676         DealWithSdkDumpRet(sdkdumpRet, pid, ret, msg);
677         return ret;
678     }
679     DumpCatcherPipeData pipeData(pid, pipeReadFd[PIPE_BUF_INDEX], pipeReadFd[PIPE_RES_INDEX]);
680     // timeout sub the cost time of sdkdump
681     timeout -= static_cast<int>(GetAbsTimeMilliSeconds() - sdkDumpStartTime);
682 
683     int pollRet = DoDumpRemotePid(pid, msg, pipeData, isJson, timeout);
684     DealWithPollRet(pollRet, pid, ret, msg);
685     DFXLOGI("%{public}s :: pid(%{public}d) ret: %{public}d", __func__, pid, ret);
686     return ret;
687 }
688 
DoDumpRemotePid(int pid,std::string & msg,DumpCatcherPipeData & pipeData,bool isJson,int32_t timeout)689 int DfxDumpCatcher::Impl::DoDumpRemotePid(int pid, std::string& msg, DumpCatcherPipeData& pipeData,
690                                           bool isJson, int32_t timeout)
691 {
692     DFX_TRACE_SCOPED_DLSYM("DoDumpRemotePid");
693     if (timeout <= 0) {
694         DFXLOGW("timeout less than 0, try to get kernel stack and return directly!");
695         stack_ = stackKit_.GetProcessStackWithTimeout(pid, WAIT_GET_KERNEL_STACK_TIMEOUT);
696         return DUMP_POLL_TIMEOUT;
697     } else if (timeout < 1000) { // 1000 : one thousand milliseconds
698         DFXLOGW("timeout less than 1 seconds, get kernel stack directly!");
699         notifyCollect_ = stackKit_.NotifyStartCollect(pid);
700     }
701     int ret = DoDumpRemotePoll(pid, timeout, msg, pipeData, isJson);
702     DFXLOGI("%{public}s :: pid(%{public}d) poll ret: %{public}d", __func__, pid, ret);
703     return ret;
704 }
705 
KernelRet2DumpcatchRet(int32_t ret)706 int32_t DfxDumpCatcher::Impl::KernelRet2DumpcatchRet(int32_t ret)
707 {
708     switch (ret) {
709         case KernelStackAsyncCollector::STACK_ECREATE:
710              return DUMPCATCH_KERNELSTACK_ECREATE;
711         case KernelStackAsyncCollector::STACK_EOPEN:
712              return DUMPCATCH_KERNELSTACK_EOPEN;
713         case KernelStackAsyncCollector::STACK_EIOCTL:
714              return DUMPCATCH_KERNELSTACK_EIOCTL;
715         case KernelStackAsyncCollector::STACK_TIMEOUT:
716             return DUMPCATCH_KERNELSTACK_TIMEOUT;
717         case KernelStackAsyncCollector::STACK_OVER_LIMIT:
718             return DUMPCATCH_KERNELSTACK_OVER_LIMIT;
719         default:
720             return DUMPCATCH_UNKNOWN;
721     }
722 }
723 
HandlePollError(int pid,const uint64_t endTime,int & remainTime,int & pollRet,std::string & resMsg)724 bool DfxDumpCatcher::Impl::HandlePollError(int pid, const uint64_t endTime, int& remainTime,
725                                            int& pollRet, std::string& resMsg)
726 {
727     if (errno == EINTR) {
728         uint64_t now = GetAbsTimeMilliSeconds();
729         if (now >= endTime) {
730             pollRet = DUMP_POLL_TIMEOUT;
731             resMsg.append("Result: poll timeout.\n");
732             return false;
733         }
734         if (!notifyCollect_ && (remainTime == DUMPCATCHER_REMOTE_P90_TIMEOUT)) {
735             notifyCollect_ = stackKit_.NotifyStartCollect(pid);
736         }
737         remainTime = static_cast<int>(endTime - now);
738         return true;
739     }
740     pollRet = DUMP_POLL_FAILED;
741     resMsg.append("Result: poll error, errno(" + std::to_string(errno) + ")\n");
742     return false;
743 }
744 
HandlePollTimeout(int pid,const int timeout,int & remainTime,int & pollRet,std::string & resMsg)745 bool DfxDumpCatcher::Impl::HandlePollTimeout(int pid, const int timeout, int& remainTime,
746                                              int& pollRet, std::string& resMsg)
747 {
748     if (!notifyCollect_ && (remainTime == DUMPCATCHER_REMOTE_P90_TIMEOUT)) {
749         notifyCollect_ = stackKit_.NotifyStartCollect(pid);
750         remainTime = timeout - DUMPCATCHER_REMOTE_P90_TIMEOUT;
751         return true;
752     }
753     pollRet = DUMP_POLL_TIMEOUT;
754     resMsg.append("Result: poll timeout.\n");
755     return false;
756 }
757 
HandlePollEvents(int pid,const struct pollfd (& readFds)[2],bool & bPipeConnect,int & pollRet,DumpCatcherPipeData & pipeData)758 bool DfxDumpCatcher::Impl::HandlePollEvents(int pid, const struct pollfd (&readFds)[2], bool& bPipeConnect,
759                                             int& pollRet, DumpCatcherPipeData& pipeData)
760 {
761     bool bufRet = true;
762     bool resRet = false;
763     bool eventRet = true;
764     for (auto& readFd : readFds) {
765         if (!bPipeConnect && (static_cast<uint32_t>(readFd.revents) & POLLIN)) {
766             bPipeConnect = true;
767         }
768 
769         if (bPipeConnect &&
770             ((static_cast<uint32_t>(readFd.revents) & POLLERR) || (static_cast<uint32_t>(readFd.revents) & POLLHUP))) {
771             eventRet = false;
772             pipeData.resMsg.append("Result: poll events error.\n");
773             break;
774         }
775 
776         if ((static_cast<uint32_t>(readFd.revents) & POLLIN) != POLLIN) {
777             continue;
778         }
779 
780         if (readFd.fd == pipeData.bufFd.GetFd()) {
781             bufRet = DoReadBuf(pipeData);
782         } else if (readFd.fd == pipeData.resFd.GetFd()) {
783             resRet = DoReadRes(pollRet, pipeData);
784         }
785     }
786 
787     if ((eventRet == false) || (bufRet == false) || (resRet == true)) {
788         DFXLOGI("eventRet:%{public}d bufRet:%{public}d resRet:%{public}d", eventRet, bufRet, resRet);
789         return false;
790     }
791     return true;
792 }
793 
DumpRemotePoll(int pid,const int timeout,DumpCatcherPipeData & pipeData)794 int DfxDumpCatcher::Impl::DumpRemotePoll(int pid, const int timeout, DumpCatcherPipeData& pipeData)
795 {
796     int pollRet = DUMP_POLL_INIT;
797     struct pollfd readFds[2];
798     (void)memset_s(readFds, sizeof(readFds), 0, sizeof(readFds));
799     readFds[0].fd = pipeData.bufFd.GetFd();
800     readFds[0].events = POLLIN;
801     readFds[1].fd = pipeData.resFd.GetFd();
802     readFds[1].events = POLLIN;
803     int fdsSize = sizeof(readFds) / sizeof(readFds[0]);
804     bool bPipeConnect = false;
805     int remainTime = DUMPCATCHER_REMOTE_P90_TIMEOUT < timeout ? DUMPCATCHER_REMOTE_P90_TIMEOUT : timeout;
806     uint64_t startTime = GetAbsTimeMilliSeconds();
807     uint64_t endTime = startTime + static_cast<uint64_t>(timeout);
808     bool isContinue = true;
809     do {
810         int pRet = poll(readFds, fdsSize, remainTime);
811         if (pRet < 0) {
812             isContinue = HandlePollError(pid, endTime, remainTime, pollRet, pipeData.resMsg);
813             continue;
814         } else if (pRet == 0) {
815             isContinue = HandlePollTimeout(pid, timeout, remainTime, pollRet, pipeData.resMsg);
816             continue;
817         }
818         if (!HandlePollEvents(pid, readFds, bPipeConnect, pollRet, pipeData)) {
819             break;
820         }
821         uint64_t now = GetAbsTimeMilliSeconds();
822         if (now >= endTime) {
823             pollRet = DUMP_POLL_TIMEOUT;
824             pipeData.resMsg.append("Result: poll timeout.\n");
825             break;
826         }
827         remainTime = static_cast<int>(endTime - now);
828     } while (isContinue);
829     return pollRet;
830 }
831 
DoDumpRemotePoll(int pid,int timeout,std::string & msg,DumpCatcherPipeData & pipeData,bool isJson)832 int DfxDumpCatcher::Impl::DoDumpRemotePoll(int pid, int timeout, std::string& msg,
833     DumpCatcherPipeData& pipeData, bool isJson)
834 {
835     DFX_TRACE_SCOPED_DLSYM("DoDumpRemotePoll");
836     if (!pipeData.bufFd || !pipeData.resFd) {
837         if (!isJson) {
838             msg = "Result: bufFd or resFd < 0.\n";
839         }
840         DFXLOGE("invalid bufFd or resFd");
841         return DUMP_POLL_FD;
842     }
843 
844     int res = DumpRemotePoll(pid, timeout, pipeData);
845     bool isDumpSuccess = (res == DUMP_POLL_OK) || (res == DUMP_POLL_NO_PARSE_SYMBOL)
846         || (res == DUMP_POLL_PARSE_SYMBOL_TIMEOUT);
847     DFXLOGI("%{public}s :: %{public}s", __func__, pipeData.resMsg.c_str());
848     msg = isJson && isDumpSuccess ? pipeData.bufMsg : (pipeData.resMsg + pipeData.bufMsg);
849     return res;
850 }
851 
DoReadBuf(DumpCatcherPipeData & pipeData)852 bool DfxDumpCatcher::Impl::DoReadBuf(DumpCatcherPipeData& pipeData)
853 {
854     std::vector<char> buffer(MAX_PIPE_SIZE, 0);
855     ssize_t nread = OHOS_TEMP_FAILURE_RETRY(read(pipeData.bufFd.GetFd(), buffer.data(), MAX_PIPE_SIZE));
856     if (nread <= 0) {
857         DFXLOGW("%{public}s :: read error", __func__);
858         return false;
859     }
860     DFXLOGD("%{public}s :: nread: %{public}zu", __func__, nread);
861     pipeData.bufMsg.append(buffer.data(), static_cast<size_t>(nread));
862     return true;
863 }
864 
DoReadRes(int & pollRet,DumpCatcherPipeData & pipeData)865 bool DfxDumpCatcher::Impl::DoReadRes(int& pollRet, DumpCatcherPipeData& pipeData)
866 {
867     int32_t res = DumpErrorCode::DUMP_ESUCCESS;
868     ssize_t nread = OHOS_TEMP_FAILURE_RETRY(read(pipeData.resFd.GetFd(), &res, sizeof(res)));
869     if (nread <= 0 || nread != sizeof(res)) {
870         DFXLOGW("%{public}s :: read error", __func__);
871         return false;
872     }
873 
874     switch (res) {
875         case DUMP_ESUCCESS:
876             pollRet = DUMP_POLL_OK;
877             break;
878         case DUMP_ESYMBOL_NO_PARSE:
879             pollRet = DUMP_POLL_NO_PARSE_SYMBOL;
880             break;
881         case DUMP_ESYMBOL_PARSE_TIMEOUT:
882             pollRet = DUMP_POLL_PARSE_SYMBOL_TIMEOUT;
883             break;
884         default:
885             pollRet = DUMP_POLL_RETURN;
886             break;
887     }
888 
889     pipeData.resMsg.append("Result: " + DfxDumpRes::ToString(res) + "\n");
890     return true;
891 }
892 
DumpCatchMultiPid(const std::vector<int> & pids,std::string & msg)893 bool DfxDumpCatcher::Impl::DumpCatchMultiPid(const std::vector<int>& pids, std::string& msg)
894 {
895     bool ret = false;
896     int pidSize = (int)pids.size();
897     if (pidSize <= 0) {
898         DFXLOGE("%{public}s :: param error, pidSize(%{public}d).", __func__, pidSize);
899         return ret;
900     }
901 
902     std::unique_lock<std::mutex> lck(mutex_);
903     int currentPid = getpid();
904     int currentTid = gettid();
905     DFXLOGD("%{public}s :: cPid(%{public}d), cTid(%{public}d), pidSize(%{public}d).",
906         __func__, currentPid, currentTid, pidSize);
907 
908     time_t startTime = time(nullptr);
909     if (startTime > 0) {
910         DFXLOGD("%{public}s :: startTime(%{public}" PRId64 ").", __func__, startTime);
911     }
912 
913     for (int i = 0; i < pidSize; i++) {
914         int pid = pids[i];
915         std::string pidStr;
916         bool ret = DoDumpRemoteLocked(pid, 0, pidStr) == DUMPCATCH_ESUCCESS;
917         if (ret) {
918             msg.append(pidStr + "\n");
919         } else {
920             msg.append("Failed to dump process:" + std::to_string(pid));
921         }
922 
923         time_t currentTime = time(nullptr);
924         if (currentTime > 0) {
925             DFXLOGD("%{public}s :: startTime(%{public}" PRId64 "), currentTime(%{public}" PRId64 ").",
926                 __func__, startTime, currentTime);
927             if (currentTime > startTime + DUMP_CATCHE_WORK_TIME_S) {
928                 break;
929             }
930         }
931     }
932 
933     DFXLOGD("%{public}s :: msg(%{public}s).", __func__, msg.c_str());
934     if (msg.find("Tid:") != std::string::npos) {
935         ret = true;
936     }
937     return ret;
938 }
939 } // namespace HiviewDFX
940 } // namespace OHOS
941