1 /*
2 * Copyright (c) 2021-2024 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
#include "dfx_dump_catcher.h"

#include <atomic>
#include <cerrno>
#include <condition_variable>
#include <cstdlib>
#include <mutex>
#include <thread>
#include <vector>

#include <dlfcn.h>
#include <poll.h>
#include <securec.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/types.h>

#include "backtrace_local.h"
#include "dfx_define.h"
#include "dfx_dump_catcher_errno.h"
#include "dfx_dump_catcher_slow_policy.h"
#include "dfx_dump_res.h"
#include "dfx_log.h"
#include "dfx_socket_request.h"
#include "dfx_trace_dlsym.h"
#include "dfx_util.h"
#include "elapsed_time.h"
#include "faultloggerd_client.h"
#include "file_ex.h"
#include "kernel_stack_async_collector.h"
#include "procinfo.h"
#include "smart_fd.h"
#include "string_printf.h"
48
49 namespace OHOS {
50 namespace HiviewDFX {
namespace {
// The log domain is overridden only when an earlier header already defined LOG_DOMAIN.
#ifdef LOG_DOMAIN
#undef LOG_DOMAIN
#define LOG_DOMAIN 0xD002D11
#endif

// The log tag is overridden only when an earlier header already defined LOG_TAG.
#ifdef LOG_TAG
#undef LOG_TAG
#define LOG_TAG "DfxDumpCatcher"
#endif

constexpr int DUMP_CATCHE_WORK_TIME_S = 60;
constexpr int WAIT_GET_KERNEL_STACK_TIMEOUT = 1000; // 1000 : time out 1000 ms
constexpr uint32_t HIVIEW_UID = 1201;
constexpr uint32_t FOUNDATION_UID = 5523;

// Result category written into the statistics record of a dump request.
enum DfxDumpStatRes : int32_t {
    DUMP_RES_NO_KERNELSTACK = -2,
    DUMP_RES_WITH_KERNELSTACK = -1,
    DUMP_RES_WITH_USERSTACK = 0,
    DUMP_RES_WITH_USERSTACK_NO_PARSE_SYMBOL = 1,
    DUMP_RES_WITH_USERSTACK_PARSE_SYMBOL_TIMEOUT = 2,
};
} // namespace
74
75 struct DumpCatcherPipeData {
DumpCatcherPipeDataOHOS::HiviewDFX::DumpCatcherPipeData76 DumpCatcherPipeData(int32_t pid, int32_t bufPipe, int32_t resPipe) : pid(pid), bufFd(bufPipe), resFd(resPipe) {}
~DumpCatcherPipeDataOHOS::HiviewDFX::DumpCatcherPipeData77 ~DumpCatcherPipeData()
78 {
79 // request close fds in faultloggerd
80 RequestDelPipeFd(pid);
81 }
82 DumpCatcherPipeData(const DumpCatcherPipeData&) = delete;
83 DumpCatcherPipeData& operator=(const DumpCatcherPipeData&) = delete;
84
85 int pid{-1};
86 SmartFd bufFd;
87 SmartFd resFd;
88 std::string bufMsg = "";
89 std::string resMsg = "";
90 } ;
91
IsLinuxKernel()92 static bool IsLinuxKernel()
93 {
94 static bool isLinux = [] {
95 std::string content;
96 LoadStringFromFile("/proc/version", content);
97 if (content.empty()) {
98 return true;
99 }
100 if (content.find("Linux") != std::string::npos) {
101 return true;
102 }
103 return false;
104 }();
105 return isLinux;
106 }
107
108 class DfxDumpCatcher::Impl {
109 public:
110 bool DumpCatch(int pid, int tid, std::string& msg, size_t maxFrameNums, bool isJson);
111 bool DumpCatchFd(int pid, int tid, std::string& msg, int fd, size_t maxFrameNums);
112 bool DumpCatchMultiPid(const std::vector<int> &pids, std::string& msg);
113 std::pair<int, std::string> DumpCatchWithTimeout(int pid, std::string& msg, int timeout, int tid, bool isJson);
114 private:
115 bool DoDumpCurrTid(const size_t skipFrameNum, std::string& msg, size_t maxFrameNums);
116 bool DoDumpLocalTid(const int tid, std::string& msg, size_t maxFrameNums);
117 bool DoDumpLocalPid(int pid, std::string& msg, size_t maxFrameNums);
118 bool DoDumpLocalLocked(int pid, int tid, std::string& msg, size_t maxFrameNums);
119 int32_t DoDumpRemoteLocked(int pid, int tid, std::string& msg, bool isJson = false,
120 int timeout = DUMPCATCHER_REMOTE_TIMEOUT);
121 int32_t DoDumpCatchRemote(int pid, int tid, std::string& msg, bool isJson = false,
122 int timeout = DUMPCATCHER_REMOTE_TIMEOUT);
123 int DoDumpRemotePid(int pid, std::string& msg, DumpCatcherPipeData& pipeData,
124 bool isJson = false, int32_t timeout = DUMPCATCHER_REMOTE_TIMEOUT);
125 bool HandlePollError(int pid, const uint64_t endTime, int& remainTime, int& pollRet, std::string& resMsg);
126 bool HandlePollTimeout(int pid, const int timeout, int& remainTime, int& pollRet, std::string& resMsg);
127 bool HandlePollEvents(int pid, const struct pollfd (&readFds)[2],
128 bool& bPipeConnect, int& pollRet, DumpCatcherPipeData& pipeData);
129 int DumpRemotePoll(int pid, const int timeout, DumpCatcherPipeData& pipeData);
130 int DoDumpRemotePoll(int pid, int timeout, std::string& msg, DumpCatcherPipeData& pipeData, bool isJson = false);
131 bool DoReadBuf(DumpCatcherPipeData& pipeData);
132 bool DoReadRes(int& pollRet, DumpCatcherPipeData& pipeData);
133 void DealAfterPollFail(int pid, std::string& msg);
134 void DealWithPollRet(int pollRet, int pid, int32_t& ret, std::string& msg);
135 void DealWithSdkDumpRet(int sdkdumpRet, int pid, int32_t& ret, std::string& msg);
136 std::pair<int, std::string> DealWithDumpCatchRet(int pid, int32_t& ret, std::string& msg);
137 void ReportDumpCatcherStats(int32_t pid, uint64_t requestTime, int32_t ret, void* retAddr);
138
139 static int32_t KernelRet2DumpcatchRet(int32_t ret);
140 static const int DUMPCATCHER_REMOTE_P90_TIMEOUT = 1000;
141 static const int DUMPCATCHER_REMOTE_TIMEOUT = 10000;
142
143 std::mutex mutex_;
144 bool notifyCollect_ = false;
145 KernelStackAsyncCollector stackKit_;
146 KernelStackAsyncCollector::KernelResult stack_;
147 };
148
DfxDumpCatcher()149 DfxDumpCatcher::DfxDumpCatcher() : impl_(std::make_shared<Impl>())
150 {}
151
DumpCatch(int pid,int tid,std::string & msg,size_t maxFrameNums,bool isJson)152 bool DfxDumpCatcher::DumpCatch(int pid, int tid, std::string& msg, size_t maxFrameNums, bool isJson)
153 {
154 return impl_->DumpCatch(pid, tid, msg, maxFrameNums, isJson);
155 }
156
DumpCatchFd(int pid,int tid,std::string & msg,int fd,size_t maxFrameNums)157 bool DfxDumpCatcher::DumpCatchFd(int pid, int tid, std::string& msg, int fd, size_t maxFrameNums)
158 {
159 return impl_->DumpCatchFd(pid, tid, msg, fd, maxFrameNums);
160 }
161
DumpCatchMultiPid(const std::vector<int> & pids,std::string & msg)162 bool DfxDumpCatcher::DumpCatchMultiPid(const std::vector<int> &pids, std::string& msg)
163 {
164 return impl_->DumpCatchMultiPid(pids, msg);
165 }
166
DumpCatchWithTimeout(int pid,std::string & msg,int timeout,int tid,bool isJson)167 std::pair<int, std::string> DfxDumpCatcher::DumpCatchWithTimeout(int pid, std::string& msg,
168 int timeout, int tid, bool isJson)
169 {
170 return impl_->DumpCatchWithTimeout(pid, msg, timeout, tid, isJson);
171 }
172
DoDumpCurrTid(const size_t skipFrameNum,std::string & msg,size_t maxFrameNums)173 bool DfxDumpCatcher::Impl::DoDumpCurrTid(const size_t skipFrameNum, std::string& msg, size_t maxFrameNums)
174 {
175 bool ret = false;
176
177 ret = GetBacktrace(msg, skipFrameNum + 1, false, maxFrameNums);
178 if (!ret) {
179 int currTid = gettid();
180 msg.append("Failed to dump curr thread:" + std::to_string(currTid) + ".\n");
181 }
182 DFXLOGD("DoDumpCurrTid :: return %{public}d.", ret);
183 return ret;
184 }
185
DoDumpLocalTid(const int tid,std::string & msg,size_t maxFrameNums)186 bool DfxDumpCatcher::Impl::DoDumpLocalTid(const int tid, std::string& msg, size_t maxFrameNums)
187 {
188 bool ret = false;
189 if (tid <= 0) {
190 DFXLOGE("DoDumpLocalTid :: return false as param error.");
191 return ret;
192 }
193 ret = GetBacktraceStringByTid(msg, tid, 0, false, maxFrameNums);
194 if (!ret) {
195 msg.append("Failed to dump thread:" + std::to_string(tid) + ".\n");
196 }
197 DFXLOGD("DoDumpLocalTid :: return %{public}d.", ret);
198 return ret;
199 }
200
DoDumpLocalPid(int pid,std::string & msg,size_t maxFrameNums)201 bool DfxDumpCatcher::Impl::DoDumpLocalPid(int pid, std::string& msg, size_t maxFrameNums)
202 {
203 bool ret = false;
204 if (pid <= 0) {
205 DFXLOGE("DoDumpLocalPid :: return false as param error.");
206 return ret;
207 }
208 size_t skipFramNum = 5; // 5: skip 5 frame
209
210 msg = GetStacktraceHeader();
211 std::function<bool(int)> func = [&](int tid) {
212 if (tid <= 0) {
213 return false;
214 }
215 std::string threadMsg;
216 if (tid == gettid()) {
217 ret = DoDumpCurrTid(skipFramNum, threadMsg, maxFrameNums);
218 } else {
219 ret = DoDumpLocalTid(tid, threadMsg, maxFrameNums);
220 }
221 msg += threadMsg;
222 return ret;
223 };
224 std::vector<int> tids;
225 ret = GetTidsByPidWithFunc(getpid(), tids, func);
226 DFXLOGD("DoDumpLocalPid :: return %{public}d.", ret);
227 return ret;
228 }
229
DoDumpRemoteLocked(int pid,int tid,std::string & msg,bool isJson,int timeout)230 int32_t DfxDumpCatcher::Impl::DoDumpRemoteLocked(int pid, int tid, std::string& msg, bool isJson, int timeout)
231 {
232 return DoDumpCatchRemote(pid, tid, msg, isJson, timeout);
233 }
234
DoDumpLocalLocked(int pid,int tid,std::string & msg,size_t maxFrameNums)235 bool DfxDumpCatcher::Impl::DoDumpLocalLocked(int pid, int tid, std::string& msg, size_t maxFrameNums)
236 {
237 bool ret = false;
238 if (tid == gettid()) {
239 size_t skipFramNum = 4; // 4: skip 4 frame
240 ret = DoDumpCurrTid(skipFramNum, msg, maxFrameNums);
241 } else if (tid == 0) {
242 ret = DoDumpLocalPid(pid, msg, maxFrameNums);
243 } else {
244 if (!IsThreadInPid(pid, tid)) {
245 msg.append("tid(" + std::to_string(tid) + ") is not in pid(" + std::to_string(pid) + ").\n");
246 } else {
247 ret = DoDumpLocalTid(tid, msg, maxFrameNums);
248 }
249 }
250
251 DFXLOGD("DoDumpLocal :: ret(%{public}d).", ret);
252 return ret;
253 }
254
ConvertDumpResultToDumpStats(int32_t dumpRes)255 static int32_t ConvertDumpResultToDumpStats(int32_t dumpRes)
256 {
257 int32_t stats = DUMP_RES_WITH_KERNELSTACK;
258 switch (dumpRes) {
259 case DUMPCATCH_ESUCCESS:
260 stats = DUMP_RES_WITH_USERSTACK;
261 break;
262 case DUMPCATCH_DUMP_ESYMBOL_NO_PARSE:
263 stats = DUMP_RES_WITH_USERSTACK_NO_PARSE_SYMBOL;
264 break;
265 case DUMPCATCH_DUMP_ESYMBOL_PARSE_TIMEOUT:
266 stats = DUMP_RES_WITH_USERSTACK_PARSE_SYMBOL_TIMEOUT;
267 break;
268 default:
269 break;
270 }
271 return stats;
272 }
273
ReportDumpCatcherStats(int32_t pid,uint64_t requestTime,int32_t ret,void * retAddr)274 void DfxDumpCatcher::Impl::ReportDumpCatcherStats(int32_t pid,
275 uint64_t requestTime, int32_t ret, void* retAddr)
276 {
277 std::vector<uint8_t> buf(sizeof(struct FaultLoggerdStatsRequest), 0);
278 auto stat = reinterpret_cast<struct FaultLoggerdStatsRequest*>(buf.data());
279 stat->type = DUMP_CATCHER;
280 stat->pid = pid;
281 stat->requestTime = requestTime;
282 stat->dumpCatcherFinishTime = GetTimeMilliSeconds();
283 stat->result = ConvertDumpResultToDumpStats(ret);
284 if ((stat->result == DUMP_RES_WITH_KERNELSTACK) && stack_.msg.empty()) {
285 stat->result = DUMP_RES_NO_KERNELSTACK;
286 }
287 stat->targetProcessThreadCount = stack_.threadCount;
288 size_t copyLen;
289 std::string processName;
290 ReadProcessName(pid, processName);
291 copyLen = std::min(sizeof(stat->targetProcess) - 1, processName.size());
292 if (memcpy_s(stat->targetProcess, sizeof(stat->targetProcess) - 1, processName.c_str(), copyLen) != 0) {
293 DFXLOGE("Failed to copy target process");
294 return;
295 }
296
297 if (ret != DUMPCATCH_ESUCCESS) {
298 std::string summary = DfxDumpCatchError::ToString(ret);
299 copyLen = std::min(sizeof(stat->summary) - 1, summary.size());
300 if (memcpy_s(stat->summary, sizeof(stat->summary) - 1, summary.c_str(), copyLen) != 0) {
301 DFXLOGE("Failed to copy dumpcatcher summary");
302 return;
303 }
304 }
305
306 Dl_info info;
307 if (dladdr(retAddr, &info) != 0) {
308 copyLen = std::min(sizeof(stat->callerElf) - 1, strlen(info.dli_fname));
309 if (memcpy_s(stat->callerElf, sizeof(stat->callerElf) - 1, info.dli_fname, copyLen) != 0) {
310 DFXLOGE("Failed to copy caller elf info");
311 return;
312 }
313 stat->offset = reinterpret_cast<uintptr_t>(retAddr) - reinterpret_cast<uintptr_t>(info.dli_fbase);
314 }
315
316 std::string cmdline;
317 if (OHOS::LoadStringFromFile("/proc/self/cmdline", cmdline)) {
318 copyLen = std::min(sizeof(stat->callerProcess) - 1, cmdline.size());
319 if (memcpy_s(stat->callerProcess, sizeof(stat->callerProcess) - 1,
320 cmdline.c_str(), copyLen) != 0) {
321 DFXLOGE("Failed to copy caller cmdline");
322 return;
323 }
324 }
325 ReportDumpStats(stat);
326 }
327
/**
 * Tests whether the bit for a signal is set in a hexadecimal mask field of a
 * /proc status text, e.g. "SigBlk:\t0000000000000000".
 *
 * @param content status text to search
 * @param filed   field name to look for, without the trailing colon
 * @param signal  1-based signal number; values outside 1..64 yield false
 * @return true when the field is found and bit (signal - 1) of its mask is set;
 *         false when the field is missing, the text ends before the mask,
 *         or signal is out of range
 */
static bool IsBitOn(const std::string& content, const std::string& filed, int signal)
{
    constexpr int maxSignal = 64;       // the mask is parsed as a 64-bit value
    constexpr size_t separatorLen = 2;  // two characters between the field name and the mask
    constexpr size_t maskHexLen = 16;   // at most 16 hex digits are read

    // Shifting by a negative count or by 64 and more would be undefined.
    if (signal < 1 || signal > maxSignal) {
        return false;
    }
    const size_t fieldPos = content.find(filed);
    if (fieldPos == std::string::npos) {
        return false;
    }
    // substr() must not be given a position past the end of a truncated text.
    const size_t maskPos = fieldPos + filed.size() + separatorLen;
    if (maskPos >= content.size()) {
        return false;
    }
    //SigBlk: 0000000000000000
    const std::string num = content.substr(maskPos, maskHexLen);
    // strtoull keeps all 64 bits even where unsigned long is only 32 bits wide.
    const uint64_t hexValue = strtoull(num.c_str(), nullptr, 16);
    const uint64_t mask = 1ULL << (signal - 1);

    return (hexValue & mask) != 0;
}
340
IsSignalBlocked(int pid,int32_t & ret)341 static bool IsSignalBlocked(int pid, int32_t& ret)
342 {
343 std::vector<int> tids;
344 std::vector<int> nstids;
345 GetTidsByPid(pid, tids, nstids);
346 std::string threadName;
347 std::string content;
348 int targetTid = -1;
349 for (size_t i = 0; i < tids.size(); ++i) {
350 ReadThreadNameByPidAndTid(pid, tids[i], threadName);
351 if (threadName == "OS_DfxWatchdog") {
352 targetTid = tids[i];
353 break;
354 }
355 }
356 if (targetTid != -1) {
357 std::string threadStatusPath = StringPrintf("/proc/%d/task/%d/status", pid, targetTid);
358 if (!LoadStringFromFile(threadStatusPath, content) || content.empty()) {
359 DFXLOGE("the pid(%{public}d)thread(%{public}d) read status fail, errno(%{public}d)", pid, targetTid, errno);
360 ret = DUMPCATCH_TIMEOUT_PARSE_FAIL_READ_ESTATUS;
361 return true;
362 }
363
364 if (IsBitOn(content, "SigBlk", SIGDUMP) || IsBitOn(content, "SigIgn", SIGDUMP)) {
365 DFXLOGI("the pid(%{public}d)thread(%{public}d) signal has been blocked by target process", pid, targetTid);
366 ret = DUMPCATCH_TIMEOUT_SIGNAL_BLOCK;
367 return true;
368 }
369 }
370 return false;
371 }
372
IsFrozen(int pid,int32_t & ret)373 static bool IsFrozen(int pid, int32_t& ret)
374 {
375 std::string content;
376 std::string cgroupPath = StringPrintf("/proc/%d/cgroup", pid);
377 if (!LoadStringFromFile(cgroupPath, content)) {
378 DFXLOGE("the pid (%{public}d) read cgroup fail, errno (%{public}d)", pid, errno);
379 ret = DUMPCATCH_TIMEOUT_PARSE_FAIL_READ_ECGROUP;
380 return true;
381 }
382
383 if (content.find("Frozen") != std::string::npos) {
384 DFXLOGI("the pid (%{public}d) has been frozen", pid);
385 ret = DUMPCATCH_TIMEOUT_KERNEL_FROZEN;
386 return true;
387 }
388 return false;
389 }
390
AnalyzeTimeoutReason(int pid,int32_t & ret)391 static void AnalyzeTimeoutReason(int pid, int32_t& ret)
392 {
393 std::string statusPath = StringPrintf("/proc/%d/status", pid);
394 if (access(statusPath.c_str(), F_OK) != 0) {
395 DFXLOGI("the pid (%{public}d) process exit during the dump, errno (%{public}d)", pid, errno);
396 ret = DUMPCATCH_TIMEOUT_PROCESS_KILLED;
397 return;
398 }
399
400 if (IsSignalBlocked(pid, ret)) {
401 return;
402 }
403
404 if (IsFrozen(pid, ret)) {
405 return;
406 }
407
408 DFXLOGI("the pid (%{public}d) dump slow", pid);
409 ret = DUMPCATCH_TIMEOUT_DUMP_SLOW;
410 }
411
DealAfterPollFail(int pid,std::string & msg)412 void DfxDumpCatcher::Impl::DealAfterPollFail(int pid, std::string& msg)
413 {
414 // get result
415 if (notifyCollect_) {
416 stack_ = stackKit_.GetCollectedStackResult();
417 } else {
418 stack_ = stackKit_.GetProcessStackWithTimeout(pid, WAIT_GET_KERNEL_STACK_TIMEOUT);
419 }
420
421 std::string halfProcStatus;
422 std::string halfProcWchan;
423 ReadProcessStatus(halfProcStatus, pid);
424 if (IsLinuxKernel()) {
425 ReadProcessWchan(halfProcWchan, pid, false, true);
426 }
427 msg.append(std::move(halfProcStatus));
428 msg.append(std::move(halfProcWchan));
429 }
430
DealWithPollRet(int pollRet,int pid,int32_t & ret,std::string & msg)431 void DfxDumpCatcher::Impl::DealWithPollRet(int pollRet, int pid, int32_t& ret, std::string& msg)
432 {
433 bool isPollFail = true;
434
435 switch (pollRet) {
436 case DUMP_POLL_OK:
437 ret = DUMPCATCH_ESUCCESS;
438 isPollFail = false;
439 break;
440 case DUMP_POLL_NO_PARSE_SYMBOL:
441 ret = DUMPCATCH_DUMP_ESYMBOL_NO_PARSE;
442 isPollFail = false;
443 break;
444 case DUMP_POLL_PARSE_SYMBOL_TIMEOUT:
445 ret = DUMPCATCH_DUMP_ESYMBOL_PARSE_TIMEOUT;
446 isPollFail = false;
447 break;
448 case DUMP_POLL_FD:
449 ret = DUMPCATCH_EFD;
450 break;
451 case DUMP_POLL_FAILED:
452 ret = DUMPCATCH_EPOLL;
453 break;
454 case DUMP_POLL_TIMEOUT:
455 AnalyzeTimeoutReason(pid, ret);
456 if (ret == DUMPCATCH_TIMEOUT_DUMP_SLOW) {
457 DfxDumpCatcherSlowPolicy::GetInstance().SetDumpCatcherSlowStat(pid);
458 }
459 break;
460 case DUMP_POLL_RETURN:
461 if (msg.find("ptrace attach thread failed") != std::string::npos) {
462 ret = DUMPCATCH_DUMP_EPTRACE;
463 } else if (msg.find("stop unwinding") != std::string::npos) {
464 ret = DUMPCATCH_DUMP_EUNWIND;
465 } else if (msg.find("mapinfo is not exist") != std::string::npos) {
466 ret = DUMPCATCH_DUMP_EMAP;
467 } else {
468 ret = DUMPCATCH_DUMP_ERROR;
469 }
470 break;
471 default:
472 ret = DUMPCATCH_UNKNOWN;
473 break;
474 }
475
476 if (isPollFail) {
477 DealAfterPollFail(pid, msg);
478 }
479 }
480
DealWithSdkDumpRet(int sdkdumpRet,int pid,int32_t & ret,std::string & msg)481 void DfxDumpCatcher::Impl::DealWithSdkDumpRet(int sdkdumpRet, int pid, int32_t& ret, std::string& msg)
482 {
483 uint32_t uid = getuid();
484 if (sdkdumpRet == ResponseCode::SDK_DUMP_REPEAT) {
485 stack_ = stackKit_.GetProcessStackWithTimeout(pid, WAIT_GET_KERNEL_STACK_TIMEOUT);
486 msg.append("Result: pid(" + std::to_string(pid) + ") process is dumping.\n");
487 ret = DUMPCATCH_IS_DUMPING;
488 } else if (sdkdumpRet == ResponseCode::REQUEST_REJECT) {
489 msg.append("Result: pid(" + std::to_string(pid) + ") process check permission error.\n");
490 ret = DUMPCATCH_EPERMISSION;
491 } else if (sdkdumpRet == ResponseCode::SDK_DUMP_NOPROC) {
492 msg.append("Result: pid(" + std::to_string(pid) + ") process has exited.\n");
493 ret = DUMPCATCH_NO_PROCESS;
494 } else if (sdkdumpRet == ResponseCode::SDK_PROCESS_CRASHED) {
495 msg.append("Result: pid(" + std::to_string(pid) + ") process has been crashed.\n");
496 ret = DUMPCATCH_HAS_CRASHED;
497 } else if (sdkdumpRet == ResponseCode::CONNECT_FAILED) {
498 if (uid == HIVIEW_UID || uid == FOUNDATION_UID) {
499 stack_ = stackKit_.GetProcessStackWithTimeout(pid, WAIT_GET_KERNEL_STACK_TIMEOUT);
500 }
501 msg.append("Result: pid(" + std::to_string(pid) + ") process fail to conntect faultloggerd.\n");
502 ret = DUMPCATCH_ECONNECT;
503 } else if (sdkdumpRet == ResponseCode::SEND_DATA_FAILED) {
504 if (uid == HIVIEW_UID || uid == FOUNDATION_UID) {
505 stack_ = stackKit_.GetProcessStackWithTimeout(pid, WAIT_GET_KERNEL_STACK_TIMEOUT);
506 }
507 msg.append("Result: pid(" + std::to_string(pid) + ") process fail to write to faultloggerd.\n");
508 ret = DUMPCATCH_EWRITE;
509 } else {
510 if (uid == HIVIEW_UID || uid == FOUNDATION_UID) {
511 stack_ = stackKit_.GetProcessStackWithTimeout(pid, WAIT_GET_KERNEL_STACK_TIMEOUT);
512 }
513 msg.append("Result: pid(" + std::to_string(pid) + ") faultloggerd maybe exception occurred.\n");
514 ret = DUMPCATCH_EFAULTLOGGERD;
515 }
516 DFXLOGW("%{public}s :: %{public}s", __func__, msg.c_str());
517 }
518
DealWithDumpCatchRet(int pid,int32_t & ret,std::string & msg)519 std::pair<int, std::string> DfxDumpCatcher::Impl::DealWithDumpCatchRet(int pid, int32_t& ret, std::string& msg)
520 {
521 int result = ret == 0 ? 0 : -1;
522 std::string reason;
523 if (result == 0) {
524 reason = "Reason:" + DfxDumpCatchError::ToString(ret) + "\n";
525 } else if (ret == DUMPCATCH_DUMP_ESYMBOL_NO_PARSE || ret == DUMPCATCH_DUMP_ESYMBOL_PARSE_TIMEOUT) {
526 reason = "Reason:" + DfxDumpCatchError::ToString(ret) + "\n";
527 result = 0;
528 } else {
529 reason = "Reason:\nnormal stack:" + DfxDumpCatchError::ToString(ret) + "\n";
530 if (stack_.errorCode != KernelStackAsyncCollector::STACK_SUCCESS) {
531 ret = KernelRet2DumpcatchRet(stack_.errorCode);
532 reason += "kernel stack:" + DfxDumpCatchError::ToString(ret) + "\n";
533 } else if (!stack_.msg.empty()) {
534 msg.append(stack_.msg);
535 result = 1;
536 } else {
537 reason += "kernel stack:" + DfxDumpCatchError::ToString(DUMPCATCH_KERNELSTACK_NONEED) + "\n";
538 }
539 }
540
541 return std::make_pair(result, reason);
542 }
543
DumpCatchWithTimeout(int pid,std::string & msg,int timeout,int tid,bool isJson)544 std::pair<int, std::string> DfxDumpCatcher::Impl::DumpCatchWithTimeout(int pid, std::string& msg, int timeout,
545 int tid, bool isJson)
546 {
547 DfxEnableTraceDlsym(true);
548 ElapsedTime counter;
549 uint64_t requestTime = GetTimeMilliSeconds();
550 int32_t dumpcatchErrno = DUMPCATCH_UNKNOWN;
551 bool reportStat = false;
552 do {
553 if (pid <= 0 || tid <0 || timeout <= WAIT_GET_KERNEL_STACK_TIMEOUT) {
554 DFXLOGE("DumpCatchWithTimeout:: param error.");
555 dumpcatchErrno = DUMPCATCH_EPARAM;
556 break;
557 }
558 if (!IsLinuxKernel()) {
559 std::string statusPath = StringPrintf("/proc/%d/status", pid);
560 if (access(statusPath.c_str(), F_OK) != 0 && errno != EACCES) {
561 DFXLOGE("DumpCatchWithTimeout:: the pid(%{public}d) process has exited, errno(%{public}d)", pid, errno);
562 msg.append("Result: pid(" + std::to_string(pid) + ") process has exited.\n");
563 dumpcatchErrno = DUMPCATCH_NO_PROCESS;
564 break;
565 }
566 }
567 std::unique_lock<std::mutex> lck(mutex_);
568 int currentPid = getpid();
569 if (pid == currentPid) {
570 bool ret = DoDumpLocalLocked(pid, tid, msg, DEFAULT_MAX_FRAME_NUM);
571 dumpcatchErrno = ret ? DUMPCATCH_ESUCCESS : DUMPCATCH_DUMP_SELF_FAIL;
572 } else {
573 DFXLOGI("Receive DumpCatch request for cPid:(%{public}d), pid(%{public}d)", currentPid, pid);
574 dumpcatchErrno = DoDumpRemoteLocked(pid, tid, msg, isJson, timeout);
575 reportStat = true;
576 }
577 } while (false);
578
579 auto result = DealWithDumpCatchRet(pid, dumpcatchErrno, msg);
580 if (reportStat) {
581 void* retAddr = __builtin_return_address(0);
582 ReportDumpCatcherStats(pid, requestTime, dumpcatchErrno, retAddr);
583 }
584
585 DFXLOGI("dump_catch : pid = %{public}d, elapsed time = %{public}" PRId64 " ms, " \
586 "msgLength = %{public}zu, ret = %{public}d\n%{public}s",
587 pid, counter.Elapsed<std::chrono::milliseconds>(), msg.size(), result.first, result.second.c_str());
588 DfxEnableTraceDlsym(false);
589 return result;
590 }
591
DumpCatch(int pid,int tid,std::string & msg,size_t maxFrameNums,bool isJson)592 bool DfxDumpCatcher::Impl::DumpCatch(int pid, int tid, std::string& msg, size_t maxFrameNums, bool isJson)
593 {
594 bool ret = false;
595 if (pid <= 0 || tid < 0) {
596 DFXLOGE("dump_catch :: param error.");
597 return ret;
598 }
599 if (!IsLinuxKernel()) {
600 std::string statusPath = StringPrintf("/proc/%d/status", pid);
601 DFXLOGI("DumpCatch:: access pid(%{public}d) status", pid);
602 if (access(statusPath.c_str(), F_OK) != 0 && errno != EACCES) {
603 DFXLOGE("DumpCatch:: the pid(%{public}d) process has exited, errno(%{public}d)", pid, errno);
604 msg.append("Result: pid(" + std::to_string(pid) + ") process has exited.\n");
605 return ret;
606 }
607 }
608 DfxEnableTraceDlsym(true);
609 ElapsedTime counter;
610 std::unique_lock<std::mutex> lck(mutex_);
611 stack_ = {};
612 notifyCollect_ = false;
613 int currentPid = getpid();
614 uint64_t requestTime = GetTimeMilliSeconds();
615 DFXLOGI("Receive DumpCatch request for cPid:(%{public}d), pid(%{public}d), " \
616 "tid:(%{public}d).", currentPid, pid, tid);
617 if (pid == currentPid) {
618 ret = DoDumpLocalLocked(pid, tid, msg, maxFrameNums);
619 } else {
620 if (maxFrameNums != DEFAULT_MAX_FRAME_NUM) {
621 DFXLOGI("dump_catch :: maxFrameNums does not support setting " \
622 "when pid is not equal to caller pid");
623 }
624 int timeout = (tid == 0 ? 3 : 10) * 1000; // when tid not zero, timeout is 10s
625 int32_t res = DoDumpRemoteLocked(pid, tid, msg, isJson, timeout);
626 if (res == DUMPCATCH_ESUCCESS ||
627 res == DUMPCATCH_DUMP_ESYMBOL_NO_PARSE ||
628 res == DUMPCATCH_DUMP_ESYMBOL_PARSE_TIMEOUT) {
629 ret = true;
630 }
631 if (!ret && stack_.errorCode != KernelStackAsyncCollector::STACK_SUCCESS) {
632 res = KernelRet2DumpcatchRet(stack_.errorCode);
633 }
634 void* retAddr = __builtin_return_address(0);
635 ReportDumpCatcherStats(pid, requestTime, res, retAddr);
636 }
637 DFXLOGI("dump_catch : pid = %{public}d, elapsed time = %{public}" PRId64 " ms, ret = %{public}d, " \
638 "msgLength = %{public}zu",
639 pid, counter.Elapsed<std::chrono::milliseconds>(), ret, msg.size());
640 DfxEnableTraceDlsym(false);
641 return ret;
642 }
643
DumpCatchFd(int pid,int tid,std::string & msg,int fd,size_t maxFrameNums)644 bool DfxDumpCatcher::Impl::DumpCatchFd(int pid, int tid, std::string& msg, int fd, size_t maxFrameNums)
645 {
646 bool ret = false;
647 ret = DumpCatch(pid, tid, msg, maxFrameNums, false);
648 if (fd > 0) {
649 ret = OHOS_TEMP_FAILURE_RETRY(write(fd, msg.c_str(), msg.length()));
650 }
651 return ret;
652 }
653
DoDumpCatchRemote(int pid,int tid,std::string & msg,bool isJson,int timeout)654 int32_t DfxDumpCatcher::Impl::DoDumpCatchRemote(int pid, int tid, std::string& msg, bool isJson, int timeout)
655 {
656 DFX_TRACE_SCOPED_DLSYM("DoDumpCatchRemote");
657 int32_t ret = DUMPCATCH_UNKNOWN;
658
659 if (pid <= 0 || tid < 0 || timeout <= WAIT_GET_KERNEL_STACK_TIMEOUT) {
660 msg.append("Result: pid(" + std::to_string(pid) + ") param error.\n");
661 DFXLOGW("%{public}s :: %{public}s", __func__, msg.c_str());
662 return DUMPCATCH_EPARAM;
663 }
664
665 if (DfxDumpCatcherSlowPolicy::GetInstance().IsDumpCatcherInSlowPeriod(pid)) {
666 DFXLOGW("dumpcatch in slow period, return pid (%{public}d) kernel stack directly!", pid);
667 msg.append("Result: pid(" + std::to_string(pid) + ") last dump slow, return kernel stack directly.\n");
668 stack_ = stackKit_.GetProcessStackWithTimeout(pid, WAIT_GET_KERNEL_STACK_TIMEOUT);
669 return DUMPCATCH_TIMEOUT_DUMP_IN_SLOWPERIOD;
670 }
671
672 int pipeReadFd[] = { -1, -1 };
673 uint64_t sdkDumpStartTime = GetAbsTimeMilliSeconds();
674 int sdkdumpRet = RequestSdkDump(pid, tid, pipeReadFd, isJson, timeout);
675 if (sdkdumpRet != ResponseCode::REQUEST_SUCCESS) {
676 DealWithSdkDumpRet(sdkdumpRet, pid, ret, msg);
677 return ret;
678 }
679 DumpCatcherPipeData pipeData(pid, pipeReadFd[PIPE_BUF_INDEX], pipeReadFd[PIPE_RES_INDEX]);
680 // timeout sub the cost time of sdkdump
681 timeout -= static_cast<int>(GetAbsTimeMilliSeconds() - sdkDumpStartTime);
682
683 int pollRet = DoDumpRemotePid(pid, msg, pipeData, isJson, timeout);
684 DealWithPollRet(pollRet, pid, ret, msg);
685 DFXLOGI("%{public}s :: pid(%{public}d) ret: %{public}d", __func__, pid, ret);
686 return ret;
687 }
688
DoDumpRemotePid(int pid,std::string & msg,DumpCatcherPipeData & pipeData,bool isJson,int32_t timeout)689 int DfxDumpCatcher::Impl::DoDumpRemotePid(int pid, std::string& msg, DumpCatcherPipeData& pipeData,
690 bool isJson, int32_t timeout)
691 {
692 DFX_TRACE_SCOPED_DLSYM("DoDumpRemotePid");
693 if (timeout <= 0) {
694 DFXLOGW("timeout less than 0, try to get kernel stack and return directly!");
695 stack_ = stackKit_.GetProcessStackWithTimeout(pid, WAIT_GET_KERNEL_STACK_TIMEOUT);
696 return DUMP_POLL_TIMEOUT;
697 } else if (timeout < 1000) { // 1000 : one thousand milliseconds
698 DFXLOGW("timeout less than 1 seconds, get kernel stack directly!");
699 notifyCollect_ = stackKit_.NotifyStartCollect(pid);
700 }
701 int ret = DoDumpRemotePoll(pid, timeout, msg, pipeData, isJson);
702 DFXLOGI("%{public}s :: pid(%{public}d) poll ret: %{public}d", __func__, pid, ret);
703 return ret;
704 }
705
KernelRet2DumpcatchRet(int32_t ret)706 int32_t DfxDumpCatcher::Impl::KernelRet2DumpcatchRet(int32_t ret)
707 {
708 switch (ret) {
709 case KernelStackAsyncCollector::STACK_ECREATE:
710 return DUMPCATCH_KERNELSTACK_ECREATE;
711 case KernelStackAsyncCollector::STACK_EOPEN:
712 return DUMPCATCH_KERNELSTACK_EOPEN;
713 case KernelStackAsyncCollector::STACK_EIOCTL:
714 return DUMPCATCH_KERNELSTACK_EIOCTL;
715 case KernelStackAsyncCollector::STACK_TIMEOUT:
716 return DUMPCATCH_KERNELSTACK_TIMEOUT;
717 case KernelStackAsyncCollector::STACK_OVER_LIMIT:
718 return DUMPCATCH_KERNELSTACK_OVER_LIMIT;
719 default:
720 return DUMPCATCH_UNKNOWN;
721 }
722 }
723
HandlePollError(int pid,const uint64_t endTime,int & remainTime,int & pollRet,std::string & resMsg)724 bool DfxDumpCatcher::Impl::HandlePollError(int pid, const uint64_t endTime, int& remainTime,
725 int& pollRet, std::string& resMsg)
726 {
727 if (errno == EINTR) {
728 uint64_t now = GetAbsTimeMilliSeconds();
729 if (now >= endTime) {
730 pollRet = DUMP_POLL_TIMEOUT;
731 resMsg.append("Result: poll timeout.\n");
732 return false;
733 }
734 if (!notifyCollect_ && (remainTime == DUMPCATCHER_REMOTE_P90_TIMEOUT)) {
735 notifyCollect_ = stackKit_.NotifyStartCollect(pid);
736 }
737 remainTime = static_cast<int>(endTime - now);
738 return true;
739 }
740 pollRet = DUMP_POLL_FAILED;
741 resMsg.append("Result: poll error, errno(" + std::to_string(errno) + ")\n");
742 return false;
743 }
744
HandlePollTimeout(int pid,const int timeout,int & remainTime,int & pollRet,std::string & resMsg)745 bool DfxDumpCatcher::Impl::HandlePollTimeout(int pid, const int timeout, int& remainTime,
746 int& pollRet, std::string& resMsg)
747 {
748 if (!notifyCollect_ && (remainTime == DUMPCATCHER_REMOTE_P90_TIMEOUT)) {
749 notifyCollect_ = stackKit_.NotifyStartCollect(pid);
750 remainTime = timeout - DUMPCATCHER_REMOTE_P90_TIMEOUT;
751 return true;
752 }
753 pollRet = DUMP_POLL_TIMEOUT;
754 resMsg.append("Result: poll timeout.\n");
755 return false;
756 }
757
HandlePollEvents(int pid,const struct pollfd (& readFds)[2],bool & bPipeConnect,int & pollRet,DumpCatcherPipeData & pipeData)758 bool DfxDumpCatcher::Impl::HandlePollEvents(int pid, const struct pollfd (&readFds)[2], bool& bPipeConnect,
759 int& pollRet, DumpCatcherPipeData& pipeData)
760 {
761 bool bufRet = true;
762 bool resRet = false;
763 bool eventRet = true;
764 for (auto& readFd : readFds) {
765 if (!bPipeConnect && (static_cast<uint32_t>(readFd.revents) & POLLIN)) {
766 bPipeConnect = true;
767 }
768
769 if (bPipeConnect &&
770 ((static_cast<uint32_t>(readFd.revents) & POLLERR) || (static_cast<uint32_t>(readFd.revents) & POLLHUP))) {
771 eventRet = false;
772 pipeData.resMsg.append("Result: poll events error.\n");
773 break;
774 }
775
776 if ((static_cast<uint32_t>(readFd.revents) & POLLIN) != POLLIN) {
777 continue;
778 }
779
780 if (readFd.fd == pipeData.bufFd.GetFd()) {
781 bufRet = DoReadBuf(pipeData);
782 } else if (readFd.fd == pipeData.resFd.GetFd()) {
783 resRet = DoReadRes(pollRet, pipeData);
784 }
785 }
786
787 if ((eventRet == false) || (bufRet == false) || (resRet == true)) {
788 DFXLOGI("eventRet:%{public}d bufRet:%{public}d resRet:%{public}d", eventRet, bufRet, resRet);
789 return false;
790 }
791 return true;
792 }
793
DumpRemotePoll(int pid,const int timeout,DumpCatcherPipeData & pipeData)794 int DfxDumpCatcher::Impl::DumpRemotePoll(int pid, const int timeout, DumpCatcherPipeData& pipeData)
795 {
796 int pollRet = DUMP_POLL_INIT;
797 struct pollfd readFds[2];
798 (void)memset_s(readFds, sizeof(readFds), 0, sizeof(readFds));
799 readFds[0].fd = pipeData.bufFd.GetFd();
800 readFds[0].events = POLLIN;
801 readFds[1].fd = pipeData.resFd.GetFd();
802 readFds[1].events = POLLIN;
803 int fdsSize = sizeof(readFds) / sizeof(readFds[0]);
804 bool bPipeConnect = false;
805 int remainTime = DUMPCATCHER_REMOTE_P90_TIMEOUT < timeout ? DUMPCATCHER_REMOTE_P90_TIMEOUT : timeout;
806 uint64_t startTime = GetAbsTimeMilliSeconds();
807 uint64_t endTime = startTime + static_cast<uint64_t>(timeout);
808 bool isContinue = true;
809 do {
810 int pRet = poll(readFds, fdsSize, remainTime);
811 if (pRet < 0) {
812 isContinue = HandlePollError(pid, endTime, remainTime, pollRet, pipeData.resMsg);
813 continue;
814 } else if (pRet == 0) {
815 isContinue = HandlePollTimeout(pid, timeout, remainTime, pollRet, pipeData.resMsg);
816 continue;
817 }
818 if (!HandlePollEvents(pid, readFds, bPipeConnect, pollRet, pipeData)) {
819 break;
820 }
821 uint64_t now = GetAbsTimeMilliSeconds();
822 if (now >= endTime) {
823 pollRet = DUMP_POLL_TIMEOUT;
824 pipeData.resMsg.append("Result: poll timeout.\n");
825 break;
826 }
827 remainTime = static_cast<int>(endTime - now);
828 } while (isContinue);
829 return pollRet;
830 }
831
DoDumpRemotePoll(int pid,int timeout,std::string & msg,DumpCatcherPipeData & pipeData,bool isJson)832 int DfxDumpCatcher::Impl::DoDumpRemotePoll(int pid, int timeout, std::string& msg,
833 DumpCatcherPipeData& pipeData, bool isJson)
834 {
835 DFX_TRACE_SCOPED_DLSYM("DoDumpRemotePoll");
836 if (!pipeData.bufFd || !pipeData.resFd) {
837 if (!isJson) {
838 msg = "Result: bufFd or resFd < 0.\n";
839 }
840 DFXLOGE("invalid bufFd or resFd");
841 return DUMP_POLL_FD;
842 }
843
844 int res = DumpRemotePoll(pid, timeout, pipeData);
845 bool isDumpSuccess = (res == DUMP_POLL_OK) || (res == DUMP_POLL_NO_PARSE_SYMBOL)
846 || (res == DUMP_POLL_PARSE_SYMBOL_TIMEOUT);
847 DFXLOGI("%{public}s :: %{public}s", __func__, pipeData.resMsg.c_str());
848 msg = isJson && isDumpSuccess ? pipeData.bufMsg : (pipeData.resMsg + pipeData.bufMsg);
849 return res;
850 }
851
DoReadBuf(DumpCatcherPipeData & pipeData)852 bool DfxDumpCatcher::Impl::DoReadBuf(DumpCatcherPipeData& pipeData)
853 {
854 std::vector<char> buffer(MAX_PIPE_SIZE, 0);
855 ssize_t nread = OHOS_TEMP_FAILURE_RETRY(read(pipeData.bufFd.GetFd(), buffer.data(), MAX_PIPE_SIZE));
856 if (nread <= 0) {
857 DFXLOGW("%{public}s :: read error", __func__);
858 return false;
859 }
860 DFXLOGD("%{public}s :: nread: %{public}zu", __func__, nread);
861 pipeData.bufMsg.append(buffer.data(), static_cast<size_t>(nread));
862 return true;
863 }
864
DoReadRes(int & pollRet,DumpCatcherPipeData & pipeData)865 bool DfxDumpCatcher::Impl::DoReadRes(int& pollRet, DumpCatcherPipeData& pipeData)
866 {
867 int32_t res = DumpErrorCode::DUMP_ESUCCESS;
868 ssize_t nread = OHOS_TEMP_FAILURE_RETRY(read(pipeData.resFd.GetFd(), &res, sizeof(res)));
869 if (nread <= 0 || nread != sizeof(res)) {
870 DFXLOGW("%{public}s :: read error", __func__);
871 return false;
872 }
873
874 switch (res) {
875 case DUMP_ESUCCESS:
876 pollRet = DUMP_POLL_OK;
877 break;
878 case DUMP_ESYMBOL_NO_PARSE:
879 pollRet = DUMP_POLL_NO_PARSE_SYMBOL;
880 break;
881 case DUMP_ESYMBOL_PARSE_TIMEOUT:
882 pollRet = DUMP_POLL_PARSE_SYMBOL_TIMEOUT;
883 break;
884 default:
885 pollRet = DUMP_POLL_RETURN;
886 break;
887 }
888
889 pipeData.resMsg.append("Result: " + DfxDumpRes::ToString(res) + "\n");
890 return true;
891 }
892
DumpCatchMultiPid(const std::vector<int> & pids,std::string & msg)893 bool DfxDumpCatcher::Impl::DumpCatchMultiPid(const std::vector<int>& pids, std::string& msg)
894 {
895 bool ret = false;
896 int pidSize = (int)pids.size();
897 if (pidSize <= 0) {
898 DFXLOGE("%{public}s :: param error, pidSize(%{public}d).", __func__, pidSize);
899 return ret;
900 }
901
902 std::unique_lock<std::mutex> lck(mutex_);
903 int currentPid = getpid();
904 int currentTid = gettid();
905 DFXLOGD("%{public}s :: cPid(%{public}d), cTid(%{public}d), pidSize(%{public}d).",
906 __func__, currentPid, currentTid, pidSize);
907
908 time_t startTime = time(nullptr);
909 if (startTime > 0) {
910 DFXLOGD("%{public}s :: startTime(%{public}" PRId64 ").", __func__, startTime);
911 }
912
913 for (int i = 0; i < pidSize; i++) {
914 int pid = pids[i];
915 std::string pidStr;
916 bool ret = DoDumpRemoteLocked(pid, 0, pidStr) == DUMPCATCH_ESUCCESS;
917 if (ret) {
918 msg.append(pidStr + "\n");
919 } else {
920 msg.append("Failed to dump process:" + std::to_string(pid));
921 }
922
923 time_t currentTime = time(nullptr);
924 if (currentTime > 0) {
925 DFXLOGD("%{public}s :: startTime(%{public}" PRId64 "), currentTime(%{public}" PRId64 ").",
926 __func__, startTime, currentTime);
927 if (currentTime > startTime + DUMP_CATCHE_WORK_TIME_S) {
928 break;
929 }
930 }
931 }
932
933 DFXLOGD("%{public}s :: msg(%{public}s).", __func__, msg.c_str());
934 if (msg.find("Tid:") != std::string::npos) {
935 ret = true;
936 }
937 return ret;
938 }
939 } // namespace HiviewDFX
940 } // namespace OHOS
941