• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "watchdog_inner.h"
17 
18 #include <cerrno>
19 #include <climits>
20 #include <cstdio>
21 #include <mutex>
22 
23 #include <sys/stat.h>
24 #include <sys/types.h>
25 #include <fcntl.h>
26 #include <pthread.h>
27 #include <unistd.h>
28 #include <csignal>
29 #include <string>
30 
31 #include <securec.h>
32 #include <dlfcn.h>
33 #include "musl_preinit_common.h"
34 #include "backtrace_local.h"
35 #ifdef HISYSEVENT_ENABLE
36 #include "hisysevent.h"
37 #endif
38 #include "ipc_skeleton.h"
39 #include "xcollie_utils.h"
40 #include "xcollie_define.h"
41 #include "dfx_define.h"
42 #include "parameter.h"
43 
// Callback type accepted by SetThreadInfoCallback: it is given a writable buffer, the buffer's
// length and a ucontext pointer.
using ThreadInfoCallBack = void (*)(char* buf, size_t len, void* ucontext);
// Declared weak, so the file still links when no definition of the symbol is provided.
extern "C" void SetThreadInfoCallback(ThreadInfoCallBack func) __attribute__((weak));
46 namespace OHOS {
47 namespace HiviewDFX {
namespace {
// States stored in traceContent_.traceState; DEFAULT is also compared against the sample counter.
enum DumpStackState {
    DEFAULT = 0,
    COMPLETE = 1,
    SAMPLE_COMPLETE = 2
};
// Values of the KEY_LOG_TYPE jank parameter that DistributeEnd dispatches on.
enum CatchLogType {
    LOGTYPE_DEFAULT = -1,
    LOGTYPE_NONE = 0,
    LOGTYPE_SAMPLE_STACK = 1,
    LOGTYPE_COLLECT_TRACE = 2
};

// All constants below are compile-time values. The string pointers are declared
// `const char* const` so the pointer itself cannot be reseated at runtime.
constexpr char IPC_CHECKER[] = "IpcChecker";
constexpr char STACK_CHECKER[] = "ThreadSampler";
constexpr char TRACE_CHECKER[] = "TraceCollector";
constexpr int64_t ONE_DAY_LIMIT = 86400000; // compared against millisecond intervals
constexpr int64_t ONE_HOUR_LIMIT = 3600000; // compared against millisecond intervals
constexpr int MILLISEC_TO_NANOSEC = 1000000;
constexpr int FFRT_BUFFER_SIZE = 512 * 1024;
constexpr int DETECT_STACK_COUNT = 2;
constexpr int COLLECT_STACK_COUNT = 10;
constexpr int COLLECT_TRACE_MIN = 1;
constexpr int COLLECT_TRACE_MAX = 20;
constexpr int TASK_INTERVAL = 155;
constexpr int DURATION_TIME = 150;
constexpr int DISTRIBUTE_TIME = 2000;
constexpr int DUMPTRACE_TIME = 450;
constexpr const char* const KEY_SCB_STATE = "com.ohos.sceneboard";
constexpr uint64_t DEFAULT_TIMEOUT = 60 * 1000;
constexpr uint32_t FFRT_CALLBACK_TIME = 30 * 1000;
constexpr uint32_t IPC_CHECKER_TIME = 30 * 1000;
constexpr uint32_t TIME_MS_TO_S = 1000;
constexpr int INTERVAL_KICK_TIME = 6 * 1000;
constexpr uint32_t DATA_MANAGE_SERVICE_UID = 3012;
constexpr uint32_t FOUNDATION_UID = 5523;
constexpr uint32_t RENDER_SERVICE_UID = 1003;
constexpr int SERVICE_WARNING = 1;
constexpr const char* const SYS_KERNEL_HUNGTASK_USERLIST = "/sys/kernel/hungtask/userlist";
constexpr const char* const HMOS_HUNGTASK_USERLIST = "/proc/sys/hguard/user_list";
constexpr const char* const ON_KICK_TIME = "on,72";
constexpr const char* const ON_KICK_TIME_HMOS = "on,10,foundation";
constexpr const char* const KICK_TIME = "kick";
constexpr const char* const KICK_TIME_HMOS = "kick,foundation";
constexpr int32_t NOT_OPEN = -1;
constexpr const char* const LIB_THREAD_SAMPLER_PATH = "libthread_sampler.z.so";
constexpr size_t STACK_LENGTH = 128 * 1024;
constexpr uint64_t DEFAULE_SLEEP_TIME = 2 * 1000;
constexpr uint32_t JOIN_IPC_FULL_UIDS[] = {DATA_MANAGE_SERVICE_UID, FOUNDATION_UID, RENDER_SERVICE_UID};
constexpr uint64_t SAMPLE_PARAMS_MAX_SIZE = 5;
constexpr uint64_t SAMPLE_PARAMS_MIN_SIZE = 1;
constexpr int MAX_SAMPLE_STACK_TIMES = 2500; // 2.5s
constexpr int SAMPLE_INTERVAL_MIN = 50; // 50ms
constexpr int SAMPLE_INTERVAL_MAX = 500; // 500ms
constexpr int SAMPLE_COUNT_MIN = 1;
constexpr int SAMPLE_REPORT_TIMES_MIN = 1;
constexpr int SAMPLE_REPORT_TIMES_MAX = 3;
constexpr int SAMPLE_EXTRA_COUNT = 4;
constexpr int IGNORE_STARTUP_TIME_MIN = 3; // 3s
constexpr int SCROLL_INTERVAL = 50; // 50ms
constexpr char EXEC_DOMAIN[] = "PERFORMANCE";
}
109 
// Out-of-class definition of the static mutex WatchdogInner::lockFfrt_.
std::mutex WatchdogInner::lockFfrt_;
// Initialised with the current tick (ms) during static initialisation.
// NOTE(review): presumably refreshed on every kick of the hungtask interface — confirm below.
static uint64_t g_lastKickTime = GetCurrentTickMillseconds();
// Starts out as NOT_OPEN (-1). NOTE(review): presumably the fd of the hungtask userlist — confirm.
static int32_t g_fd = NOT_OPEN;
// NOTE(review): presumably cleared once the userlist file is found to be missing — confirm.
static bool g_existFile = true;

// Handler resolved from the thread sampler library by InitThreadSamplerFuncs; nullptr while unset.
SigActionType WatchdogInner::threadSamplerSigHandler_ = nullptr;
// Held by ThreadSamplerSigHandler and UninstallThreadSamplerSignal around threadSamplerSigHandler_.
std::mutex WatchdogInner::threadSamplerSignalMutex_;
117 
118 namespace {
ThreadInfo(char * buf,size_t len,void * ucontext)119 void ThreadInfo(char *buf  __attribute__((unused)),
120                 size_t len  __attribute__((unused)),
121                 void* ucontext  __attribute__((unused)))
122 {
123     if (ucontext == nullptr) {
124         return;
125     }
126 
127     auto ret = memcpy_s(buf, len, WatchdogInner::GetInstance().currentScene_.c_str(),
128         WatchdogInner::GetInstance().currentScene_.size());
129     if (ret != 0) {
130         return;
131     }
132 }
133 
// Adjusts the calling thread's signal mask.
//
// The working set starts as a copy of the thread's current mask; signo is then added to it
// (isAddSignal) or removed from it, and the whole set is blocked (isBlock) or unblocked.
// NOTE(review): because the set is seeded from the current mask, unblocking also unblocks every
// other signal that was blocked at the time of the call — confirm this is what callers expect.
void SetThreadSignalMask(int signo, bool isAddSignal, bool isBlock)
{
    sigset_t mask;
    sigemptyset(&mask);
    // A null "new set" only queries: the current mask is written into `mask`.
    pthread_sigmask(SIG_SETMASK, nullptr, &mask);

    if (isAddSignal) {
        sigaddset(&mask, signo);
    } else {
        sigdelset(&mask, signo);
    }

    const int how = isBlock ? SIG_BLOCK : SIG_UNBLOCK;
    pthread_sigmask(how, &mask, nullptr);
}
150 
// Signals that InstallThreadSamplerSignal removes from the sampler handler's sa_mask, so they
// stay deliverable while the handler runs.
static constexpr int CRASH_SIGNAL_LIST[] = {
    SIGILL,
    SIGABRT,
    SIGBUS,
    SIGFPE,
    SIGSEGV,
    SIGSTKFLT,
    SIGSYS,
    SIGTRAP,
};
155 }
156 
// Zeroes the callback counter and callback timestamp used by IsCallbackLimit and sets the scene
// text that the ThreadInfo callback copies out.
WatchdogInner::WatchdogInner()
    : cntCallback_(0), timeCallback_(0)
{
    currentScene_ = "thread DfxWatchdog: Current scenario is hicollie.\n";
}
162 
// Calls Stop() on destruction.
WatchdogInner::~WatchdogInner()
{
    Stop();
}
167 
IsInAppspwan()168 static bool IsInAppspwan()
169 {
170     if (getuid() == 0 && GetSelfProcName().find("appspawn") != std::string::npos) {
171         return true;
172     }
173 
174     if (getuid() == 0 && GetSelfProcName().find("nativespawn") != std::string::npos) {
175         return true;
176     }
177 
178     return false;
179 }
180 
SetBundleInfo(const std::string & bundleName,const std::string & bundleVersion)181 void WatchdogInner::SetBundleInfo(const std::string& bundleName, const std::string& bundleVersion)
182 {
183     bundleName_ = bundleName;
184     bundleVersion_ = bundleVersion;
185 }
186 
// Records whether the application is in the foreground; the flag is reported with jank events
// and copied into the trace collector caller information.
void WatchdogInner::SetForeground(const bool& isForeground)
{
    isForeground_ = isForeground;
}
191 
ReportMainThreadEvent(int64_t tid,bool isScroll)192 bool WatchdogInner::ReportMainThreadEvent(int64_t tid, bool isScroll)
193 {
194     std::string stack = "";
195     std::string heaviestStack = "";
196     if (!CollectStack(stack, heaviestStack)) {
197         stack = "";
198         heaviestStack = "";
199     }
200 
201     std::string path = "";
202     std::string eventName = "MAIN_THREAD_JANK";
203     if (!buissnessThreadInfo_.empty()) {
204         eventName = "BUSSINESS_THREAD_JANK";
205     }
206     int32_t pid = getprocpid();
207     bool isOverLimit = false;
208     if (!WriteStackToFd(pid, path, stack, eventName, isOverLimit)) {
209         XCOLLIE_LOGI("MainThread WriteStackToFd Failed");
210         return false;
211     }
212 #ifdef HISYSEVENT_ENABLE
213     int result = -1;
214     if (!isScroll) {
215         result = HiSysEventWrite(HiSysEvent::Domain::FRAMEWORK, "MAIN_THREAD_JANK",
216             HiSysEvent::EventType::FAULT,
217             "BUNDLE_VERSION", bundleVersion_, "BUNDLE_NAME", bundleName_,
218             "BEGIN_TIME", stackContent_.reportBegin / MILLISEC_TO_NANOSEC,
219             "END_TIME", stackContent_.reportEnd / MILLISEC_TO_NANOSEC,
220             "EXTERNAL_LOG", path, "STACK", stack, "JANK_LEVEL", 0,
221             "THREAD_NAME", GetSelfProcName(), "FOREGROUND", isForeground_,
222             "LOG_TIME", GetTimeStamp() / MILLISEC_TO_NANOSEC,
223             "APP_START_JIFFIES_TIME", GetAppStartTime(pid, tid), "HEAVIEST_STACK", heaviestStack,
224             "LOG_OVER_LIMIT", isOverLimit);
225     } else {
226         result = HiSysEventWrite(HiSysEvent::Domain::FRAMEWORK, "SCROLL_TIMEOUT",
227             HiSysEvent::EventType::FAULT, "PROCESS_NAME", GetSelfProcName(),
228             "EXTERNAL_LOG", path, "LOG_OVER_LIMIT", isOverLimit);
229     }
230 
231     XCOLLIE_LOGI("MainThread HiSysEventWrite result=%{public}d, isScroll=%{public}d", result, isScroll);
232     return result >= 0;
233 #else
234     XCOLLIE_LOGI("hisysevent not exists");
235 #endif
236 }
237 
CheckEventTimer(int64_t currentTime,int64_t reportBegin,int64_t reportEnd,int interval)238 bool WatchdogInner::CheckEventTimer(int64_t currentTime, int64_t reportBegin, int64_t reportEnd, int interval)
239 {
240     if (reportBegin == timeContent_.curBegin &&
241         reportEnd == timeContent_.curEnd) {
242         return false;
243     }
244     return (timeContent_.curEnd <= timeContent_.curBegin &&
245         (currentTime - timeContent_.curBegin >= interval * MILLISEC_TO_NANOSEC)) ||
246         (timeContent_.curEnd - timeContent_.curBegin > interval * MILLISEC_TO_NANOSEC);
247 }
248 
ThreadSamplerSigHandler(int sig,siginfo_t * si,void * context)249 void WatchdogInner::ThreadSamplerSigHandler(int sig, siginfo_t* si, void* context)
250 {
251     std::lock_guard<std::mutex> lock(threadSamplerSignalMutex_);
252     if (WatchdogInner::threadSamplerSigHandler_ == nullptr) {
253         return;
254     }
255     WatchdogInner::threadSamplerSigHandler_(sig, si, context);
256 }
257 
InstallThreadSamplerSignal()258 bool WatchdogInner::InstallThreadSamplerSignal()
259 {
260     struct sigaction action {};
261     sigfillset(&action.sa_mask);
262     for (size_t i = 0; i < sizeof(CRASH_SIGNAL_LIST) / sizeof(CRASH_SIGNAL_LIST[0]); i++) {
263         sigdelset(&action.sa_mask, CRASH_SIGNAL_LIST[i]);
264     }
265     action.sa_sigaction = WatchdogInner::ThreadSamplerSigHandler;
266     action.sa_flags = SA_RESTART | SA_SIGINFO;
267     if (sigaction(MUSL_SIGNAL_SAMPLE_STACK, &action, nullptr) != 0) {
268         XCOLLIE_LOGE("Failed to register signal(%{public}d:%{public}d)", MUSL_SIGNAL_SAMPLE_STACK, errno);
269         return false;
270     }
271     return true;
272 }
273 
UninstallThreadSamplerSignal()274 void WatchdogInner::UninstallThreadSamplerSignal()
275 {
276     std::lock_guard<std::mutex> lock(threadSamplerSignalMutex_);
277     threadSamplerSigHandler_ = nullptr;
278 }
279 
CheckThreadSampler()280 bool WatchdogInner::CheckThreadSampler()
281 {
282     XCOLLIE_LOGD("ThreadSampler 1st in ThreadSamplerTask.\n");
283     if (!InitThreadSamplerFuncs()) {
284         XCOLLIE_LOGE("ThreadSampler initialize failed.\n");
285         return false;
286     }
287 
288     if (!InstallThreadSamplerSignal()) {
289         XCOLLIE_LOGE("ThreadSampler install signal failed.\n");
290         return false;
291     }
292 
293     int initThreadSamplerRet = threadSamplerInitFunc_(COLLECT_STACK_COUNT);
294     if (initThreadSamplerRet != 0) {
295         XCOLLIE_LOGE("Thread sampler init failed. ret %{public}d\n", initThreadSamplerRet);
296         return false;
297     }
298     return true;
299 }
300 
InitThreadSamplerFuncs()301 bool WatchdogInner::InitThreadSamplerFuncs()
302 {
303     threadSamplerFuncHandler_ = dlopen(LIB_THREAD_SAMPLER_PATH, RTLD_LAZY);
304     if (threadSamplerFuncHandler_ == nullptr) {
305         XCOLLIE_LOGE("dlopen failed, funcHandler is nullptr.\n");
306         return false;
307     }
308 
309     threadSamplerInitFunc_ =
310         reinterpret_cast<ThreadSamplerInitFunc>(FunctionOpen(threadSamplerFuncHandler_, "ThreadSamplerInit"));
311     threadSamplerSampleFunc_ =
312         reinterpret_cast<ThreadSamplerSampleFunc>(FunctionOpen(threadSamplerFuncHandler_, "ThreadSamplerSample"));
313     threadSamplerCollectFunc_ =
314         reinterpret_cast<ThreadSamplerCollectFunc>(FunctionOpen(threadSamplerFuncHandler_, "ThreadSamplerCollect"));
315     threadSamplerDeinitFunc_ =
316         reinterpret_cast<ThreadSamplerDeinitFunc>(FunctionOpen(threadSamplerFuncHandler_, "ThreadSamplerDeinit"));
317     threadSamplerSigHandler_ =
318         reinterpret_cast<SigActionType>(FunctionOpen(threadSamplerFuncHandler_, "ThreadSamplerSigHandler"));
319     if (threadSamplerInitFunc_ == nullptr || threadSamplerSampleFunc_ == nullptr ||
320         threadSamplerCollectFunc_ == nullptr || threadSamplerDeinitFunc_ == nullptr ||
321         threadSamplerSigHandler_ == nullptr) {
322         ResetThreadSamplerFuncs();
323         XCOLLIE_LOGE("ThreadSampler dlsym some function failed.\n");
324         return false;
325     }
326     XCOLLIE_LOGE("ThreadSampler has been successfully loaded.\n");
327     return true;
328 }
329 
ResetThreadSamplerFuncs()330 void WatchdogInner::ResetThreadSamplerFuncs()
331 {
332     threadSamplerInitFunc_ = nullptr;
333     threadSamplerSampleFunc_ = nullptr;
334     threadSamplerCollectFunc_ = nullptr;
335     threadSamplerDeinitFunc_ = nullptr;
336     threadSamplerSigHandler_ = nullptr;
337     dlclose(threadSamplerFuncHandler_);
338     threadSamplerFuncHandler_ = nullptr;
339 }
340 
UpdateTime(int64_t & reportBegin,int64_t & reportEnd,TimePoint & lastEndTime,const TimePoint & endTime)341 void WatchdogInner::UpdateTime(int64_t& reportBegin, int64_t& reportEnd,
342     TimePoint& lastEndTime, const TimePoint& endTime)
343 {
344     reportBegin = timeContent_.curBegin;
345     reportEnd = timeContent_.curEnd;
346     lastEndTime = endTime;
347 }
348 
SampleStackDetect(const TimePoint & endTime,int & reportTimes,int updateTimes,int ignoreTime)349 bool WatchdogInner::SampleStackDetect(const TimePoint& endTime, int& reportTimes,
350     int updateTimes, int ignoreTime)
351 {
352     uint64_t startUpTime = static_cast<uint64_t>(ignoreTime) * TIME_MS_TO_S;
353     if (GetCurrentTickMillseconds() - watchdogStartTime_ < startUpTime) {
354         XCOLLIE_LOGI("Application is in starting period.\n");
355         return false;
356     }
357     if (!stackContent_.isStartSampleEnabled) {
358         XCOLLIE_LOGI("Current sample detection task is being executed.\n");
359         return false;
360     }
361     if (reportTimes <= 0) {
362         int64_t checkTimer = ONE_DAY_LIMIT;
363         if (!isScroll_ && (IsDeveloperOpen() ||
364             (IsBetaVersion() && GetProcessNameFromProcCmdline(getpid()) == KEY_SCB_STATE))) {
365             checkTimer = ONE_HOUR_LIMIT;
366         }
367         auto diff = endTime - stackContent_.lastEndTime;
368         int64_t intervalTime = std::chrono::duration_cast<std::chrono::milliseconds>(diff).count();
369         if (intervalTime < checkTimer) {
370             return false;
371         }
372         reportTimes = updateTimes;
373         XCOLLIE_LOGI("Update the currentThread's reportTimes: %{public}d", reportTimes);
374     }
375     stackContent_.isStartSampleEnabled = false;
376     UpdateTime(stackContent_.reportBegin, stackContent_.reportEnd, stackContent_.lastEndTime, endTime);
377     return true;
378 }
379 
StartScrollProfile(const TimePoint & endTime,int64_t durationTime,int sampleInterval)380 bool WatchdogInner::StartScrollProfile(const TimePoint& endTime, int64_t durationTime, int sampleInterval)
381 {
382     std::unique_lock<std::mutex> lock(lock_);
383     if (!isScroll_ || !SampleStackDetect(endTime, stackContent_.scrollTimes, SAMPLE_DEFULE_REPORT_TIMES)) {
384         return false;
385     }
386     XCOLLIE_LOGI("StartScrollProfile durationTime: %{public}" PRId64 " ms, sampleInterval: %{public}d.",
387         durationTime, sampleInterval);
388     int64_t tid = getproctid();
389     auto sampleTask = [this, sampleInterval, tid]() {
390         if (!CheckThreadSampler() || threadSamplerSampleFunc_ == nullptr) {
391             isMainThreadStackEnabled_ = true;
392             return;
393         }
394         threadSamplerSampleFunc_();
395         ReportMainThreadEvent(tid, true);
396         stackContent_.scrollTimes--;
397         isMainThreadStackEnabled_ = true;
398     };
399     WatchdogTask task("ThreadSampler", sampleTask, 0, sampleInterval, true);
400     InsertWatchdogTaskLocked("ThreadSampler", std::move(task));
401     return true;
402 }
403 
StartProfileMainThread(const TimePoint & endTime,int64_t durationTime,int sampleInterval)404 void WatchdogInner::StartProfileMainThread(const TimePoint& endTime, int64_t durationTime, int sampleInterval)
405 {
406     std::unique_lock<std::mutex> lock(lock_);
407     bool result = SampleStackDetect(endTime, stackContent_.reportTimes,
408         jankParamsMap[KEY_SAMPLE_REPORT_TIMES], jankParamsMap[KEY_IGNORE_STARTUP_TIME]);
409     if (!result) {
410         return;
411     }
412     XCOLLIE_LOGI("StartProfileMainThread durationTime: %{public}" PRId64 " ms, sampleInterval: %{public}d.",
413         durationTime, sampleInterval);
414     stackContent_.detectorCount = 0;
415     stackContent_.collectCount = 0;
416     int sampleCount = jankParamsMap[KEY_SAMPLE_COUNT];
417     int64_t tid = getproctid();
418     auto sampleTask = [this, sampleInterval, sampleCount, tid]() {
419         if ((stackContent_.detectorCount == 0 && stackContent_.collectCount == 0 && !CheckThreadSampler()) ||
420             threadSamplerSampleFunc_ == nullptr) {
421             isMainThreadStackEnabled_ = true;
422             return;
423         }
424         if (stackContent_.collectCount > DumpStackState::DEFAULT &&
425             stackContent_.collectCount < sampleCount) {
426             threadSamplerSampleFunc_();
427             stackContent_.collectCount++;
428         } else if (stackContent_.collectCount == sampleCount) {
429             ReportMainThreadEvent(tid);
430             stackContent_.reportTimes--;
431             isMainThreadStackEnabled_ = true;
432             return;
433         } else {
434             if (CheckEventTimer(GetTimeStamp(), stackContent_.reportBegin,
435                 stackContent_.reportEnd, sampleInterval)) {
436                 threadSamplerSampleFunc_();
437                 stackContent_.collectCount++;
438             } else {
439                 stackContent_.detectorCount++;
440             }
441         }
442         if (stackContent_.detectorCount == DETECT_STACK_COUNT) {
443             isMainThreadStackEnabled_ = true;
444         }
445     };
446     WatchdogTask task("ThreadSampler", sampleTask, 0, sampleInterval, true);
447     InsertWatchdogTaskLocked("ThreadSampler", std::move(task));
448 }
449 
CollectStack(std::string & stack,std::string & heaviestStack)450 bool WatchdogInner::CollectStack(std::string& stack, std::string& heaviestStack)
451 {
452     if (threadSamplerCollectFunc_ == nullptr) {
453         return false;
454     }
455     int treeFormat = 1;
456     char* stk = new char[STACK_LENGTH]();
457     char* heaviest = new char[STACK_LENGTH]();
458     int collectRet = threadSamplerCollectFunc_(stk, heaviest, STACK_LENGTH, STACK_LENGTH, treeFormat);
459     if (collectRet != 0) {
460         XCOLLIE_LOGE("threadSampler collect stack failed.");
461         delete[] stk;
462         delete[] heaviest;
463         return false;
464     }
465     stack = stk;
466     heaviestStack = heaviest;
467     delete[] stk;
468     delete[] heaviest;
469     return true;
470 }
471 
Deinit()472 bool WatchdogInner::Deinit()
473 {
474     if (threadSamplerDeinitFunc_ == nullptr) {
475         return false;
476     }
477     UninstallThreadSamplerSignal();
478     int ret = threadSamplerDeinitFunc_();
479     return ret == 0;
480 }
481 
DumpTraceProfile(int32_t interval)482 void WatchdogInner::DumpTraceProfile(int32_t interval)
483 {
484     traceContent_.dumpCount = 0;
485     traceContent_.traceCount = 0;
486     auto traceTask = [this, interval]() {
487         if (traceContent_.traceCount == 0) {
488             appCaller_.actionId = UCollectClient::ACTION_ID_START_TRACE;
489             auto result = traceCollector_->CaptureDurationTrace(appCaller_);
490             if (result.retCode != 0) {
491                 traceContent_.traceState = DumpStackState::DEFAULT;
492                 isMainThreadTraceEnabled_ = true;
493                 return;
494             }
495         }
496         traceContent_.traceCount++;
497         if (CheckEventTimer(GetTimeStamp(), traceContent_.reportBegin,
498             traceContent_.reportEnd, interval)) {
499             traceContent_.dumpCount++;
500         }
501         if (traceContent_.traceCount >= COLLECT_TRACE_MAX) {
502             if (traceContent_.dumpCount >= COLLECT_TRACE_MIN) {
503                 CreateWatchdogDir();
504                 appCaller_.actionId = UCollectClient::ACTION_ID_DUMP_TRACE;
505                 appCaller_.isBusinessJank = !buissnessThreadInfo_.empty();
506                 auto result = traceCollector_->CaptureDurationTrace(appCaller_);
507                 XCOLLIE_LOGI("MainThread TraceCollector Dump result: %{public}d", result.retCode);
508             }
509             isMainThreadTraceEnabled_ = true;
510         }
511     };
512     WatchdogTask task("TraceCollector", traceTask, 0, interval, true);
513     std::unique_lock<std::mutex> lock(lock_);
514     InsertWatchdogTaskLocked("TraceCollector", std::move(task));
515 }
516 
StartTraceProfile()517 int32_t WatchdogInner::StartTraceProfile()
518 {
519     traceCollector_ = UCollectClient::TraceCollector::Create();
520     if (traceCollector_ == nullptr) {
521         traceContent_.traceState = DumpStackState::DEFAULT;
522         XCOLLIE_LOGE("Create traceCollector failed.");
523         return -1;
524     }
525     appCaller_.bundleName = bundleName_;
526     appCaller_.bundleVersion = bundleVersion_;
527     appCaller_.uid = static_cast<int64_t>(getuid());
528     appCaller_.pid = getprocpid();
529     appCaller_.threadName = GetSelfProcName();
530     appCaller_.foreground = isForeground_;
531     appCaller_.happenTime = GetTimeStamp() / MILLISEC_TO_NANOSEC;
532     appCaller_.beginTime = traceContent_.reportBegin / MILLISEC_TO_NANOSEC;
533     appCaller_.endTime = traceContent_.reportEnd / MILLISEC_TO_NANOSEC;
534     DumpTraceProfile(DUMPTRACE_TIME);
535     return 0;
536 }
537 
CollectTraceDetect(const TimePoint & endTime,int64_t durationTime)538 void WatchdogInner::CollectTraceDetect(const TimePoint& endTime, int64_t durationTime)
539 {
540     if (IsBetaVersion()) {
541         return;
542     }
543     if (traceContent_.traceState == DumpStackState::COMPLETE) {
544         auto diff = endTime - stackContent_.lastEndTime;
545         int64_t intervalTime = std::chrono::duration_cast<std::chrono::milliseconds>(diff).count();
546         if (intervalTime < ONE_DAY_LIMIT) {
547             return;
548         }
549     }
550     traceContent_.traceState = DumpStackState::COMPLETE;
551     UpdateTime(traceContent_.reportBegin, traceContent_.reportEnd, traceContent_.lastEndTime, endTime);
552     int32_t result = StartTraceProfile();
553     XCOLLIE_LOGI("MainThread TraceCollector Start result: %{public}d, Duration Time: %{public}" PRId64 " ms",
554         result, durationTime);
555 }
556 
DistributeStart(const std::string & name)557 static TimePoint DistributeStart(const std::string& name)
558 {
559     WatchdogInner::GetInstance().timeContent_.curBegin = GetTimeStamp();
560     return std::chrono::steady_clock::now();
561 }
562 
DistributeEnd(const std::string & name,const TimePoint & startTime)563 static void DistributeEnd(const std::string& name, const TimePoint& startTime)
564 {
565     TimePoint endTime = std::chrono::steady_clock::now();
566     auto duration = endTime - startTime;
567     int64_t durationTime = std::chrono::duration_cast<std::chrono::milliseconds>(duration).count();
568 #ifdef HICOLLIE_JANK_ENABLE
569     WatchdogInner::GetInstance().timeContent_.curEnd = GetTimeStamp();
570     if (duration > std::chrono::milliseconds(SCROLL_INTERVAL) &&
571         WatchdogInner::GetInstance().StartScrollProfile(endTime, durationTime, SCROLL_INTERVAL)) {
572         return;
573     }
574     int sampleInterval = WatchdogInner::GetInstance().jankParamsMap[KEY_SAMPLE_INTERVAL];
575     if (duration > std::chrono::milliseconds(sampleInterval)) {
576         int logType = WatchdogInner::GetInstance().jankParamsMap[KEY_LOG_TYPE];
577         switch (logType) {
578             case CatchLogType::LOGTYPE_SAMPLE_STACK: {
579                 WatchdogInner::GetInstance().StartProfileMainThread(endTime, durationTime, sampleInterval);
580                 break;
581             }
582             case CatchLogType::LOGTYPE_COLLECT_TRACE: {
583                 if (duration > std::chrono::milliseconds(DUMPTRACE_TIME)) {
584                     WatchdogInner::GetInstance().CollectTraceDetect(endTime, durationTime);
585                 }
586                 break;
587             }
588             case CatchLogType::LOGTYPE_NONE: {
589                 if (duration < std::chrono::milliseconds(DUMPTRACE_TIME)) {
590                     WatchdogInner::GetInstance().StartProfileMainThread(endTime, durationTime, sampleInterval);
591                 } else {
592                     WatchdogInner::GetInstance().CollectTraceDetect(endTime, durationTime);
593                 }
594                 break;
595             }
596             default:
597                 break;
598         }
599     }
600 #endif // HICOLLIE_JANK_ENABLE
601     if (duration > std::chrono::milliseconds(DISTRIBUTE_TIME)) {
602         XCOLLIE_LOGI("BlockMonitor event name: %{public}s, Duration Time: %{public}" PRId64 " ms",
603             name.c_str(), durationTime);
604     }
605 }
606 
AddThread(const std::string & name,std::shared_ptr<AppExecFwk::EventHandler> handler,TimeOutCallback timeOutCallback,uint64_t interval)607 int WatchdogInner::AddThread(const std::string &name,
608     std::shared_ptr<AppExecFwk::EventHandler> handler, TimeOutCallback timeOutCallback, uint64_t interval)
609 {
610     if (name.empty() || handler == nullptr) {
611         XCOLLIE_LOGE("Add thread fail, invalid args!");
612         return -1;
613     }
614 
615     if (IsInAppspwan()) {
616         return -1;
617     }
618 
619     std::string limitedName = GetLimitedSizeName(name);
620     XCOLLIE_LOGI("Add thread %{public}s to watchdog.", limitedName.c_str());
621     std::unique_lock<std::mutex> lock(lock_);
622 
623     IpcCheck();
624 
625     if (!InsertWatchdogTaskLocked(limitedName, WatchdogTask(limitedName, handler, timeOutCallback, interval))) {
626         return -1;
627     }
628     return 0;
629 }
630 
RunOneShotTask(const std::string & name,Task && task,uint64_t delay)631 void WatchdogInner::RunOneShotTask(const std::string& name, Task&& task, uint64_t delay)
632 {
633     if (name.empty() || task == nullptr) {
634         XCOLLIE_LOGE("Add task fail, invalid args!");
635         return;
636     }
637 
638     if (IsInAppspwan()) {
639         return;
640     }
641 
642     std::unique_lock<std::mutex> lock(lock_);
643     std::string limitedName = GetLimitedSizeName(name);
644     InsertWatchdogTaskLocked(limitedName, WatchdogTask(limitedName, std::move(task), delay, 0, true));
645 }
646 
RunXCollieTask(const std::string & name,uint64_t timeout,XCollieCallback func,void * arg,unsigned int flag)647 int64_t WatchdogInner::RunXCollieTask(const std::string& name, uint64_t timeout, XCollieCallback func,
648     void *arg, unsigned int flag)
649 {
650     if (name.empty() || timeout == 0) {
651         XCOLLIE_LOGE("Add XCollieTask fail, invalid args!");
652         return INVALID_ID;
653     }
654 
655     if (IsInAppspwan()) {
656         return INVALID_ID;
657     }
658 
659     std::unique_lock<std::mutex> lock(lock_);
660     IpcCheck();
661     std::string limitedName = GetLimitedSizeName(name);
662     return InsertWatchdogTaskLocked(limitedName, WatchdogTask(limitedName, timeout, func, arg, flag));
663 }
664 
RemoveXCollieTask(int64_t id)665 void WatchdogInner::RemoveXCollieTask(int64_t id)
666 {
667     std::priority_queue<WatchdogTask> tmpQueue;
668     std::unique_lock<std::mutex> lock(lock_);
669     size_t size = checkerQueue_.size();
670     if (size == 0) {
671         XCOLLIE_LOGE("Remove XCollieTask %{public}lld fail, empty queue!", static_cast<long long>(id));
672         return;
673     }
674     while (!checkerQueue_.empty()) {
675         const WatchdogTask& task = checkerQueue_.top();
676         if (task.id != id || task.timeout == 0) {
677             tmpQueue.push(task);
678         }
679         checkerQueue_.pop();
680     }
681     if (tmpQueue.size() == size) {
682         XCOLLIE_LOGE("Remove XCollieTask fail, can not find timer %{public}lld, size=%{public}zu!",
683             static_cast<long long>(id), size);
684     }
685     tmpQueue.swap(checkerQueue_);
686 }
687 
RunPeriodicalTask(const std::string & name,Task && task,uint64_t interval,uint64_t delay)688 void WatchdogInner::RunPeriodicalTask(const std::string& name, Task&& task, uint64_t interval, uint64_t delay)
689 {
690     if (name.empty() || task == nullptr) {
691         XCOLLIE_LOGE("Add task fail, invalid args!");
692         return;
693     }
694 
695     if (IsInAppspwan()) {
696         return;
697     }
698 
699     std::string limitedName = GetLimitedSizeName(name);
700     XCOLLIE_LOGD("Add periodical task %{public}s to watchdog.", name.c_str());
701     std::unique_lock<std::mutex> lock(lock_);
702     InsertWatchdogTaskLocked(limitedName, WatchdogTask(limitedName, std::move(task), delay, interval, false));
703 }
704 
SetTimerCountTask(const std::string & name,uint64_t timeLimit,int countLimit)705 int64_t WatchdogInner::SetTimerCountTask(const std::string &name, uint64_t timeLimit, int countLimit)
706 {
707     if (name.empty() || timeLimit == 0 || countLimit <= 0) {
708         XCOLLIE_LOGE("SetTimerCountTask fail, invalid args!");
709         return INVALID_ID;
710     }
711 
712     if (IsInAppspwan()) {
713         return INVALID_ID;
714     }
715     std::string limitedName = GetLimitedSizeName(name);
716     XCOLLIE_LOGD("SetTimerCountTask name : %{public}s", name.c_str());
717     std::unique_lock<std::mutex> lock(lock_);
718     return InsertWatchdogTaskLocked(limitedName, WatchdogTask(limitedName, timeLimit, countLimit));
719 }
720 
TriggerTimerCountTask(const std::string & name,bool bTrigger,const std::string & message)721 void WatchdogInner::TriggerTimerCountTask(const std::string &name, bool bTrigger, const std::string &message)
722 {
723     std::unique_lock<std::mutex> lock(lock_);
724 
725     if (checkerQueue_.empty()) {
726         XCOLLIE_LOGE("TriggerTimerCountTask name : %{public}s fail, empty queue!", name.c_str());
727         return;
728     }
729 
730     bool isTaskExist = false;
731     uint64_t now = GetCurrentTickMillseconds();
732     std::priority_queue<WatchdogTask> tmpQueue;
733     while (!checkerQueue_.empty()) {
734         WatchdogTask task = checkerQueue_.top();
735         if (task.name == name) {
736             isTaskExist = true;
737             if (bTrigger) {
738                 task.triggerTimes.push_back(now);
739                 task.message = message;
740             } else {
741                 task.triggerTimes.clear();
742             }
743         }
744         tmpQueue.push(task);
745         checkerQueue_.pop();
746     }
747     tmpQueue.swap(checkerQueue_);
748 
749     if (!isTaskExist) {
750         XCOLLIE_LOGE("TriggerTimerCount name : %{public}s does not exist!", name.c_str());
751     }
752 }
753 
IsTaskExistLocked(const std::string & name)754 bool WatchdogInner::IsTaskExistLocked(const std::string& name)
755 {
756     return (taskNameSet_.find(name) != taskNameSet_.end());
757 }
758 
IsExceedMaxTaskLocked()759 bool WatchdogInner::IsExceedMaxTaskLocked()
760 {
761     if (checkerQueue_.size() >= MAX_WATCH_NUM) {
762         XCOLLIE_LOGE("Exceed max watchdog task!");
763         return true;
764     }
765 
766     return false;
767 }
768 
InsertWatchdogTaskLocked(const std::string & name,WatchdogTask && task)769 int64_t WatchdogInner::InsertWatchdogTaskLocked(const std::string& name, WatchdogTask&& task)
770 {
771     if (!task.isOneshotTask && IsTaskExistLocked(name)) {
772         XCOLLIE_LOGI("Task with %{public}s already exist, failed to insert.", name.c_str());
773         return 0;
774     }
775 
776     if (IsExceedMaxTaskLocked()) {
777         XCOLLIE_LOGE("Exceed max watchdog task, failed to insert.");
778         return 0;
779     }
780     int64_t id = task.id;
781     checkerQueue_.push(std::move(task));
782     if (!task.isOneshotTask) {
783         taskNameSet_.insert(name);
784     }
785     CreateWatchdogThreadIfNeed();
786     condition_.notify_all();
787 
788     return id;
789 }
790 
StopWatchdog()791 void WatchdogInner::StopWatchdog()
792 {
793     Stop();
794 }
795 
IsCallbackLimit(unsigned int flag)796 bool WatchdogInner::IsCallbackLimit(unsigned int flag)
797 {
798     bool ret = false;
799     time_t startTime = time(nullptr);
800     if (!(flag & XCOLLIE_FLAG_LOG)) {
801         return ret;
802     }
803     if (timeCallback_ + XCOLLIE_CALLBACK_TIMEWIN_MAX < startTime) {
804         timeCallback_ = startTime;
805     } else {
806         if (++cntCallback_ > XCOLLIE_CALLBACK_HISTORY_MAX) {
807             ret = true;
808         }
809     }
810     return ret;
811 }
812 
IPCProxyLimitCallback(uint64_t num)813 void IPCProxyLimitCallback(uint64_t num)
814 {
815     XCOLLIE_LOGE("ipc proxy num %{public}" PRIu64 " exceed limit", num);
816     if (getuid() >= MIN_APP_UID && IsBetaVersion()) {
817         XCOLLIE_LOGI("Process is going to exit, reason: ipc proxy num exceed limit");
818         _exit(0);
819     }
820 }
821 
CreateWatchdogThreadIfNeed()822 void WatchdogInner::CreateWatchdogThreadIfNeed()
823 {
824     std::call_once(flag_, [this] {
825         if (threadLoop_ == nullptr) {
826             if (mainRunner_ == nullptr) {
827                 mainRunner_ = AppExecFwk::EventRunner::GetMainEventRunner();
828             }
829             mainRunner_->SetMainLooperWatcher(DistributeStart, DistributeEnd);
830             const uint64_t limitNum = 20000;
831             IPCDfx::SetIPCProxyLimit(limitNum, IPCProxyLimitCallback);
832             threadLoop_ = std::make_unique<std::thread>(&WatchdogInner::Start, this);
833             if (getpid() == gettid()) {
834                 SetThreadSignalMask(SIGDUMP, true, true);
835             }
836             XCOLLIE_LOGD("Watchdog is running!");
837         }
838     });
839 }
840 
IsInSleep(const WatchdogTask & queuedTaskCheck)841 bool WatchdogInner::IsInSleep(const WatchdogTask& queuedTaskCheck)
842 {
843     if (IsInAppspwan() || queuedTaskCheck.bootTimeStart <= 0 || queuedTaskCheck.monoTimeStart <= 0) {
844         return false;
845     }
846 
847     uint64_t bootTimeStart = 0;
848     uint64_t monoTimeStart = 0;
849     CalculateTimes(bootTimeStart, monoTimeStart);
850     uint64_t bootTimeDetal = GetNumsDiffAbs(bootTimeStart, queuedTaskCheck.bootTimeStart);
851     uint64_t monoTimeDetal = GetNumsDiffAbs(monoTimeStart, queuedTaskCheck.monoTimeStart);
852     if (GetNumsDiffAbs(bootTimeDetal, monoTimeDetal) >= DEFAULE_SLEEP_TIME) {
853         XCOLLIE_LOGI("Current Thread has been sleep, pid: %{public}d", getprocpid());
854         return true;
855     }
856     return false;
857 }
858 
CheckKickWatchdog(uint64_t now,const WatchdogTask & queuedTask)859 void WatchdogInner::CheckKickWatchdog(uint64_t now, const WatchdogTask& queuedTask)
860 {
861     if (g_existFile && queuedTask.name == IPC_FULL && getuid() == FOUNDATION_UID &&
862         now - g_lastKickTime > INTERVAL_KICK_TIME) {
863         if (KickWatchdog()) {
864             g_lastKickTime = now;
865         }
866     }
867 }
868 
CheckCurrentTaskLocked(const WatchdogTask & queuedTaskCheck)869 bool WatchdogInner::CheckCurrentTaskLocked(const WatchdogTask& queuedTaskCheck)
870 {
871     if (queuedTaskCheck.name.empty()) {
872         checkerQueue_.pop();
873         XCOLLIE_LOGW("queuedTask name is empty.");
874     } else if (queuedTaskCheck.name == STACK_CHECKER && isMainThreadStackEnabled_) {
875         checkerQueue_.pop();
876         taskNameSet_.erase("ThreadSampler");
877         if (Deinit()) {
878             ResetThreadSamplerFuncs();
879         }
880         stackContent_.isStartSampleEnabled = true;
881         isMainThreadStackEnabled_ = false;
882         XCOLLIE_LOGI("Detect sample stack task complete.");
883     } else if (queuedTaskCheck.name == TRACE_CHECKER && isMainThreadTraceEnabled_) {
884         checkerQueue_.pop();
885         taskNameSet_.erase("TraceCollector");
886         isMainThreadTraceEnabled_ = false;
887         if (traceContent_.dumpCount < COLLECT_TRACE_MIN) {
888             traceContent_.traceState = DumpStackState::DEFAULT;
889         }
890         XCOLLIE_LOGI("Detect collect trace task complete.");
891     } else {
892         return false;
893     }
894     return true;
895 }
896 
FetchNextTask(uint64_t now,WatchdogTask & task)897 uint64_t WatchdogInner::FetchNextTask(uint64_t now, WatchdogTask& task)
898 {
899     std::unique_lock<std::mutex> lock(lock_);
900     if (isNeedStop_) {
901         while (!checkerQueue_.empty()) {
902             checkerQueue_.pop();
903         }
904         return DEFAULT_TIMEOUT;
905     }
906 
907     if (checkerQueue_.empty()) {
908         return DEFAULT_TIMEOUT;
909     }
910 
911     const WatchdogTask& queuedTaskCheck = checkerQueue_.top();
912     if (CheckCurrentTaskLocked(queuedTaskCheck) && checkerQueue_.empty()) {
913         return DEFAULT_TIMEOUT;
914     }
915 
916     const WatchdogTask& queuedTask = checkerQueue_.top();
917     CheckKickWatchdog(now, queuedTask);
918     if (queuedTask.nextTickTime > now) {
919         return queuedTask.nextTickTime - now;
920     }
921 
922     currentScene_ = "thread DfxWatchdog: Current scenario is task name: " + queuedTask.name + "\n";
923     task = queuedTask;
924     checkerQueue_.pop();
925     return 0;
926 }
927 
ReInsertTaskIfNeed(WatchdogTask & task)928 void WatchdogInner::ReInsertTaskIfNeed(WatchdogTask& task)
929 {
930     if (task.checkInterval == 0) {
931         return;
932     }
933 
934     std::unique_lock<std::mutex> lock(lock_);
935     task.nextTickTime = task.nextTickTime + task.checkInterval;
936     checkerQueue_.push(task);
937 }
938 
Start()939 bool WatchdogInner::Start()
940 {
941     if (pthread_setname_np(pthread_self(), "OS_DfxWatchdog") != 0) {
942         XCOLLIE_LOGW("Failed to set threadName for watchdog, errno:%d.", errno);
943     }
944     SetThreadSignalMask(SIGDUMP, false, false);
945     watchdogStartTime_ = GetCurrentTickMillseconds();
946     XCOLLIE_LOGD("Watchdog is running in thread(%{public}d)!", getproctid());
947     if (SetThreadInfoCallback != nullptr) {
948         SetThreadInfoCallback(ThreadInfo);
949         XCOLLIE_LOGD("Watchdog Set Thread Info Callback");
950     }
951     while (!isNeedStop_) {
952         if (__get_global_hook_flag() && __get_hook_flag()) {
953             __set_hook_flag(false);
954         }
955         uint64_t now = GetCurrentTickMillseconds();
956         WatchdogTask task;
957         uint64_t leftTimeMill = FetchNextTask(now, task);
958         if (leftTimeMill == 0) {
959             if (!IsInSleep(task)) {
960                 task.Run(now);
961                 ReInsertTaskIfNeed(task);
962                 currentScene_ = "thread DfxWatchdog: Current scenario is hicollie.\n";
963             }
964             continue;
965         } else if (isNeedStop_) {
966             break;
967         } else {
968             std::unique_lock<std::mutex> lock(lock_);
969             condition_.wait_for(lock, std::chrono::milliseconds(leftTimeMill));
970         }
971     }
972     if (SetThreadInfoCallback != nullptr) {
973         SetThreadInfoCallback(nullptr);
974     }
975     return true;
976 }
977 
SendMsgToHungtask(const std::string & msg)978 bool WatchdogInner::SendMsgToHungtask(const std::string& msg)
979 {
980     if (g_fd == NOT_OPEN) {
981         return false;
982     }
983 
984     ssize_t watchdogWrite = write(g_fd, msg.c_str(), msg.size());
985     if (watchdogWrite < 0 || watchdogWrite != static_cast<ssize_t>(msg.size())) {
986         XCOLLIE_KLOGE("watchdog write msg failed");
987         close(g_fd);
988         g_fd = NOT_OPEN;
989         return false;
990     }
991     XCOLLIE_KLOGI("Send %{public}s to hungtask Successful\n", msg.c_str());
992     return true;
993 }
994 
KickWatchdog()995 bool WatchdogInner::KickWatchdog()
996 {
997     if (g_fd == NOT_OPEN) {
998         g_fd = open(SYS_KERNEL_HUNGTASK_USERLIST, O_WRONLY);
999         if (g_fd < 0) {
1000             g_fd = open(HMOS_HUNGTASK_USERLIST, O_WRONLY);
1001             if (g_fd < 0) {
1002                 XCOLLIE_KLOGE("can't open hungtask file");
1003                 g_existFile = false;
1004                 return false;
1005             }
1006             XCOLLIE_KLOGE("change to hmos kernel");
1007             isHmos = true;
1008         } else {
1009             XCOLLIE_KLOGI("change to linux kernel");
1010         }
1011 
1012         if (!SendMsgToHungtask(isHmos ? ON_KICK_TIME_HMOS : ON_KICK_TIME)) {
1013             XCOLLIE_KLOGI("kick watchdog send msg to hungtask fail");
1014             return false;
1015         }
1016     }
1017     return SendMsgToHungtask(isHmos ? KICK_TIME_HMOS : KICK_TIME);
1018 }
1019 
IpcCheck()1020 void WatchdogInner::IpcCheck()
1021 {
1022     static bool isIpcCheckInit = false;
1023     if (isIpcCheckInit) {
1024         return;
1025     }
1026 
1027     uint32_t uid = getuid();
1028     bool isJoinIpcFullUid = std::any_of(std::begin(JOIN_IPC_FULL_UIDS), std::end(JOIN_IPC_FULL_UIDS),
1029         [uid](const uint32_t joinIpcFullUid) { return uid == joinIpcFullUid; });
1030     if (isJoinIpcFullUid || GetSelfProcName() == KEY_SCB_STATE) {
1031         if (binderCheckHander_ == nullptr) {
1032             auto runner = AppExecFwk::EventRunner::Create(IPC_CHECKER);
1033             binderCheckHander_ = std::make_shared<AppExecFwk::EventHandler>(runner);
1034             if (!InsertWatchdogTaskLocked(IPC_CHECKER, WatchdogTask(IPC_FULL, binderCheckHander_,
1035                 nullptr, IPC_CHECKER_TIME))) {
1036                 XCOLLIE_LOGE("Add %{public}s thread fail", IPC_CHECKER);
1037             }
1038         }
1039     }
1040     isIpcCheckInit = true;
1041 }
1042 
WriteStringToFile(uint32_t pid,const char * str)1043 bool WatchdogInner::WriteStringToFile(uint32_t pid, const char *str)
1044 {
1045     char file[PATH_LEN] = {0};
1046     int32_t newPid = static_cast<int32_t>(pid);
1047     if (snprintf_s(file, PATH_LEN, PATH_LEN - 1, "/proc/%d/unexpected_die_catch", newPid) == -1) {
1048         XCOLLIE_LOGI("failed to build path for %{public}d.", newPid);
1049         return false;
1050     }
1051     FILE* fp = fopen(file, "wb");
1052     if (fp == nullptr) {
1053         XCOLLIE_LOGI("failed to open file %{public}s, errno: %{public}d", file, errno);
1054         return false;
1055     }
1056     bool writeResult = true;
1057     if (fwrite(str, sizeof(char), strlen(str), fp) != strlen(str)) {
1058         XCOLLIE_LOGI("failed to write file %{public}s, errno: %{public}d", file, errno);
1059         writeResult = false;
1060     }
1061     if (fclose(fp)) {
1062         XCOLLIE_LOGE("fclose is failed");
1063     }
1064     fp = nullptr;
1065     return writeResult;
1066 }
1067 
FfrtCallback(uint64_t taskId,const char * taskInfo,uint32_t delayedTaskCount)1068 void WatchdogInner::FfrtCallback(uint64_t taskId, const char *taskInfo, uint32_t delayedTaskCount)
1069 {
1070     std::string description = "FfrtCallback: task(";
1071     description += taskInfo;
1072     description += ") blocked " + std::to_string(FFRT_CALLBACK_TIME / TIME_MS_TO_S) + "s";
1073     std::string info(taskInfo);
1074     if (info.find("Queue_Schedule_Timeout") != std::string::npos) {
1075         WatchdogInner::SendFfrtEvent(description, "SERVICE_WARNING", taskInfo);
1076         description += ", report twice instead of exiting process.";
1077         WatchdogInner::SendFfrtEvent(description, "SERVICE_BLOCK", taskInfo);
1078         WatchdogInner::KillPeerBinderProcess(description);
1079         return;
1080     }
1081     bool isExist = false;
1082     {
1083         std::unique_lock<std::mutex> lock(lockFfrt_);
1084         auto &map = WatchdogInner::GetInstance().taskIdCnt;
1085         auto search = map.find(taskId);
1086         if (search != map.end()) {
1087             isExist = true;
1088         } else {
1089             map[taskId] = SERVICE_WARNING;
1090         }
1091     }
1092 
1093     if (isExist) {
1094         description += ", report twice instead of exiting process."; // 1s = 1000ms
1095         WatchdogInner::SendFfrtEvent(description, "SERVICE_BLOCK", taskInfo);
1096         WatchdogInner::GetInstance().taskIdCnt.erase(taskId);
1097         WatchdogInner::KillPeerBinderProcess(description);
1098     } else {
1099         WatchdogInner::SendFfrtEvent(description, "SERVICE_WARNING", taskInfo);
1100     }
1101 }
1102 
InitFfrtWatchdog()1103 void WatchdogInner::InitFfrtWatchdog()
1104 {
1105     CreateWatchdogThreadIfNeed();
1106     ffrt_task_timeout_set_cb(FfrtCallback);
1107     ffrt_task_timeout_set_threshold(FFRT_CALLBACK_TIME);
1108     std::unique_lock<std::mutex> lock(lock_);
1109     IpcCheck();
1110 }
1111 
SendFfrtEvent(const std::string & msg,const std::string & eventName,const char * taskInfo)1112 void WatchdogInner::SendFfrtEvent(const std::string &msg, const std::string &eventName, const char * taskInfo)
1113 {
1114     int32_t pid = getprocpid();
1115     if (IsProcessDebug(pid)) {
1116         XCOLLIE_LOGI("heap dump or debug for %{public}d, don't report.", pid);
1117         return;
1118     }
1119     uint32_t gid = getgid();
1120     uint32_t uid = getuid();
1121     time_t curTime = time(nullptr);
1122     std::string sendMsg = std::string((ctime(&curTime) == nullptr) ? "" : ctime(&curTime)) +
1123         "\n" + msg + "\n";
1124     char* buffer = new char[FFRT_BUFFER_SIZE + 1]();
1125     buffer[FFRT_BUFFER_SIZE] = 0;
1126     ffrt_dump(DUMP_INFO_ALL, buffer, FFRT_BUFFER_SIZE);
1127     sendMsg += buffer;
1128     delete[] buffer;
1129     int32_t tid = pid;
1130     GetFfrtTaskTid(tid, sendMsg);
1131 #ifdef HISYSEVENT_ENABLE
1132     int ret = HiSysEventWrite(HiSysEvent::Domain::FRAMEWORK, eventName, HiSysEvent::EventType::FAULT,
1133         "PID", pid, "TID", tid, "TGID", gid, "UID", uid, "MODULE_NAME", taskInfo, "PROCESS_NAME", GetSelfProcName(),
1134         "MSG", sendMsg, "STACK", GetProcessStacktrace());
1135     if (ret == ERR_OVER_SIZE) {
1136         std::string stack = "";
1137         GetBacktraceStringByTid(stack, tid, 0, true);
1138         ret = HiSysEventWrite(HiSysEvent::Domain::FRAMEWORK, eventName, HiSysEvent::EventType::FAULT,
1139             "PID", pid, "TID", tid, "TGID", gid, "UID", uid, "MODULE_NAME", taskInfo,
1140             "PROCESS_NAME", GetSelfProcName(), "MSG", sendMsg, "STACK", stack);
1141     }
1142 
1143     XCOLLIE_LOGI("hisysevent write result=%{public}d, send event [FRAMEWORK,%{public}s], "
1144         "msg=%{public}s", ret, eventName.c_str(), msg.c_str());
1145 #else
1146     XCOLLIE_LOGI("hisysevent not exists");
1147 #endif
1148 }
1149 
GetFfrtTaskTid(int32_t & tid,const std::string & msg)1150 void WatchdogInner::GetFfrtTaskTid(int32_t& tid, const std::string& msg)
1151 {
1152     std::string queueNameFrontStr = "us. queue name [";
1153     size_t queueNameFrontPos = msg.find(queueNameFrontStr);
1154     if (queueNameFrontPos == std::string::npos) {
1155         return;
1156     }
1157     size_t queueNameRearPos = msg.find("], remaining tasks count=");
1158     size_t queueStartPos = queueNameFrontPos + queueNameFrontStr.length();
1159     if (queueNameRearPos == std::string::npos || queueNameRearPos <= queueStartPos) {
1160         return;
1161     }
1162     size_t queueNameLength = queueNameRearPos - queueStartPos;
1163     std::string workerTidFrontStr = " worker tid ";
1164     std::string taskIdFrontStr = " is running, task id ";
1165     std::string queueNameStr = " name " + msg.substr(queueStartPos, queueNameLength);
1166     std::istringstream issMsg(msg);
1167     std::string line;
1168     while (std::getline(issMsg, line, '\n')) {
1169         size_t workerTidFrontPos = line.find(workerTidFrontStr);
1170         size_t taskIdFrontPos = line.find(taskIdFrontStr);
1171         size_t queueNamePos = line.find(queueNameStr);
1172         size_t workerStartPos = workerTidFrontPos + workerTidFrontStr.length();
1173         if (workerTidFrontPos == std::string::npos || taskIdFrontPos == std::string::npos ||
1174             queueNamePos == std::string::npos || taskIdFrontPos <= workerStartPos) {
1175             continue;
1176         }
1177         size_t tidLength = taskIdFrontPos - workerStartPos;
1178         if (tidLength < std::to_string(INT32_MAX).length()) {
1179             std::string tidStr = line.substr(workerStartPos, tidLength);
1180             if (std::all_of(std::begin(tidStr), std::end(tidStr), [] (const char& c) {
1181                 return isdigit(c);
1182             })) {
1183                 tid = std::stoi(tidStr);
1184                 return;
1185             }
1186         }
1187     }
1188 }
1189 
LeftTimeExitProcess(const std::string & description)1190 void WatchdogInner::LeftTimeExitProcess(const std::string &description)
1191 {
1192     int32_t pid = getprocpid();
1193     if (IsProcessDebug(pid)) {
1194         XCOLLIE_LOGI("heap dump or debug for %{public}d, don't exit.", pid);
1195         return;
1196     }
1197     DelayBeforeExit(10); // sleep 10s for hiview dump
1198     bool result = WatchdogInner::WriteStringToFile(pid, "0");
1199     XCOLLIE_LOGI("Process is going to exit, reason:%{public}s, write to file: %{public}d.",
1200         description.c_str(), result);
1201 
1202     _exit(0);
1203 }
1204 
Stop()1205 bool WatchdogInner::Stop()
1206 {
1207     IPCDfx::SetIPCProxyLimit(0, nullptr);
1208     if (mainRunner_ != nullptr) {
1209         mainRunner_->SetMainLooperWatcher(nullptr, nullptr);
1210     }
1211     isNeedStop_.store(true);
1212     condition_.notify_all();
1213     if (threadLoop_ != nullptr && threadLoop_->joinable()) {
1214         threadLoop_->join();
1215         threadLoop_ = nullptr;
1216     }
1217     if (g_fd != NOT_OPEN) {
1218         close(g_fd);
1219         g_fd = NOT_OPEN;
1220     }
1221     return true;
1222 }
1223 
KillPeerBinderProcess(const std::string & description)1224 void WatchdogInner::KillPeerBinderProcess(const std::string &description)
1225 {
1226     bool result = false;
1227     if (getuid() == FOUNDATION_UID) {
1228         result = KillProcessByPid(getprocpid());
1229     }
1230     if (!result) {
1231         WatchdogInner::LeftTimeExitProcess(description);
1232     }
1233 }
1234 
RemoveInnerTask(const std::string & name)1235 void WatchdogInner::RemoveInnerTask(const std::string& name)
1236 {
1237     if (name.empty()) {
1238         XCOLLIE_LOGI("RemoveInnerTask fail, cname is null");
1239         return;
1240     }
1241     std::priority_queue<WatchdogTask> tmpQueue;
1242     std::unique_lock<std::mutex> lock(lock_);
1243     size_t size = checkerQueue_.size();
1244     if (size == 0) {
1245         XCOLLIE_LOGE("RemoveInnerTask %{public}s fail, empty queue!", name.c_str());
1246         return;
1247     }
1248     while (!checkerQueue_.empty()) {
1249         const WatchdogTask& task = checkerQueue_.top();
1250         if (task.name != name) {
1251             tmpQueue.push(task);
1252         } else {
1253             size_t nameSize = taskNameSet_.size();
1254             if (nameSize != 0 && !task.isOneshotTask) {
1255                 taskNameSet_.erase(name);
1256                 XCOLLIE_LOGD("RemoveInnerTask name %{public}s, remove result=%{public}d",
1257                     name.c_str(), nameSize > taskNameSet_.size());
1258             }
1259         }
1260         checkerQueue_.pop();
1261     }
1262     if (tmpQueue.size() == size) {
1263         XCOLLIE_LOGE("RemoveInnerTask fail, can not find name %{public}s, size=%{public}zu!",
1264             name.c_str(), size);
1265     }
1266     tmpQueue.swap(checkerQueue_);
1267 }
1268 
InitBeginFunc(const char * name)1269 void InitBeginFunc(const char* name)
1270 {
1271     std::string nameStr(name);
1272     WatchdogInner::GetInstance().bussinessBeginTime_ = DistributeStart(nameStr);
1273 }
1274 
InitEndFunc(const char * name)1275 void InitEndFunc(const char* name)
1276 {
1277     std::string nameStr(name);
1278     DistributeEnd(nameStr, WatchdogInner::GetInstance().bussinessBeginTime_);
1279 }
1280 
InitMainLooperWatcher(WatchdogInnerBeginFunc * beginFunc,WatchdogInnerEndFunc * endFunc)1281 void WatchdogInner::InitMainLooperWatcher(WatchdogInnerBeginFunc* beginFunc,
1282     WatchdogInnerEndFunc* endFunc)
1283 {
1284     int64_t tid = getproctid();
1285     if (beginFunc && endFunc) {
1286         if (buissnessThreadInfo_.find(tid) != buissnessThreadInfo_.end()) {
1287             XCOLLIE_LOGI("Tid =%{public}" PRId64 "already exits, "
1288                 "no repeated initialization.", tid);
1289             return;
1290         }
1291         if (mainRunner_ != nullptr) {
1292             mainRunner_->SetMainLooperWatcher(nullptr, nullptr);
1293         }
1294         *beginFunc = InitBeginFunc;
1295         *endFunc = InitEndFunc;
1296         buissnessThreadInfo_.insert(tid);
1297     } else {
1298         if (buissnessThreadInfo_.find(tid) != buissnessThreadInfo_.end()) {
1299             XCOLLIE_LOGI("Remove already init tid=%{public}." PRId64, tid);
1300             mainRunner_->SetMainLooperWatcher(DistributeStart, DistributeEnd);
1301             buissnessThreadInfo_.erase(tid);
1302         }
1303     }
1304 }
1305 
SetAppDebug(bool isAppDebug)1306 void WatchdogInner::SetAppDebug(bool isAppDebug)
1307 {
1308     isAppDebug_ = isAppDebug;
1309 }
1310 
GetAppDebug()1311 bool WatchdogInner::GetAppDebug()
1312 {
1313     return isAppDebug_;
1314 }
1315 
UpdateJankParam(int sampleInterval,int startUpTime,int sampleCount,int logType,int reportTimes)1316 void WatchdogInner::UpdateJankParam(int sampleInterval, int startUpTime, int sampleCount,
1317     int logType, int reportTimes)
1318 {
1319     jankParamsMap[KEY_LOG_TYPE] = logType;
1320     jankParamsMap[KEY_SAMPLE_INTERVAL] = sampleInterval;
1321     jankParamsMap[KEY_IGNORE_STARTUP_TIME] = startUpTime;
1322     jankParamsMap[KEY_SAMPLE_COUNT] = sampleCount;
1323     if (logType == CatchLogType::LOGTYPE_COLLECT_TRACE) {
1324         XCOLLIE_LOGI("Set thread only dump trace success.");
1325         return;
1326     }
1327     if (jankParamsMap[KEY_SET_TIMES_FLAG] == SET_TIMES_FLAG) {
1328         jankParamsMap[KEY_SAMPLE_REPORT_TIMES] = reportTimes;
1329         stackContent_.reportTimes = reportTimes;
1330         jankParamsMap[KEY_SET_TIMES_FLAG] = 0;
1331     }
1332     XCOLLIE_LOGI("Set thread sampler params success. logType: %{public}d, sample interval: %{public}d, "
1333         "ignore startUp interval: %{public}d, count: %{public}d, reportTimes: %{public}d.",
1334         logType, sampleInterval, startUpTime, sampleCount, stackContent_.reportTimes);
1335 }
1336 
ConvertStrToNum(std::map<std::string,std::string> paramsMap,const std::string & key)1337 int WatchdogInner::ConvertStrToNum(std::map<std::string, std::string> paramsMap, const std::string& key)
1338 {
1339     int num = -1;
1340     auto it = paramsMap.find(key);
1341     if (it == paramsMap.end()) {
1342         XCOLLIE_LOGE("Set the thread sampler param error, %{public}s is not exist.", key.c_str());
1343         return num;
1344     }
1345     std::string str = it->second;
1346     if (!str.empty() && str.size() < std::to_string(INT32_MAX).length()) {
1347         if (std::all_of(std::begin(str), std::end(str), [] (const char &c) {
1348             return isdigit(c);
1349         })) {
1350             num = std::stoi(str);
1351         }
1352     }
1353     if (num < 0) {
1354         XCOLLIE_LOGE("Set param error, %{public}s: %{public}s should be a number and greater than 0.",
1355             key.c_str(), str.c_str());
1356     }
1357     return num;
1358 }
1359 
CheckSampleParam(std::map<std::string,std::string> paramsMap)1360 bool WatchdogInner::CheckSampleParam(std::map<std::string, std::string> paramsMap)
1361 {
1362     int sampleInterval = ConvertStrToNum(paramsMap, KEY_SAMPLE_INTERVAL);
1363     if (sampleInterval < 0) {
1364         return false;
1365     } else if (sampleInterval < SAMPLE_INTERVAL_MIN || sampleInterval > SAMPLE_INTERVAL_MAX) {
1366         XCOLLIE_LOGE("Set the range of sample stack is from %{public}d to %{public}d, "
1367             "interval: %{public}d.", SAMPLE_INTERVAL_MIN, SAMPLE_INTERVAL_MAX, sampleInterval);
1368         return false;
1369     }
1370 
1371     int startUpTime = ConvertStrToNum(paramsMap, KEY_IGNORE_STARTUP_TIME);
1372     if (startUpTime < 0) {
1373         return false;
1374     } else if (startUpTime < IGNORE_STARTUP_TIME_MIN) {
1375         XCOLLIE_LOGE("Set the minimum of ignore startup interval is %{public}d s, "
1376             "interval: %{public}d.", IGNORE_STARTUP_TIME_MIN, startUpTime);
1377         return false;
1378     }
1379 
1380     int sampleCount = ConvertStrToNum(paramsMap, KEY_SAMPLE_COUNT);
1381     if (sampleCount < 0) {
1382         return false;
1383     }
1384     int maxSampleCount = MAX_SAMPLE_STACK_TIMES / sampleInterval - SAMPLE_EXTRA_COUNT;
1385     if (sampleCount < SAMPLE_COUNT_MIN || sampleCount > maxSampleCount) {
1386         XCOLLIE_LOGE("Set the range of sample count, min value: %{public}d max value: %{public}d, count: %{public}d.",
1387             SAMPLE_COUNT_MIN, maxSampleCount, sampleCount);
1388         return false;
1389     }
1390 
1391     int reportTimes = ConvertStrToNum(paramsMap, KEY_SAMPLE_REPORT_TIMES);
1392     if (reportTimes < 0) {
1393         return false;
1394     } else if (reportTimes < SAMPLE_REPORT_TIMES_MIN || reportTimes > SAMPLE_REPORT_TIMES_MAX) {
1395         XCOLLIE_LOGE("Set the range of sample reportTimes is from %{public}d to %{public}d,"
1396             "reportTimes: %{public}d", SAMPLE_REPORT_TIMES_MIN, SAMPLE_REPORT_TIMES_MAX, reportTimes);
1397         return false;
1398     }
1399     UpdateJankParam(sampleInterval, startUpTime, sampleCount, CatchLogType::LOGTYPE_SAMPLE_STACK, reportTimes);
1400     return true;
1401 }
1402 
SetEventConfig(std::map<std::string,std::string> paramsMap)1403 int WatchdogInner::SetEventConfig(std::map<std::string, std::string> paramsMap)
1404 {
1405     if (paramsMap.empty()) {
1406         XCOLLIE_LOGE("Set the thread sampler param map is null.");
1407         return -1;
1408     }
1409     int logType = ConvertStrToNum(paramsMap, KEY_LOG_TYPE);
1410     size_t size = paramsMap.size();
1411     switch (logType) {
1412         case CatchLogType::LOGTYPE_DEFAULT:
1413             return -1;
1414         case CatchLogType::LOGTYPE_NONE:
1415         case CatchLogType::LOGTYPE_COLLECT_TRACE: {
1416             if (size != SAMPLE_PARAMS_MIN_SIZE) {
1417                 XCOLLIE_LOGE("Set the thread sampler param map size error, can only set log_type. "
1418                     "map size: %{public}zu", size);
1419                 return -1;
1420             }
1421             UpdateJankParam(SAMPLE_DEFULE_INTERVAL, DEFAULT_IGNORE_STARTUP_TIME, SAMPLE_DEFULE_COUNT,
1422                 logType, SAMPLE_REPORT_TIMES_MIN);
1423             break;
1424         }
1425         case CatchLogType::LOGTYPE_SAMPLE_STACK: {
1426             if (size != SAMPLE_PARAMS_MAX_SIZE) {
1427                 XCOLLIE_LOGE("Set the thread sampler param map size error, current map size: %{public}zu", size);
1428                 return -1;
1429             }
1430             if (!CheckSampleParam(paramsMap)) {
1431                 return -1;
1432             }
1433             break;
1434         }
1435         default: {
1436             XCOLLIE_LOGE("Set the log_type can only be 0、1、2, logType: %{public}d", logType);
1437             return -1;
1438         }
1439     };
1440     return 0;
1441 }
1442 
SetSpecifiedProcessName(const std::string & name)1443 void WatchdogInner::SetSpecifiedProcessName(const std::string& name)
1444 {
1445     specifiedProcessName_ = name;
1446 }
1447 
GetSpecifiedProcessName()1448 std::string WatchdogInner::GetSpecifiedProcessName()
1449 {
1450     return specifiedProcessName_;
1451 }
1452 
SetScrollState(bool isScroll)1453 void WatchdogInner::SetScrollState(bool isScroll)
1454 {
1455     isScroll_ = isScroll;
1456 }
1457 } // end of namespace HiviewDFX
1458 } // end of namespace OHOS
1459