• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "watchdog_inner.h"
17 
18 #include <cerrno>
19 #include <climits>
20 #include <mutex>
21 
22 #include <sys/stat.h>
23 #include <sys/types.h>
24 #include <fcntl.h>
25 #include <pthread.h>
26 #include <unistd.h>
27 #include <csignal>
28 #include <string>
29 
30 #include <securec.h>
31 #include <dlfcn.h>
32 
33 #include "backtrace_local.h"
34 #include "hisysevent.h"
35 #include "ipc_skeleton.h"
36 #include "xcollie_utils.h"
37 #include "xcollie_define.h"
38 #include "dfx_define.h"
39 #include "parameter.h"
40 
41 typedef void(*ThreadInfoCallBack)(char* buf, size_t len, void* ucontext);
42 extern "C" void SetThreadInfoCallback(ThreadInfoCallBack func) __attribute__((weak));
43 namespace OHOS {
44 namespace HiviewDFX {
45 constexpr uint64_t DEFAULT_TIMEOUT = 60 * 1000;
46 constexpr uint32_t FFRT_CALLBACK_TIME = 30 * 1000;
47 constexpr uint32_t IPC_CHECKER_TIME = 30 * 1000;
48 constexpr uint32_t TIME_MS_TO_S = 1000;
49 constexpr int INTERVAL_KICK_TIME = 6 * 1000;
50 constexpr int32_t WATCHED_UID = 5523;
51 constexpr int SERVICE_WARNING = 1;
52 const char* SYS_KERNEL_HUNGTASK_USERLIST = "/sys/kernel/hungtask/userlist";
53 const char* HMOS_HUNGTASK_USERLIST = "/proc/sys/hguard/user_list";
54 const std::string ON_KICK_TIME = "on,72";
55 const std::string ON_KICK_TIME_HMOS = "on,63,foundation";
56 const std::string KICK_TIME = "kick";
57 const std::string KICK_TIME_HMOS = "kick,foundation";
58 const int32_t NOT_OPEN = -1;
59 std::mutex WatchdogInner::lockFfrt_;
60 static uint64_t g_nextKickTime = GetCurrentTickMillseconds();
61 static int32_t g_fd = NOT_OPEN;
62 static bool g_existFile = true;
63 
64 constexpr uint64_t MAX_START_TIME = 10 * 1000;
65 const char* LIB_THREAD_SAMPLER_PATH = "libthread_sampler.z.so";
66 constexpr size_t STACK_LENGTH = 32 * 1024;
67 typedef int (*ThreadSamplerInitFunc)(int);
68 typedef int32_t (*ThreadSamplerSampleFunc)();
69 typedef int (*ThreadSamplerCollectFunc)(char*, size_t, int);
70 typedef void (*ThreadSamplerDeinitFunc)();
71 
72 namespace {
ThreadInfo(char * buf,size_t len,void * ucontext)73 void ThreadInfo(char *buf  __attribute__((unused)),
74                 size_t len  __attribute__((unused)),
75                 void* ucontext  __attribute__((unused)))
76 {
77     if (ucontext == nullptr) {
78         return;
79     }
80 
81     auto ret = memcpy_s(buf, len, WatchdogInner::GetInstance().currentScene_.c_str(),
82         WatchdogInner::GetInstance().currentScene_.size());
83     if (ret != 0) {
84         return;
85     }
86 }
87 
/**
 * Adjusts the calling thread's signal mask around one signal.
 *
 * The thread's current mask is read first, signo is added to or removed from that
 * copy, and the resulting set is then applied with SIG_BLOCK or SIG_UNBLOCK.
 *
 * NOTE(review): the set passed to pthread_sigmask() is "current mask +/- signo",
 * not just {signo}. With isBlock == false every signal contained in that set is
 * unblocked — confirm this is the intended behaviour.
 *
 * @param signo       signal number to add to / remove from the working set
 * @param isAddSignal true: sigaddset(signo); false: sigdelset(signo)
 * @param isBlock     true: apply the set with SIG_BLOCK; false: with SIG_UNBLOCK
 */
void SetThreadSignalMask(int signo, bool isAddSignal, bool isBlock)
{
    sigset_t mask;
    sigemptyset(&mask);
    // With a null "set" argument pthread_sigmask() only reports the current mask.
    pthread_sigmask(SIG_SETMASK, nullptr, &mask);

    if (!isAddSignal) {
        sigdelset(&mask, signo);
    } else {
        sigaddset(&mask, signo);
    }

    const int how = isBlock ? SIG_BLOCK : SIG_UNBLOCK;
    pthread_sigmask(how, &mask, nullptr);
}
104 }
105 
WatchdogInner()106 WatchdogInner::WatchdogInner()
107     : cntCallback_(0), timeCallback_(0), sampleTaskState_(0)
108 {
109     currentScene_ = "thread DfxWatchdog: Current scenario is hicollie.\n";
110 }
111 
~WatchdogInner()112 WatchdogInner::~WatchdogInner()
113 {
114     Stop();
115 }
116 
IsInAppspwan()117 static bool IsInAppspwan()
118 {
119     if (getuid() == 0 && GetSelfProcName().find("appspawn") != std::string::npos) {
120         return true;
121     }
122 
123     if (getuid() == 0 && GetSelfProcName().find("nativespawn") != std::string::npos) {
124         return true;
125     }
126 
127     return false;
128 }
129 
SetBundleInfo(const std::string & bundleName,const std::string & bundleVersion)130 void WatchdogInner::SetBundleInfo(const std::string& bundleName, const std::string& bundleVersion)
131 {
132     bundleName_ = bundleName;
133     bundleVersion_ = bundleVersion;
134 }
135 
SetForeground(const bool & isForeground)136 void WatchdogInner::SetForeground(const bool& isForeground)
137 {
138     isForeground_ = isForeground;
139 }
140 
ReportMainThreadEvent()141 bool WatchdogInner::ReportMainThreadEvent()
142 {
143     std::string stack = "";
144     CollectStack(stack);
145     Deinit();
146     std::string path = "";
147     std::string eventName = "MAIN_THREAD_JANK";
148     if (!buissnessThreadInfo_.empty()) {
149         eventName = "BUSSINESS_THREAD_JANK";
150     }
151     if (!WriteStackToFd(getprocpid(), path, stack, eventName)) {
152         XCOLLIE_LOGI("MainThread WriteStackToFd Failed");
153         return false;
154     }
155     int result = HiSysEventWrite(HiSysEvent::Domain::FRAMEWORK, "MAIN_THREAD_JANK",
156         HiSysEvent::EventType::FAULT,
157         "BUNDLE_VERSION", bundleVersion_,
158         "BUNDLE_NAME", bundleName_,
159         "BEGIN_TIME", timeContent_.reportBegin / MILLISEC_TO_NANOSEC,
160         "END_TIME", timeContent_.reportEnd / MILLISEC_TO_NANOSEC,
161         "EXTERNAL_LOG", path,
162         "STACK", stack,
163         "JANK_LEVEL", 0,
164         "THREAD_NAME", GetSelfProcName(),
165         "FOREGROUND", isForeground_,
166         "LOG_TIME", GetTimeStamp() / MILLISEC_TO_NANOSEC);
167     XCOLLIE_LOGI("MainThread HiSysEventWrite result=%{public}d", result);
168     return result >= 0;
169 }
170 
CheckEventTimer(const int64_t & currentTime)171 bool WatchdogInner::CheckEventTimer(const int64_t& currentTime)
172 {
173     if (timeContent_.reportBegin == timeContent_.curBegin &&
174         timeContent_.reportEnd == timeContent_.curEnd) {
175         return false;
176     }
177     return (timeContent_.curEnd <= timeContent_.curBegin &&
178         (currentTime - timeContent_.curBegin >= DURATION_TIME * MILLISEC_TO_NANOSEC)) ||
179         (timeContent_.curEnd - timeContent_.curBegin > DURATION_TIME * MILLISEC_TO_NANOSEC);
180 }
181 
ThreadSampleTask(int32_t (* threadSamplerSampleFunc)())182 void WatchdogInner::ThreadSampleTask(int32_t (*threadSamplerSampleFunc)())
183 {
184     if (sampleTaskState_ == DumpStackState::DEFAULT) {
185         sampleTaskState_++;
186         return;
187     }
188     int64_t currentTime = GetTimeStamp();
189     if (stackContent_.collectCount > DumpStackState::DEFAULT &&
190         stackContent_.collectCount < COLLECT_STACK_COUNT) {
191         threadSamplerSampleFunc();
192         stackContent_.collectCount++;
193     } else if (stackContent_.collectCount == COLLECT_STACK_COUNT) {
194         ReportMainThreadEvent();
195         isMainThreadProfileTaskEnabled_ = true;
196         return;
197     } else {
198         if (CheckEventTimer(currentTime)) {
199             threadSamplerSampleFunc();
200             stackContent_.collectCount++;
201         } else {
202             stackContent_.detectorCount++;
203         }
204     }
205     if (stackContent_.detectorCount == DETECT_STACK_COUNT) {
206         isMainThreadProfileTaskEnabled_ = true;
207     }
208 }
209 
StartProfileMainThread(int32_t interval)210 int32_t WatchdogInner::StartProfileMainThread(int32_t interval)
211 {
212     std::unique_lock<std::mutex> lock(lock_);
213 
214     uint64_t now = GetCurrentTickMillseconds();
215     if (now - watchdogStartTime_ < MAX_START_TIME) {
216         XCOLLIE_LOGI("application is in starting period.\n");
217         stackContent_.stackState = DumpStackState::DEFAULT;
218         return -1;
219     }
220 
221     funcHandler_ = dlopen(LIB_THREAD_SAMPLER_PATH, RTLD_LAZY);
222 
223     auto threadSamplerInitFunc =
224         reinterpret_cast<ThreadSamplerInitFunc>(FunctionOpen(funcHandler_, "ThreadSamplerInit"));
225     auto threadSamplerSampleFunc =
226         reinterpret_cast<ThreadSamplerSampleFunc>(FunctionOpen(funcHandler_, "ThreadSamplerSample"));
227     if (threadSamplerInitFunc == nullptr || threadSamplerSampleFunc == nullptr) {
228         funcHandler_ = nullptr;
229         return -1;
230     }
231 
232     int initThreadSamplerRet = threadSamplerInitFunc(COLLECT_STACK_COUNT);
233     if (initThreadSamplerRet != 0) {
234         return -1;
235     }
236 
237     sampleTaskState_ = 0;
238     stackContent_.detectorCount = 0;
239     stackContent_.collectCount = 0;
240     auto sampleTask = [this, threadSamplerSampleFunc]() {
241         ThreadSampleTask(threadSamplerSampleFunc);
242     };
243 
244     WatchdogTask task("ThreadSampler", sampleTask, 0, interval, true);
245     InsertWatchdogTaskLocked("ThreadSampler", std::move(task));
246     return 0;
247 }
248 
CollectStack(std::string & stack)249 bool WatchdogInner::CollectStack(std::string& stack)
250 {
251     if (funcHandler_ == nullptr) {
252         XCOLLIE_LOGE("open library failed.");
253         return false;
254     }
255 
256     auto threadSamplerCollectFunc =
257         reinterpret_cast<ThreadSamplerCollectFunc>(FunctionOpen(funcHandler_, "ThreadSamplerCollect"));
258     if (threadSamplerCollectFunc == nullptr) {
259         funcHandler_ = nullptr;
260         return false;
261     }
262     int treeFormat = 1;
263     char* stk = new char[STACK_LENGTH];
264     int collectRet = threadSamplerCollectFunc(stk, STACK_LENGTH, treeFormat);
265     stack = stk;
266     delete[] stk;
267     return collectRet == 0;
268 }
269 
Deinit()270 void WatchdogInner::Deinit()
271 {
272     if (funcHandler_ == nullptr) {
273         XCOLLIE_LOGE("open library failed.");
274         return;
275     }
276 
277     auto threadSamplerDeinitFunc =
278         reinterpret_cast<ThreadSamplerDeinitFunc>(FunctionOpen(funcHandler_, "ThreadSamplerDeinit"));
279     if (threadSamplerDeinitFunc == nullptr) {
280         funcHandler_ = nullptr;
281         return;
282     }
283     threadSamplerDeinitFunc();
284 }
285 
ChangeState(int & state,int targetState)286 void WatchdogInner::ChangeState(int& state, int targetState)
287 {
288     timeContent_.reportBegin = timeContent_.curBegin;
289     timeContent_.reportEnd = timeContent_.curEnd;
290     state = targetState;
291 }
292 
DayChecker(int & state,TimePoint currenTime,TimePoint lastEndTime,int64_t checkTimer)293 void WatchdogInner::DayChecker(int& state, TimePoint currenTime, TimePoint lastEndTime,
294     int64_t checkTimer)
295 {
296     auto diff = currenTime - lastEndTime;
297     int64_t intervalTime = std::chrono::duration_cast<std::chrono::milliseconds>
298         (diff).count();
299     if (intervalTime >= checkTimer) {
300         XCOLLIE_LOGD("MainThread StartProfileMainThread Over checkTimer: "
301             "%{public}" PRId64 " ms", checkTimer);
302         state = DumpStackState::DEFAULT;
303     }
304 }
305 
StartTraceProfile(int32_t interval)306 void WatchdogInner::StartTraceProfile(int32_t interval)
307 {
308     if (traceCollector_ == nullptr) {
309         XCOLLIE_LOGI("MainThread TraceCollector Failed.");
310         return;
311     }
312     traceContent_.dumpCount = 0;
313     traceContent_.traceCount = 0;
314     auto traceTask = [this]() {
315         traceContent_.traceCount++;
316         int64_t currentTime = GetTimeStamp();
317         if (CheckEventTimer(currentTime)) {
318             traceContent_.dumpCount++;
319         }
320         if (traceContent_.traceCount >= COLLECT_TRACE_MAX) {
321             if (traceContent_.dumpCount >= COLLECT_TRACE_MIN) {
322                 CreateWatchdogDir();
323                 appCaller_.actionId = UCollectClient::ACTION_ID_DUMP_TRACE;
324                 appCaller_.isBusinessJank = !buissnessThreadInfo_.empty();
325                 auto result = traceCollector_->CaptureDurationTrace(appCaller_);
326                 XCOLLIE_LOGI("MainThread TraceCollector Dump result: %{public}d", result.retCode);
327             }
328             isMainThreadTraceEnabled_ = true;
329         }
330     };
331     WatchdogTask task("TraceCollector", traceTask, 0, interval, true);
332     std::unique_lock<std::mutex> lock(lock_);
333     InsertWatchdogTaskLocked("TraceCollector", std::move(task));
334 }
335 
CollectTrace()336 void WatchdogInner::CollectTrace()
337 {
338     traceCollector_ = UCollectClient::TraceCollector::Create();
339     int32_t pid = getprocpid();
340     int32_t uid = static_cast<int64_t>(getuid());
341     appCaller_.actionId = UCollectClient::ACTION_ID_START_TRACE;
342     appCaller_.bundleName = bundleName_;
343     appCaller_.bundleVersion = bundleVersion_;
344     appCaller_.uid = uid;
345     appCaller_.pid = pid;
346     appCaller_.threadName = GetSelfProcName();
347     appCaller_.foreground = isForeground_;
348     appCaller_.happenTime = GetTimeStamp() / MILLISEC_TO_NANOSEC;
349     appCaller_.beginTime = timeContent_.reportBegin / MILLISEC_TO_NANOSEC;
350     appCaller_.endTime = timeContent_.reportEnd / MILLISEC_TO_NANOSEC;
351     auto result = traceCollector_->CaptureDurationTrace(appCaller_);
352     XCOLLIE_LOGI("MainThread TraceCollector Start result: %{public}d", result.retCode);
353     if (result.retCode != 0) {
354         return;
355     }
356     StartTraceProfile(DURATION_TIME);
357 }
358 
DistributeStart(const std::string & name)359 static TimePoint DistributeStart(const std::string& name)
360 {
361     WatchdogInner::GetInstance().timeContent_.curBegin = GetTimeStamp();
362     return std::chrono::steady_clock::now();
363 }
364 
DistributeEnd(const std::string & name,const TimePoint & startTime)365 static void DistributeEnd(const std::string& name, const TimePoint& startTime)
366 {
367     TimePoint endTime = std::chrono::steady_clock::now();
368     auto duration = endTime - startTime;
369     int64_t durationTime = std::chrono::duration_cast<std::chrono::milliseconds>
370         (duration).count();
371     if (duration > std::chrono::milliseconds(DISTRIBUTE_TIME)) {
372         XCOLLIE_LOGI("BlockMonitor event name: %{public}s, Duration Time: %{public}" PRId64 " ms",
373             name.c_str(), durationTime);
374     }
375     WatchdogInner::GetInstance().timeContent_.curEnd = GetTimeStamp();
376     if (WatchdogInner::GetInstance().stackContent_.stackState == DumpStackState::COMPLETE) {
377         int64_t checkTimer = ONE_DAY_LIMIT;
378         if (IsEnableVersion(KEY_DEVELOPER_MODE_STATE, ENABLE_VAULE)) {
379             checkTimer = ONE_HOUR_LIMIT;
380         }
381         WatchdogInner::GetInstance().DayChecker(WatchdogInner::GetInstance().stackContent_.stackState,
382             endTime, WatchdogInner::GetInstance().lastStackTime_, checkTimer);
383     }
384     if (WatchdogInner::GetInstance().traceContent_.traceState == DumpStackState::COMPLETE) {
385         WatchdogInner::GetInstance().DayChecker(WatchdogInner::GetInstance().traceContent_.traceState,
386             endTime, WatchdogInner::GetInstance().lastTraceTime_, ONE_DAY_LIMIT);
387     }
388     if (duration > std::chrono::milliseconds(DURATION_TIME) && duration < std::chrono::milliseconds(DUMPTRACE_TIME) &&
389         WatchdogInner::GetInstance().stackContent_.stackState == DumpStackState::DEFAULT) {
390         if (IsEnableVersion(KEY_ANCO_ENABLE_TYPE, ENABLE_VAULE)) {
391             return;
392         }
393         WatchdogInner::GetInstance().ChangeState(WatchdogInner::GetInstance().stackContent_.stackState,
394             DumpStackState::COMPLETE);
395         WatchdogInner::GetInstance().lastStackTime_ = endTime;
396 
397         int32_t ret = WatchdogInner::GetInstance().StartProfileMainThread(TASK_INTERVAL);
398         XCOLLIE_LOGI("MainThread StartProfileMainThread ret: %{public}d  "
399             "Duration Time: %{public}" PRId64 " ms", ret, durationTime);
400     }
401     if (duration > std::chrono::milliseconds(DUMPTRACE_TIME) &&
402         WatchdogInner::GetInstance().traceContent_.traceState == DumpStackState::DEFAULT) {
403         if (!IsEnableVersion(KEY_HIVIEW_USER_TYPE, ENABLE_HIVIEW_USER_VAULE) ||
404             IsEnableVersion(KEY_ANCO_ENABLE_TYPE, ENABLE_VAULE)) {
405             return;
406         }
407         XCOLLIE_LOGI("MainThread TraceCollector Duration Time: %{public}" PRId64 " ms", durationTime);
408         WatchdogInner::GetInstance().ChangeState(WatchdogInner::GetInstance().traceContent_.traceState,
409             DumpStackState::COMPLETE);
410         WatchdogInner::GetInstance().lastTraceTime_ = endTime;
411         WatchdogInner::GetInstance().CollectTrace();
412     }
413 }
414 
AddThread(const std::string & name,std::shared_ptr<AppExecFwk::EventHandler> handler,TimeOutCallback timeOutCallback,uint64_t interval)415 int WatchdogInner::AddThread(const std::string &name,
416     std::shared_ptr<AppExecFwk::EventHandler> handler, TimeOutCallback timeOutCallback, uint64_t interval)
417 {
418     if (name.empty() || handler == nullptr) {
419         XCOLLIE_LOGE("Add thread fail, invalid args!");
420         return -1;
421     }
422 
423     if (IsInAppspwan()) {
424         return -1;
425     }
426 
427     std::string limitedName = GetLimitedSizeName(name);
428     XCOLLIE_LOGI("Add thread %{public}s to watchdog.", limitedName.c_str());
429     std::unique_lock<std::mutex> lock(lock_);
430 
431     IpcCheck();
432 
433     if (!InsertWatchdogTaskLocked(limitedName, WatchdogTask(limitedName, handler, timeOutCallback, interval))) {
434         return -1;
435     }
436     return 0;
437 }
438 
RunOneShotTask(const std::string & name,Task && task,uint64_t delay)439 void WatchdogInner::RunOneShotTask(const std::string& name, Task&& task, uint64_t delay)
440 {
441     if (name.empty() || task == nullptr) {
442         XCOLLIE_LOGE("Add task fail, invalid args!");
443         return;
444     }
445 
446     if (IsInAppspwan()) {
447         return;
448     }
449 
450     std::unique_lock<std::mutex> lock(lock_);
451     std::string limitedName = GetLimitedSizeName(name);
452     InsertWatchdogTaskLocked(limitedName, WatchdogTask(limitedName, std::move(task), delay, 0, true));
453 }
454 
RunXCollieTask(const std::string & name,uint64_t timeout,XCollieCallback func,void * arg,unsigned int flag)455 int64_t WatchdogInner::RunXCollieTask(const std::string& name, uint64_t timeout, XCollieCallback func,
456     void *arg, unsigned int flag)
457 {
458     if (name.empty() || timeout == 0) {
459         XCOLLIE_LOGE("Add XCollieTask fail, invalid args!");
460         return INVALID_ID;
461     }
462 
463     if (IsInAppspwan()) {
464         return INVALID_ID;
465     }
466 
467     std::unique_lock<std::mutex> lock(lock_);
468     std::string limitedName = GetLimitedSizeName(name);
469     return InsertWatchdogTaskLocked(limitedName, WatchdogTask(limitedName, timeout, func, arg, flag));
470 }
471 
RemoveXCollieTask(int64_t id)472 void WatchdogInner::RemoveXCollieTask(int64_t id)
473 {
474     std::priority_queue<WatchdogTask> tmpQueue;
475     std::unique_lock<std::mutex> lock(lock_);
476     size_t size = checkerQueue_.size();
477     if (size == 0) {
478         XCOLLIE_LOGE("Remove XCollieTask %{public}lld fail, empty queue!", static_cast<long long>(id));
479         return;
480     }
481     while (!checkerQueue_.empty()) {
482         const WatchdogTask& task = checkerQueue_.top();
483         if (task.id != id || task.timeout == 0) {
484             tmpQueue.push(task);
485         }
486         checkerQueue_.pop();
487     }
488     if (tmpQueue.size() == size) {
489         XCOLLIE_LOGE("Remove XCollieTask fail, can not find timer %{public}lld, size=%{public}zu!",
490             static_cast<long long>(id), size);
491     }
492     tmpQueue.swap(checkerQueue_);
493 }
494 
RunPeriodicalTask(const std::string & name,Task && task,uint64_t interval,uint64_t delay)495 void WatchdogInner::RunPeriodicalTask(const std::string& name, Task&& task, uint64_t interval, uint64_t delay)
496 {
497     if (name.empty() || task == nullptr) {
498         XCOLLIE_LOGE("Add task fail, invalid args!");
499         return;
500     }
501 
502     if (IsInAppspwan()) {
503         return;
504     }
505 
506     std::string limitedName = GetLimitedSizeName(name);
507     XCOLLIE_LOGD("Add periodical task %{public}s to watchdog.", name.c_str());
508     std::unique_lock<std::mutex> lock(lock_);
509     InsertWatchdogTaskLocked(limitedName, WatchdogTask(limitedName, std::move(task), delay, interval, false));
510 }
511 
SetTimerCountTask(const std::string & name,uint64_t timeLimit,int countLimit)512 int64_t WatchdogInner::SetTimerCountTask(const std::string &name, uint64_t timeLimit, int countLimit)
513 {
514     if (name.empty() || timeLimit == 0 || countLimit <= 0) {
515         XCOLLIE_LOGE("SetTimerCountTask fail, invalid args!");
516         return INVALID_ID;
517     }
518 
519     if (IsInAppspwan()) {
520         return INVALID_ID;
521     }
522     std::string limitedName = GetLimitedSizeName(name);
523     XCOLLIE_LOGD("SetTimerCountTask name : %{public}s", name.c_str());
524     std::unique_lock<std::mutex> lock(lock_);
525     return InsertWatchdogTaskLocked(limitedName, WatchdogTask(limitedName, timeLimit, countLimit));
526 }
527 
TriggerTimerCountTask(const std::string & name,bool bTrigger,const std::string & message)528 void WatchdogInner::TriggerTimerCountTask(const std::string &name, bool bTrigger, const std::string &message)
529 {
530     std::unique_lock<std::mutex> lock(lock_);
531 
532     if (checkerQueue_.empty()) {
533         XCOLLIE_LOGE("TriggerTimerCountTask name : %{public}s fail, empty queue!", name.c_str());
534         return;
535     }
536 
537     bool isTaskExist = false;
538     uint64_t now = GetCurrentTickMillseconds();
539     std::priority_queue<WatchdogTask> tmpQueue;
540     while (!checkerQueue_.empty()) {
541         WatchdogTask task = checkerQueue_.top();
542         if (task.name == name) {
543             isTaskExist = true;
544             if (bTrigger) {
545                 task.triggerTimes.push_back(now);
546                 task.message = message;
547             } else {
548                 task.triggerTimes.clear();
549             }
550         }
551         tmpQueue.push(task);
552         checkerQueue_.pop();
553     }
554     tmpQueue.swap(checkerQueue_);
555 
556     if (!isTaskExist) {
557         XCOLLIE_LOGE("TriggerTimerCount name : %{public}s does not exist!", name.c_str());
558     }
559 }
560 
IsTaskExistLocked(const std::string & name)561 bool WatchdogInner::IsTaskExistLocked(const std::string& name)
562 {
563     return (taskNameSet_.find(name) != taskNameSet_.end());
564 }
565 
IsExceedMaxTaskLocked()566 bool WatchdogInner::IsExceedMaxTaskLocked()
567 {
568     if (checkerQueue_.size() >= MAX_WATCH_NUM) {
569         XCOLLIE_LOGE("Exceed max watchdog task!");
570         return true;
571     }
572 
573     return false;
574 }
575 
InsertWatchdogTaskLocked(const std::string & name,WatchdogTask && task)576 int64_t WatchdogInner::InsertWatchdogTaskLocked(const std::string& name, WatchdogTask&& task)
577 {
578     if (!task.isOneshotTask && IsTaskExistLocked(name)) {
579         XCOLLIE_LOGI("Task with %{public}s already exist, failed to insert.", name.c_str());
580         return 0;
581     }
582 
583     if (IsExceedMaxTaskLocked()) {
584         XCOLLIE_LOGE("Exceed max watchdog task, failed to insert.");
585         return 0;
586     }
587     int64_t id = task.id;
588     checkerQueue_.push(std::move(task));
589     if (!task.isOneshotTask) {
590         taskNameSet_.insert(name);
591     }
592     CreateWatchdogThreadIfNeed();
593     condition_.notify_all();
594 
595     return id;
596 }
597 
StopWatchdog()598 void WatchdogInner::StopWatchdog()
599 {
600     Stop();
601 }
602 
IsCallbackLimit(unsigned int flag)603 bool WatchdogInner::IsCallbackLimit(unsigned int flag)
604 {
605     bool ret = false;
606     time_t startTime = time(nullptr);
607     if (!(flag & XCOLLIE_FLAG_LOG)) {
608         return ret;
609     }
610     if (timeCallback_ + XCOLLIE_CALLBACK_TIMEWIN_MAX < startTime) {
611         timeCallback_ = startTime;
612     } else {
613         if (++cntCallback_ > XCOLLIE_CALLBACK_HISTORY_MAX) {
614             ret = true;
615         }
616     }
617     return ret;
618 }
619 
IPCProxyLimitCallback(uint64_t num)620 void IPCProxyLimitCallback(uint64_t num)
621 {
622     XCOLLIE_LOGE("ipc proxy num %{public}" PRIu64 " exceed limit", num);
623     if (getuid() >= MIN_APP_UID && IsBetaVersion()) {
624         XCOLLIE_LOGI("Process is going to exit, reason: ipc proxy num exceed limit");
625         _exit(0);
626     }
627 }
628 
CreateWatchdogThreadIfNeed()629 void WatchdogInner::CreateWatchdogThreadIfNeed()
630 {
631     std::call_once(flag_, [this] {
632         if (threadLoop_ == nullptr) {
633             if (mainRunner_ == nullptr) {
634                 mainRunner_ = AppExecFwk::EventRunner::GetMainEventRunner();
635             }
636             mainRunner_->SetMainLooperWatcher(DistributeStart, DistributeEnd);
637             const uint64_t limitNum = 20000;
638             IPCDfx::SetIPCProxyLimit(limitNum, IPCProxyLimitCallback);
639             threadLoop_ = std::make_unique<std::thread>(&WatchdogInner::Start, this);
640             if (getpid() == gettid()) {
641                 SetThreadSignalMask(SIGDUMP, true, true);
642             }
643             XCOLLIE_LOGD("Watchdog is running!");
644         }
645     });
646 }
647 
FetchNextTask(uint64_t now,WatchdogTask & task)648 uint64_t WatchdogInner::FetchNextTask(uint64_t now, WatchdogTask& task)
649 {
650     std::unique_lock<std::mutex> lock(lock_);
651     if (isNeedStop_) {
652         while (!checkerQueue_.empty()) {
653             checkerQueue_.pop();
654         }
655         return DEFAULT_TIMEOUT;
656     }
657 
658     if (checkerQueue_.empty()) {
659         return DEFAULT_TIMEOUT;
660     }
661 
662     const WatchdogTask& queuedTaskCheck = checkerQueue_.top();
663     bool popCheck = true;
664     if (queuedTaskCheck.name.empty()) {
665         checkerQueue_.pop();
666         XCOLLIE_LOGW("queuedTask name is empty.");
667     } else if (queuedTaskCheck.name == STACK_CHECKER && isMainThreadProfileTaskEnabled_) {
668         checkerQueue_.pop();
669         taskNameSet_.erase("ThreadSampler");
670         isMainThreadProfileTaskEnabled_ = false;
671         dlclose(funcHandler_);
672         funcHandler_ = nullptr;
673         XCOLLIE_LOGI("STACK_CHECKER Task pop");
674     } else if (queuedTaskCheck.name == TRACE_CHECKER && isMainThreadTraceEnabled_) {
675         checkerQueue_.pop();
676         taskNameSet_.erase("TraceCollector");
677         isMainThreadTraceEnabled_ = false;
678         XCOLLIE_LOGI("TRACE_CHECKER Task pop");
679     } else {
680         popCheck = false;
681     }
682     if (popCheck && checkerQueue_.empty()) {
683         return DEFAULT_TIMEOUT;
684     }
685 
686     const WatchdogTask& queuedTask = checkerQueue_.top();
687     if (g_existFile && queuedTask.name == IPC_FULL && now - g_nextKickTime > INTERVAL_KICK_TIME) {
688         if (KickWatchdog()) {
689             g_nextKickTime = now;
690         }
691     }
692     if (queuedTask.nextTickTime > now) {
693         return queuedTask.nextTickTime - now;
694     }
695 
696     currentScene_ = "thread DfxWatchdog: Current scenario is task name: " + queuedTask.name + "\n";
697     task = queuedTask;
698     checkerQueue_.pop();
699     return 0;
700 }
701 
ReInsertTaskIfNeed(WatchdogTask & task)702 void WatchdogInner::ReInsertTaskIfNeed(WatchdogTask& task)
703 {
704     if (task.checkInterval == 0) {
705         return;
706     }
707 
708     std::unique_lock<std::mutex> lock(lock_);
709     task.nextTickTime = task.nextTickTime + task.checkInterval;
710     checkerQueue_.push(task);
711 }
712 
Start()713 bool WatchdogInner::Start()
714 {
715     if (pthread_setname_np(pthread_self(), "OS_DfxWatchdog") != 0) {
716         XCOLLIE_LOGW("Failed to set threadName for watchdog, errno:%d.", errno);
717     }
718     SetThreadSignalMask(SIGDUMP, false, false);
719     watchdogStartTime_ = GetCurrentTickMillseconds();
720     XCOLLIE_LOGD("Watchdog is running in thread(%{public}d)!", getproctid());
721     if (SetThreadInfoCallback != nullptr) {
722         SetThreadInfoCallback(ThreadInfo);
723         XCOLLIE_LOGD("Watchdog Set Thread Info Callback");
724     }
725     while (!isNeedStop_) {
726         uint64_t now = GetCurrentTickMillseconds();
727         WatchdogTask task;
728         uint64_t leftTimeMill = FetchNextTask(now, task);
729         if (leftTimeMill == 0) {
730             task.Run(now);
731             ReInsertTaskIfNeed(task);
732             currentScene_ = "thread DfxWatchdog: Current scenario is hicollie.\n";
733             continue;
734         } else if (isNeedStop_) {
735             break;
736         } else {
737             std::unique_lock<std::mutex> lock(lock_);
738             condition_.wait_for(lock, std::chrono::milliseconds(leftTimeMill));
739         }
740     }
741     if (SetThreadInfoCallback != nullptr) {
742         SetThreadInfoCallback(nullptr);
743     }
744     return true;
745 }
746 
SendMsgToHungtask(const std::string & msg)747 bool WatchdogInner::SendMsgToHungtask(const std::string& msg)
748 {
749     if (g_fd == NOT_OPEN) {
750         return false;
751     }
752 
753     ssize_t watchdogWrite = write(g_fd, msg.c_str(), msg.size());
754     if (watchdogWrite < 0 || watchdogWrite != static_cast<ssize_t>(msg.size())) {
755         XCOLLIE_LOGE("watchdogWrite msg failed");
756         close(g_fd);
757         g_fd = NOT_OPEN;
758         return false;
759     }
760     XCOLLIE_LOGE("Send %{public}s to hungtask Successful\n", msg.c_str());
761     return true;
762 }
763 
KickWatchdog()764 bool WatchdogInner::KickWatchdog()
765 {
766     if (g_fd == NOT_OPEN) {
767         g_fd = open(SYS_KERNEL_HUNGTASK_USERLIST, O_WRONLY);
768         if (g_fd < 0) {
769             g_fd = open(HMOS_HUNGTASK_USERLIST, O_WRONLY);
770             if (g_fd < 0) {
771                 XCOLLIE_LOGE("can't open hungtask file");
772                 g_existFile = false;
773                 return false;
774             }
775             XCOLLIE_LOGE("change to hmos kernel");
776             isHmos = true;
777         } else {
778             XCOLLIE_LOGE("change to linux kernel");
779         }
780 
781         if (!SendMsgToHungtask(isHmos ? ON_KICK_TIME_HMOS : ON_KICK_TIME)) {
782             XCOLLIE_LOGE("KickWatchdog SendMsgToHungtask false");
783             return false;
784         }
785     }
786     return SendMsgToHungtask(isHmos ? KICK_TIME_HMOS : KICK_TIME);
787 }
788 
IpcCheck()789 void WatchdogInner::IpcCheck()
790 {
791     if (getuid() == WATCHED_UID) {
792         if (binderCheckHander_ == nullptr) {
793             auto runner = AppExecFwk::EventRunner::Create(IPC_CHECKER);
794             binderCheckHander_ = std::make_shared<AppExecFwk::EventHandler>(runner);
795             if (!InsertWatchdogTaskLocked(IPC_CHECKER, WatchdogTask(IPC_FULL, binderCheckHander_,
796                 nullptr, IPC_CHECKER_TIME))) {
797                 XCOLLIE_LOGE("Add %{public}s thread fail", IPC_CHECKER);
798             }
799         }
800     }
801 }
802 
WriteStringToFile(int32_t pid,const char * str)803 void WatchdogInner::WriteStringToFile(int32_t pid, const char *str)
804 {
805     char file[PATH_LEN] = {0};
806     if (snprintf_s(file, PATH_LEN, PATH_LEN - 1, "/proc/%d/unexpected_die_catch", pid) == -1) {
807         XCOLLIE_LOGE("failed to build path for %{public}d.", pid);
808         return;
809     }
810     int fd = open(file, O_RDWR);
811     if (fd == -1) {
812         return;
813     }
814     if (write(fd, str, strlen(str)) < 0) {
815         XCOLLIE_LOGI("failed to write 0 for %{public}s", file);
816     }
817     close(fd);
818     return;
819 }
820 
FfrtCallback(uint64_t taskId,const char * taskInfo,uint32_t delayedTaskCount)821 void WatchdogInner::FfrtCallback(uint64_t taskId, const char *taskInfo, uint32_t delayedTaskCount)
822 {
823     std::string description = "FfrtCallback: task(";
824     description += taskInfo;
825     description += ") blocked " + std::to_string(FFRT_CALLBACK_TIME / TIME_MS_TO_S) + "s";
826     bool isExist = false;
827     {
828         std::unique_lock<std::mutex> lock(lockFfrt_);
829         auto &map = WatchdogInner::GetInstance().taskIdCnt;
830         auto search = map.find(taskId);
831         if (search != map.end()) {
832             isExist = true;
833         } else {
834             map[taskId] = SERVICE_WARNING;
835         }
836     }
837 
838     if (isExist) {
839         description += ", report twice instead of exiting process."; // 1s = 1000ms
840         WatchdogInner::SendFfrtEvent(description, "SERVICE_BLOCK", taskInfo);
841         WatchdogInner::GetInstance().taskIdCnt.erase(taskId);
842         WatchdogInner::KillPeerBinderProcess(description);
843     } else {
844         WatchdogInner::SendFfrtEvent(description, "SERVICE_WARNING", taskInfo);
845     }
846 }
847 
InitFfrtWatchdog()848 void WatchdogInner::InitFfrtWatchdog()
849 {
850     CreateWatchdogThreadIfNeed();
851     ffrt_task_timeout_set_cb(FfrtCallback);
852     ffrt_task_timeout_set_threshold(FFRT_CALLBACK_TIME);
853     std::unique_lock<std::mutex> lock(lock_);
854     IpcCheck();
855 }
856 
SendFfrtEvent(const std::string & msg,const std::string & eventName,const char * taskInfo)857 void WatchdogInner::SendFfrtEvent(const std::string &msg, const std::string &eventName, const char * taskInfo)
858 {
859     int32_t pid = getprocpid();
860     if (IsProcessDebug(pid)) {
861         XCOLLIE_LOGI("heap dump or debug for %{public}d, don't report.", pid);
862         return;
863     }
864     uint32_t gid = getgid();
865     uint32_t uid = getuid();
866     time_t curTime = time(nullptr);
867     std::string sendMsg = std::string((ctime(&curTime) == nullptr) ? "" : ctime(&curTime)) +
868         "\n" + msg + "\n";
869     char* buffer = new char[FFRT_BUFFER_SIZE + 1]();
870     buffer[FFRT_BUFFER_SIZE] = 0;
871     ffrt_dump(DUMP_INFO_ALL, buffer, FFRT_BUFFER_SIZE);
872     sendMsg += buffer;
873     delete[] buffer;
874     int ret = HiSysEventWrite(HiSysEvent::Domain::FRAMEWORK, eventName, HiSysEvent::EventType::FAULT, "PID", pid,
875         "TGID", gid, "UID", uid, "MODULE_NAME", taskInfo, "PROCESS_NAME", GetSelfProcName(), "MSG", sendMsg,
876         "STACK", GetProcessStacktrace());
877     XCOLLIE_LOGI("hisysevent write result=%{public}d, send event [FRAMEWORK,%{public}s], "
878         "msg=%{public}s", ret, eventName.c_str(), msg.c_str());
879 }
880 
LeftTimeExitProcess(const std::string & description)881 void WatchdogInner::LeftTimeExitProcess(const std::string &description)
882 {
883     int32_t pid = getprocpid();
884     if (IsProcessDebug(pid)) {
885         XCOLLIE_LOGI("heap dump or debug for %{public}d, don't exit.", pid);
886         return;
887     }
888     DelayBeforeExit(10); // sleep 10s for hiview dump
889     XCOLLIE_LOGI("Process is going to exit, reason:%{public}s.", description.c_str());
890     WatchdogInner::WriteStringToFile(pid, "0");
891 
892     _exit(0);
893 }
894 
Stop()895 bool WatchdogInner::Stop()
896 {
897     IPCDfx::SetIPCProxyLimit(0, nullptr);
898     if (mainRunner_ != nullptr) {
899         mainRunner_->SetMainLooperWatcher(nullptr, nullptr);
900     }
901     isNeedStop_.store(true);
902     condition_.notify_all();
903     if (threadLoop_ != nullptr && threadLoop_->joinable()) {
904         threadLoop_->join();
905         threadLoop_ = nullptr;
906     }
907     if (g_fd != NOT_OPEN) {
908         close(g_fd);
909         g_fd = NOT_OPEN;
910     }
911     return true;
912 }
913 
KillPeerBinderProcess(const std::string & description)914 void WatchdogInner::KillPeerBinderProcess(const std::string &description)
915 {
916     bool result = false;
917     if (getuid() == WATCHED_UID) {
918         result = KillProcessByPid(getprocpid());
919     }
920     if (!result) {
921         WatchdogInner::LeftTimeExitProcess(description);
922     }
923 }
924 
RemoveInnerTask(const std::string & name)925 void WatchdogInner::RemoveInnerTask(const std::string& name)
926 {
927     if (name.empty()) {
928         XCOLLIE_LOGI("RemoveInnerTask fail, cname is null");
929         return;
930     }
931     std::priority_queue<WatchdogTask> tmpQueue;
932     std::unique_lock<std::mutex> lock(lock_);
933     size_t size = checkerQueue_.size();
934     if (size == 0) {
935         XCOLLIE_LOGE("RemoveInnerTask %{public}s fail, empty queue!", name.c_str());
936         return;
937     }
938     while (!checkerQueue_.empty()) {
939         const WatchdogTask& task = checkerQueue_.top();
940         if (task.name != name) {
941             tmpQueue.push(task);
942         } else {
943             size_t nameSize = taskNameSet_.size();
944             if (nameSize != 0 && !task.isOneshotTask) {
945                 taskNameSet_.erase(name);
946                 XCOLLIE_LOGD("RemoveInnerTask name %{public}s, remove result=%{public}d",
947                     name.c_str(), nameSize > taskNameSet_.size());
948             }
949         }
950         checkerQueue_.pop();
951     }
952     if (tmpQueue.size() == size) {
953         XCOLLIE_LOGE("RemoveInnerTask fail, can not find name %{public}s, size=%{public}zu!",
954             name.c_str(), size);
955     }
956     tmpQueue.swap(checkerQueue_);
957 }
958 
InitBeginFunc(const char * name)959 void InitBeginFunc(const char* name)
960 {
961     std::string nameStr(name);
962     WatchdogInner::GetInstance().bussinessBeginTime_ = DistributeStart(nameStr);
963 }
964 
InitEndFunc(const char * name)965 void InitEndFunc(const char* name)
966 {
967     std::string nameStr(name);
968     DistributeEnd(nameStr, WatchdogInner::GetInstance().bussinessBeginTime_);
969 }
970 
InitMainLooperWatcher(WatchdogInnerBeginFunc * beginFunc,WatchdogInnerEndFunc * endFunc)971 void WatchdogInner::InitMainLooperWatcher(WatchdogInnerBeginFunc* beginFunc,
972     WatchdogInnerEndFunc* endFunc)
973 {
974     int64_t tid = getproctid();
975     if (beginFunc && endFunc) {
976         if (buissnessThreadInfo_.find(tid) != buissnessThreadInfo_.end()) {
977             XCOLLIE_LOGI("Tid =%{public}" PRId64 "already exits, "
978                 "no repeated initialization.", tid);
979             return;
980         }
981         if (mainRunner_ != nullptr) {
982             mainRunner_->SetMainLooperWatcher(nullptr, nullptr);
983         }
984         *beginFunc = InitBeginFunc;
985         *endFunc = InitEndFunc;
986         buissnessThreadInfo_.insert(tid);
987     } else {
988         if (buissnessThreadInfo_.find(tid) != buissnessThreadInfo_.end()) {
989             XCOLLIE_LOGI("Remove already init tid=%{public}." PRId64, tid);
990             mainRunner_->SetMainLooperWatcher(DistributeStart, DistributeEnd);
991             buissnessThreadInfo_.erase(tid);
992         }
993     }
994 }
995 
SetAppDebug(bool isAppDebug)996 void WatchdogInner::SetAppDebug(bool isAppDebug)
997 {
998     isAppDebug_ = isAppDebug;
999 }
1000 
GetAppDebug()1001 bool WatchdogInner::GetAppDebug()
1002 {
1003     return isAppDebug_;
1004 }
1005 } // end of namespace HiviewDFX
1006 } // end of namespace OHOS
1007