1 /*
2 * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "watchdog_inner.h"
17
18 #include <cerrno>
19 #include <climits>
20 #include <mutex>
21
22 #include <sys/stat.h>
23 #include <sys/types.h>
24 #include <fcntl.h>
25 #include <pthread.h>
26 #include <unistd.h>
27 #include <csignal>
28 #include <string>
29
30 #include <securec.h>
31 #include <dlfcn.h>
32
33 #include "backtrace_local.h"
34 #include "hisysevent.h"
35 #include "ipc_skeleton.h"
36 #include "xcollie_utils.h"
37 #include "xcollie_define.h"
38 #include "dfx_define.h"
39 #include "parameter.h"
40
// Signature of the thread-info hook: an output buffer, its length and a ucontext pointer.
using ThreadInfoCallBack = void (*)(char* buf, size_t len, void* ucontext);
// Declared weak: the symbol may be missing at runtime, so it is compared with nullptr before use.
extern "C" void SetThreadInfoCallback(ThreadInfoCallBack func) __attribute__((weak));
43 namespace OHOS {
44 namespace HiviewDFX {
45 constexpr uint64_t DEFAULT_TIMEOUT = 60 * 1000;
46 constexpr uint32_t FFRT_CALLBACK_TIME = 30 * 1000;
47 constexpr uint32_t IPC_CHECKER_TIME = 30 * 1000;
48 constexpr uint32_t TIME_MS_TO_S = 1000;
49 constexpr int INTERVAL_KICK_TIME = 6 * 1000;
50 constexpr int32_t WATCHED_UID = 5523;
51 constexpr int SERVICE_WARNING = 1;
52 const char* SYS_KERNEL_HUNGTASK_USERLIST = "/sys/kernel/hungtask/userlist";
53 const char* HMOS_HUNGTASK_USERLIST = "/proc/sys/hguard/user_list";
54 const std::string ON_KICK_TIME = "on,72";
55 const std::string ON_KICK_TIME_HMOS = "on,63,foundation";
56 const std::string KICK_TIME = "kick";
57 const std::string KICK_TIME_HMOS = "kick,foundation";
58 const int32_t NOT_OPEN = -1;
59 std::mutex WatchdogInner::lockFfrt_;
60 static uint64_t g_nextKickTime = GetCurrentTickMillseconds();
61 static int32_t g_fd = NOT_OPEN;
62 static bool g_existFile = true;
63
64 constexpr uint64_t MAX_START_TIME = 10 * 1000;
65 const char* LIB_THREAD_SAMPLER_PATH = "libthread_sampler.z.so";
66 constexpr size_t STACK_LENGTH = 32 * 1024;
67 typedef int (*ThreadSamplerInitFunc)(int);
68 typedef int32_t (*ThreadSamplerSampleFunc)();
69 typedef int (*ThreadSamplerCollectFunc)(char*, size_t, int);
70 typedef void (*ThreadSamplerDeinitFunc)();
71
72 namespace {
ThreadInfo(char * buf,size_t len,void * ucontext)73 void ThreadInfo(char *buf __attribute__((unused)),
74 size_t len __attribute__((unused)),
75 void* ucontext __attribute__((unused)))
76 {
77 if (ucontext == nullptr) {
78 return;
79 }
80
81 auto ret = memcpy_s(buf, len, WatchdogInner::GetInstance().currentScene_.c_str(),
82 WatchdogInner::GetInstance().currentScene_.size());
83 if (ret != 0) {
84 return;
85 }
86 }
87
/**
 * Adjusts the signal mask of the calling thread.
 *
 * The current mask is read first; signo is then added to (isAddSignal) or removed from
 * that set, and the resulting set is applied with SIG_BLOCK (isBlock) or SIG_UNBLOCK.
 *
 * NOTE(review): with isAddSignal == false and isBlock == false every signal of the current
 * mask except signo gets unblocked - confirm this is what the callers intend.
 */
void SetThreadSignalMask(int signo, bool isAddSignal, bool isBlock)
{
    sigset_t mask;
    sigemptyset(&mask);
    // A null "new set" argument makes pthread_sigmask only report the current mask.
    pthread_sigmask(SIG_SETMASK, nullptr, &mask);

    if (!isAddSignal) {
        sigdelset(&mask, signo);
    } else {
        sigaddset(&mask, signo);
    }

    const int how = isBlock ? SIG_BLOCK : SIG_UNBLOCK;
    pthread_sigmask(how, &mask, nullptr);
}
104 }
105
WatchdogInner()106 WatchdogInner::WatchdogInner()
107 : cntCallback_(0), timeCallback_(0), sampleTaskState_(0)
108 {
109 currentScene_ = "thread DfxWatchdog: Current scenario is hicollie.\n";
110 }
111
~WatchdogInner()112 WatchdogInner::~WatchdogInner()
113 {
114 Stop();
115 }
116
IsInAppspwan()117 static bool IsInAppspwan()
118 {
119 if (getuid() == 0 && GetSelfProcName().find("appspawn") != std::string::npos) {
120 return true;
121 }
122
123 if (getuid() == 0 && GetSelfProcName().find("nativespawn") != std::string::npos) {
124 return true;
125 }
126
127 return false;
128 }
129
SetBundleInfo(const std::string & bundleName,const std::string & bundleVersion)130 void WatchdogInner::SetBundleInfo(const std::string& bundleName, const std::string& bundleVersion)
131 {
132 bundleName_ = bundleName;
133 bundleVersion_ = bundleVersion;
134 }
135
SetForeground(const bool & isForeground)136 void WatchdogInner::SetForeground(const bool& isForeground)
137 {
138 isForeground_ = isForeground;
139 }
140
ReportMainThreadEvent()141 bool WatchdogInner::ReportMainThreadEvent()
142 {
143 std::string stack = "";
144 CollectStack(stack);
145 Deinit();
146 std::string path = "";
147 std::string eventName = "MAIN_THREAD_JANK";
148 if (!buissnessThreadInfo_.empty()) {
149 eventName = "BUSSINESS_THREAD_JANK";
150 }
151 if (!WriteStackToFd(getprocpid(), path, stack, eventName)) {
152 XCOLLIE_LOGI("MainThread WriteStackToFd Failed");
153 return false;
154 }
155 int result = HiSysEventWrite(HiSysEvent::Domain::FRAMEWORK, "MAIN_THREAD_JANK",
156 HiSysEvent::EventType::FAULT,
157 "BUNDLE_VERSION", bundleVersion_,
158 "BUNDLE_NAME", bundleName_,
159 "BEGIN_TIME", timeContent_.reportBegin / MILLISEC_TO_NANOSEC,
160 "END_TIME", timeContent_.reportEnd / MILLISEC_TO_NANOSEC,
161 "EXTERNAL_LOG", path,
162 "STACK", stack,
163 "JANK_LEVEL", 0,
164 "THREAD_NAME", GetSelfProcName(),
165 "FOREGROUND", isForeground_,
166 "LOG_TIME", GetTimeStamp() / MILLISEC_TO_NANOSEC);
167 XCOLLIE_LOGI("MainThread HiSysEventWrite result=%{public}d", result);
168 return result >= 0;
169 }
170
CheckEventTimer(const int64_t & currentTime)171 bool WatchdogInner::CheckEventTimer(const int64_t& currentTime)
172 {
173 if (timeContent_.reportBegin == timeContent_.curBegin &&
174 timeContent_.reportEnd == timeContent_.curEnd) {
175 return false;
176 }
177 return (timeContent_.curEnd <= timeContent_.curBegin &&
178 (currentTime - timeContent_.curBegin >= DURATION_TIME * MILLISEC_TO_NANOSEC)) ||
179 (timeContent_.curEnd - timeContent_.curBegin > DURATION_TIME * MILLISEC_TO_NANOSEC);
180 }
181
ThreadSampleTask(int32_t (* threadSamplerSampleFunc)())182 void WatchdogInner::ThreadSampleTask(int32_t (*threadSamplerSampleFunc)())
183 {
184 if (sampleTaskState_ == DumpStackState::DEFAULT) {
185 sampleTaskState_++;
186 return;
187 }
188 int64_t currentTime = GetTimeStamp();
189 if (stackContent_.collectCount > DumpStackState::DEFAULT &&
190 stackContent_.collectCount < COLLECT_STACK_COUNT) {
191 threadSamplerSampleFunc();
192 stackContent_.collectCount++;
193 } else if (stackContent_.collectCount == COLLECT_STACK_COUNT) {
194 ReportMainThreadEvent();
195 isMainThreadProfileTaskEnabled_ = true;
196 return;
197 } else {
198 if (CheckEventTimer(currentTime)) {
199 threadSamplerSampleFunc();
200 stackContent_.collectCount++;
201 } else {
202 stackContent_.detectorCount++;
203 }
204 }
205 if (stackContent_.detectorCount == DETECT_STACK_COUNT) {
206 isMainThreadProfileTaskEnabled_ = true;
207 }
208 }
209
StartProfileMainThread(int32_t interval)210 int32_t WatchdogInner::StartProfileMainThread(int32_t interval)
211 {
212 std::unique_lock<std::mutex> lock(lock_);
213
214 uint64_t now = GetCurrentTickMillseconds();
215 if (now - watchdogStartTime_ < MAX_START_TIME) {
216 XCOLLIE_LOGI("application is in starting period.\n");
217 stackContent_.stackState = DumpStackState::DEFAULT;
218 return -1;
219 }
220
221 funcHandler_ = dlopen(LIB_THREAD_SAMPLER_PATH, RTLD_LAZY);
222
223 auto threadSamplerInitFunc =
224 reinterpret_cast<ThreadSamplerInitFunc>(FunctionOpen(funcHandler_, "ThreadSamplerInit"));
225 auto threadSamplerSampleFunc =
226 reinterpret_cast<ThreadSamplerSampleFunc>(FunctionOpen(funcHandler_, "ThreadSamplerSample"));
227 if (threadSamplerInitFunc == nullptr || threadSamplerSampleFunc == nullptr) {
228 funcHandler_ = nullptr;
229 return -1;
230 }
231
232 int initThreadSamplerRet = threadSamplerInitFunc(COLLECT_STACK_COUNT);
233 if (initThreadSamplerRet != 0) {
234 return -1;
235 }
236
237 sampleTaskState_ = 0;
238 stackContent_.detectorCount = 0;
239 stackContent_.collectCount = 0;
240 auto sampleTask = [this, threadSamplerSampleFunc]() {
241 ThreadSampleTask(threadSamplerSampleFunc);
242 };
243
244 WatchdogTask task("ThreadSampler", sampleTask, 0, interval, true);
245 InsertWatchdogTaskLocked("ThreadSampler", std::move(task));
246 return 0;
247 }
248
CollectStack(std::string & stack)249 bool WatchdogInner::CollectStack(std::string& stack)
250 {
251 if (funcHandler_ == nullptr) {
252 XCOLLIE_LOGE("open library failed.");
253 return false;
254 }
255
256 auto threadSamplerCollectFunc =
257 reinterpret_cast<ThreadSamplerCollectFunc>(FunctionOpen(funcHandler_, "ThreadSamplerCollect"));
258 if (threadSamplerCollectFunc == nullptr) {
259 funcHandler_ = nullptr;
260 return false;
261 }
262 int treeFormat = 1;
263 char* stk = new char[STACK_LENGTH];
264 int collectRet = threadSamplerCollectFunc(stk, STACK_LENGTH, treeFormat);
265 stack = stk;
266 delete[] stk;
267 return collectRet == 0;
268 }
269
Deinit()270 void WatchdogInner::Deinit()
271 {
272 if (funcHandler_ == nullptr) {
273 XCOLLIE_LOGE("open library failed.");
274 return;
275 }
276
277 auto threadSamplerDeinitFunc =
278 reinterpret_cast<ThreadSamplerDeinitFunc>(FunctionOpen(funcHandler_, "ThreadSamplerDeinit"));
279 if (threadSamplerDeinitFunc == nullptr) {
280 funcHandler_ = nullptr;
281 return;
282 }
283 threadSamplerDeinitFunc();
284 }
285
ChangeState(int & state,int targetState)286 void WatchdogInner::ChangeState(int& state, int targetState)
287 {
288 timeContent_.reportBegin = timeContent_.curBegin;
289 timeContent_.reportEnd = timeContent_.curEnd;
290 state = targetState;
291 }
292
DayChecker(int & state,TimePoint currenTime,TimePoint lastEndTime,int64_t checkTimer)293 void WatchdogInner::DayChecker(int& state, TimePoint currenTime, TimePoint lastEndTime,
294 int64_t checkTimer)
295 {
296 auto diff = currenTime - lastEndTime;
297 int64_t intervalTime = std::chrono::duration_cast<std::chrono::milliseconds>
298 (diff).count();
299 if (intervalTime >= checkTimer) {
300 XCOLLIE_LOGD("MainThread StartProfileMainThread Over checkTimer: "
301 "%{public}" PRId64 " ms", checkTimer);
302 state = DumpStackState::DEFAULT;
303 }
304 }
305
StartTraceProfile(int32_t interval)306 void WatchdogInner::StartTraceProfile(int32_t interval)
307 {
308 if (traceCollector_ == nullptr) {
309 XCOLLIE_LOGI("MainThread TraceCollector Failed.");
310 return;
311 }
312 traceContent_.dumpCount = 0;
313 traceContent_.traceCount = 0;
314 auto traceTask = [this]() {
315 traceContent_.traceCount++;
316 int64_t currentTime = GetTimeStamp();
317 if (CheckEventTimer(currentTime)) {
318 traceContent_.dumpCount++;
319 }
320 if (traceContent_.traceCount >= COLLECT_TRACE_MAX) {
321 if (traceContent_.dumpCount >= COLLECT_TRACE_MIN) {
322 CreateWatchdogDir();
323 appCaller_.actionId = UCollectClient::ACTION_ID_DUMP_TRACE;
324 appCaller_.isBusinessJank = !buissnessThreadInfo_.empty();
325 auto result = traceCollector_->CaptureDurationTrace(appCaller_);
326 XCOLLIE_LOGI("MainThread TraceCollector Dump result: %{public}d", result.retCode);
327 }
328 isMainThreadTraceEnabled_ = true;
329 }
330 };
331 WatchdogTask task("TraceCollector", traceTask, 0, interval, true);
332 std::unique_lock<std::mutex> lock(lock_);
333 InsertWatchdogTaskLocked("TraceCollector", std::move(task));
334 }
335
CollectTrace()336 void WatchdogInner::CollectTrace()
337 {
338 traceCollector_ = UCollectClient::TraceCollector::Create();
339 int32_t pid = getprocpid();
340 int32_t uid = static_cast<int64_t>(getuid());
341 appCaller_.actionId = UCollectClient::ACTION_ID_START_TRACE;
342 appCaller_.bundleName = bundleName_;
343 appCaller_.bundleVersion = bundleVersion_;
344 appCaller_.uid = uid;
345 appCaller_.pid = pid;
346 appCaller_.threadName = GetSelfProcName();
347 appCaller_.foreground = isForeground_;
348 appCaller_.happenTime = GetTimeStamp() / MILLISEC_TO_NANOSEC;
349 appCaller_.beginTime = timeContent_.reportBegin / MILLISEC_TO_NANOSEC;
350 appCaller_.endTime = timeContent_.reportEnd / MILLISEC_TO_NANOSEC;
351 auto result = traceCollector_->CaptureDurationTrace(appCaller_);
352 XCOLLIE_LOGI("MainThread TraceCollector Start result: %{public}d", result.retCode);
353 if (result.retCode != 0) {
354 return;
355 }
356 StartTraceProfile(DURATION_TIME);
357 }
358
DistributeStart(const std::string & name)359 static TimePoint DistributeStart(const std::string& name)
360 {
361 WatchdogInner::GetInstance().timeContent_.curBegin = GetTimeStamp();
362 return std::chrono::steady_clock::now();
363 }
364
DistributeEnd(const std::string & name,const TimePoint & startTime)365 static void DistributeEnd(const std::string& name, const TimePoint& startTime)
366 {
367 TimePoint endTime = std::chrono::steady_clock::now();
368 auto duration = endTime - startTime;
369 int64_t durationTime = std::chrono::duration_cast<std::chrono::milliseconds>
370 (duration).count();
371 if (duration > std::chrono::milliseconds(DISTRIBUTE_TIME)) {
372 XCOLLIE_LOGI("BlockMonitor event name: %{public}s, Duration Time: %{public}" PRId64 " ms",
373 name.c_str(), durationTime);
374 }
375 WatchdogInner::GetInstance().timeContent_.curEnd = GetTimeStamp();
376 if (WatchdogInner::GetInstance().stackContent_.stackState == DumpStackState::COMPLETE) {
377 int64_t checkTimer = ONE_DAY_LIMIT;
378 if (IsEnableVersion(KEY_DEVELOPER_MODE_STATE, ENABLE_VAULE)) {
379 checkTimer = ONE_HOUR_LIMIT;
380 }
381 WatchdogInner::GetInstance().DayChecker(WatchdogInner::GetInstance().stackContent_.stackState,
382 endTime, WatchdogInner::GetInstance().lastStackTime_, checkTimer);
383 }
384 if (WatchdogInner::GetInstance().traceContent_.traceState == DumpStackState::COMPLETE) {
385 WatchdogInner::GetInstance().DayChecker(WatchdogInner::GetInstance().traceContent_.traceState,
386 endTime, WatchdogInner::GetInstance().lastTraceTime_, ONE_DAY_LIMIT);
387 }
388 if (duration > std::chrono::milliseconds(DURATION_TIME) && duration < std::chrono::milliseconds(DUMPTRACE_TIME) &&
389 WatchdogInner::GetInstance().stackContent_.stackState == DumpStackState::DEFAULT) {
390 if (IsEnableVersion(KEY_ANCO_ENABLE_TYPE, ENABLE_VAULE)) {
391 return;
392 }
393 WatchdogInner::GetInstance().ChangeState(WatchdogInner::GetInstance().stackContent_.stackState,
394 DumpStackState::COMPLETE);
395 WatchdogInner::GetInstance().lastStackTime_ = endTime;
396
397 int32_t ret = WatchdogInner::GetInstance().StartProfileMainThread(TASK_INTERVAL);
398 XCOLLIE_LOGI("MainThread StartProfileMainThread ret: %{public}d "
399 "Duration Time: %{public}" PRId64 " ms", ret, durationTime);
400 }
401 if (duration > std::chrono::milliseconds(DUMPTRACE_TIME) &&
402 WatchdogInner::GetInstance().traceContent_.traceState == DumpStackState::DEFAULT) {
403 if (!IsEnableVersion(KEY_HIVIEW_USER_TYPE, ENABLE_HIVIEW_USER_VAULE) ||
404 IsEnableVersion(KEY_ANCO_ENABLE_TYPE, ENABLE_VAULE)) {
405 return;
406 }
407 XCOLLIE_LOGI("MainThread TraceCollector Duration Time: %{public}" PRId64 " ms", durationTime);
408 WatchdogInner::GetInstance().ChangeState(WatchdogInner::GetInstance().traceContent_.traceState,
409 DumpStackState::COMPLETE);
410 WatchdogInner::GetInstance().lastTraceTime_ = endTime;
411 WatchdogInner::GetInstance().CollectTrace();
412 }
413 }
414
AddThread(const std::string & name,std::shared_ptr<AppExecFwk::EventHandler> handler,TimeOutCallback timeOutCallback,uint64_t interval)415 int WatchdogInner::AddThread(const std::string &name,
416 std::shared_ptr<AppExecFwk::EventHandler> handler, TimeOutCallback timeOutCallback, uint64_t interval)
417 {
418 if (name.empty() || handler == nullptr) {
419 XCOLLIE_LOGE("Add thread fail, invalid args!");
420 return -1;
421 }
422
423 if (IsInAppspwan()) {
424 return -1;
425 }
426
427 std::string limitedName = GetLimitedSizeName(name);
428 XCOLLIE_LOGI("Add thread %{public}s to watchdog.", limitedName.c_str());
429 std::unique_lock<std::mutex> lock(lock_);
430
431 IpcCheck();
432
433 if (!InsertWatchdogTaskLocked(limitedName, WatchdogTask(limitedName, handler, timeOutCallback, interval))) {
434 return -1;
435 }
436 return 0;
437 }
438
RunOneShotTask(const std::string & name,Task && task,uint64_t delay)439 void WatchdogInner::RunOneShotTask(const std::string& name, Task&& task, uint64_t delay)
440 {
441 if (name.empty() || task == nullptr) {
442 XCOLLIE_LOGE("Add task fail, invalid args!");
443 return;
444 }
445
446 if (IsInAppspwan()) {
447 return;
448 }
449
450 std::unique_lock<std::mutex> lock(lock_);
451 std::string limitedName = GetLimitedSizeName(name);
452 InsertWatchdogTaskLocked(limitedName, WatchdogTask(limitedName, std::move(task), delay, 0, true));
453 }
454
RunXCollieTask(const std::string & name,uint64_t timeout,XCollieCallback func,void * arg,unsigned int flag)455 int64_t WatchdogInner::RunXCollieTask(const std::string& name, uint64_t timeout, XCollieCallback func,
456 void *arg, unsigned int flag)
457 {
458 if (name.empty() || timeout == 0) {
459 XCOLLIE_LOGE("Add XCollieTask fail, invalid args!");
460 return INVALID_ID;
461 }
462
463 if (IsInAppspwan()) {
464 return INVALID_ID;
465 }
466
467 std::unique_lock<std::mutex> lock(lock_);
468 std::string limitedName = GetLimitedSizeName(name);
469 return InsertWatchdogTaskLocked(limitedName, WatchdogTask(limitedName, timeout, func, arg, flag));
470 }
471
RemoveXCollieTask(int64_t id)472 void WatchdogInner::RemoveXCollieTask(int64_t id)
473 {
474 std::priority_queue<WatchdogTask> tmpQueue;
475 std::unique_lock<std::mutex> lock(lock_);
476 size_t size = checkerQueue_.size();
477 if (size == 0) {
478 XCOLLIE_LOGE("Remove XCollieTask %{public}lld fail, empty queue!", static_cast<long long>(id));
479 return;
480 }
481 while (!checkerQueue_.empty()) {
482 const WatchdogTask& task = checkerQueue_.top();
483 if (task.id != id || task.timeout == 0) {
484 tmpQueue.push(task);
485 }
486 checkerQueue_.pop();
487 }
488 if (tmpQueue.size() == size) {
489 XCOLLIE_LOGE("Remove XCollieTask fail, can not find timer %{public}lld, size=%{public}zu!",
490 static_cast<long long>(id), size);
491 }
492 tmpQueue.swap(checkerQueue_);
493 }
494
RunPeriodicalTask(const std::string & name,Task && task,uint64_t interval,uint64_t delay)495 void WatchdogInner::RunPeriodicalTask(const std::string& name, Task&& task, uint64_t interval, uint64_t delay)
496 {
497 if (name.empty() || task == nullptr) {
498 XCOLLIE_LOGE("Add task fail, invalid args!");
499 return;
500 }
501
502 if (IsInAppspwan()) {
503 return;
504 }
505
506 std::string limitedName = GetLimitedSizeName(name);
507 XCOLLIE_LOGD("Add periodical task %{public}s to watchdog.", name.c_str());
508 std::unique_lock<std::mutex> lock(lock_);
509 InsertWatchdogTaskLocked(limitedName, WatchdogTask(limitedName, std::move(task), delay, interval, false));
510 }
511
SetTimerCountTask(const std::string & name,uint64_t timeLimit,int countLimit)512 int64_t WatchdogInner::SetTimerCountTask(const std::string &name, uint64_t timeLimit, int countLimit)
513 {
514 if (name.empty() || timeLimit == 0 || countLimit <= 0) {
515 XCOLLIE_LOGE("SetTimerCountTask fail, invalid args!");
516 return INVALID_ID;
517 }
518
519 if (IsInAppspwan()) {
520 return INVALID_ID;
521 }
522 std::string limitedName = GetLimitedSizeName(name);
523 XCOLLIE_LOGD("SetTimerCountTask name : %{public}s", name.c_str());
524 std::unique_lock<std::mutex> lock(lock_);
525 return InsertWatchdogTaskLocked(limitedName, WatchdogTask(limitedName, timeLimit, countLimit));
526 }
527
TriggerTimerCountTask(const std::string & name,bool bTrigger,const std::string & message)528 void WatchdogInner::TriggerTimerCountTask(const std::string &name, bool bTrigger, const std::string &message)
529 {
530 std::unique_lock<std::mutex> lock(lock_);
531
532 if (checkerQueue_.empty()) {
533 XCOLLIE_LOGE("TriggerTimerCountTask name : %{public}s fail, empty queue!", name.c_str());
534 return;
535 }
536
537 bool isTaskExist = false;
538 uint64_t now = GetCurrentTickMillseconds();
539 std::priority_queue<WatchdogTask> tmpQueue;
540 while (!checkerQueue_.empty()) {
541 WatchdogTask task = checkerQueue_.top();
542 if (task.name == name) {
543 isTaskExist = true;
544 if (bTrigger) {
545 task.triggerTimes.push_back(now);
546 task.message = message;
547 } else {
548 task.triggerTimes.clear();
549 }
550 }
551 tmpQueue.push(task);
552 checkerQueue_.pop();
553 }
554 tmpQueue.swap(checkerQueue_);
555
556 if (!isTaskExist) {
557 XCOLLIE_LOGE("TriggerTimerCount name : %{public}s does not exist!", name.c_str());
558 }
559 }
560
IsTaskExistLocked(const std::string & name)561 bool WatchdogInner::IsTaskExistLocked(const std::string& name)
562 {
563 return (taskNameSet_.find(name) != taskNameSet_.end());
564 }
565
IsExceedMaxTaskLocked()566 bool WatchdogInner::IsExceedMaxTaskLocked()
567 {
568 if (checkerQueue_.size() >= MAX_WATCH_NUM) {
569 XCOLLIE_LOGE("Exceed max watchdog task!");
570 return true;
571 }
572
573 return false;
574 }
575
InsertWatchdogTaskLocked(const std::string & name,WatchdogTask && task)576 int64_t WatchdogInner::InsertWatchdogTaskLocked(const std::string& name, WatchdogTask&& task)
577 {
578 if (!task.isOneshotTask && IsTaskExistLocked(name)) {
579 XCOLLIE_LOGI("Task with %{public}s already exist, failed to insert.", name.c_str());
580 return 0;
581 }
582
583 if (IsExceedMaxTaskLocked()) {
584 XCOLLIE_LOGE("Exceed max watchdog task, failed to insert.");
585 return 0;
586 }
587 int64_t id = task.id;
588 checkerQueue_.push(std::move(task));
589 if (!task.isOneshotTask) {
590 taskNameSet_.insert(name);
591 }
592 CreateWatchdogThreadIfNeed();
593 condition_.notify_all();
594
595 return id;
596 }
597
StopWatchdog()598 void WatchdogInner::StopWatchdog()
599 {
600 Stop();
601 }
602
IsCallbackLimit(unsigned int flag)603 bool WatchdogInner::IsCallbackLimit(unsigned int flag)
604 {
605 bool ret = false;
606 time_t startTime = time(nullptr);
607 if (!(flag & XCOLLIE_FLAG_LOG)) {
608 return ret;
609 }
610 if (timeCallback_ + XCOLLIE_CALLBACK_TIMEWIN_MAX < startTime) {
611 timeCallback_ = startTime;
612 } else {
613 if (++cntCallback_ > XCOLLIE_CALLBACK_HISTORY_MAX) {
614 ret = true;
615 }
616 }
617 return ret;
618 }
619
IPCProxyLimitCallback(uint64_t num)620 void IPCProxyLimitCallback(uint64_t num)
621 {
622 XCOLLIE_LOGE("ipc proxy num %{public}" PRIu64 " exceed limit", num);
623 if (getuid() >= MIN_APP_UID && IsBetaVersion()) {
624 XCOLLIE_LOGI("Process is going to exit, reason: ipc proxy num exceed limit");
625 _exit(0);
626 }
627 }
628
CreateWatchdogThreadIfNeed()629 void WatchdogInner::CreateWatchdogThreadIfNeed()
630 {
631 std::call_once(flag_, [this] {
632 if (threadLoop_ == nullptr) {
633 if (mainRunner_ == nullptr) {
634 mainRunner_ = AppExecFwk::EventRunner::GetMainEventRunner();
635 }
636 mainRunner_->SetMainLooperWatcher(DistributeStart, DistributeEnd);
637 const uint64_t limitNum = 20000;
638 IPCDfx::SetIPCProxyLimit(limitNum, IPCProxyLimitCallback);
639 threadLoop_ = std::make_unique<std::thread>(&WatchdogInner::Start, this);
640 if (getpid() == gettid()) {
641 SetThreadSignalMask(SIGDUMP, true, true);
642 }
643 XCOLLIE_LOGD("Watchdog is running!");
644 }
645 });
646 }
647
FetchNextTask(uint64_t now,WatchdogTask & task)648 uint64_t WatchdogInner::FetchNextTask(uint64_t now, WatchdogTask& task)
649 {
650 std::unique_lock<std::mutex> lock(lock_);
651 if (isNeedStop_) {
652 while (!checkerQueue_.empty()) {
653 checkerQueue_.pop();
654 }
655 return DEFAULT_TIMEOUT;
656 }
657
658 if (checkerQueue_.empty()) {
659 return DEFAULT_TIMEOUT;
660 }
661
662 const WatchdogTask& queuedTaskCheck = checkerQueue_.top();
663 bool popCheck = true;
664 if (queuedTaskCheck.name.empty()) {
665 checkerQueue_.pop();
666 XCOLLIE_LOGW("queuedTask name is empty.");
667 } else if (queuedTaskCheck.name == STACK_CHECKER && isMainThreadProfileTaskEnabled_) {
668 checkerQueue_.pop();
669 taskNameSet_.erase("ThreadSampler");
670 isMainThreadProfileTaskEnabled_ = false;
671 dlclose(funcHandler_);
672 funcHandler_ = nullptr;
673 XCOLLIE_LOGI("STACK_CHECKER Task pop");
674 } else if (queuedTaskCheck.name == TRACE_CHECKER && isMainThreadTraceEnabled_) {
675 checkerQueue_.pop();
676 taskNameSet_.erase("TraceCollector");
677 isMainThreadTraceEnabled_ = false;
678 XCOLLIE_LOGI("TRACE_CHECKER Task pop");
679 } else {
680 popCheck = false;
681 }
682 if (popCheck && checkerQueue_.empty()) {
683 return DEFAULT_TIMEOUT;
684 }
685
686 const WatchdogTask& queuedTask = checkerQueue_.top();
687 if (g_existFile && queuedTask.name == IPC_FULL && now - g_nextKickTime > INTERVAL_KICK_TIME) {
688 if (KickWatchdog()) {
689 g_nextKickTime = now;
690 }
691 }
692 if (queuedTask.nextTickTime > now) {
693 return queuedTask.nextTickTime - now;
694 }
695
696 currentScene_ = "thread DfxWatchdog: Current scenario is task name: " + queuedTask.name + "\n";
697 task = queuedTask;
698 checkerQueue_.pop();
699 return 0;
700 }
701
ReInsertTaskIfNeed(WatchdogTask & task)702 void WatchdogInner::ReInsertTaskIfNeed(WatchdogTask& task)
703 {
704 if (task.checkInterval == 0) {
705 return;
706 }
707
708 std::unique_lock<std::mutex> lock(lock_);
709 task.nextTickTime = task.nextTickTime + task.checkInterval;
710 checkerQueue_.push(task);
711 }
712
Start()713 bool WatchdogInner::Start()
714 {
715 if (pthread_setname_np(pthread_self(), "OS_DfxWatchdog") != 0) {
716 XCOLLIE_LOGW("Failed to set threadName for watchdog, errno:%d.", errno);
717 }
718 SetThreadSignalMask(SIGDUMP, false, false);
719 watchdogStartTime_ = GetCurrentTickMillseconds();
720 XCOLLIE_LOGD("Watchdog is running in thread(%{public}d)!", getproctid());
721 if (SetThreadInfoCallback != nullptr) {
722 SetThreadInfoCallback(ThreadInfo);
723 XCOLLIE_LOGD("Watchdog Set Thread Info Callback");
724 }
725 while (!isNeedStop_) {
726 uint64_t now = GetCurrentTickMillseconds();
727 WatchdogTask task;
728 uint64_t leftTimeMill = FetchNextTask(now, task);
729 if (leftTimeMill == 0) {
730 task.Run(now);
731 ReInsertTaskIfNeed(task);
732 currentScene_ = "thread DfxWatchdog: Current scenario is hicollie.\n";
733 continue;
734 } else if (isNeedStop_) {
735 break;
736 } else {
737 std::unique_lock<std::mutex> lock(lock_);
738 condition_.wait_for(lock, std::chrono::milliseconds(leftTimeMill));
739 }
740 }
741 if (SetThreadInfoCallback != nullptr) {
742 SetThreadInfoCallback(nullptr);
743 }
744 return true;
745 }
746
SendMsgToHungtask(const std::string & msg)747 bool WatchdogInner::SendMsgToHungtask(const std::string& msg)
748 {
749 if (g_fd == NOT_OPEN) {
750 return false;
751 }
752
753 ssize_t watchdogWrite = write(g_fd, msg.c_str(), msg.size());
754 if (watchdogWrite < 0 || watchdogWrite != static_cast<ssize_t>(msg.size())) {
755 XCOLLIE_LOGE("watchdogWrite msg failed");
756 close(g_fd);
757 g_fd = NOT_OPEN;
758 return false;
759 }
760 XCOLLIE_LOGE("Send %{public}s to hungtask Successful\n", msg.c_str());
761 return true;
762 }
763
KickWatchdog()764 bool WatchdogInner::KickWatchdog()
765 {
766 if (g_fd == NOT_OPEN) {
767 g_fd = open(SYS_KERNEL_HUNGTASK_USERLIST, O_WRONLY);
768 if (g_fd < 0) {
769 g_fd = open(HMOS_HUNGTASK_USERLIST, O_WRONLY);
770 if (g_fd < 0) {
771 XCOLLIE_LOGE("can't open hungtask file");
772 g_existFile = false;
773 return false;
774 }
775 XCOLLIE_LOGE("change to hmos kernel");
776 isHmos = true;
777 } else {
778 XCOLLIE_LOGE("change to linux kernel");
779 }
780
781 if (!SendMsgToHungtask(isHmos ? ON_KICK_TIME_HMOS : ON_KICK_TIME)) {
782 XCOLLIE_LOGE("KickWatchdog SendMsgToHungtask false");
783 return false;
784 }
785 }
786 return SendMsgToHungtask(isHmos ? KICK_TIME_HMOS : KICK_TIME);
787 }
788
IpcCheck()789 void WatchdogInner::IpcCheck()
790 {
791 if (getuid() == WATCHED_UID) {
792 if (binderCheckHander_ == nullptr) {
793 auto runner = AppExecFwk::EventRunner::Create(IPC_CHECKER);
794 binderCheckHander_ = std::make_shared<AppExecFwk::EventHandler>(runner);
795 if (!InsertWatchdogTaskLocked(IPC_CHECKER, WatchdogTask(IPC_FULL, binderCheckHander_,
796 nullptr, IPC_CHECKER_TIME))) {
797 XCOLLIE_LOGE("Add %{public}s thread fail", IPC_CHECKER);
798 }
799 }
800 }
801 }
802
WriteStringToFile(int32_t pid,const char * str)803 void WatchdogInner::WriteStringToFile(int32_t pid, const char *str)
804 {
805 char file[PATH_LEN] = {0};
806 if (snprintf_s(file, PATH_LEN, PATH_LEN - 1, "/proc/%d/unexpected_die_catch", pid) == -1) {
807 XCOLLIE_LOGE("failed to build path for %{public}d.", pid);
808 return;
809 }
810 int fd = open(file, O_RDWR);
811 if (fd == -1) {
812 return;
813 }
814 if (write(fd, str, strlen(str)) < 0) {
815 XCOLLIE_LOGI("failed to write 0 for %{public}s", file);
816 }
817 close(fd);
818 return;
819 }
820
FfrtCallback(uint64_t taskId,const char * taskInfo,uint32_t delayedTaskCount)821 void WatchdogInner::FfrtCallback(uint64_t taskId, const char *taskInfo, uint32_t delayedTaskCount)
822 {
823 std::string description = "FfrtCallback: task(";
824 description += taskInfo;
825 description += ") blocked " + std::to_string(FFRT_CALLBACK_TIME / TIME_MS_TO_S) + "s";
826 bool isExist = false;
827 {
828 std::unique_lock<std::mutex> lock(lockFfrt_);
829 auto &map = WatchdogInner::GetInstance().taskIdCnt;
830 auto search = map.find(taskId);
831 if (search != map.end()) {
832 isExist = true;
833 } else {
834 map[taskId] = SERVICE_WARNING;
835 }
836 }
837
838 if (isExist) {
839 description += ", report twice instead of exiting process."; // 1s = 1000ms
840 WatchdogInner::SendFfrtEvent(description, "SERVICE_BLOCK", taskInfo);
841 WatchdogInner::GetInstance().taskIdCnt.erase(taskId);
842 WatchdogInner::KillPeerBinderProcess(description);
843 } else {
844 WatchdogInner::SendFfrtEvent(description, "SERVICE_WARNING", taskInfo);
845 }
846 }
847
InitFfrtWatchdog()848 void WatchdogInner::InitFfrtWatchdog()
849 {
850 CreateWatchdogThreadIfNeed();
851 ffrt_task_timeout_set_cb(FfrtCallback);
852 ffrt_task_timeout_set_threshold(FFRT_CALLBACK_TIME);
853 std::unique_lock<std::mutex> lock(lock_);
854 IpcCheck();
855 }
856
SendFfrtEvent(const std::string & msg,const std::string & eventName,const char * taskInfo)857 void WatchdogInner::SendFfrtEvent(const std::string &msg, const std::string &eventName, const char * taskInfo)
858 {
859 int32_t pid = getprocpid();
860 if (IsProcessDebug(pid)) {
861 XCOLLIE_LOGI("heap dump or debug for %{public}d, don't report.", pid);
862 return;
863 }
864 uint32_t gid = getgid();
865 uint32_t uid = getuid();
866 time_t curTime = time(nullptr);
867 std::string sendMsg = std::string((ctime(&curTime) == nullptr) ? "" : ctime(&curTime)) +
868 "\n" + msg + "\n";
869 char* buffer = new char[FFRT_BUFFER_SIZE + 1]();
870 buffer[FFRT_BUFFER_SIZE] = 0;
871 ffrt_dump(DUMP_INFO_ALL, buffer, FFRT_BUFFER_SIZE);
872 sendMsg += buffer;
873 delete[] buffer;
874 int ret = HiSysEventWrite(HiSysEvent::Domain::FRAMEWORK, eventName, HiSysEvent::EventType::FAULT, "PID", pid,
875 "TGID", gid, "UID", uid, "MODULE_NAME", taskInfo, "PROCESS_NAME", GetSelfProcName(), "MSG", sendMsg,
876 "STACK", GetProcessStacktrace());
877 XCOLLIE_LOGI("hisysevent write result=%{public}d, send event [FRAMEWORK,%{public}s], "
878 "msg=%{public}s", ret, eventName.c_str(), msg.c_str());
879 }
880
LeftTimeExitProcess(const std::string & description)881 void WatchdogInner::LeftTimeExitProcess(const std::string &description)
882 {
883 int32_t pid = getprocpid();
884 if (IsProcessDebug(pid)) {
885 XCOLLIE_LOGI("heap dump or debug for %{public}d, don't exit.", pid);
886 return;
887 }
888 DelayBeforeExit(10); // sleep 10s for hiview dump
889 XCOLLIE_LOGI("Process is going to exit, reason:%{public}s.", description.c_str());
890 WatchdogInner::WriteStringToFile(pid, "0");
891
892 _exit(0);
893 }
894
Stop()895 bool WatchdogInner::Stop()
896 {
897 IPCDfx::SetIPCProxyLimit(0, nullptr);
898 if (mainRunner_ != nullptr) {
899 mainRunner_->SetMainLooperWatcher(nullptr, nullptr);
900 }
901 isNeedStop_.store(true);
902 condition_.notify_all();
903 if (threadLoop_ != nullptr && threadLoop_->joinable()) {
904 threadLoop_->join();
905 threadLoop_ = nullptr;
906 }
907 if (g_fd != NOT_OPEN) {
908 close(g_fd);
909 g_fd = NOT_OPEN;
910 }
911 return true;
912 }
913
KillPeerBinderProcess(const std::string & description)914 void WatchdogInner::KillPeerBinderProcess(const std::string &description)
915 {
916 bool result = false;
917 if (getuid() == WATCHED_UID) {
918 result = KillProcessByPid(getprocpid());
919 }
920 if (!result) {
921 WatchdogInner::LeftTimeExitProcess(description);
922 }
923 }
924
RemoveInnerTask(const std::string & name)925 void WatchdogInner::RemoveInnerTask(const std::string& name)
926 {
927 if (name.empty()) {
928 XCOLLIE_LOGI("RemoveInnerTask fail, cname is null");
929 return;
930 }
931 std::priority_queue<WatchdogTask> tmpQueue;
932 std::unique_lock<std::mutex> lock(lock_);
933 size_t size = checkerQueue_.size();
934 if (size == 0) {
935 XCOLLIE_LOGE("RemoveInnerTask %{public}s fail, empty queue!", name.c_str());
936 return;
937 }
938 while (!checkerQueue_.empty()) {
939 const WatchdogTask& task = checkerQueue_.top();
940 if (task.name != name) {
941 tmpQueue.push(task);
942 } else {
943 size_t nameSize = taskNameSet_.size();
944 if (nameSize != 0 && !task.isOneshotTask) {
945 taskNameSet_.erase(name);
946 XCOLLIE_LOGD("RemoveInnerTask name %{public}s, remove result=%{public}d",
947 name.c_str(), nameSize > taskNameSet_.size());
948 }
949 }
950 checkerQueue_.pop();
951 }
952 if (tmpQueue.size() == size) {
953 XCOLLIE_LOGE("RemoveInnerTask fail, can not find name %{public}s, size=%{public}zu!",
954 name.c_str(), size);
955 }
956 tmpQueue.swap(checkerQueue_);
957 }
958
InitBeginFunc(const char * name)959 void InitBeginFunc(const char* name)
960 {
961 std::string nameStr(name);
962 WatchdogInner::GetInstance().bussinessBeginTime_ = DistributeStart(nameStr);
963 }
964
InitEndFunc(const char * name)965 void InitEndFunc(const char* name)
966 {
967 std::string nameStr(name);
968 DistributeEnd(nameStr, WatchdogInner::GetInstance().bussinessBeginTime_);
969 }
970
InitMainLooperWatcher(WatchdogInnerBeginFunc * beginFunc,WatchdogInnerEndFunc * endFunc)971 void WatchdogInner::InitMainLooperWatcher(WatchdogInnerBeginFunc* beginFunc,
972 WatchdogInnerEndFunc* endFunc)
973 {
974 int64_t tid = getproctid();
975 if (beginFunc && endFunc) {
976 if (buissnessThreadInfo_.find(tid) != buissnessThreadInfo_.end()) {
977 XCOLLIE_LOGI("Tid =%{public}" PRId64 "already exits, "
978 "no repeated initialization.", tid);
979 return;
980 }
981 if (mainRunner_ != nullptr) {
982 mainRunner_->SetMainLooperWatcher(nullptr, nullptr);
983 }
984 *beginFunc = InitBeginFunc;
985 *endFunc = InitEndFunc;
986 buissnessThreadInfo_.insert(tid);
987 } else {
988 if (buissnessThreadInfo_.find(tid) != buissnessThreadInfo_.end()) {
989 XCOLLIE_LOGI("Remove already init tid=%{public}." PRId64, tid);
990 mainRunner_->SetMainLooperWatcher(DistributeStart, DistributeEnd);
991 buissnessThreadInfo_.erase(tid);
992 }
993 }
994 }
995
SetAppDebug(bool isAppDebug)996 void WatchdogInner::SetAppDebug(bool isAppDebug)
997 {
998 isAppDebug_ = isAppDebug;
999 }
1000
GetAppDebug()1001 bool WatchdogInner::GetAppDebug()
1002 {
1003 return isAppDebug_;
1004 }
1005 } // end of namespace HiviewDFX
1006 } // end of namespace OHOS
1007