• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "watchdog_inner.h"
17 
18 #include <cerrno>
19 #include <climits>
20 #include <mutex>
21 
22 #include <sys/stat.h>
23 #include <sys/types.h>
24 #include <fcntl.h>
25 #include <pthread.h>
26 #include <unistd.h>
27 #include <csignal>
28 #include <string>
29 
30 #include <securec.h>
31 #include <dlfcn.h>
32 
33 #include "backtrace_local.h"
34 #include "hisysevent.h"
35 #include "ipc_skeleton.h"
36 #include "xcollie_utils.h"
37 #include "xcollie_define.h"
38 #include "dfx_define.h"
39 #include "parameter.h"
40 
41 typedef void(*ThreadInfoCallBack)(char* buf, size_t len, void* ucontext);
42 extern "C" void SetThreadInfoCallback(ThreadInfoCallBack func) __attribute__((weak));
43 namespace OHOS {
44 namespace HiviewDFX {
45 namespace {
/** Dump progress markers; enumerators count upwards from DEFAULT (0). */
enum DumpStackState {
    DEFAULT = 0,
    COMPLETE,        // 1
    SAMPLE_COMPLETE  // 2
};
/**
 * Log-type selector read from jankParamsMap[KEY_LOG_TYPE] in DistributeEnd.
 * Enumerators count upwards from LOGTYPE_DEFAULT (-1).
 */
enum CatchLogType {
    LOGTYPE_DEFAULT = -1,
    LOGTYPE_NONE,          // 0
    LOGTYPE_SAMPLE_STACK,  // 1
    LOGTYPE_COLLECT_TRACE  // 2
};
// Names of internal checker tasks (STACK_CHECKER / TRACE_CHECKER are matched in CheckCurrentTask).
constexpr char IPC_CHECKER[] = "IpcChecker";
constexpr char STACK_CHECKER[] = "ThreadSampler";
constexpr char TRACE_CHECKER[] = "TraceCollector";
// Report rate-limit windows, compared against millisecond intervals.
constexpr int64_t ONE_DAY_LIMIT = 86400000;
constexpr int64_t ONE_HOUR_LIMIT = 3600000;
// Factor between the nanosecond time stamps and millisecond values used in this file.
constexpr int MILLISEC_TO_NANOSEC = 1000000;
constexpr int FFRT_BUFFER_SIZE = 512 * 1024;
constexpr int DETECT_STACK_COUNT = 2;
constexpr int COLLECT_STACK_COUNT = 10;
constexpr int COLLECT_TRACE_MIN = 1;
constexpr int COLLECT_TRACE_MAX = 20;
constexpr int TASK_INTERVAL = 155;
constexpr int DURATION_TIME = 150;
// Thresholds applied to the measured dispatch duration in DistributeEnd (milliseconds).
constexpr int DISTRIBUTE_TIME = 2000;
constexpr int DUMPTRACE_TIME = 450;
constexpr const char* const KEY_SCB_STATE = "com.ohos.sceneboard";
constexpr uint64_t DEFAULT_TIMEOUT = 60 * 1000;
constexpr uint32_t FFRT_CALLBACK_TIME = 30 * 1000;
constexpr uint32_t IPC_CHECKER_TIME = 30 * 1000;
constexpr uint32_t TIME_MS_TO_S = 1000;
constexpr int INTERVAL_KICK_TIME = 6 * 1000;
constexpr uint32_t DATA_MANAGE_SERVICE_UID = 3012;
constexpr uint32_t FOUNDATION_UID = 5523;
constexpr uint32_t RENDER_SERVICE_UID = 1003;
constexpr int SERVICE_WARNING = 1;
// Path constants are constexpr so the pointers themselves cannot be reassigned.
constexpr const char* SYS_KERNEL_HUNGTASK_USERLIST = "/sys/kernel/hungtask/userlist";
constexpr const char* HMOS_HUNGTASK_USERLIST = "/proc/sys/hguard/user_list";
const std::string ON_KICK_TIME = "on,72";
const std::string ON_KICK_TIME_HMOS = "on,10,foundation";
const std::string KICK_TIME = "kick";
const std::string KICK_TIME_HMOS = "kick,foundation";
constexpr int32_t NOT_OPEN = -1;
constexpr uint64_t MAX_START_TIME = 10 * 1000;
constexpr const char* LIB_THREAD_SAMPLER_PATH = "libthread_sampler.z.so";
// Size in bytes of each buffer handed to the thread sampler's collect function.
constexpr size_t STACK_LENGTH = 32 * 1024;
constexpr uint32_t JOIN_IPC_FULL_UIDS[] = {DATA_MANAGE_SERVICE_UID, FOUNDATION_UID, RENDER_SERVICE_UID};
constexpr uint64_t SAMPLE_PARAMS_MAX_SIZE = 5;
constexpr uint64_t SAMPLE_PARAMS_MIN_SIZE = 1;
constexpr int MAX_SAMPLE_STACK_TIMES = 2500; // 2.5s
constexpr int SAMPLE_INTERVAL_MIN = 50; // 50ms
constexpr int SAMPLE_INTERVAL_MAX = 500; // 500ms
constexpr int SAMPLE_COUNT_MIN = 1;
constexpr int SAMPLE_REPORT_TIMES_MIN = 1;
constexpr int SAMPLE_REPORT_TIMES_MAX = 3;
constexpr int SAMPLE_EXTRA_COUNT = 4;
constexpr int IGNORE_STARTUP_TIME_MIN = 3; // 3s
103 }
104 
105 std::mutex WatchdogInner::lockFfrt_;
106 static uint64_t g_nextKickTime = GetCurrentTickMillseconds();
107 static int32_t g_fd = NOT_OPEN;
108 static bool g_existFile = true;
109 
110 SigActionType WatchdogInner::threadSamplerSigHandler_ = nullptr;
111 std::mutex WatchdogInner::threadSamplerSignalMutex_;
112 
113 namespace {
ThreadInfo(char * buf,size_t len,void * ucontext)114 void ThreadInfo(char *buf  __attribute__((unused)),
115                 size_t len  __attribute__((unused)),
116                 void* ucontext  __attribute__((unused)))
117 {
118     if (ucontext == nullptr) {
119         return;
120     }
121 
122     auto ret = memcpy_s(buf, len, WatchdogInner::GetInstance().currentScene_.c_str(),
123         WatchdogInner::GetInstance().currentScene_.size());
124     if (ret != 0) {
125         return;
126     }
127 }
128 
/**
 * Adjusts the calling thread's signal mask.
 *
 * The current mask is fetched, signo is added to (isAddSignal) or removed from
 * that copy, and the resulting set as a whole is then applied with SIG_BLOCK
 * (isBlock) or SIG_UNBLOCK.
 *
 * @param signo signal number to add to / remove from the working set
 * @param isAddSignal true to add signo to the set, false to delete it
 * @param isBlock true to block the resulting set, false to unblock it
 */
void SetThreadSignalMask(int signo, bool isAddSignal, bool isBlock)
{
    sigset_t mask;
    sigemptyset(&mask);
    // A null "new set" makes pthread_sigmask only report the current mask.
    pthread_sigmask(SIG_SETMASK, nullptr, &mask);

    if (!isAddSignal) {
        sigdelset(&mask, signo);
    } else {
        sigaddset(&mask, signo);
    }

    const int how = isBlock ? SIG_BLOCK : SIG_UNBLOCK;
    pthread_sigmask(how, &mask, nullptr);
}
145 
// Signals removed from sa_mask in InstallThreadSamplerSignal, so they are not
// blocked while the sampling handler runs.
static const int CRASH_SIGNAL_LIST[] = {
    SIGILL,
    SIGABRT,
    SIGBUS,
    SIGFPE,
    SIGSEGV,
    SIGSTKFLT,
    SIGSYS,
    SIGTRAP,
};
150 }
151 
WatchdogInner()152 WatchdogInner::WatchdogInner()
153     : cntCallback_(0), timeCallback_(0)
154 {
155     currentScene_ = "thread DfxWatchdog: Current scenario is hicollie.\n";
156 }
157 
~WatchdogInner()158 WatchdogInner::~WatchdogInner()
159 {
160     Stop();
161 }
162 
IsInAppspwan()163 static bool IsInAppspwan()
164 {
165     if (getuid() == 0 && GetSelfProcName().find("appspawn") != std::string::npos) {
166         return true;
167     }
168 
169     if (getuid() == 0 && GetSelfProcName().find("nativespawn") != std::string::npos) {
170         return true;
171     }
172 
173     return false;
174 }
175 
SetBundleInfo(const std::string & bundleName,const std::string & bundleVersion)176 void WatchdogInner::SetBundleInfo(const std::string& bundleName, const std::string& bundleVersion)
177 {
178     bundleName_ = bundleName;
179     bundleVersion_ = bundleVersion;
180 }
181 
SetForeground(const bool & isForeground)182 void WatchdogInner::SetForeground(const bool& isForeground)
183 {
184     isForeground_ = isForeground;
185 }
186 
ReportMainThreadEvent(int64_t tid)187 bool WatchdogInner::ReportMainThreadEvent(int64_t tid)
188 {
189     std::string stack = "";
190     std::string heaviestStack = "";
191     CollectStack(stack, heaviestStack);
192 
193     std::string path = "";
194     std::string eventName = "MAIN_THREAD_JANK";
195     if (!buissnessThreadInfo_.empty()) {
196         eventName = "BUSSINESS_THREAD_JANK";
197     }
198     int32_t pid = getprocpid();
199     if (!WriteStackToFd(pid, path, stack, eventName)) {
200         XCOLLIE_LOGI("MainThread WriteStackToFd Failed");
201         return false;
202     }
203     int result = HiSysEventWrite(HiSysEvent::Domain::FRAMEWORK, "MAIN_THREAD_JANK",
204         HiSysEvent::EventType::FAULT,
205         "BUNDLE_VERSION", bundleVersion_,
206         "BUNDLE_NAME", bundleName_,
207         "BEGIN_TIME", stackContent_.reportBegin / MILLISEC_TO_NANOSEC,
208         "END_TIME", stackContent_.reportEnd / MILLISEC_TO_NANOSEC,
209         "EXTERNAL_LOG", path,
210         "STACK", stack,
211         "JANK_LEVEL", 0,
212         "THREAD_NAME", GetSelfProcName(),
213         "FOREGROUND", isForeground_,
214         "LOG_TIME", GetTimeStamp() / MILLISEC_TO_NANOSEC,
215         "APP_START_JIFFIES_TIME", GetAppStartTime(pid, tid),
216         "HEAVIEST_STACK", heaviestStack);
217     XCOLLIE_LOGI("MainThread HiSysEventWrite result=%{public}d", result);
218     return result >= 0;
219 }
220 
CheckEventTimer(int64_t currentTime,int64_t reportBegin,int64_t reportEnd,int interval)221 bool WatchdogInner::CheckEventTimer(int64_t currentTime, int64_t reportBegin, int64_t reportEnd, int interval)
222 {
223     if (reportBegin == timeContent_.curBegin &&
224         reportEnd == timeContent_.curEnd) {
225         return false;
226     }
227     return (timeContent_.curEnd <= timeContent_.curBegin &&
228         (currentTime - timeContent_.curBegin >= interval * MILLISEC_TO_NANOSEC)) ||
229         (timeContent_.curEnd - timeContent_.curBegin > interval * MILLISEC_TO_NANOSEC);
230 }
231 
ThreadSamplerSigHandler(int sig,siginfo_t * si,void * context)232 void WatchdogInner::ThreadSamplerSigHandler(int sig, siginfo_t* si, void* context)
233 {
234     std::lock_guard<std::mutex> lock(threadSamplerSignalMutex_);
235     if (WatchdogInner::threadSamplerSigHandler_ == nullptr) {
236         return;
237     }
238     WatchdogInner::threadSamplerSigHandler_(sig, si, context);
239 }
240 
InstallThreadSamplerSignal()241 bool WatchdogInner::InstallThreadSamplerSignal()
242 {
243     struct sigaction action {};
244     sigfillset(&action.sa_mask);
245     for (size_t i = 0; i < sizeof(CRASH_SIGNAL_LIST) / sizeof(CRASH_SIGNAL_LIST[0]); i++) {
246         sigdelset(&action.sa_mask, CRASH_SIGNAL_LIST[i]);
247     }
248     action.sa_sigaction = WatchdogInner::ThreadSamplerSigHandler;
249     action.sa_flags = SA_RESTART | SA_SIGINFO;
250     if (sigaction(MUSL_SIGNAL_SAMPLE_STACK, &action, nullptr) != 0) {
251         XCOLLIE_LOGE("Failed to register signal(%{public}d:%{public}d)", MUSL_SIGNAL_SAMPLE_STACK, errno);
252         return false;
253     }
254     return true;
255 }
256 
UninstallThreadSamplerSignal()257 void WatchdogInner::UninstallThreadSamplerSignal()
258 {
259     std::lock_guard<std::mutex> lock(threadSamplerSignalMutex_);
260     threadSamplerSigHandler_ = nullptr;
261 }
262 
ThreadSampleTask(int sampleInterval,int sampleCount,int64_t tid)263 void WatchdogInner::ThreadSampleTask(int sampleInterval, int sampleCount, int64_t tid)
264 {
265     if (stackContent_.detectorCount == 0 && stackContent_.collectCount == 0) {
266         XCOLLIE_LOGI("ThreadSampler 1st in ThreadSamplerTask.\n");
267         if (!InitThreadSamplerFuncs()) {
268             isMainThreadStackEnabled_ = true;
269             XCOLLIE_LOGE("ThreadSampler initialize failed.\n");
270             return;
271         }
272 
273         if (!InstallThreadSamplerSignal()) {
274             isMainThreadStackEnabled_ = true;
275             XCOLLIE_LOGE("ThreadSampler install signal failed.\n");
276             return;
277         }
278 
279         int initThreadSamplerRet = threadSamplerInitFunc_(COLLECT_STACK_COUNT);
280         if (initThreadSamplerRet != 0) {
281             isMainThreadStackEnabled_ = true;
282             XCOLLIE_LOGE("Thread sampler init failed. ret %{public}d\n", initThreadSamplerRet);
283             return;
284         }
285         XCOLLIE_LOGI("Thread sampler initialized. ret %{public}d\n", initThreadSamplerRet);
286     }
287     if (threadSamplerSampleFunc_ == nullptr) {
288         isMainThreadStackEnabled_ = true;
289         return;
290     }
291     if (stackContent_.collectCount > DumpStackState::DEFAULT && stackContent_.collectCount < sampleCount) {
292         XCOLLIE_LOGI("ThreadSampler in ThreadSamplerTask, %{public}d.\n", stackContent_.collectCount);
293         threadSamplerSampleFunc_();
294         stackContent_.collectCount++;
295     } else if (stackContent_.collectCount == sampleCount) {
296         ReportMainThreadEvent(tid);
297         stackContent_.reportTimes--;
298         isMainThreadStackEnabled_ = true;
299         return;
300     } else {
301         if (CheckEventTimer(GetTimeStamp(), stackContent_.reportBegin,
302             stackContent_.reportEnd, sampleInterval)) {
303             threadSamplerSampleFunc_();
304             stackContent_.collectCount++;
305         } else {
306             stackContent_.detectorCount++;
307         }
308     }
309     if (stackContent_.detectorCount == DETECT_STACK_COUNT) {
310         isMainThreadStackEnabled_ = true;
311     }
312 }
313 
InitThreadSamplerFuncs()314 bool WatchdogInner::InitThreadSamplerFuncs()
315 {
316     threadSamplerFuncHandler_ = dlopen(LIB_THREAD_SAMPLER_PATH, RTLD_LAZY);
317     if (threadSamplerFuncHandler_ == nullptr) {
318         XCOLLIE_LOGE("dlopen failed, funcHandler is nullptr.\n");
319         return false;
320     }
321 
322     threadSamplerInitFunc_ =
323         reinterpret_cast<ThreadSamplerInitFunc>(FunctionOpen(threadSamplerFuncHandler_, "ThreadSamplerInit"));
324     threadSamplerSampleFunc_ =
325         reinterpret_cast<ThreadSamplerSampleFunc>(FunctionOpen(threadSamplerFuncHandler_, "ThreadSamplerSample"));
326     threadSamplerCollectFunc_ =
327         reinterpret_cast<ThreadSamplerCollectFunc>(FunctionOpen(threadSamplerFuncHandler_, "ThreadSamplerCollect"));
328     threadSamplerDeinitFunc_ =
329         reinterpret_cast<ThreadSamplerDeinitFunc>(FunctionOpen(threadSamplerFuncHandler_, "ThreadSamplerDeinit"));
330     threadSamplerSigHandler_ =
331         reinterpret_cast<SigActionType>(FunctionOpen(threadSamplerFuncHandler_, "ThreadSamplerSigHandler"));
332     if (threadSamplerInitFunc_ == nullptr || threadSamplerSampleFunc_ == nullptr ||
333         threadSamplerCollectFunc_ == nullptr || threadSamplerDeinitFunc_ == nullptr ||
334         threadSamplerSigHandler_ == nullptr) {
335         ResetThreadSamplerFuncs();
336         XCOLLIE_LOGE("ThreadSampler dlsym some function failed.\n");
337         return false;
338     }
339     XCOLLIE_LOGE("ThreadSampler has been successfully loaded.\n");
340     return true;
341 }
342 
ResetThreadSamplerFuncs()343 void WatchdogInner::ResetThreadSamplerFuncs()
344 {
345     threadSamplerInitFunc_ = nullptr;
346     threadSamplerSampleFunc_ = nullptr;
347     threadSamplerCollectFunc_ = nullptr;
348     threadSamplerDeinitFunc_ = nullptr;
349     threadSamplerSigHandler_ = nullptr;
350     dlclose(threadSamplerFuncHandler_);
351     threadSamplerFuncHandler_ = nullptr;
352 }
353 
UpdateTime(int64_t & reportBegin,int64_t & reportEnd,TimePoint & lastEndTime,const TimePoint & endTime)354 void WatchdogInner::UpdateTime(int64_t& reportBegin, int64_t& reportEnd,
355     TimePoint& lastEndTime, const TimePoint& endTime)
356 {
357     reportBegin = timeContent_.curBegin;
358     reportEnd = timeContent_.curEnd;
359     lastEndTime = endTime;
360 }
361 
SampleStackDetect(const TimePoint & endTime,int64_t durationTime,int sampleInterval)362 void WatchdogInner::SampleStackDetect(const TimePoint& endTime, int64_t durationTime, int sampleInterval)
363 {
364     uint64_t startUpTime = static_cast<uint64_t>(jankParamsMap[KEY_IGNORE_STARTUP_TIME]) * TIME_MS_TO_S;
365     if (GetCurrentTickMillseconds() - watchdogStartTime_ < startUpTime) {
366         XCOLLIE_LOGI("Application is in starting period.\n");
367         return;
368     }
369     if (!stackContent_.isStartSampleEnabled) {
370         XCOLLIE_LOGI("Current sample detection task is being executed.\n");
371         return;
372     }
373     if (stackContent_.reportTimes <= 0) {
374         int64_t checkTimer = ONE_DAY_LIMIT;
375         if (IsDeveloperOpen() || (IsBetaVersion() && GetProcessNameFromProcCmdline(getpid()) == KEY_SCB_STATE)) {
376             checkTimer = ONE_HOUR_LIMIT;
377         }
378         auto diff = endTime - stackContent_.lastEndTime;
379         int64_t intervalTime = std::chrono::duration_cast<std::chrono::milliseconds>(diff).count();
380         if (intervalTime < checkTimer) {
381             return;
382         }
383         stackContent_.reportTimes = jankParamsMap[KEY_SAMPLE_REPORT_TIMES];
384         XCOLLIE_LOGI("The current thread has exceeded the event limit, reportTimes: %{public}d",
385             stackContent_.reportTimes);
386     }
387     stackContent_.isStartSampleEnabled = false;
388     UpdateTime(stackContent_.reportBegin, stackContent_.reportEnd, stackContent_.lastEndTime, endTime);
389     int32_t ret = StartProfileMainThread(sampleInterval);
390     if (ret == -1) {
391         stackContent_.isStartSampleEnabled = true;
392     }
393     XCOLLIE_LOGI("MainThread StartProfileMainThread ret: %{public}d "
394         "durationTime: %{public}" PRId64 " ms sampleInterval: %{public}d", ret, durationTime, sampleInterval);
395 }
396 
397 
StartProfileMainThread(int32_t interval)398 int32_t WatchdogInner::StartProfileMainThread(int32_t interval)
399 {
400     std::unique_lock<std::mutex> lock(lock_);
401     stackContent_.detectorCount = 0;
402     stackContent_.collectCount = 0;
403     int sampleCount = jankParamsMap[KEY_SAMPLE_COUNT];
404     int64_t tid = getproctid();
405     auto sampleTask = [this, interval, sampleCount, tid]() {
406         ThreadSampleTask(interval, sampleCount, tid);
407     };
408 
409     WatchdogTask task("ThreadSampler", sampleTask, 0, interval, true);
410     InsertWatchdogTaskLocked("ThreadSampler", std::move(task));
411     return 0;
412 }
413 
CollectStack(std::string & stack,std::string & heaviestStack)414 bool WatchdogInner::CollectStack(std::string& stack, std::string& heaviestStack)
415 {
416     if (threadSamplerCollectFunc_ == nullptr) {
417         return false;
418     }
419     int treeFormat = 1;
420     char* stk = new char[STACK_LENGTH];
421     char* heaviest = new char[STACK_LENGTH];
422     int collectRet = threadSamplerCollectFunc_(stk, heaviest, STACK_LENGTH, STACK_LENGTH, treeFormat);
423     stack = stk;
424     heaviestStack = heaviest;
425     delete[] stk;
426     delete[] heaviest;
427     return collectRet == 0;
428 }
429 
Deinit()430 bool WatchdogInner::Deinit()
431 {
432     if (threadSamplerDeinitFunc_ == nullptr) {
433         return false;
434     }
435     UninstallThreadSamplerSignal();
436     int ret = threadSamplerDeinitFunc_();
437     return ret == 0;
438 }
439 
DumpTraceProfile(int32_t interval)440 void WatchdogInner::DumpTraceProfile(int32_t interval)
441 {
442     traceContent_.dumpCount = 0;
443     traceContent_.traceCount = 0;
444     auto traceTask = [this, interval]() {
445         traceContent_.traceCount++;
446         if (CheckEventTimer(GetTimeStamp(), traceContent_.reportBegin,
447             traceContent_.reportEnd, interval)) {
448             traceContent_.dumpCount++;
449         }
450         if (traceContent_.traceCount >= COLLECT_TRACE_MAX) {
451             if (traceContent_.dumpCount >= COLLECT_TRACE_MIN) {
452                 CreateWatchdogDir();
453                 appCaller_.actionId = UCollectClient::ACTION_ID_DUMP_TRACE;
454                 appCaller_.isBusinessJank = !buissnessThreadInfo_.empty();
455                 auto result = traceCollector_->CaptureDurationTrace(appCaller_);
456                 XCOLLIE_LOGI("MainThread TraceCollector Dump result: %{public}d", result.retCode);
457             }
458             isMainThreadTraceEnabled_ = true;
459         }
460     };
461     WatchdogTask task("TraceCollector", traceTask, 0, interval, true);
462     std::unique_lock<std::mutex> lock(lock_);
463     InsertWatchdogTaskLocked("TraceCollector", std::move(task));
464 }
465 
StartTraceProfile()466 int32_t WatchdogInner::StartTraceProfile()
467 {
468     traceCollector_ = UCollectClient::TraceCollector::Create();
469     if (traceCollector_ == nullptr) {
470         traceContent_.traceState = DumpStackState::DEFAULT;
471         XCOLLIE_LOGE("Create traceCollector failed.");
472         return -1;
473     }
474     appCaller_.actionId = UCollectClient::ACTION_ID_START_TRACE;
475     appCaller_.bundleName = bundleName_;
476     appCaller_.bundleVersion = bundleVersion_;
477     appCaller_.uid = static_cast<int64_t>(getuid());
478     appCaller_.pid = getprocpid();
479     appCaller_.threadName = GetSelfProcName();
480     appCaller_.foreground = isForeground_;
481     appCaller_.happenTime = GetTimeStamp() / MILLISEC_TO_NANOSEC;
482     appCaller_.beginTime = traceContent_.reportBegin / MILLISEC_TO_NANOSEC;
483     appCaller_.endTime = traceContent_.reportEnd / MILLISEC_TO_NANOSEC;
484     auto result = traceCollector_->CaptureDurationTrace(appCaller_);
485     if (result.retCode == 0) {
486         DumpTraceProfile(DURATION_TIME);
487     }
488     return result.retCode;
489 }
490 
CollectTraceDetect(const TimePoint & endTime,int64_t durationTime)491 void WatchdogInner::CollectTraceDetect(const TimePoint& endTime, int64_t durationTime)
492 {
493     if (IsBetaVersion()) {
494         return;
495     }
496     if (traceContent_.traceState == DumpStackState::COMPLETE) {
497         auto diff = endTime - stackContent_.lastEndTime;
498         int64_t intervalTime = std::chrono::duration_cast<std::chrono::milliseconds>(diff).count();
499         if (intervalTime < ONE_DAY_LIMIT) {
500             return;
501         }
502     }
503     traceContent_.traceState = DumpStackState::COMPLETE;
504     UpdateTime(traceContent_.reportBegin, traceContent_.reportEnd, traceContent_.lastEndTime, endTime);
505     int32_t result = StartTraceProfile();
506     XCOLLIE_LOGI("MainThread TraceCollector Start result: %{public}d, Duration Time: %{public}" PRId64 " ms",
507         result, durationTime);
508 }
509 
DistributeStart(const std::string & name)510 static TimePoint DistributeStart(const std::string& name)
511 {
512     WatchdogInner::GetInstance().timeContent_.curBegin = GetTimeStamp();
513     return std::chrono::steady_clock::now();
514 }
515 
DistributeEnd(const std::string & name,const TimePoint & startTime)516 static void DistributeEnd(const std::string& name, const TimePoint& startTime)
517 {
518     TimePoint endTime = std::chrono::steady_clock::now();
519     auto duration = endTime - startTime;
520     int64_t durationTime = std::chrono::duration_cast<std::chrono::milliseconds>(duration).count();
521     int sampleInterval = WatchdogInner::GetInstance().jankParamsMap[KEY_SAMPLE_INTERVAL];
522     WatchdogInner::GetInstance().timeContent_.curEnd = GetTimeStamp();
523     if (duration > std::chrono::milliseconds(sampleInterval)) {
524         int logType = WatchdogInner::GetInstance().jankParamsMap[KEY_LOG_TYPE];
525         switch (logType) {
526             case CatchLogType::LOGTYPE_SAMPLE_STACK: {
527                 WatchdogInner::GetInstance().SampleStackDetect(endTime, durationTime, sampleInterval);
528                 break;
529             }
530             case CatchLogType::LOGTYPE_COLLECT_TRACE: {
531                 if (duration > std::chrono::milliseconds(DUMPTRACE_TIME)) {
532                     WatchdogInner::GetInstance().CollectTraceDetect(endTime, durationTime);
533                 }
534                 break;
535             }
536             case CatchLogType::LOGTYPE_NONE: {
537                 if (duration < std::chrono::milliseconds(DUMPTRACE_TIME)) {
538                     WatchdogInner::GetInstance().SampleStackDetect(endTime, durationTime, sampleInterval);
539                 } else {
540                     WatchdogInner::GetInstance().CollectTraceDetect(endTime, durationTime);
541                 }
542                 break;
543             }
544             default:
545                 break;
546         }
547     }
548     if (duration > std::chrono::milliseconds(DISTRIBUTE_TIME)) {
549         XCOLLIE_LOGI("BlockMonitor event name: %{public}s, Duration Time: %{public}" PRId64 " ms",
550             name.c_str(), durationTime);
551     }
552 }
553 
AddThread(const std::string & name,std::shared_ptr<AppExecFwk::EventHandler> handler,TimeOutCallback timeOutCallback,uint64_t interval)554 int WatchdogInner::AddThread(const std::string &name,
555     std::shared_ptr<AppExecFwk::EventHandler> handler, TimeOutCallback timeOutCallback, uint64_t interval)
556 {
557     if (name.empty() || handler == nullptr) {
558         XCOLLIE_LOGE("Add thread fail, invalid args!");
559         return -1;
560     }
561 
562     if (IsInAppspwan()) {
563         return -1;
564     }
565 
566     std::string limitedName = GetLimitedSizeName(name);
567     XCOLLIE_LOGI("Add thread %{public}s to watchdog.", limitedName.c_str());
568     std::unique_lock<std::mutex> lock(lock_);
569 
570     IpcCheck();
571 
572     if (!InsertWatchdogTaskLocked(limitedName, WatchdogTask(limitedName, handler, timeOutCallback, interval))) {
573         return -1;
574     }
575     return 0;
576 }
577 
RunOneShotTask(const std::string & name,Task && task,uint64_t delay)578 void WatchdogInner::RunOneShotTask(const std::string& name, Task&& task, uint64_t delay)
579 {
580     if (name.empty() || task == nullptr) {
581         XCOLLIE_LOGE("Add task fail, invalid args!");
582         return;
583     }
584 
585     if (IsInAppspwan()) {
586         return;
587     }
588 
589     std::unique_lock<std::mutex> lock(lock_);
590     std::string limitedName = GetLimitedSizeName(name);
591     InsertWatchdogTaskLocked(limitedName, WatchdogTask(limitedName, std::move(task), delay, 0, true));
592 }
593 
RunXCollieTask(const std::string & name,uint64_t timeout,XCollieCallback func,void * arg,unsigned int flag)594 int64_t WatchdogInner::RunXCollieTask(const std::string& name, uint64_t timeout, XCollieCallback func,
595     void *arg, unsigned int flag)
596 {
597     if (name.empty() || timeout == 0) {
598         XCOLLIE_LOGE("Add XCollieTask fail, invalid args!");
599         return INVALID_ID;
600     }
601 
602     if (IsInAppspwan()) {
603         return INVALID_ID;
604     }
605 
606     std::unique_lock<std::mutex> lock(lock_);
607     IpcCheck();
608     std::string limitedName = GetLimitedSizeName(name);
609     return InsertWatchdogTaskLocked(limitedName, WatchdogTask(limitedName, timeout, func, arg, flag));
610 }
611 
RemoveXCollieTask(int64_t id)612 void WatchdogInner::RemoveXCollieTask(int64_t id)
613 {
614     std::priority_queue<WatchdogTask> tmpQueue;
615     std::unique_lock<std::mutex> lock(lock_);
616     size_t size = checkerQueue_.size();
617     if (size == 0) {
618         XCOLLIE_LOGE("Remove XCollieTask %{public}lld fail, empty queue!", static_cast<long long>(id));
619         return;
620     }
621     while (!checkerQueue_.empty()) {
622         const WatchdogTask& task = checkerQueue_.top();
623         if (task.id != id || task.timeout == 0) {
624             tmpQueue.push(task);
625         }
626         checkerQueue_.pop();
627     }
628     if (tmpQueue.size() == size) {
629         XCOLLIE_LOGE("Remove XCollieTask fail, can not find timer %{public}lld, size=%{public}zu!",
630             static_cast<long long>(id), size);
631     }
632     tmpQueue.swap(checkerQueue_);
633 }
634 
RunPeriodicalTask(const std::string & name,Task && task,uint64_t interval,uint64_t delay)635 void WatchdogInner::RunPeriodicalTask(const std::string& name, Task&& task, uint64_t interval, uint64_t delay)
636 {
637     if (name.empty() || task == nullptr) {
638         XCOLLIE_LOGE("Add task fail, invalid args!");
639         return;
640     }
641 
642     if (IsInAppspwan()) {
643         return;
644     }
645 
646     std::string limitedName = GetLimitedSizeName(name);
647     XCOLLIE_LOGD("Add periodical task %{public}s to watchdog.", name.c_str());
648     std::unique_lock<std::mutex> lock(lock_);
649     InsertWatchdogTaskLocked(limitedName, WatchdogTask(limitedName, std::move(task), delay, interval, false));
650 }
651 
SetTimerCountTask(const std::string & name,uint64_t timeLimit,int countLimit)652 int64_t WatchdogInner::SetTimerCountTask(const std::string &name, uint64_t timeLimit, int countLimit)
653 {
654     if (name.empty() || timeLimit == 0 || countLimit <= 0) {
655         XCOLLIE_LOGE("SetTimerCountTask fail, invalid args!");
656         return INVALID_ID;
657     }
658 
659     if (IsInAppspwan()) {
660         return INVALID_ID;
661     }
662     std::string limitedName = GetLimitedSizeName(name);
663     XCOLLIE_LOGD("SetTimerCountTask name : %{public}s", name.c_str());
664     std::unique_lock<std::mutex> lock(lock_);
665     return InsertWatchdogTaskLocked(limitedName, WatchdogTask(limitedName, timeLimit, countLimit));
666 }
667 
TriggerTimerCountTask(const std::string & name,bool bTrigger,const std::string & message)668 void WatchdogInner::TriggerTimerCountTask(const std::string &name, bool bTrigger, const std::string &message)
669 {
670     std::unique_lock<std::mutex> lock(lock_);
671 
672     if (checkerQueue_.empty()) {
673         XCOLLIE_LOGE("TriggerTimerCountTask name : %{public}s fail, empty queue!", name.c_str());
674         return;
675     }
676 
677     bool isTaskExist = false;
678     uint64_t now = GetCurrentTickMillseconds();
679     std::priority_queue<WatchdogTask> tmpQueue;
680     while (!checkerQueue_.empty()) {
681         WatchdogTask task = checkerQueue_.top();
682         if (task.name == name) {
683             isTaskExist = true;
684             if (bTrigger) {
685                 task.triggerTimes.push_back(now);
686                 task.message = message;
687             } else {
688                 task.triggerTimes.clear();
689             }
690         }
691         tmpQueue.push(task);
692         checkerQueue_.pop();
693     }
694     tmpQueue.swap(checkerQueue_);
695 
696     if (!isTaskExist) {
697         XCOLLIE_LOGE("TriggerTimerCount name : %{public}s does not exist!", name.c_str());
698     }
699 }
700 
IsTaskExistLocked(const std::string & name)701 bool WatchdogInner::IsTaskExistLocked(const std::string& name)
702 {
703     return (taskNameSet_.find(name) != taskNameSet_.end());
704 }
705 
IsExceedMaxTaskLocked()706 bool WatchdogInner::IsExceedMaxTaskLocked()
707 {
708     if (checkerQueue_.size() >= MAX_WATCH_NUM) {
709         XCOLLIE_LOGE("Exceed max watchdog task!");
710         return true;
711     }
712 
713     return false;
714 }
715 
InsertWatchdogTaskLocked(const std::string & name,WatchdogTask && task)716 int64_t WatchdogInner::InsertWatchdogTaskLocked(const std::string& name, WatchdogTask&& task)
717 {
718     if (!task.isOneshotTask && IsTaskExistLocked(name)) {
719         XCOLLIE_LOGI("Task with %{public}s already exist, failed to insert.", name.c_str());
720         return 0;
721     }
722 
723     if (IsExceedMaxTaskLocked()) {
724         XCOLLIE_LOGE("Exceed max watchdog task, failed to insert.");
725         return 0;
726     }
727     int64_t id = task.id;
728     checkerQueue_.push(std::move(task));
729     if (!task.isOneshotTask) {
730         taskNameSet_.insert(name);
731     }
732     CreateWatchdogThreadIfNeed();
733     condition_.notify_all();
734 
735     return id;
736 }
737 
StopWatchdog()738 void WatchdogInner::StopWatchdog()
739 {
740     Stop();
741 }
742 
IsCallbackLimit(unsigned int flag)743 bool WatchdogInner::IsCallbackLimit(unsigned int flag)
744 {
745     bool ret = false;
746     time_t startTime = time(nullptr);
747     if (!(flag & XCOLLIE_FLAG_LOG)) {
748         return ret;
749     }
750     if (timeCallback_ + XCOLLIE_CALLBACK_TIMEWIN_MAX < startTime) {
751         timeCallback_ = startTime;
752     } else {
753         if (++cntCallback_ > XCOLLIE_CALLBACK_HISTORY_MAX) {
754             ret = true;
755         }
756     }
757     return ret;
758 }
759 
IPCProxyLimitCallback(uint64_t num)760 void IPCProxyLimitCallback(uint64_t num)
761 {
762     XCOLLIE_LOGE("ipc proxy num %{public}" PRIu64 " exceed limit", num);
763     if (getuid() >= MIN_APP_UID && IsBetaVersion()) {
764         XCOLLIE_LOGI("Process is going to exit, reason: ipc proxy num exceed limit");
765         _exit(0);
766     }
767 }
768 
CreateWatchdogThreadIfNeed()769 void WatchdogInner::CreateWatchdogThreadIfNeed()
770 {
771     std::call_once(flag_, [this] {
772         if (threadLoop_ == nullptr) {
773             if (mainRunner_ == nullptr) {
774                 mainRunner_ = AppExecFwk::EventRunner::GetMainEventRunner();
775             }
776             mainRunner_->SetMainLooperWatcher(DistributeStart, DistributeEnd);
777             const uint64_t limitNum = 20000;
778             IPCDfx::SetIPCProxyLimit(limitNum, IPCProxyLimitCallback);
779             threadLoop_ = std::make_unique<std::thread>(&WatchdogInner::Start, this);
780             if (getpid() == gettid()) {
781                 SetThreadSignalMask(SIGDUMP, true, true);
782             }
783             XCOLLIE_LOGD("Watchdog is running!");
784         }
785     });
786 }
787 
CheckCurrentTask(const WatchdogTask & queuedTaskCheck)788 bool WatchdogInner::CheckCurrentTask(const WatchdogTask& queuedTaskCheck)
789 {
790     if (queuedTaskCheck.name.empty()) {
791         checkerQueue_.pop();
792         XCOLLIE_LOGW("queuedTask name is empty.");
793     } else if (queuedTaskCheck.name == STACK_CHECKER && isMainThreadStackEnabled_) {
794         checkerQueue_.pop();
795         taskNameSet_.erase("ThreadSampler");
796         if (Deinit()) {
797             ResetThreadSamplerFuncs();
798         }
799         stackContent_.isStartSampleEnabled = true;
800         isMainThreadStackEnabled_ = false;
801         XCOLLIE_LOGI("Detect sample stack task complete.");
802     } else if (queuedTaskCheck.name == TRACE_CHECKER && isMainThreadTraceEnabled_) {
803         checkerQueue_.pop();
804         taskNameSet_.erase("TraceCollector");
805         isMainThreadTraceEnabled_ = false;
806         if (traceContent_.dumpCount < COLLECT_TRACE_MIN) {
807             traceContent_.traceState = DumpStackState::DEFAULT;
808         }
809         XCOLLIE_LOGI("Detect collect trace task complete.");
810     } else {
811         return false;
812     }
813     return true;
814 }
815 
FetchNextTask(uint64_t now,WatchdogTask & task)816 uint64_t WatchdogInner::FetchNextTask(uint64_t now, WatchdogTask& task)
817 {
818     std::unique_lock<std::mutex> lock(lock_);
819     if (isNeedStop_) {
820         while (!checkerQueue_.empty()) {
821             checkerQueue_.pop();
822         }
823         return DEFAULT_TIMEOUT;
824     }
825 
826     if (checkerQueue_.empty()) {
827         return DEFAULT_TIMEOUT;
828     }
829 
830     const WatchdogTask& queuedTaskCheck = checkerQueue_.top();
831     if (CheckCurrentTask(queuedTaskCheck) && checkerQueue_.empty()) {
832         return DEFAULT_TIMEOUT;
833     }
834 
835     const WatchdogTask& queuedTask = checkerQueue_.top();
836     if (g_existFile && queuedTask.name == IPC_FULL && now - g_nextKickTime > INTERVAL_KICK_TIME) {
837         if (KickWatchdog()) {
838             g_nextKickTime = now;
839         }
840     }
841     if (queuedTask.nextTickTime > now) {
842         return queuedTask.nextTickTime - now;
843     }
844 
845     currentScene_ = "thread DfxWatchdog: Current scenario is task name: " + queuedTask.name + "\n";
846     task = queuedTask;
847     checkerQueue_.pop();
848     return 0;
849 }
850 
ReInsertTaskIfNeed(WatchdogTask & task)851 void WatchdogInner::ReInsertTaskIfNeed(WatchdogTask& task)
852 {
853     if (task.checkInterval == 0) {
854         return;
855     }
856 
857     std::unique_lock<std::mutex> lock(lock_);
858     task.nextTickTime = task.nextTickTime + task.checkInterval;
859     checkerQueue_.push(task);
860 }
861 
Start()862 bool WatchdogInner::Start()
863 {
864     if (pthread_setname_np(pthread_self(), "OS_DfxWatchdog") != 0) {
865         XCOLLIE_LOGW("Failed to set threadName for watchdog, errno:%d.", errno);
866     }
867     SetThreadSignalMask(SIGDUMP, false, false);
868     watchdogStartTime_ = GetCurrentTickMillseconds();
869     XCOLLIE_LOGD("Watchdog is running in thread(%{public}d)!", getproctid());
870     if (SetThreadInfoCallback != nullptr) {
871         SetThreadInfoCallback(ThreadInfo);
872         XCOLLIE_LOGD("Watchdog Set Thread Info Callback");
873     }
874     while (!isNeedStop_) {
875         uint64_t now = GetCurrentTickMillseconds();
876         WatchdogTask task;
877         uint64_t leftTimeMill = FetchNextTask(now, task);
878         if (leftTimeMill == 0) {
879             task.Run(now);
880             ReInsertTaskIfNeed(task);
881             currentScene_ = "thread DfxWatchdog: Current scenario is hicollie.\n";
882             continue;
883         } else if (isNeedStop_) {
884             break;
885         } else {
886             std::unique_lock<std::mutex> lock(lock_);
887             condition_.wait_for(lock, std::chrono::milliseconds(leftTimeMill));
888         }
889     }
890     if (SetThreadInfoCallback != nullptr) {
891         SetThreadInfoCallback(nullptr);
892     }
893     return true;
894 }
895 
SendMsgToHungtask(const std::string & msg)896 bool WatchdogInner::SendMsgToHungtask(const std::string& msg)
897 {
898     if (g_fd == NOT_OPEN) {
899         return false;
900     }
901 
902     ssize_t watchdogWrite = write(g_fd, msg.c_str(), msg.size());
903     if (watchdogWrite < 0 || watchdogWrite != static_cast<ssize_t>(msg.size())) {
904         XCOLLIE_LOGE("watchdogWrite msg failed");
905         close(g_fd);
906         g_fd = NOT_OPEN;
907         return false;
908     }
909     XCOLLIE_LOGE("Send %{public}s to hungtask Successful\n", msg.c_str());
910     return true;
911 }
912 
KickWatchdog()913 bool WatchdogInner::KickWatchdog()
914 {
915     if (g_fd == NOT_OPEN) {
916         g_fd = open(SYS_KERNEL_HUNGTASK_USERLIST, O_WRONLY);
917         if (g_fd < 0) {
918             g_fd = open(HMOS_HUNGTASK_USERLIST, O_WRONLY);
919             if (g_fd < 0) {
920                 XCOLLIE_LOGE("can't open hungtask file");
921                 g_existFile = false;
922                 return false;
923             }
924             XCOLLIE_LOGE("change to hmos kernel");
925             isHmos = true;
926         } else {
927             XCOLLIE_LOGE("change to linux kernel");
928         }
929 
930         if (!SendMsgToHungtask(isHmos ? ON_KICK_TIME_HMOS : ON_KICK_TIME)) {
931             XCOLLIE_LOGE("KickWatchdog SendMsgToHungtask false");
932             return false;
933         }
934     }
935     return SendMsgToHungtask(isHmos ? KICK_TIME_HMOS : KICK_TIME);
936 }
937 
IpcCheck()938 void WatchdogInner::IpcCheck()
939 {
940     static bool isIpcCheckInit = false;
941     if (isIpcCheckInit) {
942         return;
943     }
944 
945     uint32_t uid = getuid();
946     bool isJoinIpcFullUid = std::any_of(std::begin(JOIN_IPC_FULL_UIDS), std::end(JOIN_IPC_FULL_UIDS),
947         [uid](const uint32_t joinIpcFullUid) { return uid == joinIpcFullUid; });
948     if (isJoinIpcFullUid || GetSelfProcName() == KEY_SCB_STATE) {
949         if (binderCheckHander_ == nullptr) {
950             auto runner = AppExecFwk::EventRunner::Create(IPC_CHECKER);
951             binderCheckHander_ = std::make_shared<AppExecFwk::EventHandler>(runner);
952             if (!InsertWatchdogTaskLocked(IPC_CHECKER, WatchdogTask(IPC_FULL, binderCheckHander_,
953                 nullptr, IPC_CHECKER_TIME))) {
954                 XCOLLIE_LOGE("Add %{public}s thread fail", IPC_CHECKER);
955             }
956         }
957     }
958     isIpcCheckInit = true;
959 }
960 
WriteStringToFile(int32_t pid,const char * str)961 void WatchdogInner::WriteStringToFile(int32_t pid, const char *str)
962 {
963     char file[PATH_LEN] = {0};
964     if (snprintf_s(file, PATH_LEN, PATH_LEN - 1, "/proc/%d/unexpected_die_catch", pid) == -1) {
965         XCOLLIE_LOGE("failed to build path for %{public}d.", pid);
966         return;
967     }
968     int fd = open(file, O_RDWR);
969     if (fd == -1) {
970         return;
971     }
972     if (write(fd, str, strlen(str)) < 0) {
973         XCOLLIE_LOGI("failed to write 0 for %{public}s", file);
974     }
975     close(fd);
976     return;
977 }
978 
FfrtCallback(uint64_t taskId,const char * taskInfo,uint32_t delayedTaskCount)979 void WatchdogInner::FfrtCallback(uint64_t taskId, const char *taskInfo, uint32_t delayedTaskCount)
980 {
981     std::string description = "FfrtCallback: task(";
982     description += taskInfo;
983     description += ") blocked " + std::to_string(FFRT_CALLBACK_TIME / TIME_MS_TO_S) + "s";
984     std::string info(taskInfo);
985     if (info.find("Queue_Schedule_Timeout") != std::string::npos) {
986         WatchdogInner::SendFfrtEvent(description, "SERVICE_WARNING", taskInfo, false);
987         description += ", report twice instead of exiting process.";
988         WatchdogInner::SendFfrtEvent(description, "SERVICE_BLOCK", taskInfo);
989         WatchdogInner::KillPeerBinderProcess(description);
990         return;
991     }
992     bool isExist = false;
993     {
994         std::unique_lock<std::mutex> lock(lockFfrt_);
995         auto &map = WatchdogInner::GetInstance().taskIdCnt;
996         auto search = map.find(taskId);
997         if (search != map.end()) {
998             isExist = true;
999         } else {
1000             map[taskId] = SERVICE_WARNING;
1001         }
1002     }
1003 
1004     if (isExist) {
1005         description += ", report twice instead of exiting process."; // 1s = 1000ms
1006         WatchdogInner::SendFfrtEvent(description, "SERVICE_BLOCK", taskInfo);
1007         WatchdogInner::GetInstance().taskIdCnt.erase(taskId);
1008         WatchdogInner::KillPeerBinderProcess(description);
1009     } else {
1010         WatchdogInner::SendFfrtEvent(description, "SERVICE_WARNING", taskInfo);
1011     }
1012 }
1013 
InitFfrtWatchdog()1014 void WatchdogInner::InitFfrtWatchdog()
1015 {
1016     CreateWatchdogThreadIfNeed();
1017     ffrt_task_timeout_set_cb(FfrtCallback);
1018     ffrt_task_timeout_set_threshold(FFRT_CALLBACK_TIME);
1019     std::unique_lock<std::mutex> lock(lock_);
1020     IpcCheck();
1021 }
1022 
SendFfrtEvent(const std::string & msg,const std::string & eventName,const char * taskInfo,const bool isDumpStack)1023 void WatchdogInner::SendFfrtEvent(const std::string &msg, const std::string &eventName, const char * taskInfo,
1024     const bool isDumpStack)
1025 {
1026     int32_t pid = getprocpid();
1027     if (IsProcessDebug(pid)) {
1028         XCOLLIE_LOGI("heap dump or debug for %{public}d, don't report.", pid);
1029         return;
1030     }
1031     uint32_t gid = getgid();
1032     uint32_t uid = getuid();
1033     time_t curTime = time(nullptr);
1034     std::string sendMsg = std::string((ctime(&curTime) == nullptr) ? "" : ctime(&curTime)) +
1035         "\n" + msg + "\n";
1036     char* buffer = new char[FFRT_BUFFER_SIZE + 1]();
1037     buffer[FFRT_BUFFER_SIZE] = 0;
1038     ffrt_dump(DUMP_INFO_ALL, buffer, FFRT_BUFFER_SIZE);
1039     sendMsg += buffer;
1040     delete[] buffer;
1041     int32_t tid = pid;
1042     GetFfrtTaskTid(tid, sendMsg);
1043     int ret = HiSysEventWrite(HiSysEvent::Domain::FRAMEWORK, eventName, HiSysEvent::EventType::FAULT,
1044         "PID", pid, "TID", tid, "TGID", gid, "UID", uid, "MODULE_NAME", taskInfo, "PROCESS_NAME", GetSelfProcName(),
1045         "MSG", sendMsg, "STACK", isDumpStack ? GetProcessStacktrace() : "");
1046     XCOLLIE_LOGI("hisysevent write result=%{public}d, send event [FRAMEWORK,%{public}s], "
1047         "msg=%{public}s", ret, eventName.c_str(), msg.c_str());
1048 }
1049 
GetFfrtTaskTid(int32_t & tid,const std::string & msg)1050 void WatchdogInner::GetFfrtTaskTid(int32_t& tid, const std::string& msg)
1051 {
1052     std::string queueNameFrontStr = "us. queue name [";
1053     size_t queueNameFrontPos = msg.find(queueNameFrontStr);
1054     if (queueNameFrontPos == std::string::npos) {
1055         return;
1056     }
1057     size_t queueNameRearPos = msg.find("], remaining tasks count=");
1058     size_t queueStartPos = queueNameFrontPos + queueNameFrontStr.length();
1059     if (queueNameRearPos == std::string::npos || queueNameRearPos <= queueStartPos) {
1060         return;
1061     }
1062     size_t queueNameLength = queueNameRearPos - queueStartPos;
1063     std::string workerTidFrontStr = " worker tid ";
1064     std::string taskIdFrontStr = " is running, task id ";
1065     std::string queueNameStr = " name " + msg.substr(queueStartPos, queueNameLength);
1066     std::istringstream issMsg(msg);
1067     std::string line;
1068     while (std::getline(issMsg, line, '\n')) {
1069         size_t workerTidFrontPos = line.find(workerTidFrontStr);
1070         size_t taskIdFrontPos = line.find(taskIdFrontStr);
1071         size_t queueNamePos = line.find(queueNameStr);
1072         size_t workerStartPos = workerTidFrontPos + workerTidFrontStr.length();
1073         if (workerTidFrontPos == std::string::npos || taskIdFrontPos == std::string::npos ||
1074             queueNamePos == std::string::npos || taskIdFrontPos <= workerStartPos) {
1075             continue;
1076         }
1077         size_t tidLength = taskIdFrontPos - workerStartPos;
1078         if (tidLength < std::to_string(INT32_MAX).length()) {
1079             std::string tidStr = line.substr(workerStartPos, tidLength);
1080             if (std::all_of(std::begin(tidStr), std::end(tidStr), [] (const char& c) {
1081                 return isdigit(c);
1082             })) {
1083                 tid = std::stoi(tidStr);
1084                 return;
1085             }
1086         }
1087     }
1088 }
1089 
LeftTimeExitProcess(const std::string & description)1090 void WatchdogInner::LeftTimeExitProcess(const std::string &description)
1091 {
1092     int32_t pid = getprocpid();
1093     if (IsProcessDebug(pid)) {
1094         XCOLLIE_LOGI("heap dump or debug for %{public}d, don't exit.", pid);
1095         return;
1096     }
1097     DelayBeforeExit(10); // sleep 10s for hiview dump
1098     XCOLLIE_LOGI("Process is going to exit, reason:%{public}s.", description.c_str());
1099     WatchdogInner::WriteStringToFile(pid, "0");
1100 
1101     _exit(0);
1102 }
1103 
Stop()1104 bool WatchdogInner::Stop()
1105 {
1106     IPCDfx::SetIPCProxyLimit(0, nullptr);
1107     if (mainRunner_ != nullptr) {
1108         mainRunner_->SetMainLooperWatcher(nullptr, nullptr);
1109     }
1110     isNeedStop_.store(true);
1111     condition_.notify_all();
1112     if (threadLoop_ != nullptr && threadLoop_->joinable()) {
1113         threadLoop_->join();
1114         threadLoop_ = nullptr;
1115     }
1116     if (g_fd != NOT_OPEN) {
1117         close(g_fd);
1118         g_fd = NOT_OPEN;
1119     }
1120     return true;
1121 }
1122 
KillPeerBinderProcess(const std::string & description)1123 void WatchdogInner::KillPeerBinderProcess(const std::string &description)
1124 {
1125     bool result = false;
1126     if (getuid() == FOUNDATION_UID) {
1127         result = KillProcessByPid(getprocpid());
1128     }
1129     if (!result) {
1130         WatchdogInner::LeftTimeExitProcess(description);
1131     }
1132 }
1133 
RemoveInnerTask(const std::string & name)1134 void WatchdogInner::RemoveInnerTask(const std::string& name)
1135 {
1136     if (name.empty()) {
1137         XCOLLIE_LOGI("RemoveInnerTask fail, cname is null");
1138         return;
1139     }
1140     std::priority_queue<WatchdogTask> tmpQueue;
1141     std::unique_lock<std::mutex> lock(lock_);
1142     size_t size = checkerQueue_.size();
1143     if (size == 0) {
1144         XCOLLIE_LOGE("RemoveInnerTask %{public}s fail, empty queue!", name.c_str());
1145         return;
1146     }
1147     while (!checkerQueue_.empty()) {
1148         const WatchdogTask& task = checkerQueue_.top();
1149         if (task.name != name) {
1150             tmpQueue.push(task);
1151         } else {
1152             size_t nameSize = taskNameSet_.size();
1153             if (nameSize != 0 && !task.isOneshotTask) {
1154                 taskNameSet_.erase(name);
1155                 XCOLLIE_LOGD("RemoveInnerTask name %{public}s, remove result=%{public}d",
1156                     name.c_str(), nameSize > taskNameSet_.size());
1157             }
1158         }
1159         checkerQueue_.pop();
1160     }
1161     if (tmpQueue.size() == size) {
1162         XCOLLIE_LOGE("RemoveInnerTask fail, can not find name %{public}s, size=%{public}zu!",
1163             name.c_str(), size);
1164     }
1165     tmpQueue.swap(checkerQueue_);
1166 }
1167 
InitBeginFunc(const char * name)1168 void InitBeginFunc(const char* name)
1169 {
1170     std::string nameStr(name);
1171     WatchdogInner::GetInstance().bussinessBeginTime_ = DistributeStart(nameStr);
1172 }
1173 
InitEndFunc(const char * name)1174 void InitEndFunc(const char* name)
1175 {
1176     std::string nameStr(name);
1177     DistributeEnd(nameStr, WatchdogInner::GetInstance().bussinessBeginTime_);
1178 }
1179 
InitMainLooperWatcher(WatchdogInnerBeginFunc * beginFunc,WatchdogInnerEndFunc * endFunc)1180 void WatchdogInner::InitMainLooperWatcher(WatchdogInnerBeginFunc* beginFunc,
1181     WatchdogInnerEndFunc* endFunc)
1182 {
1183     int64_t tid = getproctid();
1184     if (beginFunc && endFunc) {
1185         if (buissnessThreadInfo_.find(tid) != buissnessThreadInfo_.end()) {
1186             XCOLLIE_LOGI("Tid =%{public}" PRId64 "already exits, "
1187                 "no repeated initialization.", tid);
1188             return;
1189         }
1190         if (mainRunner_ != nullptr) {
1191             mainRunner_->SetMainLooperWatcher(nullptr, nullptr);
1192         }
1193         *beginFunc = InitBeginFunc;
1194         *endFunc = InitEndFunc;
1195         buissnessThreadInfo_.insert(tid);
1196     } else {
1197         if (buissnessThreadInfo_.find(tid) != buissnessThreadInfo_.end()) {
1198             XCOLLIE_LOGI("Remove already init tid=%{public}." PRId64, tid);
1199             mainRunner_->SetMainLooperWatcher(DistributeStart, DistributeEnd);
1200             buissnessThreadInfo_.erase(tid);
1201         }
1202     }
1203 }
1204 
SetAppDebug(bool isAppDebug)1205 void WatchdogInner::SetAppDebug(bool isAppDebug)
1206 {
1207     isAppDebug_ = isAppDebug;
1208 }
1209 
GetAppDebug()1210 bool WatchdogInner::GetAppDebug()
1211 {
1212     return isAppDebug_;
1213 }
1214 
UpdateJankParam(int sampleInterval,int startUpTime,int sampleCount,int logType,int reportTimes)1215 void WatchdogInner::UpdateJankParam(int sampleInterval, int startUpTime, int sampleCount,
1216     int logType, int reportTimes)
1217 {
1218     jankParamsMap[KEY_LOG_TYPE] = logType;
1219     jankParamsMap[KEY_SAMPLE_INTERVAL] = sampleInterval;
1220     jankParamsMap[KEY_IGNORE_STARTUP_TIME] = startUpTime;
1221     jankParamsMap[KEY_SAMPLE_COUNT] = sampleCount;
1222     if (logType == CatchLogType::LOGTYPE_COLLECT_TRACE) {
1223         XCOLLIE_LOGI("Set thread only dump trace success.");
1224         return;
1225     }
1226     if (jankParamsMap[KEY_SET_TIMES_FLAG] == SET_TIMES_FLAG) {
1227         jankParamsMap[KEY_SAMPLE_REPORT_TIMES] = reportTimes;
1228         stackContent_.reportTimes = reportTimes;
1229         jankParamsMap[KEY_SET_TIMES_FLAG] = 0;
1230     }
1231     XCOLLIE_LOGI("Set thread sampler params success. logType: %{public}d, sample interval: %{public}d, "
1232         "ignore startUp interval: %{public}d, count: %{public}d, reportTimes: %{public}d.",
1233         logType, sampleInterval, startUpTime, sampleCount, stackContent_.reportTimes);
1234 }
1235 
ConvertStrToNum(std::map<std::string,std::string> paramsMap,const std::string & key)1236 int WatchdogInner::ConvertStrToNum(std::map<std::string, std::string> paramsMap, const std::string& key)
1237 {
1238     int num = -1;
1239     auto it = paramsMap.find(key);
1240     if (it == paramsMap.end()) {
1241         XCOLLIE_LOGE("Set the thread sampler param error, %{public}s is not exist.", key.c_str());
1242         return num;
1243     }
1244     std::string str = it->second;
1245     if (!str.empty() && str.size() < std::to_string(INT32_MAX).length()) {
1246         if (std::all_of(std::begin(str), std::end(str), [] (const char &c) {
1247             return isdigit(c);
1248         })) {
1249             num = std::stoi(str);
1250         }
1251     }
1252     if (num < 0) {
1253         XCOLLIE_LOGE("Set param error, %{public}s: %{public}s should be a number and greater than 0.",
1254             key.c_str(), str.c_str());
1255     }
1256     return num;
1257 }
1258 
CheckSampleParam(std::map<std::string,std::string> paramsMap)1259 bool WatchdogInner::CheckSampleParam(std::map<std::string, std::string> paramsMap)
1260 {
1261     int sampleInterval = ConvertStrToNum(paramsMap, KEY_SAMPLE_INTERVAL);
1262     if (sampleInterval < 0) {
1263         return false;
1264     } else if (sampleInterval < SAMPLE_INTERVAL_MIN || sampleInterval > SAMPLE_INTERVAL_MAX) {
1265         XCOLLIE_LOGE("Set the range of sample stack is from %{public}d to %{public}d, "
1266             "interval: %{public}d.", SAMPLE_INTERVAL_MIN, SAMPLE_INTERVAL_MAX, sampleInterval);
1267         return false;
1268     }
1269 
1270     int startUpTime = ConvertStrToNum(paramsMap, KEY_IGNORE_STARTUP_TIME);
1271     if (startUpTime < 0) {
1272         return false;
1273     } else if (startUpTime < IGNORE_STARTUP_TIME_MIN) {
1274         XCOLLIE_LOGE("Set the minimum of ignore startup interval is %{public}d s, "
1275             "interval: %{public}d.", IGNORE_STARTUP_TIME_MIN, startUpTime);
1276         return false;
1277     }
1278 
1279     int sampleCount = ConvertStrToNum(paramsMap, KEY_SAMPLE_COUNT);
1280     if (sampleCount < 0) {
1281         return false;
1282     }
1283     int maxSampleCount = MAX_SAMPLE_STACK_TIMES / sampleInterval - SAMPLE_EXTRA_COUNT;
1284     if (sampleCount < SAMPLE_COUNT_MIN || sampleCount > maxSampleCount) {
1285         XCOLLIE_LOGE("Set the range of sample count, min value: %{public}d max value: %{public}d, count: %{public}d.",
1286             SAMPLE_COUNT_MIN, maxSampleCount, sampleCount);
1287         return false;
1288     }
1289 
1290     int reportTimes = ConvertStrToNum(paramsMap, KEY_SAMPLE_REPORT_TIMES);
1291     if (reportTimes < 0) {
1292         return false;
1293     } else if (reportTimes < SAMPLE_REPORT_TIMES_MIN || reportTimes > SAMPLE_REPORT_TIMES_MAX) {
1294         XCOLLIE_LOGE("Set the range of sample reportTimes is from %{public}d to %{public}d,"
1295             "reportTimes: %{public}d", SAMPLE_REPORT_TIMES_MIN, SAMPLE_REPORT_TIMES_MAX, reportTimes);
1296         return false;
1297     }
1298     UpdateJankParam(sampleInterval, startUpTime, sampleCount, CatchLogType::LOGTYPE_SAMPLE_STACK, reportTimes);
1299     return true;
1300 }
1301 
SetEventConfig(std::map<std::string,std::string> paramsMap)1302 int WatchdogInner::SetEventConfig(std::map<std::string, std::string> paramsMap)
1303 {
1304     if (paramsMap.empty()) {
1305         XCOLLIE_LOGE("Set the thread sampler param map is null.");
1306         return -1;
1307     }
1308     int logType = ConvertStrToNum(paramsMap, KEY_LOG_TYPE);
1309     size_t size = paramsMap.size();
1310     switch (logType) {
1311         case CatchLogType::LOGTYPE_DEFAULT:
1312             return -1;
1313         case CatchLogType::LOGTYPE_NONE:
1314         case CatchLogType::LOGTYPE_COLLECT_TRACE: {
1315             if (size != SAMPLE_PARAMS_MIN_SIZE) {
1316                 XCOLLIE_LOGE("Set the thread sampler param map size error, can only set log_type. "
1317                     "map size: %{public}zu", size);
1318                 return -1;
1319             }
1320             UpdateJankParam(SAMPLE_DEFULE_INTERVAL, DEFAULT_IGNORE_STARTUP_TIME, SAMPLE_DEFULE_COUNT,
1321                 logType, SAMPLE_REPORT_TIMES_MIN);
1322             break;
1323         }
1324         case CatchLogType::LOGTYPE_SAMPLE_STACK: {
1325             if (size != SAMPLE_PARAMS_MAX_SIZE) {
1326                 XCOLLIE_LOGE("Set the thread sampler param map size error, current map size: %{public}zu", size);
1327                 return -1;
1328             }
1329             if (!CheckSampleParam(paramsMap)) {
1330                 return -1;
1331             }
1332             break;
1333         }
1334         default: {
1335             XCOLLIE_LOGE("Set the log_type can only be 0 1 2, logType: %{public}d", logType);
1336             return -1;
1337         }
1338     };
1339     return 0;
1340 }
1341 } // end of namespace HiviewDFX
1342 } // end of namespace OHOS
1343