/*
 * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
15
16 #include "watchdog_inner.h"
17
18 #include <cerrno>
19 #include <climits>
20 #include <cstdio>
21 #include <mutex>
22
23 #include <sys/stat.h>
24 #include <sys/types.h>
25 #include <fcntl.h>
26 #include <pthread.h>
27 #include <unistd.h>
28 #include <csignal>
29 #include <string>
30
31 #include <securec.h>
32 #include <dlfcn.h>
33 #include "musl_preinit_common.h"
34 #include "backtrace_local.h"
35 #ifdef HISYSEVENT_ENABLE
36 #include "hisysevent.h"
37 #endif
38 #include "ipc_skeleton.h"
39 #include "xcollie_utils.h"
40 #include "dfx_define.h"
41 #include "parameter.h"
42 #include "file_ex.h"
43
44 typedef void(*ThreadInfoCallBack)(char* buf, size_t len, void* ucontext);
45 extern "C" void SetThreadInfoCallback(ThreadInfoCallBack func) __attribute__((weak));
46 namespace OHOS {
47 namespace HiviewDFX {
48 namespace {
// State markers for stack/trace dumping. The enumerators are assigned to and
// compared with plain integer-like fields in this file (e.g. traceState,
// collectCount), so the enum is intentionally left unscoped.
enum DumpStackState {
    DEFAULT = 0,        // 0
    COMPLETE,           // 1
    SAMPLE_COMPLETE     // 2
};
// Selects which diagnostic is captured when a dispatched event runs too long
// (see the switch in DistributeEnd).
enum CatchLogType {
    LOGTYPE_DEFAULT = -1,       // -1
    LOGTYPE_NONE,               //  0: pick stack sampling or trace by event duration
    LOGTYPE_SAMPLE_STACK,       //  1: always sample the thread stack
    LOGTYPE_COLLECT_TRACE       //  2: collect a trace for sufficiently long events
};
// ---- Task names -------------------------------------------------------------
constexpr char STACK_CHECKER[] = "ThreadSampler";
constexpr char TRACE_CHECKER[] = "TraceCollector";
constexpr const char* const FREEZE_SAMPLE = "FreezeSampler";

// ---- Time limits and unit conversions ---------------------------------------
constexpr int ONE_DAY_LIMIT = 24 * 60 * 60 * 1000;     // one day, in milliseconds
constexpr int ONE_HOUR_LIMIT = 60 * 60 * 1000;         // one hour, in milliseconds
constexpr int MILLISEC_TO_NANOSEC = 1000 * 1000;
constexpr int FFRT_BUFFER_SIZE = 512 * 1024;

// ---- Sampling / tracing bounds ----------------------------------------------
constexpr int DETECT_STACK_COUNT = 2;
constexpr int COLLECT_STACK_COUNT = 10;
constexpr int COLLECT_TRACE_MIN = 1;
constexpr int COLLECT_TRACE_MAX = 20;
constexpr int TASK_INTERVAL = 155;
constexpr int DURATION_TIME = 150;
constexpr int DISTRIBUTE_TIME = 2000;                  // compared as milliseconds
constexpr int DUMPTRACE_TIME = 450;                    // compared as milliseconds
constexpr const char* const KEY_SCB_STATE = "com.ohos.sceneboard";
constexpr uint64_t DEFAULT_TIMEOUT = 60 * 1000;
constexpr uint32_t FFRT_CALLBACK_TIME = 30 * 1000;
constexpr uint32_t IPC_CHECKER_TIME = 30 * 1000;
constexpr uint32_t TIME_MS_TO_S = 1000;
constexpr int INTERVAL_KICK_TIME = 6 * 1000;

// ---- Well-known service UIDs ------------------------------------------------
constexpr uint32_t AUDIO_SERVER_UID = 1041;
constexpr uint32_t DATA_MANAGE_SERVICE_UID = 3012;
constexpr uint32_t FOUNDATION_UID = 5523;
constexpr uint32_t RENDER_SERVICE_UID = 1003;
constexpr int SERVICE_WARNING = 1;

// ---- Hung-task interface paths and commands ---------------------------------
// These were mutable `const char*` globals; they are now immutable pointers,
// matching the style of KEY_SCB_STATE / FREEZE_SAMPLE above.
constexpr const char* const SYS_KERNEL_HUNGTASK_USERLIST = "/sys/kernel/hungtask/userlist";
constexpr const char* const HUNGTASK_USERLIST = "/proc/sys/hguard/user_list";
constexpr const char* const ON_KICK_TIME = "on,72";
constexpr const char* const ON_KICK_TIME_EXTRA = "on,10,foundation";
constexpr const char* const KICK_TIME = "kick";
constexpr const char* const KICK_TIME_EXTRA = "kick,foundation";
constexpr int32_t NOT_OPEN = -1;
constexpr uint64_t MAX_START_TIME = 10 * 1000;

// ---- Thread sampler ---------------------------------------------------------
constexpr const char* const LIB_THREAD_SAMPLER_PATH = "libthread_sampler.z.so";
constexpr size_t STACK_LENGTH = 128 * 1024;            // size of each stack text buffer
constexpr uint64_t DEFAULE_SLEEP_TIME = 2 * 1000;
constexpr uint32_t JOIN_IPC_FULL_UIDS[] = {
    AUDIO_SERVER_UID, DATA_MANAGE_SERVICE_UID,
    FOUNDATION_UID, RENDER_SERVICE_UID
};
constexpr uint64_t MIN_IPC_CHECK_INTERVAL = 10;
constexpr uint64_t MAX_IPC_CHECK_INTERVAL = 30;
constexpr uint64_t SAMPLE_STACK_MAP_SIZE = 5;
constexpr uint64_t SAMPLE_TRACE_MAP_SIZE = 1;
constexpr int MAX_SAMPLE_STACK_TIMES = 2500; // 2.5s
constexpr int SAMPLE_INTERVAL_MIN = 50; // 50ms
constexpr int SAMPLE_INTERVAL_MAX = 500; // 500ms
constexpr int SAMPLE_COUNT_MIN = 1;
constexpr int SAMPLE_REPORT_TIMES_MIN = 1;
constexpr int SAMPLE_REPORT_TIMES_MAX = 3;
constexpr int SAMPLE_EXTRA_COUNT = 4;
constexpr int IGNORE_STARTUP_TIME_MIN = 3; // 3s
constexpr int SCROLL_INTERVAL = 50; // 50ms
constexpr int DEFAULT_SAMPLE_VALUE = 1;
constexpr int CPU_FREQ_DECIMAL_BASE = 10;

// ---- Event names ------------------------------------------------------------
constexpr const char* const SCROLL_JANK = "SCROLL_JANK";
constexpr const char* const MAIN_THREAD_JANK = "MAIN_THREAD_JANK";
constexpr const char* const BUSSINESS_THREAD_JANK = "BUSSINESS_THREAD_JANK";
119 }
120
121 std::mutex WatchdogInner::lockFfrt_;
122 static uint64_t g_lastKickTime = GetCurrentTickMillseconds();
123 static int32_t g_fd = NOT_OPEN;
124 static bool g_existFile = true;
125
126 static std::atomic_int g_scrollSampleCount {0};
127 static std::atomic_int g_freezeSampleCount {0};
128 static std::atomic_bool g_freezeTaskFinished {false};
129 static std::atomic_bool g_isReuseStack {false};
130 static std::atomic_bool g_isDumpStack {false};
131
132 SigActionType WatchdogInner::threadSamplerSigHandler_ = nullptr;
133 std::mutex WatchdogInner::threadSamplerSignalMutex_;
134
135 namespace {
ThreadInfo(char * buf,size_t len,void * ucontext)136 void ThreadInfo(char *buf __attribute__((unused)),
137 size_t len __attribute__((unused)),
138 void* ucontext __attribute__((unused)))
139 {
140 if (ucontext == nullptr) {
141 return;
142 }
143
144 auto ret = memcpy_s(buf, len, WatchdogInner::GetInstance().currentScene_.c_str(),
145 WatchdogInner::GetInstance().currentScene_.size());
146 if (ret != 0) {
147 return;
148 }
149 }
150
// Adjusts the calling thread's signal mask.
//   signo       - signal to add to / remove from the working set
//   isAddSignal - true: add signo to the set; false: remove it
//   isBlock     - true: apply the set with SIG_BLOCK; false: with SIG_UNBLOCK
// The working set starts as the thread's current mask, so the whole current
// mask (adjusted by signo) is what gets blocked or unblocked.
void SetThreadSignalMask(int signo, bool isAddSignal, bool isBlock)
{
    sigset_t mask;
    sigemptyset(&mask);
    // With a null "new set" argument this call only reads the current mask.
    pthread_sigmask(SIG_SETMASK, nullptr, &mask);

    if (!isAddSignal) {
        sigdelset(&mask, signo);
    } else {
        sigaddset(&mask, signo);
    }

    const int how = isBlock ? SIG_BLOCK : SIG_UNBLOCK;
    pthread_sigmask(how, &mask, nullptr);
}
167
// Fatal signals that InstallThreadSamplerSignal removes from the sampler
// handler's sa_mask, so they are not blocked while that handler runs.
static constexpr int CRASH_SIGNAL_LIST[] = {
    SIGILL,
    SIGABRT,
    SIGBUS,
    SIGFPE,
    SIGSEGV,
    SIGSTKFLT,
    SIGSYS,
    SIGTRAP
};
172 }
173
WatchdogInner()174 WatchdogInner::WatchdogInner()
175 : cntCallback_(0), timeCallback_(0)
176 {
177 currentScene_ = "thread DfxWatchdog: Current scenario is hicollie.\n";
178 }
179
~WatchdogInner()180 WatchdogInner::~WatchdogInner()
181 {
182 Stop();
183 }
184
IsInAppspwan()185 static bool IsInAppspwan()
186 {
187 if (getuid() == 0 && GetSelfProcName().find("appspawn") != std::string::npos) {
188 return true;
189 }
190
191 if (getuid() == 0 && GetSelfProcName().find("nativespawn") != std::string::npos) {
192 return true;
193 }
194
195 if (getuid() == 0 && GetSelfProcName().find("hybridspawn") != std::string::npos) {
196 return true;
197 }
198
199 return false;
200 }
201
SetBundleInfo(const std::string & bundleName,const std::string & bundleVersion)202 void WatchdogInner::SetBundleInfo(const std::string& bundleName, const std::string& bundleVersion)
203 {
204 bundleName_ = bundleName;
205 bundleVersion_ = bundleVersion;
206 }
207
SetForeground(const bool & isForeground)208 void WatchdogInner::SetForeground(const bool& isForeground)
209 {
210 isForeground_ = isForeground;
211 }
212
GetForeground()213 bool WatchdogInner::GetForeground()
214 {
215 return isForeground_;
216 }
217
ReportMainThreadEvent(int64_t tid,std::string eventName,bool isScroll,bool appStart)218 bool WatchdogInner::ReportMainThreadEvent(int64_t tid, std::string eventName, bool isScroll,
219 bool appStart)
220 {
221 std::string stack = "";
222 std::string heaviestStack = "";
223 if (!CollectStack(stack, heaviestStack)) {
224 stack = "";
225 heaviestStack = "";
226 }
227
228 std::string path = "";
229 int32_t pid = getprocpid();
230 bool isOverLimit = false;
231 if (!WriteStackToFd(pid, path, stack, eventName, isOverLimit)) {
232 XCOLLIE_LOGI("MainThread WriteStackToFd Failed");
233 return false;
234 }
235 #ifdef HISYSEVENT_ENABLE
236 int result = -1;
237 if (appStart) {
238 isScroll ? scrollSlowContent_.reportTimes-- : startSlowContent_.reportTimes--;
239 result = HiSysEventWrite(HiSysEvent::Domain::FRAMEWORK, eventName,
240 HiSysEvent::EventType::FAULT, "PROCESS_NAME", GetSelfProcName(),
241 "STACK", stack);
242 XCOLLIE_LOGI("AppStart HiSysEventWrite result=%{public}d, isScroll=%{public}d, "
243 "eventName=%{public}s", result, isScroll, eventName.c_str());
244 return result >= 0;
245 }
246 if (!isScroll) {
247 result = HiSysEventWrite(HiSysEvent::Domain::FRAMEWORK, "MAIN_THREAD_JANK",
248 HiSysEvent::EventType::FAULT,
249 "BUNDLE_VERSION", bundleVersion_, "BUNDLE_NAME", bundleName_,
250 "BEGIN_TIME", stackContent_.reportBegin / MILLISEC_TO_NANOSEC,
251 "END_TIME", stackContent_.reportEnd / MILLISEC_TO_NANOSEC,
252 "EXTERNAL_LOG", path, "STACK", stack, "JANK_LEVEL", 0,
253 "THREAD_NAME", GetSelfProcName(), "FOREGROUND", isForeground_,
254 "LOG_TIME", GetTimeStamp() / MILLISEC_TO_NANOSEC,
255 "APP_START_JIFFIES_TIME", GetAppStartTime(pid, tid), "HEAVIEST_STACK", heaviestStack,
256 "LOG_OVER_LIMIT", isOverLimit);
257 } else {
258 result = HiSysEventWrite(HiSysEvent::Domain::FRAMEWORK, "SCROLL_TIMEOUT",
259 HiSysEvent::EventType::FAULT, "PROCESS_NAME", GetSelfProcName(),
260 "EXTERNAL_LOG", path, "LOG_OVER_LIMIT", isOverLimit);
261 }
262
263 XCOLLIE_LOGI("MainThread HiSysEventWrite result=%{public}d, isScroll=%{public}d", result, isScroll);
264 return result >= 0;
265 #else
266 XCOLLIE_LOGI("hisysevent not exists");
267 #endif
268 }
269
CheckEventTimer(int64_t currentTime,int64_t reportBegin,int64_t reportEnd,int interval)270 bool WatchdogInner::CheckEventTimer(int64_t currentTime, int64_t reportBegin, int64_t reportEnd, int interval)
271 {
272 if (reportBegin == timeContent_.curBegin &&
273 reportEnd == timeContent_.curEnd) {
274 return false;
275 }
276 return (timeContent_.curEnd <= timeContent_.curBegin &&
277 (currentTime - timeContent_.curBegin >= interval * MILLISEC_TO_NANOSEC)) ||
278 (timeContent_.curEnd - timeContent_.curBegin > interval * MILLISEC_TO_NANOSEC);
279 }
280
ThreadSamplerSigHandler(int sig,siginfo_t * si,void * context)281 void WatchdogInner::ThreadSamplerSigHandler(int sig, siginfo_t* si, void* context)
282 {
283 std::lock_guard<std::mutex> lock(threadSamplerSignalMutex_);
284 if (WatchdogInner::threadSamplerSigHandler_ == nullptr) {
285 return;
286 }
287 WatchdogInner::threadSamplerSigHandler_(sig, si, context);
288 }
289
InstallThreadSamplerSignal()290 bool WatchdogInner::InstallThreadSamplerSignal()
291 {
292 struct sigaction action {};
293 sigfillset(&action.sa_mask);
294 for (size_t i = 0; i < sizeof(CRASH_SIGNAL_LIST) / sizeof(CRASH_SIGNAL_LIST[0]); i++) {
295 sigdelset(&action.sa_mask, CRASH_SIGNAL_LIST[i]);
296 }
297 action.sa_sigaction = WatchdogInner::ThreadSamplerSigHandler;
298 action.sa_flags = SA_RESTART | SA_SIGINFO;
299 if (sigaction(MUSL_SIGNAL_SAMPLE_STACK, &action, nullptr) != 0) {
300 XCOLLIE_LOGE("Failed to register signal(%{public}d:%{public}d)", MUSL_SIGNAL_SAMPLE_STACK, errno);
301 return false;
302 }
303 return true;
304 }
305
UninstallThreadSamplerSignal()306 void WatchdogInner::UninstallThreadSamplerSignal()
307 {
308 std::lock_guard<std::mutex> lock(threadSamplerSignalMutex_);
309 threadSamplerSigHandler_ = nullptr;
310 }
311
CheckThreadSampler()312 bool WatchdogInner::CheckThreadSampler()
313 {
314 XCOLLIE_LOGD("ThreadSampler 1st in ThreadSamplerTask.\n");
315 if (!InitThreadSamplerFuncs()) {
316 XCOLLIE_LOGE("ThreadSampler initialize failed.\n");
317 return false;
318 }
319
320 if (!InstallThreadSamplerSignal()) {
321 XCOLLIE_LOGE("ThreadSampler install signal failed.\n");
322 return false;
323 }
324
325 int initThreadSamplerRet = threadSamplerInitFunc_(COLLECT_STACK_COUNT);
326 if (initThreadSamplerRet != 0) {
327 XCOLLIE_LOGE("Thread sampler init failed. ret %{public}d\n", initThreadSamplerRet);
328 return false;
329 }
330 return true;
331 }
332
InitThreadSamplerFuncs()333 bool WatchdogInner::InitThreadSamplerFuncs()
334 {
335 threadSamplerFuncHandler_ = dlopen(LIB_THREAD_SAMPLER_PATH, RTLD_LAZY);
336 if (threadSamplerFuncHandler_ == nullptr) {
337 XCOLLIE_LOGE("dlopen failed, funcHandler is nullptr.\n");
338 return false;
339 }
340
341 threadSamplerInitFunc_ =
342 reinterpret_cast<ThreadSamplerInitFunc>(FunctionOpen(threadSamplerFuncHandler_, "ThreadSamplerInit"));
343 threadSamplerSampleFunc_ =
344 reinterpret_cast<ThreadSamplerSampleFunc>(FunctionOpen(threadSamplerFuncHandler_, "ThreadSamplerSample"));
345 threadSamplerCollectFunc_ =
346 reinterpret_cast<ThreadSamplerCollectFunc>(FunctionOpen(threadSamplerFuncHandler_, "ThreadSamplerCollect"));
347 threadSamplerDeinitFunc_ =
348 reinterpret_cast<ThreadSamplerDeinitFunc>(FunctionOpen(threadSamplerFuncHandler_, "ThreadSamplerDeinit"));
349 threadSamplerSigHandler_ =
350 reinterpret_cast<SigActionType>(FunctionOpen(threadSamplerFuncHandler_, "ThreadSamplerSigHandler"));
351 if (threadSamplerInitFunc_ == nullptr || threadSamplerSampleFunc_ == nullptr ||
352 threadSamplerCollectFunc_ == nullptr || threadSamplerDeinitFunc_ == nullptr ||
353 threadSamplerSigHandler_ == nullptr) {
354 ResetThreadSamplerFuncs();
355 XCOLLIE_LOGE("ThreadSampler dlsym some function failed.\n");
356 return false;
357 }
358 XCOLLIE_LOGI("ThreadSampler has been successfully loaded.\n");
359 return true;
360 }
361
ResetThreadSamplerFuncs()362 void WatchdogInner::ResetThreadSamplerFuncs()
363 {
364 threadSamplerInitFunc_ = nullptr;
365 threadSamplerSampleFunc_ = nullptr;
366 threadSamplerCollectFunc_ = nullptr;
367 threadSamplerDeinitFunc_ = nullptr;
368 threadSamplerSigHandler_ = nullptr;
369 dlclose(threadSamplerFuncHandler_);
370 threadSamplerFuncHandler_ = nullptr;
371 }
372
UpdateTime(int64_t & reportBegin,int64_t & reportEnd,TimePoint & lastEndTime,const TimePoint & endTime)373 void WatchdogInner::UpdateTime(int64_t& reportBegin, int64_t& reportEnd,
374 TimePoint& lastEndTime, const TimePoint& endTime)
375 {
376 reportBegin = timeContent_.curBegin;
377 reportEnd = timeContent_.curEnd;
378 lastEndTime = endTime;
379 }
380
SampleStackDetect(const TimePoint & endTime,int & reportTimes,int updateTimes,int ignoreTime,bool isScroll)381 bool WatchdogInner::SampleStackDetect(const TimePoint& endTime, int& reportTimes,
382 int updateTimes, int ignoreTime, bool isScroll)
383 {
384 uint64_t startUpTime = static_cast<uint64_t>(ignoreTime) * TIME_MS_TO_S;
385 if (GetCurrentTickMillseconds() - watchdogStartTime_ < startUpTime) {
386 XCOLLIE_LOGI("Application is in starting period.\n");
387 return false;
388 }
389 if (!stackContent_.isStartSampleEnabled) {
390 XCOLLIE_LOGI("Current sample detection task is being executed.\n");
391 return false;
392 }
393 if (reportTimes <= 0) {
394 int32_t checkTimer = ONE_DAY_LIMIT;
395 int32_t checkInterval = jankParamsMap[KEY_CHECKER_INTERVAL];
396 if (checkInterval > 0) {
397 checkTimer = checkInterval;
398 } else if (!isScroll && (IsDeveloperOpen() ||
399 (IsBetaVersion() && GetProcessNameFromProcCmdline(getpid()) == KEY_SCB_STATE))) {
400 checkTimer = ONE_HOUR_LIMIT;
401 }
402 auto diff = endTime - stackContent_.lastEndTime;
403 int64_t intervalTime = std::chrono::duration_cast<std::chrono::milliseconds>(diff).count();
404 if (intervalTime < checkTimer) {
405 return false;
406 }
407 reportTimes = updateTimes;
408 XCOLLIE_LOGI("Update the currentThread's reportTimes: %{public}d", reportTimes);
409 }
410 stackContent_.isStartSampleEnabled = false;
411 UpdateTime(stackContent_.reportBegin, stackContent_.reportEnd, stackContent_.lastEndTime, endTime);
412 return true;
413 }
414
AppStartSample(bool isScroll,AppStartContent & startContent)415 bool WatchdogInner::AppStartSample(bool isScroll, AppStartContent& startContent)
416 {
417 int64_t tid = getproctid();
418 startContent.collectCount.store(0);
419 startContent.isStartSampleEnabled = false;
420 auto sampleTask = [this, tid, isScroll, &startContent]() {
421 if (startContent.collectCount.load() == 0 && g_isDumpStack) {
422 startContent.isFinishStartSample = true;
423 return;
424 }
425 if ((startContent.collectCount.load() == 0 && !CheckThreadSampler()) || threadSamplerSampleFunc_ == nullptr) {
426 g_isDumpStack.store(false);
427 startContent.isFinishStartSample = true;
428 return;
429 }
430 if (startContent.collectCount.load() < startContent.targetCount) {
431 g_isDumpStack.store(true);
432 threadSamplerSampleFunc_();
433 } else {
434 std::string eventName;
435 if (isScroll) {
436 eventName = EVENT_APP_START_SCROLL_JANK;
437 } else {
438 eventName = EVENT_APP_START_JANK;
439 }
440 ReportMainThreadEvent(tid, eventName, isScroll, true);
441 g_isDumpStack.store(false);
442 startContent.isFinishStartSample = true;
443 return;
444 }
445 startContent.collectCount.fetch_add(DEFAULT_SAMPLE_VALUE);
446 };
447 WatchdogTask task(APP_START_SAMPLE, sampleTask, 0, startContent.sampleInterval, false);
448 std::unique_lock<std::mutex> lock(lock_);
449 if (!InsertWatchdogTaskLocked(APP_START_SAMPLE, std::move(task))) {
450 return false;
451 }
452 return true;
453 }
454
EnableAppStartSample(AppStartContent & startContent,int64_t durationTime,bool isScroll)455 bool WatchdogInner::EnableAppStartSample(AppStartContent& startContent, int64_t durationTime, bool isScroll)
456 {
457 if (!startContent.enableStartSample.load()) {
458 return false;
459 }
460 if (!OHOS::FileExists(APP_START_CONFIG)) {
461 startContent.enableStartSample.store(false);
462 XCOLLIE_LOGD("file:%{public}s not exist, errno:%{public}d", APP_START_CONFIG, errno);
463 return false;
464 }
465 if (!isScroll && (GetCurrentTickMillseconds() - watchdogStartTime_) >
466 static_cast<uint64_t>(startContent.startUpDuration)) {
467 startContent.enableStartSample.store(false);
468 XCOLLIE_LOGD("CurrentThread is not in starting period. startUpDuration:%{public}" PRId64,
469 startContent.startUpDuration);
470 return false;
471 }
472 int64_t curTime = GetTimeStamp() / SEC_TO_MICROSEC;
473 if (curTime - startContent.startTime > APP_START_LIMIT) {
474 startContent.enableStartSample.store(false);
475 XCOLLIE_LOGD("The time for detecting the slow startup of an application exceeds the limit, "
476 "curTime:%{public}" PRId64, curTime);
477 return false;
478 }
479 if (startContent.reportTimes <= 0) {
480 if (!isScroll) {
481 startContent.enableStartSample.store(false);
482 }
483 return isScroll;
484 }
485 if (!startContent.isStartSampleEnabled) {
486 XCOLLIE_LOGD("Current app start detection task is being executed.");
487 return true;
488 }
489 if (durationTime < startContent.threshold) {
490 return false;
491 }
492 return AppStartSample(isScroll, startContent);
493 }
494
CheckSample(const TimePoint & endTime,int64_t durationTime)495 bool WatchdogInner::CheckSample(const TimePoint& endTime, int64_t durationTime)
496 {
497 bool isScroll = isScroll_;
498 if (!isScroll) {
499 return EnableAppStartSample(startSlowContent_, durationTime, isScroll);
500 }
501 if (!EnableAppStartSample(scrollSlowContent_, durationTime, isScroll) &&
502 durationTime > SCROLL_INTERVAL && !scrollSlowContent_.enableStartSample.load()) {
503 return StartScrollProfile(endTime, durationTime, SCROLL_INTERVAL);
504 }
505 return false;
506 }
507
StartScrollProfile(const TimePoint & endTime,int64_t durationTime,int sampleInterval)508 bool WatchdogInner::StartScrollProfile(const TimePoint& endTime, int64_t durationTime, int sampleInterval)
509 {
510 bool isScroll = true;
511 if (!SampleStackDetect(endTime, stackContent_.scrollTimes, SAMPLE_DEFULE_REPORT_TIMES,
512 DEFAULT_IGNORE_STARTUP_TIME, isScroll)) {
513 return false;
514 }
515 XCOLLIE_LOGI("StartScrollProfile durationTime: %{public}" PRId64 " ms, sampleInterval: %{public}d "
516 "isScroll: %{public}d.", durationTime, sampleInterval, isScroll);
517 int64_t tid = getproctid();
518 g_scrollSampleCount.store(0);
519 auto sampleTask = [this, sampleInterval, tid, isScroll]() {
520 if (g_scrollSampleCount.load() == 0 && (g_isDumpStack || !CheckThreadSampler())) {
521 isMainThreadStackEnabled_ = true;
522 return;
523 }
524 if (threadSamplerSampleFunc_ == nullptr) {
525 isMainThreadStackEnabled_ = true;
526 return;
527 }
528 if (g_scrollSampleCount.load() == 0) {
529 g_isDumpStack.store(true);
530 threadSamplerSampleFunc_();
531 g_scrollSampleCount.fetch_add(DEFAULT_SAMPLE_VALUE);
532 } else {
533 ReportMainThreadEvent(tid, SCROLL_JANK, isScroll);
534 stackContent_.scrollTimes--;
535 g_isDumpStack.store(false);
536 isMainThreadStackEnabled_ = true;
537 }
538 };
539 WatchdogTask task("ThreadSampler", sampleTask, 0, sampleInterval, true);
540 std::unique_lock<std::mutex> lock(lock_);
541 InsertWatchdogTaskLocked("ThreadSampler", std::move(task));
542 return true;
543 }
544
StartProfileMainThread(const TimePoint & endTime,int64_t durationTime,int sampleInterval)545 void WatchdogInner::StartProfileMainThread(const TimePoint& endTime, int64_t durationTime, int sampleInterval)
546 {
547 std::unique_lock<std::mutex> lock(lock_);
548 if (!SampleStackDetect(endTime, stackContent_.reportTimes,
549 jankParamsMap[KEY_SAMPLE_REPORT_TIMES], jankParamsMap[KEY_IGNORE_STARTUP_TIME])) {
550 return;
551 }
552 XCOLLIE_LOGI("StartProfileMainThread durationTime: %{public}" PRId64 " ms, sampleInterval: %{public}d.",
553 durationTime, sampleInterval);
554 stackContent_.detectorCount = 0;
555 stackContent_.collectCount = 0;
556 int sampleCount = jankParamsMap[KEY_SAMPLE_COUNT];
557 int64_t tid = getproctid();
558 auto sampleTask = [this, sampleInterval, sampleCount, tid]() {
559 if ((stackContent_.detectorCount == 0 && stackContent_.collectCount == 0 &&
560 (g_isDumpStack || !CheckThreadSampler())) || threadSamplerSampleFunc_ == nullptr) {
561 isMainThreadStackEnabled_ = true;
562 return;
563 }
564 if (stackContent_.collectCount > DumpStackState::DEFAULT &&
565 stackContent_.collectCount < sampleCount) {
566 g_isDumpStack.store(true);
567 threadSamplerSampleFunc_();
568 stackContent_.collectCount++;
569 } else if (stackContent_.collectCount == sampleCount) {
570 g_isDumpStack.store(false);
571 std::string eventName = buissnessThreadInfo_.empty() ? MAIN_THREAD_JANK : BUSSINESS_THREAD_JANK;
572 ReportMainThreadEvent(tid, eventName);
573 stackContent_.reportTimes--;
574 isMainThreadStackEnabled_ = true;
575 return;
576 } else {
577 if (CheckEventTimer(GetTimeStamp(), stackContent_.reportBegin,
578 stackContent_.reportEnd, sampleInterval)) {
579 threadSamplerSampleFunc_();
580 stackContent_.collectCount++;
581 } else {
582 stackContent_.detectorCount++;
583 }
584 }
585 if (stackContent_.detectorCount == DETECT_STACK_COUNT) {
586 isMainThreadStackEnabled_ = true;
587 }
588 };
589 WatchdogTask task("ThreadSampler", sampleTask, 0, sampleInterval, true);
590 InsertWatchdogTaskLocked("ThreadSampler", std::move(task));
591 }
592
SaveFreezeStackToFile(const std::string & outFile,int32_t pid)593 void WatchdogInner::SaveFreezeStackToFile(const std::string& outFile, int32_t pid)
594 {
595 if (!CreateDir(FREEZE_DIR)) {
596 XCOLLIE_LOGE("Path to realPath failed.");
597 return;
598 }
599 std::string stack;
600 std::string heaviestStack;
601 CollectStack(stack, heaviestStack, 0);
602 std::string info = "#ThreadInfos Tid: " + std::to_string(pid) + ", Name: " + bundleName_ + "\n";
603 if (g_isReuseStack) {
604 info += "The current thread is collecting the stack, which conflicts with the main thread jank event."
605 " Reuse the current stack.";
606 g_isReuseStack.store(false);
607 }
608 info += stack;
609 ClearFreezeFileIfNeed(info.size());
610 bool saveRet = SaveStringToFile(FREEZE_DIR + outFile, info);
611 g_isDumpStack.store(false);
612 g_freezeTaskFinished.store(true);
613 XCOLLIE_LOGI("Save freeze stack to file, ret:%{public}d, isReuseStack:%{public}d.",
614 saveRet, g_isReuseStack.load());
615 }
616
StartSample(int duration,int interval,std::string & outFile)617 void WatchdogInner::StartSample(int duration, int interval, std::string& outFile)
618 {
619 std::unique_lock<std::mutex> lock(lock_);
620 if (IsTaskExistLocked(FREEZE_SAMPLE)) {
621 XCOLLIE_LOGW("StartSample task already exit, skip this task.");
622 return;
623 }
624 if (duration <= 0 || interval <= 0) {
625 XCOLLIE_LOGW("StartSample failed, duration=%{public}d, interval=%{public}d.",
626 duration, interval);
627 return;
628 }
629 int targetCount = duration / interval;
630 if (targetCount < DEFAULT_SAMPLE_VALUE) {
631 XCOLLIE_LOGW("StartSample failed, sampleCount=%{public}d", targetCount);
632 return;
633 }
634 int32_t pid = getpid();
635 g_freezeSampleCount.store(0);
636 outFile = "freeze_" + GetFormatDate() + "_" + std::to_string(pid) + ".txt";
637 auto sampleTask = [this, pid, targetCount, outFile]() {
638 if ((g_freezeSampleCount.load() == 0 && !CheckThreadSampler()) || threadSamplerSampleFunc_ == nullptr) {
639 g_freezeTaskFinished.store(true);
640 return;
641 }
642 if (g_freezeSampleCount.load() == 0 && g_isDumpStack) {
643 g_isReuseStack.store(true);
644 }
645 if (g_isReuseStack) {
646 if (g_isDumpStack) {
647 return;
648 }
649 SaveFreezeStackToFile(outFile, pid);
650 return;
651 }
652 if (g_freezeSampleCount.load() < targetCount) {
653 g_isDumpStack.store(true);
654 threadSamplerSampleFunc_();
655 } else {
656 SaveFreezeStackToFile(outFile, pid);
657 }
658 g_freezeSampleCount.fetch_add(DEFAULT_SAMPLE_VALUE);
659 };
660 WatchdogTask task(FREEZE_SAMPLE, sampleTask, 0, interval, false);
661 if (!InsertWatchdogTaskLocked(FREEZE_SAMPLE, std::move(task))) {
662 outFile = "";
663 }
664 }
665
CollectStack(std::string & stack,std::string & heaviestStack,int treeFormat)666 bool WatchdogInner::CollectStack(std::string& stack, std::string& heaviestStack, int treeFormat)
667 {
668 if (threadSamplerCollectFunc_ == nullptr) {
669 return false;
670 }
671 char* stk = new char[STACK_LENGTH]();
672 char* heaviest = new char[STACK_LENGTH]();
673 int collectRet = threadSamplerCollectFunc_(stk, heaviest, STACK_LENGTH, STACK_LENGTH, treeFormat);
674 if (collectRet != 0) {
675 XCOLLIE_LOGE("threadSampler collect stack failed.");
676 delete[] stk;
677 delete[] heaviest;
678 return false;
679 }
680 stack = stk;
681 heaviestStack = heaviest;
682 delete[] stk;
683 delete[] heaviest;
684 return true;
685 }
686
Deinit()687 bool WatchdogInner::Deinit()
688 {
689 if (threadSamplerDeinitFunc_ == nullptr) {
690 return false;
691 }
692 UninstallThreadSamplerSignal();
693 int ret = threadSamplerDeinitFunc_();
694 return ret == 0;
695 }
696
DumpTraceProfile(int32_t interval)697 void WatchdogInner::DumpTraceProfile(int32_t interval)
698 {
699 traceContent_.dumpCount = 0;
700 traceContent_.traceCount = 0;
701 auto traceTask = [this, interval]() {
702 traceContent_.traceCount++;
703 if (CheckEventTimer(GetTimeStamp(), traceContent_.reportBegin,
704 traceContent_.reportEnd, interval)) {
705 traceContent_.dumpCount++;
706 }
707 if (traceContent_.traceCount >= COLLECT_TRACE_MAX) {
708 if (traceContent_.dumpCount >= COLLECT_TRACE_MIN) {
709 CreateDir(WATCHDOG_DIR);
710 appCaller_.actionId = UCollectClient::ACTION_ID_DUMP_TRACE;
711 appCaller_.isBusinessJank = !buissnessThreadInfo_.empty();
712 auto result = traceCollector_->CaptureDurationTrace(appCaller_);
713 XCOLLIE_LOGI("MainThread TraceCollector Dump result: %{public}d", result.retCode);
714 }
715 isMainThreadTraceEnabled_ = true;
716 }
717 };
718 WatchdogTask task("TraceCollector", traceTask, 0, interval, true);
719 std::unique_lock<std::mutex> lock(lock_);
720 InsertWatchdogTaskLocked("TraceCollector", std::move(task));
721 }
722
StartTraceProfile()723 int32_t WatchdogInner::StartTraceProfile()
724 {
725 traceCollector_ = UCollectClient::TraceCollector::Create();
726 if (traceCollector_ == nullptr) {
727 traceContent_.traceState = DumpStackState::DEFAULT;
728 XCOLLIE_LOGE("Create traceCollector failed.");
729 return -1;
730 }
731 appCaller_.actionId = UCollectClient::ACTION_ID_START_TRACE;
732 appCaller_.bundleName = bundleName_;
733 appCaller_.bundleVersion = bundleVersion_;
734 appCaller_.uid = static_cast<int64_t>(getuid());
735 appCaller_.pid = getprocpid();
736 appCaller_.threadName = GetSelfProcName();
737 appCaller_.foreground = isForeground_;
738 appCaller_.happenTime = GetTimeStamp() / MILLISEC_TO_NANOSEC;
739 appCaller_.beginTime = traceContent_.reportBegin / MILLISEC_TO_NANOSEC;
740 appCaller_.endTime = traceContent_.reportEnd / MILLISEC_TO_NANOSEC;
741 auto result = traceCollector_->CaptureDurationTrace(appCaller_);
742 XCOLLIE_LOGI("MainThread TraceCollector Start result: %{public}d", result.retCode);
743 if (result.retCode == 0) {
744 DumpTraceProfile(DURATION_TIME);
745 }
746 return result.retCode;
747 }
748
CollectTraceDetect(const TimePoint & endTime,int64_t durationTime)749 void WatchdogInner::CollectTraceDetect(const TimePoint& endTime, int64_t durationTime)
750 {
751 if (IsBetaVersion()) {
752 return;
753 }
754 if (traceContent_.traceState == DumpStackState::COMPLETE) {
755 auto diff = endTime - traceContent_.lastEndTime;
756 int64_t intervalTime = std::chrono::duration_cast<std::chrono::milliseconds>(diff).count();
757 if (intervalTime < ONE_DAY_LIMIT) {
758 return;
759 }
760 }
761 traceContent_.traceState = DumpStackState::COMPLETE;
762 UpdateTime(traceContent_.reportBegin, traceContent_.reportEnd, traceContent_.lastEndTime, endTime);
763 int32_t result = StartTraceProfile();
764 XCOLLIE_LOGI("MainThread TraceCollector Start result: %{public}d, Duration Time: %{public}" PRId64 " ms",
765 result, durationTime);
766 }
767
DistributeStart(const std::string & name)768 static TimePoint DistributeStart(const std::string& name)
769 {
770 WatchdogInner::GetInstance().timeContent_.curBegin = GetTimeStamp();
771 return std::chrono::steady_clock::now();
772 }
773
DistributeEnd(const std::string & name,const TimePoint & startTime)774 static void DistributeEnd(const std::string& name, const TimePoint& startTime)
775 {
776 TimePoint endTime = std::chrono::steady_clock::now();
777 auto duration = endTime - startTime;
778 int64_t durationTime = std::chrono::duration_cast<std::chrono::milliseconds>(duration).count();
779 #ifdef HICOLLIE_JANK_ENABLE
780 WatchdogInner::GetInstance().timeContent_.curEnd = GetTimeStamp();
781 if (WatchdogInner::GetInstance().CheckSample(endTime, durationTime)) {
782 return;
783 }
784 int sampleInterval = WatchdogInner::GetInstance().jankParamsMap[KEY_SAMPLE_INTERVAL];
785 if (duration > std::chrono::milliseconds(sampleInterval)) {
786 int logType = WatchdogInner::GetInstance().jankParamsMap[KEY_LOG_TYPE];
787 switch (logType) {
788 case CatchLogType::LOGTYPE_SAMPLE_STACK: {
789 WatchdogInner::GetInstance().StartProfileMainThread(endTime, durationTime, sampleInterval);
790 break;
791 }
792 case CatchLogType::LOGTYPE_COLLECT_TRACE: {
793 if (duration > std::chrono::milliseconds(DUMPTRACE_TIME)) {
794 WatchdogInner::GetInstance().CollectTraceDetect(endTime, durationTime);
795 }
796 break;
797 }
798 case CatchLogType::LOGTYPE_NONE: {
799 if (duration < std::chrono::milliseconds(DUMPTRACE_TIME)) {
800 WatchdogInner::GetInstance().StartProfileMainThread(endTime, durationTime, sampleInterval);
801 } else {
802 WatchdogInner::GetInstance().CollectTraceDetect(endTime, durationTime);
803 }
804 break;
805 }
806 default:
807 break;
808 }
809 }
810 #endif // HICOLLIE_JANK_ENABLE
811 if (duration > std::chrono::milliseconds(DISTRIBUTE_TIME)) {
812 XCOLLIE_LOGI("BlockMonitor event name: %{public}s, Duration Time: %{public}" PRId64 " ms",
813 name.c_str(), durationTime);
814 }
815 }
816
AddThread(const std::string & name,std::shared_ptr<AppExecFwk::EventHandler> handler,TimeOutCallback timeOutCallback,uint64_t interval)817 int WatchdogInner::AddThread(const std::string &name,
818 std::shared_ptr<AppExecFwk::EventHandler> handler, TimeOutCallback timeOutCallback, uint64_t interval)
819 {
820 if (name.empty() || handler == nullptr) {
821 XCOLLIE_LOGE("Add thread fail, invalid args!");
822 return -1;
823 }
824
825 if (IsInAppspwan()) {
826 return -1;
827 }
828
829 std::string limitedName = GetLimitedSizeName(name);
830 XCOLLIE_LOGI("Add thread %{public}s to watchdog.", limitedName.c_str());
831 std::unique_lock<std::mutex> lock(lock_);
832
833 IpcCheck();
834
835 if (!InsertWatchdogTaskLocked(limitedName, WatchdogTask(limitedName, handler, timeOutCallback, interval))) {
836 return -1;
837 }
838 return 0;
839 }
840
RunOneShotTask(const std::string & name,Task && task,uint64_t delay)841 void WatchdogInner::RunOneShotTask(const std::string& name, Task&& task, uint64_t delay)
842 {
843 if (name.empty() || task == nullptr) {
844 XCOLLIE_LOGE("Add task fail, invalid args!");
845 return;
846 }
847
848 if (IsInAppspwan()) {
849 return;
850 }
851
852 std::unique_lock<std::mutex> lock(lock_);
853 std::string limitedName = GetLimitedSizeName(name);
854 InsertWatchdogTaskLocked(limitedName, WatchdogTask(limitedName, std::move(task), delay, 0, true));
855 }
856
RunXCollieTask(const std::string & name,uint64_t timeout,XCollieCallback func,void * arg,unsigned int flag)857 int64_t WatchdogInner::RunXCollieTask(const std::string& name, uint64_t timeout, XCollieCallback func,
858 void *arg, unsigned int flag)
859 {
860 if (name.empty() || timeout == 0) {
861 XCOLLIE_LOGE("Add XCollieTask fail, invalid args!");
862 return INVALID_ID;
863 }
864
865 if (IsInAppspwan()) {
866 return INVALID_ID;
867 }
868
869 std::unique_lock<std::mutex> lock(lock_);
870 IpcCheck();
871 std::string limitedName = GetLimitedSizeName(name);
872 return InsertWatchdogTaskLocked(limitedName, WatchdogTask(limitedName, timeout, func, arg, flag));
873 }
874
RemoveXCollieTask(int64_t id)875 void WatchdogInner::RemoveXCollieTask(int64_t id)
876 {
877 std::priority_queue<WatchdogTask> tmpQueue;
878 std::unique_lock<std::mutex> lock(lock_);
879 size_t size = checkerQueue_.size();
880 if (size == 0) {
881 XCOLLIE_LOGE("Remove XCollieTask %{public}lld fail, empty queue!", static_cast<long long>(id));
882 return;
883 }
884 while (!checkerQueue_.empty()) {
885 const WatchdogTask& task = checkerQueue_.top();
886 if (task.id != id || task.timeout == 0) {
887 tmpQueue.push(task);
888 }
889 checkerQueue_.pop();
890 }
891 if (tmpQueue.size() == size) {
892 XCOLLIE_LOGE("Remove XCollieTask fail, can not find timer %{public}lld, size=%{public}zu!",
893 static_cast<long long>(id), size);
894 }
895 tmpQueue.swap(checkerQueue_);
896 }
897
RunPeriodicalTask(const std::string & name,Task && task,uint64_t interval,uint64_t delay)898 void WatchdogInner::RunPeriodicalTask(const std::string& name, Task&& task, uint64_t interval, uint64_t delay)
899 {
900 if (name.empty() || task == nullptr) {
901 XCOLLIE_LOGE("Add task fail, invalid args!");
902 return;
903 }
904
905 if (IsInAppspwan()) {
906 return;
907 }
908
909 std::string limitedName = GetLimitedSizeName(name);
910 XCOLLIE_LOGD("Add periodical task %{public}s to watchdog.", name.c_str());
911 std::unique_lock<std::mutex> lock(lock_);
912 InsertWatchdogTaskLocked(limitedName, WatchdogTask(limitedName, std::move(task), delay, interval, false));
913 }
914
SetTimerCountTask(const std::string & name,uint64_t timeLimit,int countLimit)915 int64_t WatchdogInner::SetTimerCountTask(const std::string &name, uint64_t timeLimit, int countLimit)
916 {
917 if (name.empty() || timeLimit == 0 || countLimit <= 0) {
918 XCOLLIE_LOGE("SetTimerCountTask fail, invalid args!");
919 return INVALID_ID;
920 }
921
922 if (IsInAppspwan()) {
923 return INVALID_ID;
924 }
925 std::string limitedName = GetLimitedSizeName(name);
926 XCOLLIE_LOGD("SetTimerCountTask name : %{public}s", name.c_str());
927 std::unique_lock<std::mutex> lock(lock_);
928 return InsertWatchdogTaskLocked(limitedName, WatchdogTask(limitedName, timeLimit, countLimit));
929 }
930
TriggerTimerCountTask(const std::string & name,bool bTrigger,const std::string & message)931 void WatchdogInner::TriggerTimerCountTask(const std::string &name, bool bTrigger, const std::string &message)
932 {
933 std::unique_lock<std::mutex> lock(lock_);
934
935 if (checkerQueue_.empty()) {
936 XCOLLIE_LOGE("TriggerTimerCountTask name : %{public}s fail, empty queue!", name.c_str());
937 return;
938 }
939
940 bool isTaskExist = false;
941 uint64_t now = GetCurrentTickMillseconds();
942 std::priority_queue<WatchdogTask> tmpQueue;
943 while (!checkerQueue_.empty()) {
944 WatchdogTask task = checkerQueue_.top();
945 if (task.name == name) {
946 isTaskExist = true;
947 if (bTrigger) {
948 task.triggerTimes.push_back(now);
949 task.message = message;
950 } else {
951 task.triggerTimes.clear();
952 }
953 }
954 tmpQueue.push(task);
955 checkerQueue_.pop();
956 }
957 tmpQueue.swap(checkerQueue_);
958
959 if (!isTaskExist) {
960 XCOLLIE_LOGE("TriggerTimerCount name : %{public}s does not exist!", name.c_str());
961 }
962 }
963
IsTaskExistLocked(const std::string & name)964 bool WatchdogInner::IsTaskExistLocked(const std::string& name)
965 {
966 return (taskNameSet_.find(name) != taskNameSet_.end());
967 }
968
IsExceedMaxTaskLocked()969 bool WatchdogInner::IsExceedMaxTaskLocked()
970 {
971 if (checkerQueue_.size() >= MAX_WATCH_NUM) {
972 XCOLLIE_LOGE("Exceed max watchdog task!");
973 return true;
974 }
975
976 return false;
977 }
978
InsertWatchdogTaskLocked(const std::string & name,WatchdogTask && task)979 int64_t WatchdogInner::InsertWatchdogTaskLocked(const std::string& name, WatchdogTask&& task)
980 {
981 if (!task.isOneshotTask && IsTaskExistLocked(name)) {
982 XCOLLIE_LOGI("Task with %{public}s already exist, failed to insert.", name.c_str());
983 return 0;
984 }
985
986 if (IsExceedMaxTaskLocked()) {
987 XCOLLIE_LOGE("Exceed max watchdog task, failed to insert.");
988 return 0;
989 }
990 int64_t id = task.id;
991 checkerQueue_.push(std::move(task));
992 if (!task.isOneshotTask) {
993 taskNameSet_.insert(name);
994 }
995 CreateWatchdogThreadIfNeed();
996 if (task.nextTickTime < nextWeakUpTime_) {
997 condition_.notify_all();
998 }
999
1000 return id;
1001 }
1002
StopWatchdog()1003 void WatchdogInner::StopWatchdog()
1004 {
1005 Stop();
1006 }
1007
IsCallbackLimit(unsigned int flag)1008 bool WatchdogInner::IsCallbackLimit(unsigned int flag)
1009 {
1010 bool ret = false;
1011 time_t startTime = time(nullptr);
1012 if (!(flag & XCOLLIE_FLAG_LOG)) {
1013 return ret;
1014 }
1015 if (timeCallback_ + XCOLLIE_CALLBACK_TIMEWIN_MAX < startTime) {
1016 timeCallback_ = startTime;
1017 } else {
1018 if (++cntCallback_ > XCOLLIE_CALLBACK_HISTORY_MAX) {
1019 ret = true;
1020 }
1021 }
1022 return ret;
1023 }
1024
IPCProxyLimitCallback(uint64_t num)1025 void IPCProxyLimitCallback(uint64_t num)
1026 {
1027 XCOLLIE_LOGE("ipc proxy num %{public}" PRIu64 " exceed limit", num);
1028 if (getuid() >= MIN_APP_UID && IsBetaVersion()) {
1029 XCOLLIE_LOGI("Process is going to exit, reason: ipc proxy num exceed limit");
1030 _exit(0);
1031 }
1032 }
1033
UpdateAppStartContent(const std::map<std::string,int64_t> & paramsMap,AppStartContent & startContent)1034 void WatchdogInner::UpdateAppStartContent(const std::map<std::string, int64_t>& paramsMap,
1035 AppStartContent& startContent)
1036 {
1037 auto it = paramsMap.find(KEY_THRESHOLD);
1038 if (it == paramsMap.end() || it->second <= 0) {
1039 XCOLLIE_LOGE("Set %{public}s param error, value=%{public}" PRId64".", KEY_THRESHOLD, it->second);
1040 return;
1041 }
1042 startContent.threshold = it->second;
1043
1044 it = paramsMap.find(KEY_TRIGGER_INTERVAL);
1045 if (it == paramsMap.end() || it->second <= 0) {
1046 XCOLLIE_LOGE("Set %{public}s param error, value=%{public}" PRId64".", KEY_TRIGGER_INTERVAL, it->second);
1047 return;
1048 }
1049 startContent.sampleInterval = it->second;
1050
1051 it = paramsMap.find(KEY_COLLECT_TIMES);
1052 if (it == paramsMap.end() || it->second <= 0) {
1053 XCOLLIE_LOGE("Set %{public}s param error, value=%{public}" PRId64".", KEY_COLLECT_TIMES, it->second);
1054 return;
1055 }
1056 startContent.targetCount = it->second;
1057
1058 it = paramsMap.find(KEY_REPORT_TIMES);
1059 if (it == paramsMap.end() || it->second <= 0) {
1060 XCOLLIE_LOGE("Set %{public}s param error, value=%{public}" PRId64".", KEY_REPORT_TIMES, it->second);
1061 return;
1062 }
1063 startContent.reportTimes = it->second;
1064
1065 it = paramsMap.find(KEY_START_TIME);
1066 if (it == paramsMap.end() || it->second <= 0) {
1067 XCOLLIE_LOGE("Set %{public}s param error, value=%{public}" PRId64".", KEY_START_TIME, it->second);
1068 return;
1069 }
1070 startContent.startTime = it->second;
1071
1072 it = paramsMap.find(KEY_STARTUP_DURATION);
1073 if (it != paramsMap.end() && it->second > 0) {
1074 startContent.startUpDuration = it->second;
1075 }
1076 startContent.enableStartSample.store(true);
1077 XCOLLIE_LOGW("UpdateAppStartContent threshold=%{public}" PRId64", sampleInterval=%{public}" PRId64
1078 ", targetCount=%{public}d, ""reportTimes=%{public}d, startTime=%{public}" PRId64", "
1079 "enableStartSample=%{public}d, startUpDuration=%{public}" PRId64".",
1080 startContent.threshold, startContent.sampleInterval, startContent.targetCount, startContent.reportTimes,
1081 startContent.startTime, startContent.enableStartSample.load(), startContent.startUpDuration);
1082 }
1083
ParseAppStartParams(const std::string & line,const std::string & eventName)1084 void WatchdogInner::ParseAppStartParams(const std::string& line, const std::string& eventName)
1085 {
1086 std::map<std::string, int64_t> keyValueMap;
1087 std::stringstream iss(line);
1088 std::string tokens;
1089 while (getline(iss, tokens, ',') && !tokens.empty()) {
1090 std::string key;
1091 std::string value;
1092 if (value.size() > std::to_string(INT64_MAX).length() ||
1093 !GetKeyValueByStr(tokens, key, value, ':')) {
1094 XCOLLIE_LOGE("ParseAppStartParams failed, key:%{public}s value:%{public}s",
1095 key.c_str(), value.c_str());
1096 continue;
1097 }
1098 keyValueMap[key] = static_cast<int64_t>(strtoull(value.c_str(), nullptr, CPU_FREQ_DECIMAL_BASE));
1099 }
1100 if (keyValueMap.size() < APP_START_PARAM_SIZE) {
1101 XCOLLIE_LOGE("ParseAppStartParams eventName:%{public}s keyValueMap size:%{public}zu",
1102 eventName.c_str(), keyValueMap.size());
1103 return;
1104 }
1105 if (eventName == EVENT_APP_START_SLOW) {
1106 UpdateAppStartContent(keyValueMap, startSlowContent_);
1107 } else if (eventName == EVENT_SLIDING_JANK) {
1108 UpdateAppStartContent(keyValueMap, scrollSlowContent_);
1109 }
1110 }
1111
ReadAppStartConfig(const std::string & filePath)1112 void WatchdogInner::ReadAppStartConfig(const std::string& filePath)
1113 {
1114 if (!OHOS::FileExists(filePath)) {
1115 XCOLLIE_LOGD("file:%{public}s not exist, errno:%{public}d", filePath.c_str(), errno);
1116 return;
1117 }
1118 std::string str;
1119 if (!OHOS::LoadStringFromFile(APP_START_CONFIG, str)) {
1120 XCOLLIE_LOGE("get content from file:%{public}s failed, errno:%{public}d", APP_START_CONFIG, errno);
1121 return;
1122 }
1123 std::stringstream iss(str);
1124 std::string line;
1125 std::string eventName;
1126 while (std::getline(iss, line)) {
1127 if (line.empty() || line.find(KEY_EVENT_NAME) == std::string::npos) {
1128 continue;
1129 }
1130 if (line.find(EVENT_SLIDING_JANK) != std::string::npos) {
1131 eventName = EVENT_SLIDING_JANK;
1132 } else if (line.find(EVENT_APP_START_SLOW) != std::string::npos) {
1133 eventName = EVENT_APP_START_SLOW;
1134 } else {
1135 continue;
1136 }
1137 ParseAppStartParams(line, eventName);
1138 }
1139 }
1140
CreateWatchdogThreadIfNeed()1141 void WatchdogInner::CreateWatchdogThreadIfNeed()
1142 {
1143 std::call_once(flag_, [this] {
1144 if (threadLoop_ == nullptr) {
1145 if (mainRunner_ == nullptr) {
1146 mainRunner_ = AppExecFwk::EventRunner::GetMainEventRunner();
1147 }
1148 mainRunner_->SetMainLooperWatcher(DistributeStart, DistributeEnd);
1149 if (getuid() >= MIN_APP_UID) {
1150 ReadAppStartConfig(APP_START_CONFIG);
1151 }
1152 const uint64_t limitNum = 20000;
1153 IPCDfx::SetIPCProxyLimit(limitNum, IPCProxyLimitCallback);
1154 threadLoop_ = std::make_unique<std::thread>(&WatchdogInner::Start, this);
1155 if (getpid() == gettid()) {
1156 SetThreadSignalMask(SIGDUMP, true, true);
1157 }
1158 XCOLLIE_LOGD("Watchdog is running!");
1159 }
1160 });
1161 }
1162
1163
IsInSleep(const WatchdogTask & queuedTaskCheck)1164 bool WatchdogInner::IsInSleep(const WatchdogTask& queuedTaskCheck)
1165 {
1166 if (IsInAppspwan() || queuedTaskCheck.bootTimeStart <= 0 || queuedTaskCheck.monoTimeStart <= 0) {
1167 return false;
1168 }
1169
1170 uint64_t bootTimeStart = 0;
1171 uint64_t monoTimeStart = 0;
1172 CalculateTimes(bootTimeStart, monoTimeStart);
1173 uint64_t bootTimeDetal = GetNumsDiffAbs(bootTimeStart, queuedTaskCheck.bootTimeStart);
1174 uint64_t monoTimeDetal = GetNumsDiffAbs(monoTimeStart, queuedTaskCheck.monoTimeStart);
1175 if (GetNumsDiffAbs(bootTimeDetal, monoTimeDetal) >= DEFAULE_SLEEP_TIME) {
1176 XCOLLIE_LOGI("Current Thread has been sleep, pid: %{public}d", getprocpid());
1177 return true;
1178 }
1179 return false;
1180 }
1181
CheckKickWatchdog(uint64_t now,const WatchdogTask & queuedTask)1182 void WatchdogInner::CheckKickWatchdog(uint64_t now, const WatchdogTask& queuedTask)
1183 {
1184 if (g_existFile && queuedTask.name == IPC_FULL_TASK && getuid() == FOUNDATION_UID &&
1185 now - g_lastKickTime > INTERVAL_KICK_TIME) {
1186 if (KickWatchdog()) {
1187 g_lastKickTime = now;
1188 }
1189 }
1190 }
1191
CheckCurrentTaskLocked(const WatchdogTask & queuedTaskCheck)1192 bool WatchdogInner::CheckCurrentTaskLocked(const WatchdogTask& queuedTaskCheck)
1193 {
1194 if (queuedTaskCheck.name.empty()) {
1195 checkerQueue_.pop();
1196 XCOLLIE_LOGW("queuedTask name is empty.");
1197 } else if (queuedTaskCheck.name == STACK_CHECKER && isMainThreadStackEnabled_) {
1198 checkerQueue_.pop();
1199 taskNameSet_.erase("ThreadSampler");
1200 if (!g_isDumpStack && !g_isReuseStack && Deinit()) {
1201 ResetThreadSamplerFuncs();
1202 }
1203 stackContent_.isStartSampleEnabled = true;
1204 isMainThreadStackEnabled_ = false;
1205 XCOLLIE_LOGI("Detect sample stack task complete.");
1206 } else if (queuedTaskCheck.name == APP_START_SAMPLE && ((startSlowContent_.isFinishStartSample) ||
1207 scrollSlowContent_.isFinishStartSample)) {
1208 checkerQueue_.pop();
1209 taskNameSet_.erase(APP_START_SAMPLE);
1210 if (!g_isDumpStack && !g_isReuseStack && Deinit()) {
1211 ResetThreadSamplerFuncs();
1212 }
1213 startSlowContent_.isFinishStartSample = false;
1214 startSlowContent_.isStartSampleEnabled = true;
1215 scrollSlowContent_.isFinishStartSample = false;
1216 scrollSlowContent_.isStartSampleEnabled = true;
1217 XCOLLIE_LOGI("Detect app start sample task complete.");
1218 } else if (queuedTaskCheck.name == TRACE_CHECKER && isMainThreadTraceEnabled_) {
1219 checkerQueue_.pop();
1220 taskNameSet_.erase("TraceCollector");
1221 isMainThreadTraceEnabled_ = false;
1222 if (traceContent_.dumpCount < COLLECT_TRACE_MIN) {
1223 traceContent_.traceState = DumpStackState::DEFAULT;
1224 }
1225 XCOLLIE_LOGI("Detect collect trace task complete.");
1226 } else if (queuedTaskCheck.name == FREEZE_SAMPLE && g_freezeTaskFinished) {
1227 checkerQueue_.pop();
1228 if (!g_isDumpStack && Deinit()) {
1229 ResetThreadSamplerFuncs();
1230 }
1231 taskNameSet_.erase(FREEZE_SAMPLE);
1232 g_isReuseStack.store(false);
1233 g_freezeTaskFinished.store(false);
1234 XCOLLIE_LOGI("Freeze collect stack task complete.");
1235 } else {
1236 return false;
1237 }
1238 return true;
1239 }
1240
FetchNextTask(uint64_t now,WatchdogTask & task)1241 uint64_t WatchdogInner::FetchNextTask(uint64_t now, WatchdogTask& task)
1242 {
1243 if (isNeedStop_) {
1244 while (!checkerQueue_.empty()) {
1245 checkerQueue_.pop();
1246 }
1247 return DEFAULT_TIMEOUT;
1248 }
1249
1250 if (checkerQueue_.empty()) {
1251 return DEFAULT_TIMEOUT;
1252 }
1253
1254 const WatchdogTask& queuedTaskCheck = checkerQueue_.top();
1255 if (CheckCurrentTaskLocked(queuedTaskCheck) && checkerQueue_.empty()) {
1256 return DEFAULT_TIMEOUT;
1257 }
1258
1259 const WatchdogTask& queuedTask = checkerQueue_.top();
1260 CheckKickWatchdog(now, queuedTask);
1261 if (queuedTask.nextTickTime > now) {
1262 return queuedTask.nextTickTime - now;
1263 }
1264
1265 currentScene_ = "thread DfxWatchdog: Current scenario is task name: " + queuedTask.name + "\n";
1266 task = queuedTask;
1267 checkerQueue_.pop();
1268 return 0;
1269 }
1270
ReInsertTaskIfNeed(WatchdogTask & task)1271 void WatchdogInner::ReInsertTaskIfNeed(WatchdogTask& task)
1272 {
1273 if (task.checkInterval == 0) {
1274 return;
1275 }
1276
1277 std::unique_lock<std::mutex> lock(lock_);
1278 task.nextTickTime = task.nextTickTime + task.checkInterval;
1279 checkerQueue_.push(task);
1280 }
1281
Start()1282 bool WatchdogInner::Start()
1283 {
1284 if (pthread_setname_np(pthread_self(), "OS_DfxWatchdog") != 0) {
1285 XCOLLIE_LOGW("Failed to set threadName for watchdog, errno:%d.", errno);
1286 }
1287 SetThreadSignalMask(SIGDUMP, false, false);
1288 watchdogStartTime_ = GetCurrentTickMillseconds();
1289 XCOLLIE_LOGD("Watchdog is running in thread(%{public}d)!", getproctid());
1290 if (SetThreadInfoCallback != nullptr) {
1291 SetThreadInfoCallback(ThreadInfo);
1292 XCOLLIE_LOGD("Watchdog Set Thread Info Callback");
1293 }
1294 while (!isNeedStop_) {
1295 if (__get_global_hook_flag() && __get_hook_flag()) {
1296 __set_hook_flag(false);
1297 }
1298 uint64_t now = GetCurrentTickMillseconds();
1299 WatchdogTask task;
1300 uint64_t leftTimeMill;
1301 {
1302 std::unique_lock<std::mutex> lock(lock_);
1303 leftTimeMill = FetchNextTask(now, task);
1304 nextWeakUpTime_ = now + leftTimeMill;
1305 }
1306 if (leftTimeMill == 0) {
1307 if (!IsInSleep(task)) {
1308 task.Run(now);
1309 ReInsertTaskIfNeed(task);
1310 currentScene_ = "thread DfxWatchdog: Current scenario is hicollie.\n";
1311 }
1312 continue;
1313 } else if (isNeedStop_) {
1314 break;
1315 } else {
1316 std::unique_lock<std::mutex> lock(lock_);
1317 condition_.wait_for(lock, std::chrono::milliseconds(leftTimeMill));
1318 }
1319 }
1320 if (SetThreadInfoCallback != nullptr) {
1321 SetThreadInfoCallback(nullptr);
1322 }
1323 return true;
1324 }
1325
SendMsgToHungtask(const std::string & msg)1326 bool WatchdogInner::SendMsgToHungtask(const std::string& msg)
1327 {
1328 if (g_fd == NOT_OPEN) {
1329 return false;
1330 }
1331
1332 ssize_t watchdogWrite = write(g_fd, msg.c_str(), msg.size());
1333 if (watchdogWrite < 0 || watchdogWrite != static_cast<ssize_t>(msg.size())) {
1334 XCOLLIE_KLOGE("watchdog write msg failed, errno:%{public}d", errno);
1335 close(g_fd);
1336 g_fd = NOT_OPEN;
1337 return false;
1338 }
1339 XCOLLIE_KLOGI("Send %{public}s to hungtask Successful\n", msg.c_str());
1340 return true;
1341 }
1342
KickWatchdog()1343 bool WatchdogInner::KickWatchdog()
1344 {
1345 if (g_fd == NOT_OPEN) {
1346 g_fd = open(SYS_KERNEL_HUNGTASK_USERLIST, O_WRONLY);
1347 if (g_fd < 0) {
1348 g_fd = open(HUNGTASK_USERLIST, O_WRONLY);
1349 if (g_fd < 0) {
1350 XCOLLIE_KLOGE("can't open hungtask file, errno:%{public}d", errno);
1351 g_existFile = false;
1352 return false;
1353 }
1354 XCOLLIE_KLOGE("hmos kernel");
1355 isHmos = true;
1356 } else {
1357 XCOLLIE_KLOGE("linux kernel");
1358 }
1359
1360 if (!SendMsgToHungtask(isHmos ? ON_KICK_TIME_EXTRA : ON_KICK_TIME)) {
1361 XCOLLIE_KLOGI("kick watchdog send msg to hungtask fail");
1362 return false;
1363 }
1364 }
1365 return SendMsgToHungtask(isHmos ? KICK_TIME_EXTRA : KICK_TIME);
1366 }
1367
AddIpcFull(uint64_t interval,unsigned int flag,IpcFullCallback func,void * arg)1368 bool WatchdogInner::AddIpcFull(uint64_t interval, unsigned int flag, IpcFullCallback func, void *arg)
1369 {
1370 if (interval < MIN_IPC_CHECK_INTERVAL || interval > MAX_IPC_CHECK_INTERVAL) {
1371 XCOLLIE_KLOGE("add ipc full failed, interval is invalid");
1372 return false;
1373 }
1374 if (IsInAppspwan()) {
1375 return false;
1376 }
1377
1378 std::unique_lock<std::mutex> lock(lock_);
1379 return IpcCheck(interval, flag, func, arg, false);
1380 }
1381
IpcCheck(uint64_t interval,unsigned int flag,IpcFullCallback func,void * arg,bool defaultType)1382 bool WatchdogInner::IpcCheck(uint64_t interval, unsigned int flag, IpcFullCallback func, void *arg, bool defaultType)
1383 {
1384 if (defaultType) {
1385 static bool ipcCheckInit = false;
1386 if (ipcCheckInit) {
1387 return false;
1388 }
1389 ipcCheckInit = true;
1390 uint32_t uid = getuid();
1391 bool isJoinIpcFullUid = std::any_of(std::begin(JOIN_IPC_FULL_UIDS), std::end(JOIN_IPC_FULL_UIDS),
1392 [uid](const uint32_t joinIpcFullUid) { return uid == joinIpcFullUid; });
1393 if (!isJoinIpcFullUid && GetSelfProcName() != KEY_SCB_STATE) {
1394 return false;
1395 }
1396 }
1397
1398 if (IsTaskExistLocked(IPC_FULL_TASK)) {
1399 XCOLLIE_LOGE("ipc full task aleady exists");
1400 return false;
1401 }
1402
1403 bool result = InsertWatchdogTaskLocked(IPC_FULL_TASK, WatchdogTask(interval * TO_MILLISECOND_MULTPLE, func, arg,
1404 flag)) > 0;
1405 if (result) {
1406 XCOLLIE_LOGI("add ipc full task success");
1407 } else {
1408 XCOLLIE_LOGE("add ipc full task falied");
1409 }
1410 return result;
1411 }
1412
WriteStringToFile(int32_t pid,const char * str)1413 bool WatchdogInner::WriteStringToFile(int32_t pid, const char *str)
1414 {
1415 char file[PATH_LEN] = {0};
1416 if (snprintf_s(file, PATH_LEN, PATH_LEN - 1, "/proc/%d/unexpected_die_catch", pid) == -1) {
1417 XCOLLIE_LOGE("failed to build path for %{public}d.", pid);
1418 return false;
1419 }
1420 FILE* fp = fopen(file, "wb");
1421 if (fp == nullptr) {
1422 XCOLLIE_LOGI("failed to open file %{public}s, errno: %{public}d", file, errno);
1423 return false;
1424 }
1425 bool writeResult = true;
1426 if (fwrite(str, sizeof(char), strlen(str), fp) != strlen(str)) {
1427 XCOLLIE_LOGI("failed to write file %{public}s, errno: %{public}d", file, errno);
1428 writeResult = false;
1429 }
1430 if (fclose(fp)) {
1431 XCOLLIE_LOGE("fclose is failed");
1432 }
1433 fp = nullptr;
1434 return writeResult;
1435 }
1436
FfrtCallback(uint64_t taskId,const char * taskInfo,uint32_t delayedTaskCount)1437 void WatchdogInner::FfrtCallback(uint64_t taskId, const char *taskInfo, uint32_t delayedTaskCount)
1438 {
1439 std::string faultTimeStr = "\nFault time:" + FormatTime("%Y/%m/%d-%H:%M:%S") + "\n";
1440 std::string description = "FfrtCallback: task(";
1441 description += taskInfo;
1442 description += ") blocked " + std::to_string(FFRT_CALLBACK_TIME / TIME_MS_TO_S) + "s";
1443 std::string info(taskInfo);
1444 if (info.find("Queue_Schedule_Timeout") != std::string::npos) {
1445 WatchdogInner::SendFfrtEvent(description, "SERVICE_WARNING", taskInfo, faultTimeStr, false);
1446 description += ", report twice instead of exiting process.";
1447 WatchdogInner::SendFfrtEvent(description, "SERVICE_BLOCK", taskInfo, faultTimeStr);
1448 WatchdogInner::KillPeerBinderProcess(description);
1449 return;
1450 }
1451 bool isExist = false;
1452 {
1453 std::unique_lock<std::mutex> lock(lockFfrt_);
1454 auto &map = WatchdogInner::GetInstance().taskIdCnt;
1455 auto search = map.find(taskId);
1456 if (search != map.end()) {
1457 isExist = true;
1458 } else {
1459 map[taskId] = SERVICE_WARNING;
1460 }
1461 }
1462
1463 if (isExist) {
1464 description += ", report twice instead of exiting process."; // 1s = 1000ms
1465 WatchdogInner::SendFfrtEvent(description, "SERVICE_BLOCK", taskInfo, faultTimeStr);
1466 WatchdogInner::GetInstance().taskIdCnt.erase(taskId);
1467 WatchdogInner::KillPeerBinderProcess(description);
1468 } else {
1469 WatchdogInner::SendFfrtEvent(description, "SERVICE_WARNING", taskInfo, faultTimeStr);
1470 }
1471 }
1472
InitFfrtWatchdog()1473 void WatchdogInner::InitFfrtWatchdog()
1474 {
1475 CreateWatchdogThreadIfNeed();
1476 ffrt_task_timeout_set_cb(FfrtCallback);
1477 ffrt_task_timeout_set_threshold(FFRT_CALLBACK_TIME);
1478 std::unique_lock<std::mutex> lock(lock_);
1479 IpcCheck();
1480 }
1481
SendFfrtEvent(const std::string & msg,const std::string & eventName,const char * taskInfo,const std::string & faultTimeStr,const bool isDumpStack)1482 void WatchdogInner::SendFfrtEvent(const std::string &msg, const std::string &eventName, const char * taskInfo,
1483 const std::string& faultTimeStr, const bool isDumpStack)
1484 {
1485 int32_t pid = getprocpid();
1486 if (IsProcessDebug(pid)) {
1487 XCOLLIE_LOGI("heap dump or debug for %{public}d, don't report.", pid);
1488 return;
1489 }
1490 uint32_t gid = getgid();
1491 uint32_t uid = getuid();
1492 time_t curTime = time(nullptr);
1493 std::string sendMsg = std::string((ctime(&curTime) == nullptr) ? "" : ctime(&curTime)) +
1494 "\n" + msg + "\n";
1495 char* buffer = new char[FFRT_BUFFER_SIZE + 1]();
1496 buffer[FFRT_BUFFER_SIZE] = 0;
1497 ffrt_dump(DUMP_INFO_ALL, buffer, FFRT_BUFFER_SIZE);
1498 sendMsg += buffer;
1499 delete[] buffer;
1500 int32_t tid = pid;
1501 GetFfrtTaskTid(tid, sendMsg);
1502 sendMsg += faultTimeStr;
1503 #ifdef HISYSEVENT_ENABLE
1504 int ret = HiSysEventWrite(HiSysEvent::Domain::FRAMEWORK, eventName, HiSysEvent::EventType::FAULT,
1505 "PID", pid, "TID", tid, "TGID", gid, "UID", uid, "MODULE_NAME", taskInfo, "PROCESS_NAME", GetSelfProcName(),
1506 "MSG", sendMsg, "STACK", isDumpStack ? GetProcessStacktrace() : "");
1507 if (ret == ERR_OVER_SIZE) {
1508 std::string stack = "";
1509 if (isDumpStack) {
1510 GetBacktraceStringByTid(stack, tid, 0, true);
1511 }
1512 ret = HiSysEventWrite(HiSysEvent::Domain::FRAMEWORK, eventName, HiSysEvent::EventType::FAULT,
1513 "PID", pid, "TID", tid, "TGID", gid, "UID", uid, "MODULE_NAME", taskInfo,
1514 "PROCESS_NAME", GetSelfProcName(), "MSG", sendMsg, "STACK", stack);
1515 }
1516
1517 XCOLLIE_LOGI("hisysevent write result=%{public}d, send event [FRAMEWORK,%{public}s], "
1518 "msg=%{public}s", ret, eventName.c_str(), msg.c_str());
1519 #else
1520 XCOLLIE_LOGI("hisysevent not exists");
1521 #endif
1522 }
1523
GetFfrtTaskTid(int32_t & tid,const std::string & msg)1524 void WatchdogInner::GetFfrtTaskTid(int32_t& tid, const std::string& msg)
1525 {
1526 std::string queueNameFrontStr = "us. queue name [";
1527 size_t queueNameFrontPos = msg.find(queueNameFrontStr);
1528 if (queueNameFrontPos == std::string::npos) {
1529 return;
1530 }
1531 size_t queueNameRearPos = msg.find("], remaining tasks count=");
1532 size_t queueStartPos = queueNameFrontPos + queueNameFrontStr.length();
1533 if (queueNameRearPos == std::string::npos || queueNameRearPos <= queueStartPos) {
1534 return;
1535 }
1536 size_t queueNameLength = queueNameRearPos - queueStartPos;
1537 std::string workerTidFrontStr = " worker tid ";
1538 std::string taskIdFrontStr = " is running, task id ";
1539 std::string queueNameStr = " name " + msg.substr(queueStartPos, queueNameLength);
1540 std::istringstream issMsg(msg);
1541 std::string line;
1542 while (std::getline(issMsg, line, '\n')) {
1543 size_t workerTidFrontPos = line.find(workerTidFrontStr);
1544 size_t taskIdFrontPos = line.find(taskIdFrontStr);
1545 size_t queueNamePos = line.find(queueNameStr);
1546 size_t workerStartPos = workerTidFrontPos + workerTidFrontStr.length();
1547 if (workerTidFrontPos == std::string::npos || taskIdFrontPos == std::string::npos ||
1548 queueNamePos == std::string::npos || taskIdFrontPos <= workerStartPos) {
1549 continue;
1550 }
1551 size_t tidLength = taskIdFrontPos - workerStartPos;
1552 if (tidLength < std::to_string(INT32_MAX).length()) {
1553 std::string tidStr = line.substr(workerStartPos, tidLength);
1554 if (std::all_of(std::begin(tidStr), std::end(tidStr), [] (const char& c) {
1555 return isdigit(c);
1556 })) {
1557 tid = std::stoi(tidStr);
1558 return;
1559 }
1560 }
1561 }
1562 }
1563
LeftTimeExitProcess(const std::string & description)1564 void WatchdogInner::LeftTimeExitProcess(const std::string &description)
1565 {
1566 int32_t pid = getprocpid();
1567 if (IsProcessDebug(pid)) {
1568 XCOLLIE_LOGI("heap dump or debug for %{public}d, don't exit.", pid);
1569 return;
1570 }
1571 DelayBeforeExit(10); // sleep 10s for hiview dump
1572 bool result = WatchdogInner::WriteStringToFile(pid, "0");
1573 XCOLLIE_LOGI("Process is going to exit, reason:%{public}s. write to file: %{public}d.",
1574 description.c_str(), result);
1575
1576 _exit(0);
1577 }
1578
Stop()1579 bool WatchdogInner::Stop()
1580 {
1581 IPCDfx::SetIPCProxyLimit(0, nullptr);
1582 if (mainRunner_ != nullptr) {
1583 mainRunner_->SetMainLooperWatcher(nullptr, nullptr);
1584 }
1585 isNeedStop_.store(true);
1586 condition_.notify_all();
1587 if (threadLoop_ != nullptr && threadLoop_->joinable()) {
1588 threadLoop_->join();
1589 threadLoop_ = nullptr;
1590 }
1591 if (g_fd != NOT_OPEN) {
1592 close(g_fd);
1593 g_fd = NOT_OPEN;
1594 }
1595 return true;
1596 }
1597
KillPeerBinderProcess(const std::string & description)1598 void WatchdogInner::KillPeerBinderProcess(const std::string &description)
1599 {
1600 bool result = false;
1601 if (getuid() == FOUNDATION_UID) {
1602 result = KillProcessByPid(getprocpid());
1603 }
1604 if (!result) {
1605 WatchdogInner::LeftTimeExitProcess(description);
1606 }
1607 }
1608
RemoveInnerTask(const std::string & name)1609 void WatchdogInner::RemoveInnerTask(const std::string& name)
1610 {
1611 if (name.empty()) {
1612 XCOLLIE_LOGI("RemoveInnerTask fail, cname is null");
1613 return;
1614 }
1615 std::priority_queue<WatchdogTask> tmpQueue;
1616 std::unique_lock<std::mutex> lock(lock_);
1617 size_t size = checkerQueue_.size();
1618 if (size == 0) {
1619 XCOLLIE_LOGE("RemoveInnerTask %{public}s fail, empty queue!", name.c_str());
1620 return;
1621 }
1622 while (!checkerQueue_.empty()) {
1623 const WatchdogTask& task = checkerQueue_.top();
1624 if (task.name != name) {
1625 tmpQueue.push(task);
1626 } else {
1627 size_t nameSize = taskNameSet_.size();
1628 if (nameSize != 0 && !task.isOneshotTask) {
1629 taskNameSet_.erase(name);
1630 XCOLLIE_LOGD("RemoveInnerTask name %{public}s, remove result=%{public}d",
1631 name.c_str(), nameSize > taskNameSet_.size());
1632 }
1633 }
1634 checkerQueue_.pop();
1635 }
1636 if (tmpQueue.size() == size) {
1637 XCOLLIE_LOGE("RemoveInnerTask fail, can not find name %{public}s, size=%{public}zu!",
1638 name.c_str(), size);
1639 }
1640 tmpQueue.swap(checkerQueue_);
1641 }
1642
InitBeginFunc(const char * name)1643 void InitBeginFunc(const char* name)
1644 {
1645 std::string nameStr(name);
1646 WatchdogInner::GetInstance().bussinessBeginTime_ = DistributeStart(nameStr);
1647 }
1648
InitEndFunc(const char * name)1649 void InitEndFunc(const char* name)
1650 {
1651 std::string nameStr(name);
1652 DistributeEnd(nameStr, WatchdogInner::GetInstance().bussinessBeginTime_);
1653 }
1654
InitMainLooperWatcher(WatchdogInnerBeginFunc * beginFunc,WatchdogInnerEndFunc * endFunc)1655 void WatchdogInner::InitMainLooperWatcher(WatchdogInnerBeginFunc* beginFunc,
1656 WatchdogInnerEndFunc* endFunc)
1657 {
1658 int64_t tid = getproctid();
1659 if (beginFunc && endFunc) {
1660 if (buissnessThreadInfo_.find(tid) != buissnessThreadInfo_.end()) {
1661 XCOLLIE_LOGI("Tid =%{public}" PRId64 "already exits, "
1662 "no repeated initialization.", tid);
1663 return;
1664 }
1665 if (mainRunner_ != nullptr) {
1666 mainRunner_->SetMainLooperWatcher(nullptr, nullptr);
1667 }
1668 *beginFunc = InitBeginFunc;
1669 *endFunc = InitEndFunc;
1670 buissnessThreadInfo_.insert(tid);
1671 } else {
1672 if (buissnessThreadInfo_.find(tid) != buissnessThreadInfo_.end()) {
1673 XCOLLIE_LOGI("Remove already init tid=%{public}." PRId64, tid);
1674 mainRunner_->SetMainLooperWatcher(DistributeStart, DistributeEnd);
1675 buissnessThreadInfo_.erase(tid);
1676 }
1677 }
1678 }
1679
SetAppDebug(bool isAppDebug)1680 void WatchdogInner::SetAppDebug(bool isAppDebug)
1681 {
1682 isAppDebug_ = isAppDebug;
1683 }
1684
GetAppDebug()1685 bool WatchdogInner::GetAppDebug()
1686 {
1687 return isAppDebug_;
1688 }
1689
UpdateJankParam(int sampleInterval,int ignoreStartUpTime,int sampleCount,int logType,int reportTimes)1690 void WatchdogInner::UpdateJankParam(int sampleInterval, int ignoreStartUpTime, int sampleCount,
1691 int logType, int reportTimes)
1692 {
1693 jankParamsMap[KEY_LOG_TYPE] = logType;
1694 jankParamsMap[KEY_SAMPLE_INTERVAL] = sampleInterval;
1695 jankParamsMap[KEY_IGNORE_STARTUP_TIME] = ignoreStartUpTime;
1696 jankParamsMap[KEY_SAMPLE_COUNT] = sampleCount;
1697 if (logType == CatchLogType::LOGTYPE_COLLECT_TRACE) {
1698 XCOLLIE_LOGI("Set thread only dump trace success.");
1699 return;
1700 }
1701 if (jankParamsMap[KEY_SET_TIMES_FLAG] == SET_TIMES_FLAG) {
1702 UpdateReportTimes(bundleName_, reportTimes, jankParamsMap[KEY_CHECKER_INTERVAL]);
1703 jankParamsMap[KEY_SAMPLE_REPORT_TIMES] = reportTimes;
1704 stackContent_.reportTimes = reportTimes;
1705 jankParamsMap[KEY_SET_TIMES_FLAG] = 0;
1706 }
1707 XCOLLIE_LOGI("Set thread sampler params success. logType: %{public}d, sample interval: %{public}d, "
1708 "ignore startUp interval: %{public}d, count: %{public}d, reportTimes: %{public}d.",
1709 logType, sampleInterval, ignoreStartUpTime, sampleCount, stackContent_.reportTimes);
1710 }
1711
ConvertStrToNum(std::map<std::string,std::string> paramsMap,const std::string & key)1712 int WatchdogInner::ConvertStrToNum(std::map<std::string, std::string> paramsMap, const std::string& key)
1713 {
1714 int num = -1;
1715 auto it = paramsMap.find(key);
1716 if (it == paramsMap.end()) {
1717 XCOLLIE_LOGE("Set the thread sampler param error, %{public}s is not exist.", key.c_str());
1718 return num;
1719 }
1720 std::string str = it->second;
1721 if (!str.empty() && str.size() < std::to_string(INT32_MAX).length()) {
1722 if (std::all_of(std::begin(str), std::end(str), [] (const char &c) {
1723 return isdigit(c);
1724 })) {
1725 num = std::stoi(str);
1726 }
1727 }
1728 if (num < 0) {
1729 XCOLLIE_LOGE("Set param error, %{public}s: %{public}s should be a number, "
1730 "and greater than 0 and less than INT32_MAX.", key.c_str(), str.c_str());
1731 }
1732 return num;
1733 }
1734
CheckSampleParam(std::map<std::string,std::string> paramsMap)1735 bool WatchdogInner::CheckSampleParam(std::map<std::string, std::string> paramsMap)
1736 {
1737 int sampleInterval = ConvertStrToNum(paramsMap, KEY_SAMPLE_INTERVAL);
1738 if (sampleInterval < 0) {
1739 return false;
1740 } else if (sampleInterval < SAMPLE_INTERVAL_MIN || sampleInterval > SAMPLE_INTERVAL_MAX) {
1741 XCOLLIE_LOGE("Set the range of sample stack is from %{public}d to %{public}d, "
1742 "interval: %{public}d.", SAMPLE_INTERVAL_MIN, SAMPLE_INTERVAL_MAX, sampleInterval);
1743 return false;
1744 }
1745
1746 int ignoreStartUpTime = ConvertStrToNum(paramsMap, KEY_IGNORE_STARTUP_TIME);
1747 if (ignoreStartUpTime < 0) {
1748 return false;
1749 } else if (ignoreStartUpTime < IGNORE_STARTUP_TIME_MIN) {
1750 XCOLLIE_LOGE("Set the minimum of ignore startup interval is %{public}d s, "
1751 "interval: %{public}d.", IGNORE_STARTUP_TIME_MIN, ignoreStartUpTime);
1752 return false;
1753 }
1754
1755 int sampleCount = ConvertStrToNum(paramsMap, KEY_SAMPLE_COUNT);
1756 if (sampleCount < 0) {
1757 return false;
1758 }
1759 int maxSampleCount = MAX_SAMPLE_STACK_TIMES / sampleInterval - SAMPLE_EXTRA_COUNT;
1760 if (sampleCount < SAMPLE_COUNT_MIN || sampleCount > maxSampleCount) {
1761 XCOLLIE_LOGE("Set the range of sample count, min value: %{public}d max value: %{public}d, count: %{public}d.",
1762 SAMPLE_COUNT_MIN, maxSampleCount, sampleCount);
1763 return false;
1764 }
1765
1766 int reportTimes = ConvertStrToNum(paramsMap, KEY_SAMPLE_REPORT_TIMES);
1767 if (reportTimes < 0) {
1768 return false;
1769 } else if (reportTimes < SAMPLE_REPORT_TIMES_MIN || reportTimes > SAMPLE_REPORT_TIMES_MAX) {
1770 XCOLLIE_LOGE("Set the range of sample reportTimes is from %{public}d to %{public}d, "
1771 "reportTimes: %{public}d", SAMPLE_REPORT_TIMES_MIN, SAMPLE_REPORT_TIMES_MAX, reportTimes);
1772 return false;
1773 }
1774 UpdateJankParam(sampleInterval, ignoreStartUpTime, sampleCount, CatchLogType::LOGTYPE_SAMPLE_STACK, reportTimes);
1775 return true;
1776 }
1777
SetEventConfig(std::map<std::string,std::string> paramsMap)1778 int WatchdogInner::SetEventConfig(std::map<std::string, std::string> paramsMap)
1779 {
1780 if (paramsMap.empty()) {
1781 XCOLLIE_LOGE("Set the thread sampler param map is null.");
1782 return -1;
1783 }
1784 int logType = ConvertStrToNum(paramsMap, KEY_LOG_TYPE);
1785 size_t size = paramsMap.size();
1786 switch (logType) {
1787 case CatchLogType::LOGTYPE_DEFAULT:
1788 return -1;
1789 case CatchLogType::LOGTYPE_NONE:
1790 case CatchLogType::LOGTYPE_COLLECT_TRACE: {
1791 if (size != SAMPLE_TRACE_MAP_SIZE) {
1792 XCOLLIE_LOGE("Set the thread sampler param map size error, can only set log_type. "
1793 "map size: %{public}zu", size);
1794 return -1;
1795 }
1796 UpdateJankParam(SAMPLE_DEFULE_INTERVAL, DEFAULT_IGNORE_STARTUP_TIME, SAMPLE_DEFULE_COUNT,
1797 logType, SAMPLE_REPORT_TIMES_MIN);
1798 break;
1799 }
1800 case CatchLogType::LOGTYPE_SAMPLE_STACK: {
1801 if (size != SAMPLE_STACK_MAP_SIZE) {
1802 XCOLLIE_LOGE("Set the thread sampler param map size error, current map size: %{public}zu", size);
1803 return -1;
1804 }
1805 if (!CheckSampleParam(paramsMap)) {
1806 return -1;
1807 }
1808 break;
1809 }
1810 default: {
1811 XCOLLIE_LOGE("Set the log_type can only be 0、1、2, logType: %{public}d", logType);
1812 return -1;
1813 }
1814 };
1815 return 0;
1816 }
1817
SetSpecifiedProcessName(const std::string & name)1818 void WatchdogInner::SetSpecifiedProcessName(const std::string& name)
1819 {
1820 specifiedProcessName_ = name;
1821 }
1822
GetSpecifiedProcessName()1823 std::string WatchdogInner::GetSpecifiedProcessName()
1824 {
1825 return specifiedProcessName_;
1826 }
1827
SetScrollState(bool isScroll)1828 void WatchdogInner::SetScrollState(bool isScroll)
1829 {
1830 isScroll_ = isScroll;
1831 }
1832 } // end of namespace HiviewDFX
1833 } // end of namespace OHOS
1834