1 /*
2 * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "watchdog_inner.h"
17
18 #include <cerrno>
19 #include <climits>
20 #include <mutex>
21
22 #include <sys/stat.h>
23 #include <sys/types.h>
24 #include <fcntl.h>
25 #include <pthread.h>
26 #include <unistd.h>
27 #include <csignal>
28 #include <string>
29
30 #include <securec.h>
31 #include <dlfcn.h>
32
33 #include "backtrace_local.h"
34 #include "hisysevent.h"
35 #include "ipc_skeleton.h"
36 #include "xcollie_utils.h"
37 #include "xcollie_define.h"
38 #include "dfx_define.h"
39 #include "parameter.h"
40
// Hook signature: receives an output buffer `buf` of capacity `len` plus a context pointer.
using ThreadInfoCallBack = void (*)(char* buf, size_t len, void* ucontext);
// Declared weak so the address can be compared against nullptr before it is called.
extern "C" void SetThreadInfoCallback(ThreadInfoCallBack func) __attribute__((weak));
43 namespace OHOS {
44 namespace HiviewDFX {
45 namespace {
// State markers for a dump cycle. Kept as an unscoped enum because the values are
// compared directly against integer counters elsewhere in this file.
enum DumpStackState {
    DEFAULT = 0,
    COMPLETE,        // 1
    SAMPLE_COMPLETE  // 2
};
// Log-collection modes; DistributeEnd switches on these values.
enum CatchLogType {
    LOGTYPE_DEFAULT = -1,
    LOGTYPE_NONE,           // 0
    LOGTYPE_SAMPLE_STACK,   // 1
    LOGTYPE_COLLECT_TRACE   // 2
};
// --- Names of built-in checker tasks ---
constexpr char IPC_CHECKER[] = "IpcChecker";
constexpr char STACK_CHECKER[] = "ThreadSampler";
constexpr char TRACE_CHECKER[] = "TraceCollector";

// --- Report rate limits, compared against millisecond intervals ---
constexpr int64_t ONE_DAY_LIMIT = 86400000;
constexpr int64_t ONE_HOUR_LIMIT = 3600000;
constexpr int MILLISEC_TO_NANOSEC = 1000000;

// --- Sampling / trace collection tuning ---
constexpr int FFRT_BUFFER_SIZE = 512 * 1024;
constexpr int DETECT_STACK_COUNT = 2;
constexpr int COLLECT_STACK_COUNT = 10;
constexpr int COLLECT_TRACE_MIN = 1;
constexpr int COLLECT_TRACE_MAX = 20;
constexpr int TASK_INTERVAL = 155;
constexpr int DURATION_TIME = 150;
constexpr int DISTRIBUTE_TIME = 2000;
constexpr int DUMPTRACE_TIME = 450;
constexpr const char* const KEY_SCB_STATE = "com.ohos.sceneboard";

// --- Timeouts ---
constexpr uint64_t DEFAULT_TIMEOUT = 60 * 1000;
constexpr uint32_t FFRT_CALLBACK_TIME = 30 * 1000;
constexpr uint32_t IPC_CHECKER_TIME = 30 * 1000;
constexpr uint32_t TIME_MS_TO_S = 1000;
constexpr int INTERVAL_KICK_TIME = 6 * 1000;

// --- Well-known service uids ---
constexpr uint32_t DATA_MANAGE_SERVICE_UID = 3012;
constexpr uint32_t FOUNDATION_UID = 5523;
constexpr uint32_t RENDER_SERVICE_UID = 1003;
constexpr int SERVICE_WARNING = 1;

// --- Kernel hungtask interface: node paths and the messages written to them ---
constexpr const char* SYS_KERNEL_HUNGTASK_USERLIST = "/sys/kernel/hungtask/userlist";
constexpr const char* HMOS_HUNGTASK_USERLIST = "/proc/sys/hguard/user_list";
const std::string ON_KICK_TIME = "on,72";
const std::string ON_KICK_TIME_HMOS = "on,10,foundation";
const std::string KICK_TIME = "kick";
const std::string KICK_TIME_HMOS = "kick,foundation";
constexpr int32_t NOT_OPEN = -1;  // sentinel for "no hungtask fd is open"

constexpr uint64_t MAX_START_TIME = 10 * 1000;
constexpr const char* LIB_THREAD_SAMPLER_PATH = "libthread_sampler.z.so";
constexpr size_t STACK_LENGTH = 32 * 1024;
constexpr uint32_t JOIN_IPC_FULL_UIDS[] = {DATA_MANAGE_SERVICE_UID, FOUNDATION_UID, RENDER_SERVICE_UID};

// --- Bounds for the configurable sampling parameters ---
constexpr uint64_t SAMPLE_PARAMS_MAX_SIZE = 5;
constexpr uint64_t SAMPLE_PARAMS_MIN_SIZE = 1;
constexpr int MAX_SAMPLE_STACK_TIMES = 2500; // 2.5s
constexpr int SAMPLE_INTERVAL_MIN = 50; // 50ms
constexpr int SAMPLE_INTERVAL_MAX = 500; // 500ms
constexpr int SAMPLE_COUNT_MIN = 1;
constexpr int SAMPLE_REPORT_TIMES_MIN = 1;
constexpr int SAMPLE_REPORT_TIMES_MAX = 3;
constexpr int SAMPLE_EXTRA_COUNT = 4;
constexpr int IGNORE_STARTUP_TIME_MIN = 3; // 3s
103 }
104
105 std::mutex WatchdogInner::lockFfrt_;
106 static uint64_t g_nextKickTime = GetCurrentTickMillseconds();
107 static int32_t g_fd = NOT_OPEN;
108 static bool g_existFile = true;
109
110 SigActionType WatchdogInner::threadSamplerSigHandler_ = nullptr;
111 std::mutex WatchdogInner::threadSamplerSignalMutex_;
112
113 namespace {
ThreadInfo(char * buf,size_t len,void * ucontext)114 void ThreadInfo(char *buf __attribute__((unused)),
115 size_t len __attribute__((unused)),
116 void* ucontext __attribute__((unused)))
117 {
118 if (ucontext == nullptr) {
119 return;
120 }
121
122 auto ret = memcpy_s(buf, len, WatchdogInner::GetInstance().currentScene_.c_str(),
123 WatchdogInner::GetInstance().currentScene_.size());
124 if (ret != 0) {
125 return;
126 }
127 }
128
// Adjusts the calling thread's signal mask.
//   1. Reads the thread's current mask.
//   2. Adds `signo` to that set (isAddSignal) or removes it from the set.
//   3. Blocks (isBlock) or unblocks every signal left in the set.
// NOTE(review): because the set starts from the current mask, the unblock path acts on all
// currently blocked signals, and (isAddSignal=false, isBlock=false) leaves `signo` itself
// untouched — confirm this is the intended contract before relying on it.
void SetThreadSignalMask(int signo, bool isAddSignal, bool isBlock)
{
    sigset_t mask;
    sigemptyset(&mask);
    // A null "new" set means query-only: the current mask is copied into `mask`.
    pthread_sigmask(SIG_SETMASK, nullptr, &mask);
    isAddSignal ? sigaddset(&mask, signo) : sigdelset(&mask, signo);
    pthread_sigmask(isBlock ? SIG_BLOCK : SIG_UNBLOCK, &mask, nullptr);
}
145
// Fatal signals that InstallThreadSamplerSignal removes from the sampler handler's sa_mask.
static constexpr int CRASH_SIGNAL_LIST[] = {
    SIGILL,
    SIGABRT,
    SIGBUS,
    SIGFPE,
    SIGSEGV,
    SIGSTKFLT,
    SIGSYS,
    SIGTRAP,
};
150 }
151
WatchdogInner()152 WatchdogInner::WatchdogInner()
153 : cntCallback_(0), timeCallback_(0)
154 {
155 currentScene_ = "thread DfxWatchdog: Current scenario is hicollie.\n";
156 }
157
~WatchdogInner()158 WatchdogInner::~WatchdogInner()
159 {
160 Stop();
161 }
162
IsInAppspwan()163 static bool IsInAppspwan()
164 {
165 if (getuid() == 0 && GetSelfProcName().find("appspawn") != std::string::npos) {
166 return true;
167 }
168
169 if (getuid() == 0 && GetSelfProcName().find("nativespawn") != std::string::npos) {
170 return true;
171 }
172
173 return false;
174 }
175
SetBundleInfo(const std::string & bundleName,const std::string & bundleVersion)176 void WatchdogInner::SetBundleInfo(const std::string& bundleName, const std::string& bundleVersion)
177 {
178 bundleName_ = bundleName;
179 bundleVersion_ = bundleVersion;
180 }
181
SetForeground(const bool & isForeground)182 void WatchdogInner::SetForeground(const bool& isForeground)
183 {
184 isForeground_ = isForeground;
185 }
186
ReportMainThreadEvent(int64_t tid)187 bool WatchdogInner::ReportMainThreadEvent(int64_t tid)
188 {
189 std::string stack = "";
190 std::string heaviestStack = "";
191 CollectStack(stack, heaviestStack);
192
193 std::string path = "";
194 std::string eventName = "MAIN_THREAD_JANK";
195 if (!buissnessThreadInfo_.empty()) {
196 eventName = "BUSSINESS_THREAD_JANK";
197 }
198 int32_t pid = getprocpid();
199 if (!WriteStackToFd(pid, path, stack, eventName)) {
200 XCOLLIE_LOGI("MainThread WriteStackToFd Failed");
201 return false;
202 }
203 int result = HiSysEventWrite(HiSysEvent::Domain::FRAMEWORK, "MAIN_THREAD_JANK",
204 HiSysEvent::EventType::FAULT,
205 "BUNDLE_VERSION", bundleVersion_,
206 "BUNDLE_NAME", bundleName_,
207 "BEGIN_TIME", stackContent_.reportBegin / MILLISEC_TO_NANOSEC,
208 "END_TIME", stackContent_.reportEnd / MILLISEC_TO_NANOSEC,
209 "EXTERNAL_LOG", path,
210 "STACK", stack,
211 "JANK_LEVEL", 0,
212 "THREAD_NAME", GetSelfProcName(),
213 "FOREGROUND", isForeground_,
214 "LOG_TIME", GetTimeStamp() / MILLISEC_TO_NANOSEC,
215 "APP_START_JIFFIES_TIME", GetAppStartTime(pid, tid),
216 "HEAVIEST_STACK", heaviestStack);
217 XCOLLIE_LOGI("MainThread HiSysEventWrite result=%{public}d", result);
218 return result >= 0;
219 }
220
CheckEventTimer(int64_t currentTime,int64_t reportBegin,int64_t reportEnd,int interval)221 bool WatchdogInner::CheckEventTimer(int64_t currentTime, int64_t reportBegin, int64_t reportEnd, int interval)
222 {
223 if (reportBegin == timeContent_.curBegin &&
224 reportEnd == timeContent_.curEnd) {
225 return false;
226 }
227 return (timeContent_.curEnd <= timeContent_.curBegin &&
228 (currentTime - timeContent_.curBegin >= interval * MILLISEC_TO_NANOSEC)) ||
229 (timeContent_.curEnd - timeContent_.curBegin > interval * MILLISEC_TO_NANOSEC);
230 }
231
ThreadSamplerSigHandler(int sig,siginfo_t * si,void * context)232 void WatchdogInner::ThreadSamplerSigHandler(int sig, siginfo_t* si, void* context)
233 {
234 std::lock_guard<std::mutex> lock(threadSamplerSignalMutex_);
235 if (WatchdogInner::threadSamplerSigHandler_ == nullptr) {
236 return;
237 }
238 WatchdogInner::threadSamplerSigHandler_(sig, si, context);
239 }
240
InstallThreadSamplerSignal()241 bool WatchdogInner::InstallThreadSamplerSignal()
242 {
243 struct sigaction action {};
244 sigfillset(&action.sa_mask);
245 for (size_t i = 0; i < sizeof(CRASH_SIGNAL_LIST) / sizeof(CRASH_SIGNAL_LIST[0]); i++) {
246 sigdelset(&action.sa_mask, CRASH_SIGNAL_LIST[i]);
247 }
248 action.sa_sigaction = WatchdogInner::ThreadSamplerSigHandler;
249 action.sa_flags = SA_RESTART | SA_SIGINFO;
250 if (sigaction(MUSL_SIGNAL_SAMPLE_STACK, &action, nullptr) != 0) {
251 XCOLLIE_LOGE("Failed to register signal(%{public}d:%{public}d)", MUSL_SIGNAL_SAMPLE_STACK, errno);
252 return false;
253 }
254 return true;
255 }
256
UninstallThreadSamplerSignal()257 void WatchdogInner::UninstallThreadSamplerSignal()
258 {
259 std::lock_guard<std::mutex> lock(threadSamplerSignalMutex_);
260 threadSamplerSigHandler_ = nullptr;
261 }
262
ThreadSampleTask(int sampleInterval,int sampleCount,int64_t tid)263 void WatchdogInner::ThreadSampleTask(int sampleInterval, int sampleCount, int64_t tid)
264 {
265 if (stackContent_.detectorCount == 0 && stackContent_.collectCount == 0) {
266 XCOLLIE_LOGI("ThreadSampler 1st in ThreadSamplerTask.\n");
267 if (!InitThreadSamplerFuncs()) {
268 isMainThreadStackEnabled_ = true;
269 XCOLLIE_LOGE("ThreadSampler initialize failed.\n");
270 return;
271 }
272
273 if (!InstallThreadSamplerSignal()) {
274 isMainThreadStackEnabled_ = true;
275 XCOLLIE_LOGE("ThreadSampler install signal failed.\n");
276 return;
277 }
278
279 int initThreadSamplerRet = threadSamplerInitFunc_(COLLECT_STACK_COUNT);
280 if (initThreadSamplerRet != 0) {
281 isMainThreadStackEnabled_ = true;
282 XCOLLIE_LOGE("Thread sampler init failed. ret %{public}d\n", initThreadSamplerRet);
283 return;
284 }
285 XCOLLIE_LOGI("Thread sampler initialized. ret %{public}d\n", initThreadSamplerRet);
286 }
287 if (threadSamplerSampleFunc_ == nullptr) {
288 isMainThreadStackEnabled_ = true;
289 return;
290 }
291 if (stackContent_.collectCount > DumpStackState::DEFAULT && stackContent_.collectCount < sampleCount) {
292 XCOLLIE_LOGI("ThreadSampler in ThreadSamplerTask, %{public}d.\n", stackContent_.collectCount);
293 threadSamplerSampleFunc_();
294 stackContent_.collectCount++;
295 } else if (stackContent_.collectCount == sampleCount) {
296 ReportMainThreadEvent(tid);
297 stackContent_.reportTimes--;
298 isMainThreadStackEnabled_ = true;
299 return;
300 } else {
301 if (CheckEventTimer(GetTimeStamp(), stackContent_.reportBegin,
302 stackContent_.reportEnd, sampleInterval)) {
303 threadSamplerSampleFunc_();
304 stackContent_.collectCount++;
305 } else {
306 stackContent_.detectorCount++;
307 }
308 }
309 if (stackContent_.detectorCount == DETECT_STACK_COUNT) {
310 isMainThreadStackEnabled_ = true;
311 }
312 }
313
InitThreadSamplerFuncs()314 bool WatchdogInner::InitThreadSamplerFuncs()
315 {
316 threadSamplerFuncHandler_ = dlopen(LIB_THREAD_SAMPLER_PATH, RTLD_LAZY);
317 if (threadSamplerFuncHandler_ == nullptr) {
318 XCOLLIE_LOGE("dlopen failed, funcHandler is nullptr.\n");
319 return false;
320 }
321
322 threadSamplerInitFunc_ =
323 reinterpret_cast<ThreadSamplerInitFunc>(FunctionOpen(threadSamplerFuncHandler_, "ThreadSamplerInit"));
324 threadSamplerSampleFunc_ =
325 reinterpret_cast<ThreadSamplerSampleFunc>(FunctionOpen(threadSamplerFuncHandler_, "ThreadSamplerSample"));
326 threadSamplerCollectFunc_ =
327 reinterpret_cast<ThreadSamplerCollectFunc>(FunctionOpen(threadSamplerFuncHandler_, "ThreadSamplerCollect"));
328 threadSamplerDeinitFunc_ =
329 reinterpret_cast<ThreadSamplerDeinitFunc>(FunctionOpen(threadSamplerFuncHandler_, "ThreadSamplerDeinit"));
330 threadSamplerSigHandler_ =
331 reinterpret_cast<SigActionType>(FunctionOpen(threadSamplerFuncHandler_, "ThreadSamplerSigHandler"));
332 if (threadSamplerInitFunc_ == nullptr || threadSamplerSampleFunc_ == nullptr ||
333 threadSamplerCollectFunc_ == nullptr || threadSamplerDeinitFunc_ == nullptr ||
334 threadSamplerSigHandler_ == nullptr) {
335 ResetThreadSamplerFuncs();
336 XCOLLIE_LOGE("ThreadSampler dlsym some function failed.\n");
337 return false;
338 }
339 XCOLLIE_LOGE("ThreadSampler has been successfully loaded.\n");
340 return true;
341 }
342
ResetThreadSamplerFuncs()343 void WatchdogInner::ResetThreadSamplerFuncs()
344 {
345 threadSamplerInitFunc_ = nullptr;
346 threadSamplerSampleFunc_ = nullptr;
347 threadSamplerCollectFunc_ = nullptr;
348 threadSamplerDeinitFunc_ = nullptr;
349 threadSamplerSigHandler_ = nullptr;
350 dlclose(threadSamplerFuncHandler_);
351 threadSamplerFuncHandler_ = nullptr;
352 }
353
UpdateTime(int64_t & reportBegin,int64_t & reportEnd,TimePoint & lastEndTime,const TimePoint & endTime)354 void WatchdogInner::UpdateTime(int64_t& reportBegin, int64_t& reportEnd,
355 TimePoint& lastEndTime, const TimePoint& endTime)
356 {
357 reportBegin = timeContent_.curBegin;
358 reportEnd = timeContent_.curEnd;
359 lastEndTime = endTime;
360 }
361
SampleStackDetect(const TimePoint & endTime,int64_t durationTime,int sampleInterval)362 void WatchdogInner::SampleStackDetect(const TimePoint& endTime, int64_t durationTime, int sampleInterval)
363 {
364 uint64_t startUpTime = static_cast<uint64_t>(jankParamsMap[KEY_IGNORE_STARTUP_TIME]) * TIME_MS_TO_S;
365 if (GetCurrentTickMillseconds() - watchdogStartTime_ < startUpTime) {
366 XCOLLIE_LOGI("Application is in starting period.\n");
367 return;
368 }
369 if (!stackContent_.isStartSampleEnabled) {
370 XCOLLIE_LOGI("Current sample detection task is being executed.\n");
371 return;
372 }
373 if (stackContent_.reportTimes <= 0) {
374 int64_t checkTimer = ONE_DAY_LIMIT;
375 if (IsDeveloperOpen() || (IsBetaVersion() && GetProcessNameFromProcCmdline(getpid()) == KEY_SCB_STATE)) {
376 checkTimer = ONE_HOUR_LIMIT;
377 }
378 auto diff = endTime - stackContent_.lastEndTime;
379 int64_t intervalTime = std::chrono::duration_cast<std::chrono::milliseconds>(diff).count();
380 if (intervalTime < checkTimer) {
381 return;
382 }
383 stackContent_.reportTimes = jankParamsMap[KEY_SAMPLE_REPORT_TIMES];
384 XCOLLIE_LOGI("The current thread has exceeded the event limit, reportTimes: %{public}d",
385 stackContent_.reportTimes);
386 }
387 stackContent_.isStartSampleEnabled = false;
388 UpdateTime(stackContent_.reportBegin, stackContent_.reportEnd, stackContent_.lastEndTime, endTime);
389 int32_t ret = StartProfileMainThread(sampleInterval);
390 if (ret == -1) {
391 stackContent_.isStartSampleEnabled = true;
392 }
393 XCOLLIE_LOGI("MainThread StartProfileMainThread ret: %{public}d "
394 "durationTime: %{public}" PRId64 " ms sampleInterval: %{public}d", ret, durationTime, sampleInterval);
395 }
396
397
StartProfileMainThread(int32_t interval)398 int32_t WatchdogInner::StartProfileMainThread(int32_t interval)
399 {
400 std::unique_lock<std::mutex> lock(lock_);
401 stackContent_.detectorCount = 0;
402 stackContent_.collectCount = 0;
403 int sampleCount = jankParamsMap[KEY_SAMPLE_COUNT];
404 int64_t tid = getproctid();
405 auto sampleTask = [this, interval, sampleCount, tid]() {
406 ThreadSampleTask(interval, sampleCount, tid);
407 };
408
409 WatchdogTask task("ThreadSampler", sampleTask, 0, interval, true);
410 InsertWatchdogTaskLocked("ThreadSampler", std::move(task));
411 return 0;
412 }
413
CollectStack(std::string & stack,std::string & heaviestStack)414 bool WatchdogInner::CollectStack(std::string& stack, std::string& heaviestStack)
415 {
416 if (threadSamplerCollectFunc_ == nullptr) {
417 return false;
418 }
419 int treeFormat = 1;
420 char* stk = new char[STACK_LENGTH];
421 char* heaviest = new char[STACK_LENGTH];
422 int collectRet = threadSamplerCollectFunc_(stk, heaviest, STACK_LENGTH, STACK_LENGTH, treeFormat);
423 stack = stk;
424 heaviestStack = heaviest;
425 delete[] stk;
426 delete[] heaviest;
427 return collectRet == 0;
428 }
429
Deinit()430 bool WatchdogInner::Deinit()
431 {
432 if (threadSamplerDeinitFunc_ == nullptr) {
433 return false;
434 }
435 UninstallThreadSamplerSignal();
436 int ret = threadSamplerDeinitFunc_();
437 return ret == 0;
438 }
439
DumpTraceProfile(int32_t interval)440 void WatchdogInner::DumpTraceProfile(int32_t interval)
441 {
442 traceContent_.dumpCount = 0;
443 traceContent_.traceCount = 0;
444 auto traceTask = [this, interval]() {
445 traceContent_.traceCount++;
446 if (CheckEventTimer(GetTimeStamp(), traceContent_.reportBegin,
447 traceContent_.reportEnd, interval)) {
448 traceContent_.dumpCount++;
449 }
450 if (traceContent_.traceCount >= COLLECT_TRACE_MAX) {
451 if (traceContent_.dumpCount >= COLLECT_TRACE_MIN) {
452 CreateWatchdogDir();
453 appCaller_.actionId = UCollectClient::ACTION_ID_DUMP_TRACE;
454 appCaller_.isBusinessJank = !buissnessThreadInfo_.empty();
455 auto result = traceCollector_->CaptureDurationTrace(appCaller_);
456 XCOLLIE_LOGI("MainThread TraceCollector Dump result: %{public}d", result.retCode);
457 }
458 isMainThreadTraceEnabled_ = true;
459 }
460 };
461 WatchdogTask task("TraceCollector", traceTask, 0, interval, true);
462 std::unique_lock<std::mutex> lock(lock_);
463 InsertWatchdogTaskLocked("TraceCollector", std::move(task));
464 }
465
StartTraceProfile()466 int32_t WatchdogInner::StartTraceProfile()
467 {
468 traceCollector_ = UCollectClient::TraceCollector::Create();
469 if (traceCollector_ == nullptr) {
470 traceContent_.traceState = DumpStackState::DEFAULT;
471 XCOLLIE_LOGE("Create traceCollector failed.");
472 return -1;
473 }
474 appCaller_.actionId = UCollectClient::ACTION_ID_START_TRACE;
475 appCaller_.bundleName = bundleName_;
476 appCaller_.bundleVersion = bundleVersion_;
477 appCaller_.uid = static_cast<int64_t>(getuid());
478 appCaller_.pid = getprocpid();
479 appCaller_.threadName = GetSelfProcName();
480 appCaller_.foreground = isForeground_;
481 appCaller_.happenTime = GetTimeStamp() / MILLISEC_TO_NANOSEC;
482 appCaller_.beginTime = traceContent_.reportBegin / MILLISEC_TO_NANOSEC;
483 appCaller_.endTime = traceContent_.reportEnd / MILLISEC_TO_NANOSEC;
484 auto result = traceCollector_->CaptureDurationTrace(appCaller_);
485 if (result.retCode == 0) {
486 DumpTraceProfile(DURATION_TIME);
487 }
488 return result.retCode;
489 }
490
CollectTraceDetect(const TimePoint & endTime,int64_t durationTime)491 void WatchdogInner::CollectTraceDetect(const TimePoint& endTime, int64_t durationTime)
492 {
493 if (IsBetaVersion()) {
494 return;
495 }
496 if (traceContent_.traceState == DumpStackState::COMPLETE) {
497 auto diff = endTime - stackContent_.lastEndTime;
498 int64_t intervalTime = std::chrono::duration_cast<std::chrono::milliseconds>(diff).count();
499 if (intervalTime < ONE_DAY_LIMIT) {
500 return;
501 }
502 }
503 traceContent_.traceState = DumpStackState::COMPLETE;
504 UpdateTime(traceContent_.reportBegin, traceContent_.reportEnd, traceContent_.lastEndTime, endTime);
505 int32_t result = StartTraceProfile();
506 XCOLLIE_LOGI("MainThread TraceCollector Start result: %{public}d, Duration Time: %{public}" PRId64 " ms",
507 result, durationTime);
508 }
509
DistributeStart(const std::string & name)510 static TimePoint DistributeStart(const std::string& name)
511 {
512 WatchdogInner::GetInstance().timeContent_.curBegin = GetTimeStamp();
513 return std::chrono::steady_clock::now();
514 }
515
DistributeEnd(const std::string & name,const TimePoint & startTime)516 static void DistributeEnd(const std::string& name, const TimePoint& startTime)
517 {
518 TimePoint endTime = std::chrono::steady_clock::now();
519 auto duration = endTime - startTime;
520 int64_t durationTime = std::chrono::duration_cast<std::chrono::milliseconds>(duration).count();
521 int sampleInterval = WatchdogInner::GetInstance().jankParamsMap[KEY_SAMPLE_INTERVAL];
522 WatchdogInner::GetInstance().timeContent_.curEnd = GetTimeStamp();
523 if (duration > std::chrono::milliseconds(sampleInterval)) {
524 int logType = WatchdogInner::GetInstance().jankParamsMap[KEY_LOG_TYPE];
525 switch (logType) {
526 case CatchLogType::LOGTYPE_SAMPLE_STACK: {
527 WatchdogInner::GetInstance().SampleStackDetect(endTime, durationTime, sampleInterval);
528 break;
529 }
530 case CatchLogType::LOGTYPE_COLLECT_TRACE: {
531 if (duration > std::chrono::milliseconds(DUMPTRACE_TIME)) {
532 WatchdogInner::GetInstance().CollectTraceDetect(endTime, durationTime);
533 }
534 break;
535 }
536 case CatchLogType::LOGTYPE_NONE: {
537 if (duration < std::chrono::milliseconds(DUMPTRACE_TIME)) {
538 WatchdogInner::GetInstance().SampleStackDetect(endTime, durationTime, sampleInterval);
539 } else {
540 WatchdogInner::GetInstance().CollectTraceDetect(endTime, durationTime);
541 }
542 break;
543 }
544 default:
545 break;
546 }
547 }
548 if (duration > std::chrono::milliseconds(DISTRIBUTE_TIME)) {
549 XCOLLIE_LOGI("BlockMonitor event name: %{public}s, Duration Time: %{public}" PRId64 " ms",
550 name.c_str(), durationTime);
551 }
552 }
553
AddThread(const std::string & name,std::shared_ptr<AppExecFwk::EventHandler> handler,TimeOutCallback timeOutCallback,uint64_t interval)554 int WatchdogInner::AddThread(const std::string &name,
555 std::shared_ptr<AppExecFwk::EventHandler> handler, TimeOutCallback timeOutCallback, uint64_t interval)
556 {
557 if (name.empty() || handler == nullptr) {
558 XCOLLIE_LOGE("Add thread fail, invalid args!");
559 return -1;
560 }
561
562 if (IsInAppspwan()) {
563 return -1;
564 }
565
566 std::string limitedName = GetLimitedSizeName(name);
567 XCOLLIE_LOGI("Add thread %{public}s to watchdog.", limitedName.c_str());
568 std::unique_lock<std::mutex> lock(lock_);
569
570 IpcCheck();
571
572 if (!InsertWatchdogTaskLocked(limitedName, WatchdogTask(limitedName, handler, timeOutCallback, interval))) {
573 return -1;
574 }
575 return 0;
576 }
577
RunOneShotTask(const std::string & name,Task && task,uint64_t delay)578 void WatchdogInner::RunOneShotTask(const std::string& name, Task&& task, uint64_t delay)
579 {
580 if (name.empty() || task == nullptr) {
581 XCOLLIE_LOGE("Add task fail, invalid args!");
582 return;
583 }
584
585 if (IsInAppspwan()) {
586 return;
587 }
588
589 std::unique_lock<std::mutex> lock(lock_);
590 std::string limitedName = GetLimitedSizeName(name);
591 InsertWatchdogTaskLocked(limitedName, WatchdogTask(limitedName, std::move(task), delay, 0, true));
592 }
593
RunXCollieTask(const std::string & name,uint64_t timeout,XCollieCallback func,void * arg,unsigned int flag)594 int64_t WatchdogInner::RunXCollieTask(const std::string& name, uint64_t timeout, XCollieCallback func,
595 void *arg, unsigned int flag)
596 {
597 if (name.empty() || timeout == 0) {
598 XCOLLIE_LOGE("Add XCollieTask fail, invalid args!");
599 return INVALID_ID;
600 }
601
602 if (IsInAppspwan()) {
603 return INVALID_ID;
604 }
605
606 std::unique_lock<std::mutex> lock(lock_);
607 IpcCheck();
608 std::string limitedName = GetLimitedSizeName(name);
609 return InsertWatchdogTaskLocked(limitedName, WatchdogTask(limitedName, timeout, func, arg, flag));
610 }
611
RemoveXCollieTask(int64_t id)612 void WatchdogInner::RemoveXCollieTask(int64_t id)
613 {
614 std::priority_queue<WatchdogTask> tmpQueue;
615 std::unique_lock<std::mutex> lock(lock_);
616 size_t size = checkerQueue_.size();
617 if (size == 0) {
618 XCOLLIE_LOGE("Remove XCollieTask %{public}lld fail, empty queue!", static_cast<long long>(id));
619 return;
620 }
621 while (!checkerQueue_.empty()) {
622 const WatchdogTask& task = checkerQueue_.top();
623 if (task.id != id || task.timeout == 0) {
624 tmpQueue.push(task);
625 }
626 checkerQueue_.pop();
627 }
628 if (tmpQueue.size() == size) {
629 XCOLLIE_LOGE("Remove XCollieTask fail, can not find timer %{public}lld, size=%{public}zu!",
630 static_cast<long long>(id), size);
631 }
632 tmpQueue.swap(checkerQueue_);
633 }
634
RunPeriodicalTask(const std::string & name,Task && task,uint64_t interval,uint64_t delay)635 void WatchdogInner::RunPeriodicalTask(const std::string& name, Task&& task, uint64_t interval, uint64_t delay)
636 {
637 if (name.empty() || task == nullptr) {
638 XCOLLIE_LOGE("Add task fail, invalid args!");
639 return;
640 }
641
642 if (IsInAppspwan()) {
643 return;
644 }
645
646 std::string limitedName = GetLimitedSizeName(name);
647 XCOLLIE_LOGD("Add periodical task %{public}s to watchdog.", name.c_str());
648 std::unique_lock<std::mutex> lock(lock_);
649 InsertWatchdogTaskLocked(limitedName, WatchdogTask(limitedName, std::move(task), delay, interval, false));
650 }
651
SetTimerCountTask(const std::string & name,uint64_t timeLimit,int countLimit)652 int64_t WatchdogInner::SetTimerCountTask(const std::string &name, uint64_t timeLimit, int countLimit)
653 {
654 if (name.empty() || timeLimit == 0 || countLimit <= 0) {
655 XCOLLIE_LOGE("SetTimerCountTask fail, invalid args!");
656 return INVALID_ID;
657 }
658
659 if (IsInAppspwan()) {
660 return INVALID_ID;
661 }
662 std::string limitedName = GetLimitedSizeName(name);
663 XCOLLIE_LOGD("SetTimerCountTask name : %{public}s", name.c_str());
664 std::unique_lock<std::mutex> lock(lock_);
665 return InsertWatchdogTaskLocked(limitedName, WatchdogTask(limitedName, timeLimit, countLimit));
666 }
667
TriggerTimerCountTask(const std::string & name,bool bTrigger,const std::string & message)668 void WatchdogInner::TriggerTimerCountTask(const std::string &name, bool bTrigger, const std::string &message)
669 {
670 std::unique_lock<std::mutex> lock(lock_);
671
672 if (checkerQueue_.empty()) {
673 XCOLLIE_LOGE("TriggerTimerCountTask name : %{public}s fail, empty queue!", name.c_str());
674 return;
675 }
676
677 bool isTaskExist = false;
678 uint64_t now = GetCurrentTickMillseconds();
679 std::priority_queue<WatchdogTask> tmpQueue;
680 while (!checkerQueue_.empty()) {
681 WatchdogTask task = checkerQueue_.top();
682 if (task.name == name) {
683 isTaskExist = true;
684 if (bTrigger) {
685 task.triggerTimes.push_back(now);
686 task.message = message;
687 } else {
688 task.triggerTimes.clear();
689 }
690 }
691 tmpQueue.push(task);
692 checkerQueue_.pop();
693 }
694 tmpQueue.swap(checkerQueue_);
695
696 if (!isTaskExist) {
697 XCOLLIE_LOGE("TriggerTimerCount name : %{public}s does not exist!", name.c_str());
698 }
699 }
700
IsTaskExistLocked(const std::string & name)701 bool WatchdogInner::IsTaskExistLocked(const std::string& name)
702 {
703 return (taskNameSet_.find(name) != taskNameSet_.end());
704 }
705
IsExceedMaxTaskLocked()706 bool WatchdogInner::IsExceedMaxTaskLocked()
707 {
708 if (checkerQueue_.size() >= MAX_WATCH_NUM) {
709 XCOLLIE_LOGE("Exceed max watchdog task!");
710 return true;
711 }
712
713 return false;
714 }
715
InsertWatchdogTaskLocked(const std::string & name,WatchdogTask && task)716 int64_t WatchdogInner::InsertWatchdogTaskLocked(const std::string& name, WatchdogTask&& task)
717 {
718 if (!task.isOneshotTask && IsTaskExistLocked(name)) {
719 XCOLLIE_LOGI("Task with %{public}s already exist, failed to insert.", name.c_str());
720 return 0;
721 }
722
723 if (IsExceedMaxTaskLocked()) {
724 XCOLLIE_LOGE("Exceed max watchdog task, failed to insert.");
725 return 0;
726 }
727 int64_t id = task.id;
728 checkerQueue_.push(std::move(task));
729 if (!task.isOneshotTask) {
730 taskNameSet_.insert(name);
731 }
732 CreateWatchdogThreadIfNeed();
733 condition_.notify_all();
734
735 return id;
736 }
737
StopWatchdog()738 void WatchdogInner::StopWatchdog()
739 {
740 Stop();
741 }
742
IsCallbackLimit(unsigned int flag)743 bool WatchdogInner::IsCallbackLimit(unsigned int flag)
744 {
745 bool ret = false;
746 time_t startTime = time(nullptr);
747 if (!(flag & XCOLLIE_FLAG_LOG)) {
748 return ret;
749 }
750 if (timeCallback_ + XCOLLIE_CALLBACK_TIMEWIN_MAX < startTime) {
751 timeCallback_ = startTime;
752 } else {
753 if (++cntCallback_ > XCOLLIE_CALLBACK_HISTORY_MAX) {
754 ret = true;
755 }
756 }
757 return ret;
758 }
759
IPCProxyLimitCallback(uint64_t num)760 void IPCProxyLimitCallback(uint64_t num)
761 {
762 XCOLLIE_LOGE("ipc proxy num %{public}" PRIu64 " exceed limit", num);
763 if (getuid() >= MIN_APP_UID && IsBetaVersion()) {
764 XCOLLIE_LOGI("Process is going to exit, reason: ipc proxy num exceed limit");
765 _exit(0);
766 }
767 }
768
CreateWatchdogThreadIfNeed()769 void WatchdogInner::CreateWatchdogThreadIfNeed()
770 {
771 std::call_once(flag_, [this] {
772 if (threadLoop_ == nullptr) {
773 if (mainRunner_ == nullptr) {
774 mainRunner_ = AppExecFwk::EventRunner::GetMainEventRunner();
775 }
776 mainRunner_->SetMainLooperWatcher(DistributeStart, DistributeEnd);
777 const uint64_t limitNum = 20000;
778 IPCDfx::SetIPCProxyLimit(limitNum, IPCProxyLimitCallback);
779 threadLoop_ = std::make_unique<std::thread>(&WatchdogInner::Start, this);
780 if (getpid() == gettid()) {
781 SetThreadSignalMask(SIGDUMP, true, true);
782 }
783 XCOLLIE_LOGD("Watchdog is running!");
784 }
785 });
786 }
787
CheckCurrentTask(const WatchdogTask & queuedTaskCheck)788 bool WatchdogInner::CheckCurrentTask(const WatchdogTask& queuedTaskCheck)
789 {
790 if (queuedTaskCheck.name.empty()) {
791 checkerQueue_.pop();
792 XCOLLIE_LOGW("queuedTask name is empty.");
793 } else if (queuedTaskCheck.name == STACK_CHECKER && isMainThreadStackEnabled_) {
794 checkerQueue_.pop();
795 taskNameSet_.erase("ThreadSampler");
796 if (Deinit()) {
797 ResetThreadSamplerFuncs();
798 }
799 stackContent_.isStartSampleEnabled = true;
800 isMainThreadStackEnabled_ = false;
801 XCOLLIE_LOGI("Detect sample stack task complete.");
802 } else if (queuedTaskCheck.name == TRACE_CHECKER && isMainThreadTraceEnabled_) {
803 checkerQueue_.pop();
804 taskNameSet_.erase("TraceCollector");
805 isMainThreadTraceEnabled_ = false;
806 if (traceContent_.dumpCount < COLLECT_TRACE_MIN) {
807 traceContent_.traceState = DumpStackState::DEFAULT;
808 }
809 XCOLLIE_LOGI("Detect collect trace task complete.");
810 } else {
811 return false;
812 }
813 return true;
814 }
815
FetchNextTask(uint64_t now,WatchdogTask & task)816 uint64_t WatchdogInner::FetchNextTask(uint64_t now, WatchdogTask& task)
817 {
818 std::unique_lock<std::mutex> lock(lock_);
819 if (isNeedStop_) {
820 while (!checkerQueue_.empty()) {
821 checkerQueue_.pop();
822 }
823 return DEFAULT_TIMEOUT;
824 }
825
826 if (checkerQueue_.empty()) {
827 return DEFAULT_TIMEOUT;
828 }
829
830 const WatchdogTask& queuedTaskCheck = checkerQueue_.top();
831 if (CheckCurrentTask(queuedTaskCheck) && checkerQueue_.empty()) {
832 return DEFAULT_TIMEOUT;
833 }
834
835 const WatchdogTask& queuedTask = checkerQueue_.top();
836 if (g_existFile && queuedTask.name == IPC_FULL && now - g_nextKickTime > INTERVAL_KICK_TIME) {
837 if (KickWatchdog()) {
838 g_nextKickTime = now;
839 }
840 }
841 if (queuedTask.nextTickTime > now) {
842 return queuedTask.nextTickTime - now;
843 }
844
845 currentScene_ = "thread DfxWatchdog: Current scenario is task name: " + queuedTask.name + "\n";
846 task = queuedTask;
847 checkerQueue_.pop();
848 return 0;
849 }
850
ReInsertTaskIfNeed(WatchdogTask & task)851 void WatchdogInner::ReInsertTaskIfNeed(WatchdogTask& task)
852 {
853 if (task.checkInterval == 0) {
854 return;
855 }
856
857 std::unique_lock<std::mutex> lock(lock_);
858 task.nextTickTime = task.nextTickTime + task.checkInterval;
859 checkerQueue_.push(task);
860 }
861
Start()862 bool WatchdogInner::Start()
863 {
864 if (pthread_setname_np(pthread_self(), "OS_DfxWatchdog") != 0) {
865 XCOLLIE_LOGW("Failed to set threadName for watchdog, errno:%d.", errno);
866 }
867 SetThreadSignalMask(SIGDUMP, false, false);
868 watchdogStartTime_ = GetCurrentTickMillseconds();
869 XCOLLIE_LOGD("Watchdog is running in thread(%{public}d)!", getproctid());
870 if (SetThreadInfoCallback != nullptr) {
871 SetThreadInfoCallback(ThreadInfo);
872 XCOLLIE_LOGD("Watchdog Set Thread Info Callback");
873 }
874 while (!isNeedStop_) {
875 uint64_t now = GetCurrentTickMillseconds();
876 WatchdogTask task;
877 uint64_t leftTimeMill = FetchNextTask(now, task);
878 if (leftTimeMill == 0) {
879 task.Run(now);
880 ReInsertTaskIfNeed(task);
881 currentScene_ = "thread DfxWatchdog: Current scenario is hicollie.\n";
882 continue;
883 } else if (isNeedStop_) {
884 break;
885 } else {
886 std::unique_lock<std::mutex> lock(lock_);
887 condition_.wait_for(lock, std::chrono::milliseconds(leftTimeMill));
888 }
889 }
890 if (SetThreadInfoCallback != nullptr) {
891 SetThreadInfoCallback(nullptr);
892 }
893 return true;
894 }
895
SendMsgToHungtask(const std::string & msg)896 bool WatchdogInner::SendMsgToHungtask(const std::string& msg)
897 {
898 if (g_fd == NOT_OPEN) {
899 return false;
900 }
901
902 ssize_t watchdogWrite = write(g_fd, msg.c_str(), msg.size());
903 if (watchdogWrite < 0 || watchdogWrite != static_cast<ssize_t>(msg.size())) {
904 XCOLLIE_LOGE("watchdogWrite msg failed");
905 close(g_fd);
906 g_fd = NOT_OPEN;
907 return false;
908 }
909 XCOLLIE_LOGE("Send %{public}s to hungtask Successful\n", msg.c_str());
910 return true;
911 }
912
KickWatchdog()913 bool WatchdogInner::KickWatchdog()
914 {
915 if (g_fd == NOT_OPEN) {
916 g_fd = open(SYS_KERNEL_HUNGTASK_USERLIST, O_WRONLY);
917 if (g_fd < 0) {
918 g_fd = open(HMOS_HUNGTASK_USERLIST, O_WRONLY);
919 if (g_fd < 0) {
920 XCOLLIE_LOGE("can't open hungtask file");
921 g_existFile = false;
922 return false;
923 }
924 XCOLLIE_LOGE("change to hmos kernel");
925 isHmos = true;
926 } else {
927 XCOLLIE_LOGE("change to linux kernel");
928 }
929
930 if (!SendMsgToHungtask(isHmos ? ON_KICK_TIME_HMOS : ON_KICK_TIME)) {
931 XCOLLIE_LOGE("KickWatchdog SendMsgToHungtask false");
932 return false;
933 }
934 }
935 return SendMsgToHungtask(isHmos ? KICK_TIME_HMOS : KICK_TIME);
936 }
937
IpcCheck()938 void WatchdogInner::IpcCheck()
939 {
940 static bool isIpcCheckInit = false;
941 if (isIpcCheckInit) {
942 return;
943 }
944
945 uint32_t uid = getuid();
946 bool isJoinIpcFullUid = std::any_of(std::begin(JOIN_IPC_FULL_UIDS), std::end(JOIN_IPC_FULL_UIDS),
947 [uid](const uint32_t joinIpcFullUid) { return uid == joinIpcFullUid; });
948 if (isJoinIpcFullUid || GetSelfProcName() == KEY_SCB_STATE) {
949 if (binderCheckHander_ == nullptr) {
950 auto runner = AppExecFwk::EventRunner::Create(IPC_CHECKER);
951 binderCheckHander_ = std::make_shared<AppExecFwk::EventHandler>(runner);
952 if (!InsertWatchdogTaskLocked(IPC_CHECKER, WatchdogTask(IPC_FULL, binderCheckHander_,
953 nullptr, IPC_CHECKER_TIME))) {
954 XCOLLIE_LOGE("Add %{public}s thread fail", IPC_CHECKER);
955 }
956 }
957 }
958 isIpcCheckInit = true;
959 }
960
WriteStringToFile(int32_t pid,const char * str)961 void WatchdogInner::WriteStringToFile(int32_t pid, const char *str)
962 {
963 char file[PATH_LEN] = {0};
964 if (snprintf_s(file, PATH_LEN, PATH_LEN - 1, "/proc/%d/unexpected_die_catch", pid) == -1) {
965 XCOLLIE_LOGE("failed to build path for %{public}d.", pid);
966 return;
967 }
968 int fd = open(file, O_RDWR);
969 if (fd == -1) {
970 return;
971 }
972 if (write(fd, str, strlen(str)) < 0) {
973 XCOLLIE_LOGI("failed to write 0 for %{public}s", file);
974 }
975 close(fd);
976 return;
977 }
978
FfrtCallback(uint64_t taskId,const char * taskInfo,uint32_t delayedTaskCount)979 void WatchdogInner::FfrtCallback(uint64_t taskId, const char *taskInfo, uint32_t delayedTaskCount)
980 {
981 std::string description = "FfrtCallback: task(";
982 description += taskInfo;
983 description += ") blocked " + std::to_string(FFRT_CALLBACK_TIME / TIME_MS_TO_S) + "s";
984 std::string info(taskInfo);
985 if (info.find("Queue_Schedule_Timeout") != std::string::npos) {
986 WatchdogInner::SendFfrtEvent(description, "SERVICE_WARNING", taskInfo, false);
987 description += ", report twice instead of exiting process.";
988 WatchdogInner::SendFfrtEvent(description, "SERVICE_BLOCK", taskInfo);
989 WatchdogInner::KillPeerBinderProcess(description);
990 return;
991 }
992 bool isExist = false;
993 {
994 std::unique_lock<std::mutex> lock(lockFfrt_);
995 auto &map = WatchdogInner::GetInstance().taskIdCnt;
996 auto search = map.find(taskId);
997 if (search != map.end()) {
998 isExist = true;
999 } else {
1000 map[taskId] = SERVICE_WARNING;
1001 }
1002 }
1003
1004 if (isExist) {
1005 description += ", report twice instead of exiting process."; // 1s = 1000ms
1006 WatchdogInner::SendFfrtEvent(description, "SERVICE_BLOCK", taskInfo);
1007 WatchdogInner::GetInstance().taskIdCnt.erase(taskId);
1008 WatchdogInner::KillPeerBinderProcess(description);
1009 } else {
1010 WatchdogInner::SendFfrtEvent(description, "SERVICE_WARNING", taskInfo);
1011 }
1012 }
1013
InitFfrtWatchdog()1014 void WatchdogInner::InitFfrtWatchdog()
1015 {
1016 CreateWatchdogThreadIfNeed();
1017 ffrt_task_timeout_set_cb(FfrtCallback);
1018 ffrt_task_timeout_set_threshold(FFRT_CALLBACK_TIME);
1019 std::unique_lock<std::mutex> lock(lock_);
1020 IpcCheck();
1021 }
1022
SendFfrtEvent(const std::string & msg,const std::string & eventName,const char * taskInfo,const bool isDumpStack)1023 void WatchdogInner::SendFfrtEvent(const std::string &msg, const std::string &eventName, const char * taskInfo,
1024 const bool isDumpStack)
1025 {
1026 int32_t pid = getprocpid();
1027 if (IsProcessDebug(pid)) {
1028 XCOLLIE_LOGI("heap dump or debug for %{public}d, don't report.", pid);
1029 return;
1030 }
1031 uint32_t gid = getgid();
1032 uint32_t uid = getuid();
1033 time_t curTime = time(nullptr);
1034 std::string sendMsg = std::string((ctime(&curTime) == nullptr) ? "" : ctime(&curTime)) +
1035 "\n" + msg + "\n";
1036 char* buffer = new char[FFRT_BUFFER_SIZE + 1]();
1037 buffer[FFRT_BUFFER_SIZE] = 0;
1038 ffrt_dump(DUMP_INFO_ALL, buffer, FFRT_BUFFER_SIZE);
1039 sendMsg += buffer;
1040 delete[] buffer;
1041 int32_t tid = pid;
1042 GetFfrtTaskTid(tid, sendMsg);
1043 int ret = HiSysEventWrite(HiSysEvent::Domain::FRAMEWORK, eventName, HiSysEvent::EventType::FAULT,
1044 "PID", pid, "TID", tid, "TGID", gid, "UID", uid, "MODULE_NAME", taskInfo, "PROCESS_NAME", GetSelfProcName(),
1045 "MSG", sendMsg, "STACK", isDumpStack ? GetProcessStacktrace() : "");
1046 XCOLLIE_LOGI("hisysevent write result=%{public}d, send event [FRAMEWORK,%{public}s], "
1047 "msg=%{public}s", ret, eventName.c_str(), msg.c_str());
1048 }
1049
GetFfrtTaskTid(int32_t & tid,const std::string & msg)1050 void WatchdogInner::GetFfrtTaskTid(int32_t& tid, const std::string& msg)
1051 {
1052 std::string queueNameFrontStr = "us. queue name [";
1053 size_t queueNameFrontPos = msg.find(queueNameFrontStr);
1054 if (queueNameFrontPos == std::string::npos) {
1055 return;
1056 }
1057 size_t queueNameRearPos = msg.find("], remaining tasks count=");
1058 size_t queueStartPos = queueNameFrontPos + queueNameFrontStr.length();
1059 if (queueNameRearPos == std::string::npos || queueNameRearPos <= queueStartPos) {
1060 return;
1061 }
1062 size_t queueNameLength = queueNameRearPos - queueStartPos;
1063 std::string workerTidFrontStr = " worker tid ";
1064 std::string taskIdFrontStr = " is running, task id ";
1065 std::string queueNameStr = " name " + msg.substr(queueStartPos, queueNameLength);
1066 std::istringstream issMsg(msg);
1067 std::string line;
1068 while (std::getline(issMsg, line, '\n')) {
1069 size_t workerTidFrontPos = line.find(workerTidFrontStr);
1070 size_t taskIdFrontPos = line.find(taskIdFrontStr);
1071 size_t queueNamePos = line.find(queueNameStr);
1072 size_t workerStartPos = workerTidFrontPos + workerTidFrontStr.length();
1073 if (workerTidFrontPos == std::string::npos || taskIdFrontPos == std::string::npos ||
1074 queueNamePos == std::string::npos || taskIdFrontPos <= workerStartPos) {
1075 continue;
1076 }
1077 size_t tidLength = taskIdFrontPos - workerStartPos;
1078 if (tidLength < std::to_string(INT32_MAX).length()) {
1079 std::string tidStr = line.substr(workerStartPos, tidLength);
1080 if (std::all_of(std::begin(tidStr), std::end(tidStr), [] (const char& c) {
1081 return isdigit(c);
1082 })) {
1083 tid = std::stoi(tidStr);
1084 return;
1085 }
1086 }
1087 }
1088 }
1089
LeftTimeExitProcess(const std::string & description)1090 void WatchdogInner::LeftTimeExitProcess(const std::string &description)
1091 {
1092 int32_t pid = getprocpid();
1093 if (IsProcessDebug(pid)) {
1094 XCOLLIE_LOGI("heap dump or debug for %{public}d, don't exit.", pid);
1095 return;
1096 }
1097 DelayBeforeExit(10); // sleep 10s for hiview dump
1098 XCOLLIE_LOGI("Process is going to exit, reason:%{public}s.", description.c_str());
1099 WatchdogInner::WriteStringToFile(pid, "0");
1100
1101 _exit(0);
1102 }
1103
Stop()1104 bool WatchdogInner::Stop()
1105 {
1106 IPCDfx::SetIPCProxyLimit(0, nullptr);
1107 if (mainRunner_ != nullptr) {
1108 mainRunner_->SetMainLooperWatcher(nullptr, nullptr);
1109 }
1110 isNeedStop_.store(true);
1111 condition_.notify_all();
1112 if (threadLoop_ != nullptr && threadLoop_->joinable()) {
1113 threadLoop_->join();
1114 threadLoop_ = nullptr;
1115 }
1116 if (g_fd != NOT_OPEN) {
1117 close(g_fd);
1118 g_fd = NOT_OPEN;
1119 }
1120 return true;
1121 }
1122
KillPeerBinderProcess(const std::string & description)1123 void WatchdogInner::KillPeerBinderProcess(const std::string &description)
1124 {
1125 bool result = false;
1126 if (getuid() == FOUNDATION_UID) {
1127 result = KillProcessByPid(getprocpid());
1128 }
1129 if (!result) {
1130 WatchdogInner::LeftTimeExitProcess(description);
1131 }
1132 }
1133
RemoveInnerTask(const std::string & name)1134 void WatchdogInner::RemoveInnerTask(const std::string& name)
1135 {
1136 if (name.empty()) {
1137 XCOLLIE_LOGI("RemoveInnerTask fail, cname is null");
1138 return;
1139 }
1140 std::priority_queue<WatchdogTask> tmpQueue;
1141 std::unique_lock<std::mutex> lock(lock_);
1142 size_t size = checkerQueue_.size();
1143 if (size == 0) {
1144 XCOLLIE_LOGE("RemoveInnerTask %{public}s fail, empty queue!", name.c_str());
1145 return;
1146 }
1147 while (!checkerQueue_.empty()) {
1148 const WatchdogTask& task = checkerQueue_.top();
1149 if (task.name != name) {
1150 tmpQueue.push(task);
1151 } else {
1152 size_t nameSize = taskNameSet_.size();
1153 if (nameSize != 0 && !task.isOneshotTask) {
1154 taskNameSet_.erase(name);
1155 XCOLLIE_LOGD("RemoveInnerTask name %{public}s, remove result=%{public}d",
1156 name.c_str(), nameSize > taskNameSet_.size());
1157 }
1158 }
1159 checkerQueue_.pop();
1160 }
1161 if (tmpQueue.size() == size) {
1162 XCOLLIE_LOGE("RemoveInnerTask fail, can not find name %{public}s, size=%{public}zu!",
1163 name.c_str(), size);
1164 }
1165 tmpQueue.swap(checkerQueue_);
1166 }
1167
InitBeginFunc(const char * name)1168 void InitBeginFunc(const char* name)
1169 {
1170 std::string nameStr(name);
1171 WatchdogInner::GetInstance().bussinessBeginTime_ = DistributeStart(nameStr);
1172 }
1173
InitEndFunc(const char * name)1174 void InitEndFunc(const char* name)
1175 {
1176 std::string nameStr(name);
1177 DistributeEnd(nameStr, WatchdogInner::GetInstance().bussinessBeginTime_);
1178 }
1179
InitMainLooperWatcher(WatchdogInnerBeginFunc * beginFunc,WatchdogInnerEndFunc * endFunc)1180 void WatchdogInner::InitMainLooperWatcher(WatchdogInnerBeginFunc* beginFunc,
1181 WatchdogInnerEndFunc* endFunc)
1182 {
1183 int64_t tid = getproctid();
1184 if (beginFunc && endFunc) {
1185 if (buissnessThreadInfo_.find(tid) != buissnessThreadInfo_.end()) {
1186 XCOLLIE_LOGI("Tid =%{public}" PRId64 "already exits, "
1187 "no repeated initialization.", tid);
1188 return;
1189 }
1190 if (mainRunner_ != nullptr) {
1191 mainRunner_->SetMainLooperWatcher(nullptr, nullptr);
1192 }
1193 *beginFunc = InitBeginFunc;
1194 *endFunc = InitEndFunc;
1195 buissnessThreadInfo_.insert(tid);
1196 } else {
1197 if (buissnessThreadInfo_.find(tid) != buissnessThreadInfo_.end()) {
1198 XCOLLIE_LOGI("Remove already init tid=%{public}." PRId64, tid);
1199 mainRunner_->SetMainLooperWatcher(DistributeStart, DistributeEnd);
1200 buissnessThreadInfo_.erase(tid);
1201 }
1202 }
1203 }
1204
SetAppDebug(bool isAppDebug)1205 void WatchdogInner::SetAppDebug(bool isAppDebug)
1206 {
1207 isAppDebug_ = isAppDebug;
1208 }
1209
GetAppDebug()1210 bool WatchdogInner::GetAppDebug()
1211 {
1212 return isAppDebug_;
1213 }
1214
UpdateJankParam(int sampleInterval,int startUpTime,int sampleCount,int logType,int reportTimes)1215 void WatchdogInner::UpdateJankParam(int sampleInterval, int startUpTime, int sampleCount,
1216 int logType, int reportTimes)
1217 {
1218 jankParamsMap[KEY_LOG_TYPE] = logType;
1219 jankParamsMap[KEY_SAMPLE_INTERVAL] = sampleInterval;
1220 jankParamsMap[KEY_IGNORE_STARTUP_TIME] = startUpTime;
1221 jankParamsMap[KEY_SAMPLE_COUNT] = sampleCount;
1222 if (logType == CatchLogType::LOGTYPE_COLLECT_TRACE) {
1223 XCOLLIE_LOGI("Set thread only dump trace success.");
1224 return;
1225 }
1226 if (jankParamsMap[KEY_SET_TIMES_FLAG] == SET_TIMES_FLAG) {
1227 jankParamsMap[KEY_SAMPLE_REPORT_TIMES] = reportTimes;
1228 stackContent_.reportTimes = reportTimes;
1229 jankParamsMap[KEY_SET_TIMES_FLAG] = 0;
1230 }
1231 XCOLLIE_LOGI("Set thread sampler params success. logType: %{public}d, sample interval: %{public}d, "
1232 "ignore startUp interval: %{public}d, count: %{public}d, reportTimes: %{public}d.",
1233 logType, sampleInterval, startUpTime, sampleCount, stackContent_.reportTimes);
1234 }
1235
ConvertStrToNum(std::map<std::string,std::string> paramsMap,const std::string & key)1236 int WatchdogInner::ConvertStrToNum(std::map<std::string, std::string> paramsMap, const std::string& key)
1237 {
1238 int num = -1;
1239 auto it = paramsMap.find(key);
1240 if (it == paramsMap.end()) {
1241 XCOLLIE_LOGE("Set the thread sampler param error, %{public}s is not exist.", key.c_str());
1242 return num;
1243 }
1244 std::string str = it->second;
1245 if (!str.empty() && str.size() < std::to_string(INT32_MAX).length()) {
1246 if (std::all_of(std::begin(str), std::end(str), [] (const char &c) {
1247 return isdigit(c);
1248 })) {
1249 num = std::stoi(str);
1250 }
1251 }
1252 if (num < 0) {
1253 XCOLLIE_LOGE("Set param error, %{public}s: %{public}s should be a number and greater than 0.",
1254 key.c_str(), str.c_str());
1255 }
1256 return num;
1257 }
1258
CheckSampleParam(std::map<std::string,std::string> paramsMap)1259 bool WatchdogInner::CheckSampleParam(std::map<std::string, std::string> paramsMap)
1260 {
1261 int sampleInterval = ConvertStrToNum(paramsMap, KEY_SAMPLE_INTERVAL);
1262 if (sampleInterval < 0) {
1263 return false;
1264 } else if (sampleInterval < SAMPLE_INTERVAL_MIN || sampleInterval > SAMPLE_INTERVAL_MAX) {
1265 XCOLLIE_LOGE("Set the range of sample stack is from %{public}d to %{public}d, "
1266 "interval: %{public}d.", SAMPLE_INTERVAL_MIN, SAMPLE_INTERVAL_MAX, sampleInterval);
1267 return false;
1268 }
1269
1270 int startUpTime = ConvertStrToNum(paramsMap, KEY_IGNORE_STARTUP_TIME);
1271 if (startUpTime < 0) {
1272 return false;
1273 } else if (startUpTime < IGNORE_STARTUP_TIME_MIN) {
1274 XCOLLIE_LOGE("Set the minimum of ignore startup interval is %{public}d s, "
1275 "interval: %{public}d.", IGNORE_STARTUP_TIME_MIN, startUpTime);
1276 return false;
1277 }
1278
1279 int sampleCount = ConvertStrToNum(paramsMap, KEY_SAMPLE_COUNT);
1280 if (sampleCount < 0) {
1281 return false;
1282 }
1283 int maxSampleCount = MAX_SAMPLE_STACK_TIMES / sampleInterval - SAMPLE_EXTRA_COUNT;
1284 if (sampleCount < SAMPLE_COUNT_MIN || sampleCount > maxSampleCount) {
1285 XCOLLIE_LOGE("Set the range of sample count, min value: %{public}d max value: %{public}d, count: %{public}d.",
1286 SAMPLE_COUNT_MIN, maxSampleCount, sampleCount);
1287 return false;
1288 }
1289
1290 int reportTimes = ConvertStrToNum(paramsMap, KEY_SAMPLE_REPORT_TIMES);
1291 if (reportTimes < 0) {
1292 return false;
1293 } else if (reportTimes < SAMPLE_REPORT_TIMES_MIN || reportTimes > SAMPLE_REPORT_TIMES_MAX) {
1294 XCOLLIE_LOGE("Set the range of sample reportTimes is from %{public}d to %{public}d,"
1295 "reportTimes: %{public}d", SAMPLE_REPORT_TIMES_MIN, SAMPLE_REPORT_TIMES_MAX, reportTimes);
1296 return false;
1297 }
1298 UpdateJankParam(sampleInterval, startUpTime, sampleCount, CatchLogType::LOGTYPE_SAMPLE_STACK, reportTimes);
1299 return true;
1300 }
1301
SetEventConfig(std::map<std::string,std::string> paramsMap)1302 int WatchdogInner::SetEventConfig(std::map<std::string, std::string> paramsMap)
1303 {
1304 if (paramsMap.empty()) {
1305 XCOLLIE_LOGE("Set the thread sampler param map is null.");
1306 return -1;
1307 }
1308 int logType = ConvertStrToNum(paramsMap, KEY_LOG_TYPE);
1309 size_t size = paramsMap.size();
1310 switch (logType) {
1311 case CatchLogType::LOGTYPE_DEFAULT:
1312 return -1;
1313 case CatchLogType::LOGTYPE_NONE:
1314 case CatchLogType::LOGTYPE_COLLECT_TRACE: {
1315 if (size != SAMPLE_PARAMS_MIN_SIZE) {
1316 XCOLLIE_LOGE("Set the thread sampler param map size error, can only set log_type. "
1317 "map size: %{public}zu", size);
1318 return -1;
1319 }
1320 UpdateJankParam(SAMPLE_DEFULE_INTERVAL, DEFAULT_IGNORE_STARTUP_TIME, SAMPLE_DEFULE_COUNT,
1321 logType, SAMPLE_REPORT_TIMES_MIN);
1322 break;
1323 }
1324 case CatchLogType::LOGTYPE_SAMPLE_STACK: {
1325 if (size != SAMPLE_PARAMS_MAX_SIZE) {
1326 XCOLLIE_LOGE("Set the thread sampler param map size error, current map size: %{public}zu", size);
1327 return -1;
1328 }
1329 if (!CheckSampleParam(paramsMap)) {
1330 return -1;
1331 }
1332 break;
1333 }
1334 default: {
1335 XCOLLIE_LOGE("Set the log_type can only be 0 1 2, logType: %{public}d", logType);
1336 return -1;
1337 }
1338 };
1339 return 0;
1340 }
1341 } // end of namespace HiviewDFX
1342 } // end of namespace OHOS
1343