1 /*
2 * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "watchdog_inner.h"
17
18 #include <cerrno>
19 #include <climits>
20 #include <mutex>
21
22 #include <sys/stat.h>
23 #include <sys/types.h>
24 #include <fcntl.h>
25 #include <pthread.h>
26 #include <unistd.h>
27 #include <csignal>
28 #include <string>
29
30 #include <securec.h>
31
32 #include "backtrace_local.h"
33 #include "hisysevent.h"
34 #include "xcollie_utils.h"
35 #include "xcollie_define.h"
36 #include "dfx_define.h"
37 #include "parameter.h"
38
// Hook signature: receives an output buffer, its length and a ucontext pointer.
using ThreadInfoCallBack = void (*)(char* buf, size_t len, void* ucontext);
// Declared weak: this file compares its address against nullptr before calling it.
extern "C" void SetThreadInfoCallback(ThreadInfoCallBack func) __attribute__((weak));
41 namespace OHOS {
42 namespace HiviewDFX {
43 constexpr uint64_t DEFAULT_TIMEOUT = 60 * 1000;
44 constexpr uint32_t FFRT_CALLBACK_TIME = 30 * 1000;
45 constexpr uint32_t IPC_CHECKER_TIME = 30 * 1000;
46 constexpr uint32_t TIME_MS_TO_S = 1000;
47 constexpr int INTERVAL_KICK_TIME = 6 * 1000;
48 constexpr int32_t WATCHED_UID = 5523;
49 constexpr int SERVICE_WARNING = 1;
50 const int BUF_SIZE_512 = 512;
51 const char* g_sysKernelHungtaskUserlist = "/sys/kernel/hungtask/userlist";
52 const char* g_hmosHungtaskUserlist = "/proc/sys/hguard/user_list";
53 const std::string ON_KICK_TIME = "on,72";
54 const std::string ON_KICK_TIME_HMOS = "on,63,foundation";
55 const std::string KICK_TIME = "kick";
56 const std::string KICK_TIME_HMOS = "kick,foundation";
57 const int32_t NOT_OPEN = -1;
58 std::mutex WatchdogInner::lockFfrt_;
59 static uint64_t g_nextKickTime = GetCurrentTickMillseconds();
60 static int32_t g_fd = -1;
61 static bool g_existFile = true;
62 namespace {
ThreadInfo(char * buf,size_t len,void * ucontext)63 void ThreadInfo(char *buf __attribute__((unused)),
64 size_t len __attribute__((unused)),
65 void* ucontext __attribute__((unused)))
66 {
67 if (ucontext == nullptr) {
68 XCOLLIE_LOGI("ThreadInfo ucontext == nullptr");
69 return;
70 }
71
72 auto ret = memcpy_s(buf, len, WatchdogInner::GetInstance().currentScene_.c_str(),
73 WatchdogInner::GetInstance().currentScene_.size());
74 if (ret != 0) {
75 XCOLLIE_LOGE("memcpy_s ret = %d!", ret);
76 }
77 }
78 }
79
WatchdogInner()80 WatchdogInner::WatchdogInner()
81 : cntCallback_(0), timeCallback_(0)
82 {
83 currentScene_ = "thread DfxWatchdog: Current scenario is hicollie.\n";
84 }
85
~WatchdogInner()86 WatchdogInner::~WatchdogInner()
87 {
88 Stop();
89 }
90
IsInAppspwan()91 static bool IsInAppspwan()
92 {
93 if (getuid() == 0 && GetSelfProcName().find("appspawn") != std::string::npos) {
94 return true;
95 }
96 return false;
97 }
98
AddThread(const std::string & name,std::shared_ptr<AppExecFwk::EventHandler> handler,TimeOutCallback timeOutCallback,uint64_t interval)99 int WatchdogInner::AddThread(const std::string &name,
100 std::shared_ptr<AppExecFwk::EventHandler> handler, TimeOutCallback timeOutCallback, uint64_t interval)
101 {
102 if (name.empty() || handler == nullptr) {
103 XCOLLIE_LOGE("Add thread fail, invalid args!");
104 return -1;
105 }
106
107 if (IsInAppspwan()) {
108 return -1;
109 }
110
111 std::string limitedName = GetLimitedSizeName(name);
112 XCOLLIE_LOGI("Add thread %{public}s to watchdog.", limitedName.c_str());
113 std::unique_lock<std::mutex> lock(lock_);
114
115 IpcCheck();
116
117 if (!InsertWatchdogTaskLocked(limitedName, WatchdogTask(limitedName, handler, timeOutCallback, interval))) {
118 return -1;
119 }
120 return 0;
121 }
122
RunOneShotTask(const std::string & name,Task && task,uint64_t delay)123 void WatchdogInner::RunOneShotTask(const std::string& name, Task&& task, uint64_t delay)
124 {
125 if (name.empty() || task == nullptr) {
126 XCOLLIE_LOGE("Add task fail, invalid args!");
127 return;
128 }
129
130 if (IsInAppspwan()) {
131 return;
132 }
133
134 std::unique_lock<std::mutex> lock(lock_);
135 std::string limitedName = GetLimitedSizeName(name);
136 InsertWatchdogTaskLocked(limitedName, WatchdogTask(limitedName, std::move(task), delay, 0, true));
137 }
138
RunXCollieTask(const std::string & name,uint64_t timeout,XCollieCallback func,void * arg,unsigned int flag)139 int64_t WatchdogInner::RunXCollieTask(const std::string& name, uint64_t timeout, XCollieCallback func,
140 void *arg, unsigned int flag)
141 {
142 if (name.empty() || timeout == 0) {
143 XCOLLIE_LOGE("Add XCollieTask fail, invalid args!");
144 return INVALID_ID;
145 }
146
147 if (IsInAppspwan()) {
148 return INVALID_ID;
149 }
150
151 std::unique_lock<std::mutex> lock(lock_);
152 std::string limitedName = GetLimitedSizeName(name);
153 return InsertWatchdogTaskLocked(limitedName, WatchdogTask(limitedName, timeout, func, arg, flag));
154 }
155
RemoveXCollieTask(int64_t id)156 void WatchdogInner::RemoveXCollieTask(int64_t id)
157 {
158 std::priority_queue<WatchdogTask> tmpQueue;
159 std::unique_lock<std::mutex> lock(lock_);
160 size_t size = checkerQueue_.size();
161 if (size == 0) {
162 XCOLLIE_LOGE("Remove XCollieTask %{public}lld fail, empty queue!", static_cast<long long>(id));
163 return;
164 }
165 while (!checkerQueue_.empty()) {
166 const WatchdogTask& task = checkerQueue_.top();
167 if (task.id != id) {
168 tmpQueue.push(task);
169 }
170 checkerQueue_.pop();
171 }
172 if (tmpQueue.size() == size) {
173 XCOLLIE_LOGE("Remove XCollieTask fail, can not find timer %{public}lld, size=%{public}zu!",
174 static_cast<long long>(id), size);
175 return;
176 }
177 tmpQueue.swap(checkerQueue_);
178 }
179
RunPeriodicalTask(const std::string & name,Task && task,uint64_t interval,uint64_t delay)180 void WatchdogInner::RunPeriodicalTask(const std::string& name, Task&& task, uint64_t interval, uint64_t delay)
181 {
182 if (name.empty() || task == nullptr) {
183 XCOLLIE_LOGE("Add task fail, invalid args!");
184 return;
185 }
186
187 if (IsInAppspwan()) {
188 return;
189 }
190
191 std::string limitedName = GetLimitedSizeName(name);
192 XCOLLIE_LOGI("Add periodical task %{public}s to watchdog.", name.c_str());
193 std::unique_lock<std::mutex> lock(lock_);
194 InsertWatchdogTaskLocked(limitedName, WatchdogTask(limitedName, std::move(task), delay, interval, false));
195 }
196
IsTaskExistLocked(const std::string & name)197 bool WatchdogInner::IsTaskExistLocked(const std::string& name)
198 {
199 if (taskNameSet_.find(name) != taskNameSet_.end()) {
200 return true;
201 }
202
203 return false;
204 }
205
IsExceedMaxTaskLocked()206 bool WatchdogInner::IsExceedMaxTaskLocked()
207 {
208 if (checkerQueue_.size() >= MAX_WATCH_NUM) {
209 XCOLLIE_LOGE("Exceed max watchdog task!");
210 return true;
211 }
212
213 return false;
214 }
215
InsertWatchdogTaskLocked(const std::string & name,WatchdogTask && task)216 int64_t WatchdogInner::InsertWatchdogTaskLocked(const std::string& name, WatchdogTask&& task)
217 {
218 if (!task.isOneshotTask && IsTaskExistLocked(name)) {
219 XCOLLIE_LOGI("Task with %{public}s already exist, failed to insert.", name.c_str());
220 return 0;
221 }
222
223 if (IsExceedMaxTaskLocked()) {
224 XCOLLIE_LOGE("Exceed max watchdog task, failed to insert.");
225 return 0;
226 }
227 int64_t id = task.id;
228 checkerQueue_.push(std::move(task));
229 if (!task.isOneshotTask) {
230 taskNameSet_.insert(name);
231 }
232 CreateWatchdogThreadIfNeed();
233 condition_.notify_all();
234
235 return id;
236 }
237
StopWatchdog()238 void WatchdogInner::StopWatchdog()
239 {
240 Stop();
241 }
242
IsCallbackLimit(unsigned int flag)243 bool WatchdogInner::IsCallbackLimit(unsigned int flag)
244 {
245 bool ret = false;
246 time_t startTime = time(nullptr);
247 if (!(flag & XCOLLIE_FLAG_LOG)) {
248 return ret;
249 }
250 if (timeCallback_ + XCOLLIE_CALLBACK_TIMEWIN_MAX < startTime) {
251 timeCallback_ = startTime;
252 } else {
253 if (++cntCallback_ > XCOLLIE_CALLBACK_HISTORY_MAX) {
254 ret = true;
255 }
256 }
257 return ret;
258 }
259
CreateWatchdogThreadIfNeed()260 void WatchdogInner::CreateWatchdogThreadIfNeed()
261 {
262 std::call_once(flag_, [this] {
263 if (threadLoop_ == nullptr) {
264 threadLoop_ = std::make_unique<std::thread>(&WatchdogInner::Start, this);
265 XCOLLIE_LOGI("Watchdog is running!");
266 }
267 });
268 }
269
FetchNextTask(uint64_t now,WatchdogTask & task)270 uint64_t WatchdogInner::FetchNextTask(uint64_t now, WatchdogTask& task)
271 {
272 std::unique_lock<std::mutex> lock(lock_);
273 if (isNeedStop_) {
274 while (!checkerQueue_.empty()) {
275 checkerQueue_.pop();
276 }
277 return DEFAULT_TIMEOUT;
278 }
279
280 if (checkerQueue_.empty()) {
281 return DEFAULT_TIMEOUT;
282 }
283
284 const WatchdogTask& queuedTask = checkerQueue_.top();
285
286 if (&(queuedTask.name) == nullptr) {
287 checkerQueue_.pop();
288 XCOLLIE_LOGW("queuedTask, failed.");
289 return DEFAULT_TIMEOUT;
290 }
291
292 if (g_existFile && queuedTask.name == IPC_FULL && now - g_nextKickTime > INTERVAL_KICK_TIME) {
293 if (KickWatchdog()) {
294 g_nextKickTime = now;
295 }
296 }
297 if (queuedTask.nextTickTime > now) {
298 return queuedTask.nextTickTime - now;
299 }
300
301 currentScene_ = "thread DfxWatchdog: Current scenario is task name: " + queuedTask.name + "\n";
302 task = queuedTask;
303 checkerQueue_.pop();
304 return 0;
305 }
306
ReInsertTaskIfNeed(WatchdogTask & task)307 void WatchdogInner::ReInsertTaskIfNeed(WatchdogTask& task)
308 {
309 if (task.checkInterval == 0) {
310 return;
311 }
312
313 std::unique_lock<std::mutex> lock(lock_);
314 task.nextTickTime = task.nextTickTime + task.checkInterval;
315 checkerQueue_.push(task);
316 }
317
Start()318 bool WatchdogInner::Start()
319 {
320 if (pthread_setname_np(pthread_self(), "OS_DfxWatchdog") != 0) {
321 XCOLLIE_LOGW("Failed to set threadName for watchdog, errno:%d.", errno);
322 }
323
324 XCOLLIE_LOGI("Watchdog is running in thread(%{public}d)!", gettid());
325 if (SetThreadInfoCallback != nullptr) {
326 SetThreadInfoCallback(ThreadInfo);
327 XCOLLIE_LOGI("Watchdog Set Thread Info Callback");
328 }
329 while (!isNeedStop_) {
330 uint64_t now = GetCurrentTickMillseconds();
331 WatchdogTask task;
332 uint64_t leftTimeMill = FetchNextTask(now, task);
333 if (leftTimeMill == 0) {
334 task.Run(now);
335 ReInsertTaskIfNeed(task);
336 currentScene_ = "thread DfxWatchdog: Current scenario is hicollie.\n";
337 continue;
338 } else if (isNeedStop_) {
339 break;
340 } else {
341 std::unique_lock<std::mutex> lock(lock_);
342 condition_.wait_for(lock, std::chrono::milliseconds(leftTimeMill));
343 }
344 }
345 if (SetThreadInfoCallback != nullptr) {
346 SetThreadInfoCallback(nullptr);
347 }
348 return true;
349 }
350
SendMsgToHungtask(const std::string & msg)351 bool WatchdogInner::SendMsgToHungtask(const std::string& msg)
352 {
353 if (g_fd == NOT_OPEN) {
354 g_fd = open(g_sysKernelHungtaskUserlist, O_WRONLY);
355 if (g_fd < 0) {
356 g_fd = open(g_hmosHungtaskUserlist, O_WRONLY);
357 if (g_fd < 0) {
358 XCOLLIE_LOGE("can't open hungtask file");
359 g_existFile = false;
360 return false;
361 }
362 XCOLLIE_LOGE("change to hmos kernel");
363 isHmos = true;
364 } else {
365 XCOLLIE_LOGE("change to linux kernel");
366 }
367 }
368
369 ssize_t watchdogWrite = write(g_fd, msg.c_str(), msg.size());
370 if (watchdogWrite < 0 || watchdogWrite != static_cast<ssize_t>(msg.size())) {
371 XCOLLIE_LOGE("watchdogWrite msg failed");
372 close(g_fd);
373 g_fd = -1;
374 return false;
375 }
376 XCOLLIE_LOGE("Send %{public}s to hungtask Successful\n", msg.c_str());
377 return true;
378 }
379
KickWatchdog()380 bool WatchdogInner::KickWatchdog()
381 {
382 return true;
383 }
384
IpcCheck()385 void WatchdogInner::IpcCheck()
386 {
387 if (getuid() == WATCHED_UID) {
388 if (binderCheckHander_ == nullptr) {
389 auto runner = AppExecFwk::EventRunner::Create(IPC_CHECKER);
390 binderCheckHander_ = std::make_shared<AppExecFwk::EventHandler>(runner);
391 if (!InsertWatchdogTaskLocked(IPC_CHECKER, WatchdogTask(IPC_FULL, binderCheckHander_,
392 nullptr, IPC_CHECKER_TIME))) {
393 XCOLLIE_LOGE("Add %{public}s thread fail", IPC_CHECKER);
394 }
395 }
396 }
397 }
398
WriteStringToFile(uint32_t pid,const char * str)399 void WatchdogInner::WriteStringToFile(uint32_t pid, const char *str)
400 {
401 char file[PATH_LEN] = {0};
402 if (snprintf_s(file, PATH_LEN, PATH_LEN - 1, "/proc/%d/unexpected_die_catch", pid) == -1) {
403 XCOLLIE_LOGI("failed to build path for %d.", pid);
404 }
405 int fd = open(file, O_RDWR);
406 if (fd == -1) {
407 return;
408 }
409 if (write(fd, str, strlen(str)) < 0) {
410 XCOLLIE_LOGI("failed to write 0 for %s", file);
411 close(fd);
412 return;
413 }
414 close(fd);
415 return;
416 }
417
FfrtCallback(uint64_t taskId,const char * taskInfo,uint32_t delayedTaskCount)418 void WatchdogInner::FfrtCallback(uint64_t taskId, const char *taskInfo, uint32_t delayedTaskCount)
419 {
420 std::string description = "FfrtCallback: task(";
421 description += taskInfo;
422 description += ") blocked " + std::to_string(FFRT_CALLBACK_TIME / TIME_MS_TO_S) + "s";
423 bool isExist = false;
424 {
425 std::unique_lock<std::mutex> lock(lockFfrt_);
426 auto &map = WatchdogInner::GetInstance().taskIdCnt;
427 auto search = map.find(taskId);
428 uint32_t pid = getpid();
429 if (search != map.end()) {
430 isExist = true;
431 } else {
432 map[taskId] = SERVICE_WARNING;
433 }
434 }
435
436 if (isExist) {
437 description += ", report twice instead of exiting process."; // 1s = 1000ms
438 WatchdogInner::SendFfrtEvent(description, "SERVICE_BLOCK", taskInfo);
439 WatchdogInner::LeftTimeExitProcess(description);
440 } else {
441 WatchdogInner::SendFfrtEvent(description, "SERVICE_WARNING", taskInfo);
442 }
443 }
444
InitFfrtWatchdog()445 void WatchdogInner::InitFfrtWatchdog()
446 {
447 CreateWatchdogThreadIfNeed();
448 IpcCheck();
449 ffrt_watchdog_register(FfrtCallback, FFRT_CALLBACK_TIME, FFRT_CALLBACK_TIME);
450 }
451
SendFfrtEvent(const std::string & msg,const std::string & eventName,const char * taskInfo)452 void WatchdogInner::SendFfrtEvent(const std::string &msg, const std::string &eventName, const char * taskInfo)
453 {
454 int32_t pid = getpid();
455 uint32_t gid = getgid();
456 uint32_t uid = getuid();
457 time_t curTime = time(nullptr);
458 std::string sendMsg = std::string((ctime(&curTime) == nullptr) ? "" : ctime(&curTime)) +
459 "\n" + msg + "\n";
460 char* buffer = new char[FFRT_BUFFER_SIZE + 1]();
461 buffer[FFRT_BUFFER_SIZE] = 0;
462 ffrt_watchdog_dumpinfo(buffer, FFRT_BUFFER_SIZE);
463 sendMsg += buffer;
464 sendMsg += "\n" + GetProcessStacktrace();
465 delete[] buffer;
466 int ret = HiSysEventWrite(HiSysEvent::Domain::FRAMEWORK, eventName, HiSysEvent::EventType::FAULT,
467 "PID", pid,
468 "TGID", gid,
469 "UID", uid,
470 "MODULE_NAME", taskInfo,
471 "PROCESS_NAME", GetSelfProcName(),
472 "MSG", sendMsg);
473 XCOLLIE_LOGI("hisysevent write result=%{public}d, send event [FRAMEWORK,%{public}s], "
474 "msg=%{public}s", ret, eventName.c_str(), msg.c_str());
475 }
476
LeftTimeExitProcess(const std::string & description)477 void WatchdogInner::LeftTimeExitProcess(const std::string &description)
478 {
479 int32_t pid = getpid();
480 if (IsProcessDebug(pid)) {
481 XCOLLIE_LOGI("heap dump for %{public}d, don't exit.", pid);
482 return;
483 }
484 DelayBeforeExit(10); // sleep 10s for hiview dump
485 XCOLLIE_LOGI("Process is going to exit, reason:%{public}s.", description.c_str());
486 WatchdogInner::WriteStringToFile(pid, "0");
487
488 const int buffSize = 128;
489 char param[buffSize] = {0};
490 GetParameter("hiviewdfx.appfreeze.filter_bundle_name", "", param, buffSize - 1);
491 std::string debugBundle(param);
492
493 std::string procCmdlineContent = GetProcessNameFromProcCmdline(pid);
494 if (procCmdlineContent.compare(debugBundle) == 0) {
495 XCOLLIE_LOGW("appfreeze filtration %{public}s_%{public}s don't exit.",
496 debugBundle.c_str(), procCmdlineContent.c_str());
497 return;
498 }
499
500 _exit(0);
501 }
502
Stop()503 bool WatchdogInner::Stop()
504 {
505 isNeedStop_.store(true);
506 condition_.notify_all();
507 if (threadLoop_ != nullptr && threadLoop_->joinable()) {
508 threadLoop_->join();
509 threadLoop_ = nullptr;
510 }
511 return true;
512 }
513 } // end of namespace HiviewDFX
514 } // end of namespace OHOS
515