1 /*
2 * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "watchdog_inner.h"
17
18 #include <cerrno>
19 #include <climits>
20 #include <mutex>
21
22 #include <sys/stat.h>
23 #include <sys/types.h>
24 #include <fcntl.h>
25 #include <pthread.h>
26 #include <unistd.h>
27 #include <csignal>
28
29 #include <securec.h>
30
31 #include "backtrace_local.h"
32 #include "hisysevent.h"
33 #include "xcollie_utils.h"
34 #include "xcollie_define.h"
35
// Hook signature: receives an output buffer `buf` of capacity `len` plus an opaque `ucontext` pointer.
using ThreadInfoCallBack = void (*)(char* buf, size_t len, void* ucontext);
// Declared weak, so the symbol may be absent at link time; callers compare it against nullptr before use.
extern "C" void SetThreadInfoCallback(ThreadInfoCallBack func) __attribute__((weak));
38 namespace OHOS {
39 namespace HiviewDFX {
40 constexpr uint64_t DEFAULT_TIMEOUT = 60 * 1000;
41 constexpr uint32_t FFRT_CALLBACK_TIME = 30 * 1000;
42 constexpr uint32_t IPC_CHECKER_TIME = 30 * 1000;
43 constexpr uint32_t TIME_MS_TO_S = 1000;
44 constexpr int INTERVAL_KICK_TIME = 6 * 1000;
45 constexpr int32_t WATCHED_UID = 5523;
46 constexpr int SERVICE_WARNING = 1;
47 const int BUF_SIZE_512 = 512;
48 const char* g_sysKernelHungtaskUserlist = "/sys/kernel/hungtask/userlist";
49 const std::string ON_KICK_TIME = "on,63";
50 const std::string KICK_TIME = "kick";
51 const int32_t NOT_OPEN = -1;
52 static uint64_t g_nextKickTime = GetCurrentTickMillseconds();
53 static int32_t g_fd = -1;
54 static bool g_existFile = true;
55 namespace {
ThreadInfo(char * buf,size_t len,void * ucontext)56 void ThreadInfo(char *buf __attribute__((unused)),
57 size_t len __attribute__((unused)),
58 void* ucontext __attribute__((unused)))
59 {
60 if (ucontext == nullptr) {
61 XCOLLIE_LOGI("ThreadInfo ucontext == nullptr");
62 return;
63 }
64
65 auto ret = memcpy_s(buf, len, WatchdogInner::GetInstance().currentScene_.c_str(),
66 WatchdogInner::GetInstance().currentScene_.size());
67 if (ret != 0) {
68 XCOLLIE_LOGE("memcpy_s ret = %d!", ret);
69 }
70 }
71 }
72
WatchdogInner()73 WatchdogInner::WatchdogInner()
74 : cntCallback_(0), timeCallback_(0)
75 {
76 currentScene_ = "thread DfxWatchdog: Current scenario is hicollie.\n";
77 }
78
~WatchdogInner()79 WatchdogInner::~WatchdogInner()
80 {
81 Stop();
82 }
83
IsInAppspwan()84 static bool IsInAppspwan()
85 {
86 if (getuid() == 0 && GetSelfProcName().find("appspawn") != std::string::npos) {
87 return true;
88 }
89 return false;
90 }
91
AddThread(const std::string & name,std::shared_ptr<AppExecFwk::EventHandler> handler,TimeOutCallback timeOutCallback,uint64_t interval)92 int WatchdogInner::AddThread(const std::string &name,
93 std::shared_ptr<AppExecFwk::EventHandler> handler, TimeOutCallback timeOutCallback, uint64_t interval)
94 {
95 if (name.empty() || handler == nullptr) {
96 XCOLLIE_LOGE("Add thread fail, invalid args!");
97 return -1;
98 }
99
100 if (IsInAppspwan()) {
101 return -1;
102 }
103
104 XCOLLIE_LOGI("Add thread %{public}s to watchdog.", name.c_str());
105 std::unique_lock<std::mutex> lock(lock_);
106
107 IpcCheck();
108
109 if (!InsertWatchdogTaskLocked(name, WatchdogTask(name, handler, timeOutCallback, interval))) {
110 return -1;
111 }
112 return 0;
113 }
114
RunOneShotTask(const std::string & name,Task && task,uint64_t delay)115 void WatchdogInner::RunOneShotTask(const std::string& name, Task&& task, uint64_t delay)
116 {
117 if (name.empty() || task == nullptr) {
118 XCOLLIE_LOGE("Add task fail, invalid args!");
119 return;
120 }
121
122 if (IsInAppspwan()) {
123 return;
124 }
125
126 std::unique_lock<std::mutex> lock(lock_);
127 InsertWatchdogTaskLocked(name, WatchdogTask(name, std::move(task), delay, 0, true));
128 }
129
RunXCollieTask(const std::string & name,uint64_t timeout,XCollieCallback func,void * arg,unsigned int flag)130 int64_t WatchdogInner::RunXCollieTask(const std::string& name, uint64_t timeout, XCollieCallback func,
131 void *arg, unsigned int flag)
132 {
133 if (name.empty() || timeout == 0) {
134 XCOLLIE_LOGE("Add XCollieTask fail, invalid args!");
135 return INVALID_ID;
136 }
137
138 if (IsInAppspwan()) {
139 return INVALID_ID;
140 }
141
142 std::unique_lock<std::mutex> lock(lock_);
143 return InsertWatchdogTaskLocked(name, WatchdogTask(name, timeout, func, arg, flag));
144 }
145
RemoveXCollieTask(int64_t id)146 void WatchdogInner::RemoveXCollieTask(int64_t id)
147 {
148 std::priority_queue<WatchdogTask> tmpQueue;
149 std::unique_lock<std::mutex> lock(lock_);
150 size_t size = checkerQueue_.size();
151 if (size == 0) {
152 XCOLLIE_LOGE("Remove XCollieTask %{public}lld fail, empty queue!", static_cast<long long>(id));
153 return;
154 }
155 while (!checkerQueue_.empty())
156 {
157 const WatchdogTask& task = checkerQueue_.top();
158 if (task.id != id) {
159 tmpQueue.push(task);
160 }
161 checkerQueue_.pop();
162 }
163 if (tmpQueue.size() == size) {
164 XCOLLIE_LOGE("Remove XCollieTask fail, can not find timer %{public}lld, size=%{public}zu!",
165 static_cast<long long>(id), size);
166 return;
167 }
168 tmpQueue.swap(checkerQueue_);
169 }
170
RunPeriodicalTask(const std::string & name,Task && task,uint64_t interval,uint64_t delay)171 void WatchdogInner::RunPeriodicalTask(const std::string& name, Task&& task, uint64_t interval, uint64_t delay)
172 {
173 if (name.empty() || task == nullptr) {
174 XCOLLIE_LOGE("Add task fail, invalid args!");
175 return;
176 }
177
178 if (IsInAppspwan()) {
179 return;
180 }
181
182 XCOLLIE_LOGI("Add periodical task %{public}s to watchdog.", name.c_str());
183 std::unique_lock<std::mutex> lock(lock_);
184 InsertWatchdogTaskLocked(name, WatchdogTask(name, std::move(task), delay, interval, false));
185 }
186
IsTaskExistLocked(const std::string & name)187 bool WatchdogInner::IsTaskExistLocked(const std::string& name)
188 {
189 if (taskNameSet_.find(name) != taskNameSet_.end()) {
190 return true;
191 }
192
193 return false;
194 }
195
IsExceedMaxTaskLocked()196 bool WatchdogInner::IsExceedMaxTaskLocked()
197 {
198 if (checkerQueue_.size() >= MAX_WATCH_NUM) {
199 XCOLLIE_LOGE("Exceed max watchdog task!");
200 return true;
201 }
202
203 return false;
204 }
205
InsertWatchdogTaskLocked(const std::string & name,WatchdogTask && task)206 int64_t WatchdogInner::InsertWatchdogTaskLocked(const std::string& name, WatchdogTask&& task)
207 {
208 if (!task.isOneshotTask && IsTaskExistLocked(name)) {
209 XCOLLIE_LOGI("Task with %{public}s already exist, failed to insert.", name.c_str());
210 return 0;
211 }
212
213 if (IsExceedMaxTaskLocked()) {
214 XCOLLIE_LOGE("Exceed max watchdog task, failed to insert.");
215 return 0;
216 }
217 int64_t id = task.id;
218 checkerQueue_.push(std::move(task));
219 if (!task.isOneshotTask) {
220 taskNameSet_.insert(name);
221 }
222 CreateWatchdogThreadIfNeed();
223 condition_.notify_all();
224
225 return id;
226 }
227
StopWatchdog()228 void WatchdogInner::StopWatchdog()
229 {
230 Stop();
231 }
232
IsCallbackLimit(unsigned int flag)233 bool WatchdogInner::IsCallbackLimit(unsigned int flag)
234 {
235 bool ret = false;
236 time_t startTime = time(nullptr);
237 if (!(flag & XCOLLIE_FLAG_LOG)) {
238 return ret;
239 }
240 if (timeCallback_ + XCOLLIE_CALLBACK_TIMEWIN_MAX < startTime) {
241 timeCallback_ = startTime;
242 } else {
243 if (++cntCallback_ > XCOLLIE_CALLBACK_HISTORY_MAX) {
244 ret = true;
245 }
246 }
247 return ret;
248 }
249
CreateWatchdogThreadIfNeed()250 void WatchdogInner::CreateWatchdogThreadIfNeed()
251 {
252 std::call_once(flag_, [this] {
253 if (threadLoop_ == nullptr) {
254 threadLoop_ = std::make_unique<std::thread>(&WatchdogInner::Start, this);
255 XCOLLIE_LOGI("Watchdog is running!");
256 }
257 });
258 }
259
FetchNextTask(uint64_t now,WatchdogTask & task)260 uint64_t WatchdogInner::FetchNextTask(uint64_t now, WatchdogTask& task)
261 {
262 std::unique_lock<std::mutex> lock(lock_);
263 if (isNeedStop_) {
264 while (!checkerQueue_.empty()) {
265 checkerQueue_.pop();
266 }
267 return DEFAULT_TIMEOUT;
268 }
269
270 if (checkerQueue_.empty()) {
271 return DEFAULT_TIMEOUT;
272 }
273
274 const WatchdogTask& queuedTask = checkerQueue_.top();
275
276 if (g_existFile && queuedTask.name == IPC_FULL && now - g_nextKickTime > INTERVAL_KICK_TIME) {
277 if (KickWatchdog()) {
278 g_nextKickTime = now;
279 }
280 }
281 if (queuedTask.nextTickTime > now) {
282 return queuedTask.nextTickTime - now;
283 }
284
285 currentScene_ = "thread DfxWatchdog: Current scenario is task name: " + queuedTask.name + "\n";
286 task = queuedTask;
287 checkerQueue_.pop();
288 return 0;
289 }
290
ReInsertTaskIfNeed(WatchdogTask & task)291 void WatchdogInner::ReInsertTaskIfNeed(WatchdogTask& task)
292 {
293 if (task.checkInterval == 0) {
294 return;
295 }
296
297 std::unique_lock<std::mutex> lock(lock_);
298 task.nextTickTime = task.nextTickTime + task.checkInterval;
299 checkerQueue_.push(task);
300 }
301
Start()302 bool WatchdogInner::Start()
303 {
304 if (pthread_setname_np(pthread_self(), "DfxWatchdog") != 0) {
305 XCOLLIE_LOGW("Failed to set threadName for watchdog, errno:%d.", errno);
306 }
307
308 XCOLLIE_LOGI("Watchdog is running in thread(%{public}d)!", gettid());
309 if (SetThreadInfoCallback != nullptr) {
310 SetThreadInfoCallback(ThreadInfo);
311 XCOLLIE_LOGI("Watchdog Set Thread Info Callback");
312 }
313 while (!isNeedStop_) {
314 uint64_t now = GetCurrentTickMillseconds();
315 WatchdogTask task;
316 uint64_t leftTimeMill = FetchNextTask(now, task);
317 if (leftTimeMill == 0) {
318 task.Run(now);
319 ReInsertTaskIfNeed(task);
320 currentScene_ = "thread DfxWatchdog: Current scenario is hicollie.\n";
321 continue;
322 } else if (isNeedStop_) {
323 break;
324 } else {
325 std::unique_lock<std::mutex> lock(lock_);
326 condition_.wait_for(lock, std::chrono::milliseconds(leftTimeMill));
327 }
328 }
329 if (SetThreadInfoCallback != nullptr) {
330 SetThreadInfoCallback(nullptr);
331 }
332 return true;
333 }
334
SendMsgToHungtask(const std::string & msg)335 bool WatchdogInner::SendMsgToHungtask(const std::string& msg)
336 {
337 if (g_fd == NOT_OPEN) {
338 g_fd = open(g_sysKernelHungtaskUserlist, O_WRONLY);
339 if (g_fd < 0) {
340 XCOLLIE_LOGE("can't open hungtask file");
341 g_existFile = false;
342 return false;
343 }
344 }
345
346 size_t watchdogWrite = write(g_fd, msg.c_str(), msg.size());
347 if (watchdogWrite < 0 || watchdogWrite != msg.size()) {
348 XCOLLIE_LOGE("watchdogWrite msg failed");
349 close(g_fd);
350 g_fd = -1;
351 return false;
352 }
353 XCOLLIE_LOGE("Send %{public}s to hungtask Successful\n", msg.c_str());
354 return true;
355 }
356
KickWatchdog()357 bool WatchdogInner::KickWatchdog()
358 {
359 if (g_fd == NOT_OPEN) {
360 if (!SendMsgToHungtask(ON_KICK_TIME)) {
361 XCOLLIE_LOGE("KickWatchdog SendMsgToHungtask false");
362 return false;
363 }
364 }
365 return SendMsgToHungtask(KICK_TIME);
366 }
367
IpcCheck()368 void WatchdogInner::IpcCheck()
369 {
370 if (getuid() == WATCHED_UID) {
371 if (binderCheckHander_ == nullptr) {
372 auto runner = AppExecFwk::EventRunner::Create(IPC_CHECKER);
373 binderCheckHander_ = std::make_shared<AppExecFwk::EventHandler>(runner);
374 if (!InsertWatchdogTaskLocked(IPC_CHECKER, WatchdogTask(IPC_FULL, binderCheckHander_,
375 nullptr, IPC_CHECKER_TIME))) {
376 XCOLLIE_LOGE("Add %{public}s thread fail", IPC_CHECKER);
377 }
378 }
379 }
380 }
381
FfrtCallback(uint64_t taskId,const char * taskInfo,uint32_t delayedTaskCount)382 void WatchdogInner::FfrtCallback(uint64_t taskId, const char *taskInfo, uint32_t delayedTaskCount)
383 {
384 std::string desc = "FfrtCallback: task(";
385 desc += taskInfo;
386 desc += ") blocked " + std::to_string(FFRT_CALLBACK_TIME / TIME_MS_TO_S) + "s";
387 auto map = WatchdogInner::GetInstance().taskIdCnt;
388 auto search = map.find(taskId);
389 if (search != map.end()) {
390 search = map.erase(search);
391 desc += ", report twice instead of exiting process."; // 1s = 1000ms
392 WatchdogInner::SendFfrtEvent(desc, "SERVICE_BLOCK", taskInfo);
393 unsigned int leftTime = 3;
394 while (leftTime > 0) {
395 leftTime = sleep(leftTime);
396 }
397 XCOLLIE_LOGI("Process is going to exit, reason:%{public}s.", desc.c_str());
398 _exit(0);
399 } else {
400 map[taskId] = SERVICE_WARNING;
401 WatchdogInner::SendFfrtEvent(desc, "SERVICE_WARNING", taskInfo);
402 }
403 }
404
InitFfrtWatchdog()405 void WatchdogInner::InitFfrtWatchdog()
406 {
407 CreateWatchdogThreadIfNeed();
408 IpcCheck();
409 ffrt_watchdog_register(FfrtCallback, FFRT_CALLBACK_TIME, FFRT_CALLBACK_TIME);
410 }
411
SendFfrtEvent(const std::string & msg,const std::string & eventName,const char * taskInfo)412 void WatchdogInner::SendFfrtEvent(const std::string &msg, const std::string &eventName, const char * taskInfo)
413 {
414 uint32_t pid = getpid();
415 uint32_t gid = getgid();
416 uint32_t uid = getuid();
417 time_t curTime = time(nullptr);
418 std::string sendMsg = std::string((ctime(&curTime) == nullptr) ? "" : ctime(&curTime)) +
419 "\n" + msg + "\n";
420 char buff[BUF_SIZE_512] = {0};
421 ffrt_watchdog_dumpinfo(buff, BUF_SIZE_512);
422 sendMsg += buff;
423 HiSysEventWrite(HiSysEvent::Domain::FRAMEWORK, eventName, HiSysEvent::EventType::FAULT,
424 "PID", pid,
425 "TGID", gid,
426 "UID", uid,
427 "MODULE_NAME", taskInfo,
428 "PROCESS_NAME", GetSelfProcName(),
429 "MSG", sendMsg);
430 XCOLLIE_LOGI("send event [FRAMEWORK,%{public}s], msg=%{public}s", eventName.c_str(), msg.c_str());
431 }
432
Stop()433 bool WatchdogInner::Stop()
434 {
435 isNeedStop_.store(true);
436 condition_.notify_all();
437 if (threadLoop_ != nullptr && threadLoop_->joinable()) {
438 threadLoop_->join();
439 threadLoop_ = nullptr;
440 }
441 return true;
442 }
443 } // end of namespace HiviewDFX
444 } // end of namespace OHOS
445