• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2022 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "watchdog_task.h"
17 
18 #include <cinttypes>
19 #include <ctime>
20 #include <cstdio>
21 #include <securec.h>
22 #include <thread>
23 
24 #include <fcntl.h>
25 #include <dlfcn.h>
26 #include <unistd.h>
27 
28 #include "backtrace_local.h"
29 #include "hisysevent.h"
30 #include "watchdog_inner.h"
31 #include "xcollie_define.h"
32 #include "xcollie_utils.h"
33 
34 namespace OHOS {
35 namespace HiviewDFX {
36 namespace {
37 const char* BBOX_PATH = "/dev/bbox";
38 #ifdef SUSPEND_CHECK_ENABLE
39 const char* LAST_SUSPEND_TIME_PATH = "/sys/power/last_sr";
40 static const double SUSPEND_TIME_RATIO = 0.2;
41 #endif
42 static const int COUNT_LIMIT_NUM_MAX_RATIO = 2;
43 static const int TIME_LIMIT_NUM_MAX_RATIO = 2;
44 static const int UID_TYPE_THRESHOLD = 20000;
45 const int BUFF_STACK_SIZE = 20 * 1024;
46 constexpr int32_t SAMGR_INIT_UID = 5555;
47 constexpr const char* CORE_PROCS[] = {
48     "anco_service_broker", "aptouch_daemon", "foundation", "init",
49     "multimodalinput", "com.ohos.sceneboard", "render_service"
50 };
51 }
52 int64_t WatchdogTask::curId = 0;
53 struct HstackVal {
54     uint32_t magic;
55     pid_t tid;
56     char hstackLogBuff[BUFF_STACK_SIZE];
57 };
WatchdogTask(std::string name,std::shared_ptr<AppExecFwk::EventHandler> handler,TimeOutCallback timeOutCallback,uint64_t interval)58 WatchdogTask::WatchdogTask(std::string name, std::shared_ptr<AppExecFwk::EventHandler> handler,
59     TimeOutCallback timeOutCallback, uint64_t interval)
60     : name(name), task(nullptr), timeOutCallback(timeOutCallback), timeout(0), func(nullptr), arg(nullptr), flag(0),
61       watchdogTid(0), timeLimit(0), countLimit(0), bootTimeStart(0), monoTimeStart(0), reportCount(0)
62 {
63     id = ++curId;
64     checker = std::make_shared<HandlerChecker>(name, handler);
65     checkInterval = interval;
66     nextTickTime = GetCurrentTickMillseconds();
67     isTaskScheduled = false;
68     isOneshotTask = false;
69 }
70 
WatchdogTask(uint64_t interval,IpcFullCallback func,void * arg,unsigned int flag)71 WatchdogTask::WatchdogTask(uint64_t interval, IpcFullCallback func, void *arg, unsigned int flag)
72     : name(IPC_FULL_TASK), task(nullptr), timeOutCallback(nullptr), timeout(0), func(std::move(func)), arg(arg),
73       flag(flag), watchdogTid(0), timeLimit(0), countLimit(0), bootTimeStart(0), monoTimeStart(0), reportCount(0)
74 {
75     id = ++curId;
76     checker = std::make_shared<HandlerChecker>(IPC_FULL_TASK, nullptr);
77     checkInterval = interval;
78     nextTickTime = GetCurrentTickMillseconds();
79     isTaskScheduled = false;
80     isOneshotTask = false;
81 }
82 
WatchdogTask(std::string name,Task && task,uint64_t delay,uint64_t interval,bool isOneshot)83 WatchdogTask::WatchdogTask(std::string name, Task&& task, uint64_t delay, uint64_t interval,  bool isOneshot)
84     : name(name), task(std::move(task)), timeOutCallback(nullptr), checker(nullptr), timeout(0), func(nullptr),
85       arg(nullptr), flag(0), watchdogTid(0), timeLimit(0), countLimit(0), bootTimeStart(0), monoTimeStart(0),
86       reportCount(0)
87 {
88     id = ++curId;
89     checkInterval = interval;
90     nextTickTime = GetCurrentTickMillseconds() + delay;
91     isTaskScheduled = false;
92     isOneshotTask = isOneshot;
93 }
94 
WatchdogTask(std::string name,unsigned int timeout,XCollieCallback func,void * arg,unsigned int flag)95 WatchdogTask::WatchdogTask(std::string name, unsigned int timeout, XCollieCallback func, void *arg, unsigned int flag)
96     : name(name), task(nullptr), timeOutCallback(nullptr), checker(nullptr), timeout(timeout), func(std::move(func)),
97       arg(arg), flag(flag), timeLimit(0), countLimit(0), reportCount(0)
98 {
99     id = ++curId;
100     checkInterval = 0;
101     nextTickTime = GetCurrentTickMillseconds() + timeout;
102     isTaskScheduled = false;
103     isOneshotTask = true;
104     watchdogTid = getproctid();
105     CalculateTimes(bootTimeStart, monoTimeStart);
106 }
107 
WatchdogTask(std::string name,unsigned int timeLimit,int countLimit)108 WatchdogTask::WatchdogTask(std::string name, unsigned int timeLimit, int countLimit)
109     : name(name), task(nullptr), timeOutCallback(nullptr), timeout(0), func(nullptr), arg(nullptr), flag(0),
110       isTaskScheduled(false), isOneshotTask(false), watchdogTid(0), timeLimit(timeLimit), countLimit(countLimit),
111       bootTimeStart(0), monoTimeStart(0), reportCount(0)
112 {
113     id = ++curId;
114     checkInterval = timeLimit / TIME_LIMIT_NUM_MAX_RATIO;
115     nextTickTime = GetCurrentTickMillseconds();
116 }
117 
DoCallback()118 void WatchdogTask::DoCallback()
119 {
120     std::string faultTimeStr = "\nFault time:" + FormatTime("%Y/%m/%d-%H:%M:%S") + "\n";
121     if (func) {
122         XCOLLIE_LOGE("XCollieInner::DoTimerCallback %{public}s callback", name.c_str());
123         func(arg);
124     }
125     if (WatchdogInner::GetInstance().IsCallbackLimit(flag)) {
126         XCOLLIE_LOGE("Too many callback triggered in a short time, %{public}s skip", name.c_str());
127         return;
128     }
129     if (flag & XCOLLIE_FLAG_LOG) {
130         /* send to freezedetector */
131         std::string msg = "timeout: " + name + " to check " + std::to_string(timeout) + "ms ago";
132         SendXCollieEvent(name, msg, faultTimeStr);
133     }
134     if (getuid() > UID_TYPE_THRESHOLD) {
135         XCOLLIE_LOGI("check uid is app, do not exit");
136         return;
137     }
138     if (flag & XCOLLIE_FLAG_RECOVERY) {
139         XCOLLIE_LOGE("%{public}s blocked, after timeout %{public}llu ,process will exit", name.c_str(),
140             static_cast<long long>(timeout));
141         std::thread exitFunc([] {
142             std::string description = "timeout, exit...";
143             WatchdogInner::LeftTimeExitProcess(description);
144         });
145         if (exitFunc.joinable()) {
146             exitFunc.detach();
147         }
148     }
149 }
150 
151 #ifdef SUSPEND_CHECK_ENABLE
ShouldSkipCheckForSuspend(uint64_t & now,double & lastSuspendStartTime,double & lastSuspendEndTime)152 bool WatchdogTask::ShouldSkipCheckForSuspend(uint64_t &now, double &lastSuspendStartTime, double &lastSuspendEndTime)
153 {
154     if (lastSuspendStartTime < 0 || lastSuspendEndTime < 0) {
155         return false;
156     }
157     if (lastSuspendStartTime > lastSuspendEndTime) {
158         return true;
159     }
160     if ((lastSuspendEndTime - lastSuspendStartTime) > (SUSPEND_TIME_RATIO * checkInterval) &&
161         (static_cast<double>(now) - lastSuspendStartTime) < checkInterval) {
162         return true;
163     }
164     return false;
165 }
166 #endif
167 
Run(uint64_t now)168 void WatchdogTask::Run(uint64_t now)
169 {
170     if (countLimit > 0) {
171         TimerCountTask();
172         return;
173     }
174 #ifdef SUSPEND_CHECK_ENABLE
175     auto [lastSuspendStartTime, lastSuspendEndTime] = GetSuspendTime(LAST_SUSPEND_TIME_PATH, now);
176     if (ShouldSkipCheckForSuspend(now, lastSuspendStartTime, lastSuspendEndTime)) {
177         XCOLLIE_LOGI("in suspend status, ship check, reset next tick time.interval:%{public}" PRIu64
178             " now:%{public}" PRIu64 " lastSuspendStartTime:%{public}f lastSuspendEndTime:%{public}f",
179             checkInterval, now, lastSuspendStartTime, lastSuspendEndTime);
180         return;
181     }
182 #endif
183     constexpr int resetRatio = 2;
184     if ((checkInterval != 0) && (now - nextTickTime > (resetRatio * checkInterval))) {
185         XCOLLIE_LOGI("checker thread may be blocked, reset next tick time."
186             "now:%{public}" PRIu64 " expect:%{public}" PRIu64 " interval:%{public}" PRIu64 "",
187             now, nextTickTime, checkInterval);
188         nextTickTime = now;
189         isTaskScheduled = false;
190         return;
191     }
192 
193     if (timeout != 0) {
194         DoCallback();
195     } else if (task != nullptr) {
196         task();
197     } else {
198         RunHandlerCheckerTask();
199     }
200 }
201 
TimerCountTask()202 void WatchdogTask::TimerCountTask()
203 {
204     int size = static_cast<int>(triggerTimes.size());
205     if (size < countLimit) {
206         return;
207     }
208     XCOLLIE_LOGD("timeLimit : %{public}" PRIu64 ", countLimit : %{public}d, triggerTimes size : %{public}d",
209         timeLimit, countLimit, size);
210 
211     while (size >= countLimit) {
212         uint64_t timeInterval = triggerTimes[size -1] - triggerTimes[size - countLimit];
213         if (timeInterval < timeLimit) {
214             std::string sendMsg = name + " occured " + std::to_string(countLimit) + " times in " +
215                 std::to_string(timeInterval) + " ms, " + message;
216 #ifdef HISYSEVENT_ENABLE
217             HiSysEventWrite(HiSysEvent::Domain::FRAMEWORK, name, HiSysEvent::EventType::FAULT,
218                 "PID", getprocpid(), "PROCESS_NAME", GetSelfProcName(), "MSG", sendMsg);
219 #else
220        XCOLLIE_LOGI("hisysevent not exists");
221 #endif
222             triggerTimes.clear();
223             return;
224         }
225         size--;
226     }
227 
228     if (triggerTimes.size() > static_cast<unsigned long>(countLimit * COUNT_LIMIT_NUM_MAX_RATIO)) {
229         triggerTimes.erase(triggerTimes.begin(), triggerTimes.end() - countLimit);
230     }
231 }
232 
RunHandlerCheckerTask()233 void WatchdogTask::RunHandlerCheckerTask()
234 {
235     if (checker == nullptr) {
236         return;
237     }
238 
239     if (!isTaskScheduled) {
240         checker->ScheduleCheck();
241         isTaskScheduled = true;
242     } else {
243         if (EvaluateCheckerState() == CheckStatus::COMPLETED) {
244             // allow next check
245             isTaskScheduled = false;
246         }
247     }
248 }
249 
SendEvent(const std::string & msg,const std::string & eventName,const std::string & faultTimeStr)250 void WatchdogTask::SendEvent(const std::string &msg, const std::string &eventName, const std::string& faultTimeStr)
251 {
252     int32_t pid = getprocpid();
253     if (IsProcessDebug(pid)) {
254         XCOLLIE_LOGI("heap dump or debug for %{public}d, don't report.", pid);
255         return;
256     }
257     uint32_t gid = getgid();
258     uint32_t uid = getuid();
259     time_t curTime = time(nullptr);
260     std::string sendMsg = std::string((ctime(&curTime) == nullptr) ? "" : ctime(&curTime)) +
261         "\n" + msg + "\n";
262     sendMsg += checker->GetDumpInfo();
263 
264     watchdogTid = pid;
265     std::string tidFrontStr = "Thread ID = ";
266     std::string tidRearStr = ") is running";
267     std::size_t frontPos = sendMsg.find(tidFrontStr);
268     std::size_t rearPos = sendMsg.find(tidRearStr);
269     std::size_t startPos = frontPos + tidFrontStr.length();
270     if (frontPos != std::string::npos && rearPos != std::string::npos && rearPos > startPos) {
271         size_t tidLength = rearPos - startPos;
272         if (tidLength < std::to_string(INT32_MAX).length()) {
273             std::string tidStr = sendMsg.substr(startPos, tidLength);
274             if (std::all_of(std::begin(tidStr), std::end(tidStr), [] (const char &c) {
275                 return isdigit(c);
276             })) {
277                 watchdogTid = std::stoi(tidStr);
278             }
279         }
280     }
281 
282     sendMsg += faultTimeStr;
283 #ifdef HISYSEVENT_ENABLE
284     int ret = HiSysEventWrite(HiSysEvent::Domain::FRAMEWORK, eventName, HiSysEvent::EventType::FAULT,
285         "PID", pid, "TID", watchdogTid, "TGID", gid, "UID", uid, "MODULE_NAME", name,
286         "PROCESS_NAME", GetSelfProcName(), "MSG", sendMsg, "STACK", GetProcessStacktrace());
287     if (ret == ERR_OVER_SIZE) {
288         std::string stack = "";
289         GetBacktraceStringByTid(stack, watchdogTid, 0, true);
290         ret = HiSysEventWrite(HiSysEvent::Domain::FRAMEWORK, eventName, HiSysEvent::EventType::FAULT,
291             "PID", pid, "TID", watchdogTid, "TGID", gid, "UID", uid, "MODULE_NAME", name,
292             "PROCESS_NAME", GetSelfProcName(), "MSG", sendMsg, "STACK", stack);
293     }
294 
295     XCOLLIE_LOGI("hisysevent write result=%{public}d, send event [FRAMEWORK,%{public}s], msg=%{public}s",
296         ret, eventName.c_str(), msg.c_str());
297 #else
298        XCOLLIE_LOGI("hisysevent not exists");
299 #endif
300 }
301 
DumpKernelStack(struct HstackVal & val,int & ret) const302 void WatchdogTask::DumpKernelStack(struct HstackVal& val, int& ret) const
303 {
304     int fd = open(BBOX_PATH, O_WRONLY | O_CLOEXEC);
305     if (fd < 0) {
306         XCOLLIE_LOGE("open %{public}s failed", BBOX_PATH);
307         return;
308     }
309     ret = ioctl(fd, LOGGER_GET_STACK, &val);
310     close(fd);
311     if (ret != 0) {
312         XCOLLIE_LOGE("XCollieDumpKernel getStack failed");
313     } else {
314         XCOLLIE_LOGI("XCollieDumpKernel buff is %{public}s", val.hstackLogBuff);
315     }
316 }
317 
SendXCollieEvent(const std::string & timerName,const std::string & keyMsg,const std::string & faultTimeStr) const318 void WatchdogTask::SendXCollieEvent(const std::string &timerName, const std::string &keyMsg,
319     const std::string& faultTimeStr) const
320 {
321     int32_t pid = getprocpid();
322     if (IsProcessDebug(pid)) {
323         XCOLLIE_LOGI("heap dump or debug for %{public}d, don't report.", pid);
324         return;
325     }
326     uint32_t gid = getgid();
327     uint32_t uid = getuid();
328     time_t curTime = time(nullptr);
329     std::string sendMsg = std::string((ctime(&curTime) == nullptr) ? "" : ctime(&curTime)) + "\n" +
330         "timeout timer: " + timerName + "\n" + keyMsg + faultTimeStr;
331 
332     struct HstackVal val;
333     if (memset_s(&val, sizeof(val), 0, sizeof(val)) != 0) {
334         XCOLLIE_LOGE("memset val failed\n");
335         return;
336     }
337     val.tid = watchdogTid;
338     val.magic = MAGIC_NUM;
339     int ret = 0;
340     DumpKernelStack(val, ret);
341     std::string eventName = "APP_HICOLLIE";
342     std::string processName = GetSelfProcName();
343     std::string stack = "";
344     if (uid <= UID_TYPE_THRESHOLD) {
345         eventName = std::find(std::begin(CORE_PROCS), std::end(CORE_PROCS), processName) != std::end(CORE_PROCS) ?
346             "SERVICE_TIMEOUT" : "SERVICE_TIMEOUT_WARNING";
347         stack = GetProcessStacktrace();
348     } else if (!GetBacktraceStringByTid(stack, watchdogTid, 0, true)) {
349         XCOLLIE_LOGE("get tid:%{public}d BacktraceString failed", watchdogTid);
350     }
351 #ifdef HISYSEVENT_ENABLE
352     int result = HiSysEventWrite(HiSysEvent::Domain::FRAMEWORK, eventName, HiSysEvent::EventType::FAULT, "PID", pid,
353         "TID", watchdogTid, "TGID", gid, "UID", uid, "MODULE_NAME", timerName, "PROCESS_NAME", processName,
354         "MSG", sendMsg, "STACK", stack + "\n"+ (ret != 0 ? "" : val.hstackLogBuff), "SPECIFICSTACK_NAME",
355             WatchdogInner::GetInstance().GetSpecifiedProcessName());
356     XCOLLIE_LOGI("hisysevent write result=%{public}d, send event [FRAMEWORK,%{public}s], "
357         "msg=%{public}s", result, eventName.c_str(), keyMsg.c_str());
358 #else
359        XCOLLIE_LOGI("hisysevent not exists");
360 #endif
361 }
362 
EvaluateCheckerState()363 int WatchdogTask::EvaluateCheckerState()
364 {
365     int waitState = checker->GetCheckState();
366     if (waitState == CheckStatus::COMPLETED) {
367         reportCount = 0;
368         return waitState;
369     }
370     std::string faultTimeStr = "\nFault time:" + FormatTime("%Y/%m/%d-%H:%M:%S") + "\n";
371     if (func) { // only ipc full exec func
372         func(arg);
373         flag = (flag & XCOLLIE_FLAG_LOG) ? XCOLLIE_FLAG_LOG : XCOLLIE_FLAG_NOOP;
374     }
375 
376     XCOLLIE_LOGI("%{public}s block happened, waitState = %{public}d", name.c_str(), waitState);
377     if (timeOutCallback != nullptr) {
378         timeOutCallback(name, waitState);
379         return waitState;
380     }
381     std::string description = GetBlockDescription(checkInterval / TO_MILLISECOND_MULTPLE);
382     if (waitState == CheckStatus::WAITED_HALF) {
383         description += "\nReportCount = " + std::to_string(++reportCount);
384         if (name != IPC_FULL_TASK) {
385             SendEvent(description, "SERVICE_WARNING", faultTimeStr);
386         } else if (flag & XCOLLIE_FLAG_LOG) {
387             SendEvent(description, "IPC_FULL_WARNING", faultTimeStr);
388         }
389     } else {
390         description += ", report twice instead of exiting process.\nReportCount = " + std::to_string(++reportCount);
391         if (name != IPC_FULL_TASK) {
392             SendEvent(description, "SERVICE_BLOCK", faultTimeStr);
393         } else if (flag & XCOLLIE_FLAG_LOG) {
394             SendEvent(description, "IPC_FULL", faultTimeStr);
395         }
396         // peer binder log is collected in hiview asynchronously
397         // if blocked process exit early, binder blocked state will change
398         // thus delay exit and let hiview have time to collect log.
399         if ((name != IPC_FULL_TASK) || (flag & XCOLLIE_FLAG_RECOVERY)) {
400             WatchdogInner::KillPeerBinderProcess(description);
401         }
402     }
403     return waitState;
404 }
405 
GetBlockDescription(uint64_t interval)406 std::string WatchdogTask::GetBlockDescription(uint64_t interval)
407 {
408     std::string desc = "Watchdog: thread(";
409     desc += name;
410     desc += ") blocked " + std::to_string(interval) + "s";
411     return desc;
412 }
413 } // end of namespace HiviewDFX
414 } // end of namespace OHOS
415