1 /*
2 * Copyright (c) 2022 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "watchdog_task.h"
17
18 #include <cinttypes>
19 #include <ctime>
20 #include <cstdio>
21 #include <securec.h>
22 #include <thread>
23
24 #include <fcntl.h>
25 #include <dlfcn.h>
26 #include <unistd.h>
27
28 #include "backtrace_local.h"
29 #include "hisysevent.h"
30 #include "watchdog_inner.h"
31 #include "xcollie_define.h"
32 #include "xcollie_utils.h"
33
34 namespace OHOS {
35 namespace HiviewDFX {
namespace {
// Process names that SendXCollieEvent() reports as "SERVICE_TIMEOUT";
// any other process at or below the uid threshold is reported as
// "SERVICE_TIMEOUT_WARNING".
constexpr const char* CORE_PROCS[] = {
    "anco_service_br",
    "aptouch_daemon",
    "foundation",
    "init",
    "multimodalinput",
    "ohos.sceneboard",
    "render_service",
};
} // namespace
41 int64_t WatchdogTask::curId = 0;
42 const char* BBOX_PATH = "/dev/bbox";
43 struct HstackVal {
44 uint32_t magic;
45 pid_t tid;
46 char hstackLogBuff[BUFF_STACK_SIZE];
47 };
WatchdogTask(std::string name,std::shared_ptr<AppExecFwk::EventHandler> handler,TimeOutCallback timeOutCallback,uint64_t interval)48 WatchdogTask::WatchdogTask(std::string name, std::shared_ptr<AppExecFwk::EventHandler> handler,
49 TimeOutCallback timeOutCallback, uint64_t interval)
50 : name(name), task(nullptr), timeOutCallback(timeOutCallback), timeout(0), func(nullptr),
51 arg(nullptr), flag(0), timeLimit(0), countLimit(0)
52 {
53 id = ++curId;
54 checker = std::make_shared<HandlerChecker>(name, handler);
55 checkInterval = interval;
56 nextTickTime = GetCurrentTickMillseconds();
57 isTaskScheduled = false;
58 isOneshotTask = false;
59 watchdogTid = getproctid();
60 }
61
WatchdogTask(std::string name,Task && task,uint64_t delay,uint64_t interval,bool isOneshot)62 WatchdogTask::WatchdogTask(std::string name, Task&& task, uint64_t delay, uint64_t interval, bool isOneshot)
63 : name(name), task(std::move(task)), timeOutCallback(nullptr), checker(nullptr), timeout(0), func(nullptr),
64 arg(nullptr), flag(0), watchdogTid(0), timeLimit(0), countLimit(0)
65 {
66 id = ++curId;
67 checkInterval = interval;
68 nextTickTime = GetCurrentTickMillseconds() + delay;
69 isTaskScheduled = false;
70 isOneshotTask = isOneshot;
71 }
72
WatchdogTask(std::string name,unsigned int timeout,XCollieCallback func,void * arg,unsigned int flag)73 WatchdogTask::WatchdogTask(std::string name, unsigned int timeout, XCollieCallback func, void *arg, unsigned int flag)
74 : name(name), task(nullptr), timeOutCallback(nullptr), checker(nullptr), timeout(timeout), func(std::move(func)),
75 arg(arg), flag(flag), timeLimit(0), countLimit(0)
76 {
77 id = ++curId;
78 checkInterval = 0;
79 nextTickTime = GetCurrentTickMillseconds() + timeout;
80 isTaskScheduled = false;
81 isOneshotTask = true;
82 watchdogTid = getproctid();
83 }
84
WatchdogTask(std::string name,unsigned int timeLimit,int countLimit)85 WatchdogTask::WatchdogTask(std::string name, unsigned int timeLimit, int countLimit)
86 : name(name), task(nullptr), timeOutCallback(nullptr), timeout(0), func(nullptr), arg(nullptr), flag(0),
87 isTaskScheduled(false), isOneshotTask(false), watchdogTid(0), timeLimit(timeLimit), countLimit(countLimit)
88 {
89 id = ++curId;
90 checkInterval = timeLimit / timeLimitIntervalRatio;
91 nextTickTime = GetCurrentTickMillseconds();
92 }
93
DoCallback()94 void WatchdogTask::DoCallback()
95 {
96 if (func) {
97 XCOLLIE_LOGE("XCollieInner::DoTimerCallback %{public}s callback", name.c_str());
98 func(arg);
99 }
100 if (WatchdogInner::GetInstance().IsCallbackLimit(flag)) {
101 XCOLLIE_LOGE("Too many callback triggered in a short time, %{public}s skip", name.c_str());
102 return;
103 }
104 if (flag & XCOLLIE_FLAG_LOG) {
105 /* send to freezedetector */
106 std::string msg = "timeout: " + name + " to check " + std::to_string(timeout) + "ms ago";
107 SendXCollieEvent(name, msg);
108 }
109 if (getuid() > uidTypeThreshold) {
110 XCOLLIE_LOGI("check uid is app, do not exit");
111 return;
112 }
113 if (flag & XCOLLIE_FLAG_RECOVERY) {
114 XCOLLIE_LOGE("%{public}s blocked, after timeout %{public}llu ,process will exit", name.c_str(),
115 static_cast<long long>(timeout));
116 std::thread exitFunc([]() {
117 std::string description = "timeout, exit...";
118 WatchdogInner::LeftTimeExitProcess(description);
119 });
120 if (exitFunc.joinable()) {
121 exitFunc.detach();
122 }
123 }
124 }
125
Run(uint64_t now)126 void WatchdogTask::Run(uint64_t now)
127 {
128 if (countLimit > 0) {
129 TimerCountTask();
130 return;
131 }
132
133 constexpr int resetRatio = 2;
134 if ((checkInterval != 0) && (now - nextTickTime > (resetRatio * checkInterval))) {
135 XCOLLIE_LOGI("checker thread may be blocked, reset next tick time."
136 "now:%{public}" PRIu64 " expect:%{public}" PRIu64 " interval:%{public}" PRIu64 "",
137 now, nextTickTime, checkInterval);
138 nextTickTime = now;
139 isTaskScheduled = false;
140 return;
141 }
142
143 if (timeout != 0) {
144 DoCallback();
145 } else if (task != nullptr) {
146 task();
147 } else {
148 RunHandlerCheckerTask();
149 }
150 }
151
TimerCountTask()152 void WatchdogTask::TimerCountTask()
153 {
154 int size = static_cast<int>(triggerTimes.size());
155 if (size < countLimit) {
156 return;
157 }
158 XCOLLIE_LOGD("timeLimit : %{public}" PRIu64 ", countLimit : %{public}d, triggerTimes size : %{public}d",
159 timeLimit, countLimit, size);
160
161 while (size >= countLimit) {
162 uint64_t timeInterval = triggerTimes[size -1] - triggerTimes[size - countLimit];
163 if (timeInterval < timeLimit) {
164 std::string sendMsg = name + " occured " + std::to_string(countLimit) + " times in " +
165 std::to_string(timeInterval) + " ms, " + message;
166 HiSysEventWrite(HiSysEvent::Domain::FRAMEWORK, name, HiSysEvent::EventType::FAULT,
167 "PID", getprocpid(), "PROCESS_NAME", GetSelfProcName(), "MSG", sendMsg);
168 triggerTimes.clear();
169 return;
170 }
171 size--;
172 }
173
174 if (triggerTimes.size() > static_cast<unsigned long>(countLimit * countLimitNumMaxRatio)) {
175 triggerTimes.erase(triggerTimes.begin(), triggerTimes.end() - countLimit);
176 }
177 }
178
RunHandlerCheckerTask()179 void WatchdogTask::RunHandlerCheckerTask()
180 {
181 if (checker == nullptr) {
182 return;
183 }
184
185 if (!isTaskScheduled) {
186 checker->ScheduleCheck();
187 isTaskScheduled = true;
188 } else {
189 if (EvaluateCheckerState() == CheckStatus::COMPLETED) {
190 // allow next check
191 isTaskScheduled = false;
192 }
193 }
194 }
195
SendEvent(const std::string & msg,const std::string & eventName) const196 void WatchdogTask::SendEvent(const std::string &msg, const std::string &eventName) const
197 {
198 int32_t pid = getprocpid();
199 if (IsProcessDebug(pid)) {
200 XCOLLIE_LOGI("heap dump or debug for %{public}d, don't report.", pid);
201 return;
202 }
203 uint32_t gid = getgid();
204 uint32_t uid = getuid();
205 time_t curTime = time(nullptr);
206 std::string sendMsg = std::string((ctime(&curTime) == nullptr) ? "" : ctime(&curTime)) +
207 "\n" + msg + "\n";
208 sendMsg += checker->GetDumpInfo();
209
210 int ret = HiSysEventWrite(HiSysEvent::Domain::FRAMEWORK, eventName, HiSysEvent::EventType::FAULT,
211 "PID", pid, "TID", watchdogTid < pid ? pid : watchdogTid, "TGID", gid, "UID", uid, "MODULE_NAME", name,
212 "PROCESS_NAME", GetSelfProcName(), "MSG", sendMsg, "STACK", GetProcessStacktrace());
213 XCOLLIE_LOGI("hisysevent write result=%{public}d, send event [FRAMEWORK,%{public}s], msg=%{public}s",
214 ret, eventName.c_str(), msg.c_str());
215 }
216
SendXCollieEvent(const std::string & timerName,const std::string & keyMsg) const217 void WatchdogTask::SendXCollieEvent(const std::string &timerName, const std::string &keyMsg) const
218 {
219 int32_t pid = getprocpid();
220 if (IsProcessDebug(pid)) {
221 XCOLLIE_LOGI("heap dump or debug for %{public}d, don't report.", pid);
222 return;
223 }
224 uint32_t gid = getgid();
225 uint32_t uid = getuid();
226 time_t curTime = time(nullptr);
227 std::string sendMsg = std::string((ctime(&curTime) == nullptr) ? "" : ctime(&curTime)) + "\n" +
228 "timeout timer: " + timerName + "\n" + keyMsg;
229
230 struct HstackVal val;
231 if (memset_s(&val, sizeof(val), 0, sizeof(val)) != 0) {
232 XCOLLIE_LOGE("memset val failed\n");
233 return;
234 }
235 val.tid = watchdogTid;
236 val.magic = MAGIC_NUM;
237 int fd = open(BBOX_PATH, O_WRONLY | O_CLOEXEC);
238 if (fd < 0) {
239 XCOLLIE_LOGE("open %{public}s failed", BBOX_PATH);
240 return;
241 }
242 int ret = ioctl(fd, LOGGER_GET_STACK, &val);
243 close(fd);
244 if (ret != 0) {
245 XCOLLIE_LOGE("XCollieDumpKernel getStack failed");
246 } else {
247 XCOLLIE_LOGI("XCollieDumpKernel buff is %{public}s", val.hstackLogBuff);
248 }
249
250 std::string eventName = "APP_HICOLLIE";
251 std::string processName = GetSelfProcName();
252 std::string stack = "";
253 if (uid <= uidTypeThreshold) {
254 eventName = std::find(std::begin(CORE_PROCS), std::end(CORE_PROCS), processName) != std::end(CORE_PROCS) ?
255 "SERVICE_TIMEOUT" : "SERVICE_TIMEOUT_WARNING";
256 stack = GetProcessStacktrace();
257 } else if (!GetBacktraceStringByTid(stack, watchdogTid, 0, true)) {
258 XCOLLIE_LOGE("get tid:%{public}d BacktraceString failed", watchdogTid);
259 }
260
261 int result = HiSysEventWrite(HiSysEvent::Domain::FRAMEWORK, eventName, HiSysEvent::EventType::FAULT, "PID", pid,
262 "TID", watchdogTid, "TGID", gid, "UID", uid, "MODULE_NAME", timerName, "PROCESS_NAME", processName,
263 "MSG", sendMsg, "STACK", stack + "\n"+ (ret != 0 ? "" : val.hstackLogBuff));
264 XCOLLIE_LOGI("hisysevent write result=%{public}d, send event [FRAMEWORK,%{public}s], "
265 "msg=%{public}s", result, eventName.c_str(), keyMsg.c_str());
266 }
267
EvaluateCheckerState()268 int WatchdogTask::EvaluateCheckerState()
269 {
270 int waitState = checker->GetCheckState();
271 if (waitState == CheckStatus::COMPLETED) {
272 return waitState;
273 } else if (waitState == CheckStatus::WAITED_HALF) {
274 XCOLLIE_LOGI("Watchdog half-block happened, send event");
275 std::string description = GetBlockDescription(checkInterval / 1000); // 1s = 1000ms
276 if (timeOutCallback != nullptr) {
277 timeOutCallback(name, waitState);
278 } else {
279 if (name.compare(IPC_FULL) != 0) {
280 SendEvent(description, "SERVICE_WARNING");
281 }
282 }
283 } else {
284 XCOLLIE_LOGI("Watchdog happened, send event twice.");
285 std::string description = GetBlockDescription(checkInterval / 1000) +
286 ", report twice instead of exiting process."; // 1s = 1000ms
287 if (timeOutCallback != nullptr) {
288 timeOutCallback(name, waitState);
289 } else {
290 if (name.compare(IPC_FULL) == 0) {
291 SendEvent(description, IPC_FULL);
292 } else {
293 SendEvent(description, "SERVICE_BLOCK");
294 }
295 // peer binder log is collected in hiview asynchronously
296 // if blocked process exit early, binder blocked state will change
297 // thus delay exit and let hiview have time to collect log.
298 WatchdogInner::KillPeerBinderProcess(description);
299 }
300 }
301 return waitState;
302 }
303
GetBlockDescription(uint64_t interval)304 std::string WatchdogTask::GetBlockDescription(uint64_t interval)
305 {
306 std::string desc = "Watchdog: thread(";
307 desc += name;
308 desc += ") blocked " + std::to_string(interval) + "s";
309 return desc;
310 }
311 } // end of namespace HiviewDFX
312 } // end of namespace OHOS
313