1 /*
2 * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "core/common/watch_dog.h"
17
18 #include <cerrno>
19 #include <csignal>
20 #include <cstdint>
21 #include <pthread.h>
22 #include <shared_mutex>
23
24 #include "base/log/event_report.h"
25 #include "base/log/log.h"
26 #include "base/thread/background_task_executor.h"
27 #include "base/thread/task_executor.h"
28 #include "base/utils/utils.h"
29 #include "bridge/common/utils/engine_helper.h"
30 #include "core/common/ace_application_info.h"
31 #include "core/common/ace_engine.h"
32 #include "core/common/anr_thread.h"
33
34 namespace OHOS::Ace {
35 namespace {
36
// Delays passed to AnrThread::PostTaskToTaskRunner between two ThreadWatcher::Check() runs; Check() picks
// one according to the watcher's State. NOTE(review): the time unit is defined by PostTaskToTaskRunner,
// which is not visible here — presumably seconds, confirm.
constexpr int32_t NORMAL_CHECK_PERIOD = 3;
constexpr int32_t WARNING_CHECK_PERIOD = 2;
constexpr int32_t FREEZE_CHECK_PERIOD = 1;
// Thread names stored by ThreadWatcher::InitThreadName() and used in log messages.
constexpr char JS_THREAD_NAME[] = "JS";
constexpr char UI_THREAD_NAME[] = "UI";
constexpr char UNKNOWN_THREAD_NAME[] = "unknown thread";
// Threshold compared against (GetMilliseconds() - bombId) in ThreadWatcher::DetonatedBomb().
constexpr uint64_t ANR_INPUT_FREEZE_TIME = 5000;
// Delay value used by WatchDog::BuriedBomb()/DefusingBomb() when posting to the ANR thread.
constexpr int32_t IMMEDIATELY_PERIOD = 0;
// Number of FREEZE-state checks after which Check() allows the ANR dialog to be shown again.
constexpr int32_t ANR_DIALOG_BLOCK_TIME = 20;

// Stuck-detection state of a watched thread; Check() escalates NORMAL -> WARNING -> FREEZE while the
// thread stays stuck and resets to NORMAL once it is responsive again.
enum class State { NORMAL, WARNING, FREEZE };
48
49 #if defined(OHOS_PLATFORM) || defined(ANDROID_PLATFORM)
// Signal number that CheckGcSignal() polls for in order to trigger garbage collection.
constexpr int32_t SIGNAL_FOR_GC = 60;
// Delay passed to AnrThread::PostTaskToTaskRunner between two GC-signal polls.
constexpr int32_t GC_CHECK_PERIOD = 1;
// Thread recorded by InitializeGcTrigger(); OnSignalReceive() forwards the GC signal to it.
pthread_t g_signalThread;
53
CheckGcSignal()54 void CheckGcSignal()
55 {
56 // Check if GC signal is in pending signal set
57 sigset_t sigSet;
58 sigemptyset(&sigSet);
59 sigaddset(&sigSet, SIGNAL_FOR_GC);
60 struct timespec interval = {
61 .tv_sec = 0,
62 .tv_nsec = 0,
63 };
64 int32_t result = sigtimedwait(&sigSet, nullptr, &interval);
65 if (result < 0) {
66 if (errno != EAGAIN && errno != EINTR) {
67 LOGE("Failed to wait signals, errno = %{public}d", errno);
68 return;
69 }
70 } else {
71 ACE_DCHECK(result == SIGNAL_FOR_GC);
72
73 // Start GC
74 LOGE("Receive GC signal");
75 AceEngine::Get().TriggerGarbageCollection();
76 }
77
78 // Check again
79 AnrThread::AnrThread::PostTaskToTaskRunner(CheckGcSignal, GC_CHECK_PERIOD);
80 }
81
BlockGcSignal()82 inline int32_t BlockGcSignal()
83 {
84 // Block GC signal on current thread.
85 sigset_t sigSet;
86 sigemptyset(&sigSet);
87 sigaddset(&sigSet, SIGNAL_FOR_GC);
88 return pthread_sigmask(SIG_BLOCK, &sigSet, nullptr);
89 }
90
OnSignalReceive(int32_t sigNum)91 void OnSignalReceive(int32_t sigNum)
92 {
93 // Forward GC signal to signal handling thread
94 pthread_kill(g_signalThread, sigNum);
95 BlockGcSignal();
96 }
97
InitializeGcTrigger()98 void InitializeGcTrigger()
99 {
100 // Record watch dog thread as signal handling thread
101 g_signalThread = pthread_self();
102
103 int32_t result = BlockGcSignal();
104 if (result != 0) {
105 LOGE("Failed to block GC signal, errno = %{public}d", result);
106 return;
107 }
108
109 // Start to receive GC signal
110 signal(SIGNAL_FOR_GC, OnSignalReceive);
111 // Start check GC signal
112 CheckGcSignal();
113 }
114 #endif // #if defined(OHOS_PLATFORM) || defined(ANDROID_PLATFORM)
115
116 } // namespace
117
118 class ThreadWatcher final : public Referenced {
119 public:
120 ThreadWatcher(int32_t instanceId, TaskExecutor::TaskType type, bool useUIAsJSThread = false);
121 ~ThreadWatcher() override;
122
123 void SetTaskExecutor(const RefPtr<TaskExecutor>& taskExecutor);
124
125 void BuriedBomb(uint64_t bombId);
126 void DefusingBomb();
127
128 private:
129 void InitThreadName();
130 void CheckAndResetIfNeeded();
131 bool IsThreadStuck();
132 void HiviewReport() const;
133 void RawReport(RawEventType type) const;
134 void PostCheckTask();
135 void TagIncrease();
136 void Check();
137 void ShowDialog() const;
138 void DefusingTopBomb();
139 void DetonatedBomb();
140
141 mutable std::shared_mutex mutex_;
142 int32_t instanceId_ = 0;
143 TaskExecutor::TaskType type_;
144 std::string threadName_;
145 int32_t loopTime_ = 0;
146 int32_t threadTag_ = 0;
147 int32_t lastLoopTime_ = 0;
148 int32_t lastThreadTag_ = 0;
149 int32_t freezeCount_ = 0;
150 int64_t lastTaskId_ = -1;
151 State state_ = State::NORMAL;
152 WeakPtr<TaskExecutor> taskExecutor_;
153 std::queue<uint64_t> inputTaskIds_;
154 bool canShowDialog_ = true;
155 int32_t showDialogCount_ = 0;
156 bool useUIAsJSThread_ = false;
157 };
158
ThreadWatcher(int32_t instanceId,TaskExecutor::TaskType type,bool useUIAsJSThread)159 ThreadWatcher::ThreadWatcher(int32_t instanceId, TaskExecutor::TaskType type, bool useUIAsJSThread)
160 : instanceId_(instanceId), type_(type), useUIAsJSThread_(useUIAsJSThread)
161 {
162 InitThreadName();
163 AnrThread::PostTaskToTaskRunner(
164 [weak = Referenced::WeakClaim(this)]() {
165 auto sp = weak.Upgrade();
166 CHECK_NULL_VOID_NOLOG(sp);
167 sp->Check();
168 },
169 NORMAL_CHECK_PERIOD);
170 }
171
~ThreadWatcher()172 ThreadWatcher::~ThreadWatcher() {}
173
// Remembers the executor used to post probe tasks to the watched thread.
// It is stored in a WeakPtr member, so the watcher does not keep the executor alive.
void ThreadWatcher::SetTaskExecutor(const RefPtr<TaskExecutor>& taskExecutor)
{
    taskExecutor_ = taskExecutor;
}
178
BuriedBomb(uint64_t bombId)179 void ThreadWatcher::BuriedBomb(uint64_t bombId)
180 {
181 std::unique_lock<std::shared_mutex> lock(mutex_);
182 inputTaskIds_.emplace(bombId);
183 }
184
DefusingBomb()185 void ThreadWatcher::DefusingBomb()
186 {
187 auto taskExecutor = taskExecutor_.Upgrade();
188 CHECK_NULL_VOID_NOLOG(taskExecutor);
189 taskExecutor->PostTask(
190 [weak = Referenced::WeakClaim(this)]() {
191 auto sp = weak.Upgrade();
192 if (sp) {
193 sp->DefusingTopBomb();
194 }
195 },
196 type_);
197 }
198
DefusingTopBomb()199 void ThreadWatcher::DefusingTopBomb()
200 {
201 std::unique_lock<std::shared_mutex> lock(mutex_);
202 if (inputTaskIds_.empty()) {
203 return;
204 }
205
206 inputTaskIds_.pop();
207 }
208
InitThreadName()209 void ThreadWatcher::InitThreadName()
210 {
211 switch (type_) {
212 case TaskExecutor::TaskType::JS:
213 threadName_ = JS_THREAD_NAME;
214 break;
215 case TaskExecutor::TaskType::UI:
216 threadName_ = UI_THREAD_NAME;
217 break;
218 default:
219 threadName_ = UNKNOWN_THREAD_NAME;
220 break;
221 }
222 }
223
DetonatedBomb()224 void ThreadWatcher::DetonatedBomb()
225 {
226 std::shared_lock<std::shared_mutex> lock(mutex_);
227 if (inputTaskIds_.empty()) {
228 return;
229 }
230
231 uint64_t currentTime = GetMilliseconds();
232 uint64_t bombId = inputTaskIds_.front();
233
234 if (currentTime - bombId > ANR_INPUT_FREEZE_TIME) {
235 LOGE("Detonated the Bomb, which bombId is %{public}s and currentTime is %{public}s",
236 std::to_string(bombId).c_str(), std::to_string(currentTime).c_str());
237 if (canShowDialog_) {
238 ShowDialog();
239 canShowDialog_ = false;
240 showDialogCount_ = 0;
241 } else {
242 LOGE("Can not show dialog when detonated the Bomb.");
243 }
244
245 std::queue<uint64_t> empty;
246 std::swap(empty, inputTaskIds_);
247 }
248 }
249
Check()250 void ThreadWatcher::Check()
251 {
252 int32_t period = NORMAL_CHECK_PERIOD;
253 if (!IsThreadStuck()) {
254 if (state_ == State::FREEZE) {
255 RawReport(RawEventType::RECOVER);
256 }
257 freezeCount_ = 0;
258 state_ = State::NORMAL;
259 canShowDialog_ = true;
260 showDialogCount_ = 0;
261 } else {
262 if (state_ == State::NORMAL) {
263 HiviewReport();
264 RawReport(RawEventType::WARNING);
265 state_ = State::WARNING;
266 period = WARNING_CHECK_PERIOD;
267 } else if (state_ == State::WARNING) {
268 RawReport(RawEventType::FREEZE);
269 state_ = State::FREEZE;
270 period = FREEZE_CHECK_PERIOD;
271 DetonatedBomb();
272 } else {
273 if (!canShowDialog_) {
274 showDialogCount_++;
275 if (showDialogCount_ >= ANR_DIALOG_BLOCK_TIME) {
276 canShowDialog_ = true;
277 showDialogCount_ = 0;
278 }
279 }
280
281 if (++freezeCount_ >= 5) {
282 RawReport(RawEventType::FREEZE);
283 freezeCount_ = 0;
284 }
285 period = FREEZE_CHECK_PERIOD;
286 DetonatedBomb();
287 }
288 }
289
290 AnrThread::PostTaskToTaskRunner(
291 [weak = Referenced::WeakClaim(this)]() {
292 auto sp = weak.Upgrade();
293 CHECK_NULL_VOID_NOLOG(sp);
294 sp->Check();
295 },
296 period);
297 }
298
CheckAndResetIfNeeded()299 void ThreadWatcher::CheckAndResetIfNeeded()
300 {
301 {
302 std::shared_lock<std::shared_mutex> lock(mutex_);
303 if (loopTime_ < INT32_MAX) {
304 return;
305 }
306 }
307
308 std::unique_lock<std::shared_mutex> lock(mutex_);
309 loopTime_ = 0;
310 threadTag_ = 0;
311 }
312
IsThreadStuck()313 bool ThreadWatcher::IsThreadStuck()
314 {
315 bool res = false;
316 auto taskExecutor = taskExecutor_.Upgrade();
317 CHECK_NULL_RETURN(taskExecutor, false);
318 uint32_t taskId = taskExecutor->GetTotalTaskNum(type_);
319 if (useUIAsJSThread_) {
320 taskId += taskExecutor->GetTotalTaskNum(TaskExecutor::TaskType::JS);
321 }
322 {
323 std::shared_lock<std::shared_mutex> lock(mutex_);
324 if (((loopTime_ - threadTag_) > (lastLoopTime_ - lastThreadTag_)) && (lastTaskId_ == taskId)) {
325 std::string abilityName;
326 if (AceEngine::Get().GetContainer(instanceId_) != nullptr) {
327 abilityName = AceEngine::Get().GetContainer(instanceId_)->GetHostClassName();
328 }
329 LOGE("thread stuck, ability: %{public}s, instanceId: %{public}d, thread: %{public}s, looptime: %{public}d, "
330 "checktime: %{public}d",
331 abilityName.c_str(), instanceId_, threadName_.c_str(), loopTime_, threadTag_);
332 res = true;
333 }
334 lastTaskId_ = taskId;
335 lastLoopTime_ = loopTime_;
336 lastThreadTag_ = threadTag_;
337 }
338 CheckAndResetIfNeeded();
339 PostCheckTask();
340 return res;
341 }
342
HiviewReport() const343 void ThreadWatcher::HiviewReport() const
344 {
345 if (type_ == TaskExecutor::TaskType::JS) {
346 EventReport::SendJsException(JsExcepType::JS_THREAD_STUCK);
347 } else if (type_ == TaskExecutor::TaskType::UI) {
348 EventReport::SendRenderException(RenderExcepType::UI_THREAD_STUCK);
349 }
350 }
351
RawReport(RawEventType type) const352 void ThreadWatcher::RawReport(RawEventType type) const
353 {
354 std::string message;
355 if (type == RawEventType::FREEZE &&
356 (type_ == TaskExecutor::TaskType::JS || (useUIAsJSThread_ && (type_ == TaskExecutor::TaskType::UI)))) {
357 auto engine = EngineHelper::GetEngine(instanceId_);
358 message = engine ? engine->GetStacktraceMessage() : "";
359 }
360 int32_t tid = 0;
361 auto taskExecutor = taskExecutor_.Upgrade();
362 if (taskExecutor) {
363 tid = taskExecutor->GetTid(type_);
364 }
365 std::string threadInfo = "Blocked thread id = " + std::to_string(tid) + "\n";
366 threadInfo += "JSVM instance id = " + std::to_string(instanceId_) + "\n";
367 message = threadInfo + message;
368 EventReport::ANRRawReport(type, AceApplicationInfo::GetInstance().GetUid(),
369 AceApplicationInfo::GetInstance().GetPackageName(), AceApplicationInfo::GetInstance().GetProcessName(),
370 message);
371 }
372
ShowDialog() const373 void ThreadWatcher::ShowDialog() const
374 {
375 EventReport::ANRShowDialog(AceApplicationInfo::GetInstance().GetUid(),
376 AceApplicationInfo::GetInstance().GetPackageName(), AceApplicationInfo::GetInstance().GetProcessName());
377 }
378
PostCheckTask()379 void ThreadWatcher::PostCheckTask()
380 {
381 auto taskExecutor = taskExecutor_.Upgrade();
382 if (taskExecutor) {
383 // post task to specified thread to check it
384 taskExecutor->PostTask(
385 [weak = Referenced::WeakClaim(this)]() {
386 auto sp = weak.Upgrade();
387 CHECK_NULL_VOID_NOLOG(sp);
388 sp->TagIncrease();
389 },
390 type_);
391 std::unique_lock<std::shared_mutex> lock(mutex_);
392 ++loopTime_;
393 if (state_ != State::NORMAL) {
394 LOGW("thread check, instanceId: %{public}d, thread: %{public}s, looptime: %{public}d, "
395 "checktime: %{public}d",
396 instanceId_, threadName_.c_str(), loopTime_, threadTag_);
397 }
398 } else {
399 LOGW("task executor with instanceId %{public}d invalid when check %{public}s thread whether stuck or not",
400 instanceId_, threadName_.c_str());
401 }
402 }
403
TagIncrease()404 void ThreadWatcher::TagIncrease()
405 {
406 std::unique_lock<std::shared_mutex> lock(mutex_);
407 ++threadTag_;
408 if (state_ != State::NORMAL) {
409 LOGW("thread check, instanceId: %{public}d, thread: %{public}s, looptime: %{public}d, "
410 "checktime: %{public}d",
411 instanceId_, threadName_.c_str(), loopTime_, threadTag_);
412 }
413 }
414
// Starts the ANR thread and, on OHOS/Android builds, schedules InitializeGcTrigger on it so that the
// GC-signal handling is set up from that thread.
WatchDog::WatchDog()
{
    AnrThread::Start();
#if defined(OHOS_PLATFORM) || defined(ANDROID_PLATFORM)
    AnrThread::PostTaskToTaskRunner(InitializeGcTrigger, GC_CHECK_PERIOD);
#endif
}
422
// Stops the ANR thread that was started in the constructor.
WatchDog::~WatchDog()
{
    AnrThread::Stop();
}
427
Register(int32_t instanceId,const RefPtr<TaskExecutor> & taskExecutor,bool useUIAsJSThread)428 void WatchDog::Register(int32_t instanceId, const RefPtr<TaskExecutor>& taskExecutor, bool useUIAsJSThread)
429 {
430 Watchers watchers = {
431 .jsWatcher = AceType::MakeRefPtr<ThreadWatcher>(instanceId, TaskExecutor::TaskType::JS),
432 .uiWatcher = AceType::MakeRefPtr<ThreadWatcher>(instanceId, TaskExecutor::TaskType::UI, useUIAsJSThread),
433 };
434 watchers.uiWatcher->SetTaskExecutor(taskExecutor);
435 if (!useUIAsJSThread) {
436 watchers.jsWatcher->SetTaskExecutor(taskExecutor);
437 } else {
438 watchers.jsWatcher = nullptr;
439 }
440 const auto resExecutor = watchMap_.try_emplace(instanceId, watchers);
441 if (!resExecutor.second) {
442 LOGW("Duplicate instance id: %{public}d when register to watch dog", instanceId);
443 }
444 }
445
Unregister(int32_t instanceId)446 void WatchDog::Unregister(int32_t instanceId)
447 {
448 auto num = watchMap_.erase(instanceId);
449 if (num == 0) {
450 LOGW("Unregister from watch dog failed with instanceID %{public}d", instanceId);
451 }
452 }
453
BuriedBomb(int32_t instanceId,uint64_t bombId)454 void WatchDog::BuriedBomb(int32_t instanceId, uint64_t bombId)
455 {
456 auto iter = watchMap_.find(instanceId);
457 if (iter == watchMap_.end()) {
458 return;
459 }
460
461 Watchers watchers = iter->second;
462 AnrThread::PostTaskToTaskRunner(
463 [watchers, bombId]() {
464 if (watchers.jsWatcher) {
465 watchers.jsWatcher->BuriedBomb(bombId);
466 }
467
468 if (watchers.uiWatcher) {
469 watchers.uiWatcher->BuriedBomb(bombId);
470 }
471 },
472 IMMEDIATELY_PERIOD);
473 }
474
DefusingBomb(int32_t instanceId)475 void WatchDog::DefusingBomb(int32_t instanceId)
476 {
477 auto iter = watchMap_.find(instanceId);
478 if (iter == watchMap_.end()) {
479 return;
480 }
481
482 Watchers watchers = iter->second;
483 AnrThread::PostTaskToTaskRunner(
484 [watchers]() {
485 if (watchers.jsWatcher) {
486 watchers.jsWatcher->DefusingBomb();
487 }
488
489 if (watchers.uiWatcher) {
490 watchers.uiWatcher->DefusingBomb();
491 }
492 },
493 IMMEDIATELY_PERIOD);
494 }
495
496 } // namespace OHOS::Ace