• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "core/common/watch_dog.h"
17 
18 #include <cerrno>
19 #include <csignal>
20 #include <cstdint>
21 #include <pthread.h>
22 #include <queue>
23 #include <shared_mutex>
24 
25 #include "base/log/event_report.h"
26 #include "base/log/log.h"
27 #include "base/thread/background_task_executor.h"
28 #include "base/thread/task_executor.h"
29 #include "base/utils/utils.h"
30 #include "bridge/common/utils/engine_helper.h"
31 #include "core/common/ace_application_info.h"
32 #include "core/common/ace_engine.h"
33 #include "core/common/anr_thread.h"
34 
35 namespace OHOS::Ace {
36 namespace {
37 
// Delays passed to AnrThread::PostTaskToTaskRunner when scheduling the next check, one per watcher state.
// NOTE(review): the unit is whatever PostTaskToTaskRunner expects (presumably seconds) — confirm.
constexpr int32_t NORMAL_CHECK_PERIOD = 3;
constexpr int32_t WARNING_CHECK_PERIOD = 2;
constexpr int32_t FREEZE_CHECK_PERIOD = 1;

// Human readable names of the watched threads, used in log output.
constexpr char JS_THREAD_NAME[] = "JS";
constexpr char UI_THREAD_NAME[] = "UI";
constexpr char UNKNOWN_THREAD_NAME[] = "unknown thread";

// An input "bomb" older than this many milliseconds (compared against GetMilliseconds()) is detonated.
constexpr uint64_t ANR_INPUT_FREEZE_TIME = 5000;
// Delay used for tasks that are posted to the ANR thread without waiting.
constexpr int32_t IMMEDIATELY_PERIOD = 0;
// Number of consecutive frozen checks after which the ANR dialog may be shown again.
constexpr int32_t ANR_DIALOG_BLOCK_TIME = 20;

// Health state of a watched thread as tracked by ThreadWatcher::Check().
enum class State {
    NORMAL,
    WARNING,
    FREEZE,
};
49 
50 #if defined(OHOS_PLATFORM) || defined(ANDROID_PLATFORM)
// Signal number that requests a garbage collection.
constexpr int32_t SIGNAL_FOR_GC = 60;
// Delay used when (re)posting the GC signal poll to the ANR thread.
constexpr int32_t GC_CHECK_PERIOD = 1;
// Thread that GC signals are forwarded to; assigned in InitializeGcTrigger().
pthread_t g_signalThread;
54 
CheckGcSignal()55 void CheckGcSignal()
56 {
57     // Check if GC signal is in pending signal set
58     sigset_t sigSet;
59     sigemptyset(&sigSet);
60     sigaddset(&sigSet, SIGNAL_FOR_GC);
61     struct timespec interval = {
62         .tv_sec = 0,
63         .tv_nsec = 0,
64     };
65     int32_t result = sigtimedwait(&sigSet, nullptr, &interval);
66     if (result < 0) {
67         if (errno != EAGAIN && errno != EINTR) {
68             LOGE("Failed to wait signals, errno = %{public}d", errno);
69             return;
70         }
71     } else {
72         ACE_DCHECK(result == SIGNAL_FOR_GC);
73 
74         // Start GC
75         LOGE("Receive GC signal");
76         AceEngine::Get().TriggerGarbageCollection();
77     }
78 
79     // Check again
80     AnrThread::AnrThread::PostTaskToTaskRunner(CheckGcSignal, GC_CHECK_PERIOD);
81 }
82 
BlockGcSignal()83 inline int32_t BlockGcSignal()
84 {
85     // Block GC signal on current thread.
86     sigset_t sigSet;
87     sigemptyset(&sigSet);
88     sigaddset(&sigSet, SIGNAL_FOR_GC);
89     return pthread_sigmask(SIG_BLOCK, &sigSet, nullptr);
90 }
91 
OnSignalReceive(int32_t sigNum)92 void OnSignalReceive(int32_t sigNum)
93 {
94     // Forward GC signal to signal handling thread
95     pthread_kill(g_signalThread, sigNum);
96     BlockGcSignal();
97 }
98 
InitializeGcTrigger()99 void InitializeGcTrigger()
100 {
101     // Record watch dog thread as signal handling thread
102     g_signalThread = pthread_self();
103 
104     int32_t result = BlockGcSignal();
105     if (result != 0) {
106         LOGE("Failed to block GC signal, errno = %{public}d", result);
107         return;
108     }
109 
110     // Start to receive GC signal
111     signal(SIGNAL_FOR_GC, OnSignalReceive);
112     // Start check GC signal
113     CheckGcSignal();
114 }
115 #endif // #if defined(OHOS_PLATFORM) || defined(ANDROID_PLATFORM)
116 
117 } // namespace
118 
119 class ThreadWatcher final : public Referenced {
120 public:
121     ThreadWatcher(int32_t instanceId, TaskExecutor::TaskType type, bool useUIAsJSThread = false);
122     ~ThreadWatcher() override;
123 
124     void SetTaskExecutor(const RefPtr<TaskExecutor>& taskExecutor);
125 
126     void BuriedBomb(uint64_t bombId);
127     void DefusingBomb();
128 
129 private:
130     void InitThreadName();
131     void CheckAndResetIfNeeded();
132     bool IsThreadStuck();
133     void HiviewReport() const;
134     void RawReport(RawEventType type) const;
135     void PostCheckTask();
136     void TagIncrease();
137     void Check();
138     void ShowDialog() const;
139     void DefusingTopBomb();
140     void DetonatedBomb();
141 
142     mutable std::shared_mutex mutex_;
143     int32_t instanceId_ = 0;
144     TaskExecutor::TaskType type_;
145     std::string threadName_;
146     int32_t loopTime_ = 0;
147     int32_t threadTag_ = 0;
148     int32_t lastLoopTime_ = 0;
149     int32_t lastThreadTag_ = 0;
150     int32_t freezeCount_ = 0;
151     int64_t lastTaskId_ = -1;
152     State state_ = State::NORMAL;
153     WeakPtr<TaskExecutor> taskExecutor_;
154     std::queue<uint64_t> inputTaskIds_;
155     bool canShowDialog_ = true;
156     int32_t showDialogCount_ = 0;
157     bool useUIAsJSThread_ = false;
158 };
159 
ThreadWatcher(int32_t instanceId,TaskExecutor::TaskType type,bool useUIAsJSThread)160 ThreadWatcher::ThreadWatcher(int32_t instanceId, TaskExecutor::TaskType type, bool useUIAsJSThread)
161     : instanceId_(instanceId), type_(type), useUIAsJSThread_(useUIAsJSThread)
162 {
163     InitThreadName();
164     AnrThread::PostTaskToTaskRunner(
165         [weak = Referenced::WeakClaim(this)]() {
166             auto sp = weak.Upgrade();
167             CHECK_NULL_VOID_NOLOG(sp);
168             sp->Check();
169         },
170         NORMAL_CHECK_PERIOD);
171 }
172 
~ThreadWatcher()173 ThreadWatcher::~ThreadWatcher() {}
174 
SetTaskExecutor(const RefPtr<TaskExecutor> & taskExecutor)175 void ThreadWatcher::SetTaskExecutor(const RefPtr<TaskExecutor>& taskExecutor)
176 {
177     taskExecutor_ = taskExecutor;
178 }
179 
BuriedBomb(uint64_t bombId)180 void ThreadWatcher::BuriedBomb(uint64_t bombId)
181 {
182     std::unique_lock<std::shared_mutex> lock(mutex_);
183     inputTaskIds_.emplace(bombId);
184 }
185 
DefusingBomb()186 void ThreadWatcher::DefusingBomb()
187 {
188     auto taskExecutor = taskExecutor_.Upgrade();
189     CHECK_NULL_VOID_NOLOG(taskExecutor);
190     taskExecutor->PostTask(
191         [weak = Referenced::WeakClaim(this)]() {
192             auto sp = weak.Upgrade();
193             if (sp) {
194                 sp->DefusingTopBomb();
195             }
196         },
197         type_);
198 }
199 
DefusingTopBomb()200 void ThreadWatcher::DefusingTopBomb()
201 {
202     std::unique_lock<std::shared_mutex> lock(mutex_);
203     if (inputTaskIds_.empty()) {
204         return;
205     }
206 
207     inputTaskIds_.pop();
208 }
209 
InitThreadName()210 void ThreadWatcher::InitThreadName()
211 {
212     switch (type_) {
213         case TaskExecutor::TaskType::JS:
214             threadName_ = JS_THREAD_NAME;
215             break;
216         case TaskExecutor::TaskType::UI:
217             threadName_ = UI_THREAD_NAME;
218             break;
219         default:
220             threadName_ = UNKNOWN_THREAD_NAME;
221             break;
222     }
223 }
224 
DetonatedBomb()225 void ThreadWatcher::DetonatedBomb()
226 {
227     std::shared_lock<std::shared_mutex> lock(mutex_);
228     if (inputTaskIds_.empty()) {
229         return;
230     }
231 
232     uint64_t currentTime = GetMilliseconds();
233     uint64_t bombId = inputTaskIds_.front();
234 
235     if (currentTime - bombId > ANR_INPUT_FREEZE_TIME) {
236         LOGE("Detonated the Bomb, which bombId is %{public}s and currentTime is %{public}s",
237             std::to_string(bombId).c_str(), std::to_string(currentTime).c_str());
238         if (canShowDialog_) {
239             ShowDialog();
240             canShowDialog_ = false;
241             showDialogCount_ = 0;
242         } else {
243             LOGE("Can not show dialog when detonated the Bomb.");
244         }
245 
246         std::queue<uint64_t> empty;
247         std::swap(empty, inputTaskIds_);
248     }
249 }
250 
Check()251 void ThreadWatcher::Check()
252 {
253     int32_t period = NORMAL_CHECK_PERIOD;
254     if (!IsThreadStuck()) {
255         if (state_ == State::FREEZE) {
256             RawReport(RawEventType::RECOVER);
257         }
258         freezeCount_ = 0;
259         state_ = State::NORMAL;
260         canShowDialog_ = true;
261         showDialogCount_ = 0;
262     } else {
263         if (state_ == State::NORMAL) {
264             HiviewReport();
265             RawReport(RawEventType::WARNING);
266             state_ = State::WARNING;
267             period = WARNING_CHECK_PERIOD;
268         } else if (state_ == State::WARNING) {
269             RawReport(RawEventType::FREEZE);
270             state_ = State::FREEZE;
271             period = FREEZE_CHECK_PERIOD;
272             DetonatedBomb();
273         } else {
274             if (!canShowDialog_) {
275                 showDialogCount_++;
276                 if (showDialogCount_ >= ANR_DIALOG_BLOCK_TIME) {
277                     canShowDialog_ = true;
278                     showDialogCount_ = 0;
279                 }
280             }
281 
282             if (++freezeCount_ >= 5) {
283                 RawReport(RawEventType::FREEZE);
284                 freezeCount_ = 0;
285             }
286             period = FREEZE_CHECK_PERIOD;
287             DetonatedBomb();
288         }
289     }
290 
291     AnrThread::PostTaskToTaskRunner(
292         [weak = Referenced::WeakClaim(this)]() {
293             auto sp = weak.Upgrade();
294             CHECK_NULL_VOID_NOLOG(sp);
295             sp->Check();
296         },
297         period);
298 }
299 
CheckAndResetIfNeeded()300 void ThreadWatcher::CheckAndResetIfNeeded()
301 {
302     {
303         std::shared_lock<std::shared_mutex> lock(mutex_);
304         if (loopTime_ < INT32_MAX) {
305             return;
306         }
307     }
308 
309     std::unique_lock<std::shared_mutex> lock(mutex_);
310     loopTime_ = 0;
311     threadTag_ = 0;
312 }
313 
IsThreadStuck()314 bool ThreadWatcher::IsThreadStuck()
315 {
316     bool res = false;
317     auto taskExecutor = taskExecutor_.Upgrade();
318     CHECK_NULL_RETURN(taskExecutor, false);
319     uint32_t taskId = taskExecutor->GetTotalTaskNum(type_);
320     if (useUIAsJSThread_) {
321         taskId += taskExecutor->GetTotalTaskNum(TaskExecutor::TaskType::JS);
322     }
323     {
324         std::shared_lock<std::shared_mutex> lock(mutex_);
325         if (((loopTime_ - threadTag_) > (lastLoopTime_ - lastThreadTag_)) && (lastTaskId_ == taskId)) {
326             std::string abilityName;
327             if (AceEngine::Get().GetContainer(instanceId_) != nullptr) {
328                 abilityName = AceEngine::Get().GetContainer(instanceId_)->GetHostClassName();
329             }
330             LOGE("thread stuck, ability: %{public}s, instanceId: %{public}d, thread: %{public}s, looptime: %{public}d, "
331                  "checktime: %{public}d",
332                 abilityName.c_str(), instanceId_, threadName_.c_str(), loopTime_, threadTag_);
333             res = true;
334         }
335         lastTaskId_ = taskId;
336         lastLoopTime_ = loopTime_;
337         lastThreadTag_ = threadTag_;
338     }
339     CheckAndResetIfNeeded();
340     PostCheckTask();
341     return res;
342 }
343 
HiviewReport() const344 void ThreadWatcher::HiviewReport() const
345 {
346     if (type_ == TaskExecutor::TaskType::JS) {
347         EventReport::SendJsException(JsExcepType::JS_THREAD_STUCK);
348     } else if (type_ == TaskExecutor::TaskType::UI) {
349         EventReport::SendRenderException(RenderExcepType::UI_THREAD_STUCK);
350     }
351 }
352 
RawReport(RawEventType type) const353 void ThreadWatcher::RawReport(RawEventType type) const
354 {
355     std::string message;
356     if (type == RawEventType::FREEZE &&
357         (type_ == TaskExecutor::TaskType::JS || (useUIAsJSThread_ && (type_ == TaskExecutor::TaskType::UI)))) {
358         auto engine = EngineHelper::GetEngine(instanceId_);
359         message = engine ? engine->GetStacktraceMessage() : "";
360     }
361     int32_t tid = 0;
362     auto taskExecutor = taskExecutor_.Upgrade();
363     if (taskExecutor) {
364         tid = taskExecutor->GetTid(type_);
365     }
366     std::string threadInfo = "Blocked thread id = " + std::to_string(tid) + "\n";
367     threadInfo += "JSVM instance id = " + std::to_string(instanceId_) + "\n";
368     message = threadInfo + message;
369     EventReport::ANRRawReport(type, AceApplicationInfo::GetInstance().GetUid(),
370         AceApplicationInfo::GetInstance().GetPackageName(), AceApplicationInfo::GetInstance().GetProcessName(),
371         message);
372 }
373 
ShowDialog() const374 void ThreadWatcher::ShowDialog() const
375 {
376     EventReport::ANRShowDialog(AceApplicationInfo::GetInstance().GetUid(),
377         AceApplicationInfo::GetInstance().GetPackageName(), AceApplicationInfo::GetInstance().GetProcessName());
378 }
379 
PostCheckTask()380 void ThreadWatcher::PostCheckTask()
381 {
382     auto taskExecutor = taskExecutor_.Upgrade();
383     if (taskExecutor) {
384         // post task to specified thread to check it
385         taskExecutor->PostTask(
386             [weak = Referenced::WeakClaim(this)]() {
387                 auto sp = weak.Upgrade();
388                 CHECK_NULL_VOID_NOLOG(sp);
389                 sp->TagIncrease();
390             },
391             type_);
392         std::unique_lock<std::shared_mutex> lock(mutex_);
393         ++loopTime_;
394         if (state_ != State::NORMAL) {
395             LOGW("thread check, instanceId: %{public}d, thread: %{public}s, looptime: %{public}d, "
396                  "checktime: %{public}d",
397                 instanceId_, threadName_.c_str(), loopTime_, threadTag_);
398         }
399     } else {
400         LOGW("task executor with instanceId %{public}d invalid when check %{public}s thread whether stuck or not",
401             instanceId_, threadName_.c_str());
402     }
403 }
404 
TagIncrease()405 void ThreadWatcher::TagIncrease()
406 {
407     std::unique_lock<std::shared_mutex> lock(mutex_);
408     ++threadTag_;
409     if (state_ != State::NORMAL) {
410         LOGW("thread check, instanceId: %{public}d, thread: %{public}s, looptime: %{public}d, "
411              "checktime: %{public}d",
412             instanceId_, threadName_.c_str(), loopTime_, threadTag_);
413     }
414 }
415 
WatchDog()416 WatchDog::WatchDog()
417 {
418     AnrThread::Start();
419 #if defined(OHOS_PLATFORM) || defined(ANDROID_PLATFORM)
420     AnrThread::PostTaskToTaskRunner(InitializeGcTrigger, GC_CHECK_PERIOD);
421 #endif
422 }
423 
~WatchDog()424 WatchDog::~WatchDog()
425 {
426     AnrThread::Stop();
427 }
428 
Register(int32_t instanceId,const RefPtr<TaskExecutor> & taskExecutor,bool useUIAsJSThread)429 void WatchDog::Register(int32_t instanceId, const RefPtr<TaskExecutor>& taskExecutor, bool useUIAsJSThread)
430 {
431     Watchers watchers = {
432         .jsWatcher = AceType::MakeRefPtr<ThreadWatcher>(instanceId, TaskExecutor::TaskType::JS),
433         .uiWatcher = AceType::MakeRefPtr<ThreadWatcher>(instanceId, TaskExecutor::TaskType::UI, useUIAsJSThread),
434     };
435     watchers.uiWatcher->SetTaskExecutor(taskExecutor);
436     if (!useUIAsJSThread) {
437         watchers.jsWatcher->SetTaskExecutor(taskExecutor);
438     } else {
439         watchers.jsWatcher = nullptr;
440     }
441     const auto resExecutor = watchMap_.try_emplace(instanceId, watchers);
442     if (!resExecutor.second) {
443         LOGW("Duplicate instance id: %{public}d when register to watch dog", instanceId);
444     }
445 }
446 
Unregister(int32_t instanceId)447 void WatchDog::Unregister(int32_t instanceId)
448 {
449     auto num = watchMap_.erase(instanceId);
450     if (num == 0) {
451         LOGW("Unregister from watch dog failed with instanceID %{public}d", instanceId);
452     }
453 }
454 
BuriedBomb(int32_t instanceId,uint64_t bombId)455 void WatchDog::BuriedBomb(int32_t instanceId, uint64_t bombId)
456 {
457     auto iter = watchMap_.find(instanceId);
458     if (iter == watchMap_.end()) {
459         return;
460     }
461 
462     Watchers watchers = iter->second;
463     AnrThread::PostTaskToTaskRunner(
464         [watchers, bombId]() {
465             if (watchers.jsWatcher) {
466                 watchers.jsWatcher->BuriedBomb(bombId);
467             }
468 
469             if (watchers.uiWatcher) {
470                 watchers.uiWatcher->BuriedBomb(bombId);
471             }
472         },
473         IMMEDIATELY_PERIOD);
474 }
475 
DefusingBomb(int32_t instanceId)476 void WatchDog::DefusingBomb(int32_t instanceId)
477 {
478     auto iter = watchMap_.find(instanceId);
479     if (iter == watchMap_.end()) {
480         return;
481     }
482 
483     Watchers watchers = iter->second;
484     AnrThread::PostTaskToTaskRunner(
485         [watchers]() {
486             if (watchers.jsWatcher) {
487                 watchers.jsWatcher->DefusingBomb();
488             }
489 
490             if (watchers.uiWatcher) {
491                 watchers.uiWatcher->DefusingBomb();
492             }
493         },
494         IMMEDIATELY_PERIOD);
495 }
496 } // namespace OHOS::Ace
497