// Copyright 2020 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "base/threading/hang_watcher.h"

#include <atomic>
#include <utility>

#include "base/containers/flat_map.h"
#include "base/debug/alias.h"
#include "base/debug/crash_logging.h"
#include "base/debug/dump_without_crashing.h"
#include "base/debug/leak_annotations.h"
#include "base/feature_list.h"
#include "base/functional/bind.h"
#include "base/functional/callback_helpers.h"
#include "base/metrics/field_trial_params.h"
#include "base/metrics/histogram_macros.h"
#include "base/power_monitor/power_monitor.h"
#include "base/ranges/algorithm.h"
#include "base/strings/string_number_conversions.h"
#include "base/synchronization/lock.h"
#include "base/synchronization/waitable_event.h"
#include "base/threading/platform_thread.h"
#include "base/threading/thread_checker.h"
#include "base/threading/thread_restrictions.h"
#include "base/threading/threading_features.h"
#include "base/time/default_tick_clock.h"
#include "base/time/time.h"
#include "base/trace_event/base_tracing.h"
#include "build/build_config.h"
#include "third_party/abseil-cpp/absl/base/attributes.h"

namespace base {

namespace {

// Defines how much logging happens when the HangWatcher monitors the threads.
// Logging levels are set per thread type through Finch. It's important that
// the order of the enum members stay the same and that their numerical
// values be in increasing order. The implementation of
// ThreadTypeLoggingLevelGreaterOrEqual() depends on it.
enum class LoggingLevel { kNone = 0, kUmaOnly = 1, kUmaAndCrash = 2 };

HangWatcher* g_instance = nullptr;
ABSL_CONST_INIT thread_local internal::HangWatchState* hang_watch_state =
    nullptr;
std::atomic<bool> g_use_hang_watcher{false};
std::atomic<HangWatcher::ProcessType> g_hang_watcher_process_type{
    HangWatcher::ProcessType::kBrowserProcess};

std::atomic<LoggingLevel> g_threadpool_log_level{LoggingLevel::kNone};
std::atomic<LoggingLevel> g_io_thread_log_level{LoggingLevel::kNone};
std::atomic<LoggingLevel> g_main_thread_log_level{LoggingLevel::kNone};

// Indicates whether HangWatcher::Run() should return after the next
// monitoring.
std::atomic<bool> g_keep_monitoring{true};

// Emits the hung thread count histogram. |count| is the number of threads
// of type |thread_type| that were hung or became hung during the last
// monitoring window. This function should be invoked for each thread type
// encountered on each call to Monitor().
void LogHungThreadCountHistogram(HangWatcher::ThreadType thread_type,
                                 int count) {
  // In the case of unique threads like the IO or UI/Main thread, a count
  // does not make sense.
  const bool any_thread_hung = count >= 1;

  const HangWatcher::ProcessType process_type =
      g_hang_watcher_process_type.load(std::memory_order_relaxed);
  switch (process_type) {
    case HangWatcher::ProcessType::kUnknownProcess:
      break;

    case HangWatcher::ProcessType::kBrowserProcess:
      switch (thread_type) {
        case HangWatcher::ThreadType::kIOThread:
          UMA_HISTOGRAM_BOOLEAN(
              "HangWatcher.IsThreadHung.BrowserProcess."
              "IOThread",
              any_thread_hung);
          break;
        case HangWatcher::ThreadType::kMainThread:
          UMA_HISTOGRAM_BOOLEAN(
              "HangWatcher.IsThreadHung.BrowserProcess."
              "UIThread",
              any_thread_hung);
          break;
        case HangWatcher::ThreadType::kThreadPoolThread:
          // Not recorded for now.
          break;
      }
      break;

    case HangWatcher::ProcessType::kGPUProcess:
      // Not recorded for now.
      break;

    case HangWatcher::ProcessType::kRendererProcess:
      switch (thread_type) {
        case HangWatcher::ThreadType::kIOThread:
          UMA_HISTOGRAM_BOOLEAN(
              "HangWatcher.IsThreadHung.RendererProcess."
              "IOThread",
              any_thread_hung);
          break;
        case HangWatcher::ThreadType::kMainThread:
          UMA_HISTOGRAM_BOOLEAN(
              "HangWatcher.IsThreadHung.RendererProcess."
              "MainThread",
              any_thread_hung);
          break;
        case HangWatcher::ThreadType::kThreadPoolThread:
          // Not recorded for now.
          break;
      }
      break;

    case HangWatcher::ProcessType::kUtilityProcess:
      switch (thread_type) {
        case HangWatcher::ThreadType::kIOThread:
          UMA_HISTOGRAM_BOOLEAN(
              "HangWatcher.IsThreadHung.UtilityProcess."
              "IOThread",
              any_thread_hung);
          break;
        case HangWatcher::ThreadType::kMainThread:
          UMA_HISTOGRAM_BOOLEAN(
              "HangWatcher.IsThreadHung.UtilityProcess."
              "MainThread",
              any_thread_hung);
          break;
        case HangWatcher::ThreadType::kThreadPoolThread:
          // Not recorded for now.
          break;
      }
      break;
  }
}

// Returns true if |thread_type| was configured through Finch to have a logging
// level that is equal to or exceeds |logging_level|.
bool ThreadTypeLoggingLevelGreaterOrEqual(HangWatcher::ThreadType thread_type,
                                          LoggingLevel logging_level) {
  switch (thread_type) {
    case HangWatcher::ThreadType::kIOThread:
      return g_io_thread_log_level.load(std::memory_order_relaxed) >=
             logging_level;
    case HangWatcher::ThreadType::kMainThread:
      return g_main_thread_log_level.load(std::memory_order_relaxed) >=
             logging_level;
    case HangWatcher::ThreadType::kThreadPoolThread:
      return g_threadpool_log_level.load(std::memory_order_relaxed) >=
             logging_level;
  }
}
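
// For example (illustrative): with the "io_thread_log_level" Finch param set
// to 1 (LoggingLevel::kUmaOnly), calling ThreadTypeLoggingLevelGreaterOrEqual(
// HangWatcher::ThreadType::kIOThread, LoggingLevel::kUmaOnly) returns true,
// while the same call with LoggingLevel::kUmaAndCrash returns false.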

}  // namespace

// Determines if the HangWatcher is activated. When false the HangWatcher
// thread is never started.
BASE_FEATURE(kEnableHangWatcher,
             "EnableHangWatcher",
             FEATURE_ENABLED_BY_DEFAULT);

// Browser process.
constexpr base::FeatureParam<int> kIOThreadLogLevel{
    &kEnableHangWatcher, "io_thread_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};
constexpr base::FeatureParam<int> kUIThreadLogLevel{
    &kEnableHangWatcher, "ui_thread_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};
constexpr base::FeatureParam<int> kThreadPoolLogLevel{
    &kEnableHangWatcher, "threadpool_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};

// GPU process.
constexpr base::FeatureParam<int> kGPUProcessIOThreadLogLevel{
    &kEnableHangWatcher, "gpu_process_io_thread_log_level",
    static_cast<int>(LoggingLevel::kNone)};
constexpr base::FeatureParam<int> kGPUProcessMainThreadLogLevel{
    &kEnableHangWatcher, "gpu_process_main_thread_log_level",
    static_cast<int>(LoggingLevel::kNone)};
constexpr base::FeatureParam<int> kGPUProcessThreadPoolLogLevel{
    &kEnableHangWatcher, "gpu_process_threadpool_log_level",
    static_cast<int>(LoggingLevel::kNone)};

// Renderer process.
constexpr base::FeatureParam<int> kRendererProcessIOThreadLogLevel{
    &kEnableHangWatcher, "renderer_process_io_thread_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};
constexpr base::FeatureParam<int> kRendererProcessMainThreadLogLevel{
    &kEnableHangWatcher, "renderer_process_main_thread_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};
constexpr base::FeatureParam<int> kRendererProcessThreadPoolLogLevel{
    &kEnableHangWatcher, "renderer_process_threadpool_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};

// Utility process.
constexpr base::FeatureParam<int> kUtilityProcessIOThreadLogLevel{
    &kEnableHangWatcher, "utility_process_io_thread_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};
constexpr base::FeatureParam<int> kUtilityProcessMainThreadLogLevel{
    &kEnableHangWatcher, "utility_process_main_thread_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};
constexpr base::FeatureParam<int> kUtilityProcessThreadPoolLogLevel{
    &kEnableHangWatcher, "utility_process_threadpool_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};

constexpr const char* kThreadName = "HangWatcher";

// The time that the HangWatcher thread will sleep for between calls to
// Monitor(). Increasing or decreasing this does not modify the type of hangs
// that can be detected. It instead increases the probability that a call to
// Monitor() will happen at the right time to catch a hang. This has to be
// balanced with power/cpu use concerns as busy looping would catch almost all
// hangs but present unacceptable overhead. NOTE: If this period is ever
// changed then all metrics that depend on it, like HangWatcher.IsThreadHung,
// need to be updated.
constexpr auto kMonitoringPeriod = base::Seconds(10);

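// Example usage (illustrative; DoWork() is a stand-in for any workload): a
// watched thread bounds a unit of work with a WatchHangsInScope so the
// HangWatcher can flag it if the deadline expires before the scope ends:
//
//   {
//     WatchHangsInScope scope(base::Seconds(5));
//     DoWork();
//   }  // Hang watching for this unit of work stops here.
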
WatchHangsInScope::WatchHangsInScope(TimeDelta timeout) {
  internal::HangWatchState* current_hang_watch_state =
      HangWatcher::IsEnabled()
          ? internal::HangWatchState::GetHangWatchStateForCurrentThread()
          : nullptr;

  DCHECK(timeout >= base::TimeDelta()) << "Negative timeouts are invalid.";

  // Thread is not monitored, noop.
  if (!current_hang_watch_state) {
    took_effect_ = false;
    return;
  }

#if DCHECK_IS_ON()
  previous_watch_hangs_in_scope_ =
      current_hang_watch_state->GetCurrentWatchHangsInScope();
  current_hang_watch_state->SetCurrentWatchHangsInScope(this);
#endif

  auto [old_flags, old_deadline] =
      current_hang_watch_state->GetFlagsAndDeadline();

  // TODO(crbug.com/1034046): Check whether we are over deadline already for
  // the previous WatchHangsInScope here by issuing only one TimeTicks::Now()
  // and reusing the value.

  previous_deadline_ = old_deadline;
  TimeTicks deadline = TimeTicks::Now() + timeout;
  current_hang_watch_state->SetDeadline(deadline);
  current_hang_watch_state->IncrementNestingLevel();

  const bool hangs_ignored_for_current_scope =
      internal::HangWatchDeadline::IsFlagSet(
          internal::HangWatchDeadline::Flag::kIgnoreCurrentWatchHangsInScope,
          old_flags);

  // If the current WatchHangsInScope is ignored, temporarily reactivate hang
  // watching for newly created WatchHangsInScopes. On exiting, hang watching
  // is suspended again to return to the original state.
  if (hangs_ignored_for_current_scope) {
    current_hang_watch_state->UnsetIgnoreCurrentWatchHangsInScope();
    set_hangs_ignored_on_exit_ = true;
  }
}

WatchHangsInScope::~WatchHangsInScope() {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);

  // If hang watching was not enabled at construction time there is nothing to
  // validate or undo.
  if (!took_effect_) {
    return;
  }

  // If the thread was unregistered since construction there is also nothing to
  // do.
  auto* const state =
      internal::HangWatchState::GetHangWatchStateForCurrentThread();
  if (!state) {
    return;
  }

  // If a hang is currently being captured we should block here so execution
  // stops and we avoid recording unrelated stack frames in the crash.
  if (state->IsFlagSet(internal::HangWatchDeadline::Flag::kShouldBlockOnHang)) {
    base::HangWatcher::GetInstance()->BlockIfCaptureInProgress();
  }

#if DCHECK_IS_ON()
  // Verify that no Scope was destructed out of order.
  DCHECK_EQ(this, state->GetCurrentWatchHangsInScope());
  state->SetCurrentWatchHangsInScope(previous_watch_hangs_in_scope_);
#endif

  if (state->nesting_level() == 1) {
    // If a call to InvalidateActiveExpectations() suspended hang watching
    // during the lifetime of this or any nested WatchHangsInScope it can now
    // safely be reactivated by clearing the ignore bit since this is the
    // outer-most scope.
    state->UnsetIgnoreCurrentWatchHangsInScope();
  } else if (set_hangs_ignored_on_exit_) {
    // Return to ignoring hangs since this was the previous state before hang
    // watching was temporarily enabled for this WatchHangsInScope only in the
    // constructor.
    state->SetIgnoreCurrentWatchHangsInScope();
  }

  // Reset the deadline to the value it had before entering this
  // WatchHangsInScope.
  state->SetDeadline(previous_deadline_);
  // TODO(crbug.com/1034046): Log when a WatchHangsInScope exits after its
  // deadline and that went undetected by the HangWatcher.

  state->DecrementNestingLevel();
}

// static
void HangWatcher::InitializeOnMainThread(ProcessType process_type) {
  DCHECK(!g_use_hang_watcher);
  DCHECK(g_io_thread_log_level == LoggingLevel::kNone);
  DCHECK(g_main_thread_log_level == LoggingLevel::kNone);
  DCHECK(g_threadpool_log_level == LoggingLevel::kNone);

  bool enable_hang_watcher = base::FeatureList::IsEnabled(kEnableHangWatcher);

  // Do not start HangWatcher in the GPU process until the issue related to
  // invalid magic signature in the GPU WatchDog is fixed
  // (https://crbug.com/1297760).
  if (process_type == ProcessType::kGPUProcess)
    enable_hang_watcher = false;

  g_use_hang_watcher.store(enable_hang_watcher, std::memory_order_relaxed);

  // Keep the process type.
  g_hang_watcher_process_type.store(process_type, std::memory_order_relaxed);

  // If hang watching is disabled as a whole there is no need to read the
  // params.
  if (!enable_hang_watcher)
    return;

  // Retrieve thread-specific config for hang watching.
  switch (process_type) {
    case HangWatcher::ProcessType::kUnknownProcess:
      break;

    case HangWatcher::ProcessType::kBrowserProcess:
      g_threadpool_log_level.store(
          static_cast<LoggingLevel>(kThreadPoolLogLevel.Get()),
          std::memory_order_relaxed);
      g_io_thread_log_level.store(
          static_cast<LoggingLevel>(kIOThreadLogLevel.Get()),
          std::memory_order_relaxed);
      g_main_thread_log_level.store(
          static_cast<LoggingLevel>(kUIThreadLogLevel.Get()),
          std::memory_order_relaxed);
      break;

    case HangWatcher::ProcessType::kGPUProcess:
      g_threadpool_log_level.store(
          static_cast<LoggingLevel>(kGPUProcessThreadPoolLogLevel.Get()),
          std::memory_order_relaxed);
      g_io_thread_log_level.store(
          static_cast<LoggingLevel>(kGPUProcessIOThreadLogLevel.Get()),
          std::memory_order_relaxed);
      g_main_thread_log_level.store(
          static_cast<LoggingLevel>(kGPUProcessMainThreadLogLevel.Get()),
          std::memory_order_relaxed);
      break;

    case HangWatcher::ProcessType::kRendererProcess:
      g_threadpool_log_level.store(
          static_cast<LoggingLevel>(kRendererProcessThreadPoolLogLevel.Get()),
          std::memory_order_relaxed);
      g_io_thread_log_level.store(
          static_cast<LoggingLevel>(kRendererProcessIOThreadLogLevel.Get()),
          std::memory_order_relaxed);
      g_main_thread_log_level.store(
          static_cast<LoggingLevel>(kRendererProcessMainThreadLogLevel.Get()),
          std::memory_order_relaxed);
      break;

    case HangWatcher::ProcessType::kUtilityProcess:
      g_threadpool_log_level.store(
          static_cast<LoggingLevel>(kUtilityProcessThreadPoolLogLevel.Get()),
          std::memory_order_relaxed);
      g_io_thread_log_level.store(
          static_cast<LoggingLevel>(kUtilityProcessIOThreadLogLevel.Get()),
          std::memory_order_relaxed);
      g_main_thread_log_level.store(
          static_cast<LoggingLevel>(kUtilityProcessMainThreadLogLevel.Get()),
          std::memory_order_relaxed);
      break;
  }
}

void HangWatcher::UnitializeOnMainThreadForTesting() {
  g_use_hang_watcher.store(false, std::memory_order_relaxed);
  g_threadpool_log_level.store(LoggingLevel::kNone, std::memory_order_relaxed);
  g_io_thread_log_level.store(LoggingLevel::kNone, std::memory_order_relaxed);
  g_main_thread_log_level.store(LoggingLevel::kNone, std::memory_order_relaxed);
}

// static
bool HangWatcher::IsEnabled() {
  return g_use_hang_watcher.load(std::memory_order_relaxed);
}

// static
bool HangWatcher::IsThreadPoolHangWatchingEnabled() {
  return g_threadpool_log_level.load(std::memory_order_relaxed) !=
         LoggingLevel::kNone;
}

// static
bool HangWatcher::IsIOThreadHangWatchingEnabled() {
  return g_io_thread_log_level.load(std::memory_order_relaxed) !=
         LoggingLevel::kNone;
}

// static
bool HangWatcher::IsCrashReportingEnabled() {
  if (g_main_thread_log_level.load(std::memory_order_relaxed) ==
      LoggingLevel::kUmaAndCrash) {
    return true;
  }
  if (g_io_thread_log_level.load(std::memory_order_relaxed) ==
      LoggingLevel::kUmaAndCrash) {
    return true;
  }
  if (g_threadpool_log_level.load(std::memory_order_relaxed) ==
      LoggingLevel::kUmaAndCrash) {
    return true;
  }
  return false;
}

// static
void HangWatcher::InvalidateActiveExpectations() {
  auto* const state =
      internal::HangWatchState::GetHangWatchStateForCurrentThread();
  if (!state) {
    // If the current thread is not under watch there is nothing to invalidate.
    return;
  }
  state->SetIgnoreCurrentWatchHangsInScope();
}

HangWatcher::HangWatcher()
    : monitor_period_(kMonitoringPeriod),
      should_monitor_(WaitableEvent::ResetPolicy::AUTOMATIC),
      thread_(this, kThreadName),
      tick_clock_(base::DefaultTickClock::GetInstance()),
      memory_pressure_listener_(
          FROM_HERE,
          base::BindRepeating(&HangWatcher::OnMemoryPressure,
                              base::Unretained(this))) {
  // |thread_checker_| should not be bound to the constructing thread.
  DETACH_FROM_THREAD(hang_watcher_thread_checker_);

  should_monitor_.declare_only_used_while_idle();

  DCHECK(!g_instance);
  g_instance = this;
}

// static
void HangWatcher::CreateHangWatcherInstance() {
  DCHECK(!g_instance);
  g_instance = new base::HangWatcher();
  // The hang watcher is leaked to make sure it survives all watched threads.
  ANNOTATE_LEAKING_OBJECT_PTR(g_instance);
}

#if !BUILDFLAG(IS_NACL)
debug::ScopedCrashKeyString
HangWatcher::GetTimeSinceLastCriticalMemoryPressureCrashKey() {
  DCHECK_CALLED_ON_VALID_THREAD(hang_watcher_thread_checker_);

  // The crash key size is large enough to hold the biggest possible return
  // value from base::TimeDelta::InSeconds().
  constexpr debug::CrashKeySize kCrashKeyContentSize =
      debug::CrashKeySize::Size32;
  DCHECK_GE(static_cast<uint64_t>(kCrashKeyContentSize),
            base::NumberToString(std::numeric_limits<int64_t>::max()).size());

  static debug::CrashKeyString* crash_key = AllocateCrashKeyString(
      "seconds-since-last-memory-pressure", kCrashKeyContentSize);

  const base::TimeTicks last_critical_memory_pressure_time =
      last_critical_memory_pressure_.load(std::memory_order_relaxed);
  if (last_critical_memory_pressure_time.is_null()) {
    constexpr char kNoMemoryPressureMsg[] = "No critical memory pressure";
    static_assert(
        std::size(kNoMemoryPressureMsg) <=
            static_cast<uint64_t>(kCrashKeyContentSize),
        "The crash key is too small to hold \"No critical memory pressure\".");
    return debug::ScopedCrashKeyString(crash_key, kNoMemoryPressureMsg);
  } else {
    base::TimeDelta time_since_last_critical_memory_pressure =
        base::TimeTicks::Now() - last_critical_memory_pressure_time;
    return debug::ScopedCrashKeyString(
        crash_key, base::NumberToString(
                       time_since_last_critical_memory_pressure.InSeconds()));
  }
}
#endif

std::string HangWatcher::GetTimeSinceLastSystemPowerResumeCrashKeyValue()
    const {
  DCHECK_CALLED_ON_VALID_THREAD(hang_watcher_thread_checker_);

  const TimeTicks last_system_power_resume_time =
      PowerMonitor::GetLastSystemResumeTime();
  if (last_system_power_resume_time.is_null())
    return "Never suspended";
  if (last_system_power_resume_time == TimeTicks::Max())
    return "Power suspended";

  const TimeDelta time_since_last_system_resume =
      TimeTicks::Now() - last_system_power_resume_time;
  return NumberToString(time_since_last_system_resume.InSeconds());
}
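
// Example values (illustrative) for the crash key above: "Never suspended",
// "Power suspended", or the number of seconds since the last resume, e.g.
// "42".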

void HangWatcher::OnMemoryPressure(
    base::MemoryPressureListener::MemoryPressureLevel memory_pressure_level) {
  if (memory_pressure_level ==
      base::MemoryPressureListener::MEMORY_PRESSURE_LEVEL_CRITICAL) {
    last_critical_memory_pressure_.store(base::TimeTicks::Now(),
                                         std::memory_order_relaxed);
  }
}

HangWatcher::~HangWatcher() {
  DCHECK_CALLED_ON_VALID_THREAD(constructing_thread_checker_);
  DCHECK_EQ(g_instance, this);
  DCHECK(watch_states_.empty());
  g_instance = nullptr;
  Stop();
}

void HangWatcher::Start() {
  thread_.Start();
}

void HangWatcher::Stop() {
  g_keep_monitoring.store(false, std::memory_order_relaxed);
  should_monitor_.Signal();
  thread_.Join();

  // In production, HangWatcher is always leaked, but during testing it may be
  // stopped and restarted using a new instance. This makes sure the next call
  // to Start() will actually monitor in that case.
  g_keep_monitoring.store(true, std::memory_order_relaxed);
}

bool HangWatcher::IsWatchListEmpty() {
  AutoLock auto_lock(watch_state_lock_);
  return watch_states_.empty();
}

void HangWatcher::Wait() {
  while (true) {
    // Amount by which the actual time spent sleeping can deviate from
    // the target time and still be considered timely.
    constexpr base::TimeDelta kWaitDriftTolerance = base::Milliseconds(100);

    const base::TimeTicks time_before_wait = tick_clock_->NowTicks();

    // Sleep until next scheduled monitoring or until signaled.
    const bool was_signaled = should_monitor_.TimedWait(monitor_period_);

    if (after_wait_callback_)
      after_wait_callback_.Run(time_before_wait);

    const base::TimeTicks time_after_wait = tick_clock_->NowTicks();
    const base::TimeDelta wait_time = time_after_wait - time_before_wait;
    const bool wait_was_normal =
        wait_time <= (monitor_period_ + kWaitDriftTolerance);

    UMA_HISTOGRAM_TIMES("HangWatcher.SleepDrift.BrowserProcess",
                        wait_time - monitor_period_);

    if (!wait_was_normal) {
      // If the time spent waiting was too high it might indicate the machine
      // is very slow or that it went to sleep. In any case we can't trust the
      // WatchHangsInScopes that are currently live. Update the ignore
      // threshold to make sure they don't trigger a hang on subsequent
      // monitors, then keep waiting.

      base::AutoLock auto_lock(watch_state_lock_);

      // Find the latest deadline among the live watch states. They might
      // change atomically while iterating but that's fine because if they do,
      // the new WatchHangsInScope was constructed very soon after the
      // abnormal sleep happened and might still be affected by the root
      // cause. Ignoring it is cautious and harmless.
      base::TimeTicks latest_deadline;
      for (const auto& state : watch_states_) {
        base::TimeTicks deadline = state->GetDeadline();
        if (deadline > latest_deadline) {
          latest_deadline = deadline;
        }
      }

      deadline_ignore_threshold_ = latest_deadline;
    }

    // Stop waiting.
    if (wait_was_normal || was_signaled)
      return;
  }
}

void HangWatcher::Run() {
  // Monitor() should only run on |thread_|. Bind |thread_checker_| here to
  // make sure of that.
  DCHECK_CALLED_ON_VALID_THREAD(hang_watcher_thread_checker_);

  while (g_keep_monitoring.load(std::memory_order_relaxed)) {
    Wait();

    if (!IsWatchListEmpty() &&
        g_keep_monitoring.load(std::memory_order_relaxed)) {
      Monitor();
      if (after_monitor_closure_for_testing_) {
        after_monitor_closure_for_testing_.Run();
      }
    }
  }
}

// static
HangWatcher* HangWatcher::GetInstance() {
  return g_instance;
}

// static
void HangWatcher::RecordHang() {
  base::debug::DumpWithoutCrashing();
  NO_CODE_FOLDING();
}

ScopedClosureRunner HangWatcher::RegisterThreadInternal(
    ThreadType thread_type) {
  AutoLock auto_lock(watch_state_lock_);
  CHECK(base::FeatureList::GetInstance());

  // Do not install a WatchState if the results would never be observable.
  if (!ThreadTypeLoggingLevelGreaterOrEqual(thread_type,
                                            LoggingLevel::kUmaOnly)) {
    return ScopedClosureRunner(base::DoNothing());
  }

  watch_states_.push_back(
      internal::HangWatchState::CreateHangWatchStateForCurrentThread(
          thread_type));
  return ScopedClosureRunner(BindOnce(&HangWatcher::UnregisterThread,
                                      Unretained(HangWatcher::GetInstance())));
}

// static
ScopedClosureRunner HangWatcher::RegisterThread(ThreadType thread_type) {
  if (!GetInstance()) {
    return ScopedClosureRunner();
  }

  return GetInstance()->RegisterThreadInternal(thread_type);
}

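// Example (illustrative): a thread that wants to be watched keeps the
// returned ScopedClosureRunner alive; destroying it runs UnregisterThread()
// and removes the thread from the watch list:
//
//   ScopedClosureRunner unregister_closure =
//       base::HangWatcher::RegisterThread(
//           base::HangWatcher::ThreadType::kMainThread);
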
base::TimeTicks HangWatcher::WatchStateSnapShot::GetHighestDeadline() const {
  DCHECK(IsActionable());

  // Since entries are sorted in increasing order the last entry is the largest
  // one.
  return hung_watch_state_copies_.back().deadline;
}

HangWatcher::WatchStateSnapShot::WatchStateSnapShot() = default;

void HangWatcher::WatchStateSnapShot::Init(
    const HangWatchStates& watch_states,
    base::TimeTicks deadline_ignore_threshold) {
  DCHECK(!initialized_);

  // No matter if the snapshot is actionable or not, after this function
  // it will have been initialized.
  initialized_ = true;

  const base::TimeTicks now = base::TimeTicks::Now();
  bool all_threads_marked = true;
  bool found_deadline_before_ignore_threshold = false;

  // Use an std::array to store the hang counts to avoid allocations. The
  // numerical values of the HangWatcher::ThreadType enum are used to index
  // into the array. A |kInvalidHangCount| is used to signify there were no
  // threads of the type found.
  constexpr size_t kHangCountArraySize =
      static_cast<std::size_t>(base::HangWatcher::ThreadType::kMax) + 1;
  std::array<int, kHangCountArraySize> hung_counts_per_thread_type;

  constexpr int kInvalidHangCount = -1;
  hung_counts_per_thread_type.fill(kInvalidHangCount);

  // Will be true if any of the hung threads has a logging level high enough,
  // as defined through Finch params, to warrant dumping a crash.
  bool any_hung_thread_has_dumping_enabled = false;

  // Copy hung thread information.
  for (const auto& watch_state : watch_states) {
    uint64_t flags;
    TimeTicks deadline;
    std::tie(flags, deadline) = watch_state->GetFlagsAndDeadline();

    if (deadline <= deadline_ignore_threshold) {
      found_deadline_before_ignore_threshold = true;
    }

    if (internal::HangWatchDeadline::IsFlagSet(
            internal::HangWatchDeadline::Flag::kIgnoreCurrentWatchHangsInScope,
            flags)) {
      continue;
    }

    // If a thread type is monitored and did not hang it still needs to be
    // logged as a zero count.
    const size_t hang_count_index =
        static_cast<size_t>(watch_state.get()->thread_type());
    if (hung_counts_per_thread_type[hang_count_index] == kInvalidHangCount) {
      hung_counts_per_thread_type[hang_count_index] = 0;
    }

    // Only copy hung threads.
    if (deadline <= now) {
      ++hung_counts_per_thread_type[hang_count_index];

      if (ThreadTypeLoggingLevelGreaterOrEqual(watch_state.get()->thread_type(),
                                               LoggingLevel::kUmaAndCrash)) {
        any_hung_thread_has_dumping_enabled = true;
      }

#if BUILDFLAG(ENABLE_BASE_TRACING)
      // Emit trace events for monitored threads.
      if (ThreadTypeLoggingLevelGreaterOrEqual(watch_state.get()->thread_type(),
                                               LoggingLevel::kUmaOnly)) {
        const PlatformThreadId thread_id = watch_state.get()->GetThreadID();
        const auto track = perfetto::Track::FromPointer(
            this, perfetto::ThreadTrack::ForThread(thread_id));
        TRACE_EVENT_BEGIN("base", "HangWatcher::ThreadHung", track, deadline);
        TRACE_EVENT_END("base", track, now);
        // TODO(crbug.com/1021571): Remove this once fixed.
        PERFETTO_INTERNAL_ADD_EMPTY_EVENT();
      }
#endif

      // Attempt to mark the thread as needing to stay within its current
      // WatchHangsInScope until capture is complete.
      bool thread_marked = watch_state->SetShouldBlockOnHang(flags, deadline);

      // If marking some threads already failed the snapshot won't be kept so
      // there is no need to keep adding to it. The loop doesn't abort though
      // to keep marking the other threads. If these threads remain hung until
      // the next capture then they'll already be marked and will be included
      // in the capture at that time.
      if (thread_marked && all_threads_marked) {
        hung_watch_state_copies_.push_back(
            WatchStateCopy{deadline, watch_state.get()->GetThreadID()});
      } else {
        all_threads_marked = false;
      }
    }
  }

  // Log the hung thread counts to histograms for each thread type if any
  // threads of that type were found.
  for (size_t i = 0; i < kHangCountArraySize; ++i) {
    const int hang_count = hung_counts_per_thread_type[i];
    const HangWatcher::ThreadType thread_type =
        static_cast<HangWatcher::ThreadType>(i);
    if (hang_count != kInvalidHangCount &&
        ThreadTypeLoggingLevelGreaterOrEqual(thread_type,
                                             LoggingLevel::kUmaOnly)) {
      LogHungThreadCountHistogram(thread_type, hang_count);
    }
  }

  // Three cases can invalidate this snapshot and prevent the capture of the
  // hang.
  //
  // 1. Some threads could not be marked for blocking so this snapshot isn't
  // actionable since marked threads could be hung because of unmarked ones.
  // If only the marked threads were captured the information would be
  // incomplete.
  //
  // 2. Any of the threads have a deadline before |deadline_ignore_threshold|.
  // If any thread is ignored it reduces the confidence in the whole state and
  // it's better to avoid capturing misleading data.
  //
  // 3. The hung threads found were all of types that are not configured
  // through Finch to trigger a crash dump.
  //
  if (!all_threads_marked || found_deadline_before_ignore_threshold ||
      !any_hung_thread_has_dumping_enabled) {
    hung_watch_state_copies_.clear();
    return;
  }

  // Sort |hung_watch_state_copies_| in order of decreasing hang severity so
  // the most severe hang is first in the list.
  ranges::sort(hung_watch_state_copies_,
               [](const WatchStateCopy& lhs, const WatchStateCopy& rhs) {
                 return lhs.deadline < rhs.deadline;
               });
}

void HangWatcher::WatchStateSnapShot::Clear() {
  hung_watch_state_copies_.clear();
  initialized_ = false;
}

HangWatcher::WatchStateSnapShot::WatchStateSnapShot(
    const WatchStateSnapShot& other) = default;

HangWatcher::WatchStateSnapShot::~WatchStateSnapShot() = default;

std::string HangWatcher::WatchStateSnapShot::PrepareHungThreadListCrashKey()
    const {
  DCHECK(IsActionable());

  // Build a crash key string that contains the ids of the hung threads.
  constexpr char kSeparator{'|'};
  std::string list_of_hung_thread_ids;

  // Add as many thread ids to the crash key as possible.
  for (const WatchStateCopy& copy : hung_watch_state_copies_) {
    std::string fragment = base::NumberToString(copy.thread_id) + kSeparator;
    if (list_of_hung_thread_ids.size() + fragment.size() <
        static_cast<std::size_t>(debug::CrashKeySize::Size256)) {
      list_of_hung_thread_ids += fragment;
    } else {
      // Respect the priority ordering of thread ids in the crash key by
      // stopping construction as soon as one does not fit. This avoids
      // including lesser-priority ids while omitting more important ones.
      break;
    }
  }

  return list_of_hung_thread_ids;
}

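// For example (illustrative): three hung threads with ids 771, 772 and 773
// would make PrepareHungThreadListCrashKey() return "771|772|773|".
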
bool HangWatcher::WatchStateSnapShot::IsActionable() const {
  DCHECK(initialized_);
  return !hung_watch_state_copies_.empty();
}

HangWatcher::WatchStateSnapShot HangWatcher::GrabWatchStateSnapshotForTesting()
    const {
  WatchStateSnapShot snapshot;
  snapshot.Init(watch_states_, deadline_ignore_threshold_);
  return snapshot;
}

void HangWatcher::Monitor() {
  DCHECK_CALLED_ON_VALID_THREAD(hang_watcher_thread_checker_);
  AutoLock auto_lock(watch_state_lock_);

  // If all threads unregistered since this function was invoked there's
  // nothing to do anymore.
  if (watch_states_.empty())
    return;

  watch_state_snapshot_.Init(watch_states_, deadline_ignore_threshold_);

  if (watch_state_snapshot_.IsActionable()) {
    DoDumpWithoutCrashing(watch_state_snapshot_);
  }

  watch_state_snapshot_.Clear();
}

void HangWatcher::DoDumpWithoutCrashing(
    const WatchStateSnapShot& watch_state_snapshot) {
  TRACE_EVENT("base", "HangWatcher::DoDumpWithoutCrashing");

  capture_in_progress_.store(true, std::memory_order_relaxed);
  base::AutoLock scope_lock(capture_lock_);

#if !BUILDFLAG(IS_NACL)
  const std::string list_of_hung_thread_ids =
      watch_state_snapshot.PrepareHungThreadListCrashKey();

  static debug::CrashKeyString* crash_key = AllocateCrashKeyString(
      "list-of-hung-threads", debug::CrashKeySize::Size256);

  const debug::ScopedCrashKeyString list_of_hung_threads_crash_key_string(
      crash_key, list_of_hung_thread_ids);

  const debug::ScopedCrashKeyString
      time_since_last_critical_memory_pressure_crash_key_string =
          GetTimeSinceLastCriticalMemoryPressureCrashKey();

  SCOPED_CRASH_KEY_STRING32("HangWatcher", "seconds-since-last-resume",
                            GetTimeSinceLastSystemPowerResumeCrashKeyValue());
#endif

  // To avoid capturing more than one hang that blames a subset of the same
  // threads it's necessary to keep track of the furthest deadline that
  // contributed to declaring a hang. Only once all threads have deadlines
  // past this point can we be sure that a newly discovered hang is not
  // directly related.
  // Example:
  // **********************************************************************
  // Timeline A : L------1-------2----------3-------4----------N-----------
  // Timeline B : -------2----------3-------4----------L----5------N-------
  // Timeline C : L----------------------------5------6----7---8------9---N
  // **********************************************************************
  // In the example, when a Monitor() happens during timeline A,
  // |deadline_ignore_threshold_| (L) is at time zero and deadlines (1-4)
  // are before Now() (N). A hang is captured and L is updated. During
  // the next Monitor() (timeline B) a new deadline is over but we can't
  // capture a hang because deadlines 2-4 are still live and already counted
  // toward a hang. During a third monitor (timeline C) all live deadlines
  // are now after L and a second hang can be recorded.
  base::TimeTicks latest_expired_deadline =
      watch_state_snapshot.GetHighestDeadline();

  if (on_hang_closure_for_testing_)
    on_hang_closure_for_testing_.Run();
  else
    RecordHang();

  // Update after running the actual capture.
  deadline_ignore_threshold_ = latest_expired_deadline;

  capture_in_progress_.store(false, std::memory_order_relaxed);
}

void HangWatcher::SetAfterMonitorClosureForTesting(
    base::RepeatingClosure closure) {
  DCHECK_CALLED_ON_VALID_THREAD(constructing_thread_checker_);
  after_monitor_closure_for_testing_ = std::move(closure);
}

void HangWatcher::SetOnHangClosureForTesting(base::RepeatingClosure closure) {
  DCHECK_CALLED_ON_VALID_THREAD(constructing_thread_checker_);
  on_hang_closure_for_testing_ = std::move(closure);
}

void HangWatcher::SetMonitoringPeriodForTesting(base::TimeDelta period) {
  DCHECK_CALLED_ON_VALID_THREAD(constructing_thread_checker_);
  monitor_period_ = period;
}

void HangWatcher::SetAfterWaitCallbackForTesting(
    RepeatingCallback<void(TimeTicks)> callback) {
  DCHECK_CALLED_ON_VALID_THREAD(constructing_thread_checker_);
  after_wait_callback_ = callback;
}

void HangWatcher::SignalMonitorEventForTesting() {
  DCHECK_CALLED_ON_VALID_THREAD(constructing_thread_checker_);
  should_monitor_.Signal();
}

// static
void HangWatcher::StopMonitoringForTesting() {
  g_keep_monitoring.store(false, std::memory_order_relaxed);
}

void HangWatcher::SetTickClockForTesting(const base::TickClock* tick_clock) {
  tick_clock_ = tick_clock;
}

void HangWatcher::BlockIfCaptureInProgress() {
  // Makes a best-effort attempt to block execution if a hang is currently
  // being captured. Only block on |capture_lock| if |capture_in_progress_|
  // hints that it's already held, to avoid serializing all threads on this
  // function when no hang capture is in progress.
  if (capture_in_progress_.load(std::memory_order_relaxed))
    base::AutoLock hang_lock(capture_lock_);
}

void HangWatcher::UnregisterThread() {
  AutoLock auto_lock(watch_state_lock_);

  auto it = ranges::find(
      watch_states_,
      internal::HangWatchState::GetHangWatchStateForCurrentThread(),
      &std::unique_ptr<internal::HangWatchState>::get);

  // Thread should be registered to get unregistered.
  DCHECK(it != watch_states_.end());

  watch_states_.erase(it);
}

namespace internal {
namespace {

constexpr uint64_t kOnlyDeadlineMask = 0x00FF'FFFF'FFFF'FFFFu;
constexpr uint64_t kOnlyFlagsMask = ~kOnlyDeadlineMask;
constexpr uint64_t kMaximumFlag = 0x8000'0000'0000'0000u;
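
// Bit layout implied by the masks above (illustrative): the top 8 bits of the
// 64-bit word hold the flags (kOnlyFlagsMask == 0xFF00'0000'0000'0000u, with
// kMaximumFlag at bit 63) and the low 56 bits hold the deadline's TimeTicks
// internal value.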

// Use as a mask to keep persistent flags and the deadline.
constexpr uint64_t kPersistentFlagsAndDeadlineMask =
    kOnlyDeadlineMask |
    static_cast<uint64_t>(
        HangWatchDeadline::Flag::kIgnoreCurrentWatchHangsInScope);
}  // namespace

// Flag binary representation assertions.
static_assert(
    static_cast<uint64_t>(HangWatchDeadline::Flag::kMinValue) >
        kOnlyDeadlineMask,
    "Invalid numerical value for flag. Would interfere with bits of data.");
static_assert(static_cast<uint64_t>(HangWatchDeadline::Flag::kMaxValue) <=
                  kMaximumFlag,
              "A flag can only set a single bit.");

HangWatchDeadline::HangWatchDeadline() = default;
HangWatchDeadline::~HangWatchDeadline() = default;

std::pair<uint64_t, TimeTicks> HangWatchDeadline::GetFlagsAndDeadline() const {
  uint64_t bits = bits_.load(std::memory_order_relaxed);
  return std::make_pair(ExtractFlags(bits),
                        DeadlineFromBits(ExtractDeadline(bits)));
}

TimeTicks HangWatchDeadline::GetDeadline() const {
  return DeadlineFromBits(
      ExtractDeadline(bits_.load(std::memory_order_relaxed)));
}

// static
TimeTicks HangWatchDeadline::Max() {
  // |kOnlyDeadlineMask| has all the bits reserved for the TimeTicks value
  // set. This means it also represents the highest representable value.
  return DeadlineFromBits(kOnlyDeadlineMask);
}

// static
bool HangWatchDeadline::IsFlagSet(Flag flag, uint64_t flags) {
  return static_cast<uint64_t>(flag) & flags;
}

void HangWatchDeadline::SetDeadline(TimeTicks new_deadline) {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
  DCHECK(new_deadline <= Max()) << "Value too high to be represented.";
  DCHECK(new_deadline >= TimeTicks{}) << "Value cannot be negative.";

  if (switch_bits_callback_for_testing_) {
    const uint64_t switched_in_bits = SwitchBitsForTesting();
    // If a concurrent deadline change is tested it cannot have a deadline or
    // persistent flag change since those always happen on the same thread.
    DCHECK((switched_in_bits & kPersistentFlagsAndDeadlineMask) == 0u);
  }

  // Discard all non-persistent flags and apply deadline change.
  const uint64_t old_bits = bits_.load(std::memory_order_relaxed);
  const uint64_t new_flags =
      ExtractFlags(old_bits & kPersistentFlagsAndDeadlineMask);
  bits_.store(new_flags | ExtractDeadline(static_cast<uint64_t>(
                              new_deadline.ToInternalValue())),
              std::memory_order_relaxed);
}

// TODO(crbug.com/1087026): Add flag DCHECKs here.
bool HangWatchDeadline::SetShouldBlockOnHang(uint64_t old_flags,
                                             TimeTicks old_deadline) {
  DCHECK(old_deadline <= Max()) << "Value too high to be represented.";
  DCHECK(old_deadline >= TimeTicks{}) << "Value cannot be negative.";

  // Set the kShouldBlockOnHang flag only if |bits_| did not change since it
  // was read. kShouldBlockOnHang is the only non-persistent flag and should
  // never be set twice. Persistent flags and deadline changes are done from
  // the same thread so there is no risk of losing concurrently added
  // information.
  uint64_t old_bits =
      old_flags | static_cast<uint64_t>(old_deadline.ToInternalValue());
  const uint64_t desired_bits =
      old_bits | static_cast<uint64_t>(Flag::kShouldBlockOnHang);

  // If a test needs to simulate |bits_| changing since calling this function
  // this happens now.
  if (switch_bits_callback_for_testing_) {
    const uint64_t switched_in_bits = SwitchBitsForTesting();

    // Injecting the flag being tested is invalid.
    DCHECK(!IsFlagSet(Flag::kShouldBlockOnHang, switched_in_bits));
  }

  return bits_.compare_exchange_weak(old_bits, desired_bits,
                                     std::memory_order_relaxed,
                                     std::memory_order_relaxed);
}

void HangWatchDeadline::SetIgnoreCurrentWatchHangsInScope() {
  SetPersistentFlag(Flag::kIgnoreCurrentWatchHangsInScope);
}

void HangWatchDeadline::UnsetIgnoreCurrentWatchHangsInScope() {
  ClearPersistentFlag(Flag::kIgnoreCurrentWatchHangsInScope);
}

void HangWatchDeadline::SetPersistentFlag(Flag flag) {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
  if (switch_bits_callback_for_testing_)
    SwitchBitsForTesting();
  bits_.fetch_or(static_cast<uint64_t>(flag), std::memory_order_relaxed);
}

void HangWatchDeadline::ClearPersistentFlag(Flag flag) {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
  if (switch_bits_callback_for_testing_)
    SwitchBitsForTesting();
  bits_.fetch_and(~(static_cast<uint64_t>(flag)), std::memory_order_relaxed);
}

// static
uint64_t HangWatchDeadline::ExtractFlags(uint64_t bits) {
  return bits & kOnlyFlagsMask;
}

// static
uint64_t HangWatchDeadline::ExtractDeadline(uint64_t bits) {
  return bits & kOnlyDeadlineMask;
}

// static
TimeTicks HangWatchDeadline::DeadlineFromBits(uint64_t bits) {
  // |kOnlyDeadlineMask| has all the deadline bits set to 1 so is the largest
  // representable value.
  DCHECK(bits <= kOnlyDeadlineMask)
      << "Flags bits are set. Remove them before returning deadline.";
  static_assert(kOnlyDeadlineMask <= std::numeric_limits<int64_t>::max());
  return TimeTicks::FromInternalValue(static_cast<int64_t>(bits));
}

bool HangWatchDeadline::IsFlagSet(Flag flag) const {
  return bits_.load(std::memory_order_relaxed) & static_cast<uint64_t>(flag);
}

void HangWatchDeadline::SetSwitchBitsClosureForTesting(
    RepeatingCallback<uint64_t(void)> closure) {
  switch_bits_callback_for_testing_ = closure;
}

void HangWatchDeadline::ResetSwitchBitsClosureForTesting() {
  DCHECK(switch_bits_callback_for_testing_);
  switch_bits_callback_for_testing_.Reset();
}

uint64_t HangWatchDeadline::SwitchBitsForTesting() {
  DCHECK(switch_bits_callback_for_testing_);

  const uint64_t old_bits = bits_.load(std::memory_order_relaxed);
  const uint64_t new_bits = switch_bits_callback_for_testing_.Run();
  const uint64_t old_flags = ExtractFlags(old_bits);

  const uint64_t switched_in_bits = old_flags | new_bits;
  bits_.store(switched_in_bits, std::memory_order_relaxed);
  return switched_in_bits;
}

HangWatchState::HangWatchState(HangWatcher::ThreadType thread_type)
    : resetter_(&hang_watch_state, this, nullptr), thread_type_(thread_type) {
// TODO(crbug.com/1223033): Remove this once macOS uses system-wide ids.
// On macOS the thread ids used by CrashPad are not the same as the ones
// provided by PlatformThread. Make sure to use the same for correct
// attribution.
#if BUILDFLAG(IS_MAC)
  uint64_t thread_id;
  pthread_threadid_np(pthread_self(), &thread_id);
  thread_id_ = checked_cast<PlatformThreadId>(thread_id);
#else
  thread_id_ = PlatformThread::CurrentId();
#endif
}

HangWatchState::~HangWatchState() {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);

  DCHECK_EQ(GetHangWatchStateForCurrentThread(), this);

#if DCHECK_IS_ON()
  // Destroying the HangWatchState should not be done if there are live
  // WatchHangsInScopes.
  DCHECK(!current_watch_hangs_in_scope_);
#endif
}

// static
std::unique_ptr<HangWatchState>
HangWatchState::CreateHangWatchStateForCurrentThread(
    HangWatcher::ThreadType thread_type) {
  // Allocate a watch state object for this thread.
  std::unique_ptr<HangWatchState> hang_state =
      std::make_unique<HangWatchState>(thread_type);

  // Setting the thread local worked.
  DCHECK_EQ(GetHangWatchStateForCurrentThread(), hang_state.get());

  // Transfer ownership to caller.
  return hang_state;
}

TimeTicks HangWatchState::GetDeadline() const {
  return deadline_.GetDeadline();
}

std::pair<uint64_t, TimeTicks> HangWatchState::GetFlagsAndDeadline() const {
  return deadline_.GetFlagsAndDeadline();
}

void HangWatchState::SetDeadline(TimeTicks deadline) {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
  deadline_.SetDeadline(deadline);
}

bool HangWatchState::IsOverDeadline() const {
  return TimeTicks::Now() > deadline_.GetDeadline();
}

void HangWatchState::SetIgnoreCurrentWatchHangsInScope() {
  deadline_.SetIgnoreCurrentWatchHangsInScope();
}

void HangWatchState::UnsetIgnoreCurrentWatchHangsInScope() {
  deadline_.UnsetIgnoreCurrentWatchHangsInScope();
}

bool HangWatchState::SetShouldBlockOnHang(uint64_t old_flags,
                                          TimeTicks old_deadline) {
  return deadline_.SetShouldBlockOnHang(old_flags, old_deadline);
}

bool HangWatchState::IsFlagSet(HangWatchDeadline::Flag flag) {
  return deadline_.IsFlagSet(flag);
}

#if DCHECK_IS_ON()
void HangWatchState::SetCurrentWatchHangsInScope(
    WatchHangsInScope* current_hang_watch_scope_enable) {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
  current_watch_hangs_in_scope_ = current_hang_watch_scope_enable;
}

WatchHangsInScope* HangWatchState::GetCurrentWatchHangsInScope() {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
  return current_watch_hangs_in_scope_;
}
#endif

HangWatchDeadline* HangWatchState::GetHangWatchDeadlineForTesting() {
  return &deadline_;
}

void HangWatchState::IncrementNestingLevel() {
  ++nesting_level_;
}

void HangWatchState::DecrementNestingLevel() {
  --nesting_level_;
}

// static
HangWatchState* HangWatchState::GetHangWatchStateForCurrentThread() {
  // Workaround false-positive MSAN use-of-uninitialized-value on
  // thread_local storage for loaded libraries:
  // https://github.com/google/sanitizers/issues/1265
  MSAN_UNPOISON(&hang_watch_state, sizeof(internal::HangWatchState*));

  return hang_watch_state;
}

PlatformThreadId HangWatchState::GetThreadID() const {
  return thread_id_;
}

}  // namespace internal

}  // namespace base