// Copyright 2020 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "base/threading/hang_watcher.h"

#include <atomic>
#include <utility>

#include "base/containers/flat_map.h"
#include "base/debug/alias.h"
#include "base/debug/crash_logging.h"
#include "base/debug/dump_without_crashing.h"
#include "base/debug/leak_annotations.h"
#include "base/feature_list.h"
#include "base/functional/bind.h"
#include "base/functional/callback_helpers.h"
#include "base/metrics/field_trial_params.h"
#include "base/metrics/histogram_macros.h"
#include "base/power_monitor/power_monitor.h"
#include "base/ranges/algorithm.h"
#include "base/strings/string_number_conversions.h"
#include "base/synchronization/lock.h"
#include "base/synchronization/waitable_event.h"
#include "base/threading/platform_thread.h"
#include "base/threading/thread_checker.h"
#include "base/threading/thread_restrictions.h"
#include "base/threading/threading_features.h"
#include "base/time/default_tick_clock.h"
#include "base/time/time.h"
#include "base/trace_event/base_tracing.h"
#include "build/build_config.h"
#include "third_party/abseil-cpp/absl/base/attributes.h"

namespace base {

namespace {

// Defines how much logging happens when the HangWatcher monitors the threads.
// Logging levels are set per thread type through Finch. It's important that
// the order of the enum members stay the same and that their numerical
// values be in increasing order. The implementation of
// ThreadTypeLoggingLevelGreaterOrEqual() depends on it.
enum class LoggingLevel { kNone = 0, kUmaOnly = 1, kUmaAndCrash = 2 };
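
// Illustrative note (not part of the production logic): because the numeric
// values above are ordered, a thread type configured at kUmaAndCrash also
// satisfies checks against kUmaOnly. For example, if the IO thread is
// configured with kUmaAndCrash:
//   ThreadTypeLoggingLevelGreaterOrEqual(
//       HangWatcher::ThreadType::kIOThread, LoggingLevel::kUmaOnly);
//   // -> true, since kUmaAndCrash (2) >= kUmaOnly (1).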

HangWatcher* g_instance = nullptr;
ABSL_CONST_INIT thread_local internal::HangWatchState* hang_watch_state =
    nullptr;
std::atomic<bool> g_use_hang_watcher{false};
std::atomic<HangWatcher::ProcessType> g_hang_watcher_process_type{
    HangWatcher::ProcessType::kBrowserProcess};

std::atomic<LoggingLevel> g_threadpool_log_level{LoggingLevel::kNone};
std::atomic<LoggingLevel> g_io_thread_log_level{LoggingLevel::kNone};
std::atomic<LoggingLevel> g_main_thread_log_level{LoggingLevel::kNone};

// Indicates whether HangWatcher::Run() should return after the next monitoring.
std::atomic<bool> g_keep_monitoring{true};

// Emits the hung thread count histogram. |count| is the number of threads
// of type |thread_type| that were hung or became hung during the last
// monitoring window. This function should be invoked for each thread type
// encountered on each call to Monitor().
void LogHungThreadCountHistogram(HangWatcher::ThreadType thread_type,
                                 int count) {
  // In the case of unique threads like the IO or UI/Main thread, a count does
  // not make sense.
  const bool any_thread_hung = count >= 1;

  const HangWatcher::ProcessType process_type =
      g_hang_watcher_process_type.load(std::memory_order_relaxed);
  switch (process_type) {
    case HangWatcher::ProcessType::kUnknownProcess:
      break;

    case HangWatcher::ProcessType::kBrowserProcess:
      switch (thread_type) {
        case HangWatcher::ThreadType::kIOThread:
          UMA_HISTOGRAM_BOOLEAN(
              "HangWatcher.IsThreadHung.BrowserProcess."
              "IOThread",
              any_thread_hung);
          break;
        case HangWatcher::ThreadType::kMainThread:
          UMA_HISTOGRAM_BOOLEAN(
              "HangWatcher.IsThreadHung.BrowserProcess."
              "UIThread",
              any_thread_hung);
          break;
        case HangWatcher::ThreadType::kThreadPoolThread:
          // Not recorded for now.
          break;
      }
      break;

    case HangWatcher::ProcessType::kGPUProcess:
      // Not recorded for now.
      break;

    case HangWatcher::ProcessType::kRendererProcess:
      switch (thread_type) {
        case HangWatcher::ThreadType::kIOThread:
          UMA_HISTOGRAM_BOOLEAN(
              "HangWatcher.IsThreadHung.RendererProcess."
              "IOThread",
              any_thread_hung);
          break;
        case HangWatcher::ThreadType::kMainThread:
          UMA_HISTOGRAM_BOOLEAN(
              "HangWatcher.IsThreadHung.RendererProcess."
              "MainThread",
              any_thread_hung);
          break;
        case HangWatcher::ThreadType::kThreadPoolThread:
          // Not recorded for now.
          break;
      }
      break;

    case HangWatcher::ProcessType::kUtilityProcess:
      switch (thread_type) {
        case HangWatcher::ThreadType::kIOThread:
          UMA_HISTOGRAM_BOOLEAN(
              "HangWatcher.IsThreadHung.UtilityProcess."
              "IOThread",
              any_thread_hung);
          break;
        case HangWatcher::ThreadType::kMainThread:
          UMA_HISTOGRAM_BOOLEAN(
              "HangWatcher.IsThreadHung.UtilityProcess."
              "MainThread",
              any_thread_hung);
          break;
        case HangWatcher::ThreadType::kThreadPoolThread:
          // Not recorded for now.
          break;
      }
      break;
  }
}

// Returns true if |thread_type| was configured through Finch to have a logging
// level that is equal to or exceeds |logging_level|.
bool ThreadTypeLoggingLevelGreaterOrEqual(HangWatcher::ThreadType thread_type,
                                          LoggingLevel logging_level) {
  switch (thread_type) {
    case HangWatcher::ThreadType::kIOThread:
      return g_io_thread_log_level.load(std::memory_order_relaxed) >=
             logging_level;
    case HangWatcher::ThreadType::kMainThread:
      return g_main_thread_log_level.load(std::memory_order_relaxed) >=
             logging_level;
    case HangWatcher::ThreadType::kThreadPoolThread:
      return g_threadpool_log_level.load(std::memory_order_relaxed) >=
             logging_level;
  }
}

}  // namespace

// Determines if the HangWatcher is activated. When false, the HangWatcher
// thread is never started.
BASE_FEATURE(kEnableHangWatcher,
             "EnableHangWatcher",
             FEATURE_ENABLED_BY_DEFAULT);

BASE_FEATURE(kEnableHangWatcherInZygoteChildren,
             "EnableHangWatcherInZygoteChildren",
             FEATURE_ENABLED_BY_DEFAULT);

// Browser process.
constexpr base::FeatureParam<int> kIOThreadLogLevel{
    &kEnableHangWatcher, "io_thread_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};
constexpr base::FeatureParam<int> kUIThreadLogLevel{
    &kEnableHangWatcher, "ui_thread_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};
constexpr base::FeatureParam<int> kThreadPoolLogLevel{
    &kEnableHangWatcher, "threadpool_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};

// GPU process.
constexpr base::FeatureParam<int> kGPUProcessIOThreadLogLevel{
    &kEnableHangWatcher, "gpu_process_io_thread_log_level",
    static_cast<int>(LoggingLevel::kNone)};
constexpr base::FeatureParam<int> kGPUProcessMainThreadLogLevel{
    &kEnableHangWatcher, "gpu_process_main_thread_log_level",
    static_cast<int>(LoggingLevel::kNone)};
constexpr base::FeatureParam<int> kGPUProcessThreadPoolLogLevel{
    &kEnableHangWatcher, "gpu_process_threadpool_log_level",
    static_cast<int>(LoggingLevel::kNone)};

// Renderer process.
constexpr base::FeatureParam<int> kRendererProcessIOThreadLogLevel{
    &kEnableHangWatcher, "renderer_process_io_thread_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};
constexpr base::FeatureParam<int> kRendererProcessMainThreadLogLevel{
    &kEnableHangWatcher, "renderer_process_main_thread_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};
constexpr base::FeatureParam<int> kRendererProcessThreadPoolLogLevel{
    &kEnableHangWatcher, "renderer_process_threadpool_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};

// Utility process.
constexpr base::FeatureParam<int> kUtilityProcessIOThreadLogLevel{
    &kEnableHangWatcher, "utility_process_io_thread_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};
constexpr base::FeatureParam<int> kUtilityProcessMainThreadLogLevel{
    &kEnableHangWatcher, "utility_process_main_thread_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};
constexpr base::FeatureParam<int> kUtilityProcessThreadPoolLogLevel{
    &kEnableHangWatcher, "utility_process_threadpool_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};

constexpr const char* kThreadName = "HangWatcher";

// The time that the HangWatcher thread will sleep for between calls to
// Monitor(). Increasing or decreasing this does not modify the type of hangs
// that can be detected. It instead increases the probability that a call to
// Monitor() will happen at the right time to catch a hang. This has to be
// balanced with power/cpu use concerns as busy looping would catch almost all
// hangs but present unacceptable overhead. NOTE: If this period is ever
// changed, all metrics that depend on it, like HangWatcher.IsThreadHung,
// need to be updated.
constexpr auto kMonitoringPeriod = base::Seconds(10);
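
// Usage sketch (illustrative; HandleEvent() and DoWork() are hypothetical):
// a WatchHangsInScope declares the expectation that the code it covers
// completes within |timeout|:
//
//   void HandleEvent() {
//     WatchHangsInScope scope(base::Seconds(5));
//     DoWork();  // Flagged as hung if still running past the deadline when
//                // a Monitor() call happens to run.
//   }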

WatchHangsInScope::WatchHangsInScope(TimeDelta timeout) {
  internal::HangWatchState* current_hang_watch_state =
      HangWatcher::IsEnabled()
          ? internal::HangWatchState::GetHangWatchStateForCurrentThread()
          : nullptr;

  DCHECK(timeout >= base::TimeDelta()) << "Negative timeouts are invalid.";

  // Thread is not monitored, noop.
  if (!current_hang_watch_state) {
    took_effect_ = false;
    return;
  }

#if DCHECK_IS_ON()
  previous_watch_hangs_in_scope_ =
      current_hang_watch_state->GetCurrentWatchHangsInScope();
  current_hang_watch_state->SetCurrentWatchHangsInScope(this);
#endif

  auto [old_flags, old_deadline] =
      current_hang_watch_state->GetFlagsAndDeadline();

  // TODO(crbug.com/1034046): Check whether we are already over the deadline of
  // the previous WatchHangsInScope here by issuing only one TimeTicks::Now()
  // and reusing the value.

  previous_deadline_ = old_deadline;
  TimeTicks deadline = TimeTicks::Now() + timeout;
  current_hang_watch_state->SetDeadline(deadline);
  current_hang_watch_state->IncrementNestingLevel();

  const bool hangs_ignored_for_current_scope =
      internal::HangWatchDeadline::IsFlagSet(
          internal::HangWatchDeadline::Flag::kIgnoreCurrentWatchHangsInScope,
          old_flags);

  // If the current WatchHangsInScope is ignored, temporarily reactivate hang
  // watching for newly created WatchHangsInScopes. On exiting, hang watching
  // is suspended again to return to the original state.
  if (hangs_ignored_for_current_scope) {
    current_hang_watch_state->UnsetIgnoreCurrentWatchHangsInScope();
    set_hangs_ignored_on_exit_ = true;
  }
}

WatchHangsInScope::~WatchHangsInScope() {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);

  // If hang watching was not enabled at construction time there is nothing to
  // validate or undo.
  if (!took_effect_) {
    return;
  }

  // If the thread was unregistered since construction there is also nothing to
  // do.
  auto* const state =
      internal::HangWatchState::GetHangWatchStateForCurrentThread();
  if (!state) {
    return;
  }

  // If a hang is currently being captured we should block here so execution
  // stops and we avoid recording unrelated stack frames in the crash.
  if (state->IsFlagSet(internal::HangWatchDeadline::Flag::kShouldBlockOnHang)) {
    base::HangWatcher::GetInstance()->BlockIfCaptureInProgress();
  }

#if DCHECK_IS_ON()
  // Verify that no Scope was destructed out of order.
  DCHECK_EQ(this, state->GetCurrentWatchHangsInScope());
  state->SetCurrentWatchHangsInScope(previous_watch_hangs_in_scope_);
#endif

  if (state->nesting_level() == 1) {
    // If a call to InvalidateActiveExpectations() suspended hang watching
    // during the lifetime of this or any nested WatchHangsInScope it can now
    // safely be reactivated by clearing the ignore bit since this is the
    // outer-most scope.
    state->UnsetIgnoreCurrentWatchHangsInScope();
  } else if (set_hangs_ignored_on_exit_) {
    // Return to ignoring hangs since this was the previous state before hang
    // watching was temporarily enabled for this WatchHangsInScope only in the
    // constructor.
    state->SetIgnoreCurrentWatchHangsInScope();
  }

  // Reset the deadline to the value it had before entering this
  // WatchHangsInScope.
  state->SetDeadline(previous_deadline_);
  // TODO(crbug.com/1034046): Log when a WatchHangsInScope exits after its
  // deadline and that went undetected by the HangWatcher.

  state->DecrementNestingLevel();
}

// static
void HangWatcher::InitializeOnMainThread(ProcessType process_type,
                                         bool is_zygote_child) {
  DCHECK(!g_use_hang_watcher);
  DCHECK(g_io_thread_log_level == LoggingLevel::kNone);
  DCHECK(g_main_thread_log_level == LoggingLevel::kNone);
  DCHECK(g_threadpool_log_level == LoggingLevel::kNone);

  bool enable_hang_watcher = base::FeatureList::IsEnabled(kEnableHangWatcher);

#if BUILDFLAG(IS_LINUX) || BUILDFLAG(IS_CHROMEOS)
  if (is_zygote_child) {
    enable_hang_watcher =
        enable_hang_watcher &&
        base::FeatureList::IsEnabled(kEnableHangWatcherInZygoteChildren);
  }
#endif

  // Do not start HangWatcher in the GPU process until the issue related to
  // invalid magic signature in the GPU WatchDog is fixed
  // (https://crbug.com/1297760).
  if (process_type == ProcessType::kGPUProcess)
    enable_hang_watcher = false;

  g_use_hang_watcher.store(enable_hang_watcher, std::memory_order_relaxed);

  // Keep the process type.
  g_hang_watcher_process_type.store(process_type, std::memory_order_relaxed);

  // If hang watching is disabled as a whole there is no need to read the
  // params.
  if (!enable_hang_watcher)
    return;

  // Retrieve thread-specific config for hang watching.
  switch (process_type) {
    case HangWatcher::ProcessType::kUnknownProcess:
      break;

    case HangWatcher::ProcessType::kBrowserProcess:
      g_threadpool_log_level.store(
          static_cast<LoggingLevel>(kThreadPoolLogLevel.Get()),
          std::memory_order_relaxed);
      g_io_thread_log_level.store(
          static_cast<LoggingLevel>(kIOThreadLogLevel.Get()),
          std::memory_order_relaxed);
      g_main_thread_log_level.store(
          static_cast<LoggingLevel>(kUIThreadLogLevel.Get()),
          std::memory_order_relaxed);
      break;

    case HangWatcher::ProcessType::kGPUProcess:
      g_threadpool_log_level.store(
          static_cast<LoggingLevel>(kGPUProcessThreadPoolLogLevel.Get()),
          std::memory_order_relaxed);
      g_io_thread_log_level.store(
          static_cast<LoggingLevel>(kGPUProcessIOThreadLogLevel.Get()),
          std::memory_order_relaxed);
      g_main_thread_log_level.store(
          static_cast<LoggingLevel>(kGPUProcessMainThreadLogLevel.Get()),
          std::memory_order_relaxed);
      break;

    case HangWatcher::ProcessType::kRendererProcess:
      g_threadpool_log_level.store(
          static_cast<LoggingLevel>(kRendererProcessThreadPoolLogLevel.Get()),
          std::memory_order_relaxed);
      g_io_thread_log_level.store(
          static_cast<LoggingLevel>(kRendererProcessIOThreadLogLevel.Get()),
          std::memory_order_relaxed);
      g_main_thread_log_level.store(
          static_cast<LoggingLevel>(kRendererProcessMainThreadLogLevel.Get()),
          std::memory_order_relaxed);
      break;

    case HangWatcher::ProcessType::kUtilityProcess:
      g_threadpool_log_level.store(
          static_cast<LoggingLevel>(kUtilityProcessThreadPoolLogLevel.Get()),
          std::memory_order_relaxed);
      g_io_thread_log_level.store(
          static_cast<LoggingLevel>(kUtilityProcessIOThreadLogLevel.Get()),
          std::memory_order_relaxed);
      g_main_thread_log_level.store(
          static_cast<LoggingLevel>(kUtilityProcessMainThreadLogLevel.Get()),
          std::memory_order_relaxed);
      break;
  }
}

void HangWatcher::UnitializeOnMainThreadForTesting() {
  g_use_hang_watcher.store(false, std::memory_order_relaxed);
  g_threadpool_log_level.store(LoggingLevel::kNone, std::memory_order_relaxed);
  g_io_thread_log_level.store(LoggingLevel::kNone, std::memory_order_relaxed);
  g_main_thread_log_level.store(LoggingLevel::kNone, std::memory_order_relaxed);
}

// static
bool HangWatcher::IsEnabled() {
  return g_use_hang_watcher.load(std::memory_order_relaxed);
}

// static
bool HangWatcher::IsThreadPoolHangWatchingEnabled() {
  return g_threadpool_log_level.load(std::memory_order_relaxed) !=
         LoggingLevel::kNone;
}

// static
bool HangWatcher::IsIOThreadHangWatchingEnabled() {
  return g_io_thread_log_level.load(std::memory_order_relaxed) !=
         LoggingLevel::kNone;
}

// static
bool HangWatcher::IsCrashReportingEnabled() {
  if (g_main_thread_log_level.load(std::memory_order_relaxed) ==
      LoggingLevel::kUmaAndCrash) {
    return true;
  }
  if (g_io_thread_log_level.load(std::memory_order_relaxed) ==
      LoggingLevel::kUmaAndCrash) {
    return true;
  }
  if (g_threadpool_log_level.load(std::memory_order_relaxed) ==
      LoggingLevel::kUmaAndCrash) {
    return true;
  }
  return false;
}

// static
void HangWatcher::InvalidateActiveExpectations() {
  auto* const state =
      internal::HangWatchState::GetHangWatchStateForCurrentThread();
  if (!state) {
    // If the current thread is not under watch there is nothing to invalidate.
    return;
  }
  state->SetIgnoreCurrentWatchHangsInScope();
}

HangWatcher::HangWatcher()
    : monitor_period_(kMonitoringPeriod),
      should_monitor_(WaitableEvent::ResetPolicy::AUTOMATIC),
      thread_(this, kThreadName),
      tick_clock_(base::DefaultTickClock::GetInstance()),
      memory_pressure_listener_(
          FROM_HERE,
          base::BindRepeating(&HangWatcher::OnMemoryPressure,
                              base::Unretained(this))) {
  // |hang_watcher_thread_checker_| should not be bound to the constructing
  // thread.
  DETACH_FROM_THREAD(hang_watcher_thread_checker_);

  should_monitor_.declare_only_used_while_idle();

  DCHECK(!g_instance);
  g_instance = this;
}

// static
void HangWatcher::CreateHangWatcherInstance() {
  DCHECK(!g_instance);
  g_instance = new base::HangWatcher();
  // The hang watcher is leaked to make sure it survives all watched threads.
  ANNOTATE_LEAKING_OBJECT_PTR(g_instance);
}
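
// Startup sketch (illustrative ordering; the real call sites live outside
// this file):
//   HangWatcher::CreateHangWatcherInstance();
//   HangWatcher::InitializeOnMainThread(
//       HangWatcher::ProcessType::kBrowserProcess, /*is_zygote_child=*/false);
//   HangWatcher::GetInstance()->Start();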

#if !BUILDFLAG(IS_NACL)
debug::ScopedCrashKeyString
HangWatcher::GetTimeSinceLastCriticalMemoryPressureCrashKey() {
  DCHECK_CALLED_ON_VALID_THREAD(hang_watcher_thread_checker_);

  // The crash key size is large enough to hold the biggest possible return
  // value from base::TimeDelta::InSeconds().
  constexpr debug::CrashKeySize kCrashKeyContentSize =
      debug::CrashKeySize::Size32;
  DCHECK_GE(static_cast<uint64_t>(kCrashKeyContentSize),
            base::NumberToString(std::numeric_limits<int64_t>::max()).size());

  static debug::CrashKeyString* crash_key = AllocateCrashKeyString(
      "seconds-since-last-memory-pressure", kCrashKeyContentSize);

  const base::TimeTicks last_critical_memory_pressure_time =
      last_critical_memory_pressure_.load(std::memory_order_relaxed);
  if (last_critical_memory_pressure_time.is_null()) {
    constexpr char kNoMemoryPressureMsg[] = "No critical memory pressure";
    static_assert(
        std::size(kNoMemoryPressureMsg) <=
            static_cast<uint64_t>(kCrashKeyContentSize),
        "The crash key is too small to hold \"No critical memory pressure\".");
    return debug::ScopedCrashKeyString(crash_key, kNoMemoryPressureMsg);
  } else {
    base::TimeDelta time_since_last_critical_memory_pressure =
        base::TimeTicks::Now() - last_critical_memory_pressure_time;
    return debug::ScopedCrashKeyString(
        crash_key, base::NumberToString(
                       time_since_last_critical_memory_pressure.InSeconds()));
  }
}
#endif

std::string HangWatcher::GetTimeSinceLastSystemPowerResumeCrashKeyValue()
    const {
  DCHECK_CALLED_ON_VALID_THREAD(hang_watcher_thread_checker_);

  const TimeTicks last_system_power_resume_time =
      PowerMonitor::GetLastSystemResumeTime();
  if (last_system_power_resume_time.is_null())
    return "Never suspended";
  if (last_system_power_resume_time == TimeTicks::Max())
    return "Power suspended";

  const TimeDelta time_since_last_system_resume =
      TimeTicks::Now() - last_system_power_resume_time;
  return NumberToString(time_since_last_system_resume.InSeconds());
}

void HangWatcher::OnMemoryPressure(
    base::MemoryPressureListener::MemoryPressureLevel memory_pressure_level) {
  if (memory_pressure_level ==
      base::MemoryPressureListener::MEMORY_PRESSURE_LEVEL_CRITICAL) {
    last_critical_memory_pressure_.store(base::TimeTicks::Now(),
                                         std::memory_order_relaxed);
  }
}

HangWatcher::~HangWatcher() {
  DCHECK_CALLED_ON_VALID_THREAD(constructing_thread_checker_);
  DCHECK_EQ(g_instance, this);
  DCHECK(watch_states_.empty());
  g_instance = nullptr;
  Stop();
}

void HangWatcher::Start() {
  thread_.Start();
  thread_started_ = true;
}

void HangWatcher::Stop() {
  g_keep_monitoring.store(false, std::memory_order_relaxed);
  should_monitor_.Signal();
  thread_.Join();
  thread_started_ = false;

  // In production HangWatcher is always leaked, but during testing it may be
  // stopped and restarted using a new instance. This makes sure the next call
  // to Start() will actually monitor in that case.
  g_keep_monitoring.store(true, std::memory_order_relaxed);
}

bool HangWatcher::IsWatchListEmpty() {
  AutoLock auto_lock(watch_state_lock_);
  return watch_states_.empty();
}

void HangWatcher::Wait() {
  while (true) {
    // Amount by which the actual time spent sleeping can deviate from
    // the target time and still be considered timely.
    constexpr base::TimeDelta kWaitDriftTolerance = base::Milliseconds(100);

    const base::TimeTicks time_before_wait = tick_clock_->NowTicks();

    // Sleep until next scheduled monitoring or until signaled.
    const bool was_signaled = should_monitor_.TimedWait(monitor_period_);

    if (after_wait_callback_)
      after_wait_callback_.Run(time_before_wait);

    const base::TimeTicks time_after_wait = tick_clock_->NowTicks();
    const base::TimeDelta wait_time = time_after_wait - time_before_wait;
    const bool wait_was_normal =
        wait_time <= (monitor_period_ + kWaitDriftTolerance);

    UMA_HISTOGRAM_TIMES("HangWatcher.SleepDrift.BrowserProcess",
                        wait_time - monitor_period_);

    if (!wait_was_normal) {
      // If the time spent waiting was too high it might indicate the machine
      // is very slow or that it went to sleep. In any case we can't trust the
      // WatchHangsInScopes that are currently live. Update the ignore
      // threshold to make sure they don't trigger a hang on subsequent
      // monitors, then keep waiting.

      base::AutoLock auto_lock(watch_state_lock_);

      // Find the latest deadline among the live watch states. They might
      // change atomically while iterating but that's fine because if they do,
      // the new WatchHangsInScope was constructed very soon after the abnormal
      // sleep happened and might still be affected by the root cause. Ignoring
      // it is cautious and harmless.
      base::TimeTicks latest_deadline;
      for (const auto& state : watch_states_) {
        base::TimeTicks deadline = state->GetDeadline();
        if (deadline > latest_deadline) {
          latest_deadline = deadline;
        }
      }

      deadline_ignore_threshold_ = latest_deadline;
    }

    // Stop waiting.
    if (wait_was_normal || was_signaled)
      return;
  }
}

void HangWatcher::Run() {
  // Monitor() should only run on |thread_|. Bind
  // |hang_watcher_thread_checker_| here to make sure of that.
  DCHECK_CALLED_ON_VALID_THREAD(hang_watcher_thread_checker_);

  while (g_keep_monitoring.load(std::memory_order_relaxed)) {
    Wait();

    if (!IsWatchListEmpty() &&
        g_keep_monitoring.load(std::memory_order_relaxed)) {
      Monitor();
      if (after_monitor_closure_for_testing_) {
        after_monitor_closure_for_testing_.Run();
      }
    }
  }
}

// static
HangWatcher* HangWatcher::GetInstance() {
  return g_instance;
}

// static
void HangWatcher::RecordHang() {
  base::debug::DumpWithoutCrashing();
  NO_CODE_FOLDING();
}

ScopedClosureRunner HangWatcher::RegisterThreadInternal(
    ThreadType thread_type) {
  AutoLock auto_lock(watch_state_lock_);
  CHECK(base::FeatureList::GetInstance());

  // Do not install a WatchState if the results would never be observable.
  if (!ThreadTypeLoggingLevelGreaterOrEqual(thread_type,
                                            LoggingLevel::kUmaOnly)) {
    return ScopedClosureRunner(base::DoNothing());
  }

  watch_states_.push_back(
      internal::HangWatchState::CreateHangWatchStateForCurrentThread(
          thread_type));
  return ScopedClosureRunner(BindOnce(&HangWatcher::UnregisterThread,
                                      Unretained(HangWatcher::GetInstance())));
}

// static
ScopedClosureRunner HangWatcher::RegisterThread(ThreadType thread_type) {
  if (!GetInstance()) {
    return ScopedClosureRunner();
  }

  return GetInstance()->RegisterThreadInternal(thread_type);
}
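
// Registration sketch (illustrative): a watched thread keeps the returned
// ScopedClosureRunner alive for as long as it should be monitored; running
// or destroying it unregisters the thread.
//   ScopedClosureRunner unregister_closure = HangWatcher::RegisterThread(
//       HangWatcher::ThreadType::kThreadPoolThread);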

base::TimeTicks HangWatcher::WatchStateSnapShot::GetHighestDeadline() const {
  DCHECK(IsActionable());

  // Since entries are sorted in increasing order the last entry is the largest
  // one.
  return hung_watch_state_copies_.back().deadline;
}

HangWatcher::WatchStateSnapShot::WatchStateSnapShot() = default;

void HangWatcher::WatchStateSnapShot::Init(
    const HangWatchStates& watch_states,
    base::TimeTicks deadline_ignore_threshold) {
  DCHECK(!initialized_);

  // Whether or not the snapshot turns out to be actionable, it will have been
  // initialized after this function.
  initialized_ = true;

  const base::TimeTicks now = base::TimeTicks::Now();
  bool all_threads_marked = true;
  bool found_deadline_before_ignore_threshold = false;

  // Use an std::array to store the hang counts to avoid allocations. The
  // numerical values of the HangWatcher::ThreadType enum are used to index
  // into the array. A |kInvalidHangCount| is used to signify there were no
  // threads of that type found.
  constexpr size_t kHangCountArraySize =
      static_cast<std::size_t>(base::HangWatcher::ThreadType::kMax) + 1;
  std::array<int, kHangCountArraySize> hung_counts_per_thread_type;

  constexpr int kInvalidHangCount = -1;
  hung_counts_per_thread_type.fill(kInvalidHangCount);

  // Will be true if any of the hung threads has a logging level high enough,
  // as defined through Finch params, to warrant dumping a crash.
  bool any_hung_thread_has_dumping_enabled = false;

  // Copy hung thread information.
  for (const auto& watch_state : watch_states) {
    uint64_t flags;
    TimeTicks deadline;
    std::tie(flags, deadline) = watch_state->GetFlagsAndDeadline();

    if (deadline <= deadline_ignore_threshold) {
      found_deadline_before_ignore_threshold = true;
    }

    if (internal::HangWatchDeadline::IsFlagSet(
            internal::HangWatchDeadline::Flag::kIgnoreCurrentWatchHangsInScope,
            flags)) {
      continue;
    }

    // If a thread type is monitored and did not hang it still needs to be
    // logged as a zero count.
    const size_t hang_count_index =
        static_cast<size_t>(watch_state.get()->thread_type());
    if (hung_counts_per_thread_type[hang_count_index] == kInvalidHangCount) {
      hung_counts_per_thread_type[hang_count_index] = 0;
    }

    // Only copy hung threads.
    if (deadline <= now) {
      ++hung_counts_per_thread_type[hang_count_index];

      if (ThreadTypeLoggingLevelGreaterOrEqual(watch_state.get()->thread_type(),
                                               LoggingLevel::kUmaAndCrash)) {
        any_hung_thread_has_dumping_enabled = true;
      }

#if BUILDFLAG(ENABLE_BASE_TRACING)
      // Emit trace events for monitored threads.
      if (ThreadTypeLoggingLevelGreaterOrEqual(watch_state.get()->thread_type(),
                                               LoggingLevel::kUmaOnly)) {
        const PlatformThreadId thread_id = watch_state.get()->GetThreadID();
        const auto track = perfetto::Track::FromPointer(
            this, perfetto::ThreadTrack::ForThread(thread_id));
        TRACE_EVENT_BEGIN("base", "HangWatcher::ThreadHung", track, deadline);
        TRACE_EVENT_END("base", track, now);
        // TODO(crbug.com/1021571): Remove this once fixed.
        PERFETTO_INTERNAL_ADD_EMPTY_EVENT();
      }
#endif

      // Attempt to mark the thread as needing to stay within its current
      // WatchHangsInScope until capture is complete.
      bool thread_marked = watch_state->SetShouldBlockOnHang(flags, deadline);

      // If marking some threads already failed the snapshot won't be kept, so
      // there is no need to keep adding to it. The loop doesn't abort though,
      // to keep marking the other threads. If these threads remain hung until
      // the next capture then they'll already be marked and will be included
      // in the capture at that time.
      if (thread_marked && all_threads_marked) {
        hung_watch_state_copies_.push_back(
            WatchStateCopy{deadline, watch_state.get()->GetThreadID()});
      } else {
        all_threads_marked = false;
      }
    }
  }

  // Log the hung thread counts to histograms for each thread type if any
  // threads of that type were found.
  for (size_t i = 0; i < kHangCountArraySize; ++i) {
    const int hang_count = hung_counts_per_thread_type[i];
    const HangWatcher::ThreadType thread_type =
        static_cast<HangWatcher::ThreadType>(i);
    if (hang_count != kInvalidHangCount &&
        ThreadTypeLoggingLevelGreaterOrEqual(thread_type,
                                             LoggingLevel::kUmaOnly)) {
      LogHungThreadCountHistogram(thread_type, hang_count);
    }
  }

  // Three cases can invalidate this snapshot and prevent the capture of the
  // hang.
  //
  // 1. Some threads could not be marked for blocking so this snapshot isn't
  // actionable since marked threads could be hung because of unmarked ones.
  // If only the marked threads were captured the information would be
  // incomplete.
  //
  // 2. Any of the threads have a deadline before |deadline_ignore_threshold|.
  // If any thread is ignored it reduces the confidence in the whole state and
  // it's better to avoid capturing misleading data.
  //
  // 3. The hung threads found were all of types that are not configured
  // through Finch to trigger a crash dump.
  if (!all_threads_marked || found_deadline_before_ignore_threshold ||
      !any_hung_thread_has_dumping_enabled) {
    hung_watch_state_copies_.clear();
    return;
  }

  // Sort |hung_watch_state_copies_| by order of decreasing hang severity so
  // the most severe hang is first in the list.
  ranges::sort(hung_watch_state_copies_,
               [](const WatchStateCopy& lhs, const WatchStateCopy& rhs) {
                 return lhs.deadline < rhs.deadline;
               });
}

void HangWatcher::WatchStateSnapShot::Clear() {
  hung_watch_state_copies_.clear();
  initialized_ = false;
}

HangWatcher::WatchStateSnapShot::WatchStateSnapShot(
    const WatchStateSnapShot& other) = default;

HangWatcher::WatchStateSnapShot::~WatchStateSnapShot() = default;

std::string HangWatcher::WatchStateSnapShot::PrepareHungThreadListCrashKey()
    const {
  DCHECK(IsActionable());

  // Build a crash key string that contains the ids of the hung threads.
  constexpr char kSeparator{'|'};
  std::string list_of_hung_thread_ids;

  // Add as many thread ids to the crash key as possible.
  for (const WatchStateCopy& copy : hung_watch_state_copies_) {
    std::string fragment = base::NumberToString(copy.thread_id) + kSeparator;
    if (list_of_hung_thread_ids.size() + fragment.size() <
        static_cast<std::size_t>(debug::CrashKeySize::Size256)) {
      list_of_hung_thread_ids += fragment;
    } else {
      // Respect the by-priority ordering of thread ids in the crash key by
      // stopping construction as soon as one does not fit. This avoids
      // including lower-priority ids while omitting more important ones.
      break;
    }
  }

  return list_of_hung_thread_ids;
}

bool HangWatcher::WatchStateSnapShot::IsActionable() const {
  DCHECK(initialized_);
  return !hung_watch_state_copies_.empty();
}

HangWatcher::WatchStateSnapShot HangWatcher::GrabWatchStateSnapshotForTesting()
    const {
  WatchStateSnapShot snapshot;
  snapshot.Init(watch_states_, deadline_ignore_threshold_);
  return snapshot;
}

void HangWatcher::Monitor() {
  DCHECK_CALLED_ON_VALID_THREAD(hang_watcher_thread_checker_);
  AutoLock auto_lock(watch_state_lock_);

  // If all threads unregistered since this function was invoked there's
  // nothing to do anymore.
  if (watch_states_.empty())
    return;

  watch_state_snapshot_.Init(watch_states_, deadline_ignore_threshold_);

  if (watch_state_snapshot_.IsActionable()) {
    DoDumpWithoutCrashing(watch_state_snapshot_);
  }

  watch_state_snapshot_.Clear();
}

void HangWatcher::DoDumpWithoutCrashing(
    const WatchStateSnapShot& watch_state_snapshot) {
  TRACE_EVENT("base", "HangWatcher::DoDumpWithoutCrashing");

  capture_in_progress_.store(true, std::memory_order_relaxed);
  base::AutoLock scope_lock(capture_lock_);

#if !BUILDFLAG(IS_NACL)
  const std::string list_of_hung_thread_ids =
      watch_state_snapshot.PrepareHungThreadListCrashKey();

  static debug::CrashKeyString* crash_key = AllocateCrashKeyString(
      "list-of-hung-threads", debug::CrashKeySize::Size256);

  const debug::ScopedCrashKeyString list_of_hung_threads_crash_key_string(
      crash_key, list_of_hung_thread_ids);

  const debug::ScopedCrashKeyString
      time_since_last_critical_memory_pressure_crash_key_string =
          GetTimeSinceLastCriticalMemoryPressureCrashKey();

  SCOPED_CRASH_KEY_STRING32("HangWatcher", "seconds-since-last-resume",
                            GetTimeSinceLastSystemPowerResumeCrashKeyValue());
#endif

  // To avoid capturing more than one hang that blames a subset of the same
  // threads it's necessary to keep track of the furthest deadline that
  // contributed to declaring a hang. Only once all threads have deadlines
  // past this point can we be sure that a newly discovered hang is not
  // directly related.
  // Example:
  // **********************************************************************
  // Timeline A : L------1-------2----------3-------4----------N-----------
  // Timeline B : -------2----------3-------4----------L----5------N-------
  // Timeline C : L----------------------------5------6----7---8------9---N
  // **********************************************************************
  // In the example when a Monitor() happens during timeline A
  // |deadline_ignore_threshold_| (L) is at time zero and deadlines (1-4)
  // are before Now() (N). A hang is captured and L is updated. During
  // the next Monitor() (timeline B) a new deadline is over but we can't
  // capture a hang because deadlines 2-4 are still live and already counted
  // toward a hang. During a third monitor (timeline C) all live deadlines
  // are now after L and a second hang can be recorded.
  base::TimeTicks latest_expired_deadline =
      watch_state_snapshot.GetHighestDeadline();

  if (on_hang_closure_for_testing_)
    on_hang_closure_for_testing_.Run();
  else
    RecordHang();

  // Update after running the actual capture.
  deadline_ignore_threshold_ = latest_expired_deadline;

  capture_in_progress_.store(false, std::memory_order_relaxed);
}

void HangWatcher::SetAfterMonitorClosureForTesting(
    base::RepeatingClosure closure) {
  DCHECK_CALLED_ON_VALID_THREAD(constructing_thread_checker_);
  after_monitor_closure_for_testing_ = std::move(closure);
}

void HangWatcher::SetOnHangClosureForTesting(base::RepeatingClosure closure) {
  DCHECK_CALLED_ON_VALID_THREAD(constructing_thread_checker_);
  on_hang_closure_for_testing_ = std::move(closure);
}

void HangWatcher::SetMonitoringPeriodForTesting(base::TimeDelta period) {
  DCHECK_CALLED_ON_VALID_THREAD(constructing_thread_checker_);
  monitor_period_ = period;
}

void HangWatcher::SetAfterWaitCallbackForTesting(
    RepeatingCallback<void(TimeTicks)> callback) {
  DCHECK_CALLED_ON_VALID_THREAD(constructing_thread_checker_);
  after_wait_callback_ = callback;
}

void HangWatcher::SignalMonitorEventForTesting() {
  DCHECK_CALLED_ON_VALID_THREAD(constructing_thread_checker_);
  should_monitor_.Signal();
}

// static
void HangWatcher::StopMonitoringForTesting() {
  g_keep_monitoring.store(false, std::memory_order_relaxed);
}

void HangWatcher::SetTickClockForTesting(const base::TickClock* tick_clock) {
  tick_clock_ = tick_clock;
}

void HangWatcher::BlockIfCaptureInProgress() {
  // Makes a best-effort attempt to block execution if a hang is currently
  // being captured. Only block on |capture_lock_| if |capture_in_progress_|
  // hints that it's already held, to avoid serializing all threads on this
  // function when no hang capture is in progress.
  if (capture_in_progress_.load(std::memory_order_relaxed))
    base::AutoLock hang_lock(capture_lock_);
}

void HangWatcher::UnregisterThread() {
  AutoLock auto_lock(watch_state_lock_);

  auto it = ranges::find(
      watch_states_,
      internal::HangWatchState::GetHangWatchStateForCurrentThread(),
      &std::unique_ptr<internal::HangWatchState>::get);

  // Thread should be registered to get unregistered.
  DCHECK(it != watch_states_.end());

  watch_states_.erase(it);
}

namespace internal {
namespace {

constexpr uint64_t kOnlyDeadlineMask = 0x00FF'FFFF'FFFF'FFFFu;
constexpr uint64_t kOnlyFlagsMask = ~kOnlyDeadlineMask;
constexpr uint64_t kMaximumFlag = 0x8000'0000'0000'0000u;

// Use as a mask to keep persistent flags and the deadline.
constexpr uint64_t kPersistentFlagsAndDeadlineMask =
    kOnlyDeadlineMask |
    static_cast<uint64_t>(
        HangWatchDeadline::Flag::kIgnoreCurrentWatchHangsInScope);
}  // namespace
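
// Bit-layout sketch (derived from the masks above): the top 8 bits of the
// deadline word hold flags and the low 56 bits hold the TimeTicks value.
//   ExtractFlags(0x8100'0000'0000'002Au)    == 0x8100'0000'0000'0000u
//   ExtractDeadline(0x8100'0000'0000'002Au) == 0x0000'0000'0000'002Au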

// Flag binary representation assertions.
static_assert(
    static_cast<uint64_t>(HangWatchDeadline::Flag::kMinValue) >
        kOnlyDeadlineMask,
    "Invalid numerical value for flag. Would interfere with bits of data.");
static_assert(static_cast<uint64_t>(HangWatchDeadline::Flag::kMaxValue) <=
                  kMaximumFlag,
              "A flag can only set a single bit.");

HangWatchDeadline::HangWatchDeadline() = default;
HangWatchDeadline::~HangWatchDeadline() = default;

std::pair<uint64_t, TimeTicks> HangWatchDeadline::GetFlagsAndDeadline() const {
  uint64_t bits = bits_.load(std::memory_order_relaxed);
  return std::make_pair(ExtractFlags(bits),
                        DeadlineFromBits(ExtractDeadline(bits)));
}

TimeTicks HangWatchDeadline::GetDeadline() const {
  return DeadlineFromBits(
      ExtractDeadline(bits_.load(std::memory_order_relaxed)));
}

// static
TimeTicks HangWatchDeadline::Max() {
  // |kOnlyDeadlineMask| has all the bits reserved for the TimeTicks value
  // set. This means it also represents the highest representable value.
  return DeadlineFromBits(kOnlyDeadlineMask);
}

// static
bool HangWatchDeadline::IsFlagSet(Flag flag, uint64_t flags) {
  return static_cast<uint64_t>(flag) & flags;
}

void HangWatchDeadline::SetDeadline(TimeTicks new_deadline) {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
  DCHECK(new_deadline <= Max()) << "Value too high to be represented.";
  DCHECK(new_deadline >= TimeTicks{}) << "Value cannot be negative.";

  if (switch_bits_callback_for_testing_) {
    const uint64_t switched_in_bits = SwitchBitsForTesting();
    // If a concurrent deadline change is tested it cannot include a deadline
    // or persistent flag change since those always happen on the same thread.
    DCHECK((switched_in_bits & kPersistentFlagsAndDeadlineMask) == 0u);
  }

  // Discard all non-persistent flags and apply the deadline change.
  const uint64_t old_bits = bits_.load(std::memory_order_relaxed);
  const uint64_t new_flags =
      ExtractFlags(old_bits & kPersistentFlagsAndDeadlineMask);
  bits_.store(new_flags | ExtractDeadline(static_cast<uint64_t>(
                              new_deadline.ToInternalValue())),
              std::memory_order_relaxed);
}

// TODO(crbug.com/1087026): Add flag DCHECKs here.
bool HangWatchDeadline::SetShouldBlockOnHang(uint64_t old_flags,
                                             TimeTicks old_deadline) {
  DCHECK(old_deadline <= Max()) << "Value too high to be represented.";
  DCHECK(old_deadline >= TimeTicks{}) << "Value cannot be negative.";

  // Set the kShouldBlockOnHang flag only if |bits_| did not change since it
  // was read. kShouldBlockOnHang is the only non-persistent flag and should
  // never be set twice. Persistent flags and deadline changes are done from
  // the same thread so there is no risk of losing concurrently added
  // information.
  uint64_t old_bits =
      old_flags | static_cast<uint64_t>(old_deadline.ToInternalValue());
  const uint64_t desired_bits =
      old_bits | static_cast<uint64_t>(Flag::kShouldBlockOnHang);

  // If a test needs to simulate |bits_| changing since it was read, this
  // happens now.
  if (switch_bits_callback_for_testing_) {
    const uint64_t switched_in_bits = SwitchBitsForTesting();

    // Injecting the flag being tested is invalid.
    DCHECK(!IsFlagSet(Flag::kShouldBlockOnHang, switched_in_bits));
  }

  return bits_.compare_exchange_weak(old_bits, desired_bits,
                                     std::memory_order_relaxed,
                                     std::memory_order_relaxed);
}
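
// CAS example (illustrative values; assumes, for illustration only, that
// kShouldBlockOnHang occupies the top bit as permitted by |kMaximumFlag|):
// with old_flags == 0 and a raw deadline of 0x2A, the exchange above tries
// to move |bits_| from 0x0000'0000'0000'002A to 0x8000'0000'0000'002A and
// fails if |bits_| changed since the caller read it.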

void HangWatchDeadline::SetIgnoreCurrentWatchHangsInScope() {
  SetPersistentFlag(Flag::kIgnoreCurrentWatchHangsInScope);
}

void HangWatchDeadline::UnsetIgnoreCurrentWatchHangsInScope() {
  ClearPersistentFlag(Flag::kIgnoreCurrentWatchHangsInScope);
}

void HangWatchDeadline::SetPersistentFlag(Flag flag) {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
  if (switch_bits_callback_for_testing_)
    SwitchBitsForTesting();
  bits_.fetch_or(static_cast<uint64_t>(flag), std::memory_order_relaxed);
}

void HangWatchDeadline::ClearPersistentFlag(Flag flag) {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
  if (switch_bits_callback_for_testing_)
    SwitchBitsForTesting();
  bits_.fetch_and(~(static_cast<uint64_t>(flag)), std::memory_order_relaxed);
}

// static
uint64_t HangWatchDeadline::ExtractFlags(uint64_t bits) {
  return bits & kOnlyFlagsMask;
}

// static
uint64_t HangWatchDeadline::ExtractDeadline(uint64_t bits) {
  return bits & kOnlyDeadlineMask;
}

// static
TimeTicks HangWatchDeadline::DeadlineFromBits(uint64_t bits) {
  // |kOnlyDeadlineMask| has all the deadline bits set to 1 so is the largest
  // representable value.
  DCHECK(bits <= kOnlyDeadlineMask)
      << "Flags bits are set. Remove them before returning deadline.";
  static_assert(kOnlyDeadlineMask <= std::numeric_limits<int64_t>::max());
  return TimeTicks::FromInternalValue(static_cast<int64_t>(bits));
}

bool HangWatchDeadline::IsFlagSet(Flag flag) const {
  return bits_.load(std::memory_order_relaxed) & static_cast<uint64_t>(flag);
}

void HangWatchDeadline::SetSwitchBitsClosureForTesting(
    RepeatingCallback<uint64_t(void)> closure) {
  switch_bits_callback_for_testing_ = closure;
}

void HangWatchDeadline::ResetSwitchBitsClosureForTesting() {
  DCHECK(switch_bits_callback_for_testing_);
  switch_bits_callback_for_testing_.Reset();
}

uint64_t HangWatchDeadline::SwitchBitsForTesting() {
  DCHECK(switch_bits_callback_for_testing_);

  const uint64_t old_bits = bits_.load(std::memory_order_relaxed);
  const uint64_t new_bits = switch_bits_callback_for_testing_.Run();
  const uint64_t old_flags = ExtractFlags(old_bits);

  const uint64_t switched_in_bits = old_flags | new_bits;
  bits_.store(switched_in_bits, std::memory_order_relaxed);
  return switched_in_bits;
}

HangWatchState::HangWatchState(HangWatcher::ThreadType thread_type)
    : resetter_(&hang_watch_state, this, nullptr), thread_type_(thread_type) {
// TODO(crbug.com/1223033): Remove this once macOS uses system-wide ids.
// On macOS the thread ids used by CrashPad are not the same as the ones
// provided by PlatformThread. Make sure to use the same for correct
// attribution.
#if BUILDFLAG(IS_MAC)
  uint64_t thread_id;
  pthread_threadid_np(pthread_self(), &thread_id);
  thread_id_ = checked_cast<PlatformThreadId>(thread_id);
#else
  thread_id_ = PlatformThread::CurrentId();
#endif
}

HangWatchState::~HangWatchState() {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);

  DCHECK_EQ(GetHangWatchStateForCurrentThread(), this);

#if DCHECK_IS_ON()
  // Destroying the HangWatchState should not be done if there are live
  // WatchHangsInScopes.
  DCHECK(!current_watch_hangs_in_scope_);
#endif
}

// static
std::unique_ptr<HangWatchState>
HangWatchState::CreateHangWatchStateForCurrentThread(
    HangWatcher::ThreadType thread_type) {
  // Allocate a watch state object for this thread.
  std::unique_ptr<HangWatchState> hang_state =
      std::make_unique<HangWatchState>(thread_type);

  // Setting the thread local worked.
  DCHECK_EQ(GetHangWatchStateForCurrentThread(), hang_state.get());

  // Transfer ownership to caller.
  return hang_state;
}

TimeTicks HangWatchState::GetDeadline() const {
  return deadline_.GetDeadline();
}

std::pair<uint64_t, TimeTicks> HangWatchState::GetFlagsAndDeadline() const {
  return deadline_.GetFlagsAndDeadline();
}

void HangWatchState::SetDeadline(TimeTicks deadline) {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
  deadline_.SetDeadline(deadline);
}

bool HangWatchState::IsOverDeadline() const {
  return TimeTicks::Now() > deadline_.GetDeadline();
}

void HangWatchState::SetIgnoreCurrentWatchHangsInScope() {
  deadline_.SetIgnoreCurrentWatchHangsInScope();
}

void HangWatchState::UnsetIgnoreCurrentWatchHangsInScope() {
  deadline_.UnsetIgnoreCurrentWatchHangsInScope();
}

bool HangWatchState::SetShouldBlockOnHang(uint64_t old_flags,
                                          TimeTicks old_deadline) {
  return deadline_.SetShouldBlockOnHang(old_flags, old_deadline);
}

bool HangWatchState::IsFlagSet(HangWatchDeadline::Flag flag) {
  return deadline_.IsFlagSet(flag);
}

#if DCHECK_IS_ON()
void HangWatchState::SetCurrentWatchHangsInScope(
    WatchHangsInScope* current_hang_watch_scope_enable) {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
  current_watch_hangs_in_scope_ = current_hang_watch_scope_enable;
}

WatchHangsInScope* HangWatchState::GetCurrentWatchHangsInScope() {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
  return current_watch_hangs_in_scope_;
}
#endif

HangWatchDeadline* HangWatchState::GetHangWatchDeadlineForTesting() {
  return &deadline_;
}

void HangWatchState::IncrementNestingLevel() {
  ++nesting_level_;
}

void HangWatchState::DecrementNestingLevel() {
  --nesting_level_;
}

// static
HangWatchState* HangWatchState::GetHangWatchStateForCurrentThread() {
  // Workaround false-positive MSAN use-of-uninitialized-value on
  // thread_local storage for loaded libraries:
  // https://github.com/google/sanitizers/issues/1265
  MSAN_UNPOISON(&hang_watch_state, sizeof(internal::HangWatchState*));

  return hang_watch_state;
}

PlatformThreadId HangWatchState::GetThreadID() const {
  return thread_id_;
}

}  // namespace internal

}  // namespace base