// Copyright 2020 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "base/threading/hang_watcher.h"

#include <array>
#include <atomic>
#include <cstdint>
#include <iterator>
#include <limits>
#include <tuple>
#include <utility>

#include "base/containers/flat_map.h"
#include "base/debug/alias.h"
#include "base/debug/crash_logging.h"
#include "base/debug/dump_without_crashing.h"
#include "base/debug/leak_annotations.h"
#include "base/feature_list.h"
#include "base/functional/bind.h"
#include "base/functional/callback_helpers.h"
#include "base/metrics/field_trial_params.h"
#include "base/metrics/histogram_macros.h"
#include "base/power_monitor/power_monitor.h"
#include "base/ranges/algorithm.h"
#include "base/strings/string_number_conversions.h"
#include "base/synchronization/lock.h"
#include "base/synchronization/waitable_event.h"
#include "base/threading/platform_thread.h"
#include "base/threading/thread_checker.h"
#include "base/threading/thread_restrictions.h"
#include "base/threading/threading_features.h"
#include "base/time/default_tick_clock.h"
#include "base/time/time.h"
#include "base/trace_event/base_tracing.h"
#include "build/build_config.h"

namespace base {

namespace {

// Defines how much logging happens when the HangWatcher monitors the threads.
// Logging levels are set per thread type through Finch. It's important that
// the order of the enum members stay the same and that their numerical
// values be in increasing order. The implementation of
// ThreadTypeLoggingLevelGreaterOrEqual() depends on it.
enum class LoggingLevel { kNone = 0, kUmaOnly = 1, kUmaAndCrash = 2 };

HangWatcher* g_instance = nullptr;
constinit thread_local internal::HangWatchState* hang_watch_state = nullptr;
std::atomic<bool> g_use_hang_watcher{false};
std::atomic<HangWatcher::ProcessType> g_hang_watcher_process_type{
    HangWatcher::ProcessType::kBrowserProcess};

std::atomic<LoggingLevel> g_threadpool_log_level{LoggingLevel::kNone};
std::atomic<LoggingLevel> g_io_thread_log_level{LoggingLevel::kNone};
std::atomic<LoggingLevel> g_main_thread_log_level{LoggingLevel::kNone};

// Indicates whether HangWatcher::Run() should return after the next
// monitoring.
std::atomic<bool> g_keep_monitoring{true};

// If true, indicates that this process's shutdown sequence has started. Once
// flipped to true, cannot be un-flipped.
std::atomic<bool> g_shutting_down{false};

// Emits the hung thread count histogram. |count| is the number of threads
// of type |thread_type| that were hung or became hung during the last
// monitoring window. This function should be invoked for each thread type
// encountered on each call to Monitor(). |sample_ticks| is the time at which
// the sample was taken and |monitoring_period| is the interval being sampled.
void LogStatusHistogram(HangWatcher::ThreadType thread_type,
                        int count,
                        TimeTicks sample_ticks,
                        TimeDelta monitoring_period) {
  // In the case of unique threads like the IO or UI/Main thread, a count does
  // not make sense.
  const bool any_thread_hung = count >= 1;
  const bool shutting_down = g_shutting_down.load(std::memory_order_relaxed);

  // Uses histogram macros instead of functions. This increases binary size
  // slightly, but runs slightly faster. These histograms are logged pretty
  // often, so we prefer improving runtime.
  const HangWatcher::ProcessType process_type =
      g_hang_watcher_process_type.load(std::memory_order_relaxed);
  switch (process_type) {
    case HangWatcher::ProcessType::kUnknownProcess:
      break;

    case HangWatcher::ProcessType::kBrowserProcess:
      switch (thread_type) {
        case HangWatcher::ThreadType::kIOThread:
          if (shutting_down) {
            UMA_HISTOGRAM_BOOLEAN(
                "HangWatcher.IsThreadHung.BrowserProcess.IOThread.Shutdown",
                any_thread_hung);
          } else {
            UMA_HISTOGRAM_BOOLEAN(
                "HangWatcher.IsThreadHung.BrowserProcess.IOThread.Normal",
                any_thread_hung);
          }
          break;
        case HangWatcher::ThreadType::kMainThread:
          if (shutting_down) {
            UMA_HISTOGRAM_BOOLEAN(
                "HangWatcher.IsThreadHung.BrowserProcess.UIThread.Shutdown",
                any_thread_hung);
          } else {
            UMA_HISTOGRAM_BOOLEAN(
                "HangWatcher.IsThreadHung.BrowserProcess.UIThread.Normal",
                any_thread_hung);
          }
          break;
        case HangWatcher::ThreadType::kThreadPoolThread:
          // Not recorded for now.
          break;
      }
      break;

    case HangWatcher::ProcessType::kGPUProcess:
      // Not recorded for now.
      CHECK(!shutting_down);
      break;

    case HangWatcher::ProcessType::kRendererProcess:
      CHECK(!shutting_down);
      switch (thread_type) {
        case HangWatcher::ThreadType::kIOThread:
          UMA_HISTOGRAM_SPLIT_BY_PROCESS_PRIORITY(
              UMA_HISTOGRAM_BOOLEAN, sample_ticks, monitoring_period,
              "HangWatcher.IsThreadHung.RendererProcess.IOThread",
              any_thread_hung);
          break;
        case HangWatcher::ThreadType::kMainThread:
          UMA_HISTOGRAM_SPLIT_BY_PROCESS_PRIORITY(
              UMA_HISTOGRAM_BOOLEAN, sample_ticks, monitoring_period,
              "HangWatcher.IsThreadHung.RendererProcess.MainThread",
              any_thread_hung);
          break;
        case HangWatcher::ThreadType::kThreadPoolThread:
          // Not recorded for now.
          break;
      }
      break;

    case HangWatcher::ProcessType::kUtilityProcess:
      CHECK(!shutting_down);
      switch (thread_type) {
        case HangWatcher::ThreadType::kIOThread:
          UMA_HISTOGRAM_BOOLEAN(
              "HangWatcher.IsThreadHung.UtilityProcess.IOThread",
              any_thread_hung);
          break;
        case HangWatcher::ThreadType::kMainThread:
          UMA_HISTOGRAM_BOOLEAN(
              "HangWatcher.IsThreadHung.UtilityProcess.MainThread",
              any_thread_hung);
          break;
        case HangWatcher::ThreadType::kThreadPoolThread:
          // Not recorded for now.
          break;
      }
      break;
  }
}
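
// Example (illustrative): with the switches above, a hung IO thread observed
// in the browser process while |g_shutting_down| is set is recorded as
// "HangWatcher.IsThreadHung.BrowserProcess.IOThread.Shutdown" = true.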

// Returns true if |thread_type| was configured through Finch to have a logging
// level that is equal to or exceeds |logging_level|.
bool ThreadTypeLoggingLevelGreaterOrEqual(HangWatcher::ThreadType thread_type,
                                          LoggingLevel logging_level) {
  switch (thread_type) {
    case HangWatcher::ThreadType::kIOThread:
      return g_io_thread_log_level.load(std::memory_order_relaxed) >=
             logging_level;
    case HangWatcher::ThreadType::kMainThread:
      return g_main_thread_log_level.load(std::memory_order_relaxed) >=
             logging_level;
    case HangWatcher::ThreadType::kThreadPoolThread:
      return g_threadpool_log_level.load(std::memory_order_relaxed) >=
             logging_level;
  }
}

}  // namespace

// Determines if the HangWatcher is activated. When false, the HangWatcher
// thread is never started.
BASE_FEATURE(kEnableHangWatcher,
             "EnableHangWatcher",
             FEATURE_ENABLED_BY_DEFAULT);

// Browser process.
constexpr base::FeatureParam<int> kIOThreadLogLevel{
    &kEnableHangWatcher, "io_thread_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};
constexpr base::FeatureParam<int> kUIThreadLogLevel{
    &kEnableHangWatcher, "ui_thread_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};
constexpr base::FeatureParam<int> kThreadPoolLogLevel{
    &kEnableHangWatcher, "threadpool_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};

// GPU process.
constexpr base::FeatureParam<int> kGPUProcessIOThreadLogLevel{
    &kEnableHangWatcher, "gpu_process_io_thread_log_level",
    static_cast<int>(LoggingLevel::kNone)};
constexpr base::FeatureParam<int> kGPUProcessMainThreadLogLevel{
    &kEnableHangWatcher, "gpu_process_main_thread_log_level",
    static_cast<int>(LoggingLevel::kNone)};
constexpr base::FeatureParam<int> kGPUProcessThreadPoolLogLevel{
    &kEnableHangWatcher, "gpu_process_threadpool_log_level",
    static_cast<int>(LoggingLevel::kNone)};

// Renderer process.
constexpr base::FeatureParam<int> kRendererProcessIOThreadLogLevel{
    &kEnableHangWatcher, "renderer_process_io_thread_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};
constexpr base::FeatureParam<int> kRendererProcessMainThreadLogLevel{
    &kEnableHangWatcher, "renderer_process_main_thread_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};
constexpr base::FeatureParam<int> kRendererProcessThreadPoolLogLevel{
    &kEnableHangWatcher, "renderer_process_threadpool_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};

// Utility process.
constexpr base::FeatureParam<int> kUtilityProcessIOThreadLogLevel{
    &kEnableHangWatcher, "utility_process_io_thread_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};
constexpr base::FeatureParam<int> kUtilityProcessMainThreadLogLevel{
    &kEnableHangWatcher, "utility_process_main_thread_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};
constexpr base::FeatureParam<int> kUtilityProcessThreadPoolLogLevel{
    &kEnableHangWatcher, "utility_process_threadpool_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};

constexpr const char* kThreadName = "HangWatcher";

// The time that the HangWatcher thread will sleep for between calls to
// Monitor(). Increasing or decreasing this does not modify the type of hangs
// that can be detected. It instead increases the probability that a call to
// Monitor() will happen at the right time to catch a hang. This has to be
// balanced with power/cpu use concerns as busy looping would catch almost all
// hangs but present unacceptable overhead. NOTE: If this period is ever
// changed then all metrics that depend on it, like HangWatcher.IsThreadHung,
// need to be updated.
constexpr auto kMonitoringPeriod = base::Seconds(10);
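
// Example (illustrative): with a 10 second period, any hang lasting longer
// than the period is guaranteed to overlap at least one Monitor() call, while
// shorter hangs are only caught if a Monitor() happens to fall inside them.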

WatchHangsInScope::WatchHangsInScope(TimeDelta timeout) {
  internal::HangWatchState* current_hang_watch_state =
      HangWatcher::IsEnabled()
          ? internal::HangWatchState::GetHangWatchStateForCurrentThread()
          : nullptr;

  DCHECK(timeout >= base::TimeDelta()) << "Negative timeouts are invalid.";

  // Thread is not monitored, noop.
  if (!current_hang_watch_state) {
    took_effect_ = false;
    return;
  }

#if DCHECK_IS_ON()
  previous_watch_hangs_in_scope_ =
      current_hang_watch_state->GetCurrentWatchHangsInScope();
  current_hang_watch_state->SetCurrentWatchHangsInScope(this);
#endif

  auto [old_flags, old_deadline] =
      current_hang_watch_state->GetFlagsAndDeadline();

  // TODO(crbug.com/40111620): Check whether we are already over the deadline
  // for the previous WatchHangsInScope here by issuing only one
  // TimeTicks::Now() and reusing the value.

  previous_deadline_ = old_deadline;
  TimeTicks deadline = TimeTicks::Now() + timeout;
  current_hang_watch_state->SetDeadline(deadline);
  current_hang_watch_state->IncrementNestingLevel();

  const bool hangs_ignored_for_current_scope =
      internal::HangWatchDeadline::IsFlagSet(
          internal::HangWatchDeadline::Flag::kIgnoreCurrentWatchHangsInScope,
          old_flags);

  // If the current WatchHangsInScope is ignored, temporarily reactivate hang
  // watching for newly created WatchHangsInScopes. On exiting, hang watching
  // is suspended again to return to the original state.
  if (hangs_ignored_for_current_scope) {
    current_hang_watch_state->UnsetIgnoreCurrentWatchHangsInScope();
    set_hangs_ignored_on_exit_ = true;
  }
}
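
// Example usage (illustrative sketch; in practice scopes are typically
// installed by framework code such as the task runner, not written by hand):
//
//   void RunTask(base::OnceClosure task) {
//     WatchHangsInScope scope(base::Seconds(10));
//     // Reported as hung if the task is still running once the deadline
//     // expires and a Monitor() pass observes it.
//     std::move(task).Run();
//   }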

WatchHangsInScope::~WatchHangsInScope() {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);

  // If hang watching was not enabled at construction time there is nothing to
  // validate or undo.
  if (!took_effect_) {
    return;
  }

  // If the thread was unregistered since construction there is also nothing to
  // do.
  auto* const state =
      internal::HangWatchState::GetHangWatchStateForCurrentThread();
  if (!state) {
    return;
  }

  // If a hang is currently being captured we should block here so execution
  // stops and we avoid recording unrelated stack frames in the crash.
  if (state->IsFlagSet(internal::HangWatchDeadline::Flag::kShouldBlockOnHang)) {
    base::HangWatcher::GetInstance()->BlockIfCaptureInProgress();
  }

#if DCHECK_IS_ON()
  // Verify that no Scope was destructed out of order.
  DCHECK_EQ(this, state->GetCurrentWatchHangsInScope());
  state->SetCurrentWatchHangsInScope(previous_watch_hangs_in_scope_);
#endif

  if (state->nesting_level() == 1) {
    // If a call to InvalidateActiveExpectations() suspended hang watching
    // during the lifetime of this or any nested WatchHangsInScope it can now
    // safely be reactivated by clearing the ignore bit since this is the
    // outer-most scope.
    state->UnsetIgnoreCurrentWatchHangsInScope();
  } else if (set_hangs_ignored_on_exit_) {
    // Return to ignoring hangs since this was the previous state before hang
    // watching was temporarily enabled for this WatchHangsInScope only in the
    // constructor.
    state->SetIgnoreCurrentWatchHangsInScope();
  }

  // Reset the deadline to the value it had before entering this
  // WatchHangsInScope.
  state->SetDeadline(previous_deadline_);
  // TODO(crbug.com/40111620): Log when a WatchHangsInScope exits after its
  // deadline and that went undetected by the HangWatcher.

  state->DecrementNestingLevel();
}
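
// Example of nesting (illustrative):
//
//   WatchHangsInScope outer(base::Seconds(10));
//   {
//     WatchHangsInScope inner(base::Seconds(1));  // nesting_level() == 2.
//   }  // |inner| restores |outer|'s deadline on destruction.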

// static
void HangWatcher::InitializeOnMainThread(ProcessType process_type,
                                         bool emit_crashes) {
  DCHECK(!g_use_hang_watcher);
  DCHECK(g_io_thread_log_level == LoggingLevel::kNone);
  DCHECK(g_main_thread_log_level == LoggingLevel::kNone);
  DCHECK(g_threadpool_log_level == LoggingLevel::kNone);

  bool enable_hang_watcher = base::FeatureList::IsEnabled(kEnableHangWatcher);

  // Do not start HangWatcher in the GPU process until the issue related to
  // invalid magic signature in the GPU WatchDog is fixed
  // (https://crbug.com/1297760).
  if (process_type == ProcessType::kGPUProcess)
    enable_hang_watcher = false;

  g_use_hang_watcher.store(enable_hang_watcher, std::memory_order_relaxed);

  // Keep the process type.
  g_hang_watcher_process_type.store(process_type, std::memory_order_relaxed);

  // If hang watching is disabled as a whole there is no need to read the
  // params.
  if (!enable_hang_watcher)
    return;

  // Retrieve thread-specific config for hang watching.
  if (process_type == HangWatcher::ProcessType::kBrowserProcess) {
    // Crashes are set to always emit. Override any feature flags.
    if (emit_crashes) {
      g_io_thread_log_level.store(
          static_cast<LoggingLevel>(LoggingLevel::kUmaAndCrash),
          std::memory_order_relaxed);
      g_main_thread_log_level.store(
          static_cast<LoggingLevel>(LoggingLevel::kUmaAndCrash),
          std::memory_order_relaxed);
    } else {
      g_io_thread_log_level.store(
          static_cast<LoggingLevel>(kIOThreadLogLevel.Get()),
          std::memory_order_relaxed);
      g_main_thread_log_level.store(
          static_cast<LoggingLevel>(kUIThreadLogLevel.Get()),
          std::memory_order_relaxed);
    }

    g_threadpool_log_level.store(
        static_cast<LoggingLevel>(kThreadPoolLogLevel.Get()),
        std::memory_order_relaxed);
  } else if (process_type == HangWatcher::ProcessType::kGPUProcess) {
    g_threadpool_log_level.store(
        static_cast<LoggingLevel>(kGPUProcessThreadPoolLogLevel.Get()),
        std::memory_order_relaxed);
    g_io_thread_log_level.store(
        static_cast<LoggingLevel>(kGPUProcessIOThreadLogLevel.Get()),
        std::memory_order_relaxed);
    g_main_thread_log_level.store(
        static_cast<LoggingLevel>(kGPUProcessMainThreadLogLevel.Get()),
        std::memory_order_relaxed);
  } else if (process_type == HangWatcher::ProcessType::kRendererProcess) {
    g_threadpool_log_level.store(
        static_cast<LoggingLevel>(kRendererProcessThreadPoolLogLevel.Get()),
        std::memory_order_relaxed);
    g_io_thread_log_level.store(
        static_cast<LoggingLevel>(kRendererProcessIOThreadLogLevel.Get()),
        std::memory_order_relaxed);
    g_main_thread_log_level.store(
        static_cast<LoggingLevel>(kRendererProcessMainThreadLogLevel.Get()),
        std::memory_order_relaxed);
  } else if (process_type == HangWatcher::ProcessType::kUtilityProcess) {
    g_threadpool_log_level.store(
        static_cast<LoggingLevel>(kUtilityProcessThreadPoolLogLevel.Get()),
        std::memory_order_relaxed);
    g_io_thread_log_level.store(
        static_cast<LoggingLevel>(kUtilityProcessIOThreadLogLevel.Get()),
        std::memory_order_relaxed);
    g_main_thread_log_level.store(
        static_cast<LoggingLevel>(kUtilityProcessMainThreadLogLevel.Get()),
        std::memory_order_relaxed);
  }
}

void HangWatcher::UnitializeOnMainThreadForTesting() {
  g_use_hang_watcher.store(false, std::memory_order_relaxed);
  g_threadpool_log_level.store(LoggingLevel::kNone, std::memory_order_relaxed);
  g_io_thread_log_level.store(LoggingLevel::kNone, std::memory_order_relaxed);
  g_main_thread_log_level.store(LoggingLevel::kNone, std::memory_order_relaxed);
  g_shutting_down.store(false, std::memory_order_relaxed);
}

// static
bool HangWatcher::IsEnabled() {
  return g_use_hang_watcher.load(std::memory_order_relaxed);
}

// static
bool HangWatcher::IsThreadPoolHangWatchingEnabled() {
  return g_threadpool_log_level.load(std::memory_order_relaxed) !=
         LoggingLevel::kNone;
}

// static
bool HangWatcher::IsIOThreadHangWatchingEnabled() {
  return g_io_thread_log_level.load(std::memory_order_relaxed) !=
         LoggingLevel::kNone;
}

// static
bool HangWatcher::IsCrashReportingEnabled() {
  if (g_main_thread_log_level.load(std::memory_order_relaxed) ==
      LoggingLevel::kUmaAndCrash) {
    return true;
  }
  if (g_io_thread_log_level.load(std::memory_order_relaxed) ==
      LoggingLevel::kUmaAndCrash) {
    return true;
  }
  if (g_threadpool_log_level.load(std::memory_order_relaxed) ==
      LoggingLevel::kUmaAndCrash) {
    return true;
  }
  return false;
}

// static
void HangWatcher::InvalidateActiveExpectations() {
  auto* const state =
      internal::HangWatchState::GetHangWatchStateForCurrentThread();
  if (!state) {
    // If the current thread is not under watch there is nothing to invalidate.
    return;
  }
  state->SetIgnoreCurrentWatchHangsInScope();
}
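
// Example (illustrative): code that is about to block for a legitimately
// unbounded amount of time can call this to avoid false-positive hang reports
// for the scopes it is running under:
//
//   base::HangWatcher::InvalidateActiveExpectations();
//   WaitForDebuggerToAttach();  // Hypothetical arbitrarily-long wait.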

// static
void HangWatcher::SetShuttingDown() {
  // memory_order_relaxed offers no memory order guarantees. In rare cases, we
  // could falsely log to BrowserProcess.Normal instead of
  // BrowserProcess.Shutdown. This is OK in practice.
  bool was_shutting_down =
      g_shutting_down.exchange(true, std::memory_order_relaxed);
  DCHECK(!was_shutting_down);
}

HangWatcher::HangWatcher()
    : monitoring_period_(kMonitoringPeriod),
      should_monitor_(WaitableEvent::ResetPolicy::AUTOMATIC),
      thread_(this, kThreadName),
      tick_clock_(base::DefaultTickClock::GetInstance()),
      memory_pressure_listener_(
          FROM_HERE,
          base::BindRepeating(&HangWatcher::OnMemoryPressure,
                              base::Unretained(this))) {
  // |thread_checker_| should not be bound to the constructing thread.
  DETACH_FROM_THREAD(hang_watcher_thread_checker_);

  should_monitor_.declare_only_used_while_idle();

  DCHECK(!g_instance);
  g_instance = this;
}

// static
void HangWatcher::CreateHangWatcherInstance() {
  DCHECK(!g_instance);
  g_instance = new base::HangWatcher();
  // The hang watcher is leaked to make sure it survives all watched threads.
  ANNOTATE_LEAKING_OBJECT_PTR(g_instance);
}
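
// Illustrative startup sequence for a process, under the assumption that
// initialization happens on the main thread before any threads register:
//
//   HangWatcher::InitializeOnMainThread(
//       HangWatcher::ProcessType::kBrowserProcess, /*emit_crashes=*/true);
//   HangWatcher::CreateHangWatcherInstance();
//   HangWatcher::GetInstance()->Start();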

#if !BUILDFLAG(IS_NACL)
debug::ScopedCrashKeyString
HangWatcher::GetTimeSinceLastCriticalMemoryPressureCrashKey() {
  DCHECK_CALLED_ON_VALID_THREAD(hang_watcher_thread_checker_);

  // The crash key size is large enough to hold the biggest possible return
  // value from base::TimeDelta::InSeconds().
  constexpr debug::CrashKeySize kCrashKeyContentSize =
      debug::CrashKeySize::Size32;
  DCHECK_GE(static_cast<uint64_t>(kCrashKeyContentSize),
            base::NumberToString(std::numeric_limits<int64_t>::max()).size());

  static debug::CrashKeyString* crash_key = AllocateCrashKeyString(
      "seconds-since-last-memory-pressure", kCrashKeyContentSize);

  const base::TimeTicks last_critical_memory_pressure_time =
      last_critical_memory_pressure_.load(std::memory_order_relaxed);
  if (last_critical_memory_pressure_time.is_null()) {
    constexpr char kNoMemoryPressureMsg[] = "No critical memory pressure";
    static_assert(
        std::size(kNoMemoryPressureMsg) <=
            static_cast<uint64_t>(kCrashKeyContentSize),
        "The crash key is too small to hold \"No critical memory pressure\".");
    return debug::ScopedCrashKeyString(crash_key, kNoMemoryPressureMsg);
  } else {
    base::TimeDelta time_since_last_critical_memory_pressure =
        base::TimeTicks::Now() - last_critical_memory_pressure_time;
    return debug::ScopedCrashKeyString(
        crash_key, base::NumberToString(
                       time_since_last_critical_memory_pressure.InSeconds()));
  }
}
#endif

std::string HangWatcher::GetTimeSinceLastSystemPowerResumeCrashKeyValue()
    const {
  DCHECK_CALLED_ON_VALID_THREAD(hang_watcher_thread_checker_);

  const TimeTicks last_system_power_resume_time =
      PowerMonitor::GetInstance()->GetLastSystemResumeTime();
  if (last_system_power_resume_time.is_null())
    return "Never suspended";
  if (last_system_power_resume_time == TimeTicks::Max())
    return "Power suspended";

  const TimeDelta time_since_last_system_resume =
      TimeTicks::Now() - last_system_power_resume_time;
  return NumberToString(time_since_last_system_resume.InSeconds());
}

void HangWatcher::OnMemoryPressure(
    base::MemoryPressureListener::MemoryPressureLevel memory_pressure_level) {
  if (memory_pressure_level ==
      base::MemoryPressureListener::MEMORY_PRESSURE_LEVEL_CRITICAL) {
    last_critical_memory_pressure_.store(base::TimeTicks::Now(),
                                         std::memory_order_relaxed);
  }
}

HangWatcher::~HangWatcher() {
  DCHECK_CALLED_ON_VALID_THREAD(constructing_thread_checker_);
  DCHECK_EQ(g_instance, this);
  DCHECK(watch_states_.empty());
  g_instance = nullptr;
  Stop();
}

void HangWatcher::Start() {
  thread_.Start();
  thread_started_ = true;
}

void HangWatcher::Stop() {
  g_keep_monitoring.store(false, std::memory_order_relaxed);
  should_monitor_.Signal();
  thread_.Join();
  thread_started_ = false;

  // In production HangWatcher is always leaked but during testing it's
  // possibly stopped and restarted using a new instance. This makes sure the
  // next call to Start() will actually monitor in that case.
  g_keep_monitoring.store(true, std::memory_order_relaxed);
}

bool HangWatcher::IsWatchListEmpty() {
  AutoLock auto_lock(watch_state_lock_);
  return watch_states_.empty();
}

void HangWatcher::Wait() {
  while (true) {
    // Amount by which the actual time spent sleeping can deviate from
    // the target time and still be considered timely.
    constexpr base::TimeDelta kWaitDriftTolerance = base::Milliseconds(100);

    const base::TimeTicks time_before_wait = tick_clock_->NowTicks();

    // Sleep until next scheduled monitoring or until signaled.
    const bool was_signaled = should_monitor_.TimedWait(monitoring_period_);

    if (after_wait_callback_)
      after_wait_callback_.Run(time_before_wait);

    const base::TimeTicks time_after_wait = tick_clock_->NowTicks();
    const base::TimeDelta wait_time = time_after_wait - time_before_wait;
    const bool wait_was_normal =
        wait_time <= (monitoring_period_ + kWaitDriftTolerance);

    if (!wait_was_normal) {
      // If the time spent waiting was too high it might indicate the machine
      // is very slow or that it went to sleep. In any case we can't trust the
      // WatchHangsInScopes that are currently live. Update the ignore
      // threshold to make sure they don't trigger a hang on subsequent
      // monitors, then keep waiting.

      base::AutoLock auto_lock(watch_state_lock_);

      // Find the latest deadline among the live watch states. They might
      // change atomically while iterating but that's fine because if they do,
      // that means the new WatchHangsInScope was constructed very soon after
      // the abnormal sleep happened and might still be affected by the root
      // cause. Ignoring it is cautious and harmless.
      base::TimeTicks latest_deadline;
      for (const auto& state : watch_states_) {
        base::TimeTicks deadline = state->GetDeadline();
        if (deadline > latest_deadline) {
          latest_deadline = deadline;
        }
      }

      deadline_ignore_threshold_ = latest_deadline;
    }

    // Stop waiting.
    if (wait_was_normal || was_signaled)
      return;
  }
}
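
// Example (illustrative): with a 10 second monitoring period, waking from a
// one minute machine sleep yields a |wait_time| of roughly 60 seconds, well
// over the tolerated 10.1 seconds. Every live deadline is then moved under
// |deadline_ignore_threshold_| so the sleep is not misreported as a hang.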

void HangWatcher::Run() {
  // Monitor() should only run on |thread_|. Bind |thread_checker_| here to
  // make sure of that.
  DCHECK_CALLED_ON_VALID_THREAD(hang_watcher_thread_checker_);

  while (g_keep_monitoring.load(std::memory_order_relaxed)) {
    Wait();

    if (!IsWatchListEmpty() &&
        g_keep_monitoring.load(std::memory_order_relaxed)) {
      Monitor();
      if (after_monitor_closure_for_testing_) {
        after_monitor_closure_for_testing_.Run();
      }
    }
  }
}

// static
HangWatcher* HangWatcher::GetInstance() {
  return g_instance;
}

// static
void HangWatcher::RecordHang() {
  base::debug::DumpWithoutCrashing();
  NO_CODE_FOLDING();
}

ScopedClosureRunner HangWatcher::RegisterThreadInternal(
    ThreadType thread_type) {
  AutoLock auto_lock(watch_state_lock_);
  CHECK(base::FeatureList::GetInstance());

  // Do not install a WatchState if the results would never be observable.
  if (!ThreadTypeLoggingLevelGreaterOrEqual(thread_type,
                                            LoggingLevel::kUmaOnly)) {
    return ScopedClosureRunner(base::DoNothing());
  }

  watch_states_.push_back(
      internal::HangWatchState::CreateHangWatchStateForCurrentThread(
          thread_type));
  return ScopedClosureRunner(BindOnce(&HangWatcher::UnregisterThread,
                                      Unretained(HangWatcher::GetInstance())));
}

// static
ScopedClosureRunner HangWatcher::RegisterThread(ThreadType thread_type) {
  if (!GetInstance()) {
    return ScopedClosureRunner();
  }

  return GetInstance()->RegisterThreadInternal(thread_type);
}
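
// Example (illustrative): a thread registers itself once at startup and keeps
// the returned runner alive for its lifetime so that unregistration happens
// automatically on destruction:
//
//   ScopedClosureRunner unregister_hang_watching =
//       HangWatcher::RegisterThread(
//           HangWatcher::ThreadType::kThreadPoolThread);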

base::TimeTicks HangWatcher::WatchStateSnapShot::GetHighestDeadline() const {
  DCHECK(IsActionable());

  // Since entries are sorted in increasing order the last entry is the largest
  // one.
  return hung_watch_state_copies_.back().deadline;
}

HangWatcher::WatchStateSnapShot::WatchStateSnapShot() = default;

void HangWatcher::WatchStateSnapShot::Init(
    const HangWatchStates& watch_states,
    base::TimeTicks deadline_ignore_threshold,
    base::TimeDelta monitoring_period) {
  DCHECK(!initialized_);

  // Whether or not the snapshot turns out to be actionable, it will have been
  // initialized after this function.
  initialized_ = true;

  const base::TimeTicks now = base::TimeTicks::Now();
  bool all_threads_marked = true;
  bool found_deadline_before_ignore_threshold = false;

  // Use an std::array to store the hang counts to avoid allocations. The
  // numerical values of the HangWatcher::ThreadType enum are used to index
  // into the array. A |kInvalidHangCount| is used to signify there were no
  // threads of the type found.
  constexpr size_t kHangCountArraySize =
      static_cast<std::size_t>(base::HangWatcher::ThreadType::kMax) + 1;
  std::array<int, kHangCountArraySize> hung_counts_per_thread_type;

  constexpr int kInvalidHangCount = -1;
  hung_counts_per_thread_type.fill(kInvalidHangCount);

  // Will be true if any of the hung threads has a logging level high enough,
  // as defined through finch params, to warrant dumping a crash.
  bool any_hung_thread_has_dumping_enabled = false;

  // Copy hung thread information.
  for (const auto& watch_state : watch_states) {
    uint64_t flags;
    TimeTicks deadline;
    std::tie(flags, deadline) = watch_state->GetFlagsAndDeadline();

    if (deadline <= deadline_ignore_threshold) {
      found_deadline_before_ignore_threshold = true;
    }

    if (internal::HangWatchDeadline::IsFlagSet(
            internal::HangWatchDeadline::Flag::kIgnoreCurrentWatchHangsInScope,
            flags)) {
      continue;
    }

    // If a thread type is monitored and did not hang it still needs to be
    // logged as a zero count.
    const size_t hang_count_index =
        static_cast<size_t>(watch_state.get()->thread_type());
    if (hung_counts_per_thread_type[hang_count_index] == kInvalidHangCount) {
      hung_counts_per_thread_type[hang_count_index] = 0;
    }

    // Only copy hung threads.
    if (deadline <= now) {
      ++hung_counts_per_thread_type[hang_count_index];

      if (ThreadTypeLoggingLevelGreaterOrEqual(watch_state.get()->thread_type(),
                                               LoggingLevel::kUmaAndCrash)) {
        any_hung_thread_has_dumping_enabled = true;
      }

#if BUILDFLAG(ENABLE_BASE_TRACING)
      // Emit trace events for monitored threads.
      if (ThreadTypeLoggingLevelGreaterOrEqual(watch_state.get()->thread_type(),
                                               LoggingLevel::kUmaOnly)) {
        const PlatformThreadId thread_id = watch_state.get()->GetThreadID();
        const auto track = perfetto::Track::FromPointer(
            this, perfetto::ThreadTrack::ForThread(thread_id));
        TRACE_EVENT_BEGIN("latency", "HangWatcher::ThreadHung", track,
                          deadline);
        TRACE_EVENT_END("latency", track, now);
      }
#endif

      // Attempt to mark the thread as needing to stay within its current
      // WatchHangsInScope until capture is complete.
      bool thread_marked = watch_state->SetShouldBlockOnHang(flags, deadline);

      // If marking some threads already failed the snapshot won't be kept so
      // there is no need to keep adding to it. The loop doesn't abort though
      // to keep marking the other threads. If these threads remain hung until
      // the next capture then they'll already be marked and will be included
      // in the capture at that time.
      if (thread_marked && all_threads_marked) {
        hung_watch_state_copies_.push_back(
            WatchStateCopy{deadline, watch_state.get()->GetThreadID()});
      } else {
        all_threads_marked = false;
      }
    }
  }

  // Log the hung thread counts to histograms for each thread type if any
  // threads of the type were found.
  for (size_t i = 0; i < kHangCountArraySize; ++i) {
    const int hang_count = hung_counts_per_thread_type[i];
    const HangWatcher::ThreadType thread_type =
        static_cast<HangWatcher::ThreadType>(i);
    if (hang_count != kInvalidHangCount &&
        ThreadTypeLoggingLevelGreaterOrEqual(thread_type,
                                             LoggingLevel::kUmaOnly)) {
      LogStatusHistogram(thread_type, hang_count, now, monitoring_period);
    }
  }

  // Three cases can invalidate this snapshot and prevent the capture of the
  // hang.
  //
  // 1. Some threads could not be marked for blocking so this snapshot isn't
  // actionable since marked threads could be hung because of unmarked ones.
  // If only the marked threads were captured the information would be
  // incomplete.
  //
  // 2. Any of the threads have a deadline before |deadline_ignore_threshold|.
  // If any thread is ignored it reduces the confidence in the whole state and
  // it's better to avoid capturing misleading data.
  //
  // 3. The hung threads found were all of types that are not configured
  // through Finch to trigger a crash dump.
  if (!all_threads_marked || found_deadline_before_ignore_threshold ||
      !any_hung_thread_has_dumping_enabled) {
    hung_watch_state_copies_.clear();
    return;
  }

  // Sort |hung_watch_state_copies_| by order of decreasing hang severity so
  // the most severe hang is first in the list.
  ranges::sort(hung_watch_state_copies_,
               [](const WatchStateCopy& lhs, const WatchStateCopy& rhs) {
                 return lhs.deadline < rhs.deadline;
               });
}

void HangWatcher::WatchStateSnapShot::Clear() {
  hung_watch_state_copies_.clear();
  initialized_ = false;
}

HangWatcher::WatchStateSnapShot::WatchStateSnapShot(
    const WatchStateSnapShot& other) = default;

HangWatcher::WatchStateSnapShot::~WatchStateSnapShot() = default;

std::string HangWatcher::WatchStateSnapShot::PrepareHungThreadListCrashKey()
    const {
  DCHECK(IsActionable());

  // Build a crash key string that contains the ids of the hung threads.
  constexpr char kSeparator{'|'};
  std::string list_of_hung_thread_ids;

  // Add as many thread ids to the crash key as possible.
  for (const WatchStateCopy& copy : hung_watch_state_copies_) {
    std::string fragment = base::NumberToString(copy.thread_id) + kSeparator;
    if (list_of_hung_thread_ids.size() + fragment.size() <
        static_cast<std::size_t>(debug::CrashKeySize::Size256)) {
      list_of_hung_thread_ids += fragment;
    } else {
      // Respect the priority ordering of thread ids in the crash key by
      // stopping the construction as soon as one does not fit. This avoids
      // including lower-priority ids while omitting more important ones.
      break;
    }
  }

  return list_of_hung_thread_ids;
}
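
// Example (illustrative): for hung threads with ids 123 and 456 the crash key
// value produced above is "123|456|".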

bool HangWatcher::WatchStateSnapShot::IsActionable() const {
  DCHECK(initialized_);
  return !hung_watch_state_copies_.empty();
}

HangWatcher::WatchStateSnapShot HangWatcher::GrabWatchStateSnapshotForTesting()
    const {
  WatchStateSnapShot snapshot;
  snapshot.Init(watch_states_, deadline_ignore_threshold_, TimeDelta());
  return snapshot;
}

void HangWatcher::Monitor() {
  DCHECK_CALLED_ON_VALID_THREAD(hang_watcher_thread_checker_);
  AutoLock auto_lock(watch_state_lock_);

  // If all threads unregistered since this function was invoked there's
  // nothing to do anymore.
  if (watch_states_.empty())
    return;

  watch_state_snapshot_.Init(watch_states_, deadline_ignore_threshold_,
                             monitoring_period_);

  if (watch_state_snapshot_.IsActionable()) {
    DoDumpWithoutCrashing(watch_state_snapshot_);
  }

  watch_state_snapshot_.Clear();
}

void HangWatcher::DoDumpWithoutCrashing(
    const WatchStateSnapShot& watch_state_snapshot) {
  TRACE_EVENT("latency", "HangWatcher::DoDumpWithoutCrashing");

  capture_in_progress_.store(true, std::memory_order_relaxed);
  base::AutoLock scope_lock(capture_lock_);

#if !BUILDFLAG(IS_NACL)
  const std::string list_of_hung_thread_ids =
      watch_state_snapshot.PrepareHungThreadListCrashKey();

  static debug::CrashKeyString* crash_key = AllocateCrashKeyString(
      "list-of-hung-threads", debug::CrashKeySize::Size256);

  const debug::ScopedCrashKeyString list_of_hung_threads_crash_key_string(
      crash_key, list_of_hung_thread_ids);

  const debug::ScopedCrashKeyString
      time_since_last_critical_memory_pressure_crash_key_string =
          GetTimeSinceLastCriticalMemoryPressureCrashKey();

  SCOPED_CRASH_KEY_STRING32("HangWatcher", "seconds-since-last-resume",
                            GetTimeSinceLastSystemPowerResumeCrashKeyValue());

  SCOPED_CRASH_KEY_BOOL("HangWatcher", "shutting-down",
                        g_shutting_down.load(std::memory_order_relaxed));
#endif

  // To avoid capturing more than one hang that blames a subset of the same
  // threads it's necessary to keep track of the furthest deadline that
  // contributed to declaring a hang. Only once all threads have deadlines
  // past this point can we be sure that a newly discovered hang is not
  // directly related.
  // Example:
  // **********************************************************************
  // Timeline A : L------1-------2----------3-------4----------N-----------
  // Timeline B : -------2----------3-------4----------L----5------N-------
  // Timeline C : L----------------------------5------6----7---8------9---N
  // **********************************************************************
  // In the example, when a Monitor() happens during timeline A,
  // |deadline_ignore_threshold_| (L) is at time zero and deadlines (1-4)
  // are before Now() (N). A hang is captured and L is updated. During
  // the next Monitor() (timeline B) a new deadline is over but we can't
  // capture a hang because deadlines 2-4 are still live and already counted
  // toward a hang. During a third monitor (timeline C) all live deadlines
  // are now after L and a second hang can be recorded.
  base::TimeTicks latest_expired_deadline =
      watch_state_snapshot.GetHighestDeadline();

  if (on_hang_closure_for_testing_)
    on_hang_closure_for_testing_.Run();
  else
    RecordHang();

  // Update after running the actual capture.
  deadline_ignore_threshold_ = latest_expired_deadline;

  capture_in_progress_.store(false, std::memory_order_relaxed);
}

void HangWatcher::SetAfterMonitorClosureForTesting(
    base::RepeatingClosure closure) {
  DCHECK_CALLED_ON_VALID_THREAD(constructing_thread_checker_);
  after_monitor_closure_for_testing_ = std::move(closure);
}

void HangWatcher::SetOnHangClosureForTesting(base::RepeatingClosure closure) {
  DCHECK_CALLED_ON_VALID_THREAD(constructing_thread_checker_);
  on_hang_closure_for_testing_ = std::move(closure);
}

void HangWatcher::SetMonitoringPeriodForTesting(base::TimeDelta period) {
  DCHECK_CALLED_ON_VALID_THREAD(constructing_thread_checker_);
  monitoring_period_ = period;
}

void HangWatcher::SetAfterWaitCallbackForTesting(
    RepeatingCallback<void(TimeTicks)> callback) {
  DCHECK_CALLED_ON_VALID_THREAD(constructing_thread_checker_);
  after_wait_callback_ = callback;
}

void HangWatcher::SignalMonitorEventForTesting() {
  DCHECK_CALLED_ON_VALID_THREAD(constructing_thread_checker_);
  should_monitor_.Signal();
}

// static
void HangWatcher::StopMonitoringForTesting() {
  g_keep_monitoring.store(false, std::memory_order_relaxed);
}

void HangWatcher::SetTickClockForTesting(const base::TickClock* tick_clock) {
  tick_clock_ = tick_clock;
}

void HangWatcher::BlockIfCaptureInProgress() {
  // Makes a best-effort attempt to block execution if a hang is currently
  // being captured. Only block on |capture_lock| if |capture_in_progress_|
  // hints that it's already held, to avoid serializing all threads on this
  // function when no hang capture is in-progress.
  if (capture_in_progress_.load(std::memory_order_relaxed)) {
    base::AutoLock hang_lock(capture_lock_);
  }
}

void HangWatcher::UnregisterThread() {
  AutoLock auto_lock(watch_state_lock_);

  auto it = ranges::find(
      watch_states_,
      internal::HangWatchState::GetHangWatchStateForCurrentThread(),
      &std::unique_ptr<internal::HangWatchState>::get);

  // Thread should be registered to get unregistered.
  CHECK(it != watch_states_.end(), base::NotFatalUntil::M125);

  watch_states_.erase(it);
}

namespace internal {
namespace {

constexpr uint64_t kOnlyDeadlineMask = 0x00FF'FFFF'FFFF'FFFFu;
constexpr uint64_t kOnlyFlagsMask = ~kOnlyDeadlineMask;
constexpr uint64_t kMaximumFlag = 0x8000'0000'0000'0000u;

// Use as a mask to keep persistent flags and the deadline.
constexpr uint64_t kPersistentFlagsAndDeadlineMask =
    kOnlyDeadlineMask |
    static_cast<uint64_t>(
        HangWatchDeadline::Flag::kIgnoreCurrentWatchHangsInScope);

}  // namespace
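
// Illustrative layout of the 64 bits implied by the masks above: the top
// 8 bits hold flags and the low 56 bits hold the deadline, expressed as a
// TimeTicks internal value (microseconds).
//
//   63            56 55                                                  0
//  +----------------+----------------------------------------------------+
//  |     flags      |                   deadline (us)                    |
//  +----------------+----------------------------------------------------+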

// Flag binary representation assertions.
static_assert(
    static_cast<uint64_t>(HangWatchDeadline::Flag::kMinValue) >
        kOnlyDeadlineMask,
    "Invalid numerical value for flag. Would interfere with bits of data.");
static_assert(static_cast<uint64_t>(HangWatchDeadline::Flag::kMaxValue) <=
                  kMaximumFlag,
              "A flag can only set a single bit.");

HangWatchDeadline::HangWatchDeadline() = default;
HangWatchDeadline::~HangWatchDeadline() = default;

std::pair<uint64_t, TimeTicks> HangWatchDeadline::GetFlagsAndDeadline() const {
  uint64_t bits = bits_.load(std::memory_order_relaxed);
  return std::make_pair(ExtractFlags(bits),
                        DeadlineFromBits(ExtractDeadline(bits)));
}

TimeTicks HangWatchDeadline::GetDeadline() const {
  return DeadlineFromBits(
      ExtractDeadline(bits_.load(std::memory_order_relaxed)));
}

// static
TimeTicks HangWatchDeadline::Max() {
  // |kOnlyDeadlineMask| has all the bits reserved for the TimeTicks value
  // set. This means it also represents the highest representable value.
  return DeadlineFromBits(kOnlyDeadlineMask);
}

// static
bool HangWatchDeadline::IsFlagSet(Flag flag, uint64_t flags) {
  return static_cast<uint64_t>(flag) & flags;
}

void HangWatchDeadline::SetDeadline(TimeTicks new_deadline) {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
  DCHECK(new_deadline <= Max()) << "Value too high to be represented.";
  DCHECK(new_deadline >= TimeTicks{}) << "Value cannot be negative.";

  if (switch_bits_callback_for_testing_) {
    const uint64_t switched_in_bits = SwitchBitsForTesting();
    // If a concurrent deadline change is tested it cannot have a deadline or
    // persistent flag change since those always happen on the same thread.
    DCHECK((switched_in_bits & kPersistentFlagsAndDeadlineMask) == 0u);
  }

  // Discard all non-persistent flags and apply deadline change.
  const uint64_t old_bits = bits_.load(std::memory_order_relaxed);
  const uint64_t new_flags =
      ExtractFlags(old_bits & kPersistentFlagsAndDeadlineMask);
  bits_.store(new_flags | ExtractDeadline(static_cast<uint64_t>(
                              new_deadline.ToInternalValue())),
              std::memory_order_relaxed);
}

// TODO(crbug.com/40132796): Add flag DCHECKs here.
bool HangWatchDeadline::SetShouldBlockOnHang(uint64_t old_flags,
                                             TimeTicks old_deadline) {
  DCHECK(old_deadline <= Max()) << "Value too high to be represented.";
  DCHECK(old_deadline >= TimeTicks{}) << "Value cannot be negative.";

  // Set the kShouldBlockOnHang flag only if |bits_| did not change since it
  // was read. kShouldBlockOnHang is the only non-persistent flag and should
  // never be set twice. Persistent flags and deadline changes are done from
  // the same thread so there is no risk of losing concurrently added
  // information.
  uint64_t old_bits =
      old_flags | static_cast<uint64_t>(old_deadline.ToInternalValue());
  const uint64_t desired_bits =
      old_bits | static_cast<uint64_t>(Flag::kShouldBlockOnHang);

  // If a test needs to simulate |bits_| changing since it was read, this
  // happens now.
  if (switch_bits_callback_for_testing_) {
    const uint64_t switched_in_bits = SwitchBitsForTesting();

    // Injecting the flag being tested is invalid.
    DCHECK(!IsFlagSet(Flag::kShouldBlockOnHang, switched_in_bits));
  }

  return bits_.compare_exchange_weak(old_bits, desired_bits,
                                     std::memory_order_relaxed,
                                     std::memory_order_relaxed);
}
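
// Illustrative interleaving that the compare-exchange above guards against:
// if the watched thread starts a new WatchHangsInScope between the watcher
// reading |bits_| and the exchange, |bits_| no longer equals |old_bits|, the
// exchange fails and the stale deadline is not marked as hung.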

void HangWatchDeadline::SetIgnoreCurrentWatchHangsInScope() {
  SetPersistentFlag(Flag::kIgnoreCurrentWatchHangsInScope);
}

void HangWatchDeadline::UnsetIgnoreCurrentWatchHangsInScope() {
  ClearPersistentFlag(Flag::kIgnoreCurrentWatchHangsInScope);
}

void HangWatchDeadline::SetPersistentFlag(Flag flag) {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
  if (switch_bits_callback_for_testing_)
    SwitchBitsForTesting();
  bits_.fetch_or(static_cast<uint64_t>(flag), std::memory_order_relaxed);
}

void HangWatchDeadline::ClearPersistentFlag(Flag flag) {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
  if (switch_bits_callback_for_testing_)
    SwitchBitsForTesting();
  bits_.fetch_and(~(static_cast<uint64_t>(flag)), std::memory_order_relaxed);
}

// static
uint64_t HangWatchDeadline::ExtractFlags(uint64_t bits) {
  return bits & kOnlyFlagsMask;
}

// static
uint64_t HangWatchDeadline::ExtractDeadline(uint64_t bits) {
  return bits & kOnlyDeadlineMask;
}

// static
TimeTicks HangWatchDeadline::DeadlineFromBits(uint64_t bits) {
  // |kOnlyDeadlineMask| has all the deadline bits set to 1 so is the largest
  // representable value.
  DCHECK(bits <= kOnlyDeadlineMask)
      << "Flags bits are set. Remove them before returning deadline.";
  static_assert(kOnlyDeadlineMask <= std::numeric_limits<int64_t>::max());
  return TimeTicks::FromInternalValue(static_cast<int64_t>(bits));
}

bool HangWatchDeadline::IsFlagSet(Flag flag) const {
  return bits_.load(std::memory_order_relaxed) & static_cast<uint64_t>(flag);
}

void HangWatchDeadline::SetSwitchBitsClosureForTesting(
    RepeatingCallback<uint64_t(void)> closure) {
  switch_bits_callback_for_testing_ = closure;
}

void HangWatchDeadline::ResetSwitchBitsClosureForTesting() {
  DCHECK(switch_bits_callback_for_testing_);
  switch_bits_callback_for_testing_.Reset();
}

uint64_t HangWatchDeadline::SwitchBitsForTesting() {
  DCHECK(switch_bits_callback_for_testing_);

  const uint64_t old_bits = bits_.load(std::memory_order_relaxed);
  const uint64_t new_bits = switch_bits_callback_for_testing_.Run();
  const uint64_t old_flags = ExtractFlags(old_bits);

  const uint64_t switched_in_bits = old_flags | new_bits;
  bits_.store(switched_in_bits, std::memory_order_relaxed);
  return switched_in_bits;
}

HangWatchState::HangWatchState(HangWatcher::ThreadType thread_type)
    : resetter_(&hang_watch_state, this, nullptr), thread_type_(thread_type) {
// TODO(crbug.com/40187449): Remove this once macOS uses system-wide ids.
// On macOS the thread ids used by CrashPad are not the same as the ones
// provided by PlatformThread. Make sure to use the same for correct
// attribution.
#if BUILDFLAG(IS_MAC)
  uint64_t thread_id;
  pthread_threadid_np(pthread_self(), &thread_id);
  thread_id_ = checked_cast<PlatformThreadId>(thread_id);
#else
  thread_id_ = PlatformThread::CurrentId();
#endif
}

HangWatchState::~HangWatchState() {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);

  DCHECK_EQ(GetHangWatchStateForCurrentThread(), this);

#if DCHECK_IS_ON()
  // Destroying the HangWatchState should not be done if there are live
  // WatchHangsInScopes.
  DCHECK(!current_watch_hangs_in_scope_);
#endif
}

// static
std::unique_ptr<HangWatchState>
HangWatchState::CreateHangWatchStateForCurrentThread(
    HangWatcher::ThreadType thread_type) {
  // Allocate a watch state object for this thread.
  std::unique_ptr<HangWatchState> hang_state =
      std::make_unique<HangWatchState>(thread_type);

  // Setting the thread local worked.
  DCHECK_EQ(GetHangWatchStateForCurrentThread(), hang_state.get());

  // Transfer ownership to caller.
  return hang_state;
}

TimeTicks HangWatchState::GetDeadline() const {
  return deadline_.GetDeadline();
}

std::pair<uint64_t, TimeTicks> HangWatchState::GetFlagsAndDeadline() const {
  return deadline_.GetFlagsAndDeadline();
}

void HangWatchState::SetDeadline(TimeTicks deadline) {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
  deadline_.SetDeadline(deadline);
}

bool HangWatchState::IsOverDeadline() const {
  return TimeTicks::Now() > deadline_.GetDeadline();
}

void HangWatchState::SetIgnoreCurrentWatchHangsInScope() {
  deadline_.SetIgnoreCurrentWatchHangsInScope();
}

void HangWatchState::UnsetIgnoreCurrentWatchHangsInScope() {
  deadline_.UnsetIgnoreCurrentWatchHangsInScope();
}

bool HangWatchState::SetShouldBlockOnHang(uint64_t old_flags,
                                          TimeTicks old_deadline) {
  return deadline_.SetShouldBlockOnHang(old_flags, old_deadline);
}

bool HangWatchState::IsFlagSet(HangWatchDeadline::Flag flag) {
  return deadline_.IsFlagSet(flag);
}

#if DCHECK_IS_ON()
void HangWatchState::SetCurrentWatchHangsInScope(
    WatchHangsInScope* current_hang_watch_scope_enable) {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
  current_watch_hangs_in_scope_ = current_hang_watch_scope_enable;
}

WatchHangsInScope* HangWatchState::GetCurrentWatchHangsInScope() {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
  return current_watch_hangs_in_scope_;
}
#endif

HangWatchDeadline* HangWatchState::GetHangWatchDeadlineForTesting() {
  return &deadline_;
}

void HangWatchState::IncrementNestingLevel() {
  ++nesting_level_;
}

void HangWatchState::DecrementNestingLevel() {
  --nesting_level_;
}

// static
HangWatchState* HangWatchState::GetHangWatchStateForCurrentThread() {
  // Workaround false-positive MSAN use-of-uninitialized-value on
  // thread_local storage for loaded libraries:
  // https://github.com/google/sanitizers/issues/1265
  MSAN_UNPOISON(&hang_watch_state, sizeof(internal::HangWatchState*));

  return hang_watch_state;
}

PlatformThreadId HangWatchState::GetThreadID() const {
  return thread_id_;
}

}  // namespace internal

}  // namespace base