// Copyright 2020 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "base/threading/hang_watcher.h"

#include <atomic>
#include <utility>

#include "base/containers/flat_map.h"
#include "base/debug/alias.h"
#include "base/debug/crash_logging.h"
#include "base/debug/dump_without_crashing.h"
#include "base/debug/leak_annotations.h"
#include "base/feature_list.h"
#include "base/functional/bind.h"
#include "base/functional/callback_helpers.h"
#include "base/metrics/field_trial_params.h"
#include "base/metrics/histogram_macros.h"
#include "base/power_monitor/power_monitor.h"
#include "base/ranges/algorithm.h"
#include "base/strings/string_number_conversions.h"
#include "base/synchronization/lock.h"
#include "base/synchronization/waitable_event.h"
#include "base/threading/platform_thread.h"
#include "base/threading/thread_checker.h"
#include "base/threading/thread_restrictions.h"
#include "base/threading/threading_features.h"
#include "base/time/default_tick_clock.h"
#include "base/time/time.h"
#include "base/trace_event/base_tracing.h"
#include "build/build_config.h"
#include "third_party/abseil-cpp/absl/base/attributes.h"

namespace base {

namespace {

// Defines how much logging happens when the HangWatcher monitors the threads.
// Logging levels are set per thread type through Finch. It's important that
// the order of the enum members stay the same and that their numerical
// values be in increasing order. The implementation of
// ThreadTypeLoggingLevelGreaterOrEqual() depends on it.
enum class LoggingLevel { kNone = 0, kUmaOnly = 1, kUmaAndCrash = 2 };
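
// For example, because the enumerators are ordered, a logging level of
// kUmaAndCrash also satisfies a kUmaOnly check:
//
//   static_assert(LoggingLevel::kUmaAndCrash >= LoggingLevel::kUmaOnly,
//                 "Levels are comparable because their values increase.");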

HangWatcher* g_instance = nullptr;
ABSL_CONST_INIT thread_local internal::HangWatchState* hang_watch_state =
    nullptr;
std::atomic<bool> g_use_hang_watcher{false};
std::atomic<HangWatcher::ProcessType> g_hang_watcher_process_type{
    HangWatcher::ProcessType::kBrowserProcess};

std::atomic<LoggingLevel> g_threadpool_log_level{LoggingLevel::kNone};
std::atomic<LoggingLevel> g_io_thread_log_level{LoggingLevel::kNone};
std::atomic<LoggingLevel> g_main_thread_log_level{LoggingLevel::kNone};

// Indicates whether HangWatcher::Run() should return after the next
// monitoring.
std::atomic<bool> g_keep_monitoring{true};

// Emits the hung thread count histogram. |count| is the number of threads
// of type |thread_type| that were hung or became hung during the last
// monitoring window. This function should be invoked for each thread type
// encountered on each call to Monitor().
void LogHungThreadCountHistogram(HangWatcher::ThreadType thread_type,
                                 int count) {
  // In the case of unique threads like the IO or UI/Main thread a count does
  // not make sense.
  const bool any_thread_hung = count >= 1;

  const HangWatcher::ProcessType process_type =
      g_hang_watcher_process_type.load(std::memory_order_relaxed);
  switch (process_type) {
    case HangWatcher::ProcessType::kUnknownProcess:
      break;

    case HangWatcher::ProcessType::kBrowserProcess:
      switch (thread_type) {
        case HangWatcher::ThreadType::kIOThread:
          UMA_HISTOGRAM_BOOLEAN(
              "HangWatcher.IsThreadHung.BrowserProcess."
              "IOThread",
              any_thread_hung);
          break;
        case HangWatcher::ThreadType::kMainThread:
          UMA_HISTOGRAM_BOOLEAN(
              "HangWatcher.IsThreadHung.BrowserProcess."
              "UIThread",
              any_thread_hung);
          break;
        case HangWatcher::ThreadType::kThreadPoolThread:
          // Not recorded for now.
          break;
      }
      break;

    case HangWatcher::ProcessType::kGPUProcess:
      // Not recorded for now.
      break;

    case HangWatcher::ProcessType::kRendererProcess:
      switch (thread_type) {
        case HangWatcher::ThreadType::kIOThread:
          UMA_HISTOGRAM_BOOLEAN(
              "HangWatcher.IsThreadHung.RendererProcess."
              "IOThread",
              any_thread_hung);
          break;
        case HangWatcher::ThreadType::kMainThread:
          UMA_HISTOGRAM_BOOLEAN(
              "HangWatcher.IsThreadHung.RendererProcess."
              "MainThread",
              any_thread_hung);
          break;
        case HangWatcher::ThreadType::kThreadPoolThread:
          // Not recorded for now.
          break;
      }
      break;

    case HangWatcher::ProcessType::kUtilityProcess:
      switch (thread_type) {
        case HangWatcher::ThreadType::kIOThread:
          UMA_HISTOGRAM_BOOLEAN(
              "HangWatcher.IsThreadHung.UtilityProcess."
              "IOThread",
              any_thread_hung);
          break;
        case HangWatcher::ThreadType::kMainThread:
          UMA_HISTOGRAM_BOOLEAN(
              "HangWatcher.IsThreadHung.UtilityProcess."
              "MainThread",
              any_thread_hung);
          break;
        case HangWatcher::ThreadType::kThreadPoolThread:
          // Not recorded for now.
          break;
      }
      break;
  }
}

// Returns true if |thread_type| was configured through Finch to have a logging
// level that is equal to or exceeds |logging_level|.
bool ThreadTypeLoggingLevelGreaterOrEqual(HangWatcher::ThreadType thread_type,
                                          LoggingLevel logging_level) {
  switch (thread_type) {
    case HangWatcher::ThreadType::kIOThread:
      return g_io_thread_log_level.load(std::memory_order_relaxed) >=
             logging_level;
    case HangWatcher::ThreadType::kMainThread:
      return g_main_thread_log_level.load(std::memory_order_relaxed) >=
             logging_level;
    case HangWatcher::ThreadType::kThreadPoolThread:
      return g_threadpool_log_level.load(std::memory_order_relaxed) >=
             logging_level;
  }
}

}  // namespace

// Determines if the HangWatcher is activated. When false, the HangWatcher
// thread is never started.
BASE_FEATURE(kEnableHangWatcher,
             "EnableHangWatcher",
             FEATURE_ENABLED_BY_DEFAULT);

// Browser process.
constexpr base::FeatureParam<int> kIOThreadLogLevel{
    &kEnableHangWatcher, "io_thread_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};
constexpr base::FeatureParam<int> kUIThreadLogLevel{
    &kEnableHangWatcher, "ui_thread_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};
constexpr base::FeatureParam<int> kThreadPoolLogLevel{
    &kEnableHangWatcher, "threadpool_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};

// GPU process.
constexpr base::FeatureParam<int> kGPUProcessIOThreadLogLevel{
    &kEnableHangWatcher, "gpu_process_io_thread_log_level",
    static_cast<int>(LoggingLevel::kNone)};
constexpr base::FeatureParam<int> kGPUProcessMainThreadLogLevel{
    &kEnableHangWatcher, "gpu_process_main_thread_log_level",
    static_cast<int>(LoggingLevel::kNone)};
constexpr base::FeatureParam<int> kGPUProcessThreadPoolLogLevel{
    &kEnableHangWatcher, "gpu_process_threadpool_log_level",
    static_cast<int>(LoggingLevel::kNone)};

// Renderer process.
constexpr base::FeatureParam<int> kRendererProcessIOThreadLogLevel{
    &kEnableHangWatcher, "renderer_process_io_thread_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};
constexpr base::FeatureParam<int> kRendererProcessMainThreadLogLevel{
    &kEnableHangWatcher, "renderer_process_main_thread_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};
constexpr base::FeatureParam<int> kRendererProcessThreadPoolLogLevel{
    &kEnableHangWatcher, "renderer_process_threadpool_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};

// Utility process.
constexpr base::FeatureParam<int> kUtilityProcessIOThreadLogLevel{
    &kEnableHangWatcher, "utility_process_io_thread_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};
constexpr base::FeatureParam<int> kUtilityProcessMainThreadLogLevel{
    &kEnableHangWatcher, "utility_process_main_thread_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};
constexpr base::FeatureParam<int> kUtilityProcessThreadPoolLogLevel{
    &kEnableHangWatcher, "utility_process_threadpool_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};

constexpr const char* kThreadName = "HangWatcher";

// The time that the HangWatcher thread will sleep for between calls to
// Monitor(). Increasing or decreasing this does not modify the type of hangs
// that can be detected. It instead increases the probability that a call to
// Monitor() will happen at the right time to catch a hang. This has to be
// balanced with power/cpu use concerns as busy looping would catch almost all
// hangs but present unacceptable overhead. NOTE: If this period is ever
// changed then all metrics that depend on it, like HangWatcher.IsThreadHung,
// need to be updated.
constexpr auto kMonitoringPeriod = base::Seconds(10);

WatchHangsInScope::WatchHangsInScope(TimeDelta timeout) {
  internal::HangWatchState* current_hang_watch_state =
      HangWatcher::IsEnabled()
          ? internal::HangWatchState::GetHangWatchStateForCurrentThread()
          : nullptr;

  DCHECK(timeout >= base::TimeDelta()) << "Negative timeouts are invalid.";

  // Thread is not monitored, noop.
  if (!current_hang_watch_state) {
    took_effect_ = false;
    return;
  }

#if DCHECK_IS_ON()
  previous_watch_hangs_in_scope_ =
      current_hang_watch_state->GetCurrentWatchHangsInScope();
  current_hang_watch_state->SetCurrentWatchHangsInScope(this);
#endif

  auto [old_flags, old_deadline] =
      current_hang_watch_state->GetFlagsAndDeadline();

  // TODO(crbug.com/1034046): Check whether we are already over the deadline of
  // the previous WatchHangsInScope here by issuing only one TimeTicks::Now()
  // and reusing the value.

  previous_deadline_ = old_deadline;
  TimeTicks deadline = TimeTicks::Now() + timeout;
  current_hang_watch_state->SetDeadline(deadline);
  current_hang_watch_state->IncrementNestingLevel();

  const bool hangs_ignored_for_current_scope =
      internal::HangWatchDeadline::IsFlagSet(
          internal::HangWatchDeadline::Flag::kIgnoreCurrentWatchHangsInScope,
          old_flags);

  // If the current WatchHangsInScope is ignored, temporarily reactivate hang
  // watching for newly created WatchHangsInScopes. On exiting hang watching
  // is suspended again to return to the original state.
  if (hangs_ignored_for_current_scope) {
    current_hang_watch_state->UnsetIgnoreCurrentWatchHangsInScope();
    set_hangs_ignored_on_exit_ = true;
  }
}

WatchHangsInScope::~WatchHangsInScope() {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);

  // If hang watching was not enabled at construction time there is nothing to
  // validate or undo.
  if (!took_effect_) {
    return;
  }

  // If the thread was unregistered since construction there is also nothing to
  // do.
  auto* const state =
      internal::HangWatchState::GetHangWatchStateForCurrentThread();
  if (!state) {
    return;
  }

  // If a hang is currently being captured we should block here so execution
  // stops and we avoid recording unrelated stack frames in the crash.
  if (state->IsFlagSet(internal::HangWatchDeadline::Flag::kShouldBlockOnHang)) {
    base::HangWatcher::GetInstance()->BlockIfCaptureInProgress();
  }

#if DCHECK_IS_ON()
  // Verify that no Scope was destructed out of order.
  DCHECK_EQ(this, state->GetCurrentWatchHangsInScope());
  state->SetCurrentWatchHangsInScope(previous_watch_hangs_in_scope_);
#endif

  if (state->nesting_level() == 1) {
    // If a call to InvalidateActiveExpectations() suspended hang watching
    // during the lifetime of this or any nested WatchHangsInScope it can now
    // safely be reactivated by clearing the ignore bit since this is the
    // outer-most scope.
    state->UnsetIgnoreCurrentWatchHangsInScope();
  } else if (set_hangs_ignored_on_exit_) {
    // Return to ignoring hangs since this was the previous state before hang
    // watching was temporarily enabled for this WatchHangsInScope only in the
    // constructor.
    state->SetIgnoreCurrentWatchHangsInScope();
  }

  // Reset the deadline to the value it had before entering this
  // WatchHangsInScope.
  state->SetDeadline(previous_deadline_);
  // TODO(crbug.com/1034046): Log when a WatchHangsInScope exits after its
  // deadline and that went undetected by the HangWatcher.

  state->DecrementNestingLevel();
}
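
// Usage sketch (illustrative only; DoWork() is a hypothetical function): on a
// thread that is registered with the HangWatcher, a WatchHangsInScope covers
// a unit of work that is expected to complete within the timeout:
//
//   {
//     WatchHangsInScope scope(base::Seconds(10));
//     DoWork();  // If this hangs, a monitoring pass can detect it.
//   }  // Destruction restores the previous deadline and nesting level.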

// static
void HangWatcher::InitializeOnMainThread(ProcessType process_type) {
  DCHECK(!g_use_hang_watcher);
  DCHECK(g_io_thread_log_level == LoggingLevel::kNone);
  DCHECK(g_main_thread_log_level == LoggingLevel::kNone);
  DCHECK(g_threadpool_log_level == LoggingLevel::kNone);

  bool enable_hang_watcher = base::FeatureList::IsEnabled(kEnableHangWatcher);

  // Do not start HangWatcher in the GPU process until the issue related to
  // invalid magic signature in the GPU WatchDog is fixed
  // (https://crbug.com/1297760).
  if (process_type == ProcessType::kGPUProcess)
    enable_hang_watcher = false;

  g_use_hang_watcher.store(enable_hang_watcher, std::memory_order_relaxed);

  // Keep the process type.
  g_hang_watcher_process_type.store(process_type, std::memory_order_relaxed);

  // If hang watching is disabled as a whole there is no need to read the
  // params.
  if (!enable_hang_watcher)
    return;

  // Retrieve thread-specific config for hang watching.
  switch (process_type) {
    case HangWatcher::ProcessType::kUnknownProcess:
      break;

    case HangWatcher::ProcessType::kBrowserProcess:
      g_threadpool_log_level.store(
          static_cast<LoggingLevel>(kThreadPoolLogLevel.Get()),
          std::memory_order_relaxed);
      g_io_thread_log_level.store(
          static_cast<LoggingLevel>(kIOThreadLogLevel.Get()),
          std::memory_order_relaxed);
      g_main_thread_log_level.store(
          static_cast<LoggingLevel>(kUIThreadLogLevel.Get()),
          std::memory_order_relaxed);
      break;

    case HangWatcher::ProcessType::kGPUProcess:
      g_threadpool_log_level.store(
          static_cast<LoggingLevel>(kGPUProcessThreadPoolLogLevel.Get()),
          std::memory_order_relaxed);
      g_io_thread_log_level.store(
          static_cast<LoggingLevel>(kGPUProcessIOThreadLogLevel.Get()),
          std::memory_order_relaxed);
      g_main_thread_log_level.store(
          static_cast<LoggingLevel>(kGPUProcessMainThreadLogLevel.Get()),
          std::memory_order_relaxed);
      break;

    case HangWatcher::ProcessType::kRendererProcess:
      g_threadpool_log_level.store(
          static_cast<LoggingLevel>(kRendererProcessThreadPoolLogLevel.Get()),
          std::memory_order_relaxed);
      g_io_thread_log_level.store(
          static_cast<LoggingLevel>(kRendererProcessIOThreadLogLevel.Get()),
          std::memory_order_relaxed);
      g_main_thread_log_level.store(
          static_cast<LoggingLevel>(kRendererProcessMainThreadLogLevel.Get()),
          std::memory_order_relaxed);
      break;

    case HangWatcher::ProcessType::kUtilityProcess:
      g_threadpool_log_level.store(
          static_cast<LoggingLevel>(kUtilityProcessThreadPoolLogLevel.Get()),
          std::memory_order_relaxed);
      g_io_thread_log_level.store(
          static_cast<LoggingLevel>(kUtilityProcessIOThreadLogLevel.Get()),
          std::memory_order_relaxed);
      g_main_thread_log_level.store(
          static_cast<LoggingLevel>(kUtilityProcessMainThreadLogLevel.Get()),
          std::memory_order_relaxed);
      break;
  }
}

void HangWatcher::UnitializeOnMainThreadForTesting() {
  g_use_hang_watcher.store(false, std::memory_order_relaxed);
  g_threadpool_log_level.store(LoggingLevel::kNone, std::memory_order_relaxed);
  g_io_thread_log_level.store(LoggingLevel::kNone, std::memory_order_relaxed);
  g_main_thread_log_level.store(LoggingLevel::kNone, std::memory_order_relaxed);
}

// static
bool HangWatcher::IsEnabled() {
  return g_use_hang_watcher.load(std::memory_order_relaxed);
}

// static
bool HangWatcher::IsThreadPoolHangWatchingEnabled() {
  return g_threadpool_log_level.load(std::memory_order_relaxed) !=
         LoggingLevel::kNone;
}

// static
bool HangWatcher::IsIOThreadHangWatchingEnabled() {
  return g_io_thread_log_level.load(std::memory_order_relaxed) !=
         LoggingLevel::kNone;
}

// static
bool HangWatcher::IsCrashReportingEnabled() {
  if (g_main_thread_log_level.load(std::memory_order_relaxed) ==
      LoggingLevel::kUmaAndCrash) {
    return true;
  }
  if (g_io_thread_log_level.load(std::memory_order_relaxed) ==
      LoggingLevel::kUmaAndCrash) {
    return true;
  }
  if (g_threadpool_log_level.load(std::memory_order_relaxed) ==
      LoggingLevel::kUmaAndCrash) {
    return true;
  }
  return false;
}

// static
void HangWatcher::InvalidateActiveExpectations() {
  auto* const state =
      internal::HangWatchState::GetHangWatchStateForCurrentThread();
  if (!state) {
    // If the current thread is not under watch there is nothing to invalidate.
    return;
  }
  state->SetIgnoreCurrentWatchHangsInScope();
}

HangWatcher::HangWatcher()
    : monitor_period_(kMonitoringPeriod),
      should_monitor_(WaitableEvent::ResetPolicy::AUTOMATIC),
      thread_(this, kThreadName),
      tick_clock_(base::DefaultTickClock::GetInstance()),
      memory_pressure_listener_(
          FROM_HERE,
          base::BindRepeating(&HangWatcher::OnMemoryPressure,
                              base::Unretained(this))) {
  // |thread_checker_| should not be bound to the constructing thread.
  DETACH_FROM_THREAD(hang_watcher_thread_checker_);

  should_monitor_.declare_only_used_while_idle();

  DCHECK(!g_instance);
  g_instance = this;
}

// static
void HangWatcher::CreateHangWatcherInstance() {
  DCHECK(!g_instance);
  g_instance = new base::HangWatcher();
  // The hang watcher is leaked to make sure it survives all watched threads.
  ANNOTATE_LEAKING_OBJECT_PTR(g_instance);
}

#if !BUILDFLAG(IS_NACL)
debug::ScopedCrashKeyString
HangWatcher::GetTimeSinceLastCriticalMemoryPressureCrashKey() {
  DCHECK_CALLED_ON_VALID_THREAD(hang_watcher_thread_checker_);

  // The crash key size is large enough to hold the biggest possible return
  // value from base::TimeDelta::InSeconds().
  constexpr debug::CrashKeySize kCrashKeyContentSize =
      debug::CrashKeySize::Size32;
  DCHECK_GE(static_cast<uint64_t>(kCrashKeyContentSize),
            base::NumberToString(std::numeric_limits<int64_t>::max()).size());

  static debug::CrashKeyString* crash_key = AllocateCrashKeyString(
      "seconds-since-last-memory-pressure", kCrashKeyContentSize);

  const base::TimeTicks last_critical_memory_pressure_time =
      last_critical_memory_pressure_.load(std::memory_order_relaxed);
  if (last_critical_memory_pressure_time.is_null()) {
    constexpr char kNoMemoryPressureMsg[] = "No critical memory pressure";
    static_assert(
        std::size(kNoMemoryPressureMsg) <=
            static_cast<uint64_t>(kCrashKeyContentSize),
        "The crash key is too small to hold \"No critical memory pressure\".");
    return debug::ScopedCrashKeyString(crash_key, kNoMemoryPressureMsg);
  } else {
    base::TimeDelta time_since_last_critical_memory_pressure =
        base::TimeTicks::Now() - last_critical_memory_pressure_time;
    return debug::ScopedCrashKeyString(
        crash_key, base::NumberToString(
                       time_since_last_critical_memory_pressure.InSeconds()));
  }
}
#endif

std::string HangWatcher::GetTimeSinceLastSystemPowerResumeCrashKeyValue()
    const {
  DCHECK_CALLED_ON_VALID_THREAD(hang_watcher_thread_checker_);

  const TimeTicks last_system_power_resume_time =
      PowerMonitor::GetLastSystemResumeTime();
  if (last_system_power_resume_time.is_null())
    return "Never suspended";
  if (last_system_power_resume_time == TimeTicks::Max())
    return "Power suspended";

  const TimeDelta time_since_last_system_resume =
      TimeTicks::Now() - last_system_power_resume_time;
  return NumberToString(time_since_last_system_resume.InSeconds());
}

void HangWatcher::OnMemoryPressure(
    base::MemoryPressureListener::MemoryPressureLevel memory_pressure_level) {
  if (memory_pressure_level ==
      base::MemoryPressureListener::MEMORY_PRESSURE_LEVEL_CRITICAL) {
    last_critical_memory_pressure_.store(base::TimeTicks::Now(),
                                         std::memory_order_relaxed);
  }
}

HangWatcher::~HangWatcher() {
  DCHECK_CALLED_ON_VALID_THREAD(constructing_thread_checker_);
  DCHECK_EQ(g_instance, this);
  DCHECK(watch_states_.empty());
  g_instance = nullptr;
  Stop();
}

void HangWatcher::Start() {
  thread_.Start();
}

void HangWatcher::Stop() {
  g_keep_monitoring.store(false, std::memory_order_relaxed);
  should_monitor_.Signal();
  thread_.Join();

  // In production the HangWatcher is always leaked, but during testing it may
  // be stopped and restarted using a new instance. This makes sure the next
  // call to Start() will actually monitor in that case.
  g_keep_monitoring.store(true, std::memory_order_relaxed);
}

bool HangWatcher::IsWatchListEmpty() {
  AutoLock auto_lock(watch_state_lock_);
  return watch_states_.empty();
}

void HangWatcher::Wait() {
  while (true) {
    // Amount by which the actual time spent sleeping can deviate from
    // the target time and still be considered timely.
    constexpr base::TimeDelta kWaitDriftTolerance = base::Milliseconds(100);

    const base::TimeTicks time_before_wait = tick_clock_->NowTicks();

    // Sleep until next scheduled monitoring or until signaled.
    const bool was_signaled = should_monitor_.TimedWait(monitor_period_);

    if (after_wait_callback_)
      after_wait_callback_.Run(time_before_wait);

    const base::TimeTicks time_after_wait = tick_clock_->NowTicks();
    const base::TimeDelta wait_time = time_after_wait - time_before_wait;
    const bool wait_was_normal =
        wait_time <= (monitor_period_ + kWaitDriftTolerance);

    UMA_HISTOGRAM_TIMES("HangWatcher.SleepDrift.BrowserProcess",
                        wait_time - monitor_period_);

    if (!wait_was_normal) {
      // If the time spent waiting was too high it might indicate the machine
      // is very slow or that it went to sleep. In any case we can't trust the
      // WatchHangsInScopes that are currently live. Update the ignore
      // threshold to make sure they don't trigger a hang on subsequent
      // monitors, then keep waiting.

      base::AutoLock auto_lock(watch_state_lock_);

      // Find the latest deadline among the live watch states. They might
      // change atomically while iterating, but that's fine: if they do, the
      // new WatchHangsInScope was constructed very soon after the abnormal
      // sleep happened and might still be affected by the root cause.
      // Ignoring it is cautious and harmless.
      base::TimeTicks latest_deadline;
      for (const auto& state : watch_states_) {
        base::TimeTicks deadline = state->GetDeadline();
        if (deadline > latest_deadline) {
          latest_deadline = deadline;
        }
      }

      deadline_ignore_threshold_ = latest_deadline;
    }

    // Stop waiting.
    if (wait_was_normal || was_signaled)
      return;
  }
}

void HangWatcher::Run() {
  // Monitor() should only run on |thread_|. Bind |thread_checker_| here to
  // make sure of that.
  DCHECK_CALLED_ON_VALID_THREAD(hang_watcher_thread_checker_);

  while (g_keep_monitoring.load(std::memory_order_relaxed)) {
    Wait();

    if (!IsWatchListEmpty() &&
        g_keep_monitoring.load(std::memory_order_relaxed)) {
      Monitor();
      if (after_monitor_closure_for_testing_) {
        after_monitor_closure_for_testing_.Run();
      }
    }
  }
}

// static
HangWatcher* HangWatcher::GetInstance() {
  return g_instance;
}

// static
void HangWatcher::RecordHang() {
  base::debug::DumpWithoutCrashing();
  NO_CODE_FOLDING();
}

ScopedClosureRunner HangWatcher::RegisterThreadInternal(
    ThreadType thread_type) {
  AutoLock auto_lock(watch_state_lock_);
  CHECK(base::FeatureList::GetInstance());

  // Do not install a WatchState if the results would never be observable.
  if (!ThreadTypeLoggingLevelGreaterOrEqual(thread_type,
                                            LoggingLevel::kUmaOnly)) {
    return ScopedClosureRunner(base::DoNothing());
  }

  watch_states_.push_back(
      internal::HangWatchState::CreateHangWatchStateForCurrentThread(
          thread_type));
  return ScopedClosureRunner(BindOnce(&HangWatcher::UnregisterThread,
                                      Unretained(HangWatcher::GetInstance())));
}

// static
ScopedClosureRunner HangWatcher::RegisterThread(ThreadType thread_type) {
  if (!GetInstance()) {
    return ScopedClosureRunner();
  }

  return GetInstance()->RegisterThreadInternal(thread_type);
}
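
// Usage sketch (illustrative; MyThreadMain() is a hypothetical function): a
// thread registers itself once at startup and stays registered for as long as
// the returned ScopedClosureRunner is alive:
//
//   void MyThreadMain() {
//     ScopedClosureRunner unregister_closure =
//         base::HangWatcher::RegisterThread(
//             base::HangWatcher::ThreadType::kThreadPoolThread);
//     ...  // WatchHangsInScope instances on this thread are now monitored.
//   }  // |unregister_closure| runs and unregisters the thread.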

base::TimeTicks HangWatcher::WatchStateSnapShot::GetHighestDeadline() const {
  DCHECK(IsActionable());

  // Since entries are sorted in increasing order the last entry is the largest
  // one.
  return hung_watch_state_copies_.back().deadline;
}

HangWatcher::WatchStateSnapShot::WatchStateSnapShot() = default;

void HangWatcher::WatchStateSnapShot::Init(
    const HangWatchStates& watch_states,
    base::TimeTicks deadline_ignore_threshold) {
  DCHECK(!initialized_);

  // Whether or not the snapshot turns out to be actionable, it will have been
  // initialized after this function returns.
  initialized_ = true;

  const base::TimeTicks now = base::TimeTicks::Now();
  bool all_threads_marked = true;
  bool found_deadline_before_ignore_threshold = false;

  // Use a std::array to store the hang counts to avoid allocations. The
  // numerical values of the HangWatcher::ThreadType enum are used to index
  // into the array. A |kInvalidHangCount| is used to signify there were no
  // threads of the type found.
  constexpr size_t kHangCountArraySize =
      static_cast<std::size_t>(base::HangWatcher::ThreadType::kMax) + 1;
  std::array<int, kHangCountArraySize> hung_counts_per_thread_type;

  constexpr int kInvalidHangCount = -1;
  hung_counts_per_thread_type.fill(kInvalidHangCount);

  // Will be true if any of the hung threads has a logging level high enough,
  // as defined through Finch params, to warrant dumping a crash.
  bool any_hung_thread_has_dumping_enabled = false;

  // Copy hung thread information.
  for (const auto& watch_state : watch_states) {
    uint64_t flags;
    TimeTicks deadline;
    std::tie(flags, deadline) = watch_state->GetFlagsAndDeadline();

    if (deadline <= deadline_ignore_threshold) {
      found_deadline_before_ignore_threshold = true;
    }

    if (internal::HangWatchDeadline::IsFlagSet(
            internal::HangWatchDeadline::Flag::kIgnoreCurrentWatchHangsInScope,
            flags)) {
      continue;
    }

    // If a thread type is monitored and did not hang it still needs to be
    // logged as a zero count.
    const size_t hang_count_index =
        static_cast<size_t>(watch_state.get()->thread_type());
    if (hung_counts_per_thread_type[hang_count_index] == kInvalidHangCount) {
      hung_counts_per_thread_type[hang_count_index] = 0;
    }

    // Only copy hung threads.
    if (deadline <= now) {
      ++hung_counts_per_thread_type[hang_count_index];

      if (ThreadTypeLoggingLevelGreaterOrEqual(watch_state.get()->thread_type(),
                                               LoggingLevel::kUmaAndCrash)) {
        any_hung_thread_has_dumping_enabled = true;
      }

#if BUILDFLAG(ENABLE_BASE_TRACING)
      // Emit trace events for monitored threads.
      if (ThreadTypeLoggingLevelGreaterOrEqual(watch_state.get()->thread_type(),
                                               LoggingLevel::kUmaOnly)) {
        const PlatformThreadId thread_id = watch_state.get()->GetThreadID();
        const auto track = perfetto::Track::FromPointer(
            this, perfetto::ThreadTrack::ForThread(thread_id));
        TRACE_EVENT_BEGIN("base", "HangWatcher::ThreadHung", track, deadline);
        TRACE_EVENT_END("base", track, now);
        // TODO(crbug.com/1021571): Remove this once fixed.
        PERFETTO_INTERNAL_ADD_EMPTY_EVENT();
      }
#endif

      // Attempt to mark the thread as needing to stay within its current
      // WatchHangsInScope until capture is complete.
      bool thread_marked = watch_state->SetShouldBlockOnHang(flags, deadline);

      // If marking some threads already failed the snapshot won't be kept so
      // there is no need to keep adding to it. The loop doesn't abort though
      // to keep marking the other threads. If these threads remain hung until
      // the next capture then they'll already be marked and will be included
      // in the capture at that time.
      if (thread_marked && all_threads_marked) {
        hung_watch_state_copies_.push_back(
            WatchStateCopy{deadline, watch_state.get()->GetThreadID()});
      } else {
        all_threads_marked = false;
      }
    }
  }

  // Log the hung thread counts to histograms for each thread type if any
  // threads of that type were found.
  for (size_t i = 0; i < kHangCountArraySize; ++i) {
    const int hang_count = hung_counts_per_thread_type[i];
    const HangWatcher::ThreadType thread_type =
        static_cast<HangWatcher::ThreadType>(i);
    if (hang_count != kInvalidHangCount &&
        ThreadTypeLoggingLevelGreaterOrEqual(thread_type,
                                             LoggingLevel::kUmaOnly)) {
      LogHungThreadCountHistogram(thread_type, hang_count);
    }
  }

  // Three cases can invalidate this snapshot and prevent the capture of the
  // hang.
  //
  // 1. Some threads could not be marked for blocking so this snapshot isn't
  // actionable since marked threads could be hung because of unmarked ones.
  // If only the marked threads were captured the information would be
  // incomplete.
  //
  // 2. Any of the threads have a deadline before |deadline_ignore_threshold|.
  // If any thread is ignored it reduces the confidence in the whole state and
  // it's better to avoid capturing misleading data.
  //
  // 3. The hung threads found were all of types that are not configured
  // through Finch to trigger a crash dump.
  //
  if (!all_threads_marked || found_deadline_before_ignore_threshold ||
      !any_hung_thread_has_dumping_enabled) {
    hung_watch_state_copies_.clear();
    return;
  }

  // Sort |hung_watch_state_copies_| by order of decreasing hang severity so
  // the most severe hang is first in the list.
  ranges::sort(hung_watch_state_copies_,
               [](const WatchStateCopy& lhs, const WatchStateCopy& rhs) {
                 return lhs.deadline < rhs.deadline;
               });
}

void HangWatcher::WatchStateSnapShot::Clear() {
  hung_watch_state_copies_.clear();
  initialized_ = false;
}

HangWatcher::WatchStateSnapShot::WatchStateSnapShot(
    const WatchStateSnapShot& other) = default;

HangWatcher::WatchStateSnapShot::~WatchStateSnapShot() = default;

std::string HangWatcher::WatchStateSnapShot::PrepareHungThreadListCrashKey()
    const {
  DCHECK(IsActionable());

  // Build a crash key string that contains the ids of the hung threads.
  constexpr char kSeparator{'|'};
  std::string list_of_hung_thread_ids;

  // Add as many thread ids to the crash key as possible.
  for (const WatchStateCopy& copy : hung_watch_state_copies_) {
    std::string fragment = base::NumberToString(copy.thread_id) + kSeparator;
    if (list_of_hung_thread_ids.size() + fragment.size() <
        static_cast<std::size_t>(debug::CrashKeySize::Size256)) {
      list_of_hung_thread_ids += fragment;
    } else {
      // Respect the priority ordering of thread ids in the crash key by
      // stopping the construction as soon as one does not fit. This avoids
      // including lower-priority ids while omitting more important ones.
      break;
    }
  }

  return list_of_hung_thread_ids;
}

bool HangWatcher::WatchStateSnapShot::IsActionable() const {
  DCHECK(initialized_);
  return !hung_watch_state_copies_.empty();
}

HangWatcher::WatchStateSnapShot HangWatcher::GrabWatchStateSnapshotForTesting()
    const {
  WatchStateSnapShot snapshot;
  snapshot.Init(watch_states_, deadline_ignore_threshold_);
  return snapshot;
}

void HangWatcher::Monitor() {
  DCHECK_CALLED_ON_VALID_THREAD(hang_watcher_thread_checker_);
  AutoLock auto_lock(watch_state_lock_);

  // If all threads unregistered since this function was invoked there's
  // nothing to do anymore.
  if (watch_states_.empty())
    return;

  watch_state_snapshot_.Init(watch_states_, deadline_ignore_threshold_);

  if (watch_state_snapshot_.IsActionable()) {
    DoDumpWithoutCrashing(watch_state_snapshot_);
  }

  watch_state_snapshot_.Clear();
}

void HangWatcher::DoDumpWithoutCrashing(
    const WatchStateSnapShot& watch_state_snapshot) {
  TRACE_EVENT("base", "HangWatcher::DoDumpWithoutCrashing");

  capture_in_progress_.store(true, std::memory_order_relaxed);
  base::AutoLock scope_lock(capture_lock_);

#if !BUILDFLAG(IS_NACL)
  const std::string list_of_hung_thread_ids =
      watch_state_snapshot.PrepareHungThreadListCrashKey();

  static debug::CrashKeyString* crash_key = AllocateCrashKeyString(
      "list-of-hung-threads", debug::CrashKeySize::Size256);

  const debug::ScopedCrashKeyString list_of_hung_threads_crash_key_string(
      crash_key, list_of_hung_thread_ids);

  const debug::ScopedCrashKeyString
      time_since_last_critical_memory_pressure_crash_key_string =
          GetTimeSinceLastCriticalMemoryPressureCrashKey();

  SCOPED_CRASH_KEY_STRING32("HangWatcher", "seconds-since-last-resume",
                            GetTimeSinceLastSystemPowerResumeCrashKeyValue());
#endif

  // To avoid capturing more than one hang that blames a subset of the same
  // threads it's necessary to keep track of the furthest deadline that
  // contributed to declaring a hang. Only once all threads have deadlines past
  // this point can we be sure that a newly discovered hang is not directly
  // related.
  // Example:
  // **********************************************************************
  // Timeline A : L------1-------2----------3-------4----------N-----------
  // Timeline B : -------2----------3-------4----------L----5------N-------
  // Timeline C : L----------------------------5------6----7---8------9---N
  // **********************************************************************
  // In the example when a Monitor() happens during timeline A
  // |deadline_ignore_threshold_| (L) is at time zero and deadlines (1-4)
  // are before Now() (N). A hang is captured and L is updated. During
  // the next Monitor() (timeline B) a new deadline is over but we can't
  // capture a hang because deadlines 2-4 are still live and already counted
  // toward a hang. During a third monitor (timeline C) all live deadlines
  // are now after L and a second hang can be recorded.
  base::TimeTicks latest_expired_deadline =
      watch_state_snapshot.GetHighestDeadline();

  if (on_hang_closure_for_testing_)
    on_hang_closure_for_testing_.Run();
  else
    RecordHang();

  // Update after running the actual capture.
  deadline_ignore_threshold_ = latest_expired_deadline;

  capture_in_progress_.store(false, std::memory_order_relaxed);
}

void HangWatcher::SetAfterMonitorClosureForTesting(
    base::RepeatingClosure closure) {
  DCHECK_CALLED_ON_VALID_THREAD(constructing_thread_checker_);
  after_monitor_closure_for_testing_ = std::move(closure);
}

void HangWatcher::SetOnHangClosureForTesting(base::RepeatingClosure closure) {
  DCHECK_CALLED_ON_VALID_THREAD(constructing_thread_checker_);
  on_hang_closure_for_testing_ = std::move(closure);
}

void HangWatcher::SetMonitoringPeriodForTesting(base::TimeDelta period) {
  DCHECK_CALLED_ON_VALID_THREAD(constructing_thread_checker_);
  monitor_period_ = period;
}

void HangWatcher::SetAfterWaitCallbackForTesting(
    RepeatingCallback<void(TimeTicks)> callback) {
  DCHECK_CALLED_ON_VALID_THREAD(constructing_thread_checker_);
  after_wait_callback_ = callback;
}

void HangWatcher::SignalMonitorEventForTesting() {
  DCHECK_CALLED_ON_VALID_THREAD(constructing_thread_checker_);
  should_monitor_.Signal();
}

// static
void HangWatcher::StopMonitoringForTesting() {
  g_keep_monitoring.store(false, std::memory_order_relaxed);
}

void HangWatcher::SetTickClockForTesting(const base::TickClock* tick_clock) {
  tick_clock_ = tick_clock;
}

void HangWatcher::BlockIfCaptureInProgress() {
  // Makes a best-effort attempt to block execution if a hang is currently
  // being captured. Only block on |capture_lock| if |capture_in_progress_|
  // hints that it's already held to avoid serializing all threads on this
  // function when no hang capture is in-progress.
  if (capture_in_progress_.load(std::memory_order_relaxed))
    base::AutoLock hang_lock(capture_lock_);
}

void HangWatcher::UnregisterThread() {
  AutoLock auto_lock(watch_state_lock_);

  auto it = ranges::find(
      watch_states_,
      internal::HangWatchState::GetHangWatchStateForCurrentThread(),
      &std::unique_ptr<internal::HangWatchState>::get);

  // Thread should be registered to get unregistered.
  DCHECK(it != watch_states_.end());

  watch_states_.erase(it);
}

namespace internal {
namespace {

constexpr uint64_t kOnlyDeadlineMask = 0x00FF'FFFF'FFFF'FFFFu;
constexpr uint64_t kOnlyFlagsMask = ~kOnlyDeadlineMask;
constexpr uint64_t kMaximumFlag = 0x8000'0000'0000'0000u;
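
// Illustrative layout of the packed 64-bit word defined by the masks above
// (a sketch, not from the original source): the top 8 bits hold flags and
// the low 56 bits hold the deadline's internal TimeTicks value.
//
//   bit 63        bit 56 | bit 55                                 bit 0
//   +---------------------+-----------------------------------------+
//   |     8 flag bits     |            56 deadline bits             |
//   +---------------------+-----------------------------------------+
//
// For example, (0x8000'0000'0000'002Au & kOnlyDeadlineMask) == 0x2Au and
// (0x8000'0000'0000'002Au & kOnlyFlagsMask) == kMaximumFlag.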

// Use as a mask to keep persistent flags and the deadline.
constexpr uint64_t kPersistentFlagsAndDeadlineMask =
    kOnlyDeadlineMask |
    static_cast<uint64_t>(
        HangWatchDeadline::Flag::kIgnoreCurrentWatchHangsInScope);
}  // namespace

// Flag binary representation assertions.
static_assert(
    static_cast<uint64_t>(HangWatchDeadline::Flag::kMinValue) >
        kOnlyDeadlineMask,
    "Invalid numerical value for flag. Would interfere with bits of data.");
static_assert(static_cast<uint64_t>(HangWatchDeadline::Flag::kMaxValue) <=
                  kMaximumFlag,
              "A flag can only set a single bit.");

HangWatchDeadline::HangWatchDeadline() = default;
HangWatchDeadline::~HangWatchDeadline() = default;

std::pair<uint64_t, TimeTicks> HangWatchDeadline::GetFlagsAndDeadline() const {
  uint64_t bits = bits_.load(std::memory_order_relaxed);
  return std::make_pair(ExtractFlags(bits),
                        DeadlineFromBits(ExtractDeadline((bits))));
}

TimeTicks HangWatchDeadline::GetDeadline() const {
  return DeadlineFromBits(
      ExtractDeadline(bits_.load(std::memory_order_relaxed)));
}

// static
TimeTicks HangWatchDeadline::Max() {
  // |kOnlyDeadlineMask| has all the bits reserved for the TimeTicks value
  // set. This means it also represents the highest representable value.
  return DeadlineFromBits(kOnlyDeadlineMask);
}

// static
bool HangWatchDeadline::IsFlagSet(Flag flag, uint64_t flags) {
  return static_cast<uint64_t>(flag) & flags;
}

void HangWatchDeadline::SetDeadline(TimeTicks new_deadline) {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
  DCHECK(new_deadline <= Max()) << "Value too high to be represented.";
  DCHECK(new_deadline >= TimeTicks{}) << "Value cannot be negative.";

  if (switch_bits_callback_for_testing_) {
    const uint64_t switched_in_bits = SwitchBitsForTesting();
    // If a concurrent deadline change is tested it cannot have a deadline or
    // persistent flag change since those always happen on the same thread.
    DCHECK((switched_in_bits & kPersistentFlagsAndDeadlineMask) == 0u);
  }

  // Discard all non-persistent flags and apply deadline change.
  const uint64_t old_bits = bits_.load(std::memory_order_relaxed);
  const uint64_t new_flags =
      ExtractFlags(old_bits & kPersistentFlagsAndDeadlineMask);
  bits_.store(new_flags | ExtractDeadline(static_cast<uint64_t>(
                              new_deadline.ToInternalValue())),
              std::memory_order_relaxed);
}

// TODO(crbug.com/1087026): Add flag DCHECKs here.
bool HangWatchDeadline::SetShouldBlockOnHang(uint64_t old_flags,
                                             TimeTicks old_deadline) {
  DCHECK(old_deadline <= Max()) << "Value too high to be represented.";
  DCHECK(old_deadline >= TimeTicks{}) << "Value cannot be negative.";

  // Set the kShouldBlockOnHang flag only if |bits_| did not change since it
  // was read. kShouldBlockOnHang is the only non-persistent flag and should
  // never be set twice. Persistent flags and deadline changes are done from
  // the same thread so there is no risk of losing concurrently added
  // information.
  uint64_t old_bits =
      old_flags | static_cast<uint64_t>(old_deadline.ToInternalValue());
  const uint64_t desired_bits =
      old_bits | static_cast<uint64_t>(Flag::kShouldBlockOnHang);

  // If a test needs to simulate |bits_| changing since calling this function
  // this happens now.
  if (switch_bits_callback_for_testing_) {
    const uint64_t switched_in_bits = SwitchBitsForTesting();

    // Injecting the flag being tested is invalid.
    DCHECK(!IsFlagSet(Flag::kShouldBlockOnHang, switched_in_bits));
  }

  return bits_.compare_exchange_weak(old_bits, desired_bits,
                                     std::memory_order_relaxed,
                                     std::memory_order_relaxed);
}
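
// Illustrative call pattern (a sketch based on the usage in
// WatchStateSnapShot::Init() above): the monitor thread reads the packed word
// once, then tries to mark the thread using the same values it read:
//
//   auto [flags, deadline] = state->GetFlagsAndDeadline();
//   ...
//   const bool marked = state->SetShouldBlockOnHang(flags, deadline);
//
// A false return means |bits_| changed between the read and the
// compare-and-swap (e.g. the watched thread entered a new WatchHangsInScope),
// so the mark was not applied.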

void HangWatchDeadline::SetIgnoreCurrentWatchHangsInScope() {
  SetPersistentFlag(Flag::kIgnoreCurrentWatchHangsInScope);
}

void HangWatchDeadline::UnsetIgnoreCurrentWatchHangsInScope() {
  ClearPersistentFlag(Flag::kIgnoreCurrentWatchHangsInScope);
}

void HangWatchDeadline::SetPersistentFlag(Flag flag) {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
  if (switch_bits_callback_for_testing_)
    SwitchBitsForTesting();
  bits_.fetch_or(static_cast<uint64_t>(flag), std::memory_order_relaxed);
}

void HangWatchDeadline::ClearPersistentFlag(Flag flag) {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
  if (switch_bits_callback_for_testing_)
    SwitchBitsForTesting();
  bits_.fetch_and(~(static_cast<uint64_t>(flag)), std::memory_order_relaxed);
}

// static
uint64_t HangWatchDeadline::ExtractFlags(uint64_t bits) {
  return bits & kOnlyFlagsMask;
}

// static
uint64_t HangWatchDeadline::ExtractDeadline(uint64_t bits) {
  return bits & kOnlyDeadlineMask;
}

// static
TimeTicks HangWatchDeadline::DeadlineFromBits(uint64_t bits) {
  // |kOnlyDeadlineMask| has all the deadline bits set to 1 so is the largest
  // representable value.
  DCHECK(bits <= kOnlyDeadlineMask)
      << "Flags bits are set. Remove them before returning deadline.";
  static_assert(kOnlyDeadlineMask <= std::numeric_limits<int64_t>::max());
  return TimeTicks::FromInternalValue(static_cast<int64_t>(bits));
}

bool HangWatchDeadline::IsFlagSet(Flag flag) const {
  return bits_.load(std::memory_order_relaxed) & static_cast<uint64_t>(flag);
}

void HangWatchDeadline::SetSwitchBitsClosureForTesting(
    RepeatingCallback<uint64_t(void)> closure) {
  switch_bits_callback_for_testing_ = closure;
}

void HangWatchDeadline::ResetSwitchBitsClosureForTesting() {
  DCHECK(switch_bits_callback_for_testing_);
  switch_bits_callback_for_testing_.Reset();
}

uint64_t HangWatchDeadline::SwitchBitsForTesting() {
  DCHECK(switch_bits_callback_for_testing_);

  const uint64_t old_bits = bits_.load(std::memory_order_relaxed);
  const uint64_t new_bits = switch_bits_callback_for_testing_.Run();
  const uint64_t old_flags = ExtractFlags(old_bits);

  const uint64_t switched_in_bits = old_flags | new_bits;
  bits_.store(switched_in_bits, std::memory_order_relaxed);
  return switched_in_bits;
}

HangWatchState::HangWatchState(HangWatcher::ThreadType thread_type)
    : resetter_(&hang_watch_state, this, nullptr), thread_type_(thread_type) {
  // TODO(crbug.com/1223033): Remove this once macOS uses system-wide ids.
  // On macOS the thread ids used by CrashPad are not the same as the ones
  // provided by PlatformThread. Make sure to use the same for correct
  // attribution.
#if BUILDFLAG(IS_MAC)
  uint64_t thread_id;
  pthread_threadid_np(pthread_self(), &thread_id);
  thread_id_ = checked_cast<PlatformThreadId>(thread_id);
#else
  thread_id_ = PlatformThread::CurrentId();
#endif
}

HangWatchState::~HangWatchState() {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);

  DCHECK_EQ(GetHangWatchStateForCurrentThread(), this);

#if DCHECK_IS_ON()
  // Destroying the HangWatchState should not be done if there are live
  // WatchHangsInScopes.
  DCHECK(!current_watch_hangs_in_scope_);
#endif
}

// static
std::unique_ptr<HangWatchState>
HangWatchState::CreateHangWatchStateForCurrentThread(
    HangWatcher::ThreadType thread_type) {
  // Allocate a watch state object for this thread.
  std::unique_ptr<HangWatchState> hang_state =
      std::make_unique<HangWatchState>(thread_type);

  // Setting the thread local worked.
  DCHECK_EQ(GetHangWatchStateForCurrentThread(), hang_state.get());

  // Transfer ownership to caller.
  return hang_state;
}

TimeTicks HangWatchState::GetDeadline() const {
  return deadline_.GetDeadline();
}

std::pair<uint64_t, TimeTicks> HangWatchState::GetFlagsAndDeadline() const {
  return deadline_.GetFlagsAndDeadline();
}

void HangWatchState::SetDeadline(TimeTicks deadline) {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
  deadline_.SetDeadline(deadline);
}

bool HangWatchState::IsOverDeadline() const {
  return TimeTicks::Now() > deadline_.GetDeadline();
}

void HangWatchState::SetIgnoreCurrentWatchHangsInScope() {
  deadline_.SetIgnoreCurrentWatchHangsInScope();
}

void HangWatchState::UnsetIgnoreCurrentWatchHangsInScope() {
  deadline_.UnsetIgnoreCurrentWatchHangsInScope();
}

bool HangWatchState::SetShouldBlockOnHang(uint64_t old_flags,
                                          TimeTicks old_deadline) {
  return deadline_.SetShouldBlockOnHang(old_flags, old_deadline);
}

bool HangWatchState::IsFlagSet(HangWatchDeadline::Flag flag) {
  return deadline_.IsFlagSet(flag);
}

#if DCHECK_IS_ON()
void HangWatchState::SetCurrentWatchHangsInScope(
    WatchHangsInScope* current_hang_watch_scope_enable) {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
  current_watch_hangs_in_scope_ = current_hang_watch_scope_enable;
}

WatchHangsInScope* HangWatchState::GetCurrentWatchHangsInScope() {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
  return current_watch_hangs_in_scope_;
}
#endif

HangWatchDeadline* HangWatchState::GetHangWatchDeadlineForTesting() {
  return &deadline_;
}

void HangWatchState::IncrementNestingLevel() {
  ++nesting_level_;
}

void HangWatchState::DecrementNestingLevel() {
  --nesting_level_;
}

// static
HangWatchState* HangWatchState::GetHangWatchStateForCurrentThread() {
  // Workaround false-positive MSAN use-of-uninitialized-value on
  // thread_local storage for loaded libraries:
  // https://github.com/google/sanitizers/issues/1265
  MSAN_UNPOISON(&hang_watch_state, sizeof(internal::HangWatchState*));

  return hang_watch_state;
}

PlatformThreadId HangWatchState::GetThreadID() const {
  return thread_id_;
}

}  // namespace internal

}  // namespace base