// Copyright 2020 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "base/threading/hang_watcher.h"

#include <atomic>
#include <utility>

#include "base/containers/flat_map.h"
#include "base/debug/alias.h"
#include "base/debug/crash_logging.h"
#include "base/debug/dump_without_crashing.h"
#include "base/debug/leak_annotations.h"
#include "base/feature_list.h"
#include "base/functional/bind.h"
#include "base/functional/callback_helpers.h"
#include "base/metrics/field_trial_params.h"
#include "base/metrics/histogram_macros.h"
#include "base/power_monitor/power_monitor.h"
#include "base/ranges/algorithm.h"
#include "base/strings/string_number_conversions.h"
#include "base/synchronization/lock.h"
#include "base/synchronization/waitable_event.h"
#include "base/threading/platform_thread.h"
#include "base/threading/thread_checker.h"
#include "base/threading/thread_restrictions.h"
#include "base/threading/threading_features.h"
#include "base/time/default_tick_clock.h"
#include "base/time/time.h"
#include "base/trace_event/base_tracing.h"
#include "build/build_config.h"
#include "third_party/abseil-cpp/absl/base/attributes.h"

namespace base {

namespace {

// Defines how much logging happens when the HangWatcher monitors the threads.
// Logging levels are set per thread type through Finch. It's important that
// the order of the enum members stay the same and that their numerical
// values be in increasing order. The implementation of
// ThreadTypeLoggingLevelGreaterOrEqual() depends on it.
enum class LoggingLevel { kNone = 0, kUmaOnly = 1, kUmaAndCrash = 2 };

HangWatcher* g_instance = nullptr;
ABSL_CONST_INIT thread_local internal::HangWatchState* hang_watch_state =
    nullptr;
std::atomic<bool> g_use_hang_watcher{false};
std::atomic<HangWatcher::ProcessType> g_hang_watcher_process_type{
    HangWatcher::ProcessType::kBrowserProcess};

std::atomic<LoggingLevel> g_threadpool_log_level{LoggingLevel::kNone};
std::atomic<LoggingLevel> g_io_thread_log_level{LoggingLevel::kNone};
std::atomic<LoggingLevel> g_main_thread_log_level{LoggingLevel::kNone};

// Indicates whether HangWatcher::Run() should return after the next monitoring.
std::atomic<bool> g_keep_monitoring{true};

// Emits the hung thread count histogram. |count| is the number of threads
// of type |thread_type| that were hung or became hung during the last
// monitoring window. This function should be invoked for each thread type
// encountered on each call to Monitor().
void LogHungThreadCountHistogram(HangWatcher::ThreadType thread_type,
                                 int count) {
  // In the case of unique threads like the IO or UI/Main thread a count does
  // not make sense.
  const bool any_thread_hung = count >= 1;

  const HangWatcher::ProcessType process_type =
      g_hang_watcher_process_type.load(std::memory_order_relaxed);
  switch (process_type) {
    case HangWatcher::ProcessType::kUnknownProcess:
      break;

    case HangWatcher::ProcessType::kBrowserProcess:
      switch (thread_type) {
        case HangWatcher::ThreadType::kIOThread:
          UMA_HISTOGRAM_BOOLEAN(
              "HangWatcher.IsThreadHung.BrowserProcess."
              "IOThread",
              any_thread_hung);
          break;
        case HangWatcher::ThreadType::kMainThread:
          UMA_HISTOGRAM_BOOLEAN(
              "HangWatcher.IsThreadHung.BrowserProcess."
              "UIThread",
              any_thread_hung);
          break;
        case HangWatcher::ThreadType::kThreadPoolThread:
          // Not recorded for now.
          break;
      }
      break;

    case HangWatcher::ProcessType::kGPUProcess:
      // Not recorded for now.
      break;

    case HangWatcher::ProcessType::kRendererProcess:
      switch (thread_type) {
        case HangWatcher::ThreadType::kIOThread:
          UMA_HISTOGRAM_BOOLEAN(
              "HangWatcher.IsThreadHung.RendererProcess."
              "IOThread",
              any_thread_hung);
          break;
        case HangWatcher::ThreadType::kMainThread:
          UMA_HISTOGRAM_BOOLEAN(
              "HangWatcher.IsThreadHung.RendererProcess."
              "MainThread",
              any_thread_hung);
          break;
        case HangWatcher::ThreadType::kThreadPoolThread:
          // Not recorded for now.
          break;
      }
      break;

    case HangWatcher::ProcessType::kUtilityProcess:
      switch (thread_type) {
        case HangWatcher::ThreadType::kIOThread:
          UMA_HISTOGRAM_BOOLEAN(
              "HangWatcher.IsThreadHung.UtilityProcess."
              "IOThread",
              any_thread_hung);
          break;
        case HangWatcher::ThreadType::kMainThread:
          UMA_HISTOGRAM_BOOLEAN(
              "HangWatcher.IsThreadHung.UtilityProcess."
              "MainThread",
              any_thread_hung);
          break;
        case HangWatcher::ThreadType::kThreadPoolThread:
          // Not recorded for now.
          break;
      }
      break;
  }
}

// Returns true if |thread_type| was configured through Finch to have a logging
// level that is equal to or exceeds |logging_level|.
bool ThreadTypeLoggingLevelGreaterOrEqual(HangWatcher::ThreadType thread_type,
                                          LoggingLevel logging_level) {
  switch (thread_type) {
    case HangWatcher::ThreadType::kIOThread:
      return g_io_thread_log_level.load(std::memory_order_relaxed) >=
             logging_level;
    case HangWatcher::ThreadType::kMainThread:
      return g_main_thread_log_level.load(std::memory_order_relaxed) >=
             logging_level;
    case HangWatcher::ThreadType::kThreadPoolThread:
      return g_threadpool_log_level.load(std::memory_order_relaxed) >=
             logging_level;
  }
}

}  // namespace

// Determines if the HangWatcher is activated. When false, the HangWatcher
// thread is never started.
BASE_FEATURE(kEnableHangWatcher,
             "EnableHangWatcher",
             FEATURE_ENABLED_BY_DEFAULT);

BASE_FEATURE(kEnableHangWatcherInZygoteChildren,
             "EnableHangWatcherInZygoteChildren",
             FEATURE_ENABLED_BY_DEFAULT);

// Browser process.
constexpr base::FeatureParam<int> kIOThreadLogLevel{
    &kEnableHangWatcher, "io_thread_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};
constexpr base::FeatureParam<int> kUIThreadLogLevel{
    &kEnableHangWatcher, "ui_thread_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};
constexpr base::FeatureParam<int> kThreadPoolLogLevel{
    &kEnableHangWatcher, "threadpool_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};

// GPU process.
constexpr base::FeatureParam<int> kGPUProcessIOThreadLogLevel{
    &kEnableHangWatcher, "gpu_process_io_thread_log_level",
    static_cast<int>(LoggingLevel::kNone)};
constexpr base::FeatureParam<int> kGPUProcessMainThreadLogLevel{
    &kEnableHangWatcher, "gpu_process_main_thread_log_level",
    static_cast<int>(LoggingLevel::kNone)};
constexpr base::FeatureParam<int> kGPUProcessThreadPoolLogLevel{
    &kEnableHangWatcher, "gpu_process_threadpool_log_level",
    static_cast<int>(LoggingLevel::kNone)};

// Renderer process.
constexpr base::FeatureParam<int> kRendererProcessIOThreadLogLevel{
    &kEnableHangWatcher, "renderer_process_io_thread_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};
constexpr base::FeatureParam<int> kRendererProcessMainThreadLogLevel{
    &kEnableHangWatcher, "renderer_process_main_thread_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};
constexpr base::FeatureParam<int> kRendererProcessThreadPoolLogLevel{
    &kEnableHangWatcher, "renderer_process_threadpool_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};

// Utility process.
constexpr base::FeatureParam<int> kUtilityProcessIOThreadLogLevel{
    &kEnableHangWatcher, "utility_process_io_thread_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};
constexpr base::FeatureParam<int> kUtilityProcessMainThreadLogLevel{
    &kEnableHangWatcher, "utility_process_main_thread_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};
constexpr base::FeatureParam<int> kUtilityProcessThreadPoolLogLevel{
    &kEnableHangWatcher, "utility_process_threadpool_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};
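
// Illustrative Finch configuration (hypothetical, not a shipped config): an
// experiment that sets the "io_thread_log_level" param of EnableHangWatcher
// to "2" runs the browser IO thread at LoggingLevel::kUmaAndCrash, i.e. UMA
// logging plus crash dumps on detected hangs.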

constexpr const char* kThreadName = "HangWatcher";

// The time that the HangWatcher thread will sleep for between calls to
// Monitor(). Increasing or decreasing this does not modify the type of hangs
// that can be detected. It instead increases the probability that a call to
// Monitor() will happen at the right time to catch a hang. This has to be
// balanced with power/cpu use concerns as busy looping would catch almost all
// hangs but present unacceptable overhead. NOTE: If this period is ever
// changed then all metrics that depend on it, like HangWatcher.IsThreadHung,
// need to be updated.
constexpr auto kMonitoringPeriod = base::Seconds(10);
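
// Illustrative usage sketch (hypothetical call site, not code from this
// file): a watched thread wraps a unit of work in a WatchHangsInScope so the
// HangWatcher thread can flag it if the deadline expires:
//
//   void ProcessTask() {
//     // Hypothetical timeout chosen for the example.
//     WatchHangsInScope scope(base::Seconds(10));
//     DoWorkThatShouldFinishQuickly();  // Hypothetical helper.
//   }  // Destruction restores the previous deadline.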

WatchHangsInScope::WatchHangsInScope(TimeDelta timeout) {
  internal::HangWatchState* current_hang_watch_state =
      HangWatcher::IsEnabled()
          ? internal::HangWatchState::GetHangWatchStateForCurrentThread()
          : nullptr;

  DCHECK(timeout >= base::TimeDelta()) << "Negative timeouts are invalid.";

  // Thread is not monitored, noop.
  if (!current_hang_watch_state) {
    took_effect_ = false;
    return;
  }

#if DCHECK_IS_ON()
  previous_watch_hangs_in_scope_ =
      current_hang_watch_state->GetCurrentWatchHangsInScope();
  current_hang_watch_state->SetCurrentWatchHangsInScope(this);
#endif

  auto [old_flags, old_deadline] =
      current_hang_watch_state->GetFlagsAndDeadline();

  // TODO(crbug.com/1034046): Check whether we are already over the deadline
  // for the previous WatchHangsInScope here by issuing only one
  // TimeTicks::Now() and reusing the value.

  previous_deadline_ = old_deadline;
  TimeTicks deadline = TimeTicks::Now() + timeout;
  current_hang_watch_state->SetDeadline(deadline);
  current_hang_watch_state->IncrementNestingLevel();

  const bool hangs_ignored_for_current_scope =
      internal::HangWatchDeadline::IsFlagSet(
          internal::HangWatchDeadline::Flag::kIgnoreCurrentWatchHangsInScope,
          old_flags);

  // If the current WatchHangsInScope is ignored, temporarily reactivate hang
  // watching for newly created WatchHangsInScopes. On exiting, hang watching
  // is suspended again to return to the original state.
  if (hangs_ignored_for_current_scope) {
    current_hang_watch_state->UnsetIgnoreCurrentWatchHangsInScope();
    set_hangs_ignored_on_exit_ = true;
  }
}

WatchHangsInScope::~WatchHangsInScope() {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);

  // If hang watching was not enabled at construction time there is nothing to
  // validate or undo.
  if (!took_effect_) {
    return;
  }

  // If the thread was unregistered since construction there is also nothing
  // to do.
  auto* const state =
      internal::HangWatchState::GetHangWatchStateForCurrentThread();
  if (!state) {
    return;
  }

  // If a hang is currently being captured we should block here so execution
  // stops and we avoid recording unrelated stack frames in the crash.
  if (state->IsFlagSet(internal::HangWatchDeadline::Flag::kShouldBlockOnHang)) {
    base::HangWatcher::GetInstance()->BlockIfCaptureInProgress();
  }

#if DCHECK_IS_ON()
  // Verify that no scope was destructed out of order.
  DCHECK_EQ(this, state->GetCurrentWatchHangsInScope());
  state->SetCurrentWatchHangsInScope(previous_watch_hangs_in_scope_);
#endif

  if (state->nesting_level() == 1) {
    // If a call to InvalidateActiveExpectations() suspended hang watching
    // during the lifetime of this or any nested WatchHangsInScope it can now
    // safely be reactivated by clearing the ignore bit since this is the
    // outer-most scope.
    state->UnsetIgnoreCurrentWatchHangsInScope();
  } else if (set_hangs_ignored_on_exit_) {
    // Return to ignoring hangs since this was the previous state before hang
    // watching was temporarily enabled for this WatchHangsInScope only in the
    // constructor.
    state->SetIgnoreCurrentWatchHangsInScope();
  }

  // Reset the deadline to the value it had before entering this
  // WatchHangsInScope.
  state->SetDeadline(previous_deadline_);
  // TODO(crbug.com/1034046): Log when a WatchHangsInScope exits after its
  // deadline and that went undetected by the HangWatcher.

  state->DecrementNestingLevel();
}
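
// Illustrative nesting sketch (hypothetical): scopes can nest, and each
// destructor restores the deadline of the enclosing scope:
//
//   WatchHangsInScope outer(base::Seconds(10));
//   {
//     WatchHangsInScope inner(base::Seconds(1));
//     DoShortOperation();  // Hypothetical helper.
//   }  // |outer|'s 10 second deadline is back in effect here.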

// static
void HangWatcher::InitializeOnMainThread(ProcessType process_type,
                                         bool is_zygote_child) {
  DCHECK(!g_use_hang_watcher);
  DCHECK(g_io_thread_log_level == LoggingLevel::kNone);
  DCHECK(g_main_thread_log_level == LoggingLevel::kNone);
  DCHECK(g_threadpool_log_level == LoggingLevel::kNone);

  bool enable_hang_watcher = base::FeatureList::IsEnabled(kEnableHangWatcher);

#if BUILDFLAG(IS_LINUX) || BUILDFLAG(IS_CHROMEOS)
  if (is_zygote_child) {
    enable_hang_watcher =
        enable_hang_watcher &&
        base::FeatureList::IsEnabled(kEnableHangWatcherInZygoteChildren);
  }
#endif

  // Do not start HangWatcher in the GPU process until the issue related to
  // invalid magic signature in the GPU WatchDog is fixed
  // (https://crbug.com/1297760).
  if (process_type == ProcessType::kGPUProcess)
    enable_hang_watcher = false;

  g_use_hang_watcher.store(enable_hang_watcher, std::memory_order_relaxed);

  // Keep the process type.
  g_hang_watcher_process_type.store(process_type, std::memory_order_relaxed);

  // If hang watching is disabled as a whole there is no need to read the
  // params.
  if (!enable_hang_watcher)
    return;

  // Retrieve thread-specific config for hang watching.
  switch (process_type) {
    case HangWatcher::ProcessType::kUnknownProcess:
      break;

    case HangWatcher::ProcessType::kBrowserProcess:
      g_threadpool_log_level.store(
          static_cast<LoggingLevel>(kThreadPoolLogLevel.Get()),
          std::memory_order_relaxed);
      g_io_thread_log_level.store(
          static_cast<LoggingLevel>(kIOThreadLogLevel.Get()),
          std::memory_order_relaxed);
      g_main_thread_log_level.store(
          static_cast<LoggingLevel>(kUIThreadLogLevel.Get()),
          std::memory_order_relaxed);
      break;

    case HangWatcher::ProcessType::kGPUProcess:
      g_threadpool_log_level.store(
          static_cast<LoggingLevel>(kGPUProcessThreadPoolLogLevel.Get()),
          std::memory_order_relaxed);
      g_io_thread_log_level.store(
          static_cast<LoggingLevel>(kGPUProcessIOThreadLogLevel.Get()),
          std::memory_order_relaxed);
      g_main_thread_log_level.store(
          static_cast<LoggingLevel>(kGPUProcessMainThreadLogLevel.Get()),
          std::memory_order_relaxed);
      break;

    case HangWatcher::ProcessType::kRendererProcess:
      g_threadpool_log_level.store(
          static_cast<LoggingLevel>(kRendererProcessThreadPoolLogLevel.Get()),
          std::memory_order_relaxed);
      g_io_thread_log_level.store(
          static_cast<LoggingLevel>(kRendererProcessIOThreadLogLevel.Get()),
          std::memory_order_relaxed);
      g_main_thread_log_level.store(
          static_cast<LoggingLevel>(kRendererProcessMainThreadLogLevel.Get()),
          std::memory_order_relaxed);
      break;

    case HangWatcher::ProcessType::kUtilityProcess:
      g_threadpool_log_level.store(
          static_cast<LoggingLevel>(kUtilityProcessThreadPoolLogLevel.Get()),
          std::memory_order_relaxed);
      g_io_thread_log_level.store(
          static_cast<LoggingLevel>(kUtilityProcessIOThreadLogLevel.Get()),
          std::memory_order_relaxed);
      g_main_thread_log_level.store(
          static_cast<LoggingLevel>(kUtilityProcessMainThreadLogLevel.Get()),
          std::memory_order_relaxed);
      break;
  }
}

void HangWatcher::UnitializeOnMainThreadForTesting() {
  g_use_hang_watcher.store(false, std::memory_order_relaxed);
  g_threadpool_log_level.store(LoggingLevel::kNone, std::memory_order_relaxed);
  g_io_thread_log_level.store(LoggingLevel::kNone, std::memory_order_relaxed);
  g_main_thread_log_level.store(LoggingLevel::kNone, std::memory_order_relaxed);
}

// static
bool HangWatcher::IsEnabled() {
  return g_use_hang_watcher.load(std::memory_order_relaxed);
}

// static
bool HangWatcher::IsThreadPoolHangWatchingEnabled() {
  return g_threadpool_log_level.load(std::memory_order_relaxed) !=
         LoggingLevel::kNone;
}

// static
bool HangWatcher::IsIOThreadHangWatchingEnabled() {
  return g_io_thread_log_level.load(std::memory_order_relaxed) !=
         LoggingLevel::kNone;
}

// static
bool HangWatcher::IsCrashReportingEnabled() {
  if (g_main_thread_log_level.load(std::memory_order_relaxed) ==
      LoggingLevel::kUmaAndCrash) {
    return true;
  }
  if (g_io_thread_log_level.load(std::memory_order_relaxed) ==
      LoggingLevel::kUmaAndCrash) {
    return true;
  }
  if (g_threadpool_log_level.load(std::memory_order_relaxed) ==
      LoggingLevel::kUmaAndCrash) {
    return true;
  }
  return false;
}

// static
void HangWatcher::InvalidateActiveExpectations() {
  auto* const state =
      internal::HangWatchState::GetHangWatchStateForCurrentThread();
  if (!state) {
    // If the current thread is not under watch there is nothing to invalidate.
    return;
  }
  state->SetIgnoreCurrentWatchHangsInScope();
}
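
// Illustrative call site (hypothetical): code about to block for a
// legitimately long time, e.g. waiting for a debugger, can call
// InvalidateActiveExpectations() first so the live WatchHangsInScopes on
// this thread do not produce a false-positive hang report:
//
//   base::HangWatcher::InvalidateActiveExpectations();
//   WaitForPotentiallyVeryLongOperation();  // Hypothetical helper.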

HangWatcher::HangWatcher()
    : monitor_period_(kMonitoringPeriod),
      should_monitor_(WaitableEvent::ResetPolicy::AUTOMATIC),
      thread_(this, kThreadName),
      tick_clock_(base::DefaultTickClock::GetInstance()),
      memory_pressure_listener_(
          FROM_HERE,
          base::BindRepeating(&HangWatcher::OnMemoryPressure,
                              base::Unretained(this))) {
  // |thread_checker_| should not be bound to the constructing thread.
  DETACH_FROM_THREAD(hang_watcher_thread_checker_);

  should_monitor_.declare_only_used_while_idle();

  DCHECK(!g_instance);
  g_instance = this;
}

// static
void HangWatcher::CreateHangWatcherInstance() {
  DCHECK(!g_instance);
  g_instance = new base::HangWatcher();
  // The hang watcher is leaked to make sure it survives all watched threads.
  ANNOTATE_LEAKING_OBJECT_PTR(g_instance);
}

#if !BUILDFLAG(IS_NACL)
debug::ScopedCrashKeyString
HangWatcher::GetTimeSinceLastCriticalMemoryPressureCrashKey() {
  DCHECK_CALLED_ON_VALID_THREAD(hang_watcher_thread_checker_);

  // The crash key size is large enough to hold the biggest possible return
  // value from base::TimeDelta::InSeconds().
  constexpr debug::CrashKeySize kCrashKeyContentSize =
      debug::CrashKeySize::Size32;
  DCHECK_GE(static_cast<uint64_t>(kCrashKeyContentSize),
            base::NumberToString(std::numeric_limits<int64_t>::max()).size());

  static debug::CrashKeyString* crash_key = AllocateCrashKeyString(
      "seconds-since-last-memory-pressure", kCrashKeyContentSize);

  const base::TimeTicks last_critical_memory_pressure_time =
      last_critical_memory_pressure_.load(std::memory_order_relaxed);
  if (last_critical_memory_pressure_time.is_null()) {
    constexpr char kNoMemoryPressureMsg[] = "No critical memory pressure";
    static_assert(
        std::size(kNoMemoryPressureMsg) <=
            static_cast<uint64_t>(kCrashKeyContentSize),
        "The crash key is too small to hold \"No critical memory pressure\".");
    return debug::ScopedCrashKeyString(crash_key, kNoMemoryPressureMsg);
  } else {
    base::TimeDelta time_since_last_critical_memory_pressure =
        base::TimeTicks::Now() - last_critical_memory_pressure_time;
    return debug::ScopedCrashKeyString(
        crash_key, base::NumberToString(
                       time_since_last_critical_memory_pressure.InSeconds()));
  }
}
#endif

std::string HangWatcher::GetTimeSinceLastSystemPowerResumeCrashKeyValue()
    const {
  DCHECK_CALLED_ON_VALID_THREAD(hang_watcher_thread_checker_);

  const TimeTicks last_system_power_resume_time =
      PowerMonitor::GetLastSystemResumeTime();
  if (last_system_power_resume_time.is_null())
    return "Never suspended";
  if (last_system_power_resume_time == TimeTicks::Max())
    return "Power suspended";

  const TimeDelta time_since_last_system_resume =
      TimeTicks::Now() - last_system_power_resume_time;
  return NumberToString(time_since_last_system_resume.InSeconds());
}

void HangWatcher::OnMemoryPressure(
    base::MemoryPressureListener::MemoryPressureLevel memory_pressure_level) {
  if (memory_pressure_level ==
      base::MemoryPressureListener::MEMORY_PRESSURE_LEVEL_CRITICAL) {
    last_critical_memory_pressure_.store(base::TimeTicks::Now(),
                                         std::memory_order_relaxed);
  }
}

HangWatcher::~HangWatcher() {
  DCHECK_CALLED_ON_VALID_THREAD(constructing_thread_checker_);
  DCHECK_EQ(g_instance, this);
  DCHECK(watch_states_.empty());
  g_instance = nullptr;
  Stop();
}

void HangWatcher::Start() {
  thread_.Start();
  thread_started_ = true;
}

void HangWatcher::Stop() {
  g_keep_monitoring.store(false, std::memory_order_relaxed);
  should_monitor_.Signal();
  thread_.Join();
  thread_started_ = false;

  // In production HangWatcher is always leaked, but during testing it may be
  // stopped and restarted using a new instance. This makes sure the next call
  // to Start() will actually monitor in that case.
  g_keep_monitoring.store(true, std::memory_order_relaxed);
}

bool HangWatcher::IsWatchListEmpty() {
  AutoLock auto_lock(watch_state_lock_);
  return watch_states_.empty();
}

void HangWatcher::Wait() {
  while (true) {
    // Amount by which the actual time spent sleeping can deviate from
    // the target time and still be considered timely.
    constexpr base::TimeDelta kWaitDriftTolerance = base::Milliseconds(100);

    const base::TimeTicks time_before_wait = tick_clock_->NowTicks();

    // Sleep until next scheduled monitoring or until signaled.
    const bool was_signaled = should_monitor_.TimedWait(monitor_period_);

    if (after_wait_callback_)
      after_wait_callback_.Run(time_before_wait);

    const base::TimeTicks time_after_wait = tick_clock_->NowTicks();
    const base::TimeDelta wait_time = time_after_wait - time_before_wait;
    const bool wait_was_normal =
        wait_time <= (monitor_period_ + kWaitDriftTolerance);

    UMA_HISTOGRAM_TIMES("HangWatcher.SleepDrift.BrowserProcess",
                        wait_time - monitor_period_);

    if (!wait_was_normal) {
      // If the time spent waiting was too high it might indicate the machine
      // is very slow or that it went to sleep. In any case we can't trust the
      // WatchHangsInScopes that are currently live. Update the ignore
      // threshold to make sure they don't trigger a hang on subsequent
      // monitors, then keep waiting.

      base::AutoLock auto_lock(watch_state_lock_);

      // Find the latest deadline among the live watch states. They might
      // change atomically while iterating but that's fine because if they do
      // it means the new WatchHangsInScope was constructed very soon after
      // the abnormal sleep happened and might still be affected by the root
      // cause. Ignoring it is cautious and harmless.
      base::TimeTicks latest_deadline;
      for (const auto& state : watch_states_) {
        base::TimeTicks deadline = state->GetDeadline();
        if (deadline > latest_deadline) {
          latest_deadline = deadline;
        }
      }

      deadline_ignore_threshold_ = latest_deadline;
    }

    // Stop waiting.
    if (wait_was_normal || was_signaled)
      return;
  }
}

void HangWatcher::Run() {
  // Monitor() should only run on |thread_|. Bind |thread_checker_| here to
  // make sure of that.
  DCHECK_CALLED_ON_VALID_THREAD(hang_watcher_thread_checker_);

  while (g_keep_monitoring.load(std::memory_order_relaxed)) {
    Wait();

    if (!IsWatchListEmpty() &&
        g_keep_monitoring.load(std::memory_order_relaxed)) {
      Monitor();
      if (after_monitor_closure_for_testing_) {
        after_monitor_closure_for_testing_.Run();
      }
    }
  }
}

// static
HangWatcher* HangWatcher::GetInstance() {
  return g_instance;
}

// static
void HangWatcher::RecordHang() {
  base::debug::DumpWithoutCrashing();
  NO_CODE_FOLDING();
}

ScopedClosureRunner HangWatcher::RegisterThreadInternal(
    ThreadType thread_type) {
  AutoLock auto_lock(watch_state_lock_);
  CHECK(base::FeatureList::GetInstance());

  // Do not install a WatchState if the results would never be observable.
  if (!ThreadTypeLoggingLevelGreaterOrEqual(thread_type,
                                            LoggingLevel::kUmaOnly)) {
    return ScopedClosureRunner(base::DoNothing());
  }

  watch_states_.push_back(
      internal::HangWatchState::CreateHangWatchStateForCurrentThread(
          thread_type));
  return ScopedClosureRunner(BindOnce(&HangWatcher::UnregisterThread,
                                      Unretained(HangWatcher::GetInstance())));
}

// static
ScopedClosureRunner HangWatcher::RegisterThread(ThreadType thread_type) {
  if (!GetInstance()) {
    return ScopedClosureRunner();
  }

  return GetInstance()->RegisterThreadInternal(thread_type);
}
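
// Illustrative registration sketch (hypothetical call site): a thread
// registers itself once and keeps the returned ScopedClosureRunner alive for
// as long as it should be watched; letting it go out of scope unregisters
// the thread:
//
//   void ThreadMain() {
//     ScopedClosureRunner unregister = HangWatcher::RegisterThread(
//         HangWatcher::ThreadType::kThreadPoolThread);
//     RunTasksUntilShutdown();  // Hypothetical helper.
//   }  // |unregister| runs here and removes this thread's watch state.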

base::TimeTicks HangWatcher::WatchStateSnapShot::GetHighestDeadline() const {
  DCHECK(IsActionable());

  // Since entries are sorted in increasing order the last entry is the
  // largest one.
  return hung_watch_state_copies_.back().deadline;
}

HangWatcher::WatchStateSnapShot::WatchStateSnapShot() = default;

void HangWatcher::WatchStateSnapShot::Init(
    const HangWatchStates& watch_states,
    base::TimeTicks deadline_ignore_threshold) {
  DCHECK(!initialized_);

  // Whether or not the snapshot is actionable, after this function it will
  // have been initialized.
  initialized_ = true;

  const base::TimeTicks now = base::TimeTicks::Now();
  bool all_threads_marked = true;
  bool found_deadline_before_ignore_threshold = false;

  // Use an std::array to store the hang counts to avoid allocations. The
  // numerical values of the HangWatcher::ThreadType enum are used to index
  // into the array. |kInvalidHangCount| signifies that no threads of the
  // type were found.
  constexpr size_t kHangCountArraySize =
      static_cast<std::size_t>(base::HangWatcher::ThreadType::kMax) + 1;
  std::array<int, kHangCountArraySize> hung_counts_per_thread_type;

  constexpr int kInvalidHangCount = -1;
  hung_counts_per_thread_type.fill(kInvalidHangCount);

  // Will be true if any of the hung threads has a logging level high enough,
  // as defined through Finch params, to warrant dumping a crash.
  bool any_hung_thread_has_dumping_enabled = false;

  // Copy hung thread information.
  for (const auto& watch_state : watch_states) {
    uint64_t flags;
    TimeTicks deadline;
    std::tie(flags, deadline) = watch_state->GetFlagsAndDeadline();

    if (deadline <= deadline_ignore_threshold) {
      found_deadline_before_ignore_threshold = true;
    }

    if (internal::HangWatchDeadline::IsFlagSet(
            internal::HangWatchDeadline::Flag::kIgnoreCurrentWatchHangsInScope,
            flags)) {
      continue;
    }

    // If a thread type is monitored and did not hang it still needs to be
    // logged as a zero count.
    const size_t hang_count_index =
        static_cast<size_t>(watch_state.get()->thread_type());
    if (hung_counts_per_thread_type[hang_count_index] == kInvalidHangCount) {
      hung_counts_per_thread_type[hang_count_index] = 0;
    }

    // Only copy hung threads.
    if (deadline <= now) {
      ++hung_counts_per_thread_type[hang_count_index];

      if (ThreadTypeLoggingLevelGreaterOrEqual(watch_state.get()->thread_type(),
                                               LoggingLevel::kUmaAndCrash)) {
        any_hung_thread_has_dumping_enabled = true;
      }

#if BUILDFLAG(ENABLE_BASE_TRACING)
      // Emit trace events for monitored threads.
      if (ThreadTypeLoggingLevelGreaterOrEqual(watch_state.get()->thread_type(),
                                               LoggingLevel::kUmaOnly)) {
        const PlatformThreadId thread_id = watch_state.get()->GetThreadID();
        const auto track = perfetto::Track::FromPointer(
            this, perfetto::ThreadTrack::ForThread(thread_id));
        TRACE_EVENT_BEGIN("base", "HangWatcher::ThreadHung", track, deadline);
        TRACE_EVENT_END("base", track, now);
        // TODO(crbug.com/1021571): Remove this once fixed.
        PERFETTO_INTERNAL_ADD_EMPTY_EVENT();
      }
#endif

      // Attempt to mark the thread as needing to stay within its current
      // WatchHangsInScope until capture is complete.
      bool thread_marked = watch_state->SetShouldBlockOnHang(flags, deadline);

      // If marking some threads already failed the snapshot won't be kept so
      // there is no need to keep adding to it. The loop doesn't abort though,
      // to keep marking the other threads. If these threads remain hung until
      // the next capture then they'll already be marked and will be included
      // in the capture at that time.
      if (thread_marked && all_threads_marked) {
        hung_watch_state_copies_.push_back(
            WatchStateCopy{deadline, watch_state.get()->GetThreadID()});
      } else {
        all_threads_marked = false;
      }
    }
  }

  // Log the hung thread counts to histograms for each thread type if any
  // threads of the type were found.
  for (size_t i = 0; i < kHangCountArraySize; ++i) {
    const int hang_count = hung_counts_per_thread_type[i];
    const HangWatcher::ThreadType thread_type =
        static_cast<HangWatcher::ThreadType>(i);
    if (hang_count != kInvalidHangCount &&
        ThreadTypeLoggingLevelGreaterOrEqual(thread_type,
                                             LoggingLevel::kUmaOnly)) {
      LogHungThreadCountHistogram(thread_type, hang_count);
    }
  }

  // Three cases can invalidate this snapshot and prevent the capture of the
  // hang.
  //
  // 1. Some threads could not be marked for blocking so this snapshot isn't
  // actionable since marked threads could be hung because of unmarked ones.
  // If only the marked threads were captured the information would be
  // incomplete.
  //
  // 2. Any of the threads have a deadline before |deadline_ignore_threshold|.
  // If any thread is ignored it reduces the confidence in the whole state and
  // it's better to avoid capturing misleading data.
  //
  // 3. The hung threads found were all of types that are not configured
  // through Finch to trigger a crash dump.
  //
  if (!all_threads_marked || found_deadline_before_ignore_threshold ||
      !any_hung_thread_has_dumping_enabled) {
    hung_watch_state_copies_.clear();
    return;
  }

  // Sort |hung_watch_state_copies_| by order of decreasing hang severity so
  // the most severe hang is first in the list.
  ranges::sort(hung_watch_state_copies_,
               [](const WatchStateCopy& lhs, const WatchStateCopy& rhs) {
                 return lhs.deadline < rhs.deadline;
               });
}

void HangWatcher::WatchStateSnapShot::Clear() {
  hung_watch_state_copies_.clear();
  initialized_ = false;
}

HangWatcher::WatchStateSnapShot::WatchStateSnapShot(
    const WatchStateSnapShot& other) = default;

HangWatcher::WatchStateSnapShot::~WatchStateSnapShot() = default;

std::string HangWatcher::WatchStateSnapShot::PrepareHungThreadListCrashKey()
    const {
  DCHECK(IsActionable());

  // Build a crash key string that contains the ids of the hung threads.
  constexpr char kSeparator{'|'};
  std::string list_of_hung_thread_ids;

  // Add as many thread ids to the crash key as possible.
  for (const WatchStateCopy& copy : hung_watch_state_copies_) {
    std::string fragment = base::NumberToString(copy.thread_id) + kSeparator;
    if (list_of_hung_thread_ids.size() + fragment.size() <
        static_cast<std::size_t>(debug::CrashKeySize::Size256)) {
      list_of_hung_thread_ids += fragment;
    } else {
      // Respect the priority ordering of thread ids in the crash key by
      // stopping construction as soon as one does not fit. This avoids
      // including lesser priority ids while omitting more important ones.
      break;
    }
  }

  return list_of_hung_thread_ids;
}
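
// Illustrative crash key value (hypothetical thread ids): a snapshot with
// hung threads 771, 1299 and 124, ordered from most to least severe hang,
// yields the string "771|1299|124|".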

bool HangWatcher::WatchStateSnapShot::IsActionable() const {
  DCHECK(initialized_);
  return !hung_watch_state_copies_.empty();
}

HangWatcher::WatchStateSnapShot HangWatcher::GrabWatchStateSnapshotForTesting()
    const {
  WatchStateSnapShot snapshot;
  snapshot.Init(watch_states_, deadline_ignore_threshold_);
  return snapshot;
}

void HangWatcher::Monitor() {
  DCHECK_CALLED_ON_VALID_THREAD(hang_watcher_thread_checker_);
  AutoLock auto_lock(watch_state_lock_);

  // If all threads unregistered since this function was invoked there's
  // nothing to do anymore.
  if (watch_states_.empty())
    return;

  watch_state_snapshot_.Init(watch_states_, deadline_ignore_threshold_);

  if (watch_state_snapshot_.IsActionable()) {
    DoDumpWithoutCrashing(watch_state_snapshot_);
  }

  watch_state_snapshot_.Clear();
}

void HangWatcher::DoDumpWithoutCrashing(
    const WatchStateSnapShot& watch_state_snapshot) {
  TRACE_EVENT("base", "HangWatcher::DoDumpWithoutCrashing");

  capture_in_progress_.store(true, std::memory_order_relaxed);
  base::AutoLock scope_lock(capture_lock_);

#if !BUILDFLAG(IS_NACL)
  const std::string list_of_hung_thread_ids =
      watch_state_snapshot.PrepareHungThreadListCrashKey();

  static debug::CrashKeyString* crash_key = AllocateCrashKeyString(
      "list-of-hung-threads", debug::CrashKeySize::Size256);

  const debug::ScopedCrashKeyString list_of_hung_threads_crash_key_string(
      crash_key, list_of_hung_thread_ids);

  const debug::ScopedCrashKeyString
      time_since_last_critical_memory_pressure_crash_key_string =
          GetTimeSinceLastCriticalMemoryPressureCrashKey();

  SCOPED_CRASH_KEY_STRING32("HangWatcher", "seconds-since-last-resume",
                            GetTimeSinceLastSystemPowerResumeCrashKeyValue());
#endif

  // To avoid capturing more than one hang that blames a subset of the same
  // threads it's necessary to keep track of the furthest deadline that
  // contributed to declaring a hang. Only once all threads have deadlines
  // past this point can we be sure that a newly discovered hang is not
  // directly related.
  // Example:
  // **********************************************************************
  // Timeline A : L------1-------2----------3-------4----------N-----------
  // Timeline B : -------2----------3-------4----------L----5------N-------
  // Timeline C : L----------------------------5------6----7---8------9---N
  // **********************************************************************
  // In the example when a Monitor() happens during timeline A
  // |deadline_ignore_threshold_| (L) is at time zero and deadlines (1-4)
  // are before Now() (N). A hang is captured and L is updated. During
  // the next Monitor() (timeline B) a new deadline is over but we can't
  // capture a hang because deadlines 2-4 are still live and already counted
  // toward a hang. During a third monitor (timeline C) all live deadlines
  // are now after L and a second hang can be recorded.
  base::TimeTicks latest_expired_deadline =
      watch_state_snapshot.GetHighestDeadline();

  if (on_hang_closure_for_testing_)
    on_hang_closure_for_testing_.Run();
  else
    RecordHang();

  // Update after running the actual capture.
  deadline_ignore_threshold_ = latest_expired_deadline;

  capture_in_progress_.store(false, std::memory_order_relaxed);
}

void HangWatcher::SetAfterMonitorClosureForTesting(
    base::RepeatingClosure closure) {
  DCHECK_CALLED_ON_VALID_THREAD(constructing_thread_checker_);
  after_monitor_closure_for_testing_ = std::move(closure);
}

void HangWatcher::SetOnHangClosureForTesting(base::RepeatingClosure closure) {
  DCHECK_CALLED_ON_VALID_THREAD(constructing_thread_checker_);
  on_hang_closure_for_testing_ = std::move(closure);
}

void HangWatcher::SetMonitoringPeriodForTesting(base::TimeDelta period) {
  DCHECK_CALLED_ON_VALID_THREAD(constructing_thread_checker_);
  monitor_period_ = period;
}

void HangWatcher::SetAfterWaitCallbackForTesting(
    RepeatingCallback<void(TimeTicks)> callback) {
  DCHECK_CALLED_ON_VALID_THREAD(constructing_thread_checker_);
  after_wait_callback_ = callback;
}

void HangWatcher::SignalMonitorEventForTesting() {
  DCHECK_CALLED_ON_VALID_THREAD(constructing_thread_checker_);
  should_monitor_.Signal();
}

// static
void HangWatcher::StopMonitoringForTesting() {
  g_keep_monitoring.store(false, std::memory_order_relaxed);
}

void HangWatcher::SetTickClockForTesting(const base::TickClock* tick_clock) {
  tick_clock_ = tick_clock;
}

void HangWatcher::BlockIfCaptureInProgress() {
  // Makes a best-effort attempt to block execution if a hang is currently
  // being captured. Only block on |capture_lock| if |capture_in_progress_|
  // hints that it's already held, to avoid serializing all threads on this
  // function when no hang capture is in-progress.
  if (capture_in_progress_.load(std::memory_order_relaxed))
    base::AutoLock hang_lock(capture_lock_);
}

void HangWatcher::UnregisterThread() {
  AutoLock auto_lock(watch_state_lock_);

  auto it = ranges::find(
      watch_states_,
      internal::HangWatchState::GetHangWatchStateForCurrentThread(),
      &std::unique_ptr<internal::HangWatchState>::get);

  // Thread should be registered to get unregistered.
  DCHECK(it != watch_states_.end());

  watch_states_.erase(it);
}

namespace internal {
namespace {

constexpr uint64_t kOnlyDeadlineMask = 0x00FF'FFFF'FFFF'FFFFu;
constexpr uint64_t kOnlyFlagsMask = ~kOnlyDeadlineMask;
constexpr uint64_t kMaximumFlag = 0x8000'0000'0000'0000u;

// Use as a mask to keep persistent flags and the deadline.
constexpr uint64_t kPersistentFlagsAndDeadlineMask =
    kOnlyDeadlineMask |
    static_cast<uint64_t>(
        HangWatchDeadline::Flag::kIgnoreCurrentWatchHangsInScope);
}  // namespace

// Flag binary representation assertions.
static_assert(
    static_cast<uint64_t>(HangWatchDeadline::Flag::kMinValue) >
        kOnlyDeadlineMask,
    "Invalid numerical value for flag. Would interfere with bits of data.");
static_assert(static_cast<uint64_t>(HangWatchDeadline::Flag::kMaxValue) <=
                  kMaximumFlag,
              "A flag can only set a single bit.");

HangWatchDeadline::HangWatchDeadline() = default;
HangWatchDeadline::~HangWatchDeadline() = default;

std::pair<uint64_t, TimeTicks> HangWatchDeadline::GetFlagsAndDeadline() const {
  uint64_t bits = bits_.load(std::memory_order_relaxed);
  return std::make_pair(ExtractFlags(bits),
                        DeadlineFromBits(ExtractDeadline(bits)));
}

TimeTicks HangWatchDeadline::GetDeadline() const {
  return DeadlineFromBits(
      ExtractDeadline(bits_.load(std::memory_order_relaxed)));
}

// static
TimeTicks HangWatchDeadline::Max() {
  // |kOnlyDeadlineMask| has all the bits reserved for the TimeTicks value
  // set. This means it also represents the highest representable value.
  return DeadlineFromBits(kOnlyDeadlineMask);
}

// static
bool HangWatchDeadline::IsFlagSet(Flag flag, uint64_t flags) {
  return static_cast<uint64_t>(flag) & flags;
}

void HangWatchDeadline::SetDeadline(TimeTicks new_deadline) {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
  DCHECK(new_deadline <= Max()) << "Value too high to be represented.";
  DCHECK(new_deadline >= TimeTicks{}) << "Value cannot be negative.";

  if (switch_bits_callback_for_testing_) {
    const uint64_t switched_in_bits = SwitchBitsForTesting();
    // If a concurrent deadline change is tested it cannot have a deadline or
    // persistent flag change since those always happen on the same thread.
    DCHECK((switched_in_bits & kPersistentFlagsAndDeadlineMask) == 0u);
  }

  // Discard all non-persistent flags and apply deadline change.
  const uint64_t old_bits = bits_.load(std::memory_order_relaxed);
  const uint64_t new_flags =
      ExtractFlags(old_bits & kPersistentFlagsAndDeadlineMask);
  bits_.store(new_flags | ExtractDeadline(static_cast<uint64_t>(
                              new_deadline.ToInternalValue())),
              std::memory_order_relaxed);
}

// TODO(crbug.com/1087026): Add flag DCHECKs here.
bool HangWatchDeadline::SetShouldBlockOnHang(uint64_t old_flags,
                                             TimeTicks old_deadline) {
  DCHECK(old_deadline <= Max()) << "Value too high to be represented.";
  DCHECK(old_deadline >= TimeTicks{}) << "Value cannot be negative.";

  // Set the kShouldBlockOnHang flag only if |bits_| did not change since it
  // was read. kShouldBlockOnHang is the only non-persistent flag and should
  // never be set twice. Persistent flags and deadline changes are done from
  // the same thread so there is no risk of losing concurrently added
  // information.
  uint64_t old_bits =
      old_flags | static_cast<uint64_t>(old_deadline.ToInternalValue());
  const uint64_t desired_bits =
      old_bits | static_cast<uint64_t>(Flag::kShouldBlockOnHang);

  // If a test needs to simulate |bits_| changing since calling this function
  // this happens now.
  if (switch_bits_callback_for_testing_) {
    const uint64_t switched_in_bits = SwitchBitsForTesting();

    // Injecting the flag being tested is invalid.
    DCHECK(!IsFlagSet(Flag::kShouldBlockOnHang, switched_in_bits));
  }

  return bits_.compare_exchange_weak(old_bits, desired_bits,
                                     std::memory_order_relaxed,
                                     std::memory_order_relaxed);
}

void HangWatchDeadline::SetIgnoreCurrentWatchHangsInScope() {
  SetPersistentFlag(Flag::kIgnoreCurrentWatchHangsInScope);
}

void HangWatchDeadline::UnsetIgnoreCurrentWatchHangsInScope() {
  ClearPersistentFlag(Flag::kIgnoreCurrentWatchHangsInScope);
}

void HangWatchDeadline::SetPersistentFlag(Flag flag) {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
  if (switch_bits_callback_for_testing_)
    SwitchBitsForTesting();
  bits_.fetch_or(static_cast<uint64_t>(flag), std::memory_order_relaxed);
}

void HangWatchDeadline::ClearPersistentFlag(Flag flag) {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
  if (switch_bits_callback_for_testing_)
    SwitchBitsForTesting();
  bits_.fetch_and(~(static_cast<uint64_t>(flag)), std::memory_order_relaxed);
}

// static
uint64_t HangWatchDeadline::ExtractFlags(uint64_t bits) {
  return bits & kOnlyFlagsMask;
}

// static
uint64_t HangWatchDeadline::ExtractDeadline(uint64_t bits) {
  return bits & kOnlyDeadlineMask;
}

// static
TimeTicks HangWatchDeadline::DeadlineFromBits(uint64_t bits) {
  // |kOnlyDeadlineMask| has all the deadline bits set to 1 so is the largest
  // representable value.
  DCHECK(bits <= kOnlyDeadlineMask)
      << "Flags bits are set. Remove them before returning deadline.";
  static_assert(kOnlyDeadlineMask <= std::numeric_limits<int64_t>::max());
  return TimeTicks::FromInternalValue(static_cast<int64_t>(bits));
}

bool HangWatchDeadline::IsFlagSet(Flag flag) const {
  return bits_.load(std::memory_order_relaxed) & static_cast<uint64_t>(flag);
}

void HangWatchDeadline::SetSwitchBitsClosureForTesting(
    RepeatingCallback<uint64_t(void)> closure) {
  switch_bits_callback_for_testing_ = closure;
}

void HangWatchDeadline::ResetSwitchBitsClosureForTesting() {
  DCHECK(switch_bits_callback_for_testing_);
  switch_bits_callback_for_testing_.Reset();
}

uint64_t HangWatchDeadline::SwitchBitsForTesting() {
  DCHECK(switch_bits_callback_for_testing_);

  const uint64_t old_bits = bits_.load(std::memory_order_relaxed);
  const uint64_t new_bits = switch_bits_callback_for_testing_.Run();
  const uint64_t old_flags = ExtractFlags(old_bits);

  const uint64_t switched_in_bits = old_flags | new_bits;
  bits_.store(switched_in_bits, std::memory_order_relaxed);
  return switched_in_bits;
}

HangWatchState::HangWatchState(HangWatcher::ThreadType thread_type)
    : resetter_(&hang_watch_state, this, nullptr), thread_type_(thread_type) {
  // TODO(crbug.com/1223033): Remove this once macOS uses system-wide ids.
  // On macOS the thread ids used by CrashPad are not the same as the ones
  // provided by PlatformThread. Make sure to use the same for correct
  // attribution.
#if BUILDFLAG(IS_MAC)
  uint64_t thread_id;
  pthread_threadid_np(pthread_self(), &thread_id);
  thread_id_ = checked_cast<PlatformThreadId>(thread_id);
#else
  thread_id_ = PlatformThread::CurrentId();
#endif
}

HangWatchState::~HangWatchState() {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);

  DCHECK_EQ(GetHangWatchStateForCurrentThread(), this);

#if DCHECK_IS_ON()
  // Destroying the HangWatchState should not be done if there are live
  // WatchHangsInScopes.
  DCHECK(!current_watch_hangs_in_scope_);
#endif
}

// static
std::unique_ptr<HangWatchState>
HangWatchState::CreateHangWatchStateForCurrentThread(
    HangWatcher::ThreadType thread_type) {
  // Allocate a watch state object for this thread.
  std::unique_ptr<HangWatchState> hang_state =
      std::make_unique<HangWatchState>(thread_type);

  // Setting the thread local worked.
  DCHECK_EQ(GetHangWatchStateForCurrentThread(), hang_state.get());

  // Transfer ownership to caller.
  return hang_state;
}

TimeTicks HangWatchState::GetDeadline() const {
  return deadline_.GetDeadline();
}

std::pair<uint64_t, TimeTicks> HangWatchState::GetFlagsAndDeadline() const {
  return deadline_.GetFlagsAndDeadline();
}

void HangWatchState::SetDeadline(TimeTicks deadline) {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
  deadline_.SetDeadline(deadline);
}

bool HangWatchState::IsOverDeadline() const {
  return TimeTicks::Now() > deadline_.GetDeadline();
}

void HangWatchState::SetIgnoreCurrentWatchHangsInScope() {
  deadline_.SetIgnoreCurrentWatchHangsInScope();
}

void HangWatchState::UnsetIgnoreCurrentWatchHangsInScope() {
  deadline_.UnsetIgnoreCurrentWatchHangsInScope();
}

bool HangWatchState::SetShouldBlockOnHang(uint64_t old_flags,
                                          TimeTicks old_deadline) {
  return deadline_.SetShouldBlockOnHang(old_flags, old_deadline);
}

bool HangWatchState::IsFlagSet(HangWatchDeadline::Flag flag) {
  return deadline_.IsFlagSet(flag);
}

#if DCHECK_IS_ON()
void HangWatchState::SetCurrentWatchHangsInScope(
    WatchHangsInScope* current_hang_watch_scope_enable) {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
  current_watch_hangs_in_scope_ = current_hang_watch_scope_enable;
}

WatchHangsInScope* HangWatchState::GetCurrentWatchHangsInScope() {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
  return current_watch_hangs_in_scope_;
}
#endif

HangWatchDeadline* HangWatchState::GetHangWatchDeadlineForTesting() {
  return &deadline_;
}

void HangWatchState::IncrementNestingLevel() {
  ++nesting_level_;
}

void HangWatchState::DecrementNestingLevel() {
  --nesting_level_;
}

// static
HangWatchState* HangWatchState::GetHangWatchStateForCurrentThread() {
  // Workaround false-positive MSAN use-of-uninitialized-value on
  // thread_local storage for loaded libraries:
  // https://github.com/google/sanitizers/issues/1265
  MSAN_UNPOISON(&hang_watch_state, sizeof(internal::HangWatchState*));

  return hang_watch_state;
}

PlatformThreadId HangWatchState::GetThreadID() const {
  return thread_id_;
}

}  // namespace internal

}  // namespace base