// Copyright 2020 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "base/threading/hang_watcher.h"

#include <array>
#include <atomic>
#include <limits>
#include <tuple>
#include <utility>

#include "base/containers/flat_map.h"
#include "base/debug/alias.h"
#include "base/debug/crash_logging.h"
#include "base/debug/dump_without_crashing.h"
#include "base/debug/leak_annotations.h"
#include "base/feature_list.h"
#include "base/functional/bind.h"
#include "base/functional/callback_helpers.h"
#include "base/metrics/field_trial_params.h"
#include "base/metrics/histogram_macros.h"
#include "base/power_monitor/power_monitor.h"
#include "base/ranges/algorithm.h"
#include "base/strings/string_number_conversions.h"
#include "base/synchronization/lock.h"
#include "base/synchronization/waitable_event.h"
#include "base/threading/platform_thread.h"
#include "base/threading/thread_checker.h"
#include "base/threading/thread_restrictions.h"
#include "base/threading/threading_features.h"
#include "base/time/default_tick_clock.h"
#include "base/time/time.h"
#include "base/trace_event/base_tracing.h"
#include "build/build_config.h"

namespace base {

namespace {

// Defines how much logging happens when the HangWatcher monitors the threads.
// Logging levels are set per thread type through Finch. It's important that
// the order of the enum members stays the same and that their numerical
// values be in increasing order. The implementation of
// ThreadTypeLoggingLevelGreaterOrEqual() depends on it.
enum class LoggingLevel { kNone = 0, kUmaOnly = 1, kUmaAndCrash = 2 };

HangWatcher* g_instance = nullptr;
constinit thread_local internal::HangWatchState* hang_watch_state = nullptr;
std::atomic<bool> g_use_hang_watcher{false};
std::atomic<HangWatcher::ProcessType> g_hang_watcher_process_type{
    HangWatcher::ProcessType::kBrowserProcess};

std::atomic<LoggingLevel> g_threadpool_log_level{LoggingLevel::kNone};
std::atomic<LoggingLevel> g_io_thread_log_level{LoggingLevel::kNone};
std::atomic<LoggingLevel> g_main_thread_log_level{LoggingLevel::kNone};

// Indicates whether HangWatcher::Run() should return after the next
// monitoring.
std::atomic<bool> g_keep_monitoring{true};

// If true, indicates that this process's shutdown sequence has started. Once
// flipped to true, cannot be un-flipped.
std::atomic<bool> g_shutting_down{false};

// Emits the hung thread count histogram. |count| is the number of threads of
// type |thread_type| that were hung or became hung during the last monitoring
// window. This function should be invoked for each thread type encountered on
// each call to Monitor(). |sample_ticks| is the time at which the sample was
// taken and |monitoring_period| is the interval being sampled.
void LogStatusHistogram(HangWatcher::ThreadType thread_type,
                        int count,
                        TimeTicks sample_ticks,
                        TimeDelta monitoring_period) {
  // In the case of unique threads like the IO or UI/Main thread, a count does
  // not make sense.
  const bool any_thread_hung = count >= 1;
  const bool shutting_down = g_shutting_down.load(std::memory_order_relaxed);

  // Uses histogram macros instead of functions. This increases binary size
  // slightly, but runs slightly faster. These histograms are logged pretty
  // often, so we prefer improving runtime.
  const HangWatcher::ProcessType process_type =
      g_hang_watcher_process_type.load(std::memory_order_relaxed);
  switch (process_type) {
    case HangWatcher::ProcessType::kUnknownProcess:
      break;

    case HangWatcher::ProcessType::kBrowserProcess:
      switch (thread_type) {
        case HangWatcher::ThreadType::kIOThread:
          if (shutting_down) {
            UMA_HISTOGRAM_BOOLEAN(
                "HangWatcher.IsThreadHung.BrowserProcess.IOThread.Shutdown",
                any_thread_hung);
          } else {
            UMA_HISTOGRAM_BOOLEAN(
                "HangWatcher.IsThreadHung.BrowserProcess.IOThread.Normal",
                any_thread_hung);
          }
          break;
        case HangWatcher::ThreadType::kMainThread:
          if (shutting_down) {
            UMA_HISTOGRAM_BOOLEAN(
                "HangWatcher.IsThreadHung.BrowserProcess.UIThread.Shutdown",
                any_thread_hung);
          } else {
            UMA_HISTOGRAM_BOOLEAN(
                "HangWatcher.IsThreadHung.BrowserProcess.UIThread.Normal",
                any_thread_hung);
          }
          break;
        case HangWatcher::ThreadType::kThreadPoolThread:
          // Not recorded for now.
          break;
      }
      break;

    case HangWatcher::ProcessType::kGPUProcess:
      // Not recorded for now.
      CHECK(!shutting_down);
      break;

    case HangWatcher::ProcessType::kRendererProcess:
      CHECK(!shutting_down);
      switch (thread_type) {
        case HangWatcher::ThreadType::kIOThread:
          UMA_HISTOGRAM_SPLIT_BY_PROCESS_PRIORITY(
              UMA_HISTOGRAM_BOOLEAN, sample_ticks, monitoring_period,
              "HangWatcher.IsThreadHung.RendererProcess.IOThread",
              any_thread_hung);
          break;
        case HangWatcher::ThreadType::kMainThread:
          UMA_HISTOGRAM_SPLIT_BY_PROCESS_PRIORITY(
              UMA_HISTOGRAM_BOOLEAN, sample_ticks, monitoring_period,
              "HangWatcher.IsThreadHung.RendererProcess.MainThread",
              any_thread_hung);
          break;
        case HangWatcher::ThreadType::kThreadPoolThread:
          // Not recorded for now.
          break;
      }
      break;

    case HangWatcher::ProcessType::kUtilityProcess:
      CHECK(!shutting_down);
      switch (thread_type) {
        case HangWatcher::ThreadType::kIOThread:
          UMA_HISTOGRAM_BOOLEAN(
              "HangWatcher.IsThreadHung.UtilityProcess.IOThread",
              any_thread_hung);
          break;
        case HangWatcher::ThreadType::kMainThread:
          UMA_HISTOGRAM_BOOLEAN(
              "HangWatcher.IsThreadHung.UtilityProcess.MainThread",
              any_thread_hung);
          break;
        case HangWatcher::ThreadType::kThreadPoolThread:
          // Not recorded for now.
          break;
      }
      break;
  }
}

// Returns true if |thread_type| was configured through Finch to have a logging
// level that is equal to or exceeds |logging_level|.
bool ThreadTypeLoggingLevelGreaterOrEqual(HangWatcher::ThreadType thread_type,
                                          LoggingLevel logging_level) {
  switch (thread_type) {
    case HangWatcher::ThreadType::kIOThread:
      return g_io_thread_log_level.load(std::memory_order_relaxed) >=
             logging_level;
    case HangWatcher::ThreadType::kMainThread:
      return g_main_thread_log_level.load(std::memory_order_relaxed) >=
             logging_level;
    case HangWatcher::ThreadType::kThreadPoolThread:
      return g_threadpool_log_level.load(std::memory_order_relaxed) >=
             logging_level;
  }
}

}  // namespace

// Determines if the HangWatcher is activated. When false, the HangWatcher
// thread is never started.
BASE_FEATURE(kEnableHangWatcher,
             "EnableHangWatcher",
             FEATURE_ENABLED_BY_DEFAULT);

// Browser process.
constexpr base::FeatureParam<int> kIOThreadLogLevel{
    &kEnableHangWatcher, "io_thread_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};
constexpr base::FeatureParam<int> kUIThreadLogLevel{
    &kEnableHangWatcher, "ui_thread_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};
constexpr base::FeatureParam<int> kThreadPoolLogLevel{
    &kEnableHangWatcher, "threadpool_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};

// GPU process.
constexpr base::FeatureParam<int> kGPUProcessIOThreadLogLevel{
    &kEnableHangWatcher, "gpu_process_io_thread_log_level",
    static_cast<int>(LoggingLevel::kNone)};
constexpr base::FeatureParam<int> kGPUProcessMainThreadLogLevel{
    &kEnableHangWatcher, "gpu_process_main_thread_log_level",
    static_cast<int>(LoggingLevel::kNone)};
constexpr base::FeatureParam<int> kGPUProcessThreadPoolLogLevel{
    &kEnableHangWatcher, "gpu_process_threadpool_log_level",
    static_cast<int>(LoggingLevel::kNone)};

// Renderer process.
constexpr base::FeatureParam<int> kRendererProcessIOThreadLogLevel{
    &kEnableHangWatcher, "renderer_process_io_thread_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};
constexpr base::FeatureParam<int> kRendererProcessMainThreadLogLevel{
    &kEnableHangWatcher, "renderer_process_main_thread_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};
constexpr base::FeatureParam<int> kRendererProcessThreadPoolLogLevel{
    &kEnableHangWatcher, "renderer_process_threadpool_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};

// Utility process.
constexpr base::FeatureParam<int> kUtilityProcessIOThreadLogLevel{
    &kEnableHangWatcher, "utility_process_io_thread_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};
constexpr base::FeatureParam<int> kUtilityProcessMainThreadLogLevel{
    &kEnableHangWatcher, "utility_process_main_thread_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};
constexpr base::FeatureParam<int> kUtilityProcessThreadPoolLogLevel{
    &kEnableHangWatcher, "utility_process_threadpool_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};

constexpr const char* kThreadName = "HangWatcher";

// The time that the HangWatcher thread will sleep for between calls to
// Monitor(). Increasing or decreasing this does not modify the type of hangs
// that can be detected. It instead increases the probability that a call to
// Monitor() will happen at the right time to catch a hang. This has to be
// balanced with power/cpu use concerns as busy looping would catch almost all
// hangs but present unacceptable overhead. NOTE: If this period is ever
// changed then all metrics that depend on it, like HangWatcher.IsThreadHung,
// need to be updated.
constexpr auto kMonitoringPeriod = base::Seconds(10);

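// A sketch of typical usage, illustrative only (real call sites live outside
// this file): code running on a watched thread wraps potentially slow work in
// a WatchHangsInScope so a later Monitor() pass can flag the thread if the
// deadline passes. DoSlowWork() is a hypothetical placeholder.
//
//   void HandleRequestOnWatchedThread() {
//     WatchHangsInScope scope(base::Seconds(5));
//     DoSlowWork();
//   }  // If |scope| outlives its deadline, a monitor pass that runs before
//      // it is destroyed may record a hang for this thread.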
WatchHangsInScope::WatchHangsInScope(TimeDelta timeout) {
  internal::HangWatchState* current_hang_watch_state =
      HangWatcher::IsEnabled()
          ? internal::HangWatchState::GetHangWatchStateForCurrentThread()
          : nullptr;

  DCHECK(timeout >= base::TimeDelta()) << "Negative timeouts are invalid.";

  // Thread is not monitored, noop.
  if (!current_hang_watch_state) {
    took_effect_ = false;
    return;
  }

#if DCHECK_IS_ON()
  previous_watch_hangs_in_scope_ =
      current_hang_watch_state->GetCurrentWatchHangsInScope();
  current_hang_watch_state->SetCurrentWatchHangsInScope(this);
#endif

  auto [old_flags, old_deadline] =
      current_hang_watch_state->GetFlagsAndDeadline();

  // TODO(crbug.com/40111620): Check whether we are already over the deadline
  // of the previous WatchHangsInScope here by issuing only one
  // TimeTicks::Now() and reusing the value.

  previous_deadline_ = old_deadline;
  TimeTicks deadline = TimeTicks::Now() + timeout;
  current_hang_watch_state->SetDeadline(deadline);
  current_hang_watch_state->IncrementNestingLevel();

  const bool hangs_ignored_for_current_scope =
      internal::HangWatchDeadline::IsFlagSet(
          internal::HangWatchDeadline::Flag::kIgnoreCurrentWatchHangsInScope,
          old_flags);

  // If the current WatchHangsInScope is ignored, temporarily reactivate hang
  // watching for newly created WatchHangsInScopes. On exit, hang watching is
  // suspended again to return to the original state.
  if (hangs_ignored_for_current_scope) {
    current_hang_watch_state->UnsetIgnoreCurrentWatchHangsInScope();
    set_hangs_ignored_on_exit_ = true;
  }
}

WatchHangsInScope::~WatchHangsInScope() {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);

  // If hang watching was not enabled at construction time there is nothing to
  // validate or undo.
  if (!took_effect_) {
    return;
  }

  // If the thread was unregistered since construction there is also nothing
  // to do.
  auto* const state =
      internal::HangWatchState::GetHangWatchStateForCurrentThread();
  if (!state) {
    return;
  }

  // If a hang is currently being captured we should block here so execution
  // stops and we avoid recording unrelated stack frames in the crash.
  if (state->IsFlagSet(internal::HangWatchDeadline::Flag::kShouldBlockOnHang)) {
    base::HangWatcher::GetInstance()->BlockIfCaptureInProgress();
  }

#if DCHECK_IS_ON()
  // Verify that no scope was destructed out of order.
  DCHECK_EQ(this, state->GetCurrentWatchHangsInScope());
  state->SetCurrentWatchHangsInScope(previous_watch_hangs_in_scope_);
#endif

  if (state->nesting_level() == 1) {
    // If a call to InvalidateActiveExpectations() suspended hang watching
    // during the lifetime of this or any nested WatchHangsInScope it can now
    // safely be reactivated by clearing the ignore bit since this is the
    // outer-most scope.
    state->UnsetIgnoreCurrentWatchHangsInScope();
  } else if (set_hangs_ignored_on_exit_) {
    // Return to ignoring hangs since this was the previous state before hang
    // watching was temporarily enabled in the constructor for this
    // WatchHangsInScope only.
    state->SetIgnoreCurrentWatchHangsInScope();
  }

  // Reset the deadline to the value it had before entering this
  // WatchHangsInScope.
  state->SetDeadline(previous_deadline_);
  // TODO(crbug.com/40111620): Log when a WatchHangsInScope exits after its
  // deadline and that went undetected by the HangWatcher.

  state->DecrementNestingLevel();
}
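
// Nesting sketch, illustrative only: an inner scope with a shorter timeout
// temporarily tightens the thread's deadline; when it is destroyed the outer
// scope's deadline is restored via |previous_deadline_|. DoQuickStep() is a
// hypothetical placeholder.
//
//   WatchHangsInScope outer(base::Seconds(30));
//   {
//     WatchHangsInScope inner(base::Seconds(5));
//     DoQuickStep();
//   }  // The deadline returns to the one set by |outer| here.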

// static
void HangWatcher::InitializeOnMainThread(ProcessType process_type,
                                         bool emit_crashes) {
  DCHECK(!g_use_hang_watcher);
  DCHECK(g_io_thread_log_level == LoggingLevel::kNone);
  DCHECK(g_main_thread_log_level == LoggingLevel::kNone);
  DCHECK(g_threadpool_log_level == LoggingLevel::kNone);

  bool enable_hang_watcher = base::FeatureList::IsEnabled(kEnableHangWatcher);

  // Do not start HangWatcher in the GPU process until the issue related to
  // invalid magic signature in the GPU WatchDog is fixed
  // (https://crbug.com/1297760).
  if (process_type == ProcessType::kGPUProcess)
    enable_hang_watcher = false;

  g_use_hang_watcher.store(enable_hang_watcher, std::memory_order_relaxed);

  // Keep the process type.
  g_hang_watcher_process_type.store(process_type, std::memory_order_relaxed);

  // If hang watching is disabled as a whole there is no need to read the
  // params.
  if (!enable_hang_watcher)
    return;

  // Retrieve thread-specific config for hang watching.
  if (process_type == HangWatcher::ProcessType::kBrowserProcess) {
    // Crashes are always emitted in this case, overriding any feature flags.
    if (emit_crashes) {
      g_io_thread_log_level.store(LoggingLevel::kUmaAndCrash,
                                  std::memory_order_relaxed);
      g_main_thread_log_level.store(LoggingLevel::kUmaAndCrash,
                                    std::memory_order_relaxed);
    } else {
      g_io_thread_log_level.store(
          static_cast<LoggingLevel>(kIOThreadLogLevel.Get()),
          std::memory_order_relaxed);
      g_main_thread_log_level.store(
          static_cast<LoggingLevel>(kUIThreadLogLevel.Get()),
          std::memory_order_relaxed);
    }

    g_threadpool_log_level.store(
        static_cast<LoggingLevel>(kThreadPoolLogLevel.Get()),
        std::memory_order_relaxed);
  } else if (process_type == HangWatcher::ProcessType::kGPUProcess) {
    g_threadpool_log_level.store(
        static_cast<LoggingLevel>(kGPUProcessThreadPoolLogLevel.Get()),
        std::memory_order_relaxed);
    g_io_thread_log_level.store(
        static_cast<LoggingLevel>(kGPUProcessIOThreadLogLevel.Get()),
        std::memory_order_relaxed);
    g_main_thread_log_level.store(
        static_cast<LoggingLevel>(kGPUProcessMainThreadLogLevel.Get()),
        std::memory_order_relaxed);
  } else if (process_type == HangWatcher::ProcessType::kRendererProcess) {
    g_threadpool_log_level.store(
        static_cast<LoggingLevel>(kRendererProcessThreadPoolLogLevel.Get()),
        std::memory_order_relaxed);
    g_io_thread_log_level.store(
        static_cast<LoggingLevel>(kRendererProcessIOThreadLogLevel.Get()),
        std::memory_order_relaxed);
    g_main_thread_log_level.store(
        static_cast<LoggingLevel>(kRendererProcessMainThreadLogLevel.Get()),
        std::memory_order_relaxed);
  } else if (process_type == HangWatcher::ProcessType::kUtilityProcess) {
    g_threadpool_log_level.store(
        static_cast<LoggingLevel>(kUtilityProcessThreadPoolLogLevel.Get()),
        std::memory_order_relaxed);
    g_io_thread_log_level.store(
        static_cast<LoggingLevel>(kUtilityProcessIOThreadLogLevel.Get()),
        std::memory_order_relaxed);
    g_main_thread_log_level.store(
        static_cast<LoggingLevel>(kUtilityProcessMainThreadLogLevel.Get()),
        std::memory_order_relaxed);
  }
}

void HangWatcher::UnitializeOnMainThreadForTesting() {
  g_use_hang_watcher.store(false, std::memory_order_relaxed);
  g_threadpool_log_level.store(LoggingLevel::kNone, std::memory_order_relaxed);
  g_io_thread_log_level.store(LoggingLevel::kNone, std::memory_order_relaxed);
  g_main_thread_log_level.store(LoggingLevel::kNone,
                                std::memory_order_relaxed);
  g_shutting_down.store(false, std::memory_order_relaxed);
}

// static
bool HangWatcher::IsEnabled() {
  return g_use_hang_watcher.load(std::memory_order_relaxed);
}

// static
bool HangWatcher::IsThreadPoolHangWatchingEnabled() {
  return g_threadpool_log_level.load(std::memory_order_relaxed) !=
         LoggingLevel::kNone;
}

// static
bool HangWatcher::IsIOThreadHangWatchingEnabled() {
  return g_io_thread_log_level.load(std::memory_order_relaxed) !=
         LoggingLevel::kNone;
}

// static
bool HangWatcher::IsCrashReportingEnabled() {
  if (g_main_thread_log_level.load(std::memory_order_relaxed) ==
      LoggingLevel::kUmaAndCrash) {
    return true;
  }
  if (g_io_thread_log_level.load(std::memory_order_relaxed) ==
      LoggingLevel::kUmaAndCrash) {
    return true;
  }
  if (g_threadpool_log_level.load(std::memory_order_relaxed) ==
      LoggingLevel::kUmaAndCrash) {
    return true;
  }
  return false;
}

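// Illustrative only (real call sites are outside this file): code that
// knowingly blocks for a long time on a watched thread can call
// InvalidateActiveExpectations() so the scopes currently live on that thread
// are not reported as hangs. WaitPotentiallyForever() is a hypothetical
// placeholder.
//
//   void WaitForLongOperationOnWatchedThread() {
//     base::HangWatcher::InvalidateActiveExpectations();
//     WaitPotentiallyForever();
//   }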
// static
void HangWatcher::InvalidateActiveExpectations() {
  auto* const state =
      internal::HangWatchState::GetHangWatchStateForCurrentThread();
  if (!state) {
    // If the current thread is not under watch there is nothing to
    // invalidate.
    return;
  }
  state->SetIgnoreCurrentWatchHangsInScope();
}

// static
void HangWatcher::SetShuttingDown() {
  // memory_order_relaxed offers no memory order guarantees. In rare cases, we
  // could falsely log to BrowserProcess.Normal instead of
  // BrowserProcess.Shutdown. This is OK in practice.
  bool was_shutting_down =
      g_shutting_down.exchange(true, std::memory_order_relaxed);
  DCHECK(!was_shutting_down);
}

HangWatcher::HangWatcher()
    : monitoring_period_(kMonitoringPeriod),
      should_monitor_(WaitableEvent::ResetPolicy::AUTOMATIC),
      thread_(this, kThreadName),
      tick_clock_(base::DefaultTickClock::GetInstance()),
      memory_pressure_listener_(
          FROM_HERE,
          base::BindRepeating(&HangWatcher::OnMemoryPressure,
                              base::Unretained(this))) {
  // |hang_watcher_thread_checker_| should not be bound to the constructing
  // thread.
  DETACH_FROM_THREAD(hang_watcher_thread_checker_);

  should_monitor_.declare_only_used_while_idle();

  DCHECK(!g_instance);
  g_instance = this;
}

// static
void HangWatcher::CreateHangWatcherInstance() {
  DCHECK(!g_instance);
  g_instance = new base::HangWatcher();
  // The hang watcher is leaked to make sure it survives all watched threads.
  ANNOTATE_LEAKING_OBJECT_PTR(g_instance);
}
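
// A sketch of the expected startup sequence, assembled from the functions
// above rather than copied from a real embedder: the main thread configures
// logging levels, creates the leaked instance, then starts the watcher
// thread.
//
//   base::HangWatcher::InitializeOnMainThread(
//       base::HangWatcher::ProcessType::kBrowserProcess,
//       /*emit_crashes=*/true);
//   base::HangWatcher::CreateHangWatcherInstance();
//   base::HangWatcher::GetInstance()->Start();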

#if !BUILDFLAG(IS_NACL)
debug::ScopedCrashKeyString
HangWatcher::GetTimeSinceLastCriticalMemoryPressureCrashKey() {
  DCHECK_CALLED_ON_VALID_THREAD(hang_watcher_thread_checker_);

  // The crash key size is large enough to hold the biggest possible return
  // value from base::TimeDelta::InSeconds().
  constexpr debug::CrashKeySize kCrashKeyContentSize =
      debug::CrashKeySize::Size32;
  DCHECK_GE(static_cast<uint64_t>(kCrashKeyContentSize),
            base::NumberToString(std::numeric_limits<int64_t>::max()).size());

  static debug::CrashKeyString* crash_key = AllocateCrashKeyString(
      "seconds-since-last-memory-pressure", kCrashKeyContentSize);

  const base::TimeTicks last_critical_memory_pressure_time =
      last_critical_memory_pressure_.load(std::memory_order_relaxed);
  if (last_critical_memory_pressure_time.is_null()) {
    constexpr char kNoMemoryPressureMsg[] = "No critical memory pressure";
    static_assert(
        std::size(kNoMemoryPressureMsg) <=
            static_cast<uint64_t>(kCrashKeyContentSize),
        "The crash key is too small to hold \"No critical memory pressure\".");
    return debug::ScopedCrashKeyString(crash_key, kNoMemoryPressureMsg);
  } else {
    base::TimeDelta time_since_last_critical_memory_pressure =
        base::TimeTicks::Now() - last_critical_memory_pressure_time;
    return debug::ScopedCrashKeyString(
        crash_key, base::NumberToString(
                       time_since_last_critical_memory_pressure.InSeconds()));
  }
}
#endif

std::string HangWatcher::GetTimeSinceLastSystemPowerResumeCrashKeyValue()
    const {
  DCHECK_CALLED_ON_VALID_THREAD(hang_watcher_thread_checker_);

  const TimeTicks last_system_power_resume_time =
      PowerMonitor::GetInstance()->GetLastSystemResumeTime();
  if (last_system_power_resume_time.is_null())
    return "Never suspended";
  if (last_system_power_resume_time == TimeTicks::Max())
    return "Power suspended";

  const TimeDelta time_since_last_system_resume =
      TimeTicks::Now() - last_system_power_resume_time;
  return NumberToString(time_since_last_system_resume.InSeconds());
}

void HangWatcher::OnMemoryPressure(
    base::MemoryPressureListener::MemoryPressureLevel memory_pressure_level) {
  if (memory_pressure_level ==
      base::MemoryPressureListener::MEMORY_PRESSURE_LEVEL_CRITICAL) {
    last_critical_memory_pressure_.store(base::TimeTicks::Now(),
                                         std::memory_order_relaxed);
  }
}

HangWatcher::~HangWatcher() {
  DCHECK_CALLED_ON_VALID_THREAD(constructing_thread_checker_);
  DCHECK_EQ(g_instance, this);
  DCHECK(watch_states_.empty());
  g_instance = nullptr;
  Stop();
}

void HangWatcher::Start() {
  thread_.Start();
  thread_started_ = true;
}

void HangWatcher::Stop() {
  g_keep_monitoring.store(false, std::memory_order_relaxed);
  should_monitor_.Signal();
  thread_.Join();
  thread_started_ = false;

  // In production HangWatcher is always leaked, but during testing it may be
  // stopped and restarted using a new instance. This makes sure the next call
  // to Start() will actually monitor in that case.
  g_keep_monitoring.store(true, std::memory_order_relaxed);
}

bool HangWatcher::IsWatchListEmpty() {
  AutoLock auto_lock(watch_state_lock_);
  return watch_states_.empty();
}

void HangWatcher::Wait() {
  while (true) {
    // Amount by which the actual time spent sleeping can deviate from
    // the target time and still be considered timely.
    constexpr base::TimeDelta kWaitDriftTolerance = base::Milliseconds(100);

    const base::TimeTicks time_before_wait = tick_clock_->NowTicks();

    // Sleep until next scheduled monitoring or until signaled.
    const bool was_signaled = should_monitor_.TimedWait(monitoring_period_);

    if (after_wait_callback_)
      after_wait_callback_.Run(time_before_wait);

    const base::TimeTicks time_after_wait = tick_clock_->NowTicks();
    const base::TimeDelta wait_time = time_after_wait - time_before_wait;
    const bool wait_was_normal =
        wait_time <= (monitoring_period_ + kWaitDriftTolerance);

    if (!wait_was_normal) {
      // If the time spent waiting was too high it might indicate that the
      // machine is very slow or that it went to sleep. In any case we can't
      // trust the WatchHangsInScopes that are currently live. Update the
      // ignore threshold to make sure they don't trigger a hang on subsequent
      // monitors, then keep waiting.

      base::AutoLock auto_lock(watch_state_lock_);

      // Find the latest deadline among the live watch states. They might
      // change atomically while iterating, but that's fine: if they do, the
      // new WatchHangsInScope was constructed very soon after the abnormal
      // sleep happened and might still be affected by the root cause.
      // Ignoring it is cautious and harmless.
      base::TimeTicks latest_deadline;
      for (const auto& state : watch_states_) {
        base::TimeTicks deadline = state->GetDeadline();
        if (deadline > latest_deadline) {
          latest_deadline = deadline;
        }
      }

      deadline_ignore_threshold_ = latest_deadline;
    }

    // Stop waiting.
    if (wait_was_normal || was_signaled)
      return;
  }
}

void HangWatcher::Run() {
  // Monitor() should only run on |thread_|. Bind
  // |hang_watcher_thread_checker_| here to make sure of that.
  DCHECK_CALLED_ON_VALID_THREAD(hang_watcher_thread_checker_);

  while (g_keep_monitoring.load(std::memory_order_relaxed)) {
    Wait();

    if (!IsWatchListEmpty() &&
        g_keep_monitoring.load(std::memory_order_relaxed)) {
      Monitor();
      if (after_monitor_closure_for_testing_) {
        after_monitor_closure_for_testing_.Run();
      }
    }
  }
}

// static
HangWatcher* HangWatcher::GetInstance() {
  return g_instance;
}

// static
void HangWatcher::RecordHang() {
  base::debug::DumpWithoutCrashing();
  NO_CODE_FOLDING();
}

ScopedClosureRunner HangWatcher::RegisterThreadInternal(
    ThreadType thread_type) {
  AutoLock auto_lock(watch_state_lock_);
  CHECK(base::FeatureList::GetInstance());

  // Do not install a WatchState if the results would never be observable.
  if (!ThreadTypeLoggingLevelGreaterOrEqual(thread_type,
                                            LoggingLevel::kUmaOnly)) {
    return ScopedClosureRunner(base::DoNothing());
  }

  watch_states_.push_back(
      internal::HangWatchState::CreateHangWatchStateForCurrentThread(
          thread_type));
  return ScopedClosureRunner(BindOnce(&HangWatcher::UnregisterThread,
                                      Unretained(HangWatcher::GetInstance())));
}

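// A sketch of how a watched thread registers itself, not copied from a real
// call site: the returned ScopedClosureRunner unregisters the thread when it
// goes out of scope, typically at the end of the thread's main function.
// ThreadMainOnWatchedThread() and RunThreadLoop() are hypothetical.
//
//   void ThreadMainOnWatchedThread() {
//     ScopedClosureRunner unregister = base::HangWatcher::RegisterThread(
//         base::HangWatcher::ThreadType::kThreadPoolThread);
//     RunThreadLoop();
//   }  // |unregister| removes this thread from the watch list here.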
// static
ScopedClosureRunner HangWatcher::RegisterThread(ThreadType thread_type) {
  if (!GetInstance()) {
    return ScopedClosureRunner();
  }

  return GetInstance()->RegisterThreadInternal(thread_type);
}

base::TimeTicks HangWatcher::WatchStateSnapShot::GetHighestDeadline() const {
  DCHECK(IsActionable());

  // Since entries are sorted in increasing order, the last entry is the
  // largest one.
  return hung_watch_state_copies_.back().deadline;
}

HangWatcher::WatchStateSnapShot::WatchStateSnapShot() = default;

void HangWatcher::WatchStateSnapShot::Init(
    const HangWatchStates& watch_states,
    base::TimeTicks deadline_ignore_threshold,
    base::TimeDelta monitoring_period) {
  DCHECK(!initialized_);

  // Whether or not the snapshot turns out to be actionable, it will have been
  // initialized after this function.
  initialized_ = true;

  const base::TimeTicks now = base::TimeTicks::Now();
  bool all_threads_marked = true;
  bool found_deadline_before_ignore_threshold = false;

  // Use an std::array to store the hang counts to avoid allocations. The
  // numerical values of the HangWatcher::ThreadType enum are used to index
  // into the array. A |kInvalidHangCount| is used to signify there were no
  // threads of the type found.
  constexpr size_t kHangCountArraySize =
      static_cast<std::size_t>(base::HangWatcher::ThreadType::kMax) + 1;
  std::array<int, kHangCountArraySize> hung_counts_per_thread_type;

  constexpr int kInvalidHangCount = -1;
  hung_counts_per_thread_type.fill(kInvalidHangCount);

  // Will be true if any of the hung threads has a logging level high enough,
  // as defined through Finch params, to warrant dumping a crash.
  bool any_hung_thread_has_dumping_enabled = false;

  // Copy hung thread information.
  for (const auto& watch_state : watch_states) {
    uint64_t flags;
    TimeTicks deadline;
    std::tie(flags, deadline) = watch_state->GetFlagsAndDeadline();

    if (deadline <= deadline_ignore_threshold) {
      found_deadline_before_ignore_threshold = true;
    }

    if (internal::HangWatchDeadline::IsFlagSet(
            internal::HangWatchDeadline::Flag::kIgnoreCurrentWatchHangsInScope,
            flags)) {
      continue;
    }

    // If a thread type is monitored and did not hang, it still needs to be
    // logged as a zero count.
    const size_t hang_count_index =
        static_cast<size_t>(watch_state.get()->thread_type());
    if (hung_counts_per_thread_type[hang_count_index] == kInvalidHangCount) {
      hung_counts_per_thread_type[hang_count_index] = 0;
    }

    // Only copy hung threads.
    if (deadline <= now) {
      ++hung_counts_per_thread_type[hang_count_index];

      if (ThreadTypeLoggingLevelGreaterOrEqual(watch_state.get()->thread_type(),
                                               LoggingLevel::kUmaAndCrash)) {
        any_hung_thread_has_dumping_enabled = true;
      }

#if BUILDFLAG(ENABLE_BASE_TRACING)
      // Emit trace events for monitored threads.
      if (ThreadTypeLoggingLevelGreaterOrEqual(watch_state.get()->thread_type(),
                                               LoggingLevel::kUmaOnly)) {
        const PlatformThreadId thread_id = watch_state.get()->GetThreadID();
        const auto track = perfetto::Track::FromPointer(
            this, perfetto::ThreadTrack::ForThread(thread_id));
        TRACE_EVENT_BEGIN("latency", "HangWatcher::ThreadHung", track,
                          deadline);
        TRACE_EVENT_END("latency", track, now);
      }
#endif

      // Attempt to mark the thread as needing to stay within its current
      // WatchHangsInScope until capture is complete.
      bool thread_marked = watch_state->SetShouldBlockOnHang(flags, deadline);

      // If marking some threads already failed, the snapshot won't be kept so
      // there is no need to keep adding to it. The loop doesn't abort though,
      // to keep marking the other threads. If these threads remain hung until
      // the next capture then they'll already be marked and will be included
      // in the capture at that time.
      if (thread_marked && all_threads_marked) {
        hung_watch_state_copies_.push_back(
            WatchStateCopy{deadline, watch_state.get()->GetThreadID()});
      } else {
        all_threads_marked = false;
      }
    }
  }

  // Log the hung thread counts to histograms for each thread type if any
  // threads of the type were found.
  for (size_t i = 0; i < kHangCountArraySize; ++i) {
    const int hang_count = hung_counts_per_thread_type[i];
    const HangWatcher::ThreadType thread_type =
        static_cast<HangWatcher::ThreadType>(i);
    if (hang_count != kInvalidHangCount &&
        ThreadTypeLoggingLevelGreaterOrEqual(thread_type,
                                             LoggingLevel::kUmaOnly)) {
      LogStatusHistogram(thread_type, hang_count, now, monitoring_period);
    }
  }

  // Three cases can invalidate this snapshot and prevent the capture of the
  // hang.
  //
  // 1. Some threads could not be marked for blocking so this snapshot isn't
  // actionable since marked threads could be hung because of unmarked ones.
  // If only the marked threads were captured the information would be
  // incomplete.
  //
  // 2. Any of the threads have a deadline before |deadline_ignore_threshold|.
  // If any thread is ignored it reduces the confidence in the whole state and
  // it's better to avoid capturing misleading data.
  //
  // 3. The hung threads found were all of types that are not configured
  // through Finch to trigger a crash dump.
  //
  if (!all_threads_marked || found_deadline_before_ignore_threshold ||
      !any_hung_thread_has_dumping_enabled) {
    hung_watch_state_copies_.clear();
    return;
  }

  // Sort |hung_watch_state_copies_| by order of decreasing hang severity so
  // the most severe hang is first in the list.
  ranges::sort(hung_watch_state_copies_,
               [](const WatchStateCopy& lhs, const WatchStateCopy& rhs) {
                 return lhs.deadline < rhs.deadline;
               });
}

void HangWatcher::WatchStateSnapShot::Clear() {
  hung_watch_state_copies_.clear();
  initialized_ = false;
}

HangWatcher::WatchStateSnapShot::WatchStateSnapShot(
    const WatchStateSnapShot& other) = default;

HangWatcher::WatchStateSnapShot::~WatchStateSnapShot() = default;

std::string HangWatcher::WatchStateSnapShot::PrepareHungThreadListCrashKey()
    const {
  DCHECK(IsActionable());

  // Build a crash key string that contains the ids of the hung threads.
  constexpr char kSeparator{'|'};
  std::string list_of_hung_thread_ids;

  // Add as many thread ids to the crash key as possible.
  for (const WatchStateCopy& copy : hung_watch_state_copies_) {
    std::string fragment = base::NumberToString(copy.thread_id) + kSeparator;
    if (list_of_hung_thread_ids.size() + fragment.size() <
        static_cast<std::size_t>(debug::CrashKeySize::Size256)) {
      list_of_hung_thread_ids += fragment;
    } else {
      // Respect the by-priority ordering of thread ids in the crash key by
      // stopping the construction as soon as one does not fit. This avoids
      // including lesser priority ids while omitting more important ones.
      break;
    }
  }

  return list_of_hung_thread_ids;
}

bool HangWatcher::WatchStateSnapShot::IsActionable() const {
  DCHECK(initialized_);
  return !hung_watch_state_copies_.empty();
}

HangWatcher::WatchStateSnapShot HangWatcher::GrabWatchStateSnapshotForTesting()
    const {
  WatchStateSnapShot snapshot;
  snapshot.Init(watch_states_, deadline_ignore_threshold_, TimeDelta());
  return snapshot;
}

void HangWatcher::Monitor() {
  DCHECK_CALLED_ON_VALID_THREAD(hang_watcher_thread_checker_);
  AutoLock auto_lock(watch_state_lock_);

  // If all threads unregistered since this function was invoked there's
  // nothing to do anymore.
  if (watch_states_.empty())
    return;

  watch_state_snapshot_.Init(watch_states_, deadline_ignore_threshold_,
                             monitoring_period_);

  if (watch_state_snapshot_.IsActionable()) {
    DoDumpWithoutCrashing(watch_state_snapshot_);
  }

  watch_state_snapshot_.Clear();
}

void HangWatcher::DoDumpWithoutCrashing(
    const WatchStateSnapShot& watch_state_snapshot) {
  TRACE_EVENT("latency", "HangWatcher::DoDumpWithoutCrashing");

  capture_in_progress_.store(true, std::memory_order_relaxed);
  base::AutoLock scope_lock(capture_lock_);

#if !BUILDFLAG(IS_NACL)
  const std::string list_of_hung_thread_ids =
      watch_state_snapshot.PrepareHungThreadListCrashKey();

  static debug::CrashKeyString* crash_key = AllocateCrashKeyString(
      "list-of-hung-threads", debug::CrashKeySize::Size256);

  const debug::ScopedCrashKeyString list_of_hung_threads_crash_key_string(
      crash_key, list_of_hung_thread_ids);

  const debug::ScopedCrashKeyString
      time_since_last_critical_memory_pressure_crash_key_string =
          GetTimeSinceLastCriticalMemoryPressureCrashKey();

  SCOPED_CRASH_KEY_STRING32("HangWatcher", "seconds-since-last-resume",
                            GetTimeSinceLastSystemPowerResumeCrashKeyValue());

  SCOPED_CRASH_KEY_BOOL("HangWatcher", "shutting-down",
                        g_shutting_down.load(std::memory_order_relaxed));
#endif

  // To avoid capturing more than one hang that blames a subset of the same
  // threads it's necessary to keep track of the furthest deadline that
  // contributed to declaring a hang. Only once all threads have deadlines
  // past this point can we be sure that a newly discovered hang is not
  // directly related.
  // Example:
  // **********************************************************************
  // Timeline A : L------1-------2----------3-------4----------N-----------
  // Timeline B : -------2----------3-------4----------L----5------N-------
  // Timeline C : L----------------------------5------6----7---8------9---N
  // **********************************************************************
  // In the example, when a Monitor() happens during timeline A,
  // |deadline_ignore_threshold_| (L) is at time zero and deadlines (1-4)
  // are before Now() (N). A hang is captured and L is updated. During
  // the next Monitor() (timeline B) a new deadline is over but we can't
  // capture a hang because deadlines 2-4 are still live and already counted
  // toward a hang. During a third monitor (timeline C) all live deadlines
  // are now after L and a second hang can be recorded.
  base::TimeTicks latest_expired_deadline =
      watch_state_snapshot.GetHighestDeadline();

  if (on_hang_closure_for_testing_)
    on_hang_closure_for_testing_.Run();
  else
    RecordHang();

  // Update after running the actual capture.
  deadline_ignore_threshold_ = latest_expired_deadline;

  capture_in_progress_.store(false, std::memory_order_relaxed);
}

void HangWatcher::SetAfterMonitorClosureForTesting(
    base::RepeatingClosure closure) {
  DCHECK_CALLED_ON_VALID_THREAD(constructing_thread_checker_);
  after_monitor_closure_for_testing_ = std::move(closure);
}

void HangWatcher::SetOnHangClosureForTesting(base::RepeatingClosure closure) {
  DCHECK_CALLED_ON_VALID_THREAD(constructing_thread_checker_);
  on_hang_closure_for_testing_ = std::move(closure);
}

void HangWatcher::SetMonitoringPeriodForTesting(base::TimeDelta period) {
  DCHECK_CALLED_ON_VALID_THREAD(constructing_thread_checker_);
  monitoring_period_ = period;
}

void HangWatcher::SetAfterWaitCallbackForTesting(
    RepeatingCallback<void(TimeTicks)> callback) {
  DCHECK_CALLED_ON_VALID_THREAD(constructing_thread_checker_);
  after_wait_callback_ = callback;
}

void HangWatcher::SignalMonitorEventForTesting() {
  DCHECK_CALLED_ON_VALID_THREAD(constructing_thread_checker_);
  should_monitor_.Signal();
}

// static
void HangWatcher::StopMonitoringForTesting() {
  g_keep_monitoring.store(false, std::memory_order_relaxed);
}

void HangWatcher::SetTickClockForTesting(const base::TickClock* tick_clock) {
  tick_clock_ = tick_clock;
}

void HangWatcher::BlockIfCaptureInProgress() {
  // Makes a best-effort attempt to block execution if a hang is currently
  // being captured. Only block on |capture_lock| if |capture_in_progress_|
  // hints that it's already held, to avoid serializing all threads on this
  // function when no hang capture is in-progress.
  if (capture_in_progress_.load(std::memory_order_relaxed))
    base::AutoLock hang_lock(capture_lock_);
}

void HangWatcher::UnregisterThread() {
  AutoLock auto_lock(watch_state_lock_);

  auto it = ranges::find(
      watch_states_,
      internal::HangWatchState::GetHangWatchStateForCurrentThread(),
      &std::unique_ptr<internal::HangWatchState>::get);

  // Thread should be registered to get unregistered.
  CHECK(it != watch_states_.end(), base::NotFatalUntil::M125);

  watch_states_.erase(it);
}

namespace internal {
namespace {

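// Layout sketch derived from the masks below (not an authoritative spec):
// |bits_| packs a TimeTicks deadline into the low 56 bits and the
// HangWatchDeadline::Flag bits into the high 8 bits.
//
//   63           56 55                                            0
//   +--------------+----------------------------------------------+
//   |    flags     |              deadline (TimeTicks)            |
//   +--------------+----------------------------------------------+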
constexpr uint64_t kOnlyDeadlineMask = 0x00FF'FFFF'FFFF'FFFFu;
constexpr uint64_t kOnlyFlagsMask = ~kOnlyDeadlineMask;
constexpr uint64_t kMaximumFlag = 0x8000'0000'0000'0000u;

// Use as a mask to keep persistent flags and the deadline.
constexpr uint64_t kPersistentFlagsAndDeadlineMask =
    kOnlyDeadlineMask |
    static_cast<uint64_t>(
        HangWatchDeadline::Flag::kIgnoreCurrentWatchHangsInScope);
}  // namespace

// Flag binary representation assertions.
static_assert(
    static_cast<uint64_t>(HangWatchDeadline::Flag::kMinValue) >
        kOnlyDeadlineMask,
    "Invalid numerical value for flag. Would interfere with bits of data.");
static_assert(static_cast<uint64_t>(HangWatchDeadline::Flag::kMaxValue) <=
                  kMaximumFlag,
              "A flag can only set a single bit.");

HangWatchDeadline::HangWatchDeadline() = default;
HangWatchDeadline::~HangWatchDeadline() = default;

std::pair<uint64_t, TimeTicks> HangWatchDeadline::GetFlagsAndDeadline() const {
  uint64_t bits = bits_.load(std::memory_order_relaxed);
  return std::make_pair(ExtractFlags(bits),
                        DeadlineFromBits(ExtractDeadline(bits)));
}

TimeTicks HangWatchDeadline::GetDeadline() const {
  return DeadlineFromBits(
      ExtractDeadline(bits_.load(std::memory_order_relaxed)));
}

// static
TimeTicks HangWatchDeadline::Max() {
  // |kOnlyDeadlineMask| has all the bits reserved for the TimeTicks value
  // set. This means it also represents the highest representable value.
  return DeadlineFromBits(kOnlyDeadlineMask);
}

// static
bool HangWatchDeadline::IsFlagSet(Flag flag, uint64_t flags) {
  return static_cast<uint64_t>(flag) & flags;
}

void HangWatchDeadline::SetDeadline(TimeTicks new_deadline) {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
  DCHECK(new_deadline <= Max()) << "Value too high to be represented.";
  DCHECK(new_deadline >= TimeTicks{}) << "Value cannot be negative.";

  if (switch_bits_callback_for_testing_) {
    const uint64_t switched_in_bits = SwitchBitsForTesting();
    // If a concurrent deadline change is tested it cannot have a deadline or
    // persistent flag change since those always happen on the same thread.
    DCHECK((switched_in_bits & kPersistentFlagsAndDeadlineMask) == 0u);
  }

  // Discard all non-persistent flags and apply deadline change.
  const uint64_t old_bits = bits_.load(std::memory_order_relaxed);
  const uint64_t new_flags =
      ExtractFlags(old_bits & kPersistentFlagsAndDeadlineMask);
  bits_.store(new_flags | ExtractDeadline(static_cast<uint64_t>(
                              new_deadline.ToInternalValue())),
              std::memory_order_relaxed);
}

// TODO(crbug.com/40132796): Add flag DCHECKs here.
bool HangWatchDeadline::SetShouldBlockOnHang(uint64_t old_flags,
                                             TimeTicks old_deadline) {
  DCHECK(old_deadline <= Max()) << "Value too high to be represented.";
  DCHECK(old_deadline >= TimeTicks{}) << "Value cannot be negative.";

  // Set the kShouldBlockOnHang flag only if |bits_| did not change since it
  // was read. kShouldBlockOnHang is the only non-persistent flag and should
  // never be set twice. Persistent flags and deadline changes are done from
  // the same thread so there is no risk of losing concurrently added
  // information.
  uint64_t old_bits =
      old_flags | static_cast<uint64_t>(old_deadline.ToInternalValue());
  const uint64_t desired_bits =
      old_bits | static_cast<uint64_t>(Flag::kShouldBlockOnHang);

  // If a test needs to simulate |bits_| changing since it was read, this
  // happens now.
  if (switch_bits_callback_for_testing_) {
    const uint64_t switched_in_bits = SwitchBitsForTesting();

    // Injecting the flag being tested is invalid.
    DCHECK(!IsFlagSet(Flag::kShouldBlockOnHang, switched_in_bits));
  }

  return bits_.compare_exchange_weak(old_bits, desired_bits,
                                     std::memory_order_relaxed,
                                     std::memory_order_relaxed);
}

void HangWatchDeadline::SetIgnoreCurrentWatchHangsInScope() {
  SetPersistentFlag(Flag::kIgnoreCurrentWatchHangsInScope);
}

void HangWatchDeadline::UnsetIgnoreCurrentWatchHangsInScope() {
  ClearPersistentFlag(Flag::kIgnoreCurrentWatchHangsInScope);
}

void HangWatchDeadline::SetPersistentFlag(Flag flag) {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
  if (switch_bits_callback_for_testing_)
    SwitchBitsForTesting();
  bits_.fetch_or(static_cast<uint64_t>(flag), std::memory_order_relaxed);
}

void HangWatchDeadline::ClearPersistentFlag(Flag flag) {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
  if (switch_bits_callback_for_testing_)
    SwitchBitsForTesting();
  bits_.fetch_and(~(static_cast<uint64_t>(flag)), std::memory_order_relaxed);
}

// static
uint64_t HangWatchDeadline::ExtractFlags(uint64_t bits) {
  return bits & kOnlyFlagsMask;
}

// static
uint64_t HangWatchDeadline::ExtractDeadline(uint64_t bits) {
  return bits & kOnlyDeadlineMask;
}

// static
TimeTicks HangWatchDeadline::DeadlineFromBits(uint64_t bits) {
  // |kOnlyDeadlineMask| has all the deadline bits set to 1 so is the largest
  // representable value.
  DCHECK(bits <= kOnlyDeadlineMask)
      << "Flags bits are set. Remove them before returning deadline.";
  static_assert(kOnlyDeadlineMask <= std::numeric_limits<int64_t>::max());
  return TimeTicks::FromInternalValue(static_cast<int64_t>(bits));
}

bool HangWatchDeadline::IsFlagSet(Flag flag) const {
  return bits_.load(std::memory_order_relaxed) & static_cast<uint64_t>(flag);
}

void HangWatchDeadline::SetSwitchBitsClosureForTesting(
    RepeatingCallback<uint64_t(void)> closure) {
  switch_bits_callback_for_testing_ = closure;
}

void HangWatchDeadline::ResetSwitchBitsClosureForTesting() {
  DCHECK(switch_bits_callback_for_testing_);
  switch_bits_callback_for_testing_.Reset();
}

uint64_t HangWatchDeadline::SwitchBitsForTesting() {
  DCHECK(switch_bits_callback_for_testing_);

  const uint64_t old_bits = bits_.load(std::memory_order_relaxed);
  const uint64_t new_bits = switch_bits_callback_for_testing_.Run();
  const uint64_t old_flags = ExtractFlags(old_bits);

  const uint64_t switched_in_bits = old_flags | new_bits;
  bits_.store(switched_in_bits, std::memory_order_relaxed);
  return switched_in_bits;
}

HangWatchState::HangWatchState(HangWatcher::ThreadType thread_type)
    : resetter_(&hang_watch_state, this, nullptr), thread_type_(thread_type) {
  // TODO(crbug.com/40187449): Remove this once macOS uses system-wide ids.
  // On macOS the thread ids used by CrashPad are not the same as the ones
  // provided by PlatformThread. Make sure to use the same for correct
  // attribution.
#if BUILDFLAG(IS_MAC)
  uint64_t thread_id;
  pthread_threadid_np(pthread_self(), &thread_id);
  thread_id_ = checked_cast<PlatformThreadId>(thread_id);
#else
  thread_id_ = PlatformThread::CurrentId();
#endif
}

HangWatchState::~HangWatchState() {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);

  DCHECK_EQ(GetHangWatchStateForCurrentThread(), this);

#if DCHECK_IS_ON()
  // Destroying the HangWatchState should not be done if there are live
  // WatchHangsInScopes.
  DCHECK(!current_watch_hangs_in_scope_);
#endif
}

// static
std::unique_ptr<HangWatchState>
HangWatchState::CreateHangWatchStateForCurrentThread(
    HangWatcher::ThreadType thread_type) {
  // Allocate a watch state object for this thread.
  std::unique_ptr<HangWatchState> hang_state =
      std::make_unique<HangWatchState>(thread_type);

  // Check that setting the thread local worked.
  DCHECK_EQ(GetHangWatchStateForCurrentThread(), hang_state.get());

  // Transfer ownership to caller.
  return hang_state;
}

TimeTicks HangWatchState::GetDeadline() const {
  return deadline_.GetDeadline();
}

std::pair<uint64_t, TimeTicks> HangWatchState::GetFlagsAndDeadline() const {
  return deadline_.GetFlagsAndDeadline();
}

void HangWatchState::SetDeadline(TimeTicks deadline) {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
  deadline_.SetDeadline(deadline);
}

bool HangWatchState::IsOverDeadline() const {
  return TimeTicks::Now() > deadline_.GetDeadline();
}

void HangWatchState::SetIgnoreCurrentWatchHangsInScope() {
  deadline_.SetIgnoreCurrentWatchHangsInScope();
}

void HangWatchState::UnsetIgnoreCurrentWatchHangsInScope() {
  deadline_.UnsetIgnoreCurrentWatchHangsInScope();
}

bool HangWatchState::SetShouldBlockOnHang(uint64_t old_flags,
                                          TimeTicks old_deadline) {
  return deadline_.SetShouldBlockOnHang(old_flags, old_deadline);
}

bool HangWatchState::IsFlagSet(HangWatchDeadline::Flag flag) {
  return deadline_.IsFlagSet(flag);
}

#if DCHECK_IS_ON()
void HangWatchState::SetCurrentWatchHangsInScope(
    WatchHangsInScope* current_hang_watch_scope_enable) {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
  current_watch_hangs_in_scope_ = current_hang_watch_scope_enable;
}

WatchHangsInScope* HangWatchState::GetCurrentWatchHangsInScope() {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
  return current_watch_hangs_in_scope_;
}
#endif

HangWatchDeadline* HangWatchState::GetHangWatchDeadlineForTesting() {
  return &deadline_;
}

void HangWatchState::IncrementNestingLevel() {
  ++nesting_level_;
}

void HangWatchState::DecrementNestingLevel() {
  --nesting_level_;
}

// static
HangWatchState* HangWatchState::GetHangWatchStateForCurrentThread() {
  // Workaround false-positive MSAN use-of-uninitialized-value on
  // thread_local storage for loaded libraries:
  // https://github.com/google/sanitizers/issues/1265
  MSAN_UNPOISON(&hang_watch_state, sizeof(internal::HangWatchState*));

  return hang_watch_state;
}

PlatformThreadId HangWatchState::GetThreadID() const {
  return thread_id_;
}

}  // namespace internal

}  // namespace base