• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #if defined(OS_WIN)
6 #include <windows.h>
7 #endif
8 
9 #include "content/gpu/gpu_watchdog_thread.h"
10 
11 #include "base/bind.h"
12 #include "base/bind_helpers.h"
13 #include "base/command_line.h"
14 #include "base/compiler_specific.h"
15 #include "base/file_util.h"
16 #include "base/power_monitor/power_monitor.h"
17 #include "base/process/process.h"
18 #include "build/build_config.h"
19 #include "content/public/common/content_switches.h"
20 #include "content/public/common/result_codes.h"
21 
22 namespace content {
23 namespace {
24 const int64 kCheckPeriodMs = 2000;
25 #if defined(OS_CHROMEOS)
26 const base::FilePath::CharType
27     kTtyFilePath[] = FILE_PATH_LITERAL("/sys/class/tty/tty0/active");
28 #endif
29 }  // namespace
30 
GpuWatchdogThread(int timeout)31 GpuWatchdogThread::GpuWatchdogThread(int timeout)
32     : base::Thread("Watchdog"),
33       watched_message_loop_(base::MessageLoop::current()),
34       timeout_(base::TimeDelta::FromMilliseconds(timeout)),
35       armed_(false),
36 #if defined(OS_WIN)
37       watched_thread_handle_(0),
38       arm_cpu_time_(),
39 #endif
40       task_observer_(this),
41       weak_factory_(this),
42       suspended_(false) {
43   DCHECK(timeout >= 0);
44 
45 #if defined(OS_WIN)
46   // GetCurrentThread returns a pseudo-handle that cannot be used by one thread
47   // to identify another. DuplicateHandle creates a "real" handle that can be
48   // used for this purpose.
49   BOOL result = DuplicateHandle(GetCurrentProcess(),
50                                 GetCurrentThread(),
51                                 GetCurrentProcess(),
52                                 &watched_thread_handle_,
53                                 THREAD_QUERY_INFORMATION,
54                                 FALSE,
55                                 0);
56   DCHECK(result);
57 #endif
58 
59 #if defined(OS_CHROMEOS)
60   tty_file_ = base::OpenFile(base::FilePath(kTtyFilePath), "r");
61 #endif
62   watched_message_loop_->AddTaskObserver(&task_observer_);
63 }
64 
PostAcknowledge()65 void GpuWatchdogThread::PostAcknowledge() {
66   // Called on the monitored thread. Responds with OnAcknowledge. Cannot use
67   // the method factory. Rely on reference counting instead.
68   message_loop()->PostTask(
69       FROM_HERE,
70       base::Bind(&GpuWatchdogThread::OnAcknowledge, this));
71 }
72 
CheckArmed()73 void GpuWatchdogThread::CheckArmed() {
74   // Acknowledge the watchdog if it has armed itself. The watchdog will not
75   // change its armed state until it is acknowledged.
76   if (armed()) {
77     PostAcknowledge();
78   }
79 }
80 
Init()81 void GpuWatchdogThread::Init() {
82   // Schedule the first check.
83   OnCheck(false);
84 }
85 
CleanUp()86 void GpuWatchdogThread::CleanUp() {
87   weak_factory_.InvalidateWeakPtrs();
88 }
89 
GpuWatchdogTaskObserver(GpuWatchdogThread * watchdog)90 GpuWatchdogThread::GpuWatchdogTaskObserver::GpuWatchdogTaskObserver(
91     GpuWatchdogThread* watchdog)
92     : watchdog_(watchdog) {
93 }
94 
~GpuWatchdogTaskObserver()95 GpuWatchdogThread::GpuWatchdogTaskObserver::~GpuWatchdogTaskObserver() {
96 }
97 
WillProcessTask(const base::PendingTask & pending_task)98 void GpuWatchdogThread::GpuWatchdogTaskObserver::WillProcessTask(
99     const base::PendingTask& pending_task) {
100   watchdog_->CheckArmed();
101 }
102 
DidProcessTask(const base::PendingTask & pending_task)103 void GpuWatchdogThread::GpuWatchdogTaskObserver::DidProcessTask(
104     const base::PendingTask& pending_task) {
105   watchdog_->CheckArmed();
106 }
107 
~GpuWatchdogThread()108 GpuWatchdogThread::~GpuWatchdogThread() {
109   // Verify that the thread was explicitly stopped. If the thread is stopped
110   // implicitly by the destructor, CleanUp() will not be called.
111   DCHECK(!weak_factory_.HasWeakPtrs());
112 
113 #if defined(OS_WIN)
114   CloseHandle(watched_thread_handle_);
115 #endif
116 
117   base::PowerMonitor* power_monitor = base::PowerMonitor::Get();
118   if (power_monitor)
119     power_monitor->RemoveObserver(this);
120 
121 #if defined(OS_CHROMEOS)
122   if (tty_file_)
123     fclose(tty_file_);
124 #endif
125 
126   watched_message_loop_->RemoveTaskObserver(&task_observer_);
127 }
128 
OnAcknowledge()129 void GpuWatchdogThread::OnAcknowledge() {
130   CHECK(base::PlatformThread::CurrentId() == thread_id());
131 
132   // The check has already been acknowledged and another has already been
133   // scheduled by a previous call to OnAcknowledge. It is normal for a
134   // watched thread to see armed_ being true multiple times before
135   // the OnAcknowledge task is run on the watchdog thread.
136   if (!armed_)
137     return;
138 
139   // Revoke any pending hang termination.
140   weak_factory_.InvalidateWeakPtrs();
141   armed_ = false;
142 
143   if (suspended_)
144     return;
145 
146   // If it took a long time for the acknowledgement, assume the computer was
147   // recently suspended.
148   bool was_suspended = (base::Time::Now() > suspension_timeout_);
149 
150   // The monitored thread has responded. Post a task to check it again.
151   message_loop()->PostDelayedTask(
152       FROM_HERE,
153       base::Bind(&GpuWatchdogThread::OnCheck, weak_factory_.GetWeakPtr(),
154           was_suspended),
155       base::TimeDelta::FromMilliseconds(kCheckPeriodMs));
156 }
157 
OnCheck(bool after_suspend)158 void GpuWatchdogThread::OnCheck(bool after_suspend) {
159   CHECK(base::PlatformThread::CurrentId() == thread_id());
160 
161   // Do not create any new termination tasks if one has already been created
162   // or the system is suspended.
163   if (armed_ || suspended_)
164     return;
165 
166   // Must set armed before posting the task. This task might be the only task
167   // that will activate the TaskObserver on the watched thread and it must not
168   // miss the false -> true transition.
169   armed_ = true;
170 
171 #if defined(OS_WIN)
172   arm_cpu_time_ = GetWatchedThreadTime();
173 #endif
174 
175   // Immediately after the computer is woken up from being suspended it might
176   // be pretty sluggish, so allow some extra time before the next timeout.
177   base::TimeDelta timeout = timeout_ * (after_suspend ? 3 : 1);
178   suspension_timeout_ = base::Time::Now() + timeout * 2;
179 
180   // Post a task to the monitored thread that does nothing but wake up the
181   // TaskObserver. Any other tasks that are pending on the watched thread will
182   // also wake up the observer. This simply ensures there is at least one.
183   watched_message_loop_->PostTask(
184       FROM_HERE,
185       base::Bind(&base::DoNothing));
186 
187   // Post a task to the watchdog thread to exit if the monitored thread does
188   // not respond in time.
189   message_loop()->PostDelayedTask(
190       FROM_HERE,
191       base::Bind(
192           &GpuWatchdogThread::DeliberatelyTerminateToRecoverFromHang,
193           weak_factory_.GetWeakPtr()),
194       timeout);
195 }
196 
197 // Use the --disable-gpu-watchdog command line switch to disable this.
DeliberatelyTerminateToRecoverFromHang()198 void GpuWatchdogThread::DeliberatelyTerminateToRecoverFromHang() {
199   // Should not get here while the system is suspended.
200   DCHECK(!suspended_);
201 
202 #if defined(OS_WIN)
203   // Defer termination until a certain amount of CPU time has elapsed on the
204   // watched thread.
205   base::TimeDelta time_since_arm = GetWatchedThreadTime() - arm_cpu_time_;
206   if (time_since_arm < timeout_) {
207     message_loop()->PostDelayedTask(
208         FROM_HERE,
209         base::Bind(
210             &GpuWatchdogThread::DeliberatelyTerminateToRecoverFromHang,
211             weak_factory_.GetWeakPtr()),
212         timeout_ - time_since_arm);
213     return;
214   }
215 #endif
216 
217   // If the watchdog woke up significantly behind schedule, disarm and reset
218   // the watchdog check. This is to prevent the watchdog thread from terminating
219   // when a machine wakes up from sleep or hibernation, which would otherwise
220   // appear to be a hang.
221   if (base::Time::Now() > suspension_timeout_) {
222     armed_ = false;
223     OnCheck(true);
224     return;
225   }
226 
227   // For minimal developer annoyance, don't keep terminating. You need to skip
228   // the call to base::Process::Terminate below in a debugger for this to be
229   // useful.
230   static bool terminated = false;
231   if (terminated)
232     return;
233 
234 #if defined(OS_WIN)
235   if (IsDebuggerPresent())
236     return;
237 #endif
238 
239 #if defined(OS_CHROMEOS)
240   // Don't crash if we're not on tty1. This avoids noise in the GPU process
241   // crashes caused by people who use VT2 but still enable crash reporting.
242   char tty_string[8] = {0};
243   if (tty_file_ &&
244       !fseek(tty_file_, 0, SEEK_SET) &&
245       fread(tty_string, 1, 7, tty_file_)) {
246     int tty_number = -1;
247     int num_res = sscanf(tty_string, "tty%d", &tty_number);
248     if (num_res == 1 && tty_number != 1)
249       return;
250   }
251 #endif
252 
253   LOG(ERROR) << "The GPU process hung. Terminating after "
254              << timeout_.InMilliseconds() << " ms.";
255 
256   // Deliberately crash the process to create a crash dump.
257   *((volatile int*)0) = 0x1337;
258 
259   terminated = true;
260 }
261 
AddPowerObserver()262 void GpuWatchdogThread::AddPowerObserver() {
263   message_loop()->PostTask(
264       FROM_HERE,
265       base::Bind(&GpuWatchdogThread::OnAddPowerObserver, this));
266 }
267 
OnAddPowerObserver()268 void GpuWatchdogThread::OnAddPowerObserver() {
269   base::PowerMonitor* power_monitor = base::PowerMonitor::Get();
270   DCHECK(power_monitor);
271   power_monitor->AddObserver(this);
272 }
273 
OnSuspend()274 void GpuWatchdogThread::OnSuspend() {
275   suspended_ = true;
276 
277   // When suspending force an acknowledgement to cancel any pending termination
278   // tasks.
279   OnAcknowledge();
280 }
281 
OnResume()282 void GpuWatchdogThread::OnResume() {
283   suspended_ = false;
284 
285   // After resuming jump-start the watchdog again.
286   armed_ = false;
287   OnCheck(true);
288 }
289 
#if defined(OS_WIN)
// Returns the combined kernel and user CPU time of the watched thread,
// truncated to whole milliseconds. If GetThreadTimes fails, the DCHECK fires
// where enabled; otherwise the result is a zero TimeDelta.
base::TimeDelta GpuWatchdogThread::GetWatchedThreadTime() {
  // Zero-initialized so that a failed GetThreadTimes call, with the DCHECK
  // compiled out, yields a deterministic zero instead of reading
  // uninitialized memory.
  FILETIME creation_time = {0, 0};
  FILETIME exit_time = {0, 0};
  FILETIME user_time = {0, 0};
  FILETIME kernel_time = {0, 0};
  BOOL result = GetThreadTimes(watched_thread_handle_,
                               &creation_time,
                               &exit_time,
                               &kernel_time,
                               &user_time);
  DCHECK(result);

  // Reassemble each FILETIME into a single 64-bit value.
  ULARGE_INTEGER user_time64;
  user_time64.HighPart = user_time.dwHighDateTime;
  user_time64.LowPart = user_time.dwLowDateTime;

  ULARGE_INTEGER kernel_time64;
  kernel_time64.HighPart = kernel_time.dwHighDateTime;
  kernel_time64.LowPart = kernel_time.dwLowDateTime;

  // Time is reported in units of 100 nanoseconds. Kernel and user time are
  // summed to deal with two kinds of hangs. One is where the GPU process is
  // stuck in user level, never calling into the kernel, so kernel time is
  // not increasing. The other is where either the kernel hangs and never
  // returns to user level, or where user level code calls into kernel level
  // repeatedly, giving up its quanta before it is tracked, for example a loop
  // that repeatedly Sleeps.
  return base::TimeDelta::FromMilliseconds(static_cast<int64>(
      (user_time64.QuadPart + kernel_time64.QuadPart) / 10000));
}
#endif
322 
323 }  // namespace content
324