// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// This file defines a WatchDog thread that monitors the responsiveness of other
// browser threads like UI, IO, DB, FILE and CACHED threads. It also defines
// ThreadWatcher class which performs health check on threads that would like to
// be watched. This file also defines ThreadWatcherList class that has list of
// all active ThreadWatcher objects.
//
// ThreadWatcher class sends ping message to the watched thread and the watched
// thread responds back with a pong message. It uploads response time
// (difference between ping and pong times) as a histogram.
//
// TODO(raman): ThreadWatcher can detect hung threads. If a hung thread is
// detected, we should probably just crash, and allow the crash system to gather
// the stack trace.
//
// Example Usage:
//
//   The following is an example for watching responsiveness of IO thread.
//   sleep_time specifies how often ping messages have to be sent to IO thread.
//   unresponsive_time is the wait time after ping message is sent, to check if
//   we have received pong message or not.
//
//   base::TimeDelta sleep_time = base::TimeDelta::FromSeconds(5);
//   base::TimeDelta unresponsive_time = base::TimeDelta::FromSeconds(10);
//   ThreadWatcher::StartWatching(BrowserThread::IO, "IO", sleep_time,
//                                unresponsive_time);

31 #ifndef CHROME_BROWSER_METRICS_THREAD_WATCHER_H_
32 #define CHROME_BROWSER_METRICS_THREAD_WATCHER_H_
33 
34 #include <map>
35 #include <string>
36 #include <vector>
37 
38 #include "base/basictypes.h"
39 #include "base/gtest_prod_util.h"
40 #include "base/memory/ref_counted.h"
41 #include "base/memory/scoped_ptr.h"
42 #include "base/message_loop.h"
43 #include "base/metrics/histogram.h"
44 #include "base/synchronization/lock.h"
45 #include "base/task.h"
46 #include "base/threading/thread.h"
47 #include "base/time.h"
48 #include "content/browser/browser_thread.h"
49 #include "content/common/notification_observer.h"
50 #include "content/common/notification_registrar.h"
51 
52 class CustomThreadWatcher;
53 class ThreadWatcherList;
54 
55 // This class performs health check on threads that would like to be watched.
56 class ThreadWatcher {
57  public:
58   // This method starts performing health check on the given thread_id. It will
59   // create ThreadWatcher object for the given thread_id, thread_name,
60   // sleep_time and unresponsive_time. sleep_time_ is the wait time between ping
61   // messages. unresponsive_time_ is the wait time after ping message is sent,
62   // to check if we have received pong message or not. It will register that
63   // ThreadWatcher object and activate the thread watching of the given
64   // thread_id.
65   static void StartWatching(const BrowserThread::ID& thread_id,
66                             const std::string& thread_name,
67                             const base::TimeDelta& sleep_time,
68                             const base::TimeDelta& unresponsive_time);
69 
70   // Return the thread_id of the thread being watched.
thread_id()71   BrowserThread::ID thread_id() const { return thread_id_; }
72 
73   // Return the name of the thread being watched.
thread_name()74   std::string thread_name() const { return thread_name_; }
75 
76   // Return the sleep time between ping messages to be sent to the thread.
sleep_time()77   base::TimeDelta sleep_time() const { return sleep_time_; }
78 
79   // Return the the wait time to check the responsiveness of the thread.
unresponsive_time()80   base::TimeDelta unresponsive_time() const { return unresponsive_time_; }
81 
82   // Returns true if we are montioring the thread.
active()83   bool active() const { return active_; }
84 
85   // Returns ping_time_ (used by unit tests).
ping_time()86   base::TimeTicks ping_time() const { return ping_time_; }
87 
88   // Returns ping_sequence_number_ (used by unit tests).
ping_sequence_number()89   uint64 ping_sequence_number() const { return ping_sequence_number_; }
90 
91  protected:
92   // Construct a ThreadWatcher for the given thread_id. sleep_time_ is the
93   // wait time between ping messages. unresponsive_time_ is the wait time after
94   // ping message is sent, to check if we have received pong message or not.
95   ThreadWatcher(const BrowserThread::ID& thread_id,
96                 const std::string& thread_name,
97                 const base::TimeDelta& sleep_time,
98                 const base::TimeDelta& unresponsive_time);
99   virtual ~ThreadWatcher();
100 
101   // This method activates the thread watching which starts ping/pong messaging.
102   virtual void ActivateThreadWatching();
103 
104   // This method de-activates the thread watching and revokes all tasks.
105   virtual void DeActivateThreadWatching();
106 
107   // This will ensure that the watching is actively taking place, and awaken
108   // (i.e., post a PostPingMessage) if the watcher has stopped pinging due to
109   // lack of user activity. It will also reset ping_count_ to kPingCount.
110   virtual void WakeUp();
111 
112   // This method records when ping message was sent and it will Post a task
113   // (OnPingMessage) to the watched thread that does nothing but respond with
114   // OnPongMessage. It also posts a task (OnCheckResponsiveness) to check
115   // responsiveness of monitored thread that would be called after waiting
116   // unresponsive_time_.
117   // This method is accessible on WatchDogThread.
118   virtual void PostPingMessage();
119 
120   // This method handles a Pong Message from watched thread. It will track the
121   // response time (pong time minus ping time) via histograms. It posts a
122   // PostPingMessage task that would be called after waiting sleep_time_.  It
123   // increments ping_sequence_number_ by 1.
124   // This method is accessible on WatchDogThread.
125   virtual void OnPongMessage(uint64 ping_sequence_number);
126 
127   // This method will determine if the watched thread is responsive or not. If
128   // the latest ping_sequence_number_ is not same as the ping_sequence_number
129   // that is passed in, then we can assume that watched thread has responded
130   // with a pong message.
131   // This method is accessible on WatchDogThread.
132   virtual bool OnCheckResponsiveness(uint64 ping_sequence_number);
133 
134  private:
135   friend class ThreadWatcherList;
136 
137   // Allow tests to access our innards for testing purposes.
138   FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, Registration);
139   FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, ThreadResponding);
140   FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, ThreadNotResponding);
141   FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, MultipleThreadsResponding);
142   FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, MultipleThreadsNotResponding);
143 
144   // Post constructor initialization.
145   void Initialize();
146 
147   // Watched thread does nothing except post callback_task to the WATCHDOG
148   // Thread. This method is called on watched thread.
149   static void OnPingMessage(const BrowserThread::ID& thread_id,
150                             Task* callback_task);
151 
152   // This is the number of ping messages to be sent when the user is idle.
153   // ping_count_ will be initialized to kPingCount whenever user becomes active.
154   static const int kPingCount;
155 
156   // The thread_id of the thread being watched. Only one instance can exist for
157   // the given thread_id of the thread being watched.
158   const BrowserThread::ID thread_id_;
159 
160   // The name of the thread being watched.
161   const std::string thread_name_;
162 
163   // It is the sleep time between between the receipt of a pong message back,
164   // and the sending of another ping message.
165   const base::TimeDelta sleep_time_;
166 
167   // It is the duration from sending a ping message, until we check status to be
168   // sure a pong message has been returned.
169   const base::TimeDelta unresponsive_time_;
170 
171   // This is the last time when ping message was sent.
172   base::TimeTicks ping_time_;
173 
174   // This is the sequence number of the next ping for which there is no pong. If
175   // the instance is sleeping, then it will be the sequence number for the next
176   // ping.
177   uint64 ping_sequence_number_;
178 
179   // This is set to true if thread watcher is watching.
180   bool active_;
181 
182   // The counter tracks least number of ping messages that will be sent to
183   // watched thread before the ping-pong mechanism will go into an extended
184   // sleep. If this value is zero, then the mechanism is in an extended sleep,
185   // and awaiting some observed user action before continuing.
186   int ping_count_;
187 
188   // Histogram that keeps track of response times for the watched thread.
189   base::Histogram* histogram_;
190 
191   // We use this factory to create callback tasks for ThreadWatcher object. We
192   // use this during ping-pong messaging between WatchDog thread and watched
193   // thread.
194   ScopedRunnableMethodFactory<ThreadWatcher> method_factory_;
195 
196   DISALLOW_COPY_AND_ASSIGN(ThreadWatcher);
197 };
198 
199 // Class with a list of all active thread watchers.  A thread watcher is active
200 // if it has been registered, which includes determing the histogram name. This
201 // class provides utility functions to start and stop watching all browser
202 // threads. Only one instance of this class exists.
203 class ThreadWatcherList : public NotificationObserver {
204  public:
205   // A map from BrowserThread to the actual instances.
206   typedef std::map<BrowserThread::ID, ThreadWatcher*> RegistrationList;
207 
208   // This singleton holds the global list of registered ThreadWatchers.
209   ThreadWatcherList();
210   // Destructor deletes all registered ThreadWatcher instances.
211   virtual ~ThreadWatcherList();
212 
213   // Register() stores a pointer to the given ThreadWatcher in a global map.
214   static void Register(ThreadWatcher* watcher);
215 
216   // This method returns true if the ThreadWatcher object is registerd.
217   static bool IsRegistered(const BrowserThread::ID thread_id);
218 
219   // This method posts a task on WatchDogThread to start watching all browser
220   // threads.
221   // This method is accessible on UI thread.
222   static void StartWatchingAll();
223 
224   // This method posts a task on WatchDogThread to RevokeAll tasks and to
225   // deactive thread watching of other threads and tell NotificationService to
226   // stop calling Observe.
227   // This method is accessible on UI thread.
228   static void StopWatchingAll();
229 
230   // RemoveAll NotificationTypes that are being observed.
231   // This method is accessible on UI thread.
232   static void RemoveNotifications();
233 
234  private:
235   // Allow tests to access our innards for testing purposes.
236   FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, Registration);
237 
238   // Delete all thread watcher objects and remove them from global map.
239   // This method is accessible on WatchDogThread.
240   void DeleteAll();
241 
242   // This will ensure that the watching is actively taking place. It will wakeup
243   // all thread watchers every 2 seconds. This is the implementation of
244   // NotificationObserver. When a matching notification is posted to the
245   // notification service, this method is called.
246   // This method is accessible on UI thread.
247   virtual void Observe(NotificationType type,
248                        const NotificationSource& source,
249                        const NotificationDetails& details);
250 
251   // This will ensure that the watching is actively taking place, and awaken
252   // all thread watchers that are registered.
253   // This method is accessible on WatchDogThread.
254   virtual void WakeUpAll();
255 
256   // The Find() method can be used to test to see if a given ThreadWatcher was
257   // already registered, or to retrieve a pointer to it from the global map.
258   static ThreadWatcher* Find(const BrowserThread::ID& thread_id);
259 
260   // Helper function should be called only while holding lock_.
261   ThreadWatcher* PreLockedFind(const BrowserThread::ID& thread_id);
262 
263   static ThreadWatcherList* global_;  // The singleton of this class.
264 
265   // Lock for access to registered_.
266   base::Lock lock_;
267 
268   // Map of all registered watched threads, from thread_id to ThreadWatcher.
269   RegistrationList registered_;
270 
271   // The registrar that holds NotificationTypes to be observed.
272   NotificationRegistrar registrar_;
273 
274   // This is the last time when woke all thread watchers up.
275   base::TimeTicks last_wakeup_time_;
276 
277   DISALLOW_COPY_AND_ASSIGN(ThreadWatcherList);
278 };
279 
280 // Class for WatchDogThread and in its Init method, we start watching UI, IO,
281 // DB, FILE, CACHED threads.
282 class WatchDogThread : public base::Thread {
283  public:
284   // Constructor.
285   WatchDogThread();
286 
287   // Destroys the thread and stops the thread.
288   virtual ~WatchDogThread();
289 
290   // Callable on any thread.  Returns whether you're currently on a
291   // watchdog_thread_.
292   static bool CurrentlyOnWatchDogThread();
293 
294   // These are the same methods in message_loop.h, but are guaranteed to either
295   // get posted to the MessageLoop if it's still alive, or be deleted otherwise.
296   // They return true iff the watchdog thread existed and the task was posted.
297   // Note that even if the task is posted, there's no guarantee that it will
298   // run, since the target thread may already have a Quit message in its queue.
299   static bool PostTask(const tracked_objects::Location& from_here, Task* task);
300   static bool PostDelayedTask(const tracked_objects::Location& from_here,
301                               Task* task,
302                               int64 delay_ms);
303 
304  protected:
305   virtual void Init();
306   virtual void CleanUp();
307   virtual void CleanUpAfterMessageLoopDestruction();
308 
309  private:
310   static bool PostTaskHelper(
311       const tracked_objects::Location& from_here,
312       Task* task,
313       int64 delay_ms);
314 
315   // This lock protects watchdog_thread_.
316   static base::Lock lock_;
317 
318   static WatchDogThread* watchdog_thread_;  // The singleton of this class.
319 
320   DISALLOW_COPY_AND_ASSIGN(WatchDogThread);
321 };
322 
323 // DISABLE_RUNNABLE_METHOD_REFCOUNT is a convenience macro for disabling
324 // refcounting of ThreadWatcher and ThreadWatcherList classes.
325 DISABLE_RUNNABLE_METHOD_REFCOUNT(ThreadWatcher);
326 DISABLE_RUNNABLE_METHOD_REFCOUNT(ThreadWatcherList);
327 
328 #endif  // CHROME_BROWSER_METRICS_THREAD_WATCHER_H_
329