// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// This file defines a WatchDog thread that monitors the responsiveness of other
// browser threads like UI, IO, DB, FILE and CACHED threads. It also defines
// ThreadWatcher class which performs health check on threads that would like to
// be watched. This file also defines ThreadWatcherList class that has list of
// all active ThreadWatcher objects.
//
// ThreadWatcher class sends ping message to the watched thread and the watched
// thread responds back with a pong message. It uploads response time
// (difference between ping and pong times) as a histogram.
//
// TODO(raman): ThreadWatcher can detect hung threads. If a hung thread is
// detected, we should probably just crash, and allow the crash system to gather
// the stack trace.
//
// Example Usage:
//
//   The following is an example for watching responsiveness of IO thread.
//   sleep_time specifies how often ping messages have to be sent to IO thread.
//   unresponsive_time is the wait time after ping message is sent, to check if
//   we have received pong message or not.
25 // 26 // base::TimeDelta sleep_time = base::TimeDelta::FromSeconds(5); 27 // base::TimeDelta unresponsive_time = base::TimeDelta::FromSeconds(10); 28 // ThreadWatcher::StartWatching(BrowserThread::IO, "IO", sleep_time, 29 // unresponsive_time); 30 31 #ifndef CHROME_BROWSER_METRICS_THREAD_WATCHER_H_ 32 #define CHROME_BROWSER_METRICS_THREAD_WATCHER_H_ 33 34 #include <map> 35 #include <string> 36 #include <vector> 37 38 #include "base/basictypes.h" 39 #include "base/gtest_prod_util.h" 40 #include "base/memory/ref_counted.h" 41 #include "base/memory/scoped_ptr.h" 42 #include "base/message_loop.h" 43 #include "base/metrics/histogram.h" 44 #include "base/synchronization/lock.h" 45 #include "base/task.h" 46 #include "base/threading/thread.h" 47 #include "base/time.h" 48 #include "content/browser/browser_thread.h" 49 #include "content/common/notification_observer.h" 50 #include "content/common/notification_registrar.h" 51 52 class CustomThreadWatcher; 53 class ThreadWatcherList; 54 55 // This class performs health check on threads that would like to be watched. 56 class ThreadWatcher { 57 public: 58 // This method starts performing health check on the given thread_id. It will 59 // create ThreadWatcher object for the given thread_id, thread_name, 60 // sleep_time and unresponsive_time. sleep_time_ is the wait time between ping 61 // messages. unresponsive_time_ is the wait time after ping message is sent, 62 // to check if we have received pong message or not. It will register that 63 // ThreadWatcher object and activate the thread watching of the given 64 // thread_id. 65 static void StartWatching(const BrowserThread::ID& thread_id, 66 const std::string& thread_name, 67 const base::TimeDelta& sleep_time, 68 const base::TimeDelta& unresponsive_time); 69 70 // Return the thread_id of the thread being watched. thread_id()71 BrowserThread::ID thread_id() const { return thread_id_; } 72 73 // Return the name of the thread being watched. 
thread_name()74 std::string thread_name() const { return thread_name_; } 75 76 // Return the sleep time between ping messages to be sent to the thread. sleep_time()77 base::TimeDelta sleep_time() const { return sleep_time_; } 78 79 // Return the the wait time to check the responsiveness of the thread. unresponsive_time()80 base::TimeDelta unresponsive_time() const { return unresponsive_time_; } 81 82 // Returns true if we are montioring the thread. active()83 bool active() const { return active_; } 84 85 // Returns ping_time_ (used by unit tests). ping_time()86 base::TimeTicks ping_time() const { return ping_time_; } 87 88 // Returns ping_sequence_number_ (used by unit tests). ping_sequence_number()89 uint64 ping_sequence_number() const { return ping_sequence_number_; } 90 91 protected: 92 // Construct a ThreadWatcher for the given thread_id. sleep_time_ is the 93 // wait time between ping messages. unresponsive_time_ is the wait time after 94 // ping message is sent, to check if we have received pong message or not. 95 ThreadWatcher(const BrowserThread::ID& thread_id, 96 const std::string& thread_name, 97 const base::TimeDelta& sleep_time, 98 const base::TimeDelta& unresponsive_time); 99 virtual ~ThreadWatcher(); 100 101 // This method activates the thread watching which starts ping/pong messaging. 102 virtual void ActivateThreadWatching(); 103 104 // This method de-activates the thread watching and revokes all tasks. 105 virtual void DeActivateThreadWatching(); 106 107 // This will ensure that the watching is actively taking place, and awaken 108 // (i.e., post a PostPingMessage) if the watcher has stopped pinging due to 109 // lack of user activity. It will also reset ping_count_ to kPingCount. 110 virtual void WakeUp(); 111 112 // This method records when ping message was sent and it will Post a task 113 // (OnPingMessage) to the watched thread that does nothing but respond with 114 // OnPongMessage. 
It also posts a task (OnCheckResponsiveness) to check 115 // responsiveness of monitored thread that would be called after waiting 116 // unresponsive_time_. 117 // This method is accessible on WatchDogThread. 118 virtual void PostPingMessage(); 119 120 // This method handles a Pong Message from watched thread. It will track the 121 // response time (pong time minus ping time) via histograms. It posts a 122 // PostPingMessage task that would be called after waiting sleep_time_. It 123 // increments ping_sequence_number_ by 1. 124 // This method is accessible on WatchDogThread. 125 virtual void OnPongMessage(uint64 ping_sequence_number); 126 127 // This method will determine if the watched thread is responsive or not. If 128 // the latest ping_sequence_number_ is not same as the ping_sequence_number 129 // that is passed in, then we can assume that watched thread has responded 130 // with a pong message. 131 // This method is accessible on WatchDogThread. 132 virtual bool OnCheckResponsiveness(uint64 ping_sequence_number); 133 134 private: 135 friend class ThreadWatcherList; 136 137 // Allow tests to access our innards for testing purposes. 138 FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, Registration); 139 FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, ThreadResponding); 140 FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, ThreadNotResponding); 141 FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, MultipleThreadsResponding); 142 FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, MultipleThreadsNotResponding); 143 144 // Post constructor initialization. 145 void Initialize(); 146 147 // Watched thread does nothing except post callback_task to the WATCHDOG 148 // Thread. This method is called on watched thread. 149 static void OnPingMessage(const BrowserThread::ID& thread_id, 150 Task* callback_task); 151 152 // This is the number of ping messages to be sent when the user is idle. 153 // ping_count_ will be initialized to kPingCount whenever user becomes active. 
154 static const int kPingCount; 155 156 // The thread_id of the thread being watched. Only one instance can exist for 157 // the given thread_id of the thread being watched. 158 const BrowserThread::ID thread_id_; 159 160 // The name of the thread being watched. 161 const std::string thread_name_; 162 163 // It is the sleep time between between the receipt of a pong message back, 164 // and the sending of another ping message. 165 const base::TimeDelta sleep_time_; 166 167 // It is the duration from sending a ping message, until we check status to be 168 // sure a pong message has been returned. 169 const base::TimeDelta unresponsive_time_; 170 171 // This is the last time when ping message was sent. 172 base::TimeTicks ping_time_; 173 174 // This is the sequence number of the next ping for which there is no pong. If 175 // the instance is sleeping, then it will be the sequence number for the next 176 // ping. 177 uint64 ping_sequence_number_; 178 179 // This is set to true if thread watcher is watching. 180 bool active_; 181 182 // The counter tracks least number of ping messages that will be sent to 183 // watched thread before the ping-pong mechanism will go into an extended 184 // sleep. If this value is zero, then the mechanism is in an extended sleep, 185 // and awaiting some observed user action before continuing. 186 int ping_count_; 187 188 // Histogram that keeps track of response times for the watched thread. 189 base::Histogram* histogram_; 190 191 // We use this factory to create callback tasks for ThreadWatcher object. We 192 // use this during ping-pong messaging between WatchDog thread and watched 193 // thread. 194 ScopedRunnableMethodFactory<ThreadWatcher> method_factory_; 195 196 DISALLOW_COPY_AND_ASSIGN(ThreadWatcher); 197 }; 198 199 // Class with a list of all active thread watchers. A thread watcher is active 200 // if it has been registered, which includes determing the histogram name. 
This 201 // class provides utility functions to start and stop watching all browser 202 // threads. Only one instance of this class exists. 203 class ThreadWatcherList : public NotificationObserver { 204 public: 205 // A map from BrowserThread to the actual instances. 206 typedef std::map<BrowserThread::ID, ThreadWatcher*> RegistrationList; 207 208 // This singleton holds the global list of registered ThreadWatchers. 209 ThreadWatcherList(); 210 // Destructor deletes all registered ThreadWatcher instances. 211 virtual ~ThreadWatcherList(); 212 213 // Register() stores a pointer to the given ThreadWatcher in a global map. 214 static void Register(ThreadWatcher* watcher); 215 216 // This method returns true if the ThreadWatcher object is registerd. 217 static bool IsRegistered(const BrowserThread::ID thread_id); 218 219 // This method posts a task on WatchDogThread to start watching all browser 220 // threads. 221 // This method is accessible on UI thread. 222 static void StartWatchingAll(); 223 224 // This method posts a task on WatchDogThread to RevokeAll tasks and to 225 // deactive thread watching of other threads and tell NotificationService to 226 // stop calling Observe. 227 // This method is accessible on UI thread. 228 static void StopWatchingAll(); 229 230 // RemoveAll NotificationTypes that are being observed. 231 // This method is accessible on UI thread. 232 static void RemoveNotifications(); 233 234 private: 235 // Allow tests to access our innards for testing purposes. 236 FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, Registration); 237 238 // Delete all thread watcher objects and remove them from global map. 239 // This method is accessible on WatchDogThread. 240 void DeleteAll(); 241 242 // This will ensure that the watching is actively taking place. It will wakeup 243 // all thread watchers every 2 seconds. This is the implementation of 244 // NotificationObserver. 
When a matching notification is posted to the 245 // notification service, this method is called. 246 // This method is accessible on UI thread. 247 virtual void Observe(NotificationType type, 248 const NotificationSource& source, 249 const NotificationDetails& details); 250 251 // This will ensure that the watching is actively taking place, and awaken 252 // all thread watchers that are registered. 253 // This method is accessible on WatchDogThread. 254 virtual void WakeUpAll(); 255 256 // The Find() method can be used to test to see if a given ThreadWatcher was 257 // already registered, or to retrieve a pointer to it from the global map. 258 static ThreadWatcher* Find(const BrowserThread::ID& thread_id); 259 260 // Helper function should be called only while holding lock_. 261 ThreadWatcher* PreLockedFind(const BrowserThread::ID& thread_id); 262 263 static ThreadWatcherList* global_; // The singleton of this class. 264 265 // Lock for access to registered_. 266 base::Lock lock_; 267 268 // Map of all registered watched threads, from thread_id to ThreadWatcher. 269 RegistrationList registered_; 270 271 // The registrar that holds NotificationTypes to be observed. 272 NotificationRegistrar registrar_; 273 274 // This is the last time when woke all thread watchers up. 275 base::TimeTicks last_wakeup_time_; 276 277 DISALLOW_COPY_AND_ASSIGN(ThreadWatcherList); 278 }; 279 280 // Class for WatchDogThread and in its Init method, we start watching UI, IO, 281 // DB, FILE, CACHED threads. 282 class WatchDogThread : public base::Thread { 283 public: 284 // Constructor. 285 WatchDogThread(); 286 287 // Destroys the thread and stops the thread. 288 virtual ~WatchDogThread(); 289 290 // Callable on any thread. Returns whether you're currently on a 291 // watchdog_thread_. 
292 static bool CurrentlyOnWatchDogThread(); 293 294 // These are the same methods in message_loop.h, but are guaranteed to either 295 // get posted to the MessageLoop if it's still alive, or be deleted otherwise. 296 // They return true iff the watchdog thread existed and the task was posted. 297 // Note that even if the task is posted, there's no guarantee that it will 298 // run, since the target thread may already have a Quit message in its queue. 299 static bool PostTask(const tracked_objects::Location& from_here, Task* task); 300 static bool PostDelayedTask(const tracked_objects::Location& from_here, 301 Task* task, 302 int64 delay_ms); 303 304 protected: 305 virtual void Init(); 306 virtual void CleanUp(); 307 virtual void CleanUpAfterMessageLoopDestruction(); 308 309 private: 310 static bool PostTaskHelper( 311 const tracked_objects::Location& from_here, 312 Task* task, 313 int64 delay_ms); 314 315 // This lock protects watchdog_thread_. 316 static base::Lock lock_; 317 318 static WatchDogThread* watchdog_thread_; // The singleton of this class. 319 320 DISALLOW_COPY_AND_ASSIGN(WatchDogThread); 321 }; 322 323 // DISABLE_RUNNABLE_METHOD_REFCOUNT is a convenience macro for disabling 324 // refcounting of ThreadWatcher and ThreadWatcherList classes. 325 DISABLE_RUNNABLE_METHOD_REFCOUNT(ThreadWatcher); 326 DISABLE_RUNNABLE_METHOD_REFCOUNT(ThreadWatcherList); 327 328 #endif // CHROME_BROWSER_METRICS_THREAD_WATCHER_H_ 329