• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2008 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.android.server;
18 
19 import android.app.IActivityController;
20 import android.os.Binder;
21 import android.os.RemoteException;
22 import com.android.server.am.ActivityManagerService;
23 
24 import android.content.BroadcastReceiver;
25 import android.content.ContentResolver;
26 import android.content.Context;
27 import android.content.Intent;
28 import android.content.IntentFilter;
29 import android.os.Debug;
30 import android.os.Handler;
31 import android.os.IPowerManager;
32 import android.os.Looper;
33 import android.os.Process;
34 import android.os.ServiceManager;
35 import android.os.SystemClock;
36 import android.os.SystemProperties;
37 import android.util.EventLog;
38 import android.util.Log;
39 import android.util.Slog;
40 
41 import java.io.File;
42 import java.io.FileWriter;
43 import java.io.IOException;
44 import java.util.ArrayList;
45 
46 /** Periodically runs its monitors and handler checks, killing this process if they fail to return in time. */
47 public class Watchdog extends Thread {
48     static final String TAG = "Watchdog";
49 
50     // Set this to true to use debug default values.
51     static final boolean DB = false;
52 
53     // Set this to true to have the watchdog record kernel thread stacks when it fires
54     static final boolean RECORD_KERNEL_THREADS = true;
55 
56     static final long DEFAULT_TIMEOUT = DB ? 10*1000 : 60*1000;
57     static final long CHECK_INTERVAL = DEFAULT_TIMEOUT / 2;
58 
59     // These are temporally ordered: larger values as lateness increases
60     static final int COMPLETED = 0;
61     static final int WAITING = 1;
62     static final int WAITED_HALF = 2;
63     static final int OVERDUE = 3;
64 
65     // Which native processes to dump into dropbox's stack traces
66     public static final String[] NATIVE_STACKS_OF_INTEREST = new String[] {
67         "/system/bin/audioserver",
68         "/system/bin/cameraserver",
69         "/system/bin/drmserver",
70         "/system/bin/mediadrmserver",
71         "/system/bin/mediaserver",
72         "/system/bin/sdcard",
73         "/system/bin/surfaceflinger",
74         "media.codec",     // system/bin/mediacodec
75         "media.extractor", // system/bin/mediaextractor
76         "com.android.bluetooth",  // Bluetooth service
77     };
78 
79     static Watchdog sWatchdog;
80 
81     /* Checkers for the handler threads being watched; mMonitorChecker additionally runs the registered monitors. */
82     final ArrayList<HandlerChecker> mHandlerCheckers = new ArrayList<>();
83     final HandlerChecker mMonitorChecker;
84     ContentResolver mResolver;
85     ActivityManagerService mActivity;
86 
87     int mPhonePid;
88     IActivityController mController;
89     boolean mAllowRestart = true;
90 
91     /**
92      * Used for checking status of handle threads and scheduling monitor callbacks.
93      */
94     public final class HandlerChecker implements Runnable {
95         private final Handler mHandler;
96         private final String mName;
97         private final long mWaitMax;
98         private final ArrayList<Monitor> mMonitors = new ArrayList<Monitor>();
99         private boolean mCompleted;
100         private Monitor mCurrentMonitor;
101         private long mStartTime;
102 
HandlerChecker(Handler handler, String name, long waitMaxMillis)103         HandlerChecker(Handler handler, String name, long waitMaxMillis) {
104             mHandler = handler;
105             mName = name;
106             mWaitMax = waitMaxMillis;
107             mCompleted = true;
108         }
109 
addMonitor(Monitor monitor)110         public void addMonitor(Monitor monitor) {
111             mMonitors.add(monitor);
112         }
113 
scheduleCheckLocked()114         public void scheduleCheckLocked() {
115             if (mMonitors.size() == 0 && mHandler.getLooper().getQueue().isPolling()) {
116                 // If the target looper has recently been polling, then
117                 // there is no reason to enqueue our checker on it since that
118                 // is as good as it not being deadlocked.  This avoid having
119                 // to do a context switch to check the thread.  Note that we
120                 // only do this if mCheckReboot is false and we have no
121                 // monitors, since those would need to be executed at this point.
122                 mCompleted = true;
123                 return;
124             }
125 
126             if (!mCompleted) {
127                 // we already have a check in flight, so no need
128                 return;
129             }
130 
131             mCompleted = false;
132             mCurrentMonitor = null;
133             mStartTime = SystemClock.uptimeMillis();
134             mHandler.postAtFrontOfQueue(this);
135         }
136 
isOverdueLocked()137         public boolean isOverdueLocked() {
138             return (!mCompleted) && (SystemClock.uptimeMillis() > mStartTime + mWaitMax);
139         }
140 
getCompletionStateLocked()141         public int getCompletionStateLocked() {
142             if (mCompleted) {
143                 return COMPLETED;
144             } else {
145                 long latency = SystemClock.uptimeMillis() - mStartTime;
146                 if (latency < mWaitMax/2) {
147                     return WAITING;
148                 } else if (latency < mWaitMax) {
149                     return WAITED_HALF;
150                 }
151             }
152             return OVERDUE;
153         }
154 
getThread()155         public Thread getThread() {
156             return mHandler.getLooper().getThread();
157         }
158 
getName()159         public String getName() {
160             return mName;
161         }
162 
describeBlockedStateLocked()163         public String describeBlockedStateLocked() {
164             if (mCurrentMonitor == null) {
165                 return "Blocked in handler on " + mName + " (" + getThread().getName() + ")";
166             } else {
167                 return "Blocked in monitor " + mCurrentMonitor.getClass().getName()
168                         + " on " + mName + " (" + getThread().getName() + ")";
169             }
170         }
171 
172         @Override
run()173         public void run() {
174             final int size = mMonitors.size();
175             for (int i = 0 ; i < size ; i++) {
176                 synchronized (Watchdog.this) {
177                     mCurrentMonitor = mMonitors.get(i);
178                 }
179                 mCurrentMonitor.monitor();
180             }
181 
182             synchronized (Watchdog.this) {
183                 mCompleted = true;
184                 mCurrentMonitor = null;
185             }
186         }
187     }
188 
189     final class RebootRequestReceiver extends BroadcastReceiver {
190         @Override
onReceive(Context c, Intent intent)191         public void onReceive(Context c, Intent intent) {
192             if (intent.getIntExtra("nowait", 0) != 0) {
193                 rebootSystem("Received ACTION_REBOOT broadcast");
194                 return;
195             }
196             Slog.w(TAG, "Unsupported ACTION_REBOOT broadcast: " + intent);
197         }
198     }
199 
200     /** Monitor for checking the availability of binder threads. The monitor will block until
201      * there is a binder thread available to process in coming IPCs to make sure other processes
202      * can still communicate with the service.
203      */
204     private static final class BinderThreadMonitor implements Watchdog.Monitor {
205         @Override
monitor()206         public void monitor() {
207             Binder.blockUntilThreadAvailable();
208         }
209     }
210 
211     public interface Monitor {
monitor()212         void monitor();
213     }
214 
getInstance()215     public static Watchdog getInstance() {
216         if (sWatchdog == null) {
217             sWatchdog = new Watchdog();
218         }
219 
220         return sWatchdog;
221     }
222 
Watchdog()223     private Watchdog() {
224         super("watchdog");
225         // Initialize handler checkers for each common thread we want to check.  Note
226         // that we are not currently checking the background thread, since it can
227         // potentially hold longer running operations with no guarantees about the timeliness
228         // of operations there.
229 
230         // The shared foreground thread is the main checker.  It is where we
231         // will also dispatch monitor checks and do other work.
232         mMonitorChecker = new HandlerChecker(FgThread.getHandler(),
233                 "foreground thread", DEFAULT_TIMEOUT);
234         mHandlerCheckers.add(mMonitorChecker);
235         // Add checker for main thread.  We only do a quick check since there
236         // can be UI running on the thread.
237         mHandlerCheckers.add(new HandlerChecker(new Handler(Looper.getMainLooper()),
238                 "main thread", DEFAULT_TIMEOUT));
239         // Add checker for shared UI thread.
240         mHandlerCheckers.add(new HandlerChecker(UiThread.getHandler(),
241                 "ui thread", DEFAULT_TIMEOUT));
242         // And also check IO thread.
243         mHandlerCheckers.add(new HandlerChecker(IoThread.getHandler(),
244                 "i/o thread", DEFAULT_TIMEOUT));
245         // And the display thread.
246         mHandlerCheckers.add(new HandlerChecker(DisplayThread.getHandler(),
247                 "display thread", DEFAULT_TIMEOUT));
248 
249         // Initialize monitor for Binder threads.
250         addMonitor(new BinderThreadMonitor());
251     }
252 
init(Context context, ActivityManagerService activity)253     public void init(Context context, ActivityManagerService activity) {
254         mResolver = context.getContentResolver();
255         mActivity = activity;
256 
257         context.registerReceiver(new RebootRequestReceiver(),
258                 new IntentFilter(Intent.ACTION_REBOOT),
259                 android.Manifest.permission.REBOOT, null);
260     }
261 
processStarted(String name, int pid)262     public void processStarted(String name, int pid) {
263         synchronized (this) {
264             if ("com.android.phone".equals(name)) {
265                 mPhonePid = pid;
266             }
267         }
268     }
269 
setActivityController(IActivityController controller)270     public void setActivityController(IActivityController controller) {
271         synchronized (this) {
272             mController = controller;
273         }
274     }
275 
setAllowRestart(boolean allowRestart)276     public void setAllowRestart(boolean allowRestart) {
277         synchronized (this) {
278             mAllowRestart = allowRestart;
279         }
280     }
281 
addMonitor(Monitor monitor)282     public void addMonitor(Monitor monitor) {
283         synchronized (this) {
284             if (isAlive()) {
285                 throw new RuntimeException("Monitors can't be added once the Watchdog is running");
286             }
287             mMonitorChecker.addMonitor(monitor);
288         }
289     }
290 
addThread(Handler thread)291     public void addThread(Handler thread) {
292         addThread(thread, DEFAULT_TIMEOUT);
293     }
294 
addThread(Handler thread, long timeoutMillis)295     public void addThread(Handler thread, long timeoutMillis) {
296         synchronized (this) {
297             if (isAlive()) {
298                 throw new RuntimeException("Threads can't be added once the Watchdog is running");
299             }
300             final String name = thread.getLooper().getThread().getName();
301             mHandlerCheckers.add(new HandlerChecker(thread, name, timeoutMillis));
302         }
303     }
304 
305     /**
306      * Perform a full reboot of the system.
307      */
rebootSystem(String reason)308     void rebootSystem(String reason) {
309         Slog.i(TAG, "Rebooting system because: " + reason);
310         IPowerManager pms = (IPowerManager)ServiceManager.getService(Context.POWER_SERVICE);
311         try {
312             pms.reboot(false, reason, false);
313         } catch (RemoteException ex) {
314         }
315     }
316 
evaluateCheckerCompletionLocked()317     private int evaluateCheckerCompletionLocked() {
318         int state = COMPLETED;
319         for (int i=0; i<mHandlerCheckers.size(); i++) {
320             HandlerChecker hc = mHandlerCheckers.get(i);
321             state = Math.max(state, hc.getCompletionStateLocked());
322         }
323         return state;
324     }
325 
getBlockedCheckersLocked()326     private ArrayList<HandlerChecker> getBlockedCheckersLocked() {
327         ArrayList<HandlerChecker> checkers = new ArrayList<HandlerChecker>();
328         for (int i=0; i<mHandlerCheckers.size(); i++) {
329             HandlerChecker hc = mHandlerCheckers.get(i);
330             if (hc.isOverdueLocked()) {
331                 checkers.add(hc);
332             }
333         }
334         return checkers;
335     }
336 
describeCheckersLocked(ArrayList<HandlerChecker> checkers)337     private String describeCheckersLocked(ArrayList<HandlerChecker> checkers) {
338         StringBuilder builder = new StringBuilder(128);
339         for (int i=0; i<checkers.size(); i++) {
340             if (builder.length() > 0) {
341                 builder.append(", ");
342             }
343             builder.append(checkers.get(i).describeBlockedStateLocked());
344         }
345         return builder.toString();
346     }
347 
348     @Override
run()349     public void run() {
350         boolean waitedHalf = false;
351         while (true) {
352             final ArrayList<HandlerChecker> blockedCheckers;
353             final String subject;
354             final boolean allowRestart;
355             int debuggerWasConnected = 0;
356             synchronized (this) {
357                 long timeout = CHECK_INTERVAL;
358                 // Make sure we (re)spin the checkers that have become idle within
359                 // this wait-and-check interval
360                 for (int i=0; i<mHandlerCheckers.size(); i++) {
361                     HandlerChecker hc = mHandlerCheckers.get(i);
362                     hc.scheduleCheckLocked();
363                 }
364 
365                 if (debuggerWasConnected > 0) {
366                     debuggerWasConnected--;
367                 }
368 
369                 // NOTE: We use uptimeMillis() here because we do not want to increment the time we
370                 // wait while asleep. If the device is asleep then the thing that we are waiting
371                 // to timeout on is asleep as well and won't have a chance to run, causing a false
372                 // positive on when to kill things.
373                 long start = SystemClock.uptimeMillis();
374                 while (timeout > 0) {
375                     if (Debug.isDebuggerConnected()) {
376                         debuggerWasConnected = 2;
377                     }
378                     try {
379                         wait(timeout);
380                     } catch (InterruptedException e) {
381                         Log.wtf(TAG, e);
382                     }
383                     if (Debug.isDebuggerConnected()) {
384                         debuggerWasConnected = 2;
385                     }
386                     timeout = CHECK_INTERVAL - (SystemClock.uptimeMillis() - start);
387                 }
388 
389                 final int waitState = evaluateCheckerCompletionLocked();
390                 if (waitState == COMPLETED) {
391                     // The monitors have returned; reset
392                     waitedHalf = false;
393                     continue;
394                 } else if (waitState == WAITING) {
395                     // still waiting but within their configured intervals; back off and recheck
396                     continue;
397                 } else if (waitState == WAITED_HALF) {
398                     if (!waitedHalf) {
399                         // We've waited half the deadlock-detection interval.  Pull a stack
400                         // trace and wait another half.
401                         ArrayList<Integer> pids = new ArrayList<Integer>();
402                         pids.add(Process.myPid());
403                         ActivityManagerService.dumpStackTraces(true, pids, null, null,
404                                 NATIVE_STACKS_OF_INTEREST);
405                         waitedHalf = true;
406                     }
407                     continue;
408                 }
409 
410                 // something is overdue!
411                 blockedCheckers = getBlockedCheckersLocked();
412                 subject = describeCheckersLocked(blockedCheckers);
413                 allowRestart = mAllowRestart;
414             }
415 
416             // If we got here, that means that the system is most likely hung.
417             // First collect stack traces from all threads of the system process.
418             // Then kill this process so that the system will restart.
419             EventLog.writeEvent(EventLogTags.WATCHDOG, subject);
420 
421             ArrayList<Integer> pids = new ArrayList<Integer>();
422             pids.add(Process.myPid());
423             if (mPhonePid > 0) pids.add(mPhonePid);
424             // Pass !waitedHalf so that just in case we somehow wind up here without having
425             // dumped the halfway stacks, we properly re-initialize the trace file.
426             final File stack = ActivityManagerService.dumpStackTraces(
427                     !waitedHalf, pids, null, null, NATIVE_STACKS_OF_INTEREST);
428 
429             // Give some extra time to make sure the stack traces get written.
430             // The system's been hanging for a minute, another second or two won't hurt much.
431             SystemClock.sleep(2000);
432 
433             // Pull our own kernel thread stacks as well if we're configured for that
434             if (RECORD_KERNEL_THREADS) {
435                 dumpKernelStackTraces();
436             }
437 
438             // Trigger the kernel to dump all blocked threads, and backtraces on all CPUs to the kernel log
439             doSysRq('w');
440             doSysRq('l');
441 
442             // Try to add the error to the dropbox, but assuming that the ActivityManager
443             // itself may be deadlocked.  (which has happened, causing this statement to
444             // deadlock and the watchdog as a whole to be ineffective)
445             Thread dropboxThread = new Thread("watchdogWriteToDropbox") {
446                     public void run() {
447                         mActivity.addErrorToDropBox(
448                                 "watchdog", null, "system_server", null, null,
449                                 subject, null, stack, null);
450                     }
451                 };
452             dropboxThread.start();
453             try {
454                 dropboxThread.join(2000);  // wait up to 2 seconds for it to return.
455             } catch (InterruptedException ignored) {}
456 
457             IActivityController controller;
458             synchronized (this) {
459                 controller = mController;
460             }
461             if (controller != null) {
462                 Slog.i(TAG, "Reporting stuck state to activity controller");
463                 try {
464                     Binder.setDumpDisabled("Service dumps disabled due to hung system process.");
465                     // 1 = keep waiting, -1 = kill system
466                     int res = controller.systemNotResponding(subject);
467                     if (res >= 0) {
468                         Slog.i(TAG, "Activity controller requested to coninue to wait");
469                         waitedHalf = false;
470                         continue;
471                     }
472                 } catch (RemoteException e) {
473                 }
474             }
475 
476             // Only kill the process if the debugger is not attached.
477             if (Debug.isDebuggerConnected()) {
478                 debuggerWasConnected = 2;
479             }
480             if (debuggerWasConnected >= 2) {
481                 Slog.w(TAG, "Debugger connected: Watchdog is *not* killing the system process");
482             } else if (debuggerWasConnected > 0) {
483                 Slog.w(TAG, "Debugger was connected: Watchdog is *not* killing the system process");
484             } else if (!allowRestart) {
485                 Slog.w(TAG, "Restart not allowed: Watchdog is *not* killing the system process");
486             } else {
487                 Slog.w(TAG, "*** WATCHDOG KILLING SYSTEM PROCESS: " + subject);
488                 for (int i=0; i<blockedCheckers.size(); i++) {
489                     Slog.w(TAG, blockedCheckers.get(i).getName() + " stack trace:");
490                     StackTraceElement[] stackTrace
491                             = blockedCheckers.get(i).getThread().getStackTrace();
492                     for (StackTraceElement element: stackTrace) {
493                         Slog.w(TAG, "    at " + element);
494                     }
495                 }
496                 Slog.w(TAG, "*** GOODBYE!");
497                 Process.killProcess(Process.myPid());
498                 System.exit(10);
499             }
500 
501             waitedHalf = false;
502         }
503     }
504 
doSysRq(char c)505     private void doSysRq(char c) {
506         try {
507             FileWriter sysrq_trigger = new FileWriter("/proc/sysrq-trigger");
508             sysrq_trigger.write(c);
509             sysrq_trigger.close();
510         } catch (IOException e) {
511             Slog.w(TAG, "Failed to write to /proc/sysrq-trigger", e);
512         }
513     }
514 
dumpKernelStackTraces()515     private File dumpKernelStackTraces() {
516         String tracesPath = SystemProperties.get("dalvik.vm.stack-trace-file", null);
517         if (tracesPath == null || tracesPath.length() == 0) {
518             return null;
519         }
520 
521         native_dumpKernelStacks(tracesPath);
522         return new File(tracesPath);
523     }
524 
native_dumpKernelStacks(String tracesPath)525     private native void native_dumpKernelStacks(String tracesPath);
526 }
527