• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
 * Copyright (C) 2008 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 
17 package com.android.server;
18 
19 import android.app.IActivityController;
20 import android.os.Binder;
21 import android.os.RemoteException;
22 import com.android.server.am.ActivityManagerService;
23 import com.android.server.power.PowerManagerService;
24 
25 import android.app.AlarmManager;
26 import android.app.PendingIntent;
27 import android.content.BroadcastReceiver;
28 import android.content.ContentResolver;
29 import android.content.Context;
30 import android.content.Intent;
31 import android.content.IntentFilter;
32 import android.os.BatteryManager;
33 import android.os.Debug;
34 import android.os.Handler;
35 import android.os.Looper;
36 import android.os.Message;
37 import android.os.Process;
38 import android.os.ServiceManager;
39 import android.os.SystemClock;
40 import android.os.SystemProperties;
41 import android.util.EventLog;
42 import android.util.Log;
43 import android.util.Slog;
44 
45 import java.io.File;
46 import java.io.FileWriter;
47 import java.io.IOException;
48 import java.util.ArrayList;
49 import java.util.Calendar;
50 
/** This class periodically calls its monitors on the main looper thread, and kills this process if they don't return in time. */
52 public class Watchdog extends Thread {
53     static final String TAG = "Watchdog";
54     static final boolean localLOGV = false || false;
55 
56     // Set this to true to use debug default values.
57     static final boolean DB = false;
58 
59     // Set this to true to have the watchdog record kernel thread stacks when it fires
60     static final boolean RECORD_KERNEL_THREADS = true;
61 
62     static final int MONITOR = 2718;
63 
64     static final int TIME_TO_RESTART = DB ? 15*1000 : 60*1000;
65     static final int TIME_TO_WAIT = TIME_TO_RESTART / 2;
66 
67     static final int MEMCHECK_DEFAULT_MIN_SCREEN_OFF = DB ? 1*60 : 5*60;   // 5 minutes
68     static final int MEMCHECK_DEFAULT_MIN_ALARM = DB ? 1*60 : 3*60;        // 3 minutes
69     static final int MEMCHECK_DEFAULT_RECHECK_INTERVAL = DB ? 1*60 : 5*60; // 5 minutes
70 
71     static final int REBOOT_DEFAULT_INTERVAL = DB ? 1 : 0;                 // never force reboot
72     static final int REBOOT_DEFAULT_START_TIME = 3*60*60;                  // 3:00am
73     static final int REBOOT_DEFAULT_WINDOW = 60*60;                        // within 1 hour
74 
75     static final String REBOOT_ACTION = "com.android.service.Watchdog.REBOOT";
76 
77     static final String[] NATIVE_STACKS_OF_INTEREST = new String[] {
78         "/system/bin/mediaserver",
79         "/system/bin/sdcard",
80         "/system/bin/surfaceflinger"
81     };
82 
83     static Watchdog sWatchdog;
84 
85     /* This handler will be used to post message back onto the main thread */
86     final Handler mHandler;
87     final ArrayList<Monitor> mMonitors = new ArrayList<Monitor>();
88     ContentResolver mResolver;
89     BatteryService mBattery;
90     PowerManagerService mPower;
91     AlarmManagerService mAlarm;
92     ActivityManagerService mActivity;
93     boolean mCompleted;
94     Monitor mCurrentMonitor;
95 
96     int mPhonePid;
97     IActivityController mController;
98     boolean mAllowRestart = true;
99 
100     final Calendar mCalendar = Calendar.getInstance();
101     int mMinScreenOff = MEMCHECK_DEFAULT_MIN_SCREEN_OFF;
102     int mMinAlarm = MEMCHECK_DEFAULT_MIN_ALARM;
103     boolean mNeedScheduledCheck;
104     PendingIntent mCheckupIntent;
105     PendingIntent mRebootIntent;
106 
107     long mBootTime;
108     int mRebootInterval;
109 
110     boolean mReqRebootNoWait;     // should wait for one interval before reboot?
111     int mReqRebootInterval = -1;  // >= 0 if a reboot has been requested
112     int mReqRebootStartTime = -1; // >= 0 if a specific start time has been requested
113     int mReqRebootWindow = -1;    // >= 0 if a specific window has been requested
114     int mReqMinScreenOff = -1;    // >= 0 if a specific screen off time has been requested
115     int mReqMinNextAlarm = -1;    // >= 0 if specific time to next alarm has been requested
116     int mReqRecheckInterval= -1;  // >= 0 if a specific recheck interval has been requested
117 
118     /**
119      * Used for scheduling monitor callbacks and checking memory usage.
120      */
121     final class HeartbeatHandler extends Handler {
HeartbeatHandler(Looper looper)122         HeartbeatHandler(Looper looper) {
123             super(looper);
124         }
125 
126         @Override
handleMessage(Message msg)127         public void handleMessage(Message msg) {
128             switch (msg.what) {
129                 case MONITOR: {
130                     // See if we should force a reboot.
131                     int rebootInterval = mReqRebootInterval >= 0
132                             ? mReqRebootInterval : REBOOT_DEFAULT_INTERVAL;
133                     if (mRebootInterval != rebootInterval) {
134                         mRebootInterval = rebootInterval;
135                         // We have been running long enough that a reboot can
136                         // be considered...
137                         checkReboot(false);
138                     }
139 
140                     final int size = mMonitors.size();
141                     for (int i = 0 ; i < size ; i++) {
142                         synchronized (Watchdog.this) {
143                             mCurrentMonitor = mMonitors.get(i);
144                         }
145                         mCurrentMonitor.monitor();
146                     }
147 
148                     synchronized (Watchdog.this) {
149                         mCompleted = true;
150                         mCurrentMonitor = null;
151                     }
152                 } break;
153             }
154         }
155     }
156 
157     final class RebootReceiver extends BroadcastReceiver {
158         @Override
onReceive(Context c, Intent intent)159         public void onReceive(Context c, Intent intent) {
160             if (localLOGV) Slog.v(TAG, "Alarm went off, checking reboot.");
161             checkReboot(true);
162         }
163     }
164 
165     final class RebootRequestReceiver extends BroadcastReceiver {
166         @Override
onReceive(Context c, Intent intent)167         public void onReceive(Context c, Intent intent) {
168             mReqRebootNoWait = intent.getIntExtra("nowait", 0) != 0;
169             mReqRebootInterval = intent.getIntExtra("interval", -1);
170             mReqRebootStartTime = intent.getIntExtra("startTime", -1);
171             mReqRebootWindow = intent.getIntExtra("window", -1);
172             mReqMinScreenOff = intent.getIntExtra("minScreenOff", -1);
173             mReqMinNextAlarm = intent.getIntExtra("minNextAlarm", -1);
174             mReqRecheckInterval = intent.getIntExtra("recheckInterval", -1);
175             EventLog.writeEvent(EventLogTags.WATCHDOG_REQUESTED_REBOOT,
176                     mReqRebootNoWait ? 1 : 0, mReqRebootInterval,
177                             mReqRecheckInterval, mReqRebootStartTime,
178                     mReqRebootWindow, mReqMinScreenOff, mReqMinNextAlarm);
179             checkReboot(true);
180         }
181     }
182 
/**
 * Callback registered through {@code addMonitor}. The heartbeat handler calls
 * {@link #monitor()} once per pass; a call that does not return keeps the
 * pass from completing.
 */
public interface Monitor {
    /** Invoked by the heartbeat handler; expected to return promptly. */
    void monitor();
}
186 
getInstance()187     public static Watchdog getInstance() {
188         if (sWatchdog == null) {
189             sWatchdog = new Watchdog();
190         }
191 
192         return sWatchdog;
193     }
194 
Watchdog()195     private Watchdog() {
196         super("watchdog");
197         // Explicitly bind the HeartbeatHandler to run on the ServerThread, so
198         // that it can't get accidentally bound to another thread.
199         mHandler = new HeartbeatHandler(Looper.getMainLooper());
200     }
201 
init(Context context, BatteryService battery, PowerManagerService power, AlarmManagerService alarm, ActivityManagerService activity)202     public void init(Context context, BatteryService battery,
203             PowerManagerService power, AlarmManagerService alarm,
204             ActivityManagerService activity) {
205         mResolver = context.getContentResolver();
206         mBattery = battery;
207         mPower = power;
208         mAlarm = alarm;
209         mActivity = activity;
210 
211         context.registerReceiver(new RebootReceiver(),
212                 new IntentFilter(REBOOT_ACTION));
213         mRebootIntent = PendingIntent.getBroadcast(context,
214                 0, new Intent(REBOOT_ACTION), 0);
215 
216         context.registerReceiver(new RebootRequestReceiver(),
217                 new IntentFilter(Intent.ACTION_REBOOT),
218                 android.Manifest.permission.REBOOT, null);
219 
220         mBootTime = System.currentTimeMillis();
221     }
222 
processStarted(String name, int pid)223     public void processStarted(String name, int pid) {
224         synchronized (this) {
225             if ("com.android.phone".equals(name)) {
226                 mPhonePid = pid;
227             }
228         }
229     }
230 
setActivityController(IActivityController controller)231     public void setActivityController(IActivityController controller) {
232         synchronized (this) {
233             mController = controller;
234         }
235     }
236 
setAllowRestart(boolean allowRestart)237     public void setAllowRestart(boolean allowRestart) {
238         synchronized (this) {
239             mAllowRestart = allowRestart;
240         }
241     }
242 
addMonitor(Monitor monitor)243     public void addMonitor(Monitor monitor) {
244         synchronized (this) {
245             if (isAlive()) {
246                 throw new RuntimeException("Monitors can't be added while the Watchdog is running");
247             }
248             mMonitors.add(monitor);
249         }
250     }
251 
checkReboot(boolean fromAlarm)252     void checkReboot(boolean fromAlarm) {
253         int rebootInterval = mReqRebootInterval >= 0 ? mReqRebootInterval
254                 : REBOOT_DEFAULT_INTERVAL;
255         mRebootInterval = rebootInterval;
256         if (rebootInterval <= 0) {
257             // No reboot interval requested.
258             if (localLOGV) Slog.v(TAG, "No need to schedule a reboot alarm!");
259             mAlarm.remove(mRebootIntent);
260             return;
261         }
262 
263         long rebootStartTime = mReqRebootStartTime >= 0 ? mReqRebootStartTime
264                 : REBOOT_DEFAULT_START_TIME;
265         long rebootWindowMillis = (mReqRebootWindow >= 0 ? mReqRebootWindow
266                 : REBOOT_DEFAULT_WINDOW) * 1000;
267         long recheckInterval = (mReqRecheckInterval >= 0 ? mReqRecheckInterval
268                 : MEMCHECK_DEFAULT_RECHECK_INTERVAL) * 1000;
269 
270         retrieveBrutalityAmount();
271 
272         long realStartTime;
273         long now;
274 
275         synchronized (this) {
276             now = System.currentTimeMillis();
277             realStartTime = computeCalendarTime(mCalendar, now,
278                     rebootStartTime);
279 
280             long rebootIntervalMillis = rebootInterval*24*60*60*1000;
281             if (DB || mReqRebootNoWait ||
282                     (now-mBootTime) >= (rebootIntervalMillis-rebootWindowMillis)) {
283                 if (fromAlarm && rebootWindowMillis <= 0) {
284                     // No reboot window -- just immediately reboot.
285                     EventLog.writeEvent(EventLogTags.WATCHDOG_SCHEDULED_REBOOT, now,
286                             (int)rebootIntervalMillis, (int)rebootStartTime*1000,
287                             (int)rebootWindowMillis, "");
288                     rebootSystem("Checkin scheduled forced");
289                     return;
290                 }
291 
292                 // Are we within the reboot window?
293                 if (now < realStartTime) {
294                     // Schedule alarm for next check interval.
295                     realStartTime = computeCalendarTime(mCalendar,
296                             now, rebootStartTime);
297                 } else if (now < (realStartTime+rebootWindowMillis)) {
298                     String doit = shouldWeBeBrutalLocked(now);
299                     EventLog.writeEvent(EventLogTags.WATCHDOG_SCHEDULED_REBOOT, now,
300                             (int)rebootInterval, (int)rebootStartTime*1000,
301                             (int)rebootWindowMillis, doit != null ? doit : "");
302                     if (doit == null) {
303                         rebootSystem("Checked scheduled range");
304                         return;
305                     }
306 
307                     // Schedule next alarm either within the window or in the
308                     // next interval.
309                     if ((now+recheckInterval) >= (realStartTime+rebootWindowMillis)) {
310                         realStartTime = computeCalendarTime(mCalendar,
311                                 now + rebootIntervalMillis, rebootStartTime);
312                     } else {
313                         realStartTime = now + recheckInterval;
314                     }
315                 } else {
316                     // Schedule alarm for next check interval.
317                     realStartTime = computeCalendarTime(mCalendar,
318                             now + rebootIntervalMillis, rebootStartTime);
319                 }
320             }
321         }
322 
323         if (localLOGV) Slog.v(TAG, "Scheduling next reboot alarm for "
324                 + ((realStartTime-now)/1000/60) + "m from now");
325         mAlarm.remove(mRebootIntent);
326         mAlarm.set(AlarmManager.RTC_WAKEUP, realStartTime, mRebootIntent);
327     }
328 
329     /**
330      * Perform a full reboot of the system.
331      */
rebootSystem(String reason)332     void rebootSystem(String reason) {
333         Slog.i(TAG, "Rebooting system because: " + reason);
334         PowerManagerService pms = (PowerManagerService) ServiceManager.getService("power");
335         pms.reboot(false, reason, false);
336     }
337 
338     /**
339      * Load the current Gservices settings for when
340      * {@link #shouldWeBeBrutalLocked} will allow the brutality to happen.
341      * Must not be called with the lock held.
342      */
retrieveBrutalityAmount()343     void retrieveBrutalityAmount() {
344         mMinScreenOff = (mReqMinScreenOff >= 0 ? mReqMinScreenOff
345                 : MEMCHECK_DEFAULT_MIN_SCREEN_OFF) * 1000;
346         mMinAlarm = (mReqMinNextAlarm >= 0 ? mReqMinNextAlarm
347                 : MEMCHECK_DEFAULT_MIN_ALARM) * 1000;
348     }
349 
350     /**
351      * Determine whether it is a good time to kill, crash, or otherwise
352      * plunder the current situation for the overall long-term benefit of
353      * the world.
354      *
355      * @param curTime The current system time.
356      * @return Returns null if this is a good time, else a String with the
357      * text of why it is not a good time.
358      */
shouldWeBeBrutalLocked(long curTime)359     String shouldWeBeBrutalLocked(long curTime) {
360         if (mBattery == null || !mBattery.isPowered(BatteryManager.BATTERY_PLUGGED_ANY)) {
361             return "battery";
362         }
363 
364         if (mMinScreenOff >= 0 && (mPower == null ||
365                 mPower.timeSinceScreenWasLastOn() < mMinScreenOff)) {
366             return "screen";
367         }
368 
369         if (mMinAlarm >= 0 && (mAlarm == null ||
370                 mAlarm.timeToNextAlarm() < mMinAlarm)) {
371             return "alarm";
372         }
373 
374         return null;
375     }
376 
computeCalendarTime(Calendar c, long curTime, long secondsSinceMidnight)377     static long computeCalendarTime(Calendar c, long curTime,
378             long secondsSinceMidnight) {
379 
380         // start with now
381         c.setTimeInMillis(curTime);
382 
383         int val = (int)secondsSinceMidnight / (60*60);
384         c.set(Calendar.HOUR_OF_DAY, val);
385         secondsSinceMidnight -= val * (60*60);
386         val = (int)secondsSinceMidnight / 60;
387         c.set(Calendar.MINUTE, val);
388         c.set(Calendar.SECOND, (int)secondsSinceMidnight - (val*60));
389         c.set(Calendar.MILLISECOND, 0);
390 
391         long newTime = c.getTimeInMillis();
392         if (newTime < curTime) {
393             // The given time (in seconds since midnight) has already passed for today, so advance
394             // by one day (due to daylight savings, etc., the delta may differ from 24 hours).
395             c.add(Calendar.DAY_OF_MONTH, 1);
396             newTime = c.getTimeInMillis();
397         }
398 
399         return newTime;
400     }
401 
402     @Override
run()403     public void run() {
404         boolean waitedHalf = false;
405         while (true) {
406             mCompleted = false;
407             mHandler.sendEmptyMessage(MONITOR);
408 
409 
410             final String name;
411             final boolean allowRestart;
412             synchronized (this) {
413                 long timeout = TIME_TO_WAIT;
414 
415                 // NOTE: We use uptimeMillis() here because we do not want to increment the time we
416                 // wait while asleep. If the device is asleep then the thing that we are waiting
417                 // to timeout on is asleep as well and won't have a chance to run, causing a false
418                 // positive on when to kill things.
419                 long start = SystemClock.uptimeMillis();
420                 while (timeout > 0) {
421                     try {
422                         wait(timeout);
423                     } catch (InterruptedException e) {
424                         Log.wtf(TAG, e);
425                     }
426                     timeout = TIME_TO_WAIT - (SystemClock.uptimeMillis() - start);
427                 }
428 
429                 if (mCompleted) {
430                     // The monitors have returned.
431                     waitedHalf = false;
432                     continue;
433                 }
434 
435                 if (!waitedHalf) {
436                     // We've waited half the deadlock-detection interval.  Pull a stack
437                     // trace and wait another half.
438                     ArrayList<Integer> pids = new ArrayList<Integer>();
439                     pids.add(Process.myPid());
440                     ActivityManagerService.dumpStackTraces(true, pids, null, null,
441                             NATIVE_STACKS_OF_INTEREST);
442                     waitedHalf = true;
443                     continue;
444                 }
445 
446                 name = (mCurrentMonitor != null) ?
447                     mCurrentMonitor.getClass().getName() : "null";
448                 allowRestart = mAllowRestart;
449             }
450 
451             // If we got here, that means that the system is most likely hung.
452             // First collect stack traces from all threads of the system process.
453             // Then kill this process so that the system will restart.
454             EventLog.writeEvent(EventLogTags.WATCHDOG, name);
455 
456             ArrayList<Integer> pids = new ArrayList<Integer>();
457             pids.add(Process.myPid());
458             if (mPhonePid > 0) pids.add(mPhonePid);
459             // Pass !waitedHalf so that just in case we somehow wind up here without having
460             // dumped the halfway stacks, we properly re-initialize the trace file.
461             final File stack = ActivityManagerService.dumpStackTraces(
462                     !waitedHalf, pids, null, null, NATIVE_STACKS_OF_INTEREST);
463 
464             // Give some extra time to make sure the stack traces get written.
465             // The system's been hanging for a minute, another second or two won't hurt much.
466             SystemClock.sleep(2000);
467 
468             // Pull our own kernel thread stacks as well if we're configured for that
469             if (RECORD_KERNEL_THREADS) {
470                 dumpKernelStackTraces();
471             }
472 
473             // Trigger the kernel to dump all blocked threads to the kernel log
474             try {
475                 FileWriter sysrq_trigger = new FileWriter("/proc/sysrq-trigger");
476                 sysrq_trigger.write("w");
477                 sysrq_trigger.close();
478             } catch (IOException e) {
479                 Slog.e(TAG, "Failed to write to /proc/sysrq-trigger");
480                 Slog.e(TAG, e.getMessage());
481             }
482 
483             // Try to add the error to the dropbox, but assuming that the ActivityManager
484             // itself may be deadlocked.  (which has happened, causing this statement to
485             // deadlock and the watchdog as a whole to be ineffective)
486             Thread dropboxThread = new Thread("watchdogWriteToDropbox") {
487                     public void run() {
488                         mActivity.addErrorToDropBox(
489                                 "watchdog", null, "system_server", null, null,
490                                 name, null, stack, null);
491                     }
492                 };
493             dropboxThread.start();
494             try {
495                 dropboxThread.join(2000);  // wait up to 2 seconds for it to return.
496             } catch (InterruptedException ignored) {}
497 
498             IActivityController controller;
499             synchronized (this) {
500                 controller = mController;
501             }
502             if (controller != null) {
503                 Slog.i(TAG, "Reporting stuck state to activity controller");
504                 try {
505                     Binder.setDumpDisabled("Service dumps disabled due to hung system process.");
506                     // 1 = keep waiting, -1 = kill system
507                     int res = controller.systemNotResponding(name);
508                     if (res >= 0) {
509                         Slog.i(TAG, "Activity controller requested to coninue to wait");
510                         waitedHalf = false;
511                         continue;
512                     }
513                 } catch (RemoteException e) {
514                 }
515             }
516 
517             // Only kill the process if the debugger is not attached.
518             if (Debug.isDebuggerConnected()) {
519                 Slog.w(TAG, "Debugger connected: Watchdog is *not* killing the system process");
520             } else if (!allowRestart) {
521                 Slog.w(TAG, "Restart not allowed: Watchdog is *not* killing the system process");
522             } else {
523                 Slog.w(TAG, "*** WATCHDOG KILLING SYSTEM PROCESS: " + name);
524                 Process.killProcess(Process.myPid());
525                 System.exit(10);
526             }
527 
528             waitedHalf = false;
529         }
530     }
531 
dumpKernelStackTraces()532     private File dumpKernelStackTraces() {
533         String tracesPath = SystemProperties.get("dalvik.vm.stack-trace-file", null);
534         if (tracesPath == null || tracesPath.length() == 0) {
535             return null;
536         }
537 
538         native_dumpKernelStacks(tracesPath);
539         return new File(tracesPath);
540     }
541 
native_dumpKernelStacks(String tracesPath)542     private native void native_dumpKernelStacks(String tracesPath);
543 }
544