• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2008 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.android.server;
18 
19 import com.android.server.am.ActivityManagerService;
20 import com.android.server.power.PowerManagerService;
21 
22 import android.app.AlarmManager;
23 import android.app.PendingIntent;
24 import android.content.BroadcastReceiver;
25 import android.content.ContentResolver;
26 import android.content.Context;
27 import android.content.Intent;
28 import android.content.IntentFilter;
29 import android.os.BatteryManager;
30 import android.os.Debug;
31 import android.os.Handler;
32 import android.os.Message;
33 import android.os.Process;
34 import android.os.ServiceManager;
35 import android.os.SystemClock;
36 import android.os.SystemProperties;
37 import android.util.EventLog;
38 import android.util.Log;
39 import android.util.Slog;
40 
41 import java.io.File;
42 import java.io.FileWriter;
43 import java.io.IOException;
44 import java.util.ArrayList;
45 import java.util.Calendar;
46 
/** This class periodically calls its monitors, killing this process if they don't return within about a minute. */
48 public class Watchdog extends Thread {
49     static final String TAG = "Watchdog";
50     static final boolean localLOGV = false || false;
51 
52     // Set this to true to use debug default values.
53     static final boolean DB = false;
54 
55     // Set this to true to have the watchdog record kernel thread stacks when it fires
56     static final boolean RECORD_KERNEL_THREADS = true;
57 
58     static final int MONITOR = 2718;
59 
60     static final int TIME_TO_RESTART = DB ? 15*1000 : 60*1000;
61     static final int TIME_TO_WAIT = TIME_TO_RESTART / 2;
62 
63     static final int MEMCHECK_DEFAULT_MIN_SCREEN_OFF = DB ? 1*60 : 5*60;   // 5 minutes
64     static final int MEMCHECK_DEFAULT_MIN_ALARM = DB ? 1*60 : 3*60;        // 3 minutes
65     static final int MEMCHECK_DEFAULT_RECHECK_INTERVAL = DB ? 1*60 : 5*60; // 5 minutes
66 
67     static final int REBOOT_DEFAULT_INTERVAL = DB ? 1 : 0;                 // never force reboot
68     static final int REBOOT_DEFAULT_START_TIME = 3*60*60;                  // 3:00am
69     static final int REBOOT_DEFAULT_WINDOW = 60*60;                        // within 1 hour
70 
71     static final String REBOOT_ACTION = "com.android.service.Watchdog.REBOOT";
72 
73     static final String[] NATIVE_STACKS_OF_INTEREST = new String[] {
74         "/system/bin/mediaserver",
75         "/system/bin/sdcard",
76         "/system/bin/surfaceflinger"
77     };
78 
79     static Watchdog sWatchdog;
80 
81     /* This handler will be used to post message back onto the main thread */
82     final Handler mHandler;
83     final ArrayList<Monitor> mMonitors = new ArrayList<Monitor>();
84     ContentResolver mResolver;
85     BatteryService mBattery;
86     PowerManagerService mPower;
87     AlarmManagerService mAlarm;
88     ActivityManagerService mActivity;
89     boolean mCompleted;
90     boolean mForceKillSystem;
91     Monitor mCurrentMonitor;
92 
93     int mPhonePid;
94 
95     final Calendar mCalendar = Calendar.getInstance();
96     int mMinScreenOff = MEMCHECK_DEFAULT_MIN_SCREEN_OFF;
97     int mMinAlarm = MEMCHECK_DEFAULT_MIN_ALARM;
98     boolean mNeedScheduledCheck;
99     PendingIntent mCheckupIntent;
100     PendingIntent mRebootIntent;
101 
102     long mBootTime;
103     int mRebootInterval;
104 
105     boolean mReqRebootNoWait;     // should wait for one interval before reboot?
106     int mReqRebootInterval = -1;  // >= 0 if a reboot has been requested
107     int mReqRebootStartTime = -1; // >= 0 if a specific start time has been requested
108     int mReqRebootWindow = -1;    // >= 0 if a specific window has been requested
109     int mReqMinScreenOff = -1;    // >= 0 if a specific screen off time has been requested
110     int mReqMinNextAlarm = -1;    // >= 0 if specific time to next alarm has been requested
111     int mReqRecheckInterval= -1;  // >= 0 if a specific recheck interval has been requested
112 
113     /**
114      * Used for scheduling monitor callbacks and checking memory usage.
115      */
116     final class HeartbeatHandler extends Handler {
117         @Override
handleMessage(Message msg)118         public void handleMessage(Message msg) {
119             switch (msg.what) {
120                 case MONITOR: {
121                     // See if we should force a reboot.
122                     int rebootInterval = mReqRebootInterval >= 0
123                             ? mReqRebootInterval : REBOOT_DEFAULT_INTERVAL;
124                     if (mRebootInterval != rebootInterval) {
125                         mRebootInterval = rebootInterval;
126                         // We have been running long enough that a reboot can
127                         // be considered...
128                         checkReboot(false);
129                     }
130 
131                     final int size = mMonitors.size();
132                     for (int i = 0 ; i < size ; i++) {
133                         mCurrentMonitor = mMonitors.get(i);
134                         mCurrentMonitor.monitor();
135                     }
136 
137                     synchronized (Watchdog.this) {
138                         mCompleted = true;
139                         mCurrentMonitor = null;
140                     }
141                 } break;
142             }
143         }
144     }
145 
146     final class RebootReceiver extends BroadcastReceiver {
147         @Override
onReceive(Context c, Intent intent)148         public void onReceive(Context c, Intent intent) {
149             if (localLOGV) Slog.v(TAG, "Alarm went off, checking reboot.");
150             checkReboot(true);
151         }
152     }
153 
154     final class RebootRequestReceiver extends BroadcastReceiver {
155         @Override
onReceive(Context c, Intent intent)156         public void onReceive(Context c, Intent intent) {
157             mReqRebootNoWait = intent.getIntExtra("nowait", 0) != 0;
158             mReqRebootInterval = intent.getIntExtra("interval", -1);
159             mReqRebootStartTime = intent.getIntExtra("startTime", -1);
160             mReqRebootWindow = intent.getIntExtra("window", -1);
161             mReqMinScreenOff = intent.getIntExtra("minScreenOff", -1);
162             mReqMinNextAlarm = intent.getIntExtra("minNextAlarm", -1);
163             mReqRecheckInterval = intent.getIntExtra("recheckInterval", -1);
164             EventLog.writeEvent(EventLogTags.WATCHDOG_REQUESTED_REBOOT,
165                     mReqRebootNoWait ? 1 : 0, mReqRebootInterval,
166                             mReqRecheckInterval, mReqRebootStartTime,
167                     mReqRebootWindow, mReqMinScreenOff, mReqMinNextAlarm);
168             checkReboot(true);
169         }
170     }
171 
    /**
     * Callback registered through {@link #addMonitor(Monitor)}. The watchdog's handler
     * calls {@link #monitor()} on every registered instance during each pass; a call
     * that does not return in time causes the watchdog to fire.
     */
    public interface Monitor {
        /** Invoked once per monitoring pass; must return for the pass to complete. */
        void monitor();
    }
175 
getInstance()176     public static Watchdog getInstance() {
177         if (sWatchdog == null) {
178             sWatchdog = new Watchdog();
179         }
180 
181         return sWatchdog;
182     }
183 
    /**
     * Creates the watchdog thread (named "watchdog") together with the
     * {@link HeartbeatHandler} it posts to. Private: use {@link #getInstance()}.
     */
    private Watchdog() {
        super("watchdog");
        mHandler = new HeartbeatHandler();
    }
188 
init(Context context, BatteryService battery, PowerManagerService power, AlarmManagerService alarm, ActivityManagerService activity)189     public void init(Context context, BatteryService battery,
190             PowerManagerService power, AlarmManagerService alarm,
191             ActivityManagerService activity) {
192         mResolver = context.getContentResolver();
193         mBattery = battery;
194         mPower = power;
195         mAlarm = alarm;
196         mActivity = activity;
197 
198         context.registerReceiver(new RebootReceiver(),
199                 new IntentFilter(REBOOT_ACTION));
200         mRebootIntent = PendingIntent.getBroadcast(context,
201                 0, new Intent(REBOOT_ACTION), 0);
202 
203         context.registerReceiver(new RebootRequestReceiver(),
204                 new IntentFilter(Intent.ACTION_REBOOT),
205                 android.Manifest.permission.REBOOT, null);
206 
207         mBootTime = System.currentTimeMillis();
208     }
209 
processStarted(String name, int pid)210     public void processStarted(String name, int pid) {
211         synchronized (this) {
212             if ("com.android.phone".equals(name)) {
213                 mPhonePid = pid;
214             }
215         }
216     }
217 
addMonitor(Monitor monitor)218     public void addMonitor(Monitor monitor) {
219         synchronized (this) {
220             if (isAlive()) {
221                 throw new RuntimeException("Monitors can't be added while the Watchdog is running");
222             }
223             mMonitors.add(monitor);
224         }
225     }
226 
checkReboot(boolean fromAlarm)227     void checkReboot(boolean fromAlarm) {
228         int rebootInterval = mReqRebootInterval >= 0 ? mReqRebootInterval
229                 : REBOOT_DEFAULT_INTERVAL;
230         mRebootInterval = rebootInterval;
231         if (rebootInterval <= 0) {
232             // No reboot interval requested.
233             if (localLOGV) Slog.v(TAG, "No need to schedule a reboot alarm!");
234             mAlarm.remove(mRebootIntent);
235             return;
236         }
237 
238         long rebootStartTime = mReqRebootStartTime >= 0 ? mReqRebootStartTime
239                 : REBOOT_DEFAULT_START_TIME;
240         long rebootWindowMillis = (mReqRebootWindow >= 0 ? mReqRebootWindow
241                 : REBOOT_DEFAULT_WINDOW) * 1000;
242         long recheckInterval = (mReqRecheckInterval >= 0 ? mReqRecheckInterval
243                 : MEMCHECK_DEFAULT_RECHECK_INTERVAL) * 1000;
244 
245         retrieveBrutalityAmount();
246 
247         long realStartTime;
248         long now;
249 
250         synchronized (this) {
251             now = System.currentTimeMillis();
252             realStartTime = computeCalendarTime(mCalendar, now,
253                     rebootStartTime);
254 
255             long rebootIntervalMillis = rebootInterval*24*60*60*1000;
256             if (DB || mReqRebootNoWait ||
257                     (now-mBootTime) >= (rebootIntervalMillis-rebootWindowMillis)) {
258                 if (fromAlarm && rebootWindowMillis <= 0) {
259                     // No reboot window -- just immediately reboot.
260                     EventLog.writeEvent(EventLogTags.WATCHDOG_SCHEDULED_REBOOT, now,
261                             (int)rebootIntervalMillis, (int)rebootStartTime*1000,
262                             (int)rebootWindowMillis, "");
263                     rebootSystem("Checkin scheduled forced");
264                     return;
265                 }
266 
267                 // Are we within the reboot window?
268                 if (now < realStartTime) {
269                     // Schedule alarm for next check interval.
270                     realStartTime = computeCalendarTime(mCalendar,
271                             now, rebootStartTime);
272                 } else if (now < (realStartTime+rebootWindowMillis)) {
273                     String doit = shouldWeBeBrutalLocked(now);
274                     EventLog.writeEvent(EventLogTags.WATCHDOG_SCHEDULED_REBOOT, now,
275                             (int)rebootInterval, (int)rebootStartTime*1000,
276                             (int)rebootWindowMillis, doit != null ? doit : "");
277                     if (doit == null) {
278                         rebootSystem("Checked scheduled range");
279                         return;
280                     }
281 
282                     // Schedule next alarm either within the window or in the
283                     // next interval.
284                     if ((now+recheckInterval) >= (realStartTime+rebootWindowMillis)) {
285                         realStartTime = computeCalendarTime(mCalendar,
286                                 now + rebootIntervalMillis, rebootStartTime);
287                     } else {
288                         realStartTime = now + recheckInterval;
289                     }
290                 } else {
291                     // Schedule alarm for next check interval.
292                     realStartTime = computeCalendarTime(mCalendar,
293                             now + rebootIntervalMillis, rebootStartTime);
294                 }
295             }
296         }
297 
298         if (localLOGV) Slog.v(TAG, "Scheduling next reboot alarm for "
299                 + ((realStartTime-now)/1000/60) + "m from now");
300         mAlarm.remove(mRebootIntent);
301         mAlarm.set(AlarmManager.RTC_WAKEUP, realStartTime, mRebootIntent);
302     }
303 
304     /**
305      * Perform a full reboot of the system.
306      */
rebootSystem(String reason)307     void rebootSystem(String reason) {
308         Slog.i(TAG, "Rebooting system because: " + reason);
309         PowerManagerService pms = (PowerManagerService) ServiceManager.getService("power");
310         pms.reboot(false, reason, false);
311     }
312 
313     /**
314      * Load the current Gservices settings for when
315      * {@link #shouldWeBeBrutalLocked} will allow the brutality to happen.
316      * Must not be called with the lock held.
317      */
retrieveBrutalityAmount()318     void retrieveBrutalityAmount() {
319         mMinScreenOff = (mReqMinScreenOff >= 0 ? mReqMinScreenOff
320                 : MEMCHECK_DEFAULT_MIN_SCREEN_OFF) * 1000;
321         mMinAlarm = (mReqMinNextAlarm >= 0 ? mReqMinNextAlarm
322                 : MEMCHECK_DEFAULT_MIN_ALARM) * 1000;
323     }
324 
325     /**
326      * Determine whether it is a good time to kill, crash, or otherwise
327      * plunder the current situation for the overall long-term benefit of
328      * the world.
329      *
330      * @param curTime The current system time.
331      * @return Returns null if this is a good time, else a String with the
332      * text of why it is not a good time.
333      */
shouldWeBeBrutalLocked(long curTime)334     String shouldWeBeBrutalLocked(long curTime) {
335         if (mBattery == null || !mBattery.isPowered(BatteryManager.BATTERY_PLUGGED_ANY)) {
336             return "battery";
337         }
338 
339         if (mMinScreenOff >= 0 && (mPower == null ||
340                 mPower.timeSinceScreenWasLastOn() < mMinScreenOff)) {
341             return "screen";
342         }
343 
344         if (mMinAlarm >= 0 && (mAlarm == null ||
345                 mAlarm.timeToNextAlarm() < mMinAlarm)) {
346             return "alarm";
347         }
348 
349         return null;
350     }
351 
computeCalendarTime(Calendar c, long curTime, long secondsSinceMidnight)352     static long computeCalendarTime(Calendar c, long curTime,
353             long secondsSinceMidnight) {
354 
355         // start with now
356         c.setTimeInMillis(curTime);
357 
358         int val = (int)secondsSinceMidnight / (60*60);
359         c.set(Calendar.HOUR_OF_DAY, val);
360         secondsSinceMidnight -= val * (60*60);
361         val = (int)secondsSinceMidnight / 60;
362         c.set(Calendar.MINUTE, val);
363         c.set(Calendar.SECOND, (int)secondsSinceMidnight - (val*60));
364         c.set(Calendar.MILLISECOND, 0);
365 
366         long newTime = c.getTimeInMillis();
367         if (newTime < curTime) {
368             // The given time (in seconds since midnight) has already passed for today, so advance
369             // by one day (due to daylight savings, etc., the delta may differ from 24 hours).
370             c.add(Calendar.DAY_OF_MONTH, 1);
371             newTime = c.getTimeInMillis();
372         }
373 
374         return newTime;
375     }
376 
377     @Override
run()378     public void run() {
379         boolean waitedHalf = false;
380         while (true) {
381             mCompleted = false;
382             mHandler.sendEmptyMessage(MONITOR);
383 
384             synchronized (this) {
385                 long timeout = TIME_TO_WAIT;
386 
387                 // NOTE: We use uptimeMillis() here because we do not want to increment the time we
388                 // wait while asleep. If the device is asleep then the thing that we are waiting
389                 // to timeout on is asleep as well and won't have a chance to run, causing a false
390                 // positive on when to kill things.
391                 long start = SystemClock.uptimeMillis();
392                 while (timeout > 0 && !mForceKillSystem) {
393                     try {
394                         wait(timeout);  // notifyAll() is called when mForceKillSystem is set
395                     } catch (InterruptedException e) {
396                         Log.wtf(TAG, e);
397                     }
398                     timeout = TIME_TO_WAIT - (SystemClock.uptimeMillis() - start);
399                 }
400 
401                 if (mCompleted && !mForceKillSystem) {
402                     // The monitors have returned.
403                     waitedHalf = false;
404                     continue;
405                 }
406 
407                 if (!waitedHalf) {
408                     // We've waited half the deadlock-detection interval.  Pull a stack
409                     // trace and wait another half.
410                     ArrayList<Integer> pids = new ArrayList<Integer>();
411                     pids.add(Process.myPid());
412                     ActivityManagerService.dumpStackTraces(true, pids, null, null,
413                             NATIVE_STACKS_OF_INTEREST);
414                     waitedHalf = true;
415                     continue;
416                 }
417             }
418 
419             // If we got here, that means that the system is most likely hung.
420             // First collect stack traces from all threads of the system process.
421             // Then kill this process so that the system will restart.
422 
423             final String name = (mCurrentMonitor != null) ?
424                     mCurrentMonitor.getClass().getName() : "null";
425             EventLog.writeEvent(EventLogTags.WATCHDOG, name);
426 
427             ArrayList<Integer> pids = new ArrayList<Integer>();
428             pids.add(Process.myPid());
429             if (mPhonePid > 0) pids.add(mPhonePid);
430             // Pass !waitedHalf so that just in case we somehow wind up here without having
431             // dumped the halfway stacks, we properly re-initialize the trace file.
432             final File stack = ActivityManagerService.dumpStackTraces(
433                     !waitedHalf, pids, null, null, NATIVE_STACKS_OF_INTEREST);
434 
435             // Give some extra time to make sure the stack traces get written.
436             // The system's been hanging for a minute, another second or two won't hurt much.
437             SystemClock.sleep(2000);
438 
439             // Pull our own kernel thread stacks as well if we're configured for that
440             if (RECORD_KERNEL_THREADS) {
441                 dumpKernelStackTraces();
442             }
443 
444             // Trigger the kernel to dump all blocked threads to the kernel log
445             try {
446                 FileWriter sysrq_trigger = new FileWriter("/proc/sysrq-trigger");
447                 sysrq_trigger.write("w");
448                 sysrq_trigger.close();
449             } catch (IOException e) {
450                 Slog.e(TAG, "Failed to write to /proc/sysrq-trigger");
451                 Slog.e(TAG, e.getMessage());
452             }
453 
454             // Try to add the error to the dropbox, but assuming that the ActivityManager
455             // itself may be deadlocked.  (which has happened, causing this statement to
456             // deadlock and the watchdog as a whole to be ineffective)
457             Thread dropboxThread = new Thread("watchdogWriteToDropbox") {
458                     public void run() {
459                         mActivity.addErrorToDropBox(
460                                 "watchdog", null, "system_server", null, null,
461                                 name, null, stack, null);
462                     }
463                 };
464             dropboxThread.start();
465             try {
466                 dropboxThread.join(2000);  // wait up to 2 seconds for it to return.
467             } catch (InterruptedException ignored) {}
468 
469             // Only kill the process if the debugger is not attached.
470             if (!Debug.isDebuggerConnected()) {
471                 Slog.w(TAG, "*** WATCHDOG KILLING SYSTEM PROCESS: " + name);
472                 Process.killProcess(Process.myPid());
473                 System.exit(10);
474             } else {
475                 Slog.w(TAG, "Debugger connected: Watchdog is *not* killing the system process");
476             }
477 
478             waitedHalf = false;
479         }
480     }
481 
dumpKernelStackTraces()482     private File dumpKernelStackTraces() {
483         String tracesPath = SystemProperties.get("dalvik.vm.stack-trace-file", null);
484         if (tracesPath == null || tracesPath.length() == 0) {
485             return null;
486         }
487 
488         native_dumpKernelStacks(tracesPath);
489         return new File(tracesPath);
490     }
491 
    /**
     * Native hook called by {@link #dumpKernelStackTraces()} with the trace file path.
     * NOTE(review): presumably writes the kernel thread stacks into that file -- the
     * native implementation is not visible here, confirm against the JNI source.
     *
     * @param tracesPath path of the stack trace file
     */
    private native void native_dumpKernelStacks(String tracesPath);
493 }
494