• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2008 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.android.server;
18 
19 import com.android.server.am.ActivityManagerService;
20 
21 import android.app.AlarmManager;
22 import android.app.PendingIntent;
23 import android.content.BroadcastReceiver;
24 import android.content.ContentResolver;
25 import android.content.Context;
26 import android.content.Intent;
27 import android.content.IntentFilter;
28 import android.os.Debug;
29 import android.os.Handler;
30 import android.os.Message;
31 import android.os.Process;
32 import android.os.ServiceManager;
33 import android.os.SystemClock;
34 import android.os.SystemProperties;
35 import android.provider.Settings;
36 import android.util.EventLog;
37 import android.util.Log;
38 import android.util.Slog;
39 
40 import java.io.File;
41 import java.util.ArrayList;
42 import java.util.Calendar;
43 
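/**
 * This class runs its registered monitors once every TIME_TO_WAIT milliseconds and kills
 * this process if they have still not returned after two consecutive waits
 * (TIME_TO_RESTART milliseconds in total).
 */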
45 public class Watchdog extends Thread {
46     static final String TAG = "Watchdog";
47     static final boolean localLOGV = false || false;
48 
49     // Set this to true to use debug default values.
50     static final boolean DB = false;
51 
52     // Set this to true to have the watchdog record kernel thread stacks when it fires
53     static final boolean RECORD_KERNEL_THREADS = true;
54 
55     static final int MONITOR = 2718;
56 
57     static final int TIME_TO_RESTART = DB ? 15*1000 : 60*1000;
58     static final int TIME_TO_WAIT = TIME_TO_RESTART / 2;
59 
60     static final int MEMCHECK_DEFAULT_MIN_SCREEN_OFF = DB ? 1*60 : 5*60;   // 5 minutes
61     static final int MEMCHECK_DEFAULT_MIN_ALARM = DB ? 1*60 : 3*60;        // 3 minutes
62     static final int MEMCHECK_DEFAULT_RECHECK_INTERVAL = DB ? 1*60 : 5*60; // 5 minutes
63 
64     static final int REBOOT_DEFAULT_INTERVAL = DB ? 1 : 0;                 // never force reboot
65     static final int REBOOT_DEFAULT_START_TIME = 3*60*60;                  // 3:00am
66     static final int REBOOT_DEFAULT_WINDOW = 60*60;                        // within 1 hour
67 
68     static final String REBOOT_ACTION = "com.android.service.Watchdog.REBOOT";
69 
70     static Watchdog sWatchdog;
71 
72     /* This handler will be used to post message back onto the main thread */
73     final Handler mHandler;
74     final ArrayList<Monitor> mMonitors = new ArrayList<Monitor>();
75     ContentResolver mResolver;
76     BatteryService mBattery;
77     PowerManagerService mPower;
78     AlarmManagerService mAlarm;
79     ActivityManagerService mActivity;
80     boolean mCompleted;
81     boolean mForceKillSystem;
82     Monitor mCurrentMonitor;
83 
84     int mPhonePid;
85 
86     final Calendar mCalendar = Calendar.getInstance();
87     int mMinScreenOff = MEMCHECK_DEFAULT_MIN_SCREEN_OFF;
88     int mMinAlarm = MEMCHECK_DEFAULT_MIN_ALARM;
89     boolean mNeedScheduledCheck;
90     PendingIntent mCheckupIntent;
91     PendingIntent mRebootIntent;
92 
93     long mBootTime;
94     int mRebootInterval;
95 
96     boolean mReqRebootNoWait;     // should wait for one interval before reboot?
97     int mReqRebootInterval = -1;  // >= 0 if a reboot has been requested
98     int mReqRebootStartTime = -1; // >= 0 if a specific start time has been requested
99     int mReqRebootWindow = -1;    // >= 0 if a specific window has been requested
100     int mReqMinScreenOff = -1;    // >= 0 if a specific screen off time has been requested
101     int mReqMinNextAlarm = -1;    // >= 0 if specific time to next alarm has been requested
102     int mReqRecheckInterval= -1;  // >= 0 if a specific recheck interval has been requested
103 
104     /**
105      * Used for scheduling monitor callbacks and checking memory usage.
106      */
107     final class HeartbeatHandler extends Handler {
108         @Override
handleMessage(Message msg)109         public void handleMessage(Message msg) {
110             switch (msg.what) {
111                 case MONITOR: {
112                     // See if we should force a reboot.
113                     int rebootInterval = mReqRebootInterval >= 0
114                             ? mReqRebootInterval : Settings.Secure.getInt(
115                             mResolver, Settings.Secure.REBOOT_INTERVAL,
116                             REBOOT_DEFAULT_INTERVAL);
117                     if (mRebootInterval != rebootInterval) {
118                         mRebootInterval = rebootInterval;
119                         // We have been running long enough that a reboot can
120                         // be considered...
121                         checkReboot(false);
122                     }
123 
124                     final int size = mMonitors.size();
125                     for (int i = 0 ; i < size ; i++) {
126                         mCurrentMonitor = mMonitors.get(i);
127                         mCurrentMonitor.monitor();
128                     }
129 
130                     synchronized (Watchdog.this) {
131                         mCompleted = true;
132                         mCurrentMonitor = null;
133                     }
134                 } break;
135             }
136         }
137     }
138 
139     final class RebootReceiver extends BroadcastReceiver {
140         @Override
onReceive(Context c, Intent intent)141         public void onReceive(Context c, Intent intent) {
142             if (localLOGV) Slog.v(TAG, "Alarm went off, checking reboot.");
143             checkReboot(true);
144         }
145     }
146 
147     final class RebootRequestReceiver extends BroadcastReceiver {
148         @Override
onReceive(Context c, Intent intent)149         public void onReceive(Context c, Intent intent) {
150             mReqRebootNoWait = intent.getIntExtra("nowait", 0) != 0;
151             mReqRebootInterval = intent.getIntExtra("interval", -1);
152             mReqRebootStartTime = intent.getIntExtra("startTime", -1);
153             mReqRebootWindow = intent.getIntExtra("window", -1);
154             mReqMinScreenOff = intent.getIntExtra("minScreenOff", -1);
155             mReqMinNextAlarm = intent.getIntExtra("minNextAlarm", -1);
156             mReqRecheckInterval = intent.getIntExtra("recheckInterval", -1);
157             EventLog.writeEvent(EventLogTags.WATCHDOG_REQUESTED_REBOOT,
158                     mReqRebootNoWait ? 1 : 0, mReqRebootInterval,
159                             mReqRecheckInterval, mReqRebootStartTime,
160                     mReqRebootWindow, mReqMinScreenOff, mReqMinNextAlarm);
161             checkReboot(true);
162         }
163     }
164 
/**
 * Callback registered through addMonitor(). The watchdog treats a callback
 * that fails to return as a sign that the system is hung.
 */
public interface Monitor {
    /** Called on every heartbeat; must return for the heartbeat to complete. */
    void monitor();
}
168 
getInstance()169     public static Watchdog getInstance() {
170         if (sWatchdog == null) {
171             sWatchdog = new Watchdog();
172         }
173 
174         return sWatchdog;
175     }
176 
/**
 * Creates the watchdog thread (named "watchdog") together with the handler
 * that will receive its heartbeat messages. Private: use getInstance().
 */
private Watchdog() {
    super("watchdog");
    this.mHandler = new HeartbeatHandler();
}
181 
init(Context context, BatteryService battery, PowerManagerService power, AlarmManagerService alarm, ActivityManagerService activity)182     public void init(Context context, BatteryService battery,
183             PowerManagerService power, AlarmManagerService alarm,
184             ActivityManagerService activity) {
185         mResolver = context.getContentResolver();
186         mBattery = battery;
187         mPower = power;
188         mAlarm = alarm;
189         mActivity = activity;
190 
191         context.registerReceiver(new RebootReceiver(),
192                 new IntentFilter(REBOOT_ACTION));
193         mRebootIntent = PendingIntent.getBroadcast(context,
194                 0, new Intent(REBOOT_ACTION), 0);
195 
196         context.registerReceiver(new RebootRequestReceiver(),
197                 new IntentFilter(Intent.ACTION_REBOOT),
198                 android.Manifest.permission.REBOOT, null);
199 
200         mBootTime = System.currentTimeMillis();
201     }
202 
processStarted(String name, int pid)203     public void processStarted(String name, int pid) {
204         synchronized (this) {
205             if ("com.android.phone".equals(name)) {
206                 mPhonePid = pid;
207             }
208         }
209     }
210 
addMonitor(Monitor monitor)211     public void addMonitor(Monitor monitor) {
212         synchronized (this) {
213             if (isAlive()) {
214                 throw new RuntimeException("Monitors can't be added while the Watchdog is running");
215             }
216             mMonitors.add(monitor);
217         }
218     }
219 
checkReboot(boolean fromAlarm)220     void checkReboot(boolean fromAlarm) {
221         int rebootInterval = mReqRebootInterval >= 0 ? mReqRebootInterval
222                 : Settings.Secure.getInt(
223                 mResolver, Settings.Secure.REBOOT_INTERVAL,
224                 REBOOT_DEFAULT_INTERVAL);
225         mRebootInterval = rebootInterval;
226         if (rebootInterval <= 0) {
227             // No reboot interval requested.
228             if (localLOGV) Slog.v(TAG, "No need to schedule a reboot alarm!");
229             mAlarm.remove(mRebootIntent);
230             return;
231         }
232 
233         long rebootStartTime = mReqRebootStartTime >= 0 ? mReqRebootStartTime
234                 : Settings.Secure.getLong(
235                 mResolver, Settings.Secure.REBOOT_START_TIME,
236                 REBOOT_DEFAULT_START_TIME);
237         long rebootWindowMillis = (mReqRebootWindow >= 0 ? mReqRebootWindow
238                 : Settings.Secure.getLong(
239                 mResolver, Settings.Secure.REBOOT_WINDOW,
240                 REBOOT_DEFAULT_WINDOW)) * 1000;
241         long recheckInterval = (mReqRecheckInterval >= 0 ? mReqRecheckInterval
242                 : Settings.Secure.getLong(
243                 mResolver, Settings.Secure.MEMCHECK_RECHECK_INTERVAL,
244                 MEMCHECK_DEFAULT_RECHECK_INTERVAL)) * 1000;
245 
246         retrieveBrutalityAmount();
247 
248         long realStartTime;
249         long now;
250 
251         synchronized (this) {
252             now = System.currentTimeMillis();
253             realStartTime = computeCalendarTime(mCalendar, now,
254                     rebootStartTime);
255 
256             long rebootIntervalMillis = rebootInterval*24*60*60*1000;
257             if (DB || mReqRebootNoWait ||
258                     (now-mBootTime) >= (rebootIntervalMillis-rebootWindowMillis)) {
259                 if (fromAlarm && rebootWindowMillis <= 0) {
260                     // No reboot window -- just immediately reboot.
261                     EventLog.writeEvent(EventLogTags.WATCHDOG_SCHEDULED_REBOOT, now,
262                             (int)rebootIntervalMillis, (int)rebootStartTime*1000,
263                             (int)rebootWindowMillis, "");
264                     rebootSystem("Checkin scheduled forced");
265                     return;
266                 }
267 
268                 // Are we within the reboot window?
269                 if (now < realStartTime) {
270                     // Schedule alarm for next check interval.
271                     realStartTime = computeCalendarTime(mCalendar,
272                             now, rebootStartTime);
273                 } else if (now < (realStartTime+rebootWindowMillis)) {
274                     String doit = shouldWeBeBrutalLocked(now);
275                     EventLog.writeEvent(EventLogTags.WATCHDOG_SCHEDULED_REBOOT, now,
276                             (int)rebootInterval, (int)rebootStartTime*1000,
277                             (int)rebootWindowMillis, doit != null ? doit : "");
278                     if (doit == null) {
279                         rebootSystem("Checked scheduled range");
280                         return;
281                     }
282 
283                     // Schedule next alarm either within the window or in the
284                     // next interval.
285                     if ((now+recheckInterval) >= (realStartTime+rebootWindowMillis)) {
286                         realStartTime = computeCalendarTime(mCalendar,
287                                 now + rebootIntervalMillis, rebootStartTime);
288                     } else {
289                         realStartTime = now + recheckInterval;
290                     }
291                 } else {
292                     // Schedule alarm for next check interval.
293                     realStartTime = computeCalendarTime(mCalendar,
294                             now + rebootIntervalMillis, rebootStartTime);
295                 }
296             }
297         }
298 
299         if (localLOGV) Slog.v(TAG, "Scheduling next reboot alarm for "
300                 + ((realStartTime-now)/1000/60) + "m from now");
301         mAlarm.remove(mRebootIntent);
302         mAlarm.set(AlarmManager.RTC_WAKEUP, realStartTime, mRebootIntent);
303     }
304 
305     /**
306      * Perform a full reboot of the system.
307      */
rebootSystem(String reason)308     void rebootSystem(String reason) {
309         Slog.i(TAG, "Rebooting system because: " + reason);
310         PowerManagerService pms = (PowerManagerService) ServiceManager.getService("power");
311         pms.reboot(reason);
312     }
313 
314     /**
315      * Load the current Gservices settings for when
316      * {@link #shouldWeBeBrutalLocked} will allow the brutality to happen.
317      * Must not be called with the lock held.
318      */
retrieveBrutalityAmount()319     void retrieveBrutalityAmount() {
320         mMinScreenOff = (mReqMinScreenOff >= 0 ? mReqMinScreenOff
321                 : Settings.Secure.getInt(
322                 mResolver, Settings.Secure.MEMCHECK_MIN_SCREEN_OFF,
323                 MEMCHECK_DEFAULT_MIN_SCREEN_OFF)) * 1000;
324         mMinAlarm = (mReqMinNextAlarm >= 0 ? mReqMinNextAlarm
325                 : Settings.Secure.getInt(
326                 mResolver, Settings.Secure.MEMCHECK_MIN_ALARM,
327                 MEMCHECK_DEFAULT_MIN_ALARM)) * 1000;
328     }
329 
330     /**
331      * Determine whether it is a good time to kill, crash, or otherwise
332      * plunder the current situation for the overall long-term benefit of
333      * the world.
334      *
335      * @param curTime The current system time.
336      * @return Returns null if this is a good time, else a String with the
337      * text of why it is not a good time.
338      */
shouldWeBeBrutalLocked(long curTime)339     String shouldWeBeBrutalLocked(long curTime) {
340         if (mBattery == null || !mBattery.isPowered()) {
341             return "battery";
342         }
343 
344         if (mMinScreenOff >= 0 && (mPower == null ||
345                 mPower.timeSinceScreenOn() < mMinScreenOff)) {
346             return "screen";
347         }
348 
349         if (mMinAlarm >= 0 && (mAlarm == null ||
350                 mAlarm.timeToNextAlarm() < mMinAlarm)) {
351             return "alarm";
352         }
353 
354         return null;
355     }
356 
computeCalendarTime(Calendar c, long curTime, long secondsSinceMidnight)357     static long computeCalendarTime(Calendar c, long curTime,
358             long secondsSinceMidnight) {
359 
360         // start with now
361         c.setTimeInMillis(curTime);
362 
363         int val = (int)secondsSinceMidnight / (60*60);
364         c.set(Calendar.HOUR_OF_DAY, val);
365         secondsSinceMidnight -= val * (60*60);
366         val = (int)secondsSinceMidnight / 60;
367         c.set(Calendar.MINUTE, val);
368         c.set(Calendar.SECOND, (int)secondsSinceMidnight - (val*60));
369         c.set(Calendar.MILLISECOND, 0);
370 
371         long newTime = c.getTimeInMillis();
372         if (newTime < curTime) {
373             // The given time (in seconds since midnight) has already passed for today, so advance
374             // by one day (due to daylight savings, etc., the delta may differ from 24 hours).
375             c.add(Calendar.DAY_OF_MONTH, 1);
376             newTime = c.getTimeInMillis();
377         }
378 
379         return newTime;
380     }
381 
382     @Override
run()383     public void run() {
384         boolean waitedHalf = false;
385         while (true) {
386             mCompleted = false;
387             mHandler.sendEmptyMessage(MONITOR);
388 
389             synchronized (this) {
390                 long timeout = TIME_TO_WAIT;
391 
392                 // NOTE: We use uptimeMillis() here because we do not want to increment the time we
393                 // wait while asleep. If the device is asleep then the thing that we are waiting
394                 // to timeout on is asleep as well and won't have a chance to run, causing a false
395                 // positive on when to kill things.
396                 long start = SystemClock.uptimeMillis();
397                 while (timeout > 0 && !mForceKillSystem) {
398                     try {
399                         wait(timeout);  // notifyAll() is called when mForceKillSystem is set
400                     } catch (InterruptedException e) {
401                         Log.wtf(TAG, e);
402                     }
403                     timeout = TIME_TO_WAIT - (SystemClock.uptimeMillis() - start);
404                 }
405 
406                 if (mCompleted && !mForceKillSystem) {
407                     // The monitors have returned.
408                     waitedHalf = false;
409                     continue;
410                 }
411 
412                 if (!waitedHalf) {
413                     // We've waited half the deadlock-detection interval.  Pull a stack
414                     // trace and wait another half.
415                     ArrayList<Integer> pids = new ArrayList<Integer>();
416                     pids.add(Process.myPid());
417                     ActivityManagerService.dumpStackTraces(true, pids, null, null);
418                     waitedHalf = true;
419                     continue;
420                 }
421             }
422 
423             // If we got here, that means that the system is most likely hung.
424             // First collect stack traces from all threads of the system process.
425             // Then kill this process so that the system will restart.
426 
427             final String name = (mCurrentMonitor != null) ?
428                     mCurrentMonitor.getClass().getName() : "null";
429             EventLog.writeEvent(EventLogTags.WATCHDOG, name);
430 
431             ArrayList<Integer> pids = new ArrayList<Integer>();
432             pids.add(Process.myPid());
433             if (mPhonePid > 0) pids.add(mPhonePid);
434             // Pass !waitedHalf so that just in case we somehow wind up here without having
435             // dumped the halfway stacks, we properly re-initialize the trace file.
436             final File stack = ActivityManagerService.dumpStackTraces(
437                     !waitedHalf, pids, null, null);
438 
439             // Give some extra time to make sure the stack traces get written.
440             // The system's been hanging for a minute, another second or two won't hurt much.
441             SystemClock.sleep(2000);
442 
443             // Pull our own kernel thread stacks as well if we're configured for that
444             if (RECORD_KERNEL_THREADS) {
445                 dumpKernelStackTraces();
446             }
447 
448             // Try to add the error to the dropbox, but assuming that the ActivityManager
449             // itself may be deadlocked.  (which has happened, causing this statement to
450             // deadlock and the watchdog as a whole to be ineffective)
451             Thread dropboxThread = new Thread("watchdogWriteToDropbox") {
452                     public void run() {
453                         mActivity.addErrorToDropBox(
454                                 "watchdog", null, null, null, name, null, stack, null);
455                     }
456                 };
457             dropboxThread.start();
458             try {
459                 dropboxThread.join(2000);  // wait up to 2 seconds for it to return.
460             } catch (InterruptedException ignored) {}
461 
462             // Only kill the process if the debugger is not attached.
463             if (!Debug.isDebuggerConnected()) {
464                 Slog.w(TAG, "*** WATCHDOG KILLING SYSTEM PROCESS: " + name);
465                 Process.killProcess(Process.myPid());
466                 System.exit(10);
467             } else {
468                 Slog.w(TAG, "Debugger connected: Watchdog is *not* killing the system process");
469             }
470 
471             waitedHalf = false;
472         }
473     }
474 
dumpKernelStackTraces()475     private File dumpKernelStackTraces() {
476         String tracesPath = SystemProperties.get("dalvik.vm.stack-trace-file", null);
477         if (tracesPath == null || tracesPath.length() == 0) {
478             return null;
479         }
480 
481         native_dumpKernelStacks(tracesPath);
482         return new File(tracesPath);
483     }
484 
native_dumpKernelStacks(String tracesPath)485     private native void native_dumpKernelStacks(String tracesPath);
486 }
487