1 /* 2 * Copyright (C) 2008 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.android.server; 18 19 import com.android.server.am.ActivityManagerService; 20 import com.android.server.power.PowerManagerService; 21 22 import android.app.AlarmManager; 23 import android.app.PendingIntent; 24 import android.content.BroadcastReceiver; 25 import android.content.ContentResolver; 26 import android.content.Context; 27 import android.content.Intent; 28 import android.content.IntentFilter; 29 import android.os.BatteryManager; 30 import android.os.Debug; 31 import android.os.Handler; 32 import android.os.Message; 33 import android.os.Process; 34 import android.os.ServiceManager; 35 import android.os.SystemClock; 36 import android.os.SystemProperties; 37 import android.util.EventLog; 38 import android.util.Log; 39 import android.util.Slog; 40 41 import java.io.File; 42 import java.io.FileWriter; 43 import java.io.IOException; 44 import java.util.ArrayList; 45 import java.util.Calendar; 46 47 /** This class calls its monitor every minute. Killing this process if they don't return **/ 48 public class Watchdog extends Thread { 49 static final String TAG = "Watchdog"; 50 static final boolean localLOGV = false || false; 51 52 // Set this to true to use debug default values. 53 static final boolean DB = false; 54 55 // Set this to true to have the watchdog record kernel thread stacks when it fires 56 static final boolean RECORD_KERNEL_THREADS = true; 57 58 static final int MONITOR = 2718; 59 60 static final int TIME_TO_RESTART = DB ? 15*1000 : 60*1000; 61 static final int TIME_TO_WAIT = TIME_TO_RESTART / 2; 62 63 static final int MEMCHECK_DEFAULT_MIN_SCREEN_OFF = DB ? 1*60 : 5*60; // 5 minutes 64 static final int MEMCHECK_DEFAULT_MIN_ALARM = DB ? 1*60 : 3*60; // 3 minutes 65 static final int MEMCHECK_DEFAULT_RECHECK_INTERVAL = DB ? 1*60 : 5*60; // 5 minutes 66 67 static final int REBOOT_DEFAULT_INTERVAL = DB ? 1 : 0; // never force reboot 68 static final int REBOOT_DEFAULT_START_TIME = 3*60*60; // 3:00am 69 static final int REBOOT_DEFAULT_WINDOW = 60*60; // within 1 hour 70 71 static final String REBOOT_ACTION = "com.android.service.Watchdog.REBOOT"; 72 73 static final String[] NATIVE_STACKS_OF_INTEREST = new String[] { 74 "/system/bin/mediaserver", 75 "/system/bin/sdcard", 76 "/system/bin/surfaceflinger" 77 }; 78 79 static Watchdog sWatchdog; 80 81 /* This handler will be used to post message back onto the main thread */ 82 final Handler mHandler; 83 final ArrayList<Monitor> mMonitors = new ArrayList<Monitor>(); 84 ContentResolver mResolver; 85 BatteryService mBattery; 86 PowerManagerService mPower; 87 AlarmManagerService mAlarm; 88 ActivityManagerService mActivity; 89 boolean mCompleted; 90 boolean mForceKillSystem; 91 Monitor mCurrentMonitor; 92 93 int mPhonePid; 94 95 final Calendar mCalendar = Calendar.getInstance(); 96 int mMinScreenOff = MEMCHECK_DEFAULT_MIN_SCREEN_OFF; 97 int mMinAlarm = MEMCHECK_DEFAULT_MIN_ALARM; 98 boolean mNeedScheduledCheck; 99 PendingIntent mCheckupIntent; 100 PendingIntent mRebootIntent; 101 102 long mBootTime; 103 int mRebootInterval; 104 105 boolean mReqRebootNoWait; // should wait for one interval before reboot? 106 int mReqRebootInterval = -1; // >= 0 if a reboot has been requested 107 int mReqRebootStartTime = -1; // >= 0 if a specific start time has been requested 108 int mReqRebootWindow = -1; // >= 0 if a specific window has been requested 109 int mReqMinScreenOff = -1; // >= 0 if a specific screen off time has been requested 110 int mReqMinNextAlarm = -1; // >= 0 if specific time to next alarm has been requested 111 int mReqRecheckInterval= -1; // >= 0 if a specific recheck interval has been requested 112 113 /** 114 * Used for scheduling monitor callbacks and checking memory usage. 115 */ 116 final class HeartbeatHandler extends Handler { 117 @Override handleMessage(Message msg)118 public void handleMessage(Message msg) { 119 switch (msg.what) { 120 case MONITOR: { 121 // See if we should force a reboot. 122 int rebootInterval = mReqRebootInterval >= 0 123 ? mReqRebootInterval : REBOOT_DEFAULT_INTERVAL; 124 if (mRebootInterval != rebootInterval) { 125 mRebootInterval = rebootInterval; 126 // We have been running long enough that a reboot can 127 // be considered... 128 checkReboot(false); 129 } 130 131 final int size = mMonitors.size(); 132 for (int i = 0 ; i < size ; i++) { 133 mCurrentMonitor = mMonitors.get(i); 134 mCurrentMonitor.monitor(); 135 } 136 137 synchronized (Watchdog.this) { 138 mCompleted = true; 139 mCurrentMonitor = null; 140 } 141 } break; 142 } 143 } 144 } 145 146 final class RebootReceiver extends BroadcastReceiver { 147 @Override onReceive(Context c, Intent intent)148 public void onReceive(Context c, Intent intent) { 149 if (localLOGV) Slog.v(TAG, "Alarm went off, checking reboot."); 150 checkReboot(true); 151 } 152 } 153 154 final class RebootRequestReceiver extends BroadcastReceiver { 155 @Override onReceive(Context c, Intent intent)156 public void onReceive(Context c, Intent intent) { 157 mReqRebootNoWait = intent.getIntExtra("nowait", 0) != 0; 158 mReqRebootInterval = intent.getIntExtra("interval", -1); 159 mReqRebootStartTime = intent.getIntExtra("startTime", -1); 160 mReqRebootWindow = intent.getIntExtra("window", -1); 161 mReqMinScreenOff = intent.getIntExtra("minScreenOff", -1); 162 mReqMinNextAlarm = intent.getIntExtra("minNextAlarm", -1); 163 mReqRecheckInterval = intent.getIntExtra("recheckInterval", -1); 164 EventLog.writeEvent(EventLogTags.WATCHDOG_REQUESTED_REBOOT, 165 mReqRebootNoWait ? 1 : 0, mReqRebootInterval, 166 mReqRecheckInterval, mReqRebootStartTime, 167 mReqRebootWindow, mReqMinScreenOff, mReqMinNextAlarm); 168 checkReboot(true); 169 } 170 } 171 172 public interface Monitor { monitor()173 void monitor(); 174 } 175 getInstance()176 public static Watchdog getInstance() { 177 if (sWatchdog == null) { 178 sWatchdog = new Watchdog(); 179 } 180 181 return sWatchdog; 182 } 183 Watchdog()184 private Watchdog() { 185 super("watchdog"); 186 mHandler = new HeartbeatHandler(); 187 } 188 init(Context context, BatteryService battery, PowerManagerService power, AlarmManagerService alarm, ActivityManagerService activity)189 public void init(Context context, BatteryService battery, 190 PowerManagerService power, AlarmManagerService alarm, 191 ActivityManagerService activity) { 192 mResolver = context.getContentResolver(); 193 mBattery = battery; 194 mPower = power; 195 mAlarm = alarm; 196 mActivity = activity; 197 198 context.registerReceiver(new RebootReceiver(), 199 new IntentFilter(REBOOT_ACTION)); 200 mRebootIntent = PendingIntent.getBroadcast(context, 201 0, new Intent(REBOOT_ACTION), 0); 202 203 context.registerReceiver(new RebootRequestReceiver(), 204 new IntentFilter(Intent.ACTION_REBOOT), 205 android.Manifest.permission.REBOOT, null); 206 207 mBootTime = System.currentTimeMillis(); 208 } 209 processStarted(String name, int pid)210 public void processStarted(String name, int pid) { 211 synchronized (this) { 212 if ("com.android.phone".equals(name)) { 213 mPhonePid = pid; 214 } 215 } 216 } 217 addMonitor(Monitor monitor)218 public void addMonitor(Monitor monitor) { 219 synchronized (this) { 220 if (isAlive()) { 221 throw new RuntimeException("Monitors can't be added while the Watchdog is running"); 222 } 223 mMonitors.add(monitor); 224 } 225 } 226 checkReboot(boolean fromAlarm)227 void checkReboot(boolean fromAlarm) { 228 int rebootInterval = mReqRebootInterval >= 0 ? mReqRebootInterval 229 : REBOOT_DEFAULT_INTERVAL; 230 mRebootInterval = rebootInterval; 231 if (rebootInterval <= 0) { 232 // No reboot interval requested. 233 if (localLOGV) Slog.v(TAG, "No need to schedule a reboot alarm!"); 234 mAlarm.remove(mRebootIntent); 235 return; 236 } 237 238 long rebootStartTime = mReqRebootStartTime >= 0 ? mReqRebootStartTime 239 : REBOOT_DEFAULT_START_TIME; 240 long rebootWindowMillis = (mReqRebootWindow >= 0 ? mReqRebootWindow 241 : REBOOT_DEFAULT_WINDOW) * 1000; 242 long recheckInterval = (mReqRecheckInterval >= 0 ? mReqRecheckInterval 243 : MEMCHECK_DEFAULT_RECHECK_INTERVAL) * 1000; 244 245 retrieveBrutalityAmount(); 246 247 long realStartTime; 248 long now; 249 250 synchronized (this) { 251 now = System.currentTimeMillis(); 252 realStartTime = computeCalendarTime(mCalendar, now, 253 rebootStartTime); 254 255 long rebootIntervalMillis = rebootInterval*24*60*60*1000; 256 if (DB || mReqRebootNoWait || 257 (now-mBootTime) >= (rebootIntervalMillis-rebootWindowMillis)) { 258 if (fromAlarm && rebootWindowMillis <= 0) { 259 // No reboot window -- just immediately reboot. 260 EventLog.writeEvent(EventLogTags.WATCHDOG_SCHEDULED_REBOOT, now, 261 (int)rebootIntervalMillis, (int)rebootStartTime*1000, 262 (int)rebootWindowMillis, ""); 263 rebootSystem("Checkin scheduled forced"); 264 return; 265 } 266 267 // Are we within the reboot window? 268 if (now < realStartTime) { 269 // Schedule alarm for next check interval. 270 realStartTime = computeCalendarTime(mCalendar, 271 now, rebootStartTime); 272 } else if (now < (realStartTime+rebootWindowMillis)) { 273 String doit = shouldWeBeBrutalLocked(now); 274 EventLog.writeEvent(EventLogTags.WATCHDOG_SCHEDULED_REBOOT, now, 275 (int)rebootInterval, (int)rebootStartTime*1000, 276 (int)rebootWindowMillis, doit != null ? doit : ""); 277 if (doit == null) { 278 rebootSystem("Checked scheduled range"); 279 return; 280 } 281 282 // Schedule next alarm either within the window or in the 283 // next interval. 284 if ((now+recheckInterval) >= (realStartTime+rebootWindowMillis)) { 285 realStartTime = computeCalendarTime(mCalendar, 286 now + rebootIntervalMillis, rebootStartTime); 287 } else { 288 realStartTime = now + recheckInterval; 289 } 290 } else { 291 // Schedule alarm for next check interval. 292 realStartTime = computeCalendarTime(mCalendar, 293 now + rebootIntervalMillis, rebootStartTime); 294 } 295 } 296 } 297 298 if (localLOGV) Slog.v(TAG, "Scheduling next reboot alarm for " 299 + ((realStartTime-now)/1000/60) + "m from now"); 300 mAlarm.remove(mRebootIntent); 301 mAlarm.set(AlarmManager.RTC_WAKEUP, realStartTime, mRebootIntent); 302 } 303 304 /** 305 * Perform a full reboot of the system. 306 */ rebootSystem(String reason)307 void rebootSystem(String reason) { 308 Slog.i(TAG, "Rebooting system because: " + reason); 309 PowerManagerService pms = (PowerManagerService) ServiceManager.getService("power"); 310 pms.reboot(false, reason, false); 311 } 312 313 /** 314 * Load the current Gservices settings for when 315 * {@link #shouldWeBeBrutalLocked} will allow the brutality to happen. 316 * Must not be called with the lock held. 317 */ retrieveBrutalityAmount()318 void retrieveBrutalityAmount() { 319 mMinScreenOff = (mReqMinScreenOff >= 0 ? mReqMinScreenOff 320 : MEMCHECK_DEFAULT_MIN_SCREEN_OFF) * 1000; 321 mMinAlarm = (mReqMinNextAlarm >= 0 ? mReqMinNextAlarm 322 : MEMCHECK_DEFAULT_MIN_ALARM) * 1000; 323 } 324 325 /** 326 * Determine whether it is a good time to kill, crash, or otherwise 327 * plunder the current situation for the overall long-term benefit of 328 * the world. 329 * 330 * @param curTime The current system time. 331 * @return Returns null if this is a good time, else a String with the 332 * text of why it is not a good time. 333 */ shouldWeBeBrutalLocked(long curTime)334 String shouldWeBeBrutalLocked(long curTime) { 335 if (mBattery == null || !mBattery.isPowered(BatteryManager.BATTERY_PLUGGED_ANY)) { 336 return "battery"; 337 } 338 339 if (mMinScreenOff >= 0 && (mPower == null || 340 mPower.timeSinceScreenWasLastOn() < mMinScreenOff)) { 341 return "screen"; 342 } 343 344 if (mMinAlarm >= 0 && (mAlarm == null || 345 mAlarm.timeToNextAlarm() < mMinAlarm)) { 346 return "alarm"; 347 } 348 349 return null; 350 } 351 computeCalendarTime(Calendar c, long curTime, long secondsSinceMidnight)352 static long computeCalendarTime(Calendar c, long curTime, 353 long secondsSinceMidnight) { 354 355 // start with now 356 c.setTimeInMillis(curTime); 357 358 int val = (int)secondsSinceMidnight / (60*60); 359 c.set(Calendar.HOUR_OF_DAY, val); 360 secondsSinceMidnight -= val * (60*60); 361 val = (int)secondsSinceMidnight / 60; 362 c.set(Calendar.MINUTE, val); 363 c.set(Calendar.SECOND, (int)secondsSinceMidnight - (val*60)); 364 c.set(Calendar.MILLISECOND, 0); 365 366 long newTime = c.getTimeInMillis(); 367 if (newTime < curTime) { 368 // The given time (in seconds since midnight) has already passed for today, so advance 369 // by one day (due to daylight savings, etc., the delta may differ from 24 hours). 370 c.add(Calendar.DAY_OF_MONTH, 1); 371 newTime = c.getTimeInMillis(); 372 } 373 374 return newTime; 375 } 376 377 @Override run()378 public void run() { 379 boolean waitedHalf = false; 380 while (true) { 381 mCompleted = false; 382 mHandler.sendEmptyMessage(MONITOR); 383 384 synchronized (this) { 385 long timeout = TIME_TO_WAIT; 386 387 // NOTE: We use uptimeMillis() here because we do not want to increment the time we 388 // wait while asleep. If the device is asleep then the thing that we are waiting 389 // to timeout on is asleep as well and won't have a chance to run, causing a false 390 // positive on when to kill things. 391 long start = SystemClock.uptimeMillis(); 392 while (timeout > 0 && !mForceKillSystem) { 393 try { 394 wait(timeout); // notifyAll() is called when mForceKillSystem is set 395 } catch (InterruptedException e) { 396 Log.wtf(TAG, e); 397 } 398 timeout = TIME_TO_WAIT - (SystemClock.uptimeMillis() - start); 399 } 400 401 if (mCompleted && !mForceKillSystem) { 402 // The monitors have returned. 403 waitedHalf = false; 404 continue; 405 } 406 407 if (!waitedHalf) { 408 // We've waited half the deadlock-detection interval. Pull a stack 409 // trace and wait another half. 410 ArrayList<Integer> pids = new ArrayList<Integer>(); 411 pids.add(Process.myPid()); 412 ActivityManagerService.dumpStackTraces(true, pids, null, null, 413 NATIVE_STACKS_OF_INTEREST); 414 waitedHalf = true; 415 continue; 416 } 417 } 418 419 // If we got here, that means that the system is most likely hung. 420 // First collect stack traces from all threads of the system process. 421 // Then kill this process so that the system will restart. 422 423 final String name = (mCurrentMonitor != null) ? 424 mCurrentMonitor.getClass().getName() : "null"; 425 EventLog.writeEvent(EventLogTags.WATCHDOG, name); 426 427 ArrayList<Integer> pids = new ArrayList<Integer>(); 428 pids.add(Process.myPid()); 429 if (mPhonePid > 0) pids.add(mPhonePid); 430 // Pass !waitedHalf so that just in case we somehow wind up here without having 431 // dumped the halfway stacks, we properly re-initialize the trace file. 432 final File stack = ActivityManagerService.dumpStackTraces( 433 !waitedHalf, pids, null, null, NATIVE_STACKS_OF_INTEREST); 434 435 // Give some extra time to make sure the stack traces get written. 436 // The system's been hanging for a minute, another second or two won't hurt much. 437 SystemClock.sleep(2000); 438 439 // Pull our own kernel thread stacks as well if we're configured for that 440 if (RECORD_KERNEL_THREADS) { 441 dumpKernelStackTraces(); 442 } 443 444 // Trigger the kernel to dump all blocked threads to the kernel log 445 try { 446 FileWriter sysrq_trigger = new FileWriter("/proc/sysrq-trigger"); 447 sysrq_trigger.write("w"); 448 sysrq_trigger.close(); 449 } catch (IOException e) { 450 Slog.e(TAG, "Failed to write to /proc/sysrq-trigger"); 451 Slog.e(TAG, e.getMessage()); 452 } 453 454 // Try to add the error to the dropbox, but assuming that the ActivityManager 455 // itself may be deadlocked. (which has happened, causing this statement to 456 // deadlock and the watchdog as a whole to be ineffective) 457 Thread dropboxThread = new Thread("watchdogWriteToDropbox") { 458 public void run() { 459 mActivity.addErrorToDropBox( 460 "watchdog", null, "system_server", null, null, 461 name, null, stack, null); 462 } 463 }; 464 dropboxThread.start(); 465 try { 466 dropboxThread.join(2000); // wait up to 2 seconds for it to return. 467 } catch (InterruptedException ignored) {} 468 469 // Only kill the process if the debugger is not attached. 470 if (!Debug.isDebuggerConnected()) { 471 Slog.w(TAG, "*** WATCHDOG KILLING SYSTEM PROCESS: " + name); 472 Process.killProcess(Process.myPid()); 473 System.exit(10); 474 } else { 475 Slog.w(TAG, "Debugger connected: Watchdog is *not* killing the system process"); 476 } 477 478 waitedHalf = false; 479 } 480 } 481 dumpKernelStackTraces()482 private File dumpKernelStackTraces() { 483 String tracesPath = SystemProperties.get("dalvik.vm.stack-trace-file", null); 484 if (tracesPath == null || tracesPath.length() == 0) { 485 return null; 486 } 487 488 native_dumpKernelStacks(tracesPath); 489 return new File(tracesPath); 490 } 491 native_dumpKernelStacks(String tracesPath)492 private native void native_dumpKernelStacks(String tracesPath); 493 } 494