/*
 * Copyright (C) 2008 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.server;

import android.app.IActivityController;
import android.content.BroadcastReceiver;
import android.content.Context;
import android.content.Intent;
import android.content.IntentFilter;
import android.hidl.manager.V1_0.IServiceManager;
import android.os.Binder;
import android.os.Build;
import android.os.Debug;
import android.os.Handler;
import android.os.IPowerManager;
import android.os.Looper;
import android.os.Process;
import android.os.RemoteException;
import android.os.ServiceManager;
import android.os.SystemClock;
import android.system.ErrnoException;
import android.system.Os;
import android.system.OsConstants;
import android.system.StructRlimit;
import android.util.EventLog;
import android.util.Log;
import android.util.Slog;
import android.util.StatsLog;

import com.android.internal.os.ZygoteConnectionConstants;
import com.android.server.am.ActivityManagerService;
import com.android.server.wm.SurfaceAnimationThread;

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;

/**
 * Periodically posts a check onto each registered handler thread and runs the registered
 * {@link Monitor}s. If a check has not completed within its timeout, stack traces are
 * collected and (unless a debugger, an activity controller or {@link #setAllowRestart}
 * prevents it) the current process is killed.
 */
public class Watchdog extends Thread {
    static final String TAG = "Watchdog";

    /** Debug flag. */
    public static final boolean DEBUG = false;

    // Set this to true to use debug default values.
    static final boolean DB = false;

    // Note 1: Do not lower this value below thirty seconds without tightening the invoke-with
    //         timeout in com.android.internal.os.ZygoteConnection, or wrapped applications
    //         can trigger the watchdog.
    // Note 2: The debug value is already below the wait time in ZygoteConnection. Wrapped
    //         applications may not work with a debug build. CTS will fail.
    static final long DEFAULT_TIMEOUT = DB ? 10*1000 : 60*1000;
    static final long CHECK_INTERVAL = DEFAULT_TIMEOUT / 2;

    // These are temporally ordered: larger values as lateness increases
    static final int COMPLETED = 0;
    static final int WAITING = 1;
    static final int WAITED_HALF = 2;
    static final int OVERDUE = 3;

    // Which native processes to dump into dropbox's stack traces
    public static final String[] NATIVE_STACKS_OF_INTEREST = new String[] {
        "/system/bin/audioserver",
        "/system/bin/cameraserver",
        "/system/bin/drmserver",
        "/system/bin/mediadrmserver",
        "/system/bin/mediaserver",
        "/system/bin/sdcard",
        "/system/bin/surfaceflinger",
        "/system/bin/vold",
        "media.extractor", // system/bin/mediaextractor
        "media.metrics", // system/bin/mediametrics
        "media.codec", // vendor/bin/hw/android.hardware.media.omx@1.0-service
        "media.swcodec", // /apex/com.android.media.swcodec/bin/mediaswcodec
        "com.android.bluetooth",  // Bluetooth service
        "/system/bin/statsd",  // Stats daemon
    };

    // HAL interfaces whose hosting processes are included in the native stack dumps.
    public static final List<String> HAL_INTERFACES_OF_INTEREST = Arrays.asList(
        "android.hardware.audio@2.0::IDevicesFactory",
        "android.hardware.audio@4.0::IDevicesFactory",
        "android.hardware.bluetooth@1.0::IBluetoothHci",
        "android.hardware.camera.provider@2.4::ICameraProvider",
        "android.hardware.graphics.allocator@2.0::IAllocator",
        "android.hardware.graphics.composer@2.1::IComposer",
        "android.hardware.health@2.0::IHealth",
        "android.hardware.media.c2@1.0::IComponentStore",
        "android.hardware.media.omx@1.0::IOmx",
        "android.hardware.media.omx@1.0::IOmxStore",
        "android.hardware.sensors@1.0::ISensors",
        "android.hardware.vr@1.0::IVr",
        "android.hardware.biometrics.face@1.0::IBiometricsFace"
    );

    static Watchdog sWatchdog;

    /* All handler threads being watched; accessed while holding the Watchdog lock. */
    final ArrayList<HandlerChecker> mHandlerCheckers = new ArrayList<>();
    /* The checker (on the foreground thread) that also runs the registered monitors. */
    final HandlerChecker mMonitorChecker;
    ActivityManagerService mActivity;

    int mPhonePid;
    IActivityController mController;
    boolean mAllowRestart = true;
    final OpenFdMonitor mOpenFdMonitor;

    /**
     * Used for checking status of handle threads and scheduling monitor callbacks.
     *
     * <p>Methods with a {@code Locked} suffix are called by this file while holding the
     * enclosing {@link Watchdog} instance's lock.
     */
    public final class HandlerChecker implements Runnable {
        private final Handler mHandler;
        private final String mName;
        private final long mWaitMax;
        private final ArrayList<Monitor> mMonitors = new ArrayList<Monitor>();
        private final ArrayList<Monitor> mMonitorQueue = new ArrayList<Monitor>();
        private boolean mCompleted;
        private Monitor mCurrentMonitor;
        private long mStartTime;
        private int mPauseCount;

        /**
         * @param handler handler whose looper thread is being watched
         * @param name human-readable name used in logs and blocked-state descriptions
         * @param waitMaxMillis time after which an unfinished check counts as overdue
         */
        HandlerChecker(Handler handler, String name, long waitMaxMillis) {
            mHandler = handler;
            mName = name;
            mWaitMax = waitMaxMillis;
            mCompleted = true;
        }

        /** Queues a monitor; it becomes active on the next schedule with no check in flight. */
        void addMonitorLocked(Monitor monitor) {
            // We don't want to update mMonitors when the Handler is in the middle of checking
            // all monitors. We will update mMonitors on the next schedule if it is safe
            mMonitorQueue.add(monitor);
        }

        /** Posts this checker at the front of the handler's queue unless that is unnecessary. */
        public void scheduleCheckLocked() {
            if (mCompleted) {
                // Safe to update monitors in queue, Handler is not in the middle of work
                mMonitors.addAll(mMonitorQueue);
                mMonitorQueue.clear();
            }
            if ((mMonitors.size() == 0 && mHandler.getLooper().getQueue().isPolling())
                    || (mPauseCount > 0)) {
                // Don't schedule until after resume OR
                // If the target looper has recently been polling, then
                // there is no reason to enqueue our checker on it since that
                // is as good as it not being deadlocked.  This avoid having
                // to do a context switch to check the thread. Note that we
                // only do this if we have no monitors since those would need to
                // be executed at this point.
                mCompleted = true;
                return;
            }
            if (!mCompleted) {
                // we already have a check in flight, so no need
                return;
            }

            mCompleted = false;
            mCurrentMonitor = null;
            mStartTime = SystemClock.uptimeMillis();
            mHandler.postAtFrontOfQueue(this);
        }

        /** @return {@code true} if a check is in flight and has exceeded the maximum wait. */
        boolean isOverdueLocked() {
            return (!mCompleted) && (SystemClock.uptimeMillis() > mStartTime + mWaitMax);
        }

        /**
         * @return one of {@link #COMPLETED}, {@link #WAITING}, {@link #WAITED_HALF} or
         *         {@link #OVERDUE}, based on how long the in-flight check has been pending
         */
        public int getCompletionStateLocked() {
            if (mCompleted) {
                return COMPLETED;
            } else {
                long latency = SystemClock.uptimeMillis() - mStartTime;
                if (latency < mWaitMax/2) {
                    return WAITING;
                } else if (latency < mWaitMax) {
                    return WAITED_HALF;
                }
            }
            return OVERDUE;
        }

        /** @return the thread that owns the watched handler's looper. */
        public Thread getThread() {
            return mHandler.getLooper().getThread();
        }

        /** @return the name given to this checker at construction. */
        public String getName() {
            return mName;
        }

        /** @return a description of where the watched thread appears to be stuck. */
        String describeBlockedStateLocked() {
            if (mCurrentMonitor == null) {
                return "Blocked in handler on " + mName + " (" + getThread().getName() + ")";
            } else {
                return "Blocked in monitor " + mCurrentMonitor.getClass().getName()
                        + " on " + mName + " (" + getThread().getName() + ")";
            }
        }

        /** Runs every active monitor on the watched thread, then marks the check completed. */
        @Override
        public void run() {
            // Once we get here, we ensure that mMonitors does not change even if we call
            // #addMonitorLocked because we first add the new monitors to mMonitorQueue and
            // move them to mMonitors on the next schedule when mCompleted is true, at which
            // point we have completed execution of this method.
            final int size = mMonitors.size();
            for (int i = 0 ; i < size ; i++) {
                // Invoke through a local captured under the lock: #pauseLocked marks an
                // in-flight check as completed, so a later #scheduleCheckLocked may reset
                // mCurrentMonitor to null while this loop is still running.
                final Monitor monitor;
                synchronized (Watchdog.this) {
                    monitor = mMonitors.get(i);
                    mCurrentMonitor = monitor;
                }
                monitor.monitor();
            }

            synchronized (Watchdog.this) {
                mCompleted = true;
                mCurrentMonitor = null;
            }
        }

        /** Pause the HandlerChecker. */
        public void pauseLocked(String reason) {
            mPauseCount++;
            // Mark as completed, because there's a chance we called this after the watchdog
            // thread loop called Object#wait after 'WAITED_HALF'. In that case we want to ensure
            // the next call to #getCompletionStateLocked for this checker returns 'COMPLETED'
            mCompleted = true;
            Slog.i(TAG, "Pausing HandlerChecker: " + mName + " for reason: "
                    + reason + ". Pause count: " + mPauseCount);
        }

        /** Resume the HandlerChecker from the last {@link #pauseLocked}. */
        public void resumeLocked(String reason) {
            if (mPauseCount > 0) {
                mPauseCount--;
                Slog.i(TAG, "Resuming HandlerChecker: " + mName + " for reason: "
                        + reason + ". Pause count: " + mPauseCount);
            } else {
                Slog.wtf(TAG, "Already resumed HandlerChecker: " + mName);
            }
        }
    }

    /** Reboots the system on an {@code ACTION_REBOOT} broadcast with a non-zero "nowait". */
    final class RebootRequestReceiver extends BroadcastReceiver {
        @Override
        public void onReceive(Context c, Intent intent) {
            if (intent.getIntExtra("nowait", 0) != 0) {
                rebootSystem("Received ACTION_REBOOT broadcast");
                return;
            }
            Slog.w(TAG, "Unsupported ACTION_REBOOT broadcast: " + intent);
        }
    }

    /** Monitor for checking the availability of binder threads. The monitor will block until
     * there is a binder thread available to process in coming IPCs to make sure other processes
     * can still communicate with the service.
     */
    private static final class BinderThreadMonitor implements Watchdog.Monitor {
        @Override
        public void monitor() {
            Binder.blockUntilThreadAvailable();
        }
    }

    /** Callback run on the monitor checker's thread; a call that does not return is a hang. */
    public interface Monitor {
        void monitor();
    }

    /** @return the shared instance, creating it on first use. */
    public static Watchdog getInstance() {
        if (sWatchdog == null) {
            sWatchdog = new Watchdog();
        }

        return sWatchdog;
    }

    private Watchdog() {
        super("watchdog");
        // Initialize handler checkers for each common thread we want to check.  Note
        // that we are not currently checking the background thread, since it can
        // potentially hold longer running operations with no guarantees about the timeliness
        // of operations there.

        // The shared foreground thread is the main checker.  It is where we
        // will also dispatch monitor checks and do other work.
        mMonitorChecker = new HandlerChecker(FgThread.getHandler(),
                "foreground thread", DEFAULT_TIMEOUT);
        mHandlerCheckers.add(mMonitorChecker);
        // Add checker for main thread.  We only do a quick check since there
        // can be UI running on the thread.
        mHandlerCheckers.add(new HandlerChecker(new Handler(Looper.getMainLooper()),
                "main thread", DEFAULT_TIMEOUT));
        // Add checker for shared UI thread.
        mHandlerCheckers.add(new HandlerChecker(UiThread.getHandler(),
                "ui thread", DEFAULT_TIMEOUT));
        // And also check IO thread.
        mHandlerCheckers.add(new HandlerChecker(IoThread.getHandler(),
                "i/o thread", DEFAULT_TIMEOUT));
        // And the display thread.
        mHandlerCheckers.add(new HandlerChecker(DisplayThread.getHandler(),
                "display thread", DEFAULT_TIMEOUT));
        // And the animation thread.
        mHandlerCheckers.add(new HandlerChecker(AnimationThread.getHandler(),
                "animation thread", DEFAULT_TIMEOUT));
        // And the surface animation thread.
        mHandlerCheckers.add(new HandlerChecker(SurfaceAnimationThread.getHandler(),
                "surface animation thread", DEFAULT_TIMEOUT));

        // Initialize monitor for Binder threads.
        addMonitor(new BinderThreadMonitor());

        mOpenFdMonitor = OpenFdMonitor.create();

        // See the notes on DEFAULT_TIMEOUT.
        assert DB ||
                DEFAULT_TIMEOUT > ZygoteConnectionConstants.WRAPPED_PID_TIMEOUT_MILLIS;
    }

    /**
     * Registers a {@link BroadcastReceiver} to listen to reboot broadcasts and trigger reboot.
     * Should be called during boot after the ActivityManagerService is up and registered
     * as a system service so it can handle registration of a {@link BroadcastReceiver}.
     */
    public void init(Context context, ActivityManagerService activity) {
        mActivity = activity;
        context.registerReceiver(new RebootRequestReceiver(),
                new IntentFilter(Intent.ACTION_REBOOT),
                android.Manifest.permission.REBOOT, null);
    }

    /** Records the pid of "com.android.phone" so its stacks are included in watchdog dumps. */
    public void processStarted(String name, int pid) {
        synchronized (this) {
            if ("com.android.phone".equals(name)) {
                mPhonePid = pid;
            }
        }
    }

    /** Sets the controller that is consulted before the process is killed. */
    public void setActivityController(IActivityController controller) {
        synchronized (this) {
            mController = controller;
        }
    }

    /** Sets whether the watchdog may kill the process when a hang is detected. */
    public void setAllowRestart(boolean allowRestart) {
        synchronized (this) {
            mAllowRestart = allowRestart;
        }
    }

    /** Registers a monitor to be run by the monitor checker. */
    public void addMonitor(Monitor monitor) {
        synchronized (this) {
            mMonitorChecker.addMonitorLocked(monitor);
        }
    }

    /** Watches the given handler's thread using {@link #DEFAULT_TIMEOUT}. */
    public void addThread(Handler thread) {
        addThread(thread, DEFAULT_TIMEOUT);
    }

    /** Watches the given handler's thread with a custom timeout in milliseconds. */
    public void addThread(Handler thread, long timeoutMillis) {
        synchronized (this) {
            final String name = thread.getLooper().getThread().getName();
            mHandlerCheckers.add(new HandlerChecker(thread, name, timeoutMillis));
        }
    }

    /**
     * Pauses Watchdog action for the currently running thread. Useful before executing long running
     * operations that could falsely trigger the watchdog. Each call to this will require a matching
     * call to {@link #resumeWatchingCurrentThread}.
     *
     * <p>If the current thread has not been added to the Watchdog, this call is a no-op.
     *
     * <p>If the Watchdog is already paused for the current thread, this call adds
     * another pause and will require an additional {@link #resumeWatchingCurrentThread} to
     * resume.
     *
     * <p>Note: Use with care, as any deadlocks on the current thread will be undetected until all
     * pauses have been resumed.
     */
    public void pauseWatchingCurrentThread(String reason) {
        synchronized (this) {
            for (HandlerChecker hc : mHandlerCheckers) {
                if (Thread.currentThread().equals(hc.getThread())) {
                    hc.pauseLocked(reason);
                }
            }
        }
    }

    /**
     * Resumes the last pause from {@link #pauseWatchingCurrentThread} for the currently running
     * thread.
     *
     * <p>If the current thread has not been added to the Watchdog, this call is a no-op.
     *
     * <p>If the Watchdog action for the current thread is already resumed, this call logs a wtf.
     *
     * <p>If all pauses have been resumed, the Watchdog action is finally resumed, otherwise,
     * the Watchdog action for the current thread remains paused until resume is called at least
     * as many times as the calls to pause.
     */
    public void resumeWatchingCurrentThread(String reason) {
        synchronized (this) {
            for (HandlerChecker hc : mHandlerCheckers) {
                if (Thread.currentThread().equals(hc.getThread())) {
                    hc.resumeLocked(reason);
                }
            }
        }
    }

    /**
     * Perform a full reboot of the system.
     */
    void rebootSystem(String reason) {
        Slog.i(TAG, "Rebooting system because: " + reason);
        IPowerManager pms = (IPowerManager)ServiceManager.getService(Context.POWER_SERVICE);
        if (pms == null) {
            // Nothing to call; report it rather than crashing the caller with an NPE.
            Slog.w(TAG, "Unable to reboot: power service is not available");
            return;
        }
        try {
            pms.reboot(false, reason, false);
        } catch (RemoteException ex) {
            Slog.w(TAG, "Failed to request reboot", ex);
        }
    }

    /** @return the worst (largest) completion state across all handler checkers. */
    private int evaluateCheckerCompletionLocked() {
        int state = COMPLETED;
        for (int i=0; i<mHandlerCheckers.size(); i++) {
            HandlerChecker hc = mHandlerCheckers.get(i);
            state = Math.max(state, hc.getCompletionStateLocked());
        }
        return state;
    }

    /** @return the handler checkers whose in-flight check is overdue. */
    private ArrayList<HandlerChecker> getBlockedCheckersLocked() {
        ArrayList<HandlerChecker> checkers = new ArrayList<HandlerChecker>();
        for (int i=0; i<mHandlerCheckers.size(); i++) {
            HandlerChecker hc = mHandlerCheckers.get(i);
            if (hc.isOverdueLocked()) {
                checkers.add(hc);
            }
        }
        return checkers;
    }

    /** @return a comma-separated description of the blocked state of the given checkers. */
    private String describeCheckersLocked(List<HandlerChecker> checkers) {
        StringBuilder builder = new StringBuilder(128);
        for (int i=0; i<checkers.size(); i++) {
            if (builder.length() > 0) {
                builder.append(", ");
            }
            builder.append(checkers.get(i).describeBlockedStateLocked());
        }
        return builder.toString();
    }

    /**
     * @return the distinct pids hosting any of {@link #HAL_INTERFACES_OF_INTEREST}, or an
     *         empty list if the HIDL service manager call fails
     */
    private static ArrayList<Integer> getInterestingHalPids() {
        try {
            IServiceManager serviceManager = IServiceManager.getService();
            ArrayList<IServiceManager.InstanceDebugInfo> dump =
                    serviceManager.debugDump();
            HashSet<Integer> pids = new HashSet<>();
            for (IServiceManager.InstanceDebugInfo info : dump) {
                if (info.pid == IServiceManager.PidConstant.NO_PID) {
                    continue;
                }

                if (!HAL_INTERFACES_OF_INTEREST.contains(info.interfaceName)) {
                    continue;
                }

                pids.add(info.pid);
            }
            return new ArrayList<Integer>(pids);
        } catch (RemoteException e) {
            return new ArrayList<Integer>();
        }
    }

    /** @return pids of the interesting HAL processes plus {@link #NATIVE_STACKS_OF_INTEREST}. */
    static ArrayList<Integer> getInterestingNativePids() {
        ArrayList<Integer> pids = getInterestingHalPids();

        int[] nativePids = Process.getPidsForCommands(NATIVE_STACKS_OF_INTEREST);
        if (nativePids != null) {
            pids.ensureCapacity(pids.size() + nativePids.length);
            for (int i : nativePids) {
                pids.add(i);
            }
        }

        return pids;
    }

    /**
     * Watchdog loop: every {@link #CHECK_INTERVAL} it (re)schedules the checkers, evaluates
     * their state, dumps stacks at the half-way point, and on overdue (or FD high-water mark)
     * collects diagnostics and kills the process unless prevented.
     */
    @Override
    public void run() {
        boolean waitedHalf = false;
        // Kept outside the loop so that the count-down below survives across iterations.
        // When it was re-declared per iteration the decrement was dead code and the
        // "Debugger was connected" branch could never be taken.
        int debuggerWasConnected = 0;
        while (true) {
            final List<HandlerChecker> blockedCheckers;
            final String subject;
            final boolean allowRestart;
            final int phonePid;
            synchronized (this) {
                long timeout = CHECK_INTERVAL;
                // Make sure we (re)spin the checkers that have become idle within
                // this wait-and-check interval
                for (int i=0; i<mHandlerCheckers.size(); i++) {
                    HandlerChecker hc = mHandlerCheckers.get(i);
                    hc.scheduleCheckLocked();
                }

                if (debuggerWasConnected > 0) {
                    debuggerWasConnected--;
                }

                // NOTE: We use uptimeMillis() here because we do not want to increment the time we
                // wait while asleep. If the device is asleep then the thing that we are waiting
                // to timeout on is asleep as well and won't have a chance to run, causing a false
                // positive on when to kill things.
                long start = SystemClock.uptimeMillis();
                while (timeout > 0) {
                    if (Debug.isDebuggerConnected()) {
                        debuggerWasConnected = 2;
                    }
                    try {
                        wait(timeout);
                        // Note: mHandlerCheckers and mMonitorChecker may have changed after waiting
                    } catch (InterruptedException e) {
                        Log.wtf(TAG, e);
                    }
                    if (Debug.isDebuggerConnected()) {
                        debuggerWasConnected = 2;
                    }
                    timeout = CHECK_INTERVAL - (SystemClock.uptimeMillis() - start);
                }

                boolean fdLimitTriggered = false;
                if (mOpenFdMonitor != null) {
                    fdLimitTriggered = mOpenFdMonitor.monitor();
                }

                if (!fdLimitTriggered) {
                    final int waitState = evaluateCheckerCompletionLocked();
                    if (waitState == COMPLETED) {
                        // The monitors have returned; reset
                        waitedHalf = false;
                        continue;
                    } else if (waitState == WAITING) {
                        // still waiting but within their configured intervals; back off and recheck
                        continue;
                    } else if (waitState == WAITED_HALF) {
                        if (!waitedHalf) {
                            Slog.i(TAG, "WAITED_HALF");
                            // We've waited half the deadlock-detection interval.  Pull a stack
                            // trace and wait another half.
                            ArrayList<Integer> pids = new ArrayList<Integer>();
                            pids.add(Process.myPid());
                            ActivityManagerService.dumpStackTraces(pids, null, null,
                                getInterestingNativePids());
                            waitedHalf = true;
                        }
                        continue;
                    }

                    // something is overdue!
                    blockedCheckers = getBlockedCheckersLocked();
                    subject = describeCheckersLocked(blockedCheckers);
                } else {
                    blockedCheckers = Collections.emptyList();
                    subject = "Open FD high water mark reached";
                }
                allowRestart = mAllowRestart;
                // Snapshot under the same lock that guards the write in #processStarted.
                phonePid = mPhonePid;
            }

            // If we got here, that means that the system is most likely hung.
            // First collect stack traces from all threads of the system process.
            // Then kill this process so that the system will restart.
            EventLog.writeEvent(EventLogTags.WATCHDOG, subject);

            ArrayList<Integer> pids = new ArrayList<>();
            pids.add(Process.myPid());
            if (phonePid > 0) pids.add(phonePid);

            final File stack = ActivityManagerService.dumpStackTraces(
                    pids, null, null, getInterestingNativePids());

            // Give some extra time to make sure the stack traces get written.
            // The system's been hanging for a minute, a few more seconds won't hurt much.
            SystemClock.sleep(5000);

            // Trigger the kernel to dump all blocked threads, and backtraces on all CPUs to the kernel log
            doSysRq('w');
            doSysRq('l');

            // Try to add the error to the dropbox, but assuming that the ActivityManager
            // itself may be deadlocked.  (which has happened, causing this statement to
            // deadlock and the watchdog as a whole to be ineffective)
            Thread dropboxThread = new Thread("watchdogWriteToDropbox") {
                    @Override
                    public void run() {
                        // If a watched thread hangs before init() is called, we don't have a
                        // valid mActivity. So we can't log the error to dropbox.
                        if (mActivity != null) {
                            mActivity.addErrorToDropBox(
                                    "watchdog", null, "system_server", null, null, null,
                                    subject, null, stack, null);
                        }
                        StatsLog.write(StatsLog.SYSTEM_SERVER_WATCHDOG_OCCURRED, subject);
                    }
                };
            dropboxThread.start();
            try {
                dropboxThread.join(2000);  // wait up to 2 seconds for it to return.
            } catch (InterruptedException ignored) {}

            IActivityController controller;
            synchronized (this) {
                controller = mController;
            }
            if (controller != null) {
                Slog.i(TAG, "Reporting stuck state to activity controller");
                try {
                    Binder.setDumpDisabled("Service dumps disabled due to hung system process.");
                    // 1 = keep waiting, -1 = kill system
                    int res = controller.systemNotResponding(subject);
                    if (res >= 0) {
                        Slog.i(TAG, "Activity controller requested to coninue to wait");
                        waitedHalf = false;
                        continue;
                    }
                } catch (RemoteException e) {
                    // The controller could not be reached; fall through to the kill decision.
                }
            }

            // Only kill the process if the debugger is not attached.
            if (Debug.isDebuggerConnected()) {
                debuggerWasConnected = 2;
            }
            if (debuggerWasConnected >= 2) {
                Slog.w(TAG, "Debugger connected: Watchdog is *not* killing the system process");
            } else if (debuggerWasConnected > 0) {
                Slog.w(TAG, "Debugger was connected: Watchdog is *not* killing the system process");
            } else if (!allowRestart) {
                Slog.w(TAG, "Restart not allowed: Watchdog is *not* killing the system process");
            } else {
                Slog.w(TAG, "*** WATCHDOG KILLING SYSTEM PROCESS: " + subject);
                WatchdogDiagnostics.diagnoseCheckers(blockedCheckers);
                Slog.w(TAG, "*** GOODBYE!");
                Process.killProcess(Process.myPid());
                System.exit(10);
            }

            waitedHalf = false;
        }
    }

    /** Writes a single command character to {@code /proc/sysrq-trigger}; failures are logged. */
    private void doSysRq(char c) {
        // try-with-resources so the writer is closed even if write() throws.
        try (FileWriter sysrqTrigger = new FileWriter("/proc/sysrq-trigger")) {
            sysrqTrigger.write(c);
        } catch (IOException e) {
            Slog.w(TAG, "Failed to write to /proc/sysrq-trigger", e);
        }
    }

    /** Detects when the process is close to its open file descriptor soft limit. */
    public static final class OpenFdMonitor {
        /**
         * Number of FDs below the soft limit that we trigger a runtime restart at. This was
         * chosen arbitrarily, but will need to be at least 6 in order to have a sufficient number
         * of FDs in reserve to complete a dump.
         */
        private static final int FD_HIGH_WATER_MARK = 12;

        private final File mDumpDir;
        private final File mFdHighWaterMark;

        /**
         * @return a monitor for this process, or {@code null} on non-debuggable builds or if
         *         the {@code RLIMIT_NOFILE} limit cannot be read
         */
        public static OpenFdMonitor create() {
            // Only run the FD monitor on debuggable builds (such as userdebug and eng builds).
            if (!Build.IS_DEBUGGABLE) {
                return null;
            }

            final StructRlimit rlimit;
            try {
                rlimit = android.system.Os.getrlimit(OsConstants.RLIMIT_NOFILE);
            } catch (ErrnoException errno) {
                Slog.w(TAG, "Error thrown from getrlimit(RLIMIT_NOFILE)", errno);
                return null;
            }

            // The assumption we're making here is that FD numbers are allocated (more or less)
            // sequentially, which is currently (and historically) true since open is currently
            // specified to always return the lowest-numbered non-open file descriptor for the
            // current process.
            //
            // We do this to avoid having to enumerate the contents of /proc/self/fd in order to
            // count the number of descriptors open in the process.
            final File fdThreshold = new File("/proc/self/fd/" + (rlimit.rlim_cur - FD_HIGH_WATER_MARK));
            return new OpenFdMonitor(new File("/data/anr"), fdThreshold);
        }

        /**
         * @param dumpDir directory that receives the descriptor dump files
         * @param fdThreshold {@code /proc} entry whose existence signals the high water mark
         */
        OpenFdMonitor(File dumpDir, File fdThreshold) {
            mDumpDir = dumpDir;
            mFdHighWaterMark = fdThreshold;
        }

        /**
         * Dumps open file descriptors and their full paths to a temporary file in {@code mDumpDir}.
         */
        private void dumpOpenDescriptors() {
            // We cannot exec lsof to get more info about open file descriptors because a newly
            // forked process will not have the permissions to readlink. Instead list all open
            // descriptors from /proc/pid/fd and resolve them.
            List<String> dumpInfo = new ArrayList<>();
            String fdDirPath = String.format("/proc/%d/fd/", Process.myPid());
            File[] fds = new File(fdDirPath).listFiles();
            if (fds == null) {
                dumpInfo.add("Unable to list " + fdDirPath);
            } else {
                for (File f : fds) {
                    String fdSymLink = f.getAbsolutePath();
                    String resolvedPath = "";
                    try {
                        resolvedPath = Os.readlink(fdSymLink);
                    } catch (ErrnoException ex) {
                        // Record the failure text in place of the target path.
                        resolvedPath = ex.getMessage();
                    }
                    dumpInfo.add(fdSymLink + "\t" + resolvedPath);
                }
            }

            // Dump the fds & paths to a temp file.
            try {
                File dumpFile = File.createTempFile("anr_fd_", "", mDumpDir);
                Path out = Paths.get(dumpFile.getAbsolutePath());
                Files.write(out, dumpInfo, StandardCharsets.UTF_8);
            } catch (IOException ex) {
                Slog.w(TAG, "Unable to write open descriptors to file: " + ex);
            }
        }

        /**
         * @return {@code true} if the high water mark was breached and a dump was written,
         *         {@code false} otherwise.
         */
        public boolean monitor() {
            if (mFdHighWaterMark.exists()) {
                dumpOpenDescriptors();
                return true;
            }

            return false;
        }
    }
}