1 /* 2 * Copyright (C) 2018 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.android.server; 18 19 import static android.content.Intent.ACTION_REBOOT; 20 import static android.content.Intent.ACTION_SHUTDOWN; 21 import static android.service.watchdog.ExplicitHealthCheckService.PackageConfig; 22 import static android.util.Xml.Encoding.UTF_8; 23 24 import static com.android.server.crashrecovery.CrashRecoveryUtils.dumpCrashRecoveryEvents; 25 26 import static java.lang.annotation.RetentionPolicy.SOURCE; 27 28 import android.annotation.CallbackExecutor; 29 import android.annotation.FlaggedApi; 30 import android.annotation.IntDef; 31 import android.annotation.NonNull; 32 import android.annotation.Nullable; 33 import android.annotation.SuppressLint; 34 import android.annotation.SystemApi; 35 import android.content.BroadcastReceiver; 36 import android.content.Context; 37 import android.content.Intent; 38 import android.content.IntentFilter; 39 import android.content.pm.PackageInfo; 40 import android.content.pm.PackageManager; 41 import android.content.pm.VersionedPackage; 42 import android.crashrecovery.flags.Flags; 43 import android.os.Environment; 44 import android.os.Handler; 45 import android.os.Looper; 46 import android.os.Process; 47 import android.os.SystemProperties; 48 import android.provider.DeviceConfig; 49 import android.sysprop.CrashRecoveryProperties; 50 import android.text.TextUtils; 51 
import android.util.ArrayMap; 52 import android.util.ArraySet; 53 import android.util.AtomicFile; 54 import android.util.EventLog; 55 import android.util.IndentingPrintWriter; 56 import android.util.LongArrayQueue; 57 import android.util.Slog; 58 import android.util.Xml; 59 import android.util.XmlUtils; 60 61 import com.android.internal.annotations.GuardedBy; 62 import com.android.internal.annotations.VisibleForTesting; 63 import com.android.internal.util.FastXmlSerializer; 64 import com.android.modules.utils.BackgroundThread; 65 66 import libcore.io.IoUtils; 67 68 import org.xmlpull.v1.XmlPullParser; 69 import org.xmlpull.v1.XmlPullParserException; 70 import org.xmlpull.v1.XmlSerializer; 71 72 import java.io.BufferedReader; 73 import java.io.BufferedWriter; 74 import java.io.File; 75 import java.io.FileInputStream; 76 import java.io.FileNotFoundException; 77 import java.io.FileOutputStream; 78 import java.io.FileReader; 79 import java.io.FileWriter; 80 import java.io.IOException; 81 import java.io.InputStream; 82 import java.io.ObjectInputStream; 83 import java.io.ObjectOutputStream; 84 import java.io.PrintWriter; 85 import java.lang.annotation.Retention; 86 import java.lang.annotation.RetentionPolicy; 87 import java.util.ArrayList; 88 import java.util.Collections; 89 import java.util.HashMap; 90 import java.util.Iterator; 91 import java.util.List; 92 import java.util.Map; 93 import java.util.NoSuchElementException; 94 import java.util.Set; 95 import java.util.concurrent.Executor; 96 import java.util.concurrent.TimeUnit; 97 98 /** 99 * Monitors the health of packages on the system and notifies interested observers when packages 100 * fail. On failure, the registered observer with the least user impacting mitigation will 101 * be notified. 
102 * @hide 103 */ 104 @FlaggedApi(Flags.FLAG_ENABLE_CRASHRECOVERY) 105 @SystemApi(client = SystemApi.Client.SYSTEM_SERVER) 106 public class PackageWatchdog { 107 private static final String TAG = "PackageWatchdog"; 108 109 static final String PROPERTY_WATCHDOG_TRIGGER_DURATION_MILLIS = 110 "watchdog_trigger_failure_duration_millis"; 111 static final String PROPERTY_WATCHDOG_TRIGGER_FAILURE_COUNT = 112 "watchdog_trigger_failure_count"; 113 static final String PROPERTY_WATCHDOG_EXPLICIT_HEALTH_CHECK_ENABLED = 114 "watchdog_explicit_health_check_enabled"; 115 116 // TODO: make the following values configurable via DeviceConfig 117 private static final long NATIVE_CRASH_POLLING_INTERVAL_MILLIS = 118 TimeUnit.SECONDS.toMillis(30); 119 private static final long NUMBER_OF_NATIVE_CRASH_POLLS = 10; 120 121 122 /** Reason for package failure could not be determined. */ 123 public static final int FAILURE_REASON_UNKNOWN = 0; 124 125 /** The package had a native crash. */ 126 public static final int FAILURE_REASON_NATIVE_CRASH = 1; 127 128 /** The package failed an explicit health check. */ 129 public static final int FAILURE_REASON_EXPLICIT_HEALTH_CHECK = 2; 130 131 /** The app crashed. */ 132 public static final int FAILURE_REASON_APP_CRASH = 3; 133 134 /** The app was not responding. */ 135 public static final int FAILURE_REASON_APP_NOT_RESPONDING = 4; 136 137 /** The device was boot looping. 
*/ 138 public static final int FAILURE_REASON_BOOT_LOOP = 5; 139 140 /** @hide */ 141 @IntDef(prefix = { "FAILURE_REASON_" }, value = { 142 FAILURE_REASON_UNKNOWN, 143 FAILURE_REASON_NATIVE_CRASH, 144 FAILURE_REASON_EXPLICIT_HEALTH_CHECK, 145 FAILURE_REASON_APP_CRASH, 146 FAILURE_REASON_APP_NOT_RESPONDING, 147 FAILURE_REASON_BOOT_LOOP 148 }) 149 @Retention(RetentionPolicy.SOURCE) 150 public @interface FailureReasons {} 151 152 // Duration to count package failures before it resets to 0 153 @VisibleForTesting 154 static final int DEFAULT_TRIGGER_FAILURE_DURATION_MS = 155 (int) TimeUnit.MINUTES.toMillis(1); 156 // Number of package failures within the duration above before we notify observers 157 @VisibleForTesting 158 static final int DEFAULT_TRIGGER_FAILURE_COUNT = 5; 159 @VisibleForTesting 160 static final long DEFAULT_OBSERVING_DURATION_MS = TimeUnit.DAYS.toMillis(2); 161 // Sliding window for tracking how many mitigation calls were made for a package. 162 @VisibleForTesting 163 static final long DEFAULT_DEESCALATION_WINDOW_MS = TimeUnit.HOURS.toMillis(1); 164 // Whether explicit health checks are enabled or not 165 private static final boolean DEFAULT_EXPLICIT_HEALTH_CHECK_ENABLED = true; 166 167 @VisibleForTesting 168 static final int DEFAULT_BOOT_LOOP_TRIGGER_COUNT = 5; 169 170 static final long DEFAULT_BOOT_LOOP_TRIGGER_WINDOW_MS = TimeUnit.MINUTES.toMillis(10); 171 172 // Time needed to apply mitigation 173 private static final String MITIGATION_WINDOW_MS = 174 "persist.device_config.configuration.mitigation_window_ms"; 175 @VisibleForTesting 176 static final long DEFAULT_MITIGATION_WINDOW_MS = TimeUnit.SECONDS.toMillis(5); 177 178 // Threshold level at which or above user might experience significant disruption. 
179 private static final String MAJOR_USER_IMPACT_LEVEL_THRESHOLD = 180 "persist.device_config.configuration.major_user_impact_level_threshold"; 181 private static final int DEFAULT_MAJOR_USER_IMPACT_LEVEL_THRESHOLD = 182 PackageHealthObserverImpact.USER_IMPACT_LEVEL_71; 183 184 // Comma separated list of all packages exempt from user impact level threshold. If a package 185 // in the list is crash looping, all the mitigations including factory reset will be performed. 186 private static final String PACKAGES_EXEMPT_FROM_IMPACT_LEVEL_THRESHOLD = 187 "persist.device_config.configuration.packages_exempt_from_impact_level_threshold"; 188 189 // Comma separated list of default packages exempt from user impact level threshold. 190 private static final String DEFAULT_PACKAGES_EXEMPT_FROM_IMPACT_LEVEL_THRESHOLD = 191 "com.android.systemui"; 192 193 private long mNumberOfNativeCrashPollsRemaining; 194 195 private static final int DB_VERSION = 1; 196 private static final String TAG_PACKAGE_WATCHDOG = "package-watchdog"; 197 private static final String TAG_PACKAGE = "package"; 198 private static final String TAG_OBSERVER = "observer"; 199 private static final String ATTR_VERSION = "version"; 200 private static final String ATTR_NAME = "name"; 201 private static final String ATTR_DURATION = "duration"; 202 private static final String ATTR_EXPLICIT_HEALTH_CHECK_DURATION = "health-check-duration"; 203 private static final String ATTR_PASSED_HEALTH_CHECK = "passed-health-check"; 204 private static final String ATTR_MITIGATION_CALLS = "mitigation-calls"; 205 private static final String ATTR_MITIGATION_COUNT = "mitigation-count"; 206 207 // A file containing information about the current mitigation count in the case of a boot loop. 208 // This allows boot loop information to persist in the case of an fs-checkpoint being 209 // aborted. 
210 private static final String METADATA_FILE = "/metadata/watchdog/mitigation_count.txt"; 211 212 /** 213 * EventLog tags used when logging into the event log. Note the values must be sync with 214 * frameworks/base/services/core/java/com/android/server/EventLogTags.logtags to get correct 215 * name translation. 216 */ 217 private static final int LOG_TAG_RESCUE_NOTE = 2900; 218 219 private static final Object sPackageWatchdogLock = new Object(); 220 @GuardedBy("sPackageWatchdogLock") 221 private static PackageWatchdog sPackageWatchdog; 222 223 private static final Object sLock = new Object(); 224 // System server context 225 private final Context mContext; 226 // Handler to run short running tasks 227 private final Handler mShortTaskHandler; 228 // Handler for processing IO and long running tasks 229 private final Handler mLongTaskHandler; 230 // Contains (observer-name -> observer-handle) that have ever been registered from 231 // previous boots. Observers with all packages expired are periodically pruned. 232 // It is saved to disk on system shutdown and repouplated on startup so it survives reboots. 
233 @GuardedBy("sLock") 234 private final ArrayMap<String, ObserverInternal> mAllObservers = new ArrayMap<>(); 235 // File containing the XML data of monitored packages /data/system/package-watchdog.xml 236 private final AtomicFile mPolicyFile; 237 private final ExplicitHealthCheckController mHealthCheckController; 238 private final Runnable mSyncRequests = this::syncRequests; 239 private final Runnable mSyncStateWithScheduledReason = this::syncStateWithScheduledReason; 240 private final Runnable mSaveToFile = this::saveToFile; 241 private final SystemClock mSystemClock; 242 private final BootThreshold mBootThreshold; 243 private final DeviceConfig.OnPropertiesChangedListener 244 mOnPropertyChangedListener = this::onPropertyChanged; 245 246 private final Set<String> mPackagesExemptFromImpactLevelThreshold = new ArraySet<>(); 247 248 // The set of packages that have been synced with the ExplicitHealthCheckController 249 @GuardedBy("sLock") 250 private Set<String> mRequestedHealthCheckPackages = new ArraySet<>(); 251 @GuardedBy("sLock") 252 private boolean mIsPackagesReady; 253 // Flag to control whether explicit health checks are supported or not 254 @GuardedBy("sLock") 255 private boolean mIsHealthCheckEnabled = DEFAULT_EXPLICIT_HEALTH_CHECK_ENABLED; 256 @GuardedBy("sLock") 257 private int mTriggerFailureDurationMs = DEFAULT_TRIGGER_FAILURE_DURATION_MS; 258 @GuardedBy("sLock") 259 private int mTriggerFailureCount = DEFAULT_TRIGGER_FAILURE_COUNT; 260 // SystemClock#uptimeMillis when we last executed #syncState 261 // 0 if no prune is scheduled. 262 @GuardedBy("sLock") 263 private long mUptimeAtLastStateSync; 264 // If true, sync explicit health check packages with the ExplicitHealthCheckController. 
265 @GuardedBy("sLock") 266 private boolean mSyncRequired = false; 267 268 @GuardedBy("sLock") 269 private long mLastMitigation = -1000000; 270 271 @FunctionalInterface 272 @VisibleForTesting 273 interface SystemClock { uptimeMillis()274 long uptimeMillis(); 275 } 276 PackageWatchdog(Context context)277 private PackageWatchdog(Context context) { 278 // Needs to be constructed inline 279 this(context, new AtomicFile( 280 new File(new File(Environment.getDataDirectory(), "system"), 281 "package-watchdog.xml")), 282 new Handler(Looper.myLooper()), BackgroundThread.getHandler(), 283 new ExplicitHealthCheckController(context), 284 android.os.SystemClock::uptimeMillis); 285 } 286 287 /** 288 * Creates a PackageWatchdog that allows injecting dependencies. 289 */ 290 @VisibleForTesting PackageWatchdog(Context context, AtomicFile policyFile, Handler shortTaskHandler, Handler longTaskHandler, ExplicitHealthCheckController controller, SystemClock clock)291 PackageWatchdog(Context context, AtomicFile policyFile, Handler shortTaskHandler, 292 Handler longTaskHandler, ExplicitHealthCheckController controller, 293 SystemClock clock) { 294 mContext = context; 295 mPolicyFile = policyFile; 296 mShortTaskHandler = shortTaskHandler; 297 mLongTaskHandler = longTaskHandler; 298 mHealthCheckController = controller; 299 mSystemClock = clock; 300 mNumberOfNativeCrashPollsRemaining = NUMBER_OF_NATIVE_CRASH_POLLS; 301 mBootThreshold = new BootThreshold(DEFAULT_BOOT_LOOP_TRIGGER_COUNT, 302 DEFAULT_BOOT_LOOP_TRIGGER_WINDOW_MS); 303 304 loadFromFile(); 305 sPackageWatchdog = this; 306 } 307 308 /** 309 * Creates or gets singleton instance of PackageWatchdog. 310 * 311 * @param context The system server context. 
312 */ getInstance(@onNull Context context)313 public static @NonNull PackageWatchdog getInstance(@NonNull Context context) { 314 synchronized (sPackageWatchdogLock) { 315 if (sPackageWatchdog == null) { 316 new PackageWatchdog(context); 317 } 318 return sPackageWatchdog; 319 } 320 } 321 322 /** 323 * Called during boot to notify when packages are ready on the device so we can start 324 * binding. 325 * @hide 326 */ onPackagesReady()327 public void onPackagesReady() { 328 synchronized (sLock) { 329 mIsPackagesReady = true; 330 mHealthCheckController.setCallbacks(packageName -> onHealthCheckPassed(packageName), 331 packages -> onSupportedPackages(packages), 332 this::onSyncRequestNotified); 333 setPropertyChangedListenerLocked(); 334 updateConfigs(); 335 } 336 } 337 338 /** 339 * Registers {@code observer} to listen for package failures. Add a new ObserverInternal for 340 * this observer if it does not already exist. 341 * For executing mitigations observers will receive callback on the given executor. 342 * 343 * <p>Observers are expected to call this on boot. It does not specify any packages but 344 * it will resume observing any packages requested from a previous boot. 345 * 346 * @param observer instance of {@link PackageHealthObserver} for observing package failures 347 * and boot loops. 
348 * @param executor Executor for the thread on which observers would receive callbacks 349 */ registerHealthObserver(@onNull @allbackExecutor Executor executor, @NonNull PackageHealthObserver observer)350 public void registerHealthObserver(@NonNull @CallbackExecutor Executor executor, 351 @NonNull PackageHealthObserver observer) { 352 synchronized (sLock) { 353 ObserverInternal internalObserver = mAllObservers.get(observer.getUniqueIdentifier()); 354 if (internalObserver != null) { 355 internalObserver.registeredObserver = observer; 356 internalObserver.observerExecutor = executor; 357 } else { 358 internalObserver = new ObserverInternal(observer.getUniqueIdentifier(), 359 new ArrayList<>()); 360 internalObserver.registeredObserver = observer; 361 internalObserver.observerExecutor = executor; 362 mAllObservers.put(observer.getUniqueIdentifier(), internalObserver); 363 syncState("added new observer"); 364 } 365 } 366 } 367 368 /** 369 * Starts observing the health of the {@code packages} for {@code observer}. 370 * Note: Observer needs to be registered with {@link #registerHealthObserver} before calling 371 * this API. 372 * 373 * <p>If monitoring a package supporting explicit health check, at the end of the monitoring 374 * duration if {@link #onHealthCheckPassed} was never called, 375 * {@link PackageHealthObserver#onExecuteHealthCheckMitigation} will be called as if the 376 * package failed. 377 * 378 * <p>If {@code observer} is already monitoring a package in {@code packageNames}, 379 * the monitoring window of that package will be reset to {@code durationMs} and the health 380 * check state will be reset to a default. 381 * 382 * <p>The {@code observer} must be registered with {@link #registerHealthObserver} before 383 * calling this method. 384 * 385 * @param packageNames The list of packages to check. If this is empty, the call will be a 386 * no-op. 387 * 388 * @param timeoutMs The timeout after which Explicit Health Checks would not run. 
If this is 389 * less than 1, a default monitoring duration 2 days will be used. 390 * 391 * @throws IllegalStateException if the observer was not previously registered 392 */ startExplicitHealthCheck(@onNull List<String> packageNames, long timeoutMs, @NonNull PackageHealthObserver observer)393 public void startExplicitHealthCheck(@NonNull List<String> packageNames, long timeoutMs, 394 @NonNull PackageHealthObserver observer) { 395 synchronized (sLock) { 396 if (!mAllObservers.containsKey(observer.getUniqueIdentifier())) { 397 Slog.wtf(TAG, "No observer found, need to register the observer: " 398 + observer.getUniqueIdentifier()); 399 throw new IllegalStateException("Observer not registered"); 400 } 401 } 402 if (packageNames.isEmpty()) { 403 Slog.wtf(TAG, "No packages to observe, " + observer.getUniqueIdentifier()); 404 return; 405 } 406 if (timeoutMs < 1) { 407 Slog.wtf(TAG, "Invalid duration " + timeoutMs + "ms for observer " 408 + observer.getUniqueIdentifier() + ". Not observing packages " + packageNames); 409 timeoutMs = DEFAULT_OBSERVING_DURATION_MS; 410 } 411 412 List<MonitoredPackage> packages = new ArrayList<>(); 413 for (int i = 0; i < packageNames.size(); i++) { 414 // Health checks not available yet so health check state will start INACTIVE 415 MonitoredPackage pkg = newMonitoredPackage(packageNames.get(i), timeoutMs, false); 416 if (pkg != null) { 417 packages.add(pkg); 418 } else { 419 Slog.w(TAG, "Failed to create MonitoredPackage for pkg=" + packageNames.get(i)); 420 } 421 } 422 423 if (packages.isEmpty()) { 424 return; 425 } 426 427 // Sync before we add the new packages to the observers. This will #pruneObservers, 428 // causing any elapsed time to be deducted from all existing packages before we add new 429 // packages. This maintains the invariant that the elapsed time for ALL (new and existing) 430 // packages is the same. 
431 mLongTaskHandler.post(() -> { 432 syncState("observing new packages"); 433 434 synchronized (sLock) { 435 ObserverInternal oldObserver = mAllObservers.get(observer.getUniqueIdentifier()); 436 if (oldObserver == null) { 437 Slog.d(TAG, observer.getUniqueIdentifier() + " started monitoring health " 438 + "of packages " + packageNames); 439 mAllObservers.put(observer.getUniqueIdentifier(), 440 new ObserverInternal(observer.getUniqueIdentifier(), packages)); 441 } else { 442 Slog.d(TAG, observer.getUniqueIdentifier() + " added the following " 443 + "packages to monitor " + packageNames); 444 oldObserver.updatePackagesLocked(packages); 445 } 446 } 447 448 // Sync after we add the new packages to the observers. We may have received packges 449 // requiring an earlier schedule than we are currently scheduled for. 450 syncState("updated observers"); 451 }); 452 453 } 454 455 /** 456 * Unregisters {@code observer} from listening to package failure. 457 * Additionally, this stops observing any packages that may have previously been observed 458 * even from a previous boot. 459 */ unregisterHealthObserver(@onNull PackageHealthObserver observer)460 public void unregisterHealthObserver(@NonNull PackageHealthObserver observer) { 461 mLongTaskHandler.post(() -> { 462 synchronized (sLock) { 463 mAllObservers.remove(observer.getUniqueIdentifier()); 464 } 465 syncState("unregistering observer: " + observer.getUniqueIdentifier()); 466 }); 467 } 468 469 /** 470 * Called when a process fails due to a crash, ANR or explicit health check. 471 * 472 * <p>For each package contained in the process, one registered observer with the least user 473 * impact will be notified for mitigation. 474 * 475 * <p>This method could be called frequently if there is a severe problem on the device. 
*/
public void notifyPackageFailure(@NonNull List<VersionedPackage> packages,
        @FailureReasons int failureReason) {
    if (packages == null) {
        Slog.w(TAG, "Could not resolve a list of failing packages");
        return;
    }
    synchronized (sLock) {
        // Ignore failures reported within the mitigation window that follows the last
        // dispatched mitigation.
        final long now = mSystemClock.uptimeMillis();
        if (now >= mLastMitigation
                && (now - mLastMitigation) < getMitigationWindowMs()) {
            Slog.i(TAG, "Skipping notifyPackageFailure mitigation");
            return;
        }
    }
    mLongTaskHandler.post(() -> {
        synchronized (sLock) {
            if (mAllObservers.isEmpty()) {
                return;
            }
            final boolean requiresImmediateAction =
                    failureReason == FAILURE_REASON_NATIVE_CRASH
                            || failureReason == FAILURE_REASON_EXPLICIT_HEALTH_CHECK;
            if (requiresImmediateAction) {
                handleFailureImmediately(packages, failureReason);
                return;
            }

            for (int pIndex = 0; pIndex < packages.size(); pIndex++) {
                final VersionedPackage versionedPackage = packages.get(pIndex);
                final String packageName = versionedPackage.getPackageName();

                // Observer that will receive the failure for versionedPackage.
                ObserverInternal currentObserverToNotify = null;
                int currentObserverImpact = Integer.MAX_VALUE;
                MonitoredPackage currentMonitoredPackage = null;

                // Find the observer offering the mitigation with the least user impact.
                for (int oIndex = 0; oIndex < mAllObservers.size(); oIndex++) {
                    final ObserverInternal observer = mAllObservers.valueAt(oIndex);
                    final PackageHealthObserver registeredObserver =
                            observer.registeredObserver;
                    // Observers that are not registered in this boot are skipped before
                    // notifyPackageFailureLocked() is ever invoked on them.
                    if (registeredObserver == null
                            || !observer.notifyPackageFailureLocked(packageName)) {
                        continue;
                    }
                    final MonitoredPackage p = observer.getMonitoredPackage(packageName);
                    final int mitigationCount =
                            (p != null) ? p.getMitigationCountLocked() + 1 : 1;
                    final int impact = registeredObserver.onHealthCheckFailed(
                            versionedPackage, failureReason, mitigationCount);
                    // USER_IMPACT_LEVEL_0 means the observer has no mitigation to offer.
                    if (impact != PackageHealthObserverImpact.USER_IMPACT_LEVEL_0
                            && impact < currentObserverImpact) {
                        currentObserverToNotify = observer;
                        currentObserverImpact = impact;
                        currentMonitoredPackage = p;
                    }
                }

                // Execute the action with the least user impact.
                if (currentObserverToNotify != null) {
                    int mitigationCount = 1;
                    if (currentMonitoredPackage != null) {
                        currentMonitoredPackage.noteMitigationCallLocked();
                        mitigationCount = currentMonitoredPackage.getMitigationCountLocked();
                    }
                    maybeExecute(currentObserverToNotify, versionedPackage,
                            failureReason, currentObserverImpact, mitigationCount);
                }
            }
        }
    });
}

/**
 * For native crashes or explicit health check failures, call directly into each observer to
 * mitigate the error without going through failure threshold logic.
 *
 * <p>Only the first entry of {@code packages} is reported; when the list is empty the
 * observers are queried with a {@code null} package.
 *
 * @param packages the failing packages, possibly empty
 * @param failureReason the type of failure that occurred
 */
@GuardedBy("sLock")
private void handleFailureImmediately(List<VersionedPackage> packages,
        @FailureReasons int failureReason) {
    final VersionedPackage failingPackage = packages.isEmpty() ? null : packages.get(0);
    ObserverInternal currentObserverToNotify = null;
    int currentObserverImpact = Integer.MAX_VALUE;
    for (ObserverInternal observer : mAllObservers.values()) {
        final PackageHealthObserver registeredObserver = observer.registeredObserver;
        if (registeredObserver == null) {
            continue;
        }
        final int impact = registeredObserver.onHealthCheckFailed(
                failingPackage, failureReason, 1);
        if (impact != PackageHealthObserverImpact.USER_IMPACT_LEVEL_0
                && impact < currentObserverImpact) {
            currentObserverToNotify = observer;
            currentObserverImpact = impact;
        }
    }
    if (currentObserverToNotify != null) {
        maybeExecute(currentObserverToNotify, failingPackage, failureReason,
                currentObserverImpact, /*mitigationCount=*/ 1);
    }
}

/**
 * Dispatches the chosen observer's mitigation on that observer's executor, provided
 * {@link #allowMitigations} permits it, and records the dispatch time in
 * {@code mLastMitigation}.
 *
 * @param currentObserverToNotify the observer selected to mitigate the failure
 * @param versionedPackage the failing package; may be {@code null}
 * @param failureReason the type of failure that occurred
 * @param currentObserverImpact the user impact reported by the selected observer
 * @param mitigationCount the mitigation count handed to the observer
 */
private void maybeExecute(ObserverInternal currentObserverToNotify,
        VersionedPackage versionedPackage,
        @FailureReasons int failureReason,
        int currentObserverImpact,
        int mitigationCount) {
    if (!allowMitigations(currentObserverImpact, versionedPackage)) {
        return;
    }
    final PackageHealthObserver registeredObserver;
    synchronized (sLock) {
        mLastMitigation = mSystemClock.uptimeMillis();
        registeredObserver = currentObserverToNotify.registeredObserver;
    }
    currentObserverToNotify.observerExecutor.execute(() ->
            registeredObserver.onExecuteHealthCheckMitigation(versionedPackage,
                    failureReason, mitigationCount));
}

/**
 * Decides whether a mitigation with the given impact may run for {@code versionedPackage}.
 *
 * @param currentObserverImpact the user impact of the candidate mitigation
 * @param versionedPackage the failing package; may be {@code null}
 * @return {@code true} if the impact is below the configured limit, or if the package is
 *         on the list of packages exempt from that limit
 */
private boolean allowMitigations(int currentObserverImpact,
        VersionedPackage versionedPackage) {
    if (currentObserverImpact < getUserImpactLevelLimit()) {
        return true;
    }
    // handleFailureImmediately() passes a null package when the failing package list is
    // empty. Such a failure cannot match an exempt package, so refuse the mitigation
    // instead of dereferencing null.
    return versionedPackage != null
            && getPackagesExemptFromImpactLevelThreshold().contains(
                    versionedPackage.getPackageName());
}
private long getMitigationWindowMs() { 604 return SystemProperties.getLong(MITIGATION_WINDOW_MS, DEFAULT_MITIGATION_WINDOW_MS); 605 } 606 607 608 /** 609 * Called when the system server boots. If the system server is detected to be in a boot loop, 610 * query each observer and perform the mitigation action with the lowest user impact. 611 * 612 * Note: PackageWatchdog considers system_server restart loop as bootloop. Full reboots 613 * are not counted in bootloop. 614 * @hide 615 */ 616 @SuppressWarnings("GuardedBy") noteBoot()617 public void noteBoot() { 618 synchronized (sLock) { 619 // if boot count has reached threshold, start mitigation. 620 // We wait until threshold number of restarts only for the first time. Perform 621 // mitigations for every restart after that. 622 boolean mitigate = mBootThreshold.incrementAndTest(); 623 if (mitigate) { 624 int mitigationCount = mBootThreshold.getMitigationCount() + 1; 625 ObserverInternal currentObserverToNotify = null; 626 int currentObserverImpact = Integer.MAX_VALUE; 627 for (int i = 0; i < mAllObservers.size(); i++) { 628 final ObserverInternal observer = mAllObservers.valueAt(i); 629 PackageHealthObserver registeredObserver = observer.registeredObserver; 630 if (registeredObserver != null) { 631 int impact = registeredObserver.onBootLoop( 632 observer.getBootMitigationCount() + 1); 633 if (impact != PackageHealthObserverImpact.USER_IMPACT_LEVEL_0 634 && impact < currentObserverImpact) { 635 currentObserverToNotify = observer; 636 currentObserverImpact = impact; 637 } 638 } 639 } 640 641 if (currentObserverToNotify != null) { 642 PackageHealthObserver registeredObserver = 643 currentObserverToNotify.registeredObserver; 644 int currentObserverMitigationCount = 645 currentObserverToNotify.getBootMitigationCount() + 1; 646 currentObserverToNotify.setBootMitigationCount( 647 currentObserverMitigationCount); 648 saveAllObserversBootMitigationCountToMetadata(METADATA_FILE); 649 currentObserverToNotify.observerExecutor 
650 .execute(() -> registeredObserver.onExecuteBootLoopMitigation( 651 currentObserverMitigationCount)); 652 } 653 } 654 } 655 } 656 657 // TODO(b/120598832): Optimize write? Maybe only write a separate smaller file? Also 658 // avoid holding lock? 659 // This currently adds about 7ms extra to shutdown thread 660 /** @hide Writes the package information to file during shutdown. */ writeNow()661 public void writeNow() { 662 synchronized (sLock) { 663 // Must only run synchronous tasks as this runs on the ShutdownThread and no other 664 // thread is guaranteed to run during shutdown. 665 if (!mAllObservers.isEmpty()) { 666 mLongTaskHandler.removeCallbacks(mSaveToFile); 667 pruneObserversLocked(); 668 saveToFile(); 669 Slog.i(TAG, "Last write to update package durations"); 670 } 671 } 672 } 673 674 /** 675 * Enables or disables explicit health checks. 676 * <p> If explicit health checks are enabled, the health check service is started. 677 * <p> If explicit health checks are disabled, pending explicit health check requests are 678 * passed and the health check service is stopped. 679 */ setExplicitHealthCheckEnabled(boolean enabled)680 private void setExplicitHealthCheckEnabled(boolean enabled) { 681 synchronized (sLock) { 682 mIsHealthCheckEnabled = enabled; 683 mHealthCheckController.setEnabled(enabled); 684 mSyncRequired = true; 685 // Prune to update internal state whenever health check is enabled/disabled 686 syncState("health check state " + (enabled ? "enabled" : "disabled")); 687 } 688 } 689 690 /** 691 * This method should be only called on mShortTaskHandler, since it modifies 692 * {@link #mNumberOfNativeCrashPollsRemaining}. 
693 */ checkAndMitigateNativeCrashes()694 private void checkAndMitigateNativeCrashes() { 695 mNumberOfNativeCrashPollsRemaining--; 696 // Check if native watchdog reported a crash 697 if ("1".equals(SystemProperties.get("sys.init.updatable_crashing"))) { 698 // We rollback all available low impact rollbacks when crash is unattributable 699 notifyPackageFailure(Collections.EMPTY_LIST, FAILURE_REASON_NATIVE_CRASH); 700 // we stop polling after an attempt to execute rollback, regardless of whether the 701 // attempt succeeds or not 702 } else { 703 if (mNumberOfNativeCrashPollsRemaining > 0) { 704 mShortTaskHandler.postDelayed(() -> checkAndMitigateNativeCrashes(), 705 NATIVE_CRASH_POLLING_INTERVAL_MILLIS); 706 } 707 } 708 } 709 710 /** 711 * Since this method can eventually trigger a rollback, it should be called 712 * only once boot has completed {@code onBootCompleted} and not earlier, because the install 713 * session must be entirely completed before we try to rollback. 714 * @hide 715 */ scheduleCheckAndMitigateNativeCrashes()716 public void scheduleCheckAndMitigateNativeCrashes() { 717 Slog.i(TAG, "Scheduling " + mNumberOfNativeCrashPollsRemaining + " polls to check " 718 + "and mitigate native crashes"); 719 mShortTaskHandler.post(()->checkAndMitigateNativeCrashes()); 720 } 721 getUserImpactLevelLimit()722 private int getUserImpactLevelLimit() { 723 return SystemProperties.getInt(MAJOR_USER_IMPACT_LEVEL_THRESHOLD, 724 DEFAULT_MAJOR_USER_IMPACT_LEVEL_THRESHOLD); 725 } 726 getPackagesExemptFromImpactLevelThreshold()727 private Set<String> getPackagesExemptFromImpactLevelThreshold() { 728 if (mPackagesExemptFromImpactLevelThreshold.isEmpty()) { 729 String packageNames = SystemProperties.get(PACKAGES_EXEMPT_FROM_IMPACT_LEVEL_THRESHOLD, 730 DEFAULT_PACKAGES_EXEMPT_FROM_IMPACT_LEVEL_THRESHOLD); 731 return Set.of(packageNames.split("\\s*,\\s*")); 732 } 733 return mPackagesExemptFromImpactLevelThreshold; 734 } 735 736 /** 737 * Indicates that a mitigation was 
successfully triggered or executed during
 * {@link PackageHealthObserver#onExecuteHealthCheckMitigation} or
 * {@link PackageHealthObserver#onExecuteBootLoopMitigation}.
 */
public static final int MITIGATION_RESULT_SUCCESS =
        ObserverMitigationResult.MITIGATION_RESULT_SUCCESS;

/**
 * Indicates that a mitigation executed during
 * {@link PackageHealthObserver#onExecuteHealthCheckMitigation} or
 * {@link PackageHealthObserver#onExecuteBootLoopMitigation} was skipped.
 */
public static final int MITIGATION_RESULT_SKIPPED =
        ObserverMitigationResult.MITIGATION_RESULT_SKIPPED;


/**
 * Possible return values for mitigations executed during
 * {@link PackageHealthObserver#onExecuteHealthCheckMitigation} and
 * {@link PackageHealthObserver#onExecuteBootLoopMitigation}.
 * @hide
 */
@Retention(SOURCE)
@IntDef(prefix = "MITIGATION_RESULT_", value = {
        ObserverMitigationResult.MITIGATION_RESULT_SUCCESS,
        ObserverMitigationResult.MITIGATION_RESULT_SKIPPED,
        })
public @interface ObserverMitigationResult {
    /** The mitigation was successfully triggered or executed. */
    int MITIGATION_RESULT_SUCCESS = 1;
    /** The mitigation was skipped. */
    int MITIGATION_RESULT_SKIPPED = 2;
}

/**
 * The minimum value that can be returned by any observer.
 * It represents that no mitigations were available.
 */
public static final int USER_IMPACT_THRESHOLD_NONE =
        PackageHealthObserverImpact.USER_IMPACT_LEVEL_0;

/**
 * The mitigation impact beyond which the user will start noticing the mitigations.
 */
public static final int USER_IMPACT_THRESHOLD_MEDIUM =
        PackageHealthObserverImpact.USER_IMPACT_LEVEL_20;

/**
 * The mitigation impact beyond which the user impact is severely high.
 */
public static final int USER_IMPACT_THRESHOLD_HIGH =
        PackageHealthObserverImpact.USER_IMPACT_LEVEL_71;

/**
 * Possible severity values of the user impact of a
 * {@link PackageHealthObserver#onExecuteHealthCheckMitigation}.
 * @hide
 */
@Retention(SOURCE)
@IntDef(value = {PackageHealthObserverImpact.USER_IMPACT_LEVEL_0,
        PackageHealthObserverImpact.USER_IMPACT_LEVEL_10,
        PackageHealthObserverImpact.USER_IMPACT_LEVEL_20,
        PackageHealthObserverImpact.USER_IMPACT_LEVEL_30,
        PackageHealthObserverImpact.USER_IMPACT_LEVEL_40,
        PackageHealthObserverImpact.USER_IMPACT_LEVEL_50,
        PackageHealthObserverImpact.USER_IMPACT_LEVEL_70,
        PackageHealthObserverImpact.USER_IMPACT_LEVEL_71,
        PackageHealthObserverImpact.USER_IMPACT_LEVEL_75,
        PackageHealthObserverImpact.USER_IMPACT_LEVEL_80,
        PackageHealthObserverImpact.USER_IMPACT_LEVEL_90,
        PackageHealthObserverImpact.USER_IMPACT_LEVEL_100})
public @interface PackageHealthObserverImpact {
    /** No action to take. */
    int USER_IMPACT_LEVEL_0 = 0;
    /** Action has low user impact, user of a device will barely notice. */
    int USER_IMPACT_LEVEL_10 = 10;
    /** Actions having medium user impact, user of a device will likely notice. */
    int USER_IMPACT_LEVEL_20 = 20;
    int USER_IMPACT_LEVEL_30 = 30;
    int USER_IMPACT_LEVEL_40 = 40;
    int USER_IMPACT_LEVEL_50 = 50;
    int USER_IMPACT_LEVEL_70 = 70;
    /** Action has high user impact, a last resort, user of a device will be very frustrated. */
    int USER_IMPACT_LEVEL_71 = 71;
    int USER_IMPACT_LEVEL_75 = 75;
    int USER_IMPACT_LEVEL_80 = 80;
    int USER_IMPACT_LEVEL_90 = 90;
    int USER_IMPACT_LEVEL_100 = 100;
}

/** Register instances of this interface to receive notifications on package failure. */
@SuppressLint({"CallbackName"})
public interface PackageHealthObserver {
    /**
     * Called when health check fails for the {@code versionedPackage}.
     * Note: if the returned user impact is higher than {@link #USER_IMPACT_THRESHOLD_HIGH},
     * then {@link #onExecuteHealthCheckMitigation} would be called only in severe device
     * conditions like boot-loop or network failure.
833 * 834 * @param versionedPackage the package that is failing. This may be null if a native 835 * service is crashing. 836 * @param failureReason the type of failure that is occurring. 837 * @param mitigationCount the number of times mitigation has been called for this package 838 * (including this time). 839 * 840 * @return any value greater than {@link #USER_IMPACT_THRESHOLD_NONE} to express 841 * the impact of mitigation on the user in {@link #onExecuteHealthCheckMitigation}. 842 * Returning {@link #USER_IMPACT_THRESHOLD_NONE} would indicate no mitigations available. 843 */ onHealthCheckFailed( @ullable VersionedPackage versionedPackage, @FailureReasons int failureReason, int mitigationCount)844 @PackageHealthObserverImpact int onHealthCheckFailed( 845 @Nullable VersionedPackage versionedPackage, 846 @FailureReasons int failureReason, 847 int mitigationCount); 848 849 /** 850 * This would be called after {@link #onHealthCheckFailed}. 851 * This is called only if current observer returned least impact mitigation for failed 852 * health check. 853 * 854 * @param versionedPackage the package that is failing. This may be null if a native 855 * service is crashing. 856 * @param failureReason the type of failure that is occurring. 857 * @param mitigationCount the number of times mitigation has been called for this package 858 * (including this time). 859 * @return {@link #MITIGATION_RESULT_SUCCESS} if the mitigation was successful, 860 * or {@link #MITIGATION_RESULT_SKIPPED} if the mitigation was skipped. 
861 */ onExecuteHealthCheckMitigation( @ullable VersionedPackage versionedPackage, @FailureReasons int failureReason, int mitigationCount)862 @ObserverMitigationResult int onExecuteHealthCheckMitigation( 863 @Nullable VersionedPackage versionedPackage, 864 @FailureReasons int failureReason, int mitigationCount); 865 866 867 /** 868 * Called when the system server has booted several times within a window of time, defined 869 * by {@link #mBootThreshold} 870 * 871 * @param mitigationCount the number of times mitigation has been attempted for this 872 * boot loop (including this time). 873 * 874 * @return any value greater than {@link #USER_IMPACT_THRESHOLD_NONE} to express 875 * the impact of mitigation on the user in {@link #onExecuteBootLoopMitigation}. 876 * Returning {@link #USER_IMPACT_THRESHOLD_NONE} would indicate no mitigations available. 877 */ onBootLoop(int mitigationCount)878 default @PackageHealthObserverImpact int onBootLoop(int mitigationCount) { 879 return PackageHealthObserverImpact.USER_IMPACT_LEVEL_0; 880 } 881 882 /** 883 * This would be called after {@link #onBootLoop}. 884 * This is called only if current observer returned least impact mitigation for fixing 885 * boot loop. 886 * 887 * @param mitigationCount the number of times mitigation has been attempted for this 888 * boot loop (including this time). 889 * 890 * @return {@link #MITIGATION_RESULT_SUCCESS} if the mitigation was successful, 891 * or {@link #MITIGATION_RESULT_SKIPPED} if the mitigation was skipped. 892 */ onExecuteBootLoopMitigation(int mitigationCount)893 default @ObserverMitigationResult int onExecuteBootLoopMitigation(int mitigationCount) { 894 return ObserverMitigationResult.MITIGATION_RESULT_SKIPPED; 895 } 896 897 // TODO(b/120598832): Ensure uniqueness? 898 /** 899 * Identifier for the observer, should not change across device updates otherwise the 900 * watchdog may drop observing packages with the old name. 
901 */ getUniqueIdentifier()902 @NonNull String getUniqueIdentifier(); 903 904 /** 905 * An observer will not be pruned if this is set, even if the observer is not explicitly 906 * monitoring any packages. 907 */ isPersistent()908 default boolean isPersistent() { 909 return false; 910 } 911 912 /** 913 * Returns {@code true} if this observer wishes to observe the given package, {@code false} 914 * otherwise. 915 * Any failing package can be passed on to the observer. Currently the packages that have 916 * ANRs and perform {@link android.service.watchdog.ExplicitHealthCheckService} are being 917 * passed to observers in these API. 918 * 919 * <p> A persistent observer may choose to start observing certain failing packages, even if 920 * it has not explicitly asked to watch the package with {@link #startExplicitHealthCheck}. 921 */ mayObservePackage(@onNull String packageName)922 default boolean mayObservePackage(@NonNull String packageName) { 923 return false; 924 } 925 } 926 927 @VisibleForTesting getTriggerFailureCount()928 long getTriggerFailureCount() { 929 synchronized (sLock) { 930 return mTriggerFailureCount; 931 } 932 } 933 934 @VisibleForTesting getTriggerFailureDurationMs()935 long getTriggerFailureDurationMs() { 936 synchronized (sLock) { 937 return mTriggerFailureDurationMs; 938 } 939 } 940 941 /** 942 * Serializes and syncs health check requests with the {@link ExplicitHealthCheckController}. 943 */ syncRequestsAsync()944 private void syncRequestsAsync() { 945 mShortTaskHandler.removeCallbacks(mSyncRequests); 946 mShortTaskHandler.post(mSyncRequests); 947 } 948 949 /** 950 * Syncs health check requests with the {@link ExplicitHealthCheckController}. 951 * Calls to this must be serialized. 
952 * 953 * @see #syncRequestsAsync 954 */ syncRequests()955 private void syncRequests() { 956 boolean syncRequired = false; 957 synchronized (sLock) { 958 if (mIsPackagesReady) { 959 Set<String> packages = getPackagesPendingHealthChecksLocked(); 960 if (mSyncRequired || !packages.equals(mRequestedHealthCheckPackages) 961 || packages.isEmpty()) { 962 syncRequired = true; 963 mRequestedHealthCheckPackages = packages; 964 } 965 } // else, we will sync requests when packages become ready 966 } 967 968 // Call outside lock to avoid holding lock when calling into the controller. 969 if (syncRequired) { 970 Slog.i(TAG, "Syncing health check requests for packages: " 971 + mRequestedHealthCheckPackages); 972 mHealthCheckController.syncRequests(mRequestedHealthCheckPackages); 973 mSyncRequired = false; 974 } 975 } 976 977 /** 978 * Updates the observers monitoring {@code packageName} that explicit health check has passed. 979 * 980 * <p> This update is strictly for registered observers at the time of the call 981 * Observers that register after this signal will have no knowledge of prior signals and will 982 * effectively behave as if the explicit health check hasn't passed for {@code packageName}. 983 * 984 * <p> {@code packageName} can still be considered failed if reported by 985 * {@link #notifyPackageFailureLocked} before the package expires. 986 * 987 * <p> Triggered by components outside the system server when they are fully functional after an 988 * update. 
989 */ onHealthCheckPassed(String packageName)990 private void onHealthCheckPassed(String packageName) { 991 Slog.i(TAG, "Health check passed for package: " + packageName); 992 boolean isStateChanged = false; 993 994 synchronized (sLock) { 995 for (int observerIdx = 0; observerIdx < mAllObservers.size(); observerIdx++) { 996 ObserverInternal observer = mAllObservers.valueAt(observerIdx); 997 MonitoredPackage monitoredPackage = observer.getMonitoredPackage(packageName); 998 999 if (monitoredPackage != null) { 1000 int oldState = monitoredPackage.getHealthCheckStateLocked(); 1001 int newState = monitoredPackage.tryPassHealthCheckLocked(); 1002 isStateChanged |= oldState != newState; 1003 } 1004 } 1005 } 1006 1007 if (isStateChanged) { 1008 syncState("health check passed for " + packageName); 1009 } 1010 } 1011 onSupportedPackages(List<PackageConfig> supportedPackages)1012 private void onSupportedPackages(List<PackageConfig> supportedPackages) { 1013 boolean isStateChanged = false; 1014 1015 Map<String, Long> supportedPackageTimeouts = new ArrayMap<>(); 1016 Iterator<PackageConfig> it = supportedPackages.iterator(); 1017 while (it.hasNext()) { 1018 PackageConfig info = it.next(); 1019 supportedPackageTimeouts.put(info.getPackageName(), info.getHealthCheckTimeoutMillis()); 1020 } 1021 1022 synchronized (sLock) { 1023 Slog.d(TAG, "Received supported packages " + supportedPackages); 1024 Iterator<ObserverInternal> oit = mAllObservers.values().iterator(); 1025 while (oit.hasNext()) { 1026 Iterator<MonitoredPackage> pit = oit.next().getMonitoredPackages() 1027 .values().iterator(); 1028 while (pit.hasNext()) { 1029 MonitoredPackage monitoredPackage = pit.next(); 1030 String packageName = monitoredPackage.getName(); 1031 int oldState = monitoredPackage.getHealthCheckStateLocked(); 1032 int newState; 1033 1034 if (supportedPackageTimeouts.containsKey(packageName)) { 1035 // Supported packages become ACTIVE if currently INACTIVE 1036 newState = 
monitoredPackage.setHealthCheckActiveLocked( 1037 supportedPackageTimeouts.get(packageName)); 1038 } else { 1039 // Unsupported packages are marked as PASSED unless already FAILED 1040 newState = monitoredPackage.tryPassHealthCheckLocked(); 1041 } 1042 isStateChanged |= oldState != newState; 1043 } 1044 } 1045 } 1046 1047 if (isStateChanged) { 1048 syncState("updated health check supported packages " + supportedPackages); 1049 } 1050 } 1051 onSyncRequestNotified()1052 private void onSyncRequestNotified() { 1053 synchronized (sLock) { 1054 mSyncRequired = true; 1055 syncRequestsAsync(); 1056 } 1057 } 1058 1059 @GuardedBy("sLock") getPackagesPendingHealthChecksLocked()1060 private Set<String> getPackagesPendingHealthChecksLocked() { 1061 Set<String> packages = new ArraySet<>(); 1062 Iterator<ObserverInternal> oit = mAllObservers.values().iterator(); 1063 while (oit.hasNext()) { 1064 ObserverInternal observer = oit.next(); 1065 Iterator<MonitoredPackage> pit = 1066 observer.getMonitoredPackages().values().iterator(); 1067 while (pit.hasNext()) { 1068 MonitoredPackage monitoredPackage = pit.next(); 1069 String packageName = monitoredPackage.getName(); 1070 if (monitoredPackage.isPendingHealthChecksLocked()) { 1071 packages.add(packageName); 1072 } 1073 } 1074 } 1075 return packages; 1076 } 1077 1078 /** 1079 * Syncs the state of the observers. 1080 * 1081 * <p> Prunes all observers, saves new state to disk, syncs health check requests with the 1082 * health check service and schedules the next state sync. 
1083 */ syncState(String reason)1084 private void syncState(String reason) { 1085 synchronized (sLock) { 1086 Slog.i(TAG, "Syncing state, reason: " + reason); 1087 pruneObserversLocked(); 1088 1089 saveToFileAsync(); 1090 syncRequestsAsync(); 1091 1092 // Done syncing state, schedule the next state sync 1093 scheduleNextSyncStateLocked(); 1094 } 1095 } 1096 syncStateWithScheduledReason()1097 private void syncStateWithScheduledReason() { 1098 syncState("scheduled"); 1099 } 1100 1101 @GuardedBy("sLock") scheduleNextSyncStateLocked()1102 private void scheduleNextSyncStateLocked() { 1103 long durationMs = getNextStateSyncMillisLocked(); 1104 mShortTaskHandler.removeCallbacks(mSyncStateWithScheduledReason); 1105 if (durationMs == Long.MAX_VALUE) { 1106 Slog.i(TAG, "Cancelling state sync, nothing to sync"); 1107 mUptimeAtLastStateSync = 0; 1108 } else { 1109 mUptimeAtLastStateSync = mSystemClock.uptimeMillis(); 1110 mShortTaskHandler.postDelayed(mSyncStateWithScheduledReason, durationMs); 1111 } 1112 } 1113 1114 /** 1115 * Returns the next duration in millis to sync the watchdog state. 1116 * 1117 * @returns Long#MAX_VALUE if there are no observed packages. 1118 */ 1119 @GuardedBy("sLock") getNextStateSyncMillisLocked()1120 private long getNextStateSyncMillisLocked() { 1121 long shortestDurationMs = Long.MAX_VALUE; 1122 for (int oIndex = 0; oIndex < mAllObservers.size(); oIndex++) { 1123 ArrayMap<String, MonitoredPackage> packages = mAllObservers.valueAt(oIndex) 1124 .getMonitoredPackages(); 1125 for (int pIndex = 0; pIndex < packages.size(); pIndex++) { 1126 MonitoredPackage mp = packages.valueAt(pIndex); 1127 long duration = mp.getShortestScheduleDurationMsLocked(); 1128 if (duration < shortestDurationMs) { 1129 shortestDurationMs = duration; 1130 } 1131 } 1132 } 1133 return shortestDurationMs; 1134 } 1135 1136 /** 1137 * Removes {@code elapsedMs} milliseconds from all durations on monitored packages 1138 * and updates other internal state. 
1139 */ 1140 @GuardedBy("sLock") pruneObserversLocked()1141 private void pruneObserversLocked() { 1142 long elapsedMs = mUptimeAtLastStateSync == 0 1143 ? 0 : mSystemClock.uptimeMillis() - mUptimeAtLastStateSync; 1144 if (elapsedMs <= 0) { 1145 Slog.i(TAG, "Not pruning observers, elapsed time: " + elapsedMs + "ms"); 1146 return; 1147 } 1148 1149 Iterator<ObserverInternal> it = mAllObservers.values().iterator(); 1150 while (it.hasNext()) { 1151 ObserverInternal observer = it.next(); 1152 Set<MonitoredPackage> failedPackages = 1153 observer.prunePackagesLocked(elapsedMs); 1154 if (!failedPackages.isEmpty()) { 1155 onHealthCheckFailed(observer, failedPackages); 1156 } 1157 if (observer.getMonitoredPackages().isEmpty() && (observer.registeredObserver == null 1158 || !observer.registeredObserver.isPersistent())) { 1159 Slog.i(TAG, "Discarding observer " + observer.name + ". All packages expired"); 1160 it.remove(); 1161 } 1162 } 1163 } 1164 onHealthCheckFailed(ObserverInternal observer, Set<MonitoredPackage> failedPackages)1165 private void onHealthCheckFailed(ObserverInternal observer, 1166 Set<MonitoredPackage> failedPackages) { 1167 mLongTaskHandler.post(() -> { 1168 synchronized (sLock) { 1169 PackageHealthObserver registeredObserver = observer.registeredObserver; 1170 if (registeredObserver != null) { 1171 Iterator<MonitoredPackage> it = failedPackages.iterator(); 1172 while (it.hasNext()) { 1173 VersionedPackage versionedPkg = getVersionedPackage(it.next().getName()); 1174 if (versionedPkg != null) { 1175 Slog.i(TAG, 1176 "Explicit health check failed for package " + versionedPkg); 1177 observer.observerExecutor.execute(() -> 1178 registeredObserver.onExecuteHealthCheckMitigation(versionedPkg, 1179 PackageWatchdog.FAILURE_REASON_EXPLICIT_HEALTH_CHECK, 1180 1)); 1181 } 1182 } 1183 } 1184 } 1185 }); 1186 } 1187 1188 /** 1189 * Gets PackageInfo for the given package. Matches any user and apex. 
1190 * 1191 * @throws PackageManager.NameNotFoundException if no such package is installed. 1192 */ getPackageInfo(String packageName)1193 private PackageInfo getPackageInfo(String packageName) 1194 throws PackageManager.NameNotFoundException { 1195 PackageManager pm = mContext.getPackageManager(); 1196 try { 1197 // The MATCH_ANY_USER flag doesn't mix well with the MATCH_APEX 1198 // flag, so make two separate attempts to get the package info. 1199 // We don't need both flags at the same time because we assume 1200 // apex files are always installed for all users. 1201 return pm.getPackageInfo(packageName, PackageManager.MATCH_ANY_USER); 1202 } catch (PackageManager.NameNotFoundException e) { 1203 return pm.getPackageInfo(packageName, PackageManager.MATCH_APEX); 1204 } 1205 } 1206 1207 @Nullable getVersionedPackage(String packageName)1208 private VersionedPackage getVersionedPackage(String packageName) { 1209 final PackageManager pm = mContext.getPackageManager(); 1210 if (pm == null || TextUtils.isEmpty(packageName)) { 1211 return null; 1212 } 1213 try { 1214 final long versionCode = getPackageInfo(packageName).getLongVersionCode(); 1215 return new VersionedPackage(packageName, versionCode); 1216 } catch (PackageManager.NameNotFoundException e) { 1217 return null; 1218 } 1219 } 1220 1221 /** 1222 * Loads mAllObservers from file. 1223 * 1224 * <p>Note that this is <b>not</b> thread safe and should only called be called 1225 * from the constructor. 
1226 */ loadFromFile()1227 private void loadFromFile() { 1228 InputStream infile = null; 1229 mAllObservers.clear(); 1230 try { 1231 infile = mPolicyFile.openRead(); 1232 final XmlPullParser parser = Xml.newPullParser(); 1233 parser.setInput(infile, UTF_8.name()); 1234 XmlUtils.beginDocument(parser, TAG_PACKAGE_WATCHDOG); 1235 int outerDepth = parser.getDepth(); 1236 while (XmlUtils.nextElementWithin(parser, outerDepth)) { 1237 ObserverInternal observer = ObserverInternal.read(parser, this); 1238 if (observer != null) { 1239 mAllObservers.put(observer.name, observer); 1240 } 1241 } 1242 } catch (FileNotFoundException e) { 1243 // Nothing to monitor 1244 } catch (Exception e) { 1245 Slog.wtf(TAG, "Unable to read monitored packages, deleting file", e); 1246 mPolicyFile.delete(); 1247 } finally { 1248 IoUtils.closeQuietly(infile); 1249 } 1250 } 1251 onPropertyChanged(DeviceConfig.Properties properties)1252 private void onPropertyChanged(DeviceConfig.Properties properties) { 1253 try { 1254 updateConfigs(); 1255 } catch (Exception ignore) { 1256 Slog.w(TAG, "Failed to reload device config changes"); 1257 } 1258 } 1259 1260 /** Adds a {@link DeviceConfig#OnPropertiesChangedListener}. */ setPropertyChangedListenerLocked()1261 private void setPropertyChangedListenerLocked() { 1262 DeviceConfig.addOnPropertiesChangedListener( 1263 DeviceConfig.NAMESPACE_ROLLBACK, 1264 mContext.getMainExecutor(), 1265 mOnPropertyChangedListener); 1266 } 1267 1268 @VisibleForTesting removePropertyChangedListener()1269 void removePropertyChangedListener() { 1270 DeviceConfig.removeOnPropertiesChangedListener(mOnPropertyChangedListener); 1271 } 1272 1273 /** 1274 * Health check is enabled or disabled after reading the flags 1275 * from DeviceConfig. 
1276 */ 1277 @VisibleForTesting updateConfigs()1278 void updateConfigs() { 1279 synchronized (sLock) { 1280 mTriggerFailureCount = DeviceConfig.getInt( 1281 DeviceConfig.NAMESPACE_ROLLBACK, 1282 PROPERTY_WATCHDOG_TRIGGER_FAILURE_COUNT, 1283 DEFAULT_TRIGGER_FAILURE_COUNT); 1284 if (mTriggerFailureCount <= 0) { 1285 mTriggerFailureCount = DEFAULT_TRIGGER_FAILURE_COUNT; 1286 } 1287 1288 mTriggerFailureDurationMs = DeviceConfig.getInt( 1289 DeviceConfig.NAMESPACE_ROLLBACK, 1290 PROPERTY_WATCHDOG_TRIGGER_DURATION_MILLIS, 1291 DEFAULT_TRIGGER_FAILURE_DURATION_MS); 1292 if (mTriggerFailureDurationMs <= 0) { 1293 mTriggerFailureDurationMs = DEFAULT_TRIGGER_FAILURE_DURATION_MS; 1294 } 1295 1296 setExplicitHealthCheckEnabled(DeviceConfig.getBoolean( 1297 DeviceConfig.NAMESPACE_ROLLBACK, 1298 PROPERTY_WATCHDOG_EXPLICIT_HEALTH_CHECK_ENABLED, 1299 DEFAULT_EXPLICIT_HEALTH_CHECK_ENABLED)); 1300 } 1301 } 1302 1303 /** 1304 * Persists mAllObservers to file. Threshold information is ignored. 1305 */ saveToFile()1306 private boolean saveToFile() { 1307 Slog.i(TAG, "Saving observer state to file"); 1308 synchronized (sLock) { 1309 FileOutputStream stream; 1310 try { 1311 stream = mPolicyFile.startWrite(); 1312 } catch (IOException e) { 1313 Slog.w(TAG, "Cannot update monitored packages", e); 1314 return false; 1315 } 1316 1317 try { 1318 XmlSerializer out = new FastXmlSerializer(); 1319 out.setOutput(stream, UTF_8.name()); 1320 out.startDocument(null, true); 1321 out.startTag(null, TAG_PACKAGE_WATCHDOG); 1322 out.attribute(null, ATTR_VERSION, Integer.toString(DB_VERSION)); 1323 for (int oIndex = 0; oIndex < mAllObservers.size(); oIndex++) { 1324 mAllObservers.valueAt(oIndex).writeLocked(out); 1325 } 1326 out.endTag(null, TAG_PACKAGE_WATCHDOG); 1327 out.endDocument(); 1328 mPolicyFile.finishWrite(stream); 1329 return true; 1330 } catch (IOException e) { 1331 Slog.w(TAG, "Failed to save monitored packages, restoring backup", e); 1332 mPolicyFile.failWrite(stream); 1333 return false; 
1334 } 1335 } 1336 } 1337 saveToFileAsync()1338 private void saveToFileAsync() { 1339 if (!mLongTaskHandler.hasCallbacks(mSaveToFile)) { 1340 mLongTaskHandler.post(mSaveToFile); 1341 } 1342 } 1343 1344 /** @hide Convert a {@code LongArrayQueue} to a String of comma-separated values. */ longArrayQueueToString(LongArrayQueue queue)1345 public static String longArrayQueueToString(LongArrayQueue queue) { 1346 if (queue.size() > 0) { 1347 StringBuilder sb = new StringBuilder(); 1348 sb.append(queue.get(0)); 1349 for (int i = 1; i < queue.size(); i++) { 1350 sb.append(","); 1351 sb.append(queue.get(i)); 1352 } 1353 return sb.toString(); 1354 } 1355 return ""; 1356 } 1357 1358 /** @hide Parse a comma-separated String of longs into a LongArrayQueue. */ parseLongArrayQueue(String commaSeparatedValues)1359 public static LongArrayQueue parseLongArrayQueue(String commaSeparatedValues) { 1360 LongArrayQueue result = new LongArrayQueue(); 1361 if (!TextUtils.isEmpty(commaSeparatedValues)) { 1362 String[] values = commaSeparatedValues.split(","); 1363 for (String value : values) { 1364 result.addLast(Long.parseLong(value)); 1365 } 1366 } 1367 return result; 1368 } 1369 1370 1371 /** Dump status of every observer in mAllObservers. */ dump(@onNull PrintWriter pw)1372 public void dump(@NonNull PrintWriter pw) { 1373 if (Flags.synchronousRebootInRescueParty() && isRecoveryTriggeredReboot()) { 1374 dumpInternal(pw); 1375 } else { 1376 synchronized (sLock) { 1377 dumpInternal(pw); 1378 } 1379 } 1380 } 1381 1382 /** 1383 * Check if we're currently attempting to reboot during mitigation. This method must return 1384 * true if triggered reboot early during a boot loop, since the device will not be fully booted 1385 * at this time. 
1386 */ isRecoveryTriggeredReboot()1387 public static boolean isRecoveryTriggeredReboot() { 1388 return isFactoryResetPropertySet() || isRebootPropertySet(); 1389 } 1390 isFactoryResetPropertySet()1391 private static boolean isFactoryResetPropertySet() { 1392 return CrashRecoveryProperties.attemptingFactoryReset().orElse(false); 1393 } 1394 isRebootPropertySet()1395 private static boolean isRebootPropertySet() { 1396 return CrashRecoveryProperties.attemptingReboot().orElse(false); 1397 } 1398 dumpInternal(@onNull PrintWriter pw)1399 private void dumpInternal(@NonNull PrintWriter pw) { 1400 IndentingPrintWriter ipw = new IndentingPrintWriter(pw, " "); 1401 ipw.println("Package Watchdog status"); 1402 ipw.increaseIndent(); 1403 synchronized (sLock) { 1404 for (String observerName : mAllObservers.keySet()) { 1405 ipw.println("Observer name: " + observerName); 1406 ipw.increaseIndent(); 1407 ObserverInternal observerInternal = mAllObservers.get(observerName); 1408 observerInternal.dump(ipw); 1409 ipw.decreaseIndent(); 1410 } 1411 } 1412 ipw.decreaseIndent(); 1413 dumpCrashRecoveryEvents(ipw); 1414 } 1415 1416 @VisibleForTesting 1417 @GuardedBy("sLock") registerObserverInternal(ObserverInternal observerInternal)1418 void registerObserverInternal(ObserverInternal observerInternal) { 1419 mAllObservers.put(observerInternal.name, observerInternal); 1420 } 1421 1422 /** 1423 * Represents an observer monitoring a set of packages along with the failure thresholds for 1424 * each package. 1425 * 1426 * <p> Note, the PackageWatchdog#sLock must always be held when reading or writing 1427 * instances of this class. 
1428 */ 1429 static class ObserverInternal { 1430 public final String name; 1431 @GuardedBy("sLock") 1432 private final ArrayMap<String, MonitoredPackage> mPackages = new ArrayMap<>(); 1433 @Nullable 1434 @GuardedBy("sLock") 1435 public PackageHealthObserver registeredObserver; 1436 public Executor observerExecutor; 1437 private int mMitigationCount; 1438 ObserverInternal(String name, List<MonitoredPackage> packages)1439 ObserverInternal(String name, List<MonitoredPackage> packages) { 1440 this(name, packages, /*mitigationCount=*/ 0); 1441 } 1442 ObserverInternal(String name, List<MonitoredPackage> packages, int mitigationCount)1443 ObserverInternal(String name, List<MonitoredPackage> packages, int mitigationCount) { 1444 this.name = name; 1445 updatePackagesLocked(packages); 1446 this.mMitigationCount = mitigationCount; 1447 } 1448 1449 /** 1450 * Writes important {@link MonitoredPackage} details for this observer to file. 1451 * Does not persist any package failure thresholds. 1452 */ 1453 @GuardedBy("sLock") writeLocked(XmlSerializer out)1454 public boolean writeLocked(XmlSerializer out) { 1455 try { 1456 out.startTag(null, TAG_OBSERVER); 1457 out.attribute(null, ATTR_NAME, name); 1458 out.attribute(null, ATTR_MITIGATION_COUNT, Integer.toString(mMitigationCount)); 1459 for (int i = 0; i < mPackages.size(); i++) { 1460 MonitoredPackage p = mPackages.valueAt(i); 1461 p.writeLocked(out); 1462 } 1463 out.endTag(null, TAG_OBSERVER); 1464 return true; 1465 } catch (IOException e) { 1466 Slog.w(TAG, "Cannot save observer", e); 1467 return false; 1468 } 1469 } 1470 getBootMitigationCount()1471 public int getBootMitigationCount() { 1472 return mMitigationCount; 1473 } 1474 setBootMitigationCount(int mitigationCount)1475 public void setBootMitigationCount(int mitigationCount) { 1476 mMitigationCount = mitigationCount; 1477 } 1478 1479 @GuardedBy("sLock") updatePackagesLocked(List<MonitoredPackage> packages)1480 public void updatePackagesLocked(List<MonitoredPackage> 
packages) { 1481 for (int pIndex = 0; pIndex < packages.size(); pIndex++) { 1482 MonitoredPackage p = packages.get(pIndex); 1483 MonitoredPackage existingPackage = getMonitoredPackage(p.getName()); 1484 if (existingPackage != null) { 1485 existingPackage.updateHealthCheckDuration(p.mDurationMs); 1486 } else { 1487 putMonitoredPackage(p); 1488 } 1489 } 1490 } 1491 1492 /** 1493 * Reduces the monitoring durations of all packages observed by this observer by 1494 * {@code elapsedMs}. If any duration is less than 0, the package is removed from 1495 * observation. If any health check duration is less than 0, the health check result 1496 * is evaluated. 1497 * 1498 * @return a {@link Set} of packages that were removed from the observer without explicit 1499 * health check passing, or an empty list if no package expired for which an explicit health 1500 * check was still pending 1501 */ 1502 @GuardedBy("sLock") prunePackagesLocked(long elapsedMs)1503 private Set<MonitoredPackage> prunePackagesLocked(long elapsedMs) { 1504 Set<MonitoredPackage> failedPackages = new ArraySet<>(); 1505 Iterator<MonitoredPackage> it = mPackages.values().iterator(); 1506 while (it.hasNext()) { 1507 MonitoredPackage p = it.next(); 1508 int oldState = p.getHealthCheckStateLocked(); 1509 int newState = p.handleElapsedTimeLocked(elapsedMs); 1510 if (oldState != HealthCheckState.FAILED 1511 && newState == HealthCheckState.FAILED) { 1512 Slog.i(TAG, "Package " + p.getName() + " failed health check"); 1513 failedPackages.add(p); 1514 } 1515 if (p.isExpiredLocked()) { 1516 it.remove(); 1517 } 1518 } 1519 return failedPackages; 1520 } 1521 1522 /** 1523 * Increments failure counts of {@code packageName}. 
1524 * @returns {@code true} if failure threshold is exceeded, {@code false} otherwise 1525 * @hide 1526 */ 1527 @GuardedBy("sLock") notifyPackageFailureLocked(String packageName)1528 public boolean notifyPackageFailureLocked(String packageName) { 1529 if (getMonitoredPackage(packageName) == null && registeredObserver.isPersistent() 1530 && registeredObserver.mayObservePackage(packageName)) { 1531 putMonitoredPackage(sPackageWatchdog.newMonitoredPackage( 1532 packageName, DEFAULT_OBSERVING_DURATION_MS, false)); 1533 } 1534 MonitoredPackage p = getMonitoredPackage(packageName); 1535 if (p != null) { 1536 return p.onFailureLocked(); 1537 } 1538 return false; 1539 } 1540 1541 /** 1542 * Returns the map of packages monitored by this observer. 1543 * 1544 * @return a mapping of package names to {@link MonitoredPackage} objects. 1545 */ 1546 @GuardedBy("sLock") getMonitoredPackages()1547 public ArrayMap<String, MonitoredPackage> getMonitoredPackages() { 1548 return mPackages; 1549 } 1550 1551 /** 1552 * Returns the {@link MonitoredPackage} associated with a given package name if the 1553 * package is being monitored by this observer. 1554 * 1555 * @param packageName: the name of the package. 1556 * @return the {@link MonitoredPackage} object associated with the package name if one 1557 * exists, {@code null} otherwise. 1558 */ 1559 @GuardedBy("sLock") 1560 @Nullable getMonitoredPackage(String packageName)1561 public MonitoredPackage getMonitoredPackage(String packageName) { 1562 return mPackages.get(packageName); 1563 } 1564 1565 /** 1566 * Associates a {@link MonitoredPackage} with the observer. 1567 * 1568 * @param p: the {@link MonitoredPackage} to store. 1569 */ 1570 @GuardedBy("sLock") putMonitoredPackage(MonitoredPackage p)1571 public void putMonitoredPackage(MonitoredPackage p) { 1572 mPackages.put(p.getName(), p); 1573 } 1574 1575 /** 1576 * Returns one ObserverInternal from the {@code parser} and advances its state. 
1577 * 1578 * <p>Note that this method is <b>not</b> thread safe. It should only be called from 1579 * #loadFromFile which in turn is only called on construction of the 1580 * singleton PackageWatchdog. 1581 **/ read(XmlPullParser parser, PackageWatchdog watchdog)1582 public static ObserverInternal read(XmlPullParser parser, PackageWatchdog watchdog) { 1583 String observerName = null; 1584 int observerMitigationCount = 0; 1585 if (TAG_OBSERVER.equals(parser.getName())) { 1586 observerName = parser.getAttributeValue(null, ATTR_NAME); 1587 if (TextUtils.isEmpty(observerName)) { 1588 Slog.wtf(TAG, "Unable to read observer name"); 1589 return null; 1590 } 1591 } 1592 List<MonitoredPackage> packages = new ArrayList<>(); 1593 int innerDepth = parser.getDepth(); 1594 try { 1595 try { 1596 observerMitigationCount = Integer.parseInt( 1597 parser.getAttributeValue(null, ATTR_MITIGATION_COUNT)); 1598 } catch (Exception e) { 1599 Slog.i( 1600 TAG, 1601 "ObserverInternal mitigation count was not present."); 1602 } 1603 while (XmlUtils.nextElementWithin(parser, innerDepth)) { 1604 if (TAG_PACKAGE.equals(parser.getName())) { 1605 try { 1606 MonitoredPackage pkg = watchdog.parseMonitoredPackage(parser); 1607 if (pkg != null) { 1608 packages.add(pkg); 1609 } 1610 } catch (NumberFormatException e) { 1611 Slog.wtf(TAG, "Skipping package for observer " + observerName, e); 1612 continue; 1613 } 1614 } 1615 } 1616 } catch (XmlPullParserException | IOException e) { 1617 Slog.wtf(TAG, "Unable to read observer " + observerName, e); 1618 return null; 1619 } 1620 if (packages.isEmpty()) { 1621 return null; 1622 } 1623 return new ObserverInternal(observerName, packages, observerMitigationCount); 1624 } 1625 1626 /** Dumps information about this observer and the packages it watches. 
*/ dump(IndentingPrintWriter pw)1627 public void dump(IndentingPrintWriter pw) { 1628 boolean isPersistent = registeredObserver != null && registeredObserver.isPersistent(); 1629 pw.println("Persistent: " + isPersistent); 1630 for (String packageName : mPackages.keySet()) { 1631 MonitoredPackage p = getMonitoredPackage(packageName); 1632 pw.println(packageName + ": "); 1633 pw.increaseIndent(); 1634 pw.println("# Failures: " + p.mFailureHistory.size()); 1635 pw.println("Monitoring duration remaining: " + p.mDurationMs + "ms"); 1636 pw.println("Explicit health check duration: " + p.mHealthCheckDurationMs + "ms"); 1637 pw.println("Health check state: " + p.toString(p.mHealthCheckState)); 1638 pw.decreaseIndent(); 1639 } 1640 } 1641 } 1642 1643 /** @hide */ 1644 @Retention(SOURCE) 1645 @IntDef(value = { 1646 HealthCheckState.ACTIVE, 1647 HealthCheckState.INACTIVE, 1648 HealthCheckState.PASSED, 1649 HealthCheckState.FAILED}) 1650 public @interface HealthCheckState { 1651 // The package has not passed health check but has requested a health check 1652 int ACTIVE = 0; 1653 // The package has not passed health check and has not requested a health check 1654 int INACTIVE = 1; 1655 // The package has passed health check 1656 int PASSED = 2; 1657 // The package has failed health check 1658 int FAILED = 3; 1659 } 1660 newMonitoredPackage( String name, long durationMs, boolean hasPassedHealthCheck)1661 MonitoredPackage newMonitoredPackage( 1662 String name, long durationMs, boolean hasPassedHealthCheck) { 1663 return newMonitoredPackage(name, durationMs, Long.MAX_VALUE, hasPassedHealthCheck, 1664 new LongArrayQueue()); 1665 } 1666 newMonitoredPackage(String name, long durationMs, long healthCheckDurationMs, boolean hasPassedHealthCheck, LongArrayQueue mitigationCalls)1667 MonitoredPackage newMonitoredPackage(String name, long durationMs, long healthCheckDurationMs, 1668 boolean hasPassedHealthCheck, LongArrayQueue mitigationCalls) { 1669 return new MonitoredPackage(name, 
durationMs, healthCheckDurationMs, 1670 hasPassedHealthCheck, mitigationCalls); 1671 } 1672 parseMonitoredPackage(XmlPullParser parser)1673 MonitoredPackage parseMonitoredPackage(XmlPullParser parser) 1674 throws XmlPullParserException { 1675 String packageName = parser.getAttributeValue(null, ATTR_NAME); 1676 long duration = Long.parseLong(parser.getAttributeValue(null, ATTR_DURATION)); 1677 long healthCheckDuration = Long.parseLong(parser.getAttributeValue(null, 1678 ATTR_EXPLICIT_HEALTH_CHECK_DURATION)); 1679 boolean hasPassedHealthCheck = Boolean.parseBoolean(parser.getAttributeValue(null, 1680 ATTR_PASSED_HEALTH_CHECK)); 1681 LongArrayQueue mitigationCalls = parseLongArrayQueue( 1682 parser.getAttributeValue(null, ATTR_MITIGATION_CALLS)); 1683 return newMonitoredPackage(packageName, 1684 duration, healthCheckDuration, hasPassedHealthCheck, mitigationCalls); 1685 } 1686 1687 /** 1688 * Represents a package and its health check state along with the time 1689 * it should be monitored for. 1690 * 1691 * <p> Note, the PackageWatchdog#sLock must always be held when reading or writing 1692 * instances of this class. 1693 */ 1694 class MonitoredPackage { 1695 private final String mPackageName; 1696 // Times when package failures happen sorted in ascending order 1697 @GuardedBy("sLock") 1698 private final LongArrayQueue mFailureHistory = new LongArrayQueue(); 1699 // Times when an observer was called to mitigate this package's failure. Sorted in 1700 // ascending order. 1701 @GuardedBy("sLock") 1702 private final LongArrayQueue mMitigationCalls; 1703 // One of STATE_[ACTIVE|INACTIVE|PASSED|FAILED]. Updated on construction and after 1704 // methods that could change the health check state: handleElapsedTimeLocked and 1705 // tryPassHealthCheckLocked 1706 private int mHealthCheckState = HealthCheckState.INACTIVE; 1707 // Whether an explicit health check has passed. 
1708 // This value in addition with mHealthCheckDurationMs determines the health check state 1709 // of the package, see #getHealthCheckStateLocked 1710 @GuardedBy("sLock") 1711 private boolean mHasPassedHealthCheck; 1712 // System uptime duration to monitor package. 1713 @GuardedBy("sLock") 1714 private long mDurationMs; 1715 // System uptime duration to check the result of an explicit health check 1716 // Initially, MAX_VALUE until we get a value from the health check service 1717 // and request health checks. 1718 // This value in addition with mHasPassedHealthCheck determines the health check state 1719 // of the package, see #getHealthCheckStateLocked 1720 @GuardedBy("sLock") 1721 private long mHealthCheckDurationMs = Long.MAX_VALUE; 1722 MonitoredPackage(String packageName, long durationMs, long healthCheckDurationMs, boolean hasPassedHealthCheck, LongArrayQueue mitigationCalls)1723 MonitoredPackage(String packageName, long durationMs, 1724 long healthCheckDurationMs, boolean hasPassedHealthCheck, 1725 LongArrayQueue mitigationCalls) { 1726 mPackageName = packageName; 1727 mDurationMs = durationMs; 1728 mHealthCheckDurationMs = healthCheckDurationMs; 1729 mHasPassedHealthCheck = hasPassedHealthCheck; 1730 mMitigationCalls = mitigationCalls; 1731 updateHealthCheckStateLocked(); 1732 } 1733 1734 /** Writes the salient fields to disk using {@code out}. 
1735 * @hide 1736 */ 1737 @GuardedBy("sLock") writeLocked(XmlSerializer out)1738 public void writeLocked(XmlSerializer out) throws IOException { 1739 out.startTag(null, TAG_PACKAGE); 1740 out.attribute(null, ATTR_NAME, getName()); 1741 out.attribute(null, ATTR_DURATION, Long.toString(mDurationMs)); 1742 out.attribute(null, ATTR_EXPLICIT_HEALTH_CHECK_DURATION, 1743 Long.toString(mHealthCheckDurationMs)); 1744 out.attribute(null, ATTR_PASSED_HEALTH_CHECK, Boolean.toString(mHasPassedHealthCheck)); 1745 LongArrayQueue normalizedCalls = normalizeMitigationCalls(); 1746 out.attribute(null, ATTR_MITIGATION_CALLS, longArrayQueueToString(normalizedCalls)); 1747 out.endTag(null, TAG_PACKAGE); 1748 } 1749 1750 /** 1751 * Increment package failures or resets failure count depending on the last package failure. 1752 * 1753 * @return {@code true} if failure count exceeds a threshold, {@code false} otherwise 1754 */ 1755 @GuardedBy("sLock") onFailureLocked()1756 public boolean onFailureLocked() { 1757 // Sliding window algorithm: find out if there exists a window containing failures >= 1758 // mTriggerFailureCount. 1759 final long now = mSystemClock.uptimeMillis(); 1760 mFailureHistory.addLast(now); 1761 while (now - mFailureHistory.peekFirst() > mTriggerFailureDurationMs) { 1762 // Prune values falling out of the window 1763 mFailureHistory.removeFirst(); 1764 } 1765 boolean failed = mFailureHistory.size() >= mTriggerFailureCount; 1766 if (failed) { 1767 mFailureHistory.clear(); 1768 } 1769 return failed; 1770 } 1771 1772 /** 1773 * Notes the timestamp of a mitigation call into the observer. 1774 */ 1775 @GuardedBy("sLock") noteMitigationCallLocked()1776 public void noteMitigationCallLocked() { 1777 mMitigationCalls.addLast(mSystemClock.uptimeMillis()); 1778 } 1779 1780 /** 1781 * Prunes any mitigation calls outside of the de-escalation window, and returns the 1782 * number of calls that are in the window afterwards. 
1783 * 1784 * @return the number of mitigation calls made in the de-escalation window. 1785 */ 1786 @GuardedBy("sLock") getMitigationCountLocked()1787 public int getMitigationCountLocked() { 1788 try { 1789 final long now = mSystemClock.uptimeMillis(); 1790 while (now - mMitigationCalls.peekFirst() > DEFAULT_DEESCALATION_WINDOW_MS) { 1791 mMitigationCalls.removeFirst(); 1792 } 1793 } catch (NoSuchElementException ignore) { 1794 } 1795 1796 return mMitigationCalls.size(); 1797 } 1798 1799 /** 1800 * Before writing to disk, make the mitigation call timestamps relative to the current 1801 * system uptime. This is because they need to be relative to the uptime which will reset 1802 * at the next boot. 1803 * 1804 * @return a LongArrayQueue of the mitigation calls relative to the current system uptime. 1805 */ 1806 @GuardedBy("sLock") normalizeMitigationCalls()1807 public LongArrayQueue normalizeMitigationCalls() { 1808 LongArrayQueue normalized = new LongArrayQueue(); 1809 final long now = mSystemClock.uptimeMillis(); 1810 for (int i = 0; i < mMitigationCalls.size(); i++) { 1811 normalized.addLast(mMitigationCalls.get(i) - now); 1812 } 1813 return normalized; 1814 } 1815 1816 /** 1817 * Sets the initial health check duration. 1818 * 1819 * @return the new health check state 1820 */ 1821 @GuardedBy("sLock") setHealthCheckActiveLocked(long initialHealthCheckDurationMs)1822 public int setHealthCheckActiveLocked(long initialHealthCheckDurationMs) { 1823 if (initialHealthCheckDurationMs <= 0) { 1824 Slog.wtf(TAG, "Cannot set non-positive health check duration " 1825 + initialHealthCheckDurationMs + "ms for package " + getName() 1826 + ". 
Using total duration " + mDurationMs + "ms instead"); 1827 initialHealthCheckDurationMs = mDurationMs; 1828 } 1829 if (mHealthCheckState == HealthCheckState.INACTIVE) { 1830 // Transitions to ACTIVE 1831 mHealthCheckDurationMs = initialHealthCheckDurationMs; 1832 } 1833 return updateHealthCheckStateLocked(); 1834 } 1835 1836 /** 1837 * Updates the monitoring durations of the package. 1838 * 1839 * @return the new health check state 1840 */ 1841 @GuardedBy("sLock") handleElapsedTimeLocked(long elapsedMs)1842 public int handleElapsedTimeLocked(long elapsedMs) { 1843 if (elapsedMs <= 0) { 1844 Slog.w(TAG, "Cannot handle non-positive elapsed time for package " + getName()); 1845 return mHealthCheckState; 1846 } 1847 // Transitions to FAILED if now <= 0 and health check not passed 1848 mDurationMs -= elapsedMs; 1849 if (mHealthCheckState == HealthCheckState.ACTIVE) { 1850 // We only update health check durations if we have #setHealthCheckActiveLocked 1851 // This ensures we don't leave the INACTIVE state for an unexpected elapsed time 1852 // Transitions to FAILED if now <= 0 and health check not passed 1853 mHealthCheckDurationMs -= elapsedMs; 1854 } 1855 return updateHealthCheckStateLocked(); 1856 } 1857 1858 /** Explicitly update the monitoring duration of the package. */ 1859 @GuardedBy("sLock") updateHealthCheckDuration(long newDurationMs)1860 public void updateHealthCheckDuration(long newDurationMs) { 1861 mDurationMs = newDurationMs; 1862 } 1863 1864 /** 1865 * Marks the health check as passed and transitions to {@link HealthCheckState.PASSED} 1866 * if not yet {@link HealthCheckState.FAILED}. 
1867 * 1868 * @return the new {@link HealthCheckState health check state} 1869 */ 1870 @GuardedBy("sLock") 1871 @HealthCheckState tryPassHealthCheckLocked()1872 public int tryPassHealthCheckLocked() { 1873 if (mHealthCheckState != HealthCheckState.FAILED) { 1874 // FAILED is a final state so only pass if we haven't failed 1875 // Transition to PASSED 1876 mHasPassedHealthCheck = true; 1877 } 1878 return updateHealthCheckStateLocked(); 1879 } 1880 1881 /** Returns the monitored package name. */ getName()1882 private String getName() { 1883 return mPackageName; 1884 } 1885 1886 /** 1887 * Returns the current {@link HealthCheckState health check state}. 1888 */ 1889 @GuardedBy("sLock") 1890 @HealthCheckState getHealthCheckStateLocked()1891 public int getHealthCheckStateLocked() { 1892 return mHealthCheckState; 1893 } 1894 1895 /** 1896 * Returns the shortest duration before the package should be scheduled for a prune. 1897 * 1898 * @return the duration or {@link Long#MAX_VALUE} if the package should not be scheduled 1899 */ 1900 @GuardedBy("sLock") getShortestScheduleDurationMsLocked()1901 public long getShortestScheduleDurationMsLocked() { 1902 // Consider health check duration only if #isPendingHealthChecksLocked is true 1903 return Math.min(toPositive(mDurationMs), 1904 isPendingHealthChecksLocked() 1905 ? toPositive(mHealthCheckDurationMs) : Long.MAX_VALUE); 1906 } 1907 1908 /** 1909 * Returns {@code true} if the total duration left to monitor the package is less than or 1910 * equal to 0 {@code false} otherwise. 1911 */ 1912 @GuardedBy("sLock") isExpiredLocked()1913 public boolean isExpiredLocked() { 1914 return mDurationMs <= 0; 1915 } 1916 1917 /** 1918 * Returns {@code true} if the package, {@link #getName} is expecting health check results 1919 * {@code false} otherwise. 
1920 */ 1921 @GuardedBy("sLock") isPendingHealthChecksLocked()1922 public boolean isPendingHealthChecksLocked() { 1923 return mHealthCheckState == HealthCheckState.ACTIVE 1924 || mHealthCheckState == HealthCheckState.INACTIVE; 1925 } 1926 1927 /** 1928 * Updates the health check state based on {@link #mHasPassedHealthCheck} 1929 * and {@link #mHealthCheckDurationMs}. 1930 * 1931 * @return the new {@link HealthCheckState health check state} 1932 */ 1933 @GuardedBy("sLock") 1934 @HealthCheckState updateHealthCheckStateLocked()1935 private int updateHealthCheckStateLocked() { 1936 int oldState = mHealthCheckState; 1937 if (mHasPassedHealthCheck) { 1938 // Set final state first to avoid ambiguity 1939 mHealthCheckState = HealthCheckState.PASSED; 1940 } else if (mHealthCheckDurationMs <= 0 || mDurationMs <= 0) { 1941 // Set final state first to avoid ambiguity 1942 mHealthCheckState = HealthCheckState.FAILED; 1943 } else if (mHealthCheckDurationMs == Long.MAX_VALUE) { 1944 mHealthCheckState = HealthCheckState.INACTIVE; 1945 } else { 1946 mHealthCheckState = HealthCheckState.ACTIVE; 1947 } 1948 1949 if (oldState != mHealthCheckState) { 1950 Slog.i(TAG, "Updated health check state for package " + getName() + ": " 1951 + toString(oldState) + " -> " + toString(mHealthCheckState)); 1952 } 1953 return mHealthCheckState; 1954 } 1955 1956 /** Returns a {@link String} representation of the current health check state. */ toString(@ealthCheckState int state)1957 private String toString(@HealthCheckState int state) { 1958 switch (state) { 1959 case HealthCheckState.ACTIVE: 1960 return "ACTIVE"; 1961 case HealthCheckState.INACTIVE: 1962 return "INACTIVE"; 1963 case HealthCheckState.PASSED: 1964 return "PASSED"; 1965 case HealthCheckState.FAILED: 1966 return "FAILED"; 1967 default: 1968 return "UNKNOWN"; 1969 } 1970 } 1971 1972 /** Returns {@code value} if it is greater than 0 or {@link Long#MAX_VALUE} otherwise. 
*/ toPositive(long value)1973 private long toPositive(long value) { 1974 return value > 0 ? value : Long.MAX_VALUE; 1975 } 1976 1977 /** Compares the equality of this object with another {@link MonitoredPackage}. */ 1978 @VisibleForTesting isEqualTo(MonitoredPackage pkg)1979 boolean isEqualTo(MonitoredPackage pkg) { 1980 return (getName().equals(pkg.getName())) 1981 && mDurationMs == pkg.mDurationMs 1982 && mHasPassedHealthCheck == pkg.mHasPassedHealthCheck 1983 && mHealthCheckDurationMs == pkg.mHealthCheckDurationMs 1984 && (mMitigationCalls.toString()).equals(pkg.mMitigationCalls.toString()); 1985 } 1986 } 1987 1988 @GuardedBy("sLock") 1989 @SuppressWarnings("GuardedBy") saveAllObserversBootMitigationCountToMetadata(String filePath)1990 void saveAllObserversBootMitigationCountToMetadata(String filePath) { 1991 HashMap<String, Integer> bootMitigationCounts = new HashMap<>(); 1992 for (int i = 0; i < mAllObservers.size(); i++) { 1993 final ObserverInternal observer = mAllObservers.valueAt(i); 1994 bootMitigationCounts.put(observer.name, observer.getBootMitigationCount()); 1995 } 1996 1997 FileOutputStream fileStream = null; 1998 ObjectOutputStream objectStream = null; 1999 try { 2000 fileStream = new FileOutputStream(new File(filePath)); 2001 objectStream = new ObjectOutputStream(fileStream); 2002 objectStream.writeObject(bootMitigationCounts); 2003 objectStream.flush(); 2004 } catch (Exception e) { 2005 Slog.i(TAG, "Could not save observers metadata to file: " + e); 2006 return; 2007 } finally { 2008 IoUtils.closeQuietly(objectStream); 2009 IoUtils.closeQuietly(fileStream); 2010 } 2011 } 2012 2013 /** 2014 * Handles the thresholding logic for system server boots. 
2015 */ 2016 class BootThreshold { 2017 2018 private final int mBootTriggerCount; 2019 private final long mTriggerWindow; 2020 BootThreshold(int bootTriggerCount, long triggerWindow)2021 BootThreshold(int bootTriggerCount, long triggerWindow) { 2022 this.mBootTriggerCount = bootTriggerCount; 2023 this.mTriggerWindow = triggerWindow; 2024 } 2025 reset()2026 public void reset() { 2027 setStart(0); 2028 setCount(0); 2029 } 2030 getCount()2031 protected int getCount() { 2032 return CrashRecoveryProperties.rescueBootCount().orElse(0); 2033 } 2034 setCount(int count)2035 protected void setCount(int count) { 2036 CrashRecoveryProperties.rescueBootCount(count); 2037 } 2038 getStart()2039 public long getStart() { 2040 return CrashRecoveryProperties.rescueBootStart().orElse(0L); 2041 } 2042 getMitigationCount()2043 public int getMitigationCount() { 2044 return CrashRecoveryProperties.bootMitigationCount().orElse(0); 2045 } 2046 setStart(long start)2047 public void setStart(long start) { 2048 CrashRecoveryProperties.rescueBootStart(getStartTime(start)); 2049 } 2050 setMitigationStart(long start)2051 public void setMitigationStart(long start) { 2052 CrashRecoveryProperties.bootMitigationStart(getStartTime(start)); 2053 } 2054 getMitigationStart()2055 public long getMitigationStart() { 2056 return CrashRecoveryProperties.bootMitigationStart().orElse(0L); 2057 } 2058 setMitigationCount(int count)2059 public void setMitigationCount(int count) { 2060 CrashRecoveryProperties.bootMitigationCount(count); 2061 } 2062 constrain(long amount, long low, long high)2063 private static long constrain(long amount, long low, long high) { 2064 return amount < low ? low : (amount > high ? 
high : amount); 2065 } 2066 getStartTime(long start)2067 public long getStartTime(long start) { 2068 final long now = mSystemClock.uptimeMillis(); 2069 return constrain(start, 0, now); 2070 } 2071 saveMitigationCountToMetadata()2072 public void saveMitigationCountToMetadata() { 2073 try (BufferedWriter writer = new BufferedWriter(new FileWriter(METADATA_FILE))) { 2074 writer.write(String.valueOf(getMitigationCount())); 2075 } catch (Exception e) { 2076 Slog.e(TAG, "Could not save metadata to file: " + e); 2077 } 2078 } 2079 readMitigationCountFromMetadataIfNecessary()2080 public void readMitigationCountFromMetadataIfNecessary() { 2081 File bootPropsFile = new File(METADATA_FILE); 2082 if (bootPropsFile.exists()) { 2083 try (BufferedReader reader = new BufferedReader(new FileReader(METADATA_FILE))) { 2084 String mitigationCount = reader.readLine(); 2085 setMitigationCount(Integer.parseInt(mitigationCount)); 2086 bootPropsFile.delete(); 2087 } catch (Exception e) { 2088 Slog.i(TAG, "Could not read metadata file: " + e); 2089 } 2090 } 2091 } 2092 2093 2094 /** Increments the boot counter, and returns whether the device is bootlooping. */ 2095 @GuardedBy("sLock") incrementAndTest()2096 public boolean incrementAndTest() { 2097 readAllObserversBootMitigationCountIfNecessary(METADATA_FILE); 2098 2099 final long now = mSystemClock.uptimeMillis(); 2100 if (now - getStart() < 0) { 2101 Slog.e(TAG, "Window was less than zero. 
Resetting start to current time."); 2102 setStart(now); 2103 setMitigationStart(now); 2104 } 2105 if (now - getMitigationStart() > DEFAULT_DEESCALATION_WINDOW_MS) { 2106 setMitigationStart(now); 2107 resetAllObserversBootMitigationCount(); 2108 } 2109 final long window = now - getStart(); 2110 if (window >= mTriggerWindow) { 2111 setCount(1); 2112 setStart(now); 2113 return false; 2114 } else { 2115 int count = getCount() + 1; 2116 setCount(count); 2117 EventLog.writeEvent(LOG_TAG_RESCUE_NOTE, Process.ROOT_UID, count, window); 2118 // After a reboot (e.g. by WARM_REBOOT or mainline rollback) we apply 2119 // mitigations without waiting for DEFAULT_BOOT_LOOP_TRIGGER_COUNT. 2120 return (count >= mBootTriggerCount) 2121 || (performedMitigationsDuringWindow() && count > 1); 2122 } 2123 } 2124 2125 @GuardedBy("sLock") performedMitigationsDuringWindow()2126 private boolean performedMitigationsDuringWindow() { 2127 for (ObserverInternal observerInternal: mAllObservers.values()) { 2128 if (observerInternal.getBootMitigationCount() > 0) { 2129 return true; 2130 } 2131 } 2132 return false; 2133 } 2134 2135 @GuardedBy("sLock") resetAllObserversBootMitigationCount()2136 private void resetAllObserversBootMitigationCount() { 2137 for (int i = 0; i < mAllObservers.size(); i++) { 2138 final ObserverInternal observer = mAllObservers.valueAt(i); 2139 observer.setBootMitigationCount(0); 2140 } 2141 saveAllObserversBootMitigationCountToMetadata(METADATA_FILE); 2142 } 2143 2144 @GuardedBy("sLock") 2145 @SuppressWarnings("GuardedBy") readAllObserversBootMitigationCountIfNecessary(String filePath)2146 void readAllObserversBootMitigationCountIfNecessary(String filePath) { 2147 File metadataFile = new File(filePath); 2148 if (metadataFile.exists()) { 2149 FileInputStream fileStream = null; 2150 ObjectInputStream objectStream = null; 2151 HashMap<String, Integer> bootMitigationCounts = null; 2152 try { 2153 fileStream = new FileInputStream(metadataFile); 2154 objectStream = new 
ObjectInputStream(fileStream); 2155 bootMitigationCounts = 2156 (HashMap<String, Integer>) objectStream.readObject(); 2157 } catch (Exception e) { 2158 Slog.i(TAG, "Could not read observer metadata file: " + e); 2159 return; 2160 } finally { 2161 IoUtils.closeQuietly(objectStream); 2162 IoUtils.closeQuietly(fileStream); 2163 } 2164 2165 if (bootMitigationCounts == null || bootMitigationCounts.isEmpty()) { 2166 Slog.i(TAG, "No observer in metadata file"); 2167 return; 2168 } 2169 for (int i = 0; i < mAllObservers.size(); i++) { 2170 final ObserverInternal observer = mAllObservers.valueAt(i); 2171 if (bootMitigationCounts.containsKey(observer.name)) { 2172 observer.setBootMitigationCount( 2173 bootMitigationCounts.get(observer.name)); 2174 } 2175 } 2176 } 2177 } 2178 } 2179 2180 /** 2181 * Register broadcast receiver for shutdown. 2182 * We would save the observer state to persist across boots. 2183 * 2184 * @hide 2185 */ registerShutdownBroadcastReceiver()2186 public void registerShutdownBroadcastReceiver() { 2187 BroadcastReceiver shutdownEventReceiver = new BroadcastReceiver() { 2188 @Override 2189 public void onReceive(Context context, Intent intent) { 2190 // Only write if intent is relevant to device reboot or shutdown. 2191 String intentAction = intent.getAction(); 2192 if (ACTION_REBOOT.equals(intentAction) 2193 || ACTION_SHUTDOWN.equals(intentAction)) { 2194 writeNow(); 2195 } 2196 } 2197 }; 2198 2199 // Setup receiver for device reboots or shutdowns. 2200 IntentFilter filter = new IntentFilter(ACTION_REBOOT); 2201 filter.addAction(ACTION_SHUTDOWN); 2202 mContext.registerReceiverForAllUsers(shutdownEventReceiver, filter, null, 2203 /* run on main thread */ null); 2204 } 2205 } 2206