• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2018 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.android.server;
18 
19 import static android.service.watchdog.ExplicitHealthCheckService.PackageConfig;
20 
21 import static java.lang.annotation.RetentionPolicy.SOURCE;
22 
23 import android.annotation.IntDef;
24 import android.annotation.Nullable;
25 import android.content.Context;
26 import android.content.pm.PackageInfo;
27 import android.content.pm.PackageManager;
28 import android.content.pm.VersionedPackage;
29 import android.net.ConnectivityModuleConnector;
30 import android.os.Environment;
31 import android.os.Handler;
32 import android.os.Looper;
33 import android.os.Process;
34 import android.os.SystemProperties;
35 import android.provider.DeviceConfig;
36 import android.text.TextUtils;
37 import android.util.ArrayMap;
38 import android.util.ArraySet;
39 import android.util.AtomicFile;
40 import android.util.LongArrayQueue;
41 import android.util.MathUtils;
42 import android.util.Slog;
43 import android.util.TypedXmlPullParser;
44 import android.util.TypedXmlSerializer;
45 import android.util.Xml;
46 
47 import com.android.internal.annotations.GuardedBy;
48 import com.android.internal.annotations.VisibleForTesting;
49 import com.android.internal.os.BackgroundThread;
50 import com.android.internal.util.IndentingPrintWriter;
51 import com.android.internal.util.XmlUtils;
52 
53 import libcore.io.IoUtils;
54 
55 import org.xmlpull.v1.XmlPullParserException;
56 
57 import java.io.BufferedReader;
58 import java.io.BufferedWriter;
59 import java.io.File;
60 import java.io.FileNotFoundException;
61 import java.io.FileOutputStream;
62 import java.io.FileReader;
63 import java.io.FileWriter;
64 import java.io.IOException;
65 import java.io.InputStream;
66 import java.lang.annotation.Retention;
67 import java.lang.annotation.RetentionPolicy;
68 import java.util.ArrayList;
69 import java.util.Collections;
70 import java.util.Iterator;
71 import java.util.List;
72 import java.util.Map;
73 import java.util.NoSuchElementException;
74 import java.util.Set;
75 import java.util.concurrent.TimeUnit;
76 
77 /**
78  * Monitors the health of packages on the system and notifies interested observers when packages
79  * fail. On failure, the registered observer with the least user impacting mitigation will
80  * be notified.
81  */
82 public class PackageWatchdog {
83     private static final String TAG = "PackageWatchdog";
84 
85     static final String PROPERTY_WATCHDOG_TRIGGER_DURATION_MILLIS =
86             "watchdog_trigger_failure_duration_millis";
87     static final String PROPERTY_WATCHDOG_TRIGGER_FAILURE_COUNT =
88             "watchdog_trigger_failure_count";
89     static final String PROPERTY_WATCHDOG_EXPLICIT_HEALTH_CHECK_ENABLED =
90             "watchdog_explicit_health_check_enabled";
91 
92     // TODO: make the following values configurable via DeviceConfig
93     private static final long NATIVE_CRASH_POLLING_INTERVAL_MILLIS =
94             TimeUnit.SECONDS.toMillis(30);
95     private static final long NUMBER_OF_NATIVE_CRASH_POLLS = 10;
96 
97 
98     public static final int FAILURE_REASON_UNKNOWN = 0;
99     public static final int FAILURE_REASON_NATIVE_CRASH = 1;
100     public static final int FAILURE_REASON_EXPLICIT_HEALTH_CHECK = 2;
101     public static final int FAILURE_REASON_APP_CRASH = 3;
102     public static final int FAILURE_REASON_APP_NOT_RESPONDING = 4;
103 
104     @IntDef(prefix = { "FAILURE_REASON_" }, value = {
105             FAILURE_REASON_UNKNOWN,
106             FAILURE_REASON_NATIVE_CRASH,
107             FAILURE_REASON_EXPLICIT_HEALTH_CHECK,
108             FAILURE_REASON_APP_CRASH,
109             FAILURE_REASON_APP_NOT_RESPONDING
110     })
111     @Retention(RetentionPolicy.SOURCE)
112     public @interface FailureReasons {}
113 
114     // Duration to count package failures before it resets to 0
115     @VisibleForTesting
116     static final int DEFAULT_TRIGGER_FAILURE_DURATION_MS =
117             (int) TimeUnit.MINUTES.toMillis(1);
118     // Number of package failures within the duration above before we notify observers
119     @VisibleForTesting
120     static final int DEFAULT_TRIGGER_FAILURE_COUNT = 5;
121     @VisibleForTesting
122     static final long DEFAULT_OBSERVING_DURATION_MS = TimeUnit.DAYS.toMillis(2);
123     // Sliding window for tracking how many mitigation calls were made for a package.
124     @VisibleForTesting
125     static final long DEFAULT_DEESCALATION_WINDOW_MS = TimeUnit.HOURS.toMillis(1);
126     // Whether explicit health checks are enabled or not
127     private static final boolean DEFAULT_EXPLICIT_HEALTH_CHECK_ENABLED = true;
128 
129     @VisibleForTesting
130     static final int DEFAULT_BOOT_LOOP_TRIGGER_COUNT = 5;
131     static final long DEFAULT_BOOT_LOOP_TRIGGER_WINDOW_MS = TimeUnit.MINUTES.toMillis(10);
132 
133     // These properties track individual system server boot events, and are reset once the boot
134     // threshold is met, or the boot loop trigger window is exceeded between boot events.
135     private static final String PROP_RESCUE_BOOT_COUNT = "sys.rescue_boot_count";
136     private static final String PROP_RESCUE_BOOT_START = "sys.rescue_boot_start";
137 
138     // These properties track multiple calls made to observers tracking boot loops. They are reset
139     // when the de-escalation window is exceeded between boot events.
140     private static final String PROP_BOOT_MITIGATION_WINDOW_START = "sys.boot_mitigation_start";
141     private static final String PROP_BOOT_MITIGATION_COUNT = "sys.boot_mitigation_count";
142 
143     private long mNumberOfNativeCrashPollsRemaining;
144 
145     private static final int DB_VERSION = 1;
146     private static final String TAG_PACKAGE_WATCHDOG = "package-watchdog";
147     private static final String TAG_PACKAGE = "package";
148     private static final String TAG_OBSERVER = "observer";
149     private static final String ATTR_VERSION = "version";
150     private static final String ATTR_NAME = "name";
151     private static final String ATTR_DURATION = "duration";
152     private static final String ATTR_EXPLICIT_HEALTH_CHECK_DURATION = "health-check-duration";
153     private static final String ATTR_PASSED_HEALTH_CHECK = "passed-health-check";
154     private static final String ATTR_MITIGATION_CALLS = "mitigation-calls";
155 
156     // A file containing information about the current mitigation count in the case of a boot loop.
157     // This allows boot loop information to persist in the case of an fs-checkpoint being
158     // aborted.
159     private static final String METADATA_FILE = "/metadata/watchdog/mitigation_count.txt";
160 
161     @GuardedBy("PackageWatchdog.class")
162     private static PackageWatchdog sPackageWatchdog;
163 
164     private final Object mLock = new Object();
165     // System server context
166     private final Context mContext;
167     // Handler to run short running tasks
168     private final Handler mShortTaskHandler;
169     // Handler for processing IO and long running tasks
170     private final Handler mLongTaskHandler;
171     // Contains (observer-name -> observer-handle) that have ever been registered from
172     // previous boots. Observers with all packages expired are periodically pruned.
173     // It is saved to disk on system shutdown and repopulated on startup so it survives reboots.
174     @GuardedBy("mLock")
175     private final ArrayMap<String, ObserverInternal> mAllObservers = new ArrayMap<>();
176     // File containing the XML data of monitored packages /data/system/package-watchdog.xml
177     private final AtomicFile mPolicyFile;
178     private final ExplicitHealthCheckController mHealthCheckController;
179     private final ConnectivityModuleConnector mConnectivityModuleConnector;
180     private final Runnable mSyncRequests = this::syncRequests;
181     private final Runnable mSyncStateWithScheduledReason = this::syncStateWithScheduledReason;
182     private final Runnable mSaveToFile = this::saveToFile;
183     private final SystemClock mSystemClock;
184     private final BootThreshold mBootThreshold;
185     private final DeviceConfig.OnPropertiesChangedListener
186             mOnPropertyChangedListener = this::onPropertyChanged;
187 
188     // The set of packages that have been synced with the ExplicitHealthCheckController
189     @GuardedBy("mLock")
190     private Set<String> mRequestedHealthCheckPackages = new ArraySet<>();
191     @GuardedBy("mLock")
192     private boolean mIsPackagesReady;
193     // Flag to control whether explicit health checks are supported or not
194     @GuardedBy("mLock")
195     private boolean mIsHealthCheckEnabled = DEFAULT_EXPLICIT_HEALTH_CHECK_ENABLED;
196     @GuardedBy("mLock")
197     private int mTriggerFailureDurationMs = DEFAULT_TRIGGER_FAILURE_DURATION_MS;
198     @GuardedBy("mLock")
199     private int mTriggerFailureCount = DEFAULT_TRIGGER_FAILURE_COUNT;
200     // SystemClock#uptimeMillis when we last executed #syncState
201     // 0 if no prune is scheduled.
202     @GuardedBy("mLock")
203     private long mUptimeAtLastStateSync;
204     // If true, sync explicit health check packages with the ExplicitHealthCheckController.
205     @GuardedBy("mLock")
206     private boolean mSyncRequired = false;
207 
208     @FunctionalInterface
209     @VisibleForTesting
210     interface SystemClock {
uptimeMillis()211         long uptimeMillis();
212     }
213 
PackageWatchdog(Context context)214     private PackageWatchdog(Context context) {
215         // Needs to be constructed inline
216         this(context, new AtomicFile(
217                         new File(new File(Environment.getDataDirectory(), "system"),
218                                 "package-watchdog.xml")),
219                 new Handler(Looper.myLooper()), BackgroundThread.getHandler(),
220                 new ExplicitHealthCheckController(context),
221                 ConnectivityModuleConnector.getInstance(),
222                 android.os.SystemClock::uptimeMillis);
223     }
224 
225     /**
226      * Creates a PackageWatchdog that allows injecting dependencies.
227      */
228     @VisibleForTesting
PackageWatchdog(Context context, AtomicFile policyFile, Handler shortTaskHandler, Handler longTaskHandler, ExplicitHealthCheckController controller, ConnectivityModuleConnector connectivityModuleConnector, SystemClock clock)229     PackageWatchdog(Context context, AtomicFile policyFile, Handler shortTaskHandler,
230             Handler longTaskHandler, ExplicitHealthCheckController controller,
231             ConnectivityModuleConnector connectivityModuleConnector, SystemClock clock) {
232         mContext = context;
233         mPolicyFile = policyFile;
234         mShortTaskHandler = shortTaskHandler;
235         mLongTaskHandler = longTaskHandler;
236         mHealthCheckController = controller;
237         mConnectivityModuleConnector = connectivityModuleConnector;
238         mSystemClock = clock;
239         mNumberOfNativeCrashPollsRemaining = NUMBER_OF_NATIVE_CRASH_POLLS;
240         mBootThreshold = new BootThreshold(DEFAULT_BOOT_LOOP_TRIGGER_COUNT,
241                 DEFAULT_BOOT_LOOP_TRIGGER_WINDOW_MS);
242         loadFromFile();
243         sPackageWatchdog = this;
244     }
245 
246     /** Creates or gets singleton instance of PackageWatchdog. */
getInstance(Context context)247     public static PackageWatchdog getInstance(Context context) {
248         synchronized (PackageWatchdog.class) {
249             if (sPackageWatchdog == null) {
250                 new PackageWatchdog(context);
251             }
252             return sPackageWatchdog;
253         }
254     }
255 
256     /**
257      * Called during boot to notify when packages are ready on the device so we can start
258      * binding.
259      */
onPackagesReady()260     public void onPackagesReady() {
261         synchronized (mLock) {
262             mIsPackagesReady = true;
263             mHealthCheckController.setCallbacks(packageName -> onHealthCheckPassed(packageName),
264                     packages -> onSupportedPackages(packages),
265                     this::onSyncRequestNotified);
266             setPropertyChangedListenerLocked();
267             updateConfigs();
268             registerConnectivityModuleHealthListener();
269         }
270     }
271 
272     /**
273      * Registers {@code observer} to listen for package failures. Add a new ObserverInternal for
274      * this observer if it does not already exist.
275      *
276      * <p>Observers are expected to call this on boot. It does not specify any packages but
277      * it will resume observing any packages requested from a previous boot.
278      */
registerHealthObserver(PackageHealthObserver observer)279     public void registerHealthObserver(PackageHealthObserver observer) {
280         synchronized (mLock) {
281             ObserverInternal internalObserver = mAllObservers.get(observer.getName());
282             if (internalObserver != null) {
283                 internalObserver.registeredObserver = observer;
284             } else {
285                 internalObserver = new ObserverInternal(observer.getName(), new ArrayList<>());
286                 internalObserver.registeredObserver = observer;
287                 mAllObservers.put(observer.getName(), internalObserver);
288                 syncState("added new observer");
289             }
290         }
291     }
292 
293     /**
294      * Starts observing the health of the {@code packages} for {@code observer} and notifies
295      * {@code observer} of any package failures within the monitoring duration.
296      *
297      * <p>If monitoring a package supporting explicit health check, at the end of the monitoring
298      * duration if {@link #onHealthCheckPassed} was never called,
299      * {@link PackageHealthObserver#execute} will be called as if the package failed.
300      *
301      * <p>If {@code observer} is already monitoring a package in {@code packageNames},
302      * the monitoring window of that package will be reset to {@code durationMs} and the health
303      * check state will be reset to a default depending on if the package is contained in
304      * {@link mPackagesWithExplicitHealthCheckEnabled}.
305      *
306      * <p>If {@code packageNames} is empty, this will be a no-op.
307      *
308      * <p>If {@code durationMs} is less than 1, a default monitoring duration
309      * {@link #DEFAULT_OBSERVING_DURATION_MS} will be used.
310      */
startObservingHealth(PackageHealthObserver observer, List<String> packageNames, long durationMs)311     public void startObservingHealth(PackageHealthObserver observer, List<String> packageNames,
312             long durationMs) {
313         if (packageNames.isEmpty()) {
314             Slog.wtf(TAG, "No packages to observe, " + observer.getName());
315             return;
316         }
317         if (durationMs < 1) {
318             Slog.wtf(TAG, "Invalid duration " + durationMs + "ms for observer "
319                     + observer.getName() + ". Not observing packages " + packageNames);
320             durationMs = DEFAULT_OBSERVING_DURATION_MS;
321         }
322 
323         List<MonitoredPackage> packages = new ArrayList<>();
324         for (int i = 0; i < packageNames.size(); i++) {
325             // Health checks not available yet so health check state will start INACTIVE
326             MonitoredPackage pkg = newMonitoredPackage(packageNames.get(i), durationMs, false);
327             if (pkg != null) {
328                 packages.add(pkg);
329             } else {
330                 Slog.w(TAG, "Failed to create MonitoredPackage for pkg=" + packageNames.get(i));
331             }
332         }
333 
334         if (packages.isEmpty()) {
335             return;
336         }
337 
338         // Sync before we add the new packages to the observers. This will #pruneObservers,
339         // causing any elapsed time to be deducted from all existing packages before we add new
340         // packages. This maintains the invariant that the elapsed time for ALL (new and existing)
341         // packages is the same.
342         mLongTaskHandler.post(() -> {
343             syncState("observing new packages");
344 
345             synchronized (mLock) {
346                 ObserverInternal oldObserver = mAllObservers.get(observer.getName());
347                 if (oldObserver == null) {
348                     Slog.d(TAG, observer.getName() + " started monitoring health "
349                             + "of packages " + packageNames);
350                     mAllObservers.put(observer.getName(),
351                             new ObserverInternal(observer.getName(), packages));
352                 } else {
353                     Slog.d(TAG, observer.getName() + " added the following "
354                             + "packages to monitor " + packageNames);
355                     oldObserver.updatePackagesLocked(packages);
356                 }
357             }
358 
359             // Register observer in case not already registered
360             registerHealthObserver(observer);
361 
362             // Sync after we add the new packages to the observers. We may have received packges
363             // requiring an earlier schedule than we are currently scheduled for.
364             syncState("updated observers");
365         });
366 
367     }
368 
369     /**
370      * Unregisters {@code observer} from listening to package failure.
371      * Additionally, this stops observing any packages that may have previously been observed
372      * even from a previous boot.
373      */
unregisterHealthObserver(PackageHealthObserver observer)374     public void unregisterHealthObserver(PackageHealthObserver observer) {
375         mLongTaskHandler.post(() -> {
376             synchronized (mLock) {
377                 mAllObservers.remove(observer.getName());
378             }
379             syncState("unregistering observer: " + observer.getName());
380         });
381     }
382 
383     /**
384      * Called when a process fails due to a crash, ANR or explicit health check.
385      *
386      * <p>For each package contained in the process, one registered observer with the least user
387      * impact will be notified for mitigation.
388      *
389      * <p>This method could be called frequently if there is a severe problem on the device.
390      */
onPackageFailure(List<VersionedPackage> packages, @FailureReasons int failureReason)391     public void onPackageFailure(List<VersionedPackage> packages,
392             @FailureReasons int failureReason) {
393         if (packages == null) {
394             Slog.w(TAG, "Could not resolve a list of failing packages");
395             return;
396         }
397         mLongTaskHandler.post(() -> {
398             synchronized (mLock) {
399                 if (mAllObservers.isEmpty()) {
400                     return;
401                 }
402                 boolean requiresImmediateAction = (failureReason == FAILURE_REASON_NATIVE_CRASH
403                         || failureReason == FAILURE_REASON_EXPLICIT_HEALTH_CHECK);
404                 if (requiresImmediateAction) {
405                     handleFailureImmediately(packages, failureReason);
406                 } else {
407                     for (int pIndex = 0; pIndex < packages.size(); pIndex++) {
408                         VersionedPackage versionedPackage = packages.get(pIndex);
409                         // Observer that will receive failure for versionedPackage
410                         PackageHealthObserver currentObserverToNotify = null;
411                         int currentObserverImpact = Integer.MAX_VALUE;
412                         MonitoredPackage currentMonitoredPackage = null;
413 
414                         // Find observer with least user impact
415                         for (int oIndex = 0; oIndex < mAllObservers.size(); oIndex++) {
416                             ObserverInternal observer = mAllObservers.valueAt(oIndex);
417                             PackageHealthObserver registeredObserver = observer.registeredObserver;
418                             if (registeredObserver != null
419                                     && observer.onPackageFailureLocked(
420                                     versionedPackage.getPackageName())) {
421                                 MonitoredPackage p = observer.getMonitoredPackage(
422                                         versionedPackage.getPackageName());
423                                 int mitigationCount = 1;
424                                 if (p != null) {
425                                     mitigationCount = p.getMitigationCountLocked() + 1;
426                                 }
427                                 int impact = registeredObserver.onHealthCheckFailed(
428                                         versionedPackage, failureReason, mitigationCount);
429                                 if (impact != PackageHealthObserverImpact.USER_IMPACT_NONE
430                                         && impact < currentObserverImpact) {
431                                     currentObserverToNotify = registeredObserver;
432                                     currentObserverImpact = impact;
433                                     currentMonitoredPackage = p;
434                                 }
435                             }
436                         }
437 
438                         // Execute action with least user impact
439                         if (currentObserverToNotify != null) {
440                             int mitigationCount = 1;
441                             if (currentMonitoredPackage != null) {
442                                 currentMonitoredPackage.noteMitigationCallLocked();
443                                 mitigationCount =
444                                         currentMonitoredPackage.getMitigationCountLocked();
445                             }
446                             currentObserverToNotify.execute(versionedPackage,
447                                     failureReason, mitigationCount);
448                         }
449                     }
450                 }
451             }
452         });
453     }
454 
455     /**
456      * For native crashes or explicit health check failures, call directly into each observer to
457      * mitigate the error without going through failure threshold logic.
458      */
handleFailureImmediately(List<VersionedPackage> packages, @FailureReasons int failureReason)459     private void handleFailureImmediately(List<VersionedPackage> packages,
460             @FailureReasons int failureReason) {
461         VersionedPackage failingPackage = packages.size() > 0 ? packages.get(0) : null;
462         PackageHealthObserver currentObserverToNotify = null;
463         int currentObserverImpact = Integer.MAX_VALUE;
464         for (ObserverInternal observer: mAllObservers.values()) {
465             PackageHealthObserver registeredObserver = observer.registeredObserver;
466             if (registeredObserver != null) {
467                 int impact = registeredObserver.onHealthCheckFailed(
468                         failingPackage, failureReason, 1);
469                 if (impact != PackageHealthObserverImpact.USER_IMPACT_NONE
470                         && impact < currentObserverImpact) {
471                     currentObserverToNotify = registeredObserver;
472                     currentObserverImpact = impact;
473                 }
474             }
475         }
476         if (currentObserverToNotify != null) {
477             currentObserverToNotify.execute(failingPackage,  failureReason, 1);
478         }
479     }
480 
481     /**
482      * Called when the system server boots. If the system server is detected to be in a boot loop,
483      * query each observer and perform the mitigation action with the lowest user impact.
484      */
noteBoot()485     public void noteBoot() {
486         synchronized (mLock) {
487             if (mBootThreshold.incrementAndTest()) {
488                 mBootThreshold.reset();
489                 int mitigationCount = mBootThreshold.getMitigationCount() + 1;
490                 PackageHealthObserver currentObserverToNotify = null;
491                 int currentObserverImpact = Integer.MAX_VALUE;
492                 for (int i = 0; i < mAllObservers.size(); i++) {
493                     final ObserverInternal observer = mAllObservers.valueAt(i);
494                     PackageHealthObserver registeredObserver = observer.registeredObserver;
495                     if (registeredObserver != null) {
496                         int impact = registeredObserver.onBootLoop(mitigationCount);
497                         if (impact != PackageHealthObserverImpact.USER_IMPACT_NONE
498                                 && impact < currentObserverImpact) {
499                             currentObserverToNotify = registeredObserver;
500                             currentObserverImpact = impact;
501                         }
502                     }
503                 }
504                 if (currentObserverToNotify != null) {
505                     mBootThreshold.setMitigationCount(mitigationCount);
506                     mBootThreshold.saveMitigationCountToMetadata();
507                     currentObserverToNotify.executeBootLoopMitigation(mitigationCount);
508                 }
509             }
510         }
511     }
512 
513     // TODO(b/120598832): Optimize write? Maybe only write a separate smaller file? Also
514     // avoid holding lock?
515     // This currently adds about 7ms extra to shutdown thread
516     /** Writes the package information to file during shutdown. */
writeNow()517     public void writeNow() {
518         synchronized (mLock) {
519             // Must only run synchronous tasks as this runs on the ShutdownThread and no other
520             // thread is guaranteed to run during shutdown.
521             if (!mAllObservers.isEmpty()) {
522                 mLongTaskHandler.removeCallbacks(mSaveToFile);
523                 pruneObserversLocked();
524                 saveToFile();
525                 Slog.i(TAG, "Last write to update package durations");
526             }
527         }
528     }
529 
530     /**
531      * Enables or disables explicit health checks.
532      * <p> If explicit health checks are enabled, the health check service is started.
533      * <p> If explicit health checks are disabled, pending explicit health check requests are
534      * passed and the health check service is stopped.
535      */
setExplicitHealthCheckEnabled(boolean enabled)536     private void setExplicitHealthCheckEnabled(boolean enabled) {
537         synchronized (mLock) {
538             mIsHealthCheckEnabled = enabled;
539             mHealthCheckController.setEnabled(enabled);
540             mSyncRequired = true;
541             // Prune to update internal state whenever health check is enabled/disabled
542             syncState("health check state " + (enabled ? "enabled" : "disabled"));
543         }
544     }
545 
546     /**
547      * This method should be only called on mShortTaskHandler, since it modifies
548      * {@link #mNumberOfNativeCrashPollsRemaining}.
549      */
checkAndMitigateNativeCrashes()550     private void checkAndMitigateNativeCrashes() {
551         mNumberOfNativeCrashPollsRemaining--;
552         // Check if native watchdog reported a crash
553         if ("1".equals(SystemProperties.get("sys.init.updatable_crashing"))) {
554             // We rollback everything available when crash is unattributable
555             onPackageFailure(Collections.EMPTY_LIST, FAILURE_REASON_NATIVE_CRASH);
556             // we stop polling after an attempt to execute rollback, regardless of whether the
557             // attempt succeeds or not
558         } else {
559             if (mNumberOfNativeCrashPollsRemaining > 0) {
560                 mShortTaskHandler.postDelayed(() -> checkAndMitigateNativeCrashes(),
561                         NATIVE_CRASH_POLLING_INTERVAL_MILLIS);
562             }
563         }
564     }
565 
566     /**
567      * Since this method can eventually trigger a rollback, it should be called
568      * only once boot has completed {@code onBootCompleted} and not earlier, because the install
569      * session must be entirely completed before we try to rollback.
570      */
scheduleCheckAndMitigateNativeCrashes()571     public void scheduleCheckAndMitigateNativeCrashes() {
572         Slog.i(TAG, "Scheduling " + mNumberOfNativeCrashPollsRemaining + " polls to check "
573                 + "and mitigate native crashes");
574         mShortTaskHandler.post(()->checkAndMitigateNativeCrashes());
575     }
576 
577     /** Possible severity values of the user impact of a {@link PackageHealthObserver#execute}. */
578     @Retention(SOURCE)
579     @IntDef(value = {PackageHealthObserverImpact.USER_IMPACT_NONE,
580                      PackageHealthObserverImpact.USER_IMPACT_LOW,
581                      PackageHealthObserverImpact.USER_IMPACT_MEDIUM,
582                      PackageHealthObserverImpact.USER_IMPACT_HIGH})
583     public @interface PackageHealthObserverImpact {
584         /** No action to take. */
585         int USER_IMPACT_NONE = 0;
586         /** Action has low user impact, user of a device will barely notice. */
587         int USER_IMPACT_LOW = 1;
588         /** Action has medium user impact, user of a device will likely notice. */
589         int USER_IMPACT_MEDIUM = 3;
590         /** Action has high user impact, a last resort, user of a device will be very frustrated. */
591         int USER_IMPACT_HIGH = 5;
592     }
593 
594     /** Register instances of this interface to receive notifications on package failure. */
595     public interface PackageHealthObserver {
596         /**
597          * Called when health check fails for the {@code versionedPackage}.
598          *
599          * @param versionedPackage the package that is failing. This may be null if a native
600          *                          service is crashing.
601          * @param failureReason   the type of failure that is occurring.
602          * @param mitigationCount the number of times mitigation has been called for this package
603          *                        (including this time).
604          *
605          *
606          * @return any one of {@link PackageHealthObserverImpact} to express the impact
607          * to the user on {@link #execute}
608          */
onHealthCheckFailed( @ullable VersionedPackage versionedPackage, @FailureReasons int failureReason, int mitigationCount)609         @PackageHealthObserverImpact int onHealthCheckFailed(
610                 @Nullable VersionedPackage versionedPackage,
611                 @FailureReasons int failureReason,
612                 int mitigationCount);
613 
614         /**
615          * Executes mitigation for {@link #onHealthCheckFailed}.
616          *
617          * @param versionedPackage the package that is failing. This may be null if a native
618          *                          service is crashing.
619          * @param failureReason   the type of failure that is occurring.
620          * @param mitigationCount the number of times mitigation has been called for this package
621          *                        (including this time).
622          * @return {@code true} if action was executed successfully, {@code false} otherwise
623          */
execute(@ullable VersionedPackage versionedPackage, @FailureReasons int failureReason, int mitigationCount)624         boolean execute(@Nullable VersionedPackage versionedPackage,
625                 @FailureReasons int failureReason, int mitigationCount);
626 
627 
628         /**
629          * Called when the system server has booted several times within a window of time, defined
630          * by {@link #mBootThreshold}
631          *
632          * @param mitigationCount the number of times mitigation has been attempted for this
633          *                        boot loop (including this time).
634          */
onBootLoop(int mitigationCount)635         default @PackageHealthObserverImpact int onBootLoop(int mitigationCount) {
636             return PackageHealthObserverImpact.USER_IMPACT_NONE;
637         }
638 
639         /**
640          * Executes mitigation for {@link #onBootLoop}
641          * @param mitigationCount the number of times mitigation has been attempted for this
642          *                        boot loop (including this time).
643          */
executeBootLoopMitigation(int mitigationCount)644         default boolean executeBootLoopMitigation(int mitigationCount) {
645             return false;
646         }
647 
648         // TODO(b/120598832): Ensure uniqueness?
649         /**
650          * Identifier for the observer, should not change across device updates otherwise the
651          * watchdog may drop observing packages with the old name.
652          */
getName()653         String getName();
654 
655         /**
656          * An observer will not be pruned if this is set, even if the observer is not explicitly
657          * monitoring any packages.
658          */
isPersistent()659         default boolean isPersistent() {
660             return false;
661         }
662 
663         /**
664          * Returns {@code true} if this observer wishes to observe the given package, {@code false}
665          * otherwise
666          *
667          * <p> A persistent observer may choose to start observing certain failing packages, even if
668          * it has not explicitly asked to watch the package with {@link #startObservingHealth}.
669          */
mayObservePackage(String packageName)670         default boolean mayObservePackage(String packageName) {
671             return false;
672         }
673     }
674 
675     @VisibleForTesting
getTriggerFailureCount()676     long getTriggerFailureCount() {
677         synchronized (mLock) {
678             return mTriggerFailureCount;
679         }
680     }
681 
682     @VisibleForTesting
getTriggerFailureDurationMs()683     long getTriggerFailureDurationMs() {
684         synchronized (mLock) {
685             return mTriggerFailureDurationMs;
686         }
687     }
688 
689     /**
690      * Serializes and syncs health check requests with the {@link ExplicitHealthCheckController}.
691      */
syncRequestsAsync()692     private void syncRequestsAsync() {
693         mShortTaskHandler.removeCallbacks(mSyncRequests);
694         mShortTaskHandler.post(mSyncRequests);
695     }
696 
697     /**
698      * Syncs health check requests with the {@link ExplicitHealthCheckController}.
699      * Calls to this must be serialized.
700      *
701      * @see #syncRequestsAsync
702      */
syncRequests()703     private void syncRequests() {
704         boolean syncRequired = false;
705         synchronized (mLock) {
706             if (mIsPackagesReady) {
707                 Set<String> packages = getPackagesPendingHealthChecksLocked();
708                 if (mSyncRequired || !packages.equals(mRequestedHealthCheckPackages)
709                         || packages.isEmpty()) {
710                     syncRequired = true;
711                     mRequestedHealthCheckPackages = packages;
712                 }
713             } // else, we will sync requests when packages become ready
714         }
715 
716         // Call outside lock to avoid holding lock when calling into the controller.
717         if (syncRequired) {
718             Slog.i(TAG, "Syncing health check requests for packages: "
719                     + mRequestedHealthCheckPackages);
720             mHealthCheckController.syncRequests(mRequestedHealthCheckPackages);
721             mSyncRequired = false;
722         }
723     }
724 
725     /**
726      * Updates the observers monitoring {@code packageName} that explicit health check has passed.
727      *
728      * <p> This update is strictly for registered observers at the time of the call
729      * Observers that register after this signal will have no knowledge of prior signals and will
730      * effectively behave as if the explicit health check hasn't passed for {@code packageName}.
731      *
732      * <p> {@code packageName} can still be considered failed if reported by
733      * {@link #onPackageFailureLocked} before the package expires.
734      *
735      * <p> Triggered by components outside the system server when they are fully functional after an
736      * update.
737      */
onHealthCheckPassed(String packageName)738     private void onHealthCheckPassed(String packageName) {
739         Slog.i(TAG, "Health check passed for package: " + packageName);
740         boolean isStateChanged = false;
741 
742         synchronized (mLock) {
743             for (int observerIdx = 0; observerIdx < mAllObservers.size(); observerIdx++) {
744                 ObserverInternal observer = mAllObservers.valueAt(observerIdx);
745                 MonitoredPackage monitoredPackage = observer.getMonitoredPackage(packageName);
746 
747                 if (monitoredPackage != null) {
748                     int oldState = monitoredPackage.getHealthCheckStateLocked();
749                     int newState = monitoredPackage.tryPassHealthCheckLocked();
750                     isStateChanged |= oldState != newState;
751                 }
752             }
753         }
754 
755         if (isStateChanged) {
756             syncState("health check passed for " + packageName);
757         }
758     }
759 
onSupportedPackages(List<PackageConfig> supportedPackages)760     private void onSupportedPackages(List<PackageConfig> supportedPackages) {
761         boolean isStateChanged = false;
762 
763         Map<String, Long> supportedPackageTimeouts = new ArrayMap<>();
764         Iterator<PackageConfig> it = supportedPackages.iterator();
765         while (it.hasNext()) {
766             PackageConfig info = it.next();
767             supportedPackageTimeouts.put(info.getPackageName(), info.getHealthCheckTimeoutMillis());
768         }
769 
770         synchronized (mLock) {
771             Slog.d(TAG, "Received supported packages " + supportedPackages);
772             Iterator<ObserverInternal> oit = mAllObservers.values().iterator();
773             while (oit.hasNext()) {
774                 Iterator<MonitoredPackage> pit = oit.next().getMonitoredPackages()
775                         .values().iterator();
776                 while (pit.hasNext()) {
777                     MonitoredPackage monitoredPackage = pit.next();
778                     String packageName = monitoredPackage.getName();
779                     int oldState = monitoredPackage.getHealthCheckStateLocked();
780                     int newState;
781 
782                     if (supportedPackageTimeouts.containsKey(packageName)) {
783                         // Supported packages become ACTIVE if currently INACTIVE
784                         newState = monitoredPackage.setHealthCheckActiveLocked(
785                                 supportedPackageTimeouts.get(packageName));
786                     } else {
787                         // Unsupported packages are marked as PASSED unless already FAILED
788                         newState = monitoredPackage.tryPassHealthCheckLocked();
789                     }
790                     isStateChanged |= oldState != newState;
791                 }
792             }
793         }
794 
795         if (isStateChanged) {
796             syncState("updated health check supported packages " + supportedPackages);
797         }
798     }
799 
onSyncRequestNotified()800     private void onSyncRequestNotified() {
801         synchronized (mLock) {
802             mSyncRequired = true;
803             syncRequestsAsync();
804         }
805     }
806 
807     @GuardedBy("mLock")
getPackagesPendingHealthChecksLocked()808     private Set<String> getPackagesPendingHealthChecksLocked() {
809         Set<String> packages = new ArraySet<>();
810         Iterator<ObserverInternal> oit = mAllObservers.values().iterator();
811         while (oit.hasNext()) {
812             ObserverInternal observer = oit.next();
813             Iterator<MonitoredPackage> pit =
814                     observer.getMonitoredPackages().values().iterator();
815             while (pit.hasNext()) {
816                 MonitoredPackage monitoredPackage = pit.next();
817                 String packageName = monitoredPackage.getName();
818                 if (monitoredPackage.isPendingHealthChecksLocked()) {
819                     packages.add(packageName);
820                 }
821             }
822         }
823         return packages;
824     }
825 
826     /**
827      * Syncs the state of the observers.
828      *
829      * <p> Prunes all observers, saves new state to disk, syncs health check requests with the
830      * health check service and schedules the next state sync.
831      */
syncState(String reason)832     private void syncState(String reason) {
833         synchronized (mLock) {
834             Slog.i(TAG, "Syncing state, reason: " + reason);
835             pruneObserversLocked();
836 
837             saveToFileAsync();
838             syncRequestsAsync();
839 
840             // Done syncing state, schedule the next state sync
841             scheduleNextSyncStateLocked();
842         }
843     }
844 
syncStateWithScheduledReason()845     private void syncStateWithScheduledReason() {
846         syncState("scheduled");
847     }
848 
849     @GuardedBy("mLock")
scheduleNextSyncStateLocked()850     private void scheduleNextSyncStateLocked() {
851         long durationMs = getNextStateSyncMillisLocked();
852         mShortTaskHandler.removeCallbacks(mSyncStateWithScheduledReason);
853         if (durationMs == Long.MAX_VALUE) {
854             Slog.i(TAG, "Cancelling state sync, nothing to sync");
855             mUptimeAtLastStateSync = 0;
856         } else {
857             mUptimeAtLastStateSync = mSystemClock.uptimeMillis();
858             mShortTaskHandler.postDelayed(mSyncStateWithScheduledReason, durationMs);
859         }
860     }
861 
862     /**
863      * Returns the next duration in millis to sync the watchdog state.
864      *
865      * @returns Long#MAX_VALUE if there are no observed packages.
866      */
867     @GuardedBy("mLock")
getNextStateSyncMillisLocked()868     private long getNextStateSyncMillisLocked() {
869         long shortestDurationMs = Long.MAX_VALUE;
870         for (int oIndex = 0; oIndex < mAllObservers.size(); oIndex++) {
871             ArrayMap<String, MonitoredPackage> packages = mAllObservers.valueAt(oIndex)
872                     .getMonitoredPackages();
873             for (int pIndex = 0; pIndex < packages.size(); pIndex++) {
874                 MonitoredPackage mp = packages.valueAt(pIndex);
875                 long duration = mp.getShortestScheduleDurationMsLocked();
876                 if (duration < shortestDurationMs) {
877                     shortestDurationMs = duration;
878                 }
879             }
880         }
881         return shortestDurationMs;
882     }
883 
884     /**
885      * Removes {@code elapsedMs} milliseconds from all durations on monitored packages
886      * and updates other internal state.
887      */
888     @GuardedBy("mLock")
pruneObserversLocked()889     private void pruneObserversLocked() {
890         long elapsedMs = mUptimeAtLastStateSync == 0
891                 ? 0 : mSystemClock.uptimeMillis() - mUptimeAtLastStateSync;
892         if (elapsedMs <= 0) {
893             Slog.i(TAG, "Not pruning observers, elapsed time: " + elapsedMs + "ms");
894             return;
895         }
896 
897         Iterator<ObserverInternal> it = mAllObservers.values().iterator();
898         while (it.hasNext()) {
899             ObserverInternal observer = it.next();
900             Set<MonitoredPackage> failedPackages =
901                     observer.prunePackagesLocked(elapsedMs);
902             if (!failedPackages.isEmpty()) {
903                 onHealthCheckFailed(observer, failedPackages);
904             }
905             if (observer.getMonitoredPackages().isEmpty() && (observer.registeredObserver == null
906                     || !observer.registeredObserver.isPersistent())) {
907                 Slog.i(TAG, "Discarding observer " + observer.name + ". All packages expired");
908                 it.remove();
909             }
910         }
911     }
912 
onHealthCheckFailed(ObserverInternal observer, Set<MonitoredPackage> failedPackages)913     private void onHealthCheckFailed(ObserverInternal observer,
914             Set<MonitoredPackage> failedPackages) {
915         mLongTaskHandler.post(() -> {
916             synchronized (mLock) {
917                 PackageHealthObserver registeredObserver = observer.registeredObserver;
918                 if (registeredObserver != null) {
919                     Iterator<MonitoredPackage> it = failedPackages.iterator();
920                     while (it.hasNext()) {
921                         VersionedPackage versionedPkg = getVersionedPackage(it.next().getName());
922                         if (versionedPkg != null) {
923                             Slog.i(TAG,
924                                     "Explicit health check failed for package " + versionedPkg);
925                             registeredObserver.execute(versionedPkg,
926                                     PackageWatchdog.FAILURE_REASON_EXPLICIT_HEALTH_CHECK, 1);
927                         }
928                     }
929                 }
930             }
931         });
932     }
933 
934     /**
935      * Gets PackageInfo for the given package. Matches any user and apex.
936      *
937      * @throws PackageManager.NameNotFoundException if no such package is installed.
938      */
getPackageInfo(String packageName)939     private PackageInfo getPackageInfo(String packageName)
940             throws PackageManager.NameNotFoundException {
941         PackageManager pm = mContext.getPackageManager();
942         try {
943             // The MATCH_ANY_USER flag doesn't mix well with the MATCH_APEX
944             // flag, so make two separate attempts to get the package info.
945             // We don't need both flags at the same time because we assume
946             // apex files are always installed for all users.
947             return pm.getPackageInfo(packageName, PackageManager.MATCH_ANY_USER);
948         } catch (PackageManager.NameNotFoundException e) {
949             return pm.getPackageInfo(packageName, PackageManager.MATCH_APEX);
950         }
951     }
952 
953     @Nullable
getVersionedPackage(String packageName)954     private VersionedPackage getVersionedPackage(String packageName) {
955         final PackageManager pm = mContext.getPackageManager();
956         if (pm == null || TextUtils.isEmpty(packageName)) {
957             return null;
958         }
959         try {
960             final long versionCode = getPackageInfo(packageName).getLongVersionCode();
961             return new VersionedPackage(packageName, versionCode);
962         } catch (PackageManager.NameNotFoundException e) {
963             return null;
964         }
965     }
966 
967     /**
968      * Loads mAllObservers from file.
969      *
970      * <p>Note that this is <b>not</b> thread safe and should only called be called
971      * from the constructor.
972      */
loadFromFile()973     private void loadFromFile() {
974         InputStream infile = null;
975         mAllObservers.clear();
976         try {
977             infile = mPolicyFile.openRead();
978             final TypedXmlPullParser parser = Xml.resolvePullParser(infile);
979             XmlUtils.beginDocument(parser, TAG_PACKAGE_WATCHDOG);
980             int outerDepth = parser.getDepth();
981             while (XmlUtils.nextElementWithin(parser, outerDepth)) {
982                 ObserverInternal observer = ObserverInternal.read(parser, this);
983                 if (observer != null) {
984                     mAllObservers.put(observer.name, observer);
985                 }
986             }
987         } catch (FileNotFoundException e) {
988             // Nothing to monitor
989         } catch (IOException | NumberFormatException | XmlPullParserException e) {
990             Slog.wtf(TAG, "Unable to read monitored packages, deleting file", e);
991             mPolicyFile.delete();
992         } finally {
993             IoUtils.closeQuietly(infile);
994         }
995     }
996 
onPropertyChanged(DeviceConfig.Properties properties)997     private void onPropertyChanged(DeviceConfig.Properties properties) {
998         try {
999             updateConfigs();
1000         } catch (Exception ignore) {
1001             Slog.w(TAG, "Failed to reload device config changes");
1002         }
1003     }
1004 
1005     /** Adds a {@link DeviceConfig#OnPropertiesChangedListener}. */
setPropertyChangedListenerLocked()1006     private void setPropertyChangedListenerLocked() {
1007         DeviceConfig.addOnPropertiesChangedListener(
1008                 DeviceConfig.NAMESPACE_ROLLBACK,
1009                 mContext.getMainExecutor(),
1010                 mOnPropertyChangedListener);
1011     }
1012 
1013     @VisibleForTesting
removePropertyChangedListener()1014     void removePropertyChangedListener() {
1015         DeviceConfig.removeOnPropertiesChangedListener(mOnPropertyChangedListener);
1016     }
1017 
1018     /**
1019      * Health check is enabled or disabled after reading the flags
1020      * from DeviceConfig.
1021      */
1022     @VisibleForTesting
updateConfigs()1023     void updateConfigs() {
1024         synchronized (mLock) {
1025             mTriggerFailureCount = DeviceConfig.getInt(
1026                     DeviceConfig.NAMESPACE_ROLLBACK,
1027                     PROPERTY_WATCHDOG_TRIGGER_FAILURE_COUNT,
1028                     DEFAULT_TRIGGER_FAILURE_COUNT);
1029             if (mTriggerFailureCount <= 0) {
1030                 mTriggerFailureCount = DEFAULT_TRIGGER_FAILURE_COUNT;
1031             }
1032 
1033             mTriggerFailureDurationMs = DeviceConfig.getInt(
1034                     DeviceConfig.NAMESPACE_ROLLBACK,
1035                     PROPERTY_WATCHDOG_TRIGGER_DURATION_MILLIS,
1036                     DEFAULT_TRIGGER_FAILURE_DURATION_MS);
1037             if (mTriggerFailureDurationMs <= 0) {
1038                 mTriggerFailureDurationMs = DEFAULT_TRIGGER_FAILURE_DURATION_MS;
1039             }
1040 
1041             setExplicitHealthCheckEnabled(DeviceConfig.getBoolean(
1042                     DeviceConfig.NAMESPACE_ROLLBACK,
1043                     PROPERTY_WATCHDOG_EXPLICIT_HEALTH_CHECK_ENABLED,
1044                     DEFAULT_EXPLICIT_HEALTH_CHECK_ENABLED));
1045         }
1046     }
1047 
registerConnectivityModuleHealthListener()1048     private void registerConnectivityModuleHealthListener() {
1049         // TODO: have an internal method to trigger a rollback by reporting high severity errors,
1050         // and rely on ActivityManager to inform the watchdog of severe network stack crashes
1051         // instead of having this listener in parallel.
1052         mConnectivityModuleConnector.registerHealthListener(
1053                 packageName -> {
1054                     final VersionedPackage pkg = getVersionedPackage(packageName);
1055                     if (pkg == null) {
1056                         Slog.wtf(TAG, "NetworkStack failed but could not find its package");
1057                         return;
1058                     }
1059                     final List<VersionedPackage> pkgList = Collections.singletonList(pkg);
1060                     onPackageFailure(pkgList, FAILURE_REASON_EXPLICIT_HEALTH_CHECK);
1061                 });
1062     }
1063 
1064     /**
1065      * Persists mAllObservers to file. Threshold information is ignored.
1066      */
saveToFile()1067     private boolean saveToFile() {
1068         Slog.i(TAG, "Saving observer state to file");
1069         synchronized (mLock) {
1070             FileOutputStream stream;
1071             try {
1072                 stream = mPolicyFile.startWrite();
1073             } catch (IOException e) {
1074                 Slog.w(TAG, "Cannot update monitored packages", e);
1075                 return false;
1076             }
1077 
1078             try {
1079                 TypedXmlSerializer out = Xml.resolveSerializer(stream);
1080                 out.startDocument(null, true);
1081                 out.startTag(null, TAG_PACKAGE_WATCHDOG);
1082                 out.attributeInt(null, ATTR_VERSION, DB_VERSION);
1083                 for (int oIndex = 0; oIndex < mAllObservers.size(); oIndex++) {
1084                     mAllObservers.valueAt(oIndex).writeLocked(out);
1085                 }
1086                 out.endTag(null, TAG_PACKAGE_WATCHDOG);
1087                 out.endDocument();
1088                 mPolicyFile.finishWrite(stream);
1089                 return true;
1090             } catch (IOException e) {
1091                 Slog.w(TAG, "Failed to save monitored packages, restoring backup", e);
1092                 mPolicyFile.failWrite(stream);
1093                 return false;
1094             } finally {
1095                 IoUtils.closeQuietly(stream);
1096             }
1097         }
1098     }
1099 
saveToFileAsync()1100     private void saveToFileAsync() {
1101         if (!mLongTaskHandler.hasCallbacks(mSaveToFile)) {
1102             mLongTaskHandler.post(mSaveToFile);
1103         }
1104     }
1105 
1106     /** Convert a {@code LongArrayQueue} to a String of comma-separated values. */
longArrayQueueToString(LongArrayQueue queue)1107     public static String longArrayQueueToString(LongArrayQueue queue) {
1108         if (queue.size() > 0) {
1109             StringBuilder sb = new StringBuilder();
1110             sb.append(queue.get(0));
1111             for (int i = 1; i < queue.size(); i++) {
1112                 sb.append(",");
1113                 sb.append(queue.get(i));
1114             }
1115             return sb.toString();
1116         }
1117         return "";
1118     }
1119 
1120     /** Parse a comma-separated String of longs into a LongArrayQueue. */
parseLongArrayQueue(String commaSeparatedValues)1121     public static LongArrayQueue parseLongArrayQueue(String commaSeparatedValues) {
1122         LongArrayQueue result = new LongArrayQueue();
1123         if (!TextUtils.isEmpty(commaSeparatedValues)) {
1124             String[] values = commaSeparatedValues.split(",");
1125             for (String value : values) {
1126                 result.addLast(Long.parseLong(value));
1127             }
1128         }
1129         return result;
1130     }
1131 
1132 
1133     /** Dump status of every observer in mAllObservers. */
dump(IndentingPrintWriter pw)1134     public void dump(IndentingPrintWriter pw) {
1135         pw.println("Package Watchdog status");
1136         pw.increaseIndent();
1137         synchronized (mLock) {
1138             for (String observerName : mAllObservers.keySet()) {
1139                 pw.println("Observer name: " + observerName);
1140                 pw.increaseIndent();
1141                 ObserverInternal observerInternal = mAllObservers.get(observerName);
1142                 observerInternal.dump(pw);
1143                 pw.decreaseIndent();
1144             }
1145         }
1146     }
1147 
1148     /**
1149      * Represents an observer monitoring a set of packages along with the failure thresholds for
1150      * each package.
1151      *
1152      * <p> Note, the PackageWatchdog#mLock must always be held when reading or writing
1153      * instances of this class.
1154      */
1155     private static class ObserverInternal {
1156         public final String name;
1157         @GuardedBy("mLock")
1158         private final ArrayMap<String, MonitoredPackage> mPackages = new ArrayMap<>();
1159         @Nullable
1160         @GuardedBy("mLock")
1161         public PackageHealthObserver registeredObserver;
1162 
/** Creates an observer called {@code name} that initially monitors {@code packages}. */
ObserverInternal(String name, List<MonitoredPackage> packages) {
    this.name = name;
    this.updatePackagesLocked(packages);
}
1167 
1168         /**
1169          * Writes important {@link MonitoredPackage} details for this observer to file.
1170          * Does not persist any package failure thresholds.
1171          */
1172         @GuardedBy("mLock")
writeLocked(TypedXmlSerializer out)1173         public boolean writeLocked(TypedXmlSerializer out) {
1174             try {
1175                 out.startTag(null, TAG_OBSERVER);
1176                 out.attribute(null, ATTR_NAME, name);
1177                 for (int i = 0; i < mPackages.size(); i++) {
1178                     MonitoredPackage p = mPackages.valueAt(i);
1179                     p.writeLocked(out);
1180                 }
1181                 out.endTag(null, TAG_OBSERVER);
1182                 return true;
1183             } catch (IOException e) {
1184                 Slog.w(TAG, "Cannot save observer", e);
1185                 return false;
1186             }
1187         }
1188 
1189         @GuardedBy("mLock")
updatePackagesLocked(List<MonitoredPackage> packages)1190         public void updatePackagesLocked(List<MonitoredPackage> packages) {
1191             for (int pIndex = 0; pIndex < packages.size(); pIndex++) {
1192                 MonitoredPackage p = packages.get(pIndex);
1193                 MonitoredPackage existingPackage = getMonitoredPackage(p.getName());
1194                 if (existingPackage != null) {
1195                     existingPackage.updateHealthCheckDuration(p.mDurationMs);
1196                 } else {
1197                     putMonitoredPackage(p);
1198                 }
1199             }
1200         }
1201 
1202         /**
1203          * Reduces the monitoring durations of all packages observed by this observer by
1204          * {@code elapsedMs}. If any duration is less than 0, the package is removed from
1205          * observation. If any health check duration is less than 0, the health check result
1206          * is evaluated.
1207          *
1208          * @return a {@link Set} of packages that were removed from the observer without explicit
1209          * health check passing, or an empty list if no package expired for which an explicit health
1210          * check was still pending
1211          */
1212         @GuardedBy("mLock")
prunePackagesLocked(long elapsedMs)1213         private Set<MonitoredPackage> prunePackagesLocked(long elapsedMs) {
1214             Set<MonitoredPackage> failedPackages = new ArraySet<>();
1215             Iterator<MonitoredPackage> it = mPackages.values().iterator();
1216             while (it.hasNext()) {
1217                 MonitoredPackage p = it.next();
1218                 int oldState = p.getHealthCheckStateLocked();
1219                 int newState = p.handleElapsedTimeLocked(elapsedMs);
1220                 if (oldState != HealthCheckState.FAILED
1221                         && newState == HealthCheckState.FAILED) {
1222                     Slog.i(TAG, "Package " + p.getName() + " failed health check");
1223                     failedPackages.add(p);
1224                 }
1225                 if (p.isExpiredLocked()) {
1226                     it.remove();
1227                 }
1228             }
1229             return failedPackages;
1230         }
1231 
1232         /**
1233          * Increments failure counts of {@code packageName}.
1234          * @returns {@code true} if failure threshold is exceeded, {@code false} otherwise
1235          */
1236         @GuardedBy("mLock")
onPackageFailureLocked(String packageName)1237         public boolean onPackageFailureLocked(String packageName) {
1238             if (getMonitoredPackage(packageName) == null && registeredObserver.isPersistent()
1239                     && registeredObserver.mayObservePackage(packageName)) {
1240                 putMonitoredPackage(sPackageWatchdog.newMonitoredPackage(
1241                         packageName, DEFAULT_OBSERVING_DURATION_MS, false));
1242             }
1243             MonitoredPackage p = getMonitoredPackage(packageName);
1244             if (p != null) {
1245                 return p.onFailureLocked();
1246             }
1247             return false;
1248         }
1249 
1250         /**
1251          * Returns the map of packages monitored by this observer.
1252          *
1253          * @return a mapping of package names to {@link MonitoredPackage} objects.
1254          */
1255         @GuardedBy("mLock")
getMonitoredPackages()1256         public ArrayMap<String, MonitoredPackage> getMonitoredPackages() {
1257             return mPackages;
1258         }
1259 
1260         /**
1261          * Returns the {@link MonitoredPackage} associated with a given package name if the
1262          * package is being monitored by this observer.
1263          *
1264          * @param packageName: the name of the package.
1265          * @return the {@link MonitoredPackage} object associated with the package name if one
1266          *         exists, {@code null} otherwise.
1267          */
1268         @GuardedBy("mLock")
1269         @Nullable
getMonitoredPackage(String packageName)1270         public MonitoredPackage getMonitoredPackage(String packageName) {
1271             return mPackages.get(packageName);
1272         }
1273 
1274         /**
1275          * Associates a {@link MonitoredPackage} with the observer.
1276          *
1277          * @param p: the {@link MonitoredPackage} to store.
1278          */
1279         @GuardedBy("mLock")
putMonitoredPackage(MonitoredPackage p)1280         public void putMonitoredPackage(MonitoredPackage p) {
1281             mPackages.put(p.getName(), p);
1282         }
1283 
1284         /**
1285          * Returns one ObserverInternal from the {@code parser} and advances its state.
1286          *
1287          * <p>Note that this method is <b>not</b> thread safe. It should only be called from
1288          * #loadFromFile which in turn is only called on construction of the
1289          * singleton PackageWatchdog.
1290          **/
read(TypedXmlPullParser parser, PackageWatchdog watchdog)1291         public static ObserverInternal read(TypedXmlPullParser parser, PackageWatchdog watchdog) {
1292             String observerName = null;
1293             if (TAG_OBSERVER.equals(parser.getName())) {
1294                 observerName = parser.getAttributeValue(null, ATTR_NAME);
1295                 if (TextUtils.isEmpty(observerName)) {
1296                     Slog.wtf(TAG, "Unable to read observer name");
1297                     return null;
1298                 }
1299             }
1300             List<MonitoredPackage> packages = new ArrayList<>();
1301             int innerDepth = parser.getDepth();
1302             try {
1303                 while (XmlUtils.nextElementWithin(parser, innerDepth)) {
1304                     if (TAG_PACKAGE.equals(parser.getName())) {
1305                         try {
1306                             MonitoredPackage pkg = watchdog.parseMonitoredPackage(parser);
1307                             if (pkg != null) {
1308                                 packages.add(pkg);
1309                             }
1310                         } catch (NumberFormatException e) {
1311                             Slog.wtf(TAG, "Skipping package for observer " + observerName, e);
1312                             continue;
1313                         }
1314                     }
1315                 }
1316             } catch (XmlPullParserException | IOException e) {
1317                 Slog.wtf(TAG, "Unable to read observer " + observerName, e);
1318                 return null;
1319             }
1320             if (packages.isEmpty()) {
1321                 return null;
1322             }
1323             return new ObserverInternal(observerName, packages);
1324         }
1325 
1326         /** Dumps information about this observer and the packages it watches. */
dump(IndentingPrintWriter pw)1327         public void dump(IndentingPrintWriter pw) {
1328             boolean isPersistent = registeredObserver != null && registeredObserver.isPersistent();
1329             pw.println("Persistent: " + isPersistent);
1330             for (String packageName : mPackages.keySet()) {
1331                 MonitoredPackage p = getMonitoredPackage(packageName);
1332                 pw.println(packageName +  ": ");
1333                 pw.increaseIndent();
1334                 pw.println("# Failures: " + p.mFailureHistory.size());
1335                 pw.println("Monitoring duration remaining: " + p.mDurationMs + "ms");
1336                 pw.println("Explicit health check duration: " + p.mHealthCheckDurationMs + "ms");
1337                 pw.println("Health check state: " + p.toString(p.mHealthCheckState));
1338                 pw.decreaseIndent();
1339             }
1340         }
1341     }
1342 
1343     @Retention(SOURCE)
1344     @IntDef(value = {
1345             HealthCheckState.ACTIVE,
1346             HealthCheckState.INACTIVE,
1347             HealthCheckState.PASSED,
1348             HealthCheckState.FAILED})
1349     public @interface HealthCheckState {
1350         // The package has not passed health check but has requested a health check
1351         int ACTIVE = 0;
1352         // The package has not passed health check and has not requested a health check
1353         int INACTIVE = 1;
1354         // The package has passed health check
1355         int PASSED = 2;
1356         // The package has failed health check
1357         int FAILED = 3;
1358     }
1359 
newMonitoredPackage( String name, long durationMs, boolean hasPassedHealthCheck)1360     MonitoredPackage newMonitoredPackage(
1361             String name, long durationMs, boolean hasPassedHealthCheck) {
1362         return newMonitoredPackage(name, durationMs, Long.MAX_VALUE, hasPassedHealthCheck,
1363                 new LongArrayQueue());
1364     }
1365 
newMonitoredPackage(String name, long durationMs, long healthCheckDurationMs, boolean hasPassedHealthCheck, LongArrayQueue mitigationCalls)1366     MonitoredPackage newMonitoredPackage(String name, long durationMs, long healthCheckDurationMs,
1367             boolean hasPassedHealthCheck, LongArrayQueue mitigationCalls) {
1368         return new MonitoredPackage(name, durationMs, healthCheckDurationMs,
1369                 hasPassedHealthCheck, mitigationCalls);
1370     }
1371 
parseMonitoredPackage(TypedXmlPullParser parser)1372     MonitoredPackage parseMonitoredPackage(TypedXmlPullParser parser)
1373             throws XmlPullParserException {
1374         String packageName = parser.getAttributeValue(null, ATTR_NAME);
1375         long duration = parser.getAttributeLong(null, ATTR_DURATION);
1376         long healthCheckDuration = parser.getAttributeLong(null,
1377                         ATTR_EXPLICIT_HEALTH_CHECK_DURATION);
1378         boolean hasPassedHealthCheck = parser.getAttributeBoolean(null, ATTR_PASSED_HEALTH_CHECK);
1379         LongArrayQueue mitigationCalls = parseLongArrayQueue(
1380                 parser.getAttributeValue(null, ATTR_MITIGATION_CALLS));
1381         return newMonitoredPackage(packageName,
1382                 duration, healthCheckDuration, hasPassedHealthCheck, mitigationCalls);
1383     }
1384 
1385     /**
1386      * Represents a package and its health check state along with the time
1387      * it should be monitored for.
1388      *
1389      * <p> Note, the PackageWatchdog#mLock must always be held when reading or writing
1390      * instances of this class.
1391      */
1392     class MonitoredPackage {
1393         private final String mPackageName;
1394         // Times when package failures happen sorted in ascending order
1395         @GuardedBy("mLock")
1396         private final LongArrayQueue mFailureHistory = new LongArrayQueue();
1397         // Times when an observer was called to mitigate this package's failure. Sorted in
1398         // ascending order.
1399         @GuardedBy("mLock")
1400         private final LongArrayQueue mMitigationCalls;
1401         // One of STATE_[ACTIVE|INACTIVE|PASSED|FAILED]. Updated on construction and after
1402         // methods that could change the health check state: handleElapsedTimeLocked and
1403         // tryPassHealthCheckLocked
1404         private int mHealthCheckState = HealthCheckState.INACTIVE;
1405         // Whether an explicit health check has passed.
1406         // This value in addition with mHealthCheckDurationMs determines the health check state
1407         // of the package, see #getHealthCheckStateLocked
1408         @GuardedBy("mLock")
1409         private boolean mHasPassedHealthCheck;
1410         // System uptime duration to monitor package.
1411         @GuardedBy("mLock")
1412         private long mDurationMs;
1413         // System uptime duration to check the result of an explicit health check
1414         // Initially, MAX_VALUE until we get a value from the health check service
1415         // and request health checks.
1416         // This value in addition with mHasPassedHealthCheck determines the health check state
1417         // of the package, see #getHealthCheckStateLocked
1418         @GuardedBy("mLock")
1419         private long mHealthCheckDurationMs = Long.MAX_VALUE;
1420 
MonitoredPackage(String packageName, long durationMs, long healthCheckDurationMs, boolean hasPassedHealthCheck, LongArrayQueue mitigationCalls)1421         MonitoredPackage(String packageName, long durationMs,
1422                 long healthCheckDurationMs, boolean hasPassedHealthCheck,
1423                 LongArrayQueue mitigationCalls) {
1424             mPackageName = packageName;
1425             mDurationMs = durationMs;
1426             mHealthCheckDurationMs = healthCheckDurationMs;
1427             mHasPassedHealthCheck = hasPassedHealthCheck;
1428             mMitigationCalls = mitigationCalls;
1429             updateHealthCheckStateLocked();
1430         }
1431 
1432         /** Writes the salient fields to disk using {@code out}. */
1433         @GuardedBy("mLock")
writeLocked(TypedXmlSerializer out)1434         public void writeLocked(TypedXmlSerializer out) throws IOException {
1435             out.startTag(null, TAG_PACKAGE);
1436             out.attribute(null, ATTR_NAME, getName());
1437             out.attributeLong(null, ATTR_DURATION, mDurationMs);
1438             out.attributeLong(null, ATTR_EXPLICIT_HEALTH_CHECK_DURATION, mHealthCheckDurationMs);
1439             out.attributeBoolean(null, ATTR_PASSED_HEALTH_CHECK, mHasPassedHealthCheck);
1440             LongArrayQueue normalizedCalls = normalizeMitigationCalls();
1441             out.attribute(null, ATTR_MITIGATION_CALLS, longArrayQueueToString(normalizedCalls));
1442             out.endTag(null, TAG_PACKAGE);
1443         }
1444 
1445         /**
1446          * Increment package failures or resets failure count depending on the last package failure.
1447          *
1448          * @return {@code true} if failure count exceeds a threshold, {@code false} otherwise
1449          */
1450         @GuardedBy("mLock")
onFailureLocked()1451         public boolean onFailureLocked() {
1452             // Sliding window algorithm: find out if there exists a window containing failures >=
1453             // mTriggerFailureCount.
1454             final long now = mSystemClock.uptimeMillis();
1455             mFailureHistory.addLast(now);
1456             while (now - mFailureHistory.peekFirst() > mTriggerFailureDurationMs) {
1457                 // Prune values falling out of the window
1458                 mFailureHistory.removeFirst();
1459             }
1460             boolean failed = mFailureHistory.size() >= mTriggerFailureCount;
1461             if (failed) {
1462                 mFailureHistory.clear();
1463             }
1464             return failed;
1465         }
1466 
1467         /**
1468          * Notes the timestamp of a mitigation call into the observer.
1469          */
1470         @GuardedBy("mLock")
noteMitigationCallLocked()1471         public void noteMitigationCallLocked() {
1472             mMitigationCalls.addLast(mSystemClock.uptimeMillis());
1473         }
1474 
1475         /**
1476          * Prunes any mitigation calls outside of the de-escalation window, and returns the
1477          * number of calls that are in the window afterwards.
1478          *
1479          * @return the number of mitigation calls made in the de-escalation window.
1480          */
1481         @GuardedBy("mLock")
getMitigationCountLocked()1482         public int getMitigationCountLocked() {
1483             try {
1484                 final long now = mSystemClock.uptimeMillis();
1485                 while (now - mMitigationCalls.peekFirst() > DEFAULT_DEESCALATION_WINDOW_MS) {
1486                     mMitigationCalls.removeFirst();
1487                 }
1488             } catch (NoSuchElementException ignore) {
1489             }
1490 
1491             return mMitigationCalls.size();
1492         }
1493 
1494         /**
1495          * Before writing to disk, make the mitigation call timestamps relative to the current
1496          * system uptime. This is because they need to be relative to the uptime which will reset
1497          * at the next boot.
1498          *
1499          * @return a LongArrayQueue of the mitigation calls relative to the current system uptime.
1500          */
1501         @GuardedBy("mLock")
normalizeMitigationCalls()1502         public LongArrayQueue normalizeMitigationCalls() {
1503             LongArrayQueue normalized = new LongArrayQueue();
1504             final long now = mSystemClock.uptimeMillis();
1505             for (int i = 0; i < mMitigationCalls.size(); i++) {
1506                 normalized.addLast(mMitigationCalls.get(i) - now);
1507             }
1508             return normalized;
1509         }
1510 
1511         /**
1512          * Sets the initial health check duration.
1513          *
1514          * @return the new health check state
1515          */
1516         @GuardedBy("mLock")
setHealthCheckActiveLocked(long initialHealthCheckDurationMs)1517         public int setHealthCheckActiveLocked(long initialHealthCheckDurationMs) {
1518             if (initialHealthCheckDurationMs <= 0) {
1519                 Slog.wtf(TAG, "Cannot set non-positive health check duration "
1520                         + initialHealthCheckDurationMs + "ms for package " + getName()
1521                         + ". Using total duration " + mDurationMs + "ms instead");
1522                 initialHealthCheckDurationMs = mDurationMs;
1523             }
1524             if (mHealthCheckState == HealthCheckState.INACTIVE) {
1525                 // Transitions to ACTIVE
1526                 mHealthCheckDurationMs = initialHealthCheckDurationMs;
1527             }
1528             return updateHealthCheckStateLocked();
1529         }
1530 
1531         /**
1532          * Updates the monitoring durations of the package.
1533          *
1534          * @return the new health check state
1535          */
1536         @GuardedBy("mLock")
handleElapsedTimeLocked(long elapsedMs)1537         public int handleElapsedTimeLocked(long elapsedMs) {
1538             if (elapsedMs <= 0) {
1539                 Slog.w(TAG, "Cannot handle non-positive elapsed time for package " + getName());
1540                 return mHealthCheckState;
1541             }
1542             // Transitions to FAILED if now <= 0 and health check not passed
1543             mDurationMs -= elapsedMs;
1544             if (mHealthCheckState == HealthCheckState.ACTIVE) {
1545                 // We only update health check durations if we have #setHealthCheckActiveLocked
1546                 // This ensures we don't leave the INACTIVE state for an unexpected elapsed time
1547                 // Transitions to FAILED if now <= 0 and health check not passed
1548                 mHealthCheckDurationMs -= elapsedMs;
1549             }
1550             return updateHealthCheckStateLocked();
1551         }
1552 
1553         /** Explicitly update the monitoring duration of the package. */
1554         @GuardedBy("mLock")
updateHealthCheckDuration(long newDurationMs)1555         public void updateHealthCheckDuration(long newDurationMs) {
1556             mDurationMs = newDurationMs;
1557         }
1558 
1559         /**
1560          * Marks the health check as passed and transitions to {@link HealthCheckState.PASSED}
1561          * if not yet {@link HealthCheckState.FAILED}.
1562          *
1563          * @return the new {@link HealthCheckState health check state}
1564          */
1565         @GuardedBy("mLock")
1566         @HealthCheckState
tryPassHealthCheckLocked()1567         public int tryPassHealthCheckLocked() {
1568             if (mHealthCheckState != HealthCheckState.FAILED) {
1569                 // FAILED is a final state so only pass if we haven't failed
1570                 // Transition to PASSED
1571                 mHasPassedHealthCheck = true;
1572             }
1573             return updateHealthCheckStateLocked();
1574         }
1575 
1576         /** Returns the monitored package name. */
getName()1577         private String getName() {
1578             return mPackageName;
1579         }
1580 
1581         /**
1582          * Returns the current {@link HealthCheckState health check state}.
1583          */
1584         @GuardedBy("mLock")
1585         @HealthCheckState
getHealthCheckStateLocked()1586         public int getHealthCheckStateLocked() {
1587             return mHealthCheckState;
1588         }
1589 
1590         /**
1591          * Returns the shortest duration before the package should be scheduled for a prune.
1592          *
1593          * @return the duration or {@link Long#MAX_VALUE} if the package should not be scheduled
1594          */
1595         @GuardedBy("mLock")
getShortestScheduleDurationMsLocked()1596         public long getShortestScheduleDurationMsLocked() {
1597             // Consider health check duration only if #isPendingHealthChecksLocked is true
1598             return Math.min(toPositive(mDurationMs),
1599                     isPendingHealthChecksLocked()
1600                     ? toPositive(mHealthCheckDurationMs) : Long.MAX_VALUE);
1601         }
1602 
1603         /**
1604          * Returns {@code true} if the total duration left to monitor the package is less than or
1605          * equal to 0 {@code false} otherwise.
1606          */
1607         @GuardedBy("mLock")
isExpiredLocked()1608         public boolean isExpiredLocked() {
1609             return mDurationMs <= 0;
1610         }
1611 
1612         /**
1613          * Returns {@code true} if the package, {@link #getName} is expecting health check results
1614          * {@code false} otherwise.
1615          */
1616         @GuardedBy("mLock")
isPendingHealthChecksLocked()1617         public boolean isPendingHealthChecksLocked() {
1618             return mHealthCheckState == HealthCheckState.ACTIVE
1619                     || mHealthCheckState == HealthCheckState.INACTIVE;
1620         }
1621 
1622         /**
1623          * Updates the health check state based on {@link #mHasPassedHealthCheck}
1624          * and {@link #mHealthCheckDurationMs}.
1625          *
1626          * @return the new {@link HealthCheckState health check state}
1627          */
1628         @GuardedBy("mLock")
1629         @HealthCheckState
updateHealthCheckStateLocked()1630         private int updateHealthCheckStateLocked() {
1631             int oldState = mHealthCheckState;
1632             if (mHasPassedHealthCheck) {
1633                 // Set final state first to avoid ambiguity
1634                 mHealthCheckState = HealthCheckState.PASSED;
1635             } else if (mHealthCheckDurationMs <= 0 || mDurationMs <= 0) {
1636                 // Set final state first to avoid ambiguity
1637                 mHealthCheckState = HealthCheckState.FAILED;
1638             } else if (mHealthCheckDurationMs == Long.MAX_VALUE) {
1639                 mHealthCheckState = HealthCheckState.INACTIVE;
1640             } else {
1641                 mHealthCheckState = HealthCheckState.ACTIVE;
1642             }
1643 
1644             if (oldState != mHealthCheckState) {
1645                 Slog.i(TAG, "Updated health check state for package " + getName() + ": "
1646                         + toString(oldState) + " -> " + toString(mHealthCheckState));
1647             }
1648             return mHealthCheckState;
1649         }
1650 
1651         /** Returns a {@link String} representation of the current health check state. */
toString(@ealthCheckState int state)1652         private String toString(@HealthCheckState int state) {
1653             switch (state) {
1654                 case HealthCheckState.ACTIVE:
1655                     return "ACTIVE";
1656                 case HealthCheckState.INACTIVE:
1657                     return "INACTIVE";
1658                 case HealthCheckState.PASSED:
1659                     return "PASSED";
1660                 case HealthCheckState.FAILED:
1661                     return "FAILED";
1662                 default:
1663                     return "UNKNOWN";
1664             }
1665         }
1666 
1667         /** Returns {@code value} if it is greater than 0 or {@link Long#MAX_VALUE} otherwise. */
toPositive(long value)1668         private long toPositive(long value) {
1669             return value > 0 ? value : Long.MAX_VALUE;
1670         }
1671 
1672         /** Compares the equality of this object with another {@link MonitoredPackage}. */
1673         @VisibleForTesting
isEqualTo(MonitoredPackage pkg)1674         boolean isEqualTo(MonitoredPackage pkg) {
1675             return (getName().equals(pkg.getName()))
1676                     && mDurationMs == pkg.mDurationMs
1677                     && mHasPassedHealthCheck == pkg.mHasPassedHealthCheck
1678                     && mHealthCheckDurationMs == pkg.mHealthCheckDurationMs
1679                     && (mMitigationCalls.toString()).equals(pkg.mMitigationCalls.toString());
1680         }
1681     }
1682 
1683     /**
1684      * Handles the thresholding logic for system server boots.
1685      */
1686     class BootThreshold {
1687 
1688         private final int mBootTriggerCount;
1689         private final long mTriggerWindow;
1690 
BootThreshold(int bootTriggerCount, long triggerWindow)1691         BootThreshold(int bootTriggerCount, long triggerWindow) {
1692             this.mBootTriggerCount = bootTriggerCount;
1693             this.mTriggerWindow = triggerWindow;
1694         }
1695 
reset()1696         public void reset() {
1697             setStart(0);
1698             setCount(0);
1699         }
1700 
getCount()1701         private int getCount() {
1702             return SystemProperties.getInt(PROP_RESCUE_BOOT_COUNT, 0);
1703         }
1704 
setCount(int count)1705         private void setCount(int count) {
1706             SystemProperties.set(PROP_RESCUE_BOOT_COUNT, Integer.toString(count));
1707         }
1708 
getStart()1709         public long getStart() {
1710             return SystemProperties.getLong(PROP_RESCUE_BOOT_START, 0);
1711         }
1712 
getMitigationCount()1713         public int getMitigationCount() {
1714             return SystemProperties.getInt(PROP_BOOT_MITIGATION_COUNT, 0);
1715         }
1716 
setStart(long start)1717         public void setStart(long start) {
1718             setPropertyStart(PROP_RESCUE_BOOT_START, start);
1719         }
1720 
setMitigationStart(long start)1721         public void setMitigationStart(long start) {
1722             setPropertyStart(PROP_BOOT_MITIGATION_WINDOW_START, start);
1723         }
1724 
getMitigationStart()1725         public long getMitigationStart() {
1726             return SystemProperties.getLong(PROP_BOOT_MITIGATION_WINDOW_START, 0);
1727         }
1728 
setMitigationCount(int count)1729         public void setMitigationCount(int count) {
1730             SystemProperties.set(PROP_BOOT_MITIGATION_COUNT, Integer.toString(count));
1731         }
1732 
setPropertyStart(String property, long start)1733         public void setPropertyStart(String property, long start) {
1734             final long now = mSystemClock.uptimeMillis();
1735             final long newStart = MathUtils.constrain(start, 0, now);
1736             SystemProperties.set(property, Long.toString(newStart));
1737         }
1738 
saveMitigationCountToMetadata()1739         public void saveMitigationCountToMetadata() {
1740             try (BufferedWriter writer = new BufferedWriter(new FileWriter(METADATA_FILE))) {
1741                 writer.write(String.valueOf(getMitigationCount()));
1742             } catch (Exception e) {
1743                 Slog.e(TAG, "Could not save metadata to file: " + e);
1744             }
1745         }
1746 
readMitigationCountFromMetadataIfNecessary()1747         public void readMitigationCountFromMetadataIfNecessary() {
1748             File bootPropsFile = new File(METADATA_FILE);
1749             if (bootPropsFile.exists()) {
1750                 try (BufferedReader reader = new BufferedReader(new FileReader(METADATA_FILE))) {
1751                     String mitigationCount = reader.readLine();
1752                     setMitigationCount(Integer.parseInt(mitigationCount));
1753                     bootPropsFile.delete();
1754                 } catch (Exception e) {
1755                     Slog.i(TAG, "Could not read metadata file: " + e);
1756                 }
1757             }
1758         }
1759 
1760 
1761         /** Increments the boot counter, and returns whether the device is bootlooping. */
incrementAndTest()1762         public boolean incrementAndTest() {
1763             readMitigationCountFromMetadataIfNecessary();
1764             final long now = mSystemClock.uptimeMillis();
1765             if (now - getStart() < 0) {
1766                 Slog.e(TAG, "Window was less than zero. Resetting start to current time.");
1767                 setStart(now);
1768                 setMitigationStart(now);
1769             }
1770             if (now - getMitigationStart() > DEFAULT_DEESCALATION_WINDOW_MS) {
1771                 setMitigationCount(0);
1772                 setMitigationStart(now);
1773             }
1774             final long window = now - getStart();
1775             if (window >= mTriggerWindow) {
1776                 setCount(1);
1777                 setStart(now);
1778                 return false;
1779             } else {
1780                 int count = getCount() + 1;
1781                 setCount(count);
1782                 EventLogTags.writeRescueNote(Process.ROOT_UID, count, window);
1783                 return count >= mBootTriggerCount;
1784             }
1785         }
1786 
1787     }
1788 }
1789