• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2021 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.android.server.criticalevents;
18 
19 import android.os.Handler;
20 import android.os.HandlerThread;
21 import android.server.ServerProtoEnums;
22 import android.util.Slog;
23 
24 import com.android.framework.protobuf.nano.MessageNanoPrinter;
25 import com.android.internal.annotations.VisibleForTesting;
26 import com.android.internal.util.RingBuffer;
27 import com.android.server.criticalevents.nano.CriticalEventLogProto;
28 import com.android.server.criticalevents.nano.CriticalEventLogStorageProto;
29 import com.android.server.criticalevents.nano.CriticalEventProto;
30 import com.android.server.criticalevents.nano.CriticalEventProto.AppNotResponding;
31 import com.android.server.criticalevents.nano.CriticalEventProto.HalfWatchdog;
32 import com.android.server.criticalevents.nano.CriticalEventProto.InstallPackages;
33 import com.android.server.criticalevents.nano.CriticalEventProto.ExcessiveBinderCalls;
34 import com.android.server.criticalevents.nano.CriticalEventProto.JavaCrash;
35 import com.android.server.criticalevents.nano.CriticalEventProto.NativeCrash;
36 import com.android.server.criticalevents.nano.CriticalEventProto.SystemServerStarted;
37 import com.android.server.criticalevents.nano.CriticalEventProto.Watchdog;
38 
39 import java.io.File;
40 import java.io.FileOutputStream;
41 import java.io.IOException;
42 import java.nio.file.Files;
43 import java.nio.file.Paths;
44 import java.time.Duration;
45 import java.util.Arrays;
46 import java.util.UUID;
47 
48 /**
49  * Log of recent critical events such as Watchdogs.
50  *
51  * For use in ANR reports to show recent events that may help to debug the ANR. In particular,
52  * the presence of recent critical events signal that the device was already in a had state.
53  *
54  * This class needs to be thread safe since it's used as a singleton.
55  */
56 public class CriticalEventLog {
57     private static final String TAG = CriticalEventLog.class.getSimpleName();
58 
59     /** UID for system_server. */
60     private static final int AID_SYSTEM = 1000;
61 
62     private static CriticalEventLog sInstance;
63 
64     /** Name of the file the log is saved to. */
65     @VisibleForTesting
66     static final String FILENAME = "critical_event_log.pb";
67 
68     /** Timestamp when the log was last saved (or attempted to be saved) to disk. */
69     private long mLastSaveAttemptMs = 0;
70 
71     /** File the log is saved to. */
72     private final File mLogFile;
73 
74     /** Ring buffer containing the log events. */
75     private final ThreadSafeRingBuffer<CriticalEventProto> mEvents;
76 
77     /** Max age of events to include in the output log proto. */
78     private final int mWindowMs;
79 
80     /** Minimum time between consecutive saves of the log to disk. */
81     private final long mMinTimeBetweenSavesMs;
82 
83     /** Whether to load and save the log synchronously with no delay. Only set to true in tests. */
84     private final boolean mLoadAndSaveImmediately;
85 
86     private final Handler mHandler;
87 
88     private final Runnable mSaveRunnable = this::saveLogToFileNow;
89 
90     @VisibleForTesting
CriticalEventLog(String logDir, int capacity, int windowMs, long minTimeBetweenSavesMs, boolean loadAndSaveImmediately, ILogLoader logLoader)91     CriticalEventLog(String logDir, int capacity, int windowMs, long minTimeBetweenSavesMs,
92             boolean loadAndSaveImmediately, ILogLoader logLoader) {
93         mLogFile = Paths.get(logDir, FILENAME).toFile();
94         mWindowMs = windowMs;
95         mMinTimeBetweenSavesMs = minTimeBetweenSavesMs;
96         mLoadAndSaveImmediately = loadAndSaveImmediately;
97 
98         mEvents = new ThreadSafeRingBuffer<>(CriticalEventProto.class, capacity);
99 
100         HandlerThread thread = new HandlerThread("CriticalEventLogIO");
101         thread.start();
102         mHandler = new Handler(thread.getLooper());
103 
104         final Runnable loadEvents = () -> logLoader.load(mLogFile, mEvents);
105         if (!mLoadAndSaveImmediately) {
106             mHandler.post(loadEvents);
107         } else {
108             loadEvents.run();
109         }
110     }
111 
112     /** Returns a new instance with production settings. */
CriticalEventLog()113     private CriticalEventLog() {
114         this(
115                 /* logDir= */"/data/misc/critical-events",
116                 /* capacity= */ 20,
117                 /* windowMs= */ (int) Duration.ofMinutes(5).toMillis(),
118                 /* minTimeBetweenSavesMs= */ Duration.ofSeconds(2).toMillis(),
119                 /* loadAndSaveImmediately= */ false,
120                 new LogLoader());
121     }
122 
123     /** Returns the singleton instance. */
getInstance()124     public static CriticalEventLog getInstance() {
125         if (sInstance == null) {
126             sInstance = new CriticalEventLog();
127         }
128         return sInstance;
129     }
130 
131     /**
132      * Ensures the singleton instance has been instantiated.
133      *
134      * Use this to eagerly instantiate the log (which loads the previous events from disk).
135      * Otherwise this will occur lazily when the first event is logged, by which time the device may
136      * be under load.
137      */
init()138     public static void init() {
139         getInstance();
140     }
141 
142     @VisibleForTesting
getWallTimeMillis()143     protected long getWallTimeMillis() {
144         return System.currentTimeMillis();
145     }
146 
147    /** Logs when a uid sends an excessive number of binder calls. */
logExcessiveBinderCalls(int uid)148     public void logExcessiveBinderCalls(int uid) {
149         CriticalEventProto event = new CriticalEventProto();
150         ExcessiveBinderCalls calls = new ExcessiveBinderCalls();
151         calls.uid = uid;
152         event.setExcessiveBinderCalls(calls);
153         log(event);
154     }
155 
156     /** Logs when one or more packages are installed. */
logInstallPackagesStarted()157     public void logInstallPackagesStarted() {
158         CriticalEventProto event = new CriticalEventProto();
159         event.setInstallPackages(new InstallPackages());
160         log(event);
161     }
162 
163     /** Logs when system server started. */
logSystemServerStarted()164     public void logSystemServerStarted() {
165         CriticalEventProto event = new CriticalEventProto();
166         event.setSystemServerStarted(new SystemServerStarted());
167         log(event);
168     }
169 
170     /** Logs a watchdog. */
logWatchdog(String subject, UUID uuid)171     public void logWatchdog(String subject, UUID uuid) {
172         Watchdog watchdog = new Watchdog();
173         watchdog.subject = subject;
174         watchdog.uuid = uuid.toString();
175         CriticalEventProto event = new CriticalEventProto();
176         event.setWatchdog(watchdog);
177         log(event);
178     }
179 
180     /** Logs a half-watchdog. */
logHalfWatchdog(String subject)181     public void logHalfWatchdog(String subject) {
182         HalfWatchdog halfWatchdog = new HalfWatchdog();
183         halfWatchdog.subject = subject;
184         CriticalEventProto event = new CriticalEventProto();
185         event.setHalfWatchdog(halfWatchdog);
186         log(event);
187     }
188 
189     /**
190      * Logs an ANR.
191      *
192      * @param subject          the ANR subject line.
193      * @param processClassEnum {@link android.server.ServerProtoEnums} value for the ANRing process.
194      * @param processName      name of the ANRing process.
195      * @param uid              uid of the ANRing process.
196      * @param pid              pid of the ANRing process.
197      */
logAnr(String subject, int processClassEnum, String processName, int uid, int pid)198     public void logAnr(String subject, int processClassEnum, String processName, int uid, int pid) {
199         AppNotResponding anr = new AppNotResponding();
200         anr.subject = subject;
201         anr.processClass = processClassEnum;
202         anr.process = processName;
203         anr.uid = uid;
204         anr.pid = pid;
205         CriticalEventProto event = new CriticalEventProto();
206         event.setAnr(anr);
207         log(event);
208     }
209 
210     /**
211      * Logs a java crash.
212      *
213      * @param exceptionClass   the crash exception class.
214      * @param processClassEnum {@link android.server.ServerProtoEnums} value for the crashed
215      *                         process.
216      * @param processName      name of the crashed process.
217      * @param uid              uid of the crashed process.
218      * @param pid              pid of the crashed process.
219      */
logJavaCrash(String exceptionClass, int processClassEnum, String processName, int uid, int pid)220     public void logJavaCrash(String exceptionClass, int processClassEnum, String processName,
221             int uid, int pid) {
222         JavaCrash crash = new JavaCrash();
223         crash.exceptionClass = exceptionClass;
224         crash.processClass = processClassEnum;
225         crash.process = processName;
226         crash.uid = uid;
227         crash.pid = pid;
228         CriticalEventProto event = new CriticalEventProto();
229         event.setJavaCrash(crash);
230         log(event);
231     }
232 
233     /**
234      * Logs a native crash.
235      *
236      * @param processClassEnum {@link android.server.ServerProtoEnums} value for the crashed
237      *                         process.
238      * @param processName      name of the crashed process.
239      * @param uid              uid of the crashed process.
240      * @param pid              pid of the crashed process.
241      */
logNativeCrash(int processClassEnum, String processName, int uid, int pid)242     public void logNativeCrash(int processClassEnum, String processName, int uid, int pid) {
243         NativeCrash crash = new NativeCrash();
244         crash.processClass = processClassEnum;
245         crash.process = processName;
246         crash.uid = uid;
247         crash.pid = pid;
248         CriticalEventProto event = new CriticalEventProto();
249         event.setNativeCrash(crash);
250         log(event);
251     }
252 
log(CriticalEventProto event)253     private void log(CriticalEventProto event) {
254         event.timestampMs = getWallTimeMillis();
255         appendAndSave(event);
256     }
257 
258     @VisibleForTesting
appendAndSave(CriticalEventProto event)259     void appendAndSave(CriticalEventProto event) {
260         mEvents.append(event);
261         saveLogToFile();
262     }
263 
264     /**
265      * Returns recent critical events in text format to include in system server ANR stack trace
266      * file.
267      *
268      * Includes all events in the ring buffer with age less than or equal to {@code mWindowMs}.
269      */
logLinesForSystemServerTraceFile()270     public String logLinesForSystemServerTraceFile() {
271         return logLinesForTraceFile(ServerProtoEnums.SYSTEM_SERVER, "AID_SYSTEM", AID_SYSTEM);
272     }
273 
274     /**
275      * Returns recent critical events in text format to include in logs such as ANR stack trace
276      * files.
277      *
278      * Includes all events in the ring buffer with age less than or equal to {@code mWindowMs}.
279      *
280      * Some data in the returned log may be redacted for privacy. For example, a log for a data
281      * app will not include specific crash information for a different data app. See
282      * {@link LogSanitizer} for more information.
283      *
284      * @param traceProcessClassEnum {@link android.server.ServerProtoEnums} value for the process
285      *                              the ANR trace file is for.
286      * @param traceProcessName      name of the process the ANR trace file is for.
287      * @param traceUid              uid of the process the ANR trace file is for.
288      */
logLinesForTraceFile(int traceProcessClassEnum, String traceProcessName, int traceUid)289     public String logLinesForTraceFile(int traceProcessClassEnum, String traceProcessName,
290             int traceUid) {
291         CriticalEventLogProto outputLogProto = getOutputLogProto(traceProcessClassEnum,
292                 traceProcessName, traceUid);
293         return new StringBuilder()
294                 .append("--- CriticalEventLog ---\n")
295                 .append(MessageNanoPrinter.print(outputLogProto))
296                 .append('\n').toString();
297     }
298 
299     /**
300      * Returns a proto containing recent critical events.
301      *
302      * Includes all events in the ring buffer with age less than or equal to {@code mWindowMs}.
303      */
304     @VisibleForTesting
getOutputLogProto(int traceProcessClassEnum, String traceProcessName, int traceUid)305     protected CriticalEventLogProto getOutputLogProto(int traceProcessClassEnum,
306             String traceProcessName, int traceUid) {
307         CriticalEventLogProto log = new CriticalEventLogProto();
308         log.timestampMs = getWallTimeMillis();
309         log.windowMs = mWindowMs;
310         log.capacity = mEvents.capacity();
311 
312         CriticalEventProto[] events = recentEventsWithMinTimestamp(log.timestampMs - mWindowMs);
313         LogSanitizer sanitizer = new LogSanitizer(traceProcessClassEnum, traceProcessName,
314                 traceUid);
315         for (int i = 0; i < events.length; i++) {
316             events[i] = sanitizer.process(events[i]);
317         }
318         log.events = events;
319 
320         return log;
321     }
322 
323     /**
324      * Returns the most recent logged events, starting with the first event that has a timestamp
325      * greater than or equal to {@code minTimestampMs}.
326      *
327      * If no events have a timestamp greater than or equal to {@code minTimestampMs}, returns an
328      * empty array.
329      */
recentEventsWithMinTimestamp(long minTimestampMs)330     private CriticalEventProto[] recentEventsWithMinTimestamp(long minTimestampMs) {
331         // allEvents are in insertion order, i.e. in order of when the relevant log___() function
332         // was called.
333         // This means that if the system clock changed (e.g. a NITZ update) allEvents may not be
334         // strictly ordered by timestamp. In this case we are permissive and start with the
335         // first event that has a timestamp in the desired range.
336         CriticalEventProto[] allEvents = mEvents.toArray();
337         for (int i = 0; i < allEvents.length; i++) {
338             if (allEvents[i].timestampMs >= minTimestampMs) {
339                 return Arrays.copyOfRange(allEvents, i, allEvents.length);
340             }
341         }
342         return new CriticalEventProto[]{};
343     }
344 
saveLogToFile()345     private void saveLogToFile() {
346         if (mLoadAndSaveImmediately) {
347             saveLogToFileNow();
348             return;
349         }
350         if (mHandler.hasCallbacks(mSaveRunnable)) {
351             // An earlier save is already scheduled so don't need to schedule an additional one.
352             return;
353         }
354 
355         if (!mHandler.postDelayed(mSaveRunnable, saveDelayMs())) {
356             Slog.w(TAG, "Error scheduling save");
357         }
358     }
359 
360     /**
361      * Returns the delay in milliseconds when scheduling a save on the handler thread.
362      *
363      * Returns a value in the range [0, {@code minTimeBetweenSavesMs}] such that the time between
364      * consecutive saves does not exceed {@code minTimeBetweenSavesMs}.
365      *
366      * This means that if the last save occurred a long time ago, or if no previous saves
367      * have occurred then the returned delay will be zero.
368      */
369     @VisibleForTesting
saveDelayMs()370     protected long saveDelayMs() {
371         final long nowMs = getWallTimeMillis();
372         return Math.max(0,
373                 mLastSaveAttemptMs + mMinTimeBetweenSavesMs - nowMs);
374     }
375 
376     @VisibleForTesting
saveLogToFileNow()377     protected void saveLogToFileNow() {
378         mLastSaveAttemptMs = getWallTimeMillis();
379 
380         File logDir = mLogFile.getParentFile();
381         if (!logDir.exists()) {
382             if (!logDir.mkdir()) {
383                 Slog.e(TAG, "Error creating log directory: " + logDir.getPath());
384                 return;
385             }
386         }
387 
388         if (!mLogFile.exists()) {
389             try {
390                 mLogFile.createNewFile();
391             } catch (IOException e) {
392                 Slog.e(TAG, "Error creating log file", e);
393                 return;
394             }
395         }
396 
397         CriticalEventLogStorageProto logProto = new CriticalEventLogStorageProto();
398         logProto.events = mEvents.toArray();
399 
400         final byte[] bytes = CriticalEventLogStorageProto.toByteArray(logProto);
401         try (FileOutputStream stream = new FileOutputStream(mLogFile, false)) {
402             stream.write(bytes);
403         } catch (IOException e) {
404             Slog.e(TAG, "Error saving log to disk.", e);
405         }
406     }
407 
408     @VisibleForTesting
409     protected static class ThreadSafeRingBuffer<T> {
410         private final int mCapacity;
411         private final RingBuffer<T> mBuffer;
412 
ThreadSafeRingBuffer(Class<T> clazz, int capacity)413         ThreadSafeRingBuffer(Class<T> clazz, int capacity) {
414             this.mCapacity = capacity;
415             this.mBuffer = new RingBuffer<>(clazz, capacity);
416         }
417 
append(T t)418         synchronized void append(T t) {
419             mBuffer.append(t);
420         }
421 
toArray()422         synchronized T[] toArray() {
423             return mBuffer.toArray();
424         }
425 
capacity()426         int capacity() {
427             return mCapacity;
428         }
429     }
430 
431     /** Loads log events from disk into a ring buffer. */
432     protected interface ILogLoader {
load(File logFile, ThreadSafeRingBuffer<CriticalEventProto> buffer)433         void load(File logFile, ThreadSafeRingBuffer<CriticalEventProto> buffer);
434     }
435 
436     /** Loads log events from disk into a ring buffer. */
437     static class LogLoader implements ILogLoader {
438         @Override
load(File logFile, ThreadSafeRingBuffer<CriticalEventProto> buffer)439         public void load(File logFile,
440                 ThreadSafeRingBuffer<CriticalEventProto> buffer) {
441             for (CriticalEventProto event : loadLogFromFile(logFile).events) {
442                 buffer.append(event);
443             }
444         }
445 
loadLogFromFile(File logFile)446         private static CriticalEventLogStorageProto loadLogFromFile(File logFile) {
447             if (!logFile.exists()) {
448                 Slog.i(TAG, "No log found, returning empty log proto.");
449                 return new CriticalEventLogStorageProto();
450             }
451 
452             try {
453                 return CriticalEventLogStorageProto.parseFrom(
454                         Files.readAllBytes(logFile.toPath()));
455             } catch (IOException e) {
456                 Slog.e(TAG, "Error reading log from disk.", e);
457                 return new CriticalEventLogStorageProto();
458             }
459         }
460     }
461 
462     /**
463      * Redacts private data app fields from the critical event protos.
464      *
465      * When a critical event log is requested, this class is used to redact specific information
466      * so that the trace file for a data app does not leak private information about other data
467      * apps.
468      */
469     private static class LogSanitizer {
470         /**
471          * The {@link CriticalEventProto.ProcessClass} of the process the output trace file is for.
472          */
473         int mTraceProcessClassEnum;
474 
475         /** The name of the process that the output trace file is for. */
476         String mTraceProcessName;
477 
478         /** The uid of the process that the output trace file is for. */
479         int mTraceUid;
480 
LogSanitizer(int traceProcessClassEnum, String traceProcessName, int traceUid)481         LogSanitizer(int traceProcessClassEnum, String traceProcessName, int traceUid) {
482             mTraceProcessClassEnum = traceProcessClassEnum;
483             mTraceProcessName = traceProcessName;
484             mTraceUid = traceUid;
485         }
486 
487         /**
488          * Redacts information from a critical event proto where necessary.
489          *
490          * This function does not mutate its input. If redaction happens, it returns a new proto.
491          * Otherwise, it returns the original proto.
492          */
process(CriticalEventProto event)493         CriticalEventProto process(CriticalEventProto event) {
494             if (event.hasAnr()) {
495                 AppNotResponding anr = event.getAnr();
496                 if (shouldSanitize(anr.processClass, anr.process, anr.uid)) {
497                     return sanitizeAnr(event);
498                 }
499             } else if (event.hasJavaCrash()) {
500                 JavaCrash crash = event.getJavaCrash();
501                 if (shouldSanitize(crash.processClass, crash.process, crash.uid)) {
502                     return sanitizeJavaCrash(event);
503                 }
504             } else if (event.hasNativeCrash()) {
505                 NativeCrash crash = event.getNativeCrash();
506                 if (shouldSanitize(crash.processClass, crash.process, crash.uid)) {
507                     return sanitizeNativeCrash(event);
508                 }
509             }
510             // No redaction needed.
511             return event;
512         }
513 
shouldSanitize(int processClassEnum, String processName, int uid)514         private boolean shouldSanitize(int processClassEnum, String processName, int uid) {
515             boolean sameApp = processName != null && processName.equals(mTraceProcessName)
516                     && mTraceUid == uid;
517 
518             // Only sanitize when both the critical event and trace file are for different data
519             // apps.
520             return processClassEnum == CriticalEventProto.DATA_APP
521                     && mTraceProcessClassEnum == CriticalEventProto.DATA_APP
522                     && !sameApp;
523         }
524 
sanitizeAnr(CriticalEventProto base)525         private static CriticalEventProto sanitizeAnr(CriticalEventProto base) {
526             AppNotResponding anr = new AppNotResponding();
527             // Do not set subject and process.
528             anr.processClass = base.getAnr().processClass;
529             anr.uid = base.getAnr().uid;
530             anr.pid = base.getAnr().pid;
531 
532             CriticalEventProto sanitized = sanitizeCriticalEventProto(base);
533             sanitized.setAnr(anr);
534             return sanitized;
535         }
536 
sanitizeJavaCrash(CriticalEventProto base)537         private static CriticalEventProto sanitizeJavaCrash(CriticalEventProto base) {
538             JavaCrash crash = new JavaCrash();
539             // Do not set exceptionClass and process.
540             crash.processClass = base.getJavaCrash().processClass;
541             crash.uid = base.getJavaCrash().uid;
542             crash.pid = base.getJavaCrash().pid;
543 
544             CriticalEventProto sanitized = sanitizeCriticalEventProto(base);
545             sanitized.setJavaCrash(crash);
546             return sanitized;
547         }
548 
sanitizeNativeCrash(CriticalEventProto base)549         private static CriticalEventProto sanitizeNativeCrash(CriticalEventProto base) {
550             NativeCrash crash = new NativeCrash();
551             // Do not set process.
552             crash.processClass = base.getNativeCrash().processClass;
553             crash.uid = base.getNativeCrash().uid;
554             crash.pid = base.getNativeCrash().pid;
555 
556             CriticalEventProto sanitized = sanitizeCriticalEventProto(base);
557             sanitized.setNativeCrash(crash);
558             return sanitized;
559         }
560 
sanitizeCriticalEventProto(CriticalEventProto base)561         private static CriticalEventProto sanitizeCriticalEventProto(CriticalEventProto base) {
562             CriticalEventProto sanitized = new CriticalEventProto();
563             sanitized.timestampMs = base.timestampMs;
564             return sanitized;
565         }
566     }
567 }
568