• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2021 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.android.server.criticalevents;
18 
19 import android.os.Handler;
20 import android.os.HandlerThread;
21 import android.server.ServerProtoEnums;
22 import android.util.Slog;
23 
24 import com.android.framework.protobuf.nano.MessageNanoPrinter;
25 import com.android.internal.annotations.VisibleForTesting;
26 import com.android.internal.util.RingBuffer;
27 import com.android.server.criticalevents.nano.CriticalEventLogProto;
28 import com.android.server.criticalevents.nano.CriticalEventLogStorageProto;
29 import com.android.server.criticalevents.nano.CriticalEventProto;
30 import com.android.server.criticalevents.nano.CriticalEventProto.AppNotResponding;
31 import com.android.server.criticalevents.nano.CriticalEventProto.HalfWatchdog;
32 import com.android.server.criticalevents.nano.CriticalEventProto.JavaCrash;
33 import com.android.server.criticalevents.nano.CriticalEventProto.NativeCrash;
34 import com.android.server.criticalevents.nano.CriticalEventProto.Watchdog;
35 
36 import java.io.File;
37 import java.io.FileOutputStream;
38 import java.io.IOException;
39 import java.nio.file.Files;
40 import java.nio.file.Paths;
41 import java.time.Duration;
42 import java.util.Arrays;
43 import java.util.UUID;
44 
45 /**
46  * Log of recent critical events such as Watchdogs.
47  *
48  * For use in ANR reports to show recent events that may help to debug the ANR. In particular,
49  * the presence of recent critical events signal that the device was already in a had state.
50  *
51  * This class needs to be thread safe since it's used as a singleton.
52  */
53 public class CriticalEventLog {
54     private static final String TAG = CriticalEventLog.class.getSimpleName();
55 
56     /** UID for system_server. */
57     private static final int AID_SYSTEM = 1000;
58 
59     private static CriticalEventLog sInstance;
60 
61     /** Name of the file the log is saved to. */
62     @VisibleForTesting
63     static final String FILENAME = "critical_event_log.pb";
64 
65     /** Timestamp when the log was last saved (or attempted to be saved) to disk. */
66     private long mLastSaveAttemptMs = 0;
67 
68     /** File the log is saved to. */
69     private final File mLogFile;
70 
71     /** Ring buffer containing the log events. */
72     private final ThreadSafeRingBuffer<CriticalEventProto> mEvents;
73 
74     /** Max age of events to include in the output log proto. */
75     private final int mWindowMs;
76 
77     /** Minimum time between consecutive saves of the log to disk. */
78     private final long mMinTimeBetweenSavesMs;
79 
80     /** Whether to load and save the log synchronously with no delay. Only set to true in tests. */
81     private final boolean mLoadAndSaveImmediately;
82 
83     private final Handler mHandler;
84 
85     private final Runnable mSaveRunnable = this::saveLogToFileNow;
86 
87     @VisibleForTesting
CriticalEventLog(String logDir, int capacity, int windowMs, long minTimeBetweenSavesMs, boolean loadAndSaveImmediately, ILogLoader logLoader)88     CriticalEventLog(String logDir, int capacity, int windowMs, long minTimeBetweenSavesMs,
89             boolean loadAndSaveImmediately, ILogLoader logLoader) {
90         mLogFile = Paths.get(logDir, FILENAME).toFile();
91         mWindowMs = windowMs;
92         mMinTimeBetweenSavesMs = minTimeBetweenSavesMs;
93         mLoadAndSaveImmediately = loadAndSaveImmediately;
94 
95         mEvents = new ThreadSafeRingBuffer<>(CriticalEventProto.class, capacity);
96 
97         HandlerThread thread = new HandlerThread("CriticalEventLogIO");
98         thread.start();
99         mHandler = new Handler(thread.getLooper());
100 
101         final Runnable loadEvents = () -> logLoader.load(mLogFile, mEvents);
102         if (!mLoadAndSaveImmediately) {
103             mHandler.post(loadEvents);
104         } else {
105             loadEvents.run();
106         }
107     }
108 
109     /** Returns a new instance with production settings. */
CriticalEventLog()110     private CriticalEventLog() {
111         this(
112                 /* logDir= */"/data/misc/critical-events",
113                 /* capacity= */ 20,
114                 /* windowMs= */ (int) Duration.ofMinutes(5).toMillis(),
115                 /* minTimeBetweenSavesMs= */ Duration.ofSeconds(2).toMillis(),
116                 /* loadAndSaveImmediately= */ false,
117                 new LogLoader());
118     }
119 
120     /** Returns the singleton instance. */
getInstance()121     public static CriticalEventLog getInstance() {
122         if (sInstance == null) {
123             sInstance = new CriticalEventLog();
124         }
125         return sInstance;
126     }
127 
128     /**
129      * Ensures the singleton instance has been instantiated.
130      *
131      * Use this to eagerly instantiate the log (which loads the previous events from disk).
132      * Otherwise this will occur lazily when the first event is logged, by which time the device may
133      * be under load.
134      */
init()135     public static void init() {
136         getInstance();
137     }
138 
139     @VisibleForTesting
getWallTimeMillis()140     protected long getWallTimeMillis() {
141         return System.currentTimeMillis();
142     }
143 
144     /** Logs a watchdog. */
logWatchdog(String subject, UUID uuid)145     public void logWatchdog(String subject, UUID uuid) {
146         Watchdog watchdog = new Watchdog();
147         watchdog.subject = subject;
148         watchdog.uuid = uuid.toString();
149         CriticalEventProto event = new CriticalEventProto();
150         event.setWatchdog(watchdog);
151         log(event);
152     }
153 
154     /** Logs a half-watchdog. */
logHalfWatchdog(String subject)155     public void logHalfWatchdog(String subject) {
156         HalfWatchdog halfWatchdog = new HalfWatchdog();
157         halfWatchdog.subject = subject;
158         CriticalEventProto event = new CriticalEventProto();
159         event.setHalfWatchdog(halfWatchdog);
160         log(event);
161     }
162 
163     /**
164      * Logs an ANR.
165      *
166      * @param subject          the ANR subject line.
167      * @param processClassEnum {@link android.server.ServerProtoEnums} value for the ANRing process.
168      * @param processName      name of the ANRing process.
169      * @param uid              uid of the ANRing process.
170      * @param pid              pid of the ANRing process.
171      */
logAnr(String subject, int processClassEnum, String processName, int uid, int pid)172     public void logAnr(String subject, int processClassEnum, String processName, int uid, int pid) {
173         AppNotResponding anr = new AppNotResponding();
174         anr.subject = subject;
175         anr.processClass = processClassEnum;
176         anr.process = processName;
177         anr.uid = uid;
178         anr.pid = pid;
179         CriticalEventProto event = new CriticalEventProto();
180         event.setAnr(anr);
181         log(event);
182     }
183 
184     /**
185      * Logs a java crash.
186      *
187      * @param exceptionClass   the crash exception class.
188      * @param processClassEnum {@link android.server.ServerProtoEnums} value for the crashed
189      *                         process.
190      * @param processName      name of the crashed process.
191      * @param uid              uid of the crashed process.
192      * @param pid              pid of the crashed process.
193      */
logJavaCrash(String exceptionClass, int processClassEnum, String processName, int uid, int pid)194     public void logJavaCrash(String exceptionClass, int processClassEnum, String processName,
195             int uid, int pid) {
196         JavaCrash crash = new JavaCrash();
197         crash.exceptionClass = exceptionClass;
198         crash.processClass = processClassEnum;
199         crash.process = processName;
200         crash.uid = uid;
201         crash.pid = pid;
202         CriticalEventProto event = new CriticalEventProto();
203         event.setJavaCrash(crash);
204         log(event);
205     }
206 
207     /**
208      * Logs a native crash.
209      *
210      * @param processClassEnum {@link android.server.ServerProtoEnums} value for the crashed
211      *                         process.
212      * @param processName      name of the crashed process.
213      * @param uid              uid of the crashed process.
214      * @param pid              pid of the crashed process.
215      */
logNativeCrash(int processClassEnum, String processName, int uid, int pid)216     public void logNativeCrash(int processClassEnum, String processName, int uid, int pid) {
217         NativeCrash crash = new NativeCrash();
218         crash.processClass = processClassEnum;
219         crash.process = processName;
220         crash.uid = uid;
221         crash.pid = pid;
222         CriticalEventProto event = new CriticalEventProto();
223         event.setNativeCrash(crash);
224         log(event);
225     }
226 
log(CriticalEventProto event)227     private void log(CriticalEventProto event) {
228         event.timestampMs = getWallTimeMillis();
229         appendAndSave(event);
230     }
231 
232     @VisibleForTesting
appendAndSave(CriticalEventProto event)233     void appendAndSave(CriticalEventProto event) {
234         mEvents.append(event);
235         saveLogToFile();
236     }
237 
238     /**
239      * Returns recent critical events in text format to include in system server ANR stack trace
240      * file.
241      *
242      * Includes all events in the ring buffer with age less than or equal to {@code mWindowMs}.
243      */
logLinesForSystemServerTraceFile()244     public String logLinesForSystemServerTraceFile() {
245         return logLinesForTraceFile(ServerProtoEnums.SYSTEM_SERVER, "AID_SYSTEM", AID_SYSTEM);
246     }
247 
248     /**
249      * Returns recent critical events in text format to include in logs such as ANR stack trace
250      * files.
251      *
252      * Includes all events in the ring buffer with age less than or equal to {@code mWindowMs}.
253      *
254      * Some data in the returned log may be redacted for privacy. For example, a log for a data
255      * app will not include specific crash information for a different data app. See
256      * {@link LogSanitizer} for more information.
257      *
258      * @param traceProcessClassEnum {@link android.server.ServerProtoEnums} value for the process
259      *                              the ANR trace file is for.
260      * @param traceProcessName      name of the process the ANR trace file is for.
261      * @param traceUid              uid of the process the ANR trace file is for.
262      */
logLinesForTraceFile(int traceProcessClassEnum, String traceProcessName, int traceUid)263     public String logLinesForTraceFile(int traceProcessClassEnum, String traceProcessName,
264             int traceUid) {
265         CriticalEventLogProto outputLogProto = getOutputLogProto(traceProcessClassEnum,
266                 traceProcessName, traceUid);
267         return new StringBuilder()
268                 .append("--- CriticalEventLog ---\n")
269                 .append(MessageNanoPrinter.print(outputLogProto))
270                 .append('\n').toString();
271     }
272 
273     /**
274      * Returns a proto containing recent critical events.
275      *
276      * Includes all events in the ring buffer with age less than or equal to {@code mWindowMs}.
277      */
278     @VisibleForTesting
getOutputLogProto(int traceProcessClassEnum, String traceProcessName, int traceUid)279     protected CriticalEventLogProto getOutputLogProto(int traceProcessClassEnum,
280             String traceProcessName, int traceUid) {
281         CriticalEventLogProto log = new CriticalEventLogProto();
282         log.timestampMs = getWallTimeMillis();
283         log.windowMs = mWindowMs;
284         log.capacity = mEvents.capacity();
285 
286         CriticalEventProto[] events = recentEventsWithMinTimestamp(log.timestampMs - mWindowMs);
287         LogSanitizer sanitizer = new LogSanitizer(traceProcessClassEnum, traceProcessName,
288                 traceUid);
289         for (int i = 0; i < events.length; i++) {
290             events[i] = sanitizer.process(events[i]);
291         }
292         log.events = events;
293 
294         return log;
295     }
296 
297     /**
298      * Returns the most recent logged events, starting with the first event that has a timestamp
299      * greater than or equal to {@code minTimestampMs}.
300      *
301      * If no events have a timestamp greater than or equal to {@code minTimestampMs}, returns an
302      * empty array.
303      */
recentEventsWithMinTimestamp(long minTimestampMs)304     private CriticalEventProto[] recentEventsWithMinTimestamp(long minTimestampMs) {
305         // allEvents are in insertion order, i.e. in order of when the relevant log___() function
306         // was called.
307         // This means that if the system clock changed (e.g. a NITZ update) allEvents may not be
308         // strictly ordered by timestamp. In this case we are permissive and start with the
309         // first event that has a timestamp in the desired range.
310         CriticalEventProto[] allEvents = mEvents.toArray();
311         for (int i = 0; i < allEvents.length; i++) {
312             if (allEvents[i].timestampMs >= minTimestampMs) {
313                 return Arrays.copyOfRange(allEvents, i, allEvents.length);
314             }
315         }
316         return new CriticalEventProto[]{};
317     }
318 
saveLogToFile()319     private void saveLogToFile() {
320         if (mLoadAndSaveImmediately) {
321             saveLogToFileNow();
322             return;
323         }
324         if (mHandler.hasCallbacks(mSaveRunnable)) {
325             // An earlier save is already scheduled so don't need to schedule an additional one.
326             return;
327         }
328 
329         if (!mHandler.postDelayed(mSaveRunnable, saveDelayMs())) {
330             Slog.w(TAG, "Error scheduling save");
331         }
332     }
333 
334     /**
335      * Returns the delay in milliseconds when scheduling a save on the handler thread.
336      *
337      * Returns a value in the range [0, {@code minTimeBetweenSavesMs}] such that the time between
338      * consecutive saves does not exceed {@code minTimeBetweenSavesMs}.
339      *
340      * This means that if the last save occurred a long time ago, or if no previous saves
341      * have occurred then the returned delay will be zero.
342      */
343     @VisibleForTesting
saveDelayMs()344     protected long saveDelayMs() {
345         final long nowMs = getWallTimeMillis();
346         return Math.max(0,
347                 mLastSaveAttemptMs + mMinTimeBetweenSavesMs - nowMs);
348     }
349 
350     @VisibleForTesting
saveLogToFileNow()351     protected void saveLogToFileNow() {
352         mLastSaveAttemptMs = getWallTimeMillis();
353 
354         File logDir = mLogFile.getParentFile();
355         if (!logDir.exists()) {
356             if (!logDir.mkdir()) {
357                 Slog.e(TAG, "Error creating log directory: " + logDir.getPath());
358                 return;
359             }
360         }
361 
362         if (!mLogFile.exists()) {
363             try {
364                 mLogFile.createNewFile();
365             } catch (IOException e) {
366                 Slog.e(TAG, "Error creating log file", e);
367                 return;
368             }
369         }
370 
371         CriticalEventLogStorageProto logProto = new CriticalEventLogStorageProto();
372         logProto.events = mEvents.toArray();
373 
374         final byte[] bytes = CriticalEventLogStorageProto.toByteArray(logProto);
375         try (FileOutputStream stream = new FileOutputStream(mLogFile, false)) {
376             stream.write(bytes);
377         } catch (IOException e) {
378             Slog.e(TAG, "Error saving log to disk.", e);
379         }
380     }
381 
382     @VisibleForTesting
383     protected static class ThreadSafeRingBuffer<T> {
384         private final int mCapacity;
385         private final RingBuffer<T> mBuffer;
386 
ThreadSafeRingBuffer(Class<T> clazz, int capacity)387         ThreadSafeRingBuffer(Class<T> clazz, int capacity) {
388             this.mCapacity = capacity;
389             this.mBuffer = new RingBuffer<>(clazz, capacity);
390         }
391 
append(T t)392         synchronized void append(T t) {
393             mBuffer.append(t);
394         }
395 
toArray()396         synchronized T[] toArray() {
397             return mBuffer.toArray();
398         }
399 
capacity()400         int capacity() {
401             return mCapacity;
402         }
403     }
404 
405     /** Loads log events from disk into a ring buffer. */
406     protected interface ILogLoader {
load(File logFile, ThreadSafeRingBuffer<CriticalEventProto> buffer)407         void load(File logFile, ThreadSafeRingBuffer<CriticalEventProto> buffer);
408     }
409 
410     /** Loads log events from disk into a ring buffer. */
411     static class LogLoader implements ILogLoader {
412         @Override
load(File logFile, ThreadSafeRingBuffer<CriticalEventProto> buffer)413         public void load(File logFile,
414                 ThreadSafeRingBuffer<CriticalEventProto> buffer) {
415             for (CriticalEventProto event : loadLogFromFile(logFile).events) {
416                 buffer.append(event);
417             }
418         }
419 
loadLogFromFile(File logFile)420         private static CriticalEventLogStorageProto loadLogFromFile(File logFile) {
421             if (!logFile.exists()) {
422                 Slog.i(TAG, "No log found, returning empty log proto.");
423                 return new CriticalEventLogStorageProto();
424             }
425 
426             try {
427                 return CriticalEventLogStorageProto.parseFrom(
428                         Files.readAllBytes(logFile.toPath()));
429             } catch (IOException e) {
430                 Slog.e(TAG, "Error reading log from disk.", e);
431                 return new CriticalEventLogStorageProto();
432             }
433         }
434     }
435 
436     /**
437      * Redacts private data app fields from the critical event protos.
438      *
439      * When a critical event log is requested, this class is used to redact specific information
440      * so that the trace file for a data app does not leak private information about other data
441      * apps.
442      */
443     private static class LogSanitizer {
444         /**
445          * The {@link CriticalEventProto.ProcessClass} of the process the output trace file is for.
446          */
447         int mTraceProcessClassEnum;
448 
449         /** The name of the process that the output trace file is for. */
450         String mTraceProcessName;
451 
452         /** The uid of the process that the output trace file is for. */
453         int mTraceUid;
454 
LogSanitizer(int traceProcessClassEnum, String traceProcessName, int traceUid)455         LogSanitizer(int traceProcessClassEnum, String traceProcessName, int traceUid) {
456             mTraceProcessClassEnum = traceProcessClassEnum;
457             mTraceProcessName = traceProcessName;
458             mTraceUid = traceUid;
459         }
460 
461         /**
462          * Redacts information from a critical event proto where necessary.
463          *
464          * This function does not mutate its input. If redaction happens, it returns a new proto.
465          * Otherwise, it returns the original proto.
466          */
process(CriticalEventProto event)467         CriticalEventProto process(CriticalEventProto event) {
468             if (event.hasAnr()) {
469                 AppNotResponding anr = event.getAnr();
470                 if (shouldSanitize(anr.processClass, anr.process, anr.uid)) {
471                     return sanitizeAnr(event);
472                 }
473             } else if (event.hasJavaCrash()) {
474                 JavaCrash crash = event.getJavaCrash();
475                 if (shouldSanitize(crash.processClass, crash.process, crash.uid)) {
476                     return sanitizeJavaCrash(event);
477                 }
478             } else if (event.hasNativeCrash()) {
479                 NativeCrash crash = event.getNativeCrash();
480                 if (shouldSanitize(crash.processClass, crash.process, crash.uid)) {
481                     return sanitizeNativeCrash(event);
482                 }
483             }
484             // No redaction needed.
485             return event;
486         }
487 
shouldSanitize(int processClassEnum, String processName, int uid)488         private boolean shouldSanitize(int processClassEnum, String processName, int uid) {
489             boolean sameApp = processName != null && processName.equals(mTraceProcessName)
490                     && mTraceUid == uid;
491 
492             // Only sanitize when both the critical event and trace file are for different data
493             // apps.
494             return processClassEnum == CriticalEventProto.DATA_APP
495                     && mTraceProcessClassEnum == CriticalEventProto.DATA_APP
496                     && !sameApp;
497         }
498 
sanitizeAnr(CriticalEventProto base)499         private static CriticalEventProto sanitizeAnr(CriticalEventProto base) {
500             AppNotResponding anr = new AppNotResponding();
501             // Do not set subject and process.
502             anr.processClass = base.getAnr().processClass;
503             anr.uid = base.getAnr().uid;
504             anr.pid = base.getAnr().pid;
505 
506             CriticalEventProto sanitized = sanitizeCriticalEventProto(base);
507             sanitized.setAnr(anr);
508             return sanitized;
509         }
510 
sanitizeJavaCrash(CriticalEventProto base)511         private static CriticalEventProto sanitizeJavaCrash(CriticalEventProto base) {
512             JavaCrash crash = new JavaCrash();
513             // Do not set exceptionClass and process.
514             crash.processClass = base.getJavaCrash().processClass;
515             crash.uid = base.getJavaCrash().uid;
516             crash.pid = base.getJavaCrash().pid;
517 
518             CriticalEventProto sanitized = sanitizeCriticalEventProto(base);
519             sanitized.setJavaCrash(crash);
520             return sanitized;
521         }
522 
sanitizeNativeCrash(CriticalEventProto base)523         private static CriticalEventProto sanitizeNativeCrash(CriticalEventProto base) {
524             NativeCrash crash = new NativeCrash();
525             // Do not set process.
526             crash.processClass = base.getNativeCrash().processClass;
527             crash.uid = base.getNativeCrash().uid;
528             crash.pid = base.getNativeCrash().pid;
529 
530             CriticalEventProto sanitized = sanitizeCriticalEventProto(base);
531             sanitized.setNativeCrash(crash);
532             return sanitized;
533         }
534 
sanitizeCriticalEventProto(CriticalEventProto base)535         private static CriticalEventProto sanitizeCriticalEventProto(CriticalEventProto base) {
536             CriticalEventProto sanitized = new CriticalEventProto();
537             sanitized.timestampMs = base.timestampMs;
538             return sanitized;
539         }
540     }
541 }
542