1 /* 2 * Copyright (C) 2021 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.android.server.criticalevents; 18 19 import android.os.Handler; 20 import android.os.HandlerThread; 21 import android.server.ServerProtoEnums; 22 import android.util.Slog; 23 24 import com.android.framework.protobuf.nano.MessageNanoPrinter; 25 import com.android.internal.annotations.VisibleForTesting; 26 import com.android.internal.util.RingBuffer; 27 import com.android.server.criticalevents.nano.CriticalEventLogProto; 28 import com.android.server.criticalevents.nano.CriticalEventLogStorageProto; 29 import com.android.server.criticalevents.nano.CriticalEventProto; 30 import com.android.server.criticalevents.nano.CriticalEventProto.AppNotResponding; 31 import com.android.server.criticalevents.nano.CriticalEventProto.HalfWatchdog; 32 import com.android.server.criticalevents.nano.CriticalEventProto.JavaCrash; 33 import com.android.server.criticalevents.nano.CriticalEventProto.NativeCrash; 34 import com.android.server.criticalevents.nano.CriticalEventProto.Watchdog; 35 36 import java.io.File; 37 import java.io.FileOutputStream; 38 import java.io.IOException; 39 import java.nio.file.Files; 40 import java.nio.file.Paths; 41 import java.time.Duration; 42 import java.util.Arrays; 43 import java.util.UUID; 44 45 /** 46 * Log of recent critical events such as Watchdogs. 47 * 48 * For use in ANR reports to show recent events that may help to debug the ANR. In particular, 49 * the presence of recent critical events signal that the device was already in a had state. 50 * 51 * This class needs to be thread safe since it's used as a singleton. 52 */ 53 public class CriticalEventLog { 54 private static final String TAG = CriticalEventLog.class.getSimpleName(); 55 56 /** UID for system_server. */ 57 private static final int AID_SYSTEM = 1000; 58 59 private static CriticalEventLog sInstance; 60 61 /** Name of the file the log is saved to. */ 62 @VisibleForTesting 63 static final String FILENAME = "critical_event_log.pb"; 64 65 /** Timestamp when the log was last saved (or attempted to be saved) to disk. */ 66 private long mLastSaveAttemptMs = 0; 67 68 /** File the log is saved to. */ 69 private final File mLogFile; 70 71 /** Ring buffer containing the log events. */ 72 private final ThreadSafeRingBuffer<CriticalEventProto> mEvents; 73 74 /** Max age of events to include in the output log proto. */ 75 private final int mWindowMs; 76 77 /** Minimum time between consecutive saves of the log to disk. */ 78 private final long mMinTimeBetweenSavesMs; 79 80 /** Whether to load and save the log synchronously with no delay. Only set to true in tests. */ 81 private final boolean mLoadAndSaveImmediately; 82 83 private final Handler mHandler; 84 85 private final Runnable mSaveRunnable = this::saveLogToFileNow; 86 87 @VisibleForTesting CriticalEventLog(String logDir, int capacity, int windowMs, long minTimeBetweenSavesMs, boolean loadAndSaveImmediately, ILogLoader logLoader)88 CriticalEventLog(String logDir, int capacity, int windowMs, long minTimeBetweenSavesMs, 89 boolean loadAndSaveImmediately, ILogLoader logLoader) { 90 mLogFile = Paths.get(logDir, FILENAME).toFile(); 91 mWindowMs = windowMs; 92 mMinTimeBetweenSavesMs = minTimeBetweenSavesMs; 93 mLoadAndSaveImmediately = loadAndSaveImmediately; 94 95 mEvents = new ThreadSafeRingBuffer<>(CriticalEventProto.class, capacity); 96 97 HandlerThread thread = new HandlerThread("CriticalEventLogIO"); 98 thread.start(); 99 mHandler = new Handler(thread.getLooper()); 100 101 final Runnable loadEvents = () -> logLoader.load(mLogFile, mEvents); 102 if (!mLoadAndSaveImmediately) { 103 mHandler.post(loadEvents); 104 } else { 105 loadEvents.run(); 106 } 107 } 108 109 /** Returns a new instance with production settings. */ CriticalEventLog()110 private CriticalEventLog() { 111 this( 112 /* logDir= */"/data/misc/critical-events", 113 /* capacity= */ 20, 114 /* windowMs= */ (int) Duration.ofMinutes(5).toMillis(), 115 /* minTimeBetweenSavesMs= */ Duration.ofSeconds(2).toMillis(), 116 /* loadAndSaveImmediately= */ false, 117 new LogLoader()); 118 } 119 120 /** Returns the singleton instance. */ getInstance()121 public static CriticalEventLog getInstance() { 122 if (sInstance == null) { 123 sInstance = new CriticalEventLog(); 124 } 125 return sInstance; 126 } 127 128 /** 129 * Ensures the singleton instance has been instantiated. 130 * 131 * Use this to eagerly instantiate the log (which loads the previous events from disk). 132 * Otherwise this will occur lazily when the first event is logged, by which time the device may 133 * be under load. 134 */ init()135 public static void init() { 136 getInstance(); 137 } 138 139 @VisibleForTesting getWallTimeMillis()140 protected long getWallTimeMillis() { 141 return System.currentTimeMillis(); 142 } 143 144 /** Logs a watchdog. */ logWatchdog(String subject, UUID uuid)145 public void logWatchdog(String subject, UUID uuid) { 146 Watchdog watchdog = new Watchdog(); 147 watchdog.subject = subject; 148 watchdog.uuid = uuid.toString(); 149 CriticalEventProto event = new CriticalEventProto(); 150 event.setWatchdog(watchdog); 151 log(event); 152 } 153 154 /** Logs a half-watchdog. */ logHalfWatchdog(String subject)155 public void logHalfWatchdog(String subject) { 156 HalfWatchdog halfWatchdog = new HalfWatchdog(); 157 halfWatchdog.subject = subject; 158 CriticalEventProto event = new CriticalEventProto(); 159 event.setHalfWatchdog(halfWatchdog); 160 log(event); 161 } 162 163 /** 164 * Logs an ANR. 165 * 166 * @param subject the ANR subject line. 167 * @param processClassEnum {@link android.server.ServerProtoEnums} value for the ANRing process. 168 * @param processName name of the ANRing process. 169 * @param uid uid of the ANRing process. 170 * @param pid pid of the ANRing process. 171 */ logAnr(String subject, int processClassEnum, String processName, int uid, int pid)172 public void logAnr(String subject, int processClassEnum, String processName, int uid, int pid) { 173 AppNotResponding anr = new AppNotResponding(); 174 anr.subject = subject; 175 anr.processClass = processClassEnum; 176 anr.process = processName; 177 anr.uid = uid; 178 anr.pid = pid; 179 CriticalEventProto event = new CriticalEventProto(); 180 event.setAnr(anr); 181 log(event); 182 } 183 184 /** 185 * Logs a java crash. 186 * 187 * @param exceptionClass the crash exception class. 188 * @param processClassEnum {@link android.server.ServerProtoEnums} value for the crashed 189 * process. 190 * @param processName name of the crashed process. 191 * @param uid uid of the crashed process. 192 * @param pid pid of the crashed process. 193 */ logJavaCrash(String exceptionClass, int processClassEnum, String processName, int uid, int pid)194 public void logJavaCrash(String exceptionClass, int processClassEnum, String processName, 195 int uid, int pid) { 196 JavaCrash crash = new JavaCrash(); 197 crash.exceptionClass = exceptionClass; 198 crash.processClass = processClassEnum; 199 crash.process = processName; 200 crash.uid = uid; 201 crash.pid = pid; 202 CriticalEventProto event = new CriticalEventProto(); 203 event.setJavaCrash(crash); 204 log(event); 205 } 206 207 /** 208 * Logs a native crash. 209 * 210 * @param processClassEnum {@link android.server.ServerProtoEnums} value for the crashed 211 * process. 212 * @param processName name of the crashed process. 213 * @param uid uid of the crashed process. 214 * @param pid pid of the crashed process. 215 */ logNativeCrash(int processClassEnum, String processName, int uid, int pid)216 public void logNativeCrash(int processClassEnum, String processName, int uid, int pid) { 217 NativeCrash crash = new NativeCrash(); 218 crash.processClass = processClassEnum; 219 crash.process = processName; 220 crash.uid = uid; 221 crash.pid = pid; 222 CriticalEventProto event = new CriticalEventProto(); 223 event.setNativeCrash(crash); 224 log(event); 225 } 226 log(CriticalEventProto event)227 private void log(CriticalEventProto event) { 228 event.timestampMs = getWallTimeMillis(); 229 appendAndSave(event); 230 } 231 232 @VisibleForTesting appendAndSave(CriticalEventProto event)233 void appendAndSave(CriticalEventProto event) { 234 mEvents.append(event); 235 saveLogToFile(); 236 } 237 238 /** 239 * Returns recent critical events in text format to include in system server ANR stack trace 240 * file. 241 * 242 * Includes all events in the ring buffer with age less than or equal to {@code mWindowMs}. 243 */ logLinesForSystemServerTraceFile()244 public String logLinesForSystemServerTraceFile() { 245 return logLinesForTraceFile(ServerProtoEnums.SYSTEM_SERVER, "AID_SYSTEM", AID_SYSTEM); 246 } 247 248 /** 249 * Returns recent critical events in text format to include in logs such as ANR stack trace 250 * files. 251 * 252 * Includes all events in the ring buffer with age less than or equal to {@code mWindowMs}. 253 * 254 * Some data in the returned log may be redacted for privacy. For example, a log for a data 255 * app will not include specific crash information for a different data app. See 256 * {@link LogSanitizer} for more information. 257 * 258 * @param traceProcessClassEnum {@link android.server.ServerProtoEnums} value for the process 259 * the ANR trace file is for. 260 * @param traceProcessName name of the process the ANR trace file is for. 261 * @param traceUid uid of the process the ANR trace file is for. 262 */ logLinesForTraceFile(int traceProcessClassEnum, String traceProcessName, int traceUid)263 public String logLinesForTraceFile(int traceProcessClassEnum, String traceProcessName, 264 int traceUid) { 265 CriticalEventLogProto outputLogProto = getOutputLogProto(traceProcessClassEnum, 266 traceProcessName, traceUid); 267 return new StringBuilder() 268 .append("--- CriticalEventLog ---\n") 269 .append(MessageNanoPrinter.print(outputLogProto)) 270 .append('\n').toString(); 271 } 272 273 /** 274 * Returns a proto containing recent critical events. 275 * 276 * Includes all events in the ring buffer with age less than or equal to {@code mWindowMs}. 277 */ 278 @VisibleForTesting getOutputLogProto(int traceProcessClassEnum, String traceProcessName, int traceUid)279 protected CriticalEventLogProto getOutputLogProto(int traceProcessClassEnum, 280 String traceProcessName, int traceUid) { 281 CriticalEventLogProto log = new CriticalEventLogProto(); 282 log.timestampMs = getWallTimeMillis(); 283 log.windowMs = mWindowMs; 284 log.capacity = mEvents.capacity(); 285 286 CriticalEventProto[] events = recentEventsWithMinTimestamp(log.timestampMs - mWindowMs); 287 LogSanitizer sanitizer = new LogSanitizer(traceProcessClassEnum, traceProcessName, 288 traceUid); 289 for (int i = 0; i < events.length; i++) { 290 events[i] = sanitizer.process(events[i]); 291 } 292 log.events = events; 293 294 return log; 295 } 296 297 /** 298 * Returns the most recent logged events, starting with the first event that has a timestamp 299 * greater than or equal to {@code minTimestampMs}. 300 * 301 * If no events have a timestamp greater than or equal to {@code minTimestampMs}, returns an 302 * empty array. 303 */ recentEventsWithMinTimestamp(long minTimestampMs)304 private CriticalEventProto[] recentEventsWithMinTimestamp(long minTimestampMs) { 305 // allEvents are in insertion order, i.e. in order of when the relevant log___() function 306 // was called. 307 // This means that if the system clock changed (e.g. a NITZ update) allEvents may not be 308 // strictly ordered by timestamp. In this case we are permissive and start with the 309 // first event that has a timestamp in the desired range. 310 CriticalEventProto[] allEvents = mEvents.toArray(); 311 for (int i = 0; i < allEvents.length; i++) { 312 if (allEvents[i].timestampMs >= minTimestampMs) { 313 return Arrays.copyOfRange(allEvents, i, allEvents.length); 314 } 315 } 316 return new CriticalEventProto[]{}; 317 } 318 saveLogToFile()319 private void saveLogToFile() { 320 if (mLoadAndSaveImmediately) { 321 saveLogToFileNow(); 322 return; 323 } 324 if (mHandler.hasCallbacks(mSaveRunnable)) { 325 // An earlier save is already scheduled so don't need to schedule an additional one. 326 return; 327 } 328 329 if (!mHandler.postDelayed(mSaveRunnable, saveDelayMs())) { 330 Slog.w(TAG, "Error scheduling save"); 331 } 332 } 333 334 /** 335 * Returns the delay in milliseconds when scheduling a save on the handler thread. 336 * 337 * Returns a value in the range [0, {@code minTimeBetweenSavesMs}] such that the time between 338 * consecutive saves does not exceed {@code minTimeBetweenSavesMs}. 339 * 340 * This means that if the last save occurred a long time ago, or if no previous saves 341 * have occurred then the returned delay will be zero. 342 */ 343 @VisibleForTesting saveDelayMs()344 protected long saveDelayMs() { 345 final long nowMs = getWallTimeMillis(); 346 return Math.max(0, 347 mLastSaveAttemptMs + mMinTimeBetweenSavesMs - nowMs); 348 } 349 350 @VisibleForTesting saveLogToFileNow()351 protected void saveLogToFileNow() { 352 mLastSaveAttemptMs = getWallTimeMillis(); 353 354 File logDir = mLogFile.getParentFile(); 355 if (!logDir.exists()) { 356 if (!logDir.mkdir()) { 357 Slog.e(TAG, "Error creating log directory: " + logDir.getPath()); 358 return; 359 } 360 } 361 362 if (!mLogFile.exists()) { 363 try { 364 mLogFile.createNewFile(); 365 } catch (IOException e) { 366 Slog.e(TAG, "Error creating log file", e); 367 return; 368 } 369 } 370 371 CriticalEventLogStorageProto logProto = new CriticalEventLogStorageProto(); 372 logProto.events = mEvents.toArray(); 373 374 final byte[] bytes = CriticalEventLogStorageProto.toByteArray(logProto); 375 try (FileOutputStream stream = new FileOutputStream(mLogFile, false)) { 376 stream.write(bytes); 377 } catch (IOException e) { 378 Slog.e(TAG, "Error saving log to disk.", e); 379 } 380 } 381 382 @VisibleForTesting 383 protected static class ThreadSafeRingBuffer<T> { 384 private final int mCapacity; 385 private final RingBuffer<T> mBuffer; 386 ThreadSafeRingBuffer(Class<T> clazz, int capacity)387 ThreadSafeRingBuffer(Class<T> clazz, int capacity) { 388 this.mCapacity = capacity; 389 this.mBuffer = new RingBuffer<>(clazz, capacity); 390 } 391 append(T t)392 synchronized void append(T t) { 393 mBuffer.append(t); 394 } 395 toArray()396 synchronized T[] toArray() { 397 return mBuffer.toArray(); 398 } 399 capacity()400 int capacity() { 401 return mCapacity; 402 } 403 } 404 405 /** Loads log events from disk into a ring buffer. */ 406 protected interface ILogLoader { load(File logFile, ThreadSafeRingBuffer<CriticalEventProto> buffer)407 void load(File logFile, ThreadSafeRingBuffer<CriticalEventProto> buffer); 408 } 409 410 /** Loads log events from disk into a ring buffer. */ 411 static class LogLoader implements ILogLoader { 412 @Override load(File logFile, ThreadSafeRingBuffer<CriticalEventProto> buffer)413 public void load(File logFile, 414 ThreadSafeRingBuffer<CriticalEventProto> buffer) { 415 for (CriticalEventProto event : loadLogFromFile(logFile).events) { 416 buffer.append(event); 417 } 418 } 419 loadLogFromFile(File logFile)420 private static CriticalEventLogStorageProto loadLogFromFile(File logFile) { 421 if (!logFile.exists()) { 422 Slog.i(TAG, "No log found, returning empty log proto."); 423 return new CriticalEventLogStorageProto(); 424 } 425 426 try { 427 return CriticalEventLogStorageProto.parseFrom( 428 Files.readAllBytes(logFile.toPath())); 429 } catch (IOException e) { 430 Slog.e(TAG, "Error reading log from disk.", e); 431 return new CriticalEventLogStorageProto(); 432 } 433 } 434 } 435 436 /** 437 * Redacts private data app fields from the critical event protos. 438 * 439 * When a critical event log is requested, this class is used to redact specific information 440 * so that the trace file for a data app does not leak private information about other data 441 * apps. 442 */ 443 private static class LogSanitizer { 444 /** 445 * The {@link CriticalEventProto.ProcessClass} of the process the output trace file is for. 446 */ 447 int mTraceProcessClassEnum; 448 449 /** The name of the process that the output trace file is for. */ 450 String mTraceProcessName; 451 452 /** The uid of the process that the output trace file is for. */ 453 int mTraceUid; 454 LogSanitizer(int traceProcessClassEnum, String traceProcessName, int traceUid)455 LogSanitizer(int traceProcessClassEnum, String traceProcessName, int traceUid) { 456 mTraceProcessClassEnum = traceProcessClassEnum; 457 mTraceProcessName = traceProcessName; 458 mTraceUid = traceUid; 459 } 460 461 /** 462 * Redacts information from a critical event proto where necessary. 463 * 464 * This function does not mutate its input. If redaction happens, it returns a new proto. 465 * Otherwise, it returns the original proto. 466 */ process(CriticalEventProto event)467 CriticalEventProto process(CriticalEventProto event) { 468 if (event.hasAnr()) { 469 AppNotResponding anr = event.getAnr(); 470 if (shouldSanitize(anr.processClass, anr.process, anr.uid)) { 471 return sanitizeAnr(event); 472 } 473 } else if (event.hasJavaCrash()) { 474 JavaCrash crash = event.getJavaCrash(); 475 if (shouldSanitize(crash.processClass, crash.process, crash.uid)) { 476 return sanitizeJavaCrash(event); 477 } 478 } else if (event.hasNativeCrash()) { 479 NativeCrash crash = event.getNativeCrash(); 480 if (shouldSanitize(crash.processClass, crash.process, crash.uid)) { 481 return sanitizeNativeCrash(event); 482 } 483 } 484 // No redaction needed. 485 return event; 486 } 487 shouldSanitize(int processClassEnum, String processName, int uid)488 private boolean shouldSanitize(int processClassEnum, String processName, int uid) { 489 boolean sameApp = processName != null && processName.equals(mTraceProcessName) 490 && mTraceUid == uid; 491 492 // Only sanitize when both the critical event and trace file are for different data 493 // apps. 494 return processClassEnum == CriticalEventProto.DATA_APP 495 && mTraceProcessClassEnum == CriticalEventProto.DATA_APP 496 && !sameApp; 497 } 498 sanitizeAnr(CriticalEventProto base)499 private static CriticalEventProto sanitizeAnr(CriticalEventProto base) { 500 AppNotResponding anr = new AppNotResponding(); 501 // Do not set subject and process. 502 anr.processClass = base.getAnr().processClass; 503 anr.uid = base.getAnr().uid; 504 anr.pid = base.getAnr().pid; 505 506 CriticalEventProto sanitized = sanitizeCriticalEventProto(base); 507 sanitized.setAnr(anr); 508 return sanitized; 509 } 510 sanitizeJavaCrash(CriticalEventProto base)511 private static CriticalEventProto sanitizeJavaCrash(CriticalEventProto base) { 512 JavaCrash crash = new JavaCrash(); 513 // Do not set exceptionClass and process. 514 crash.processClass = base.getJavaCrash().processClass; 515 crash.uid = base.getJavaCrash().uid; 516 crash.pid = base.getJavaCrash().pid; 517 518 CriticalEventProto sanitized = sanitizeCriticalEventProto(base); 519 sanitized.setJavaCrash(crash); 520 return sanitized; 521 } 522 sanitizeNativeCrash(CriticalEventProto base)523 private static CriticalEventProto sanitizeNativeCrash(CriticalEventProto base) { 524 NativeCrash crash = new NativeCrash(); 525 // Do not set process. 526 crash.processClass = base.getNativeCrash().processClass; 527 crash.uid = base.getNativeCrash().uid; 528 crash.pid = base.getNativeCrash().pid; 529 530 CriticalEventProto sanitized = sanitizeCriticalEventProto(base); 531 sanitized.setNativeCrash(crash); 532 return sanitized; 533 } 534 sanitizeCriticalEventProto(CriticalEventProto base)535 private static CriticalEventProto sanitizeCriticalEventProto(CriticalEventProto base) { 536 CriticalEventProto sanitized = new CriticalEventProto(); 537 sanitized.timestampMs = base.timestampMs; 538 return sanitized; 539 } 540 } 541 } 542