1 /* 2 * Copyright (C) 2021 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.android.server.criticalevents; 18 19 import android.os.Handler; 20 import android.os.HandlerThread; 21 import android.server.ServerProtoEnums; 22 import android.util.Slog; 23 24 import com.android.framework.protobuf.nano.MessageNanoPrinter; 25 import com.android.internal.annotations.VisibleForTesting; 26 import com.android.internal.util.RingBuffer; 27 import com.android.server.criticalevents.nano.CriticalEventLogProto; 28 import com.android.server.criticalevents.nano.CriticalEventLogStorageProto; 29 import com.android.server.criticalevents.nano.CriticalEventProto; 30 import com.android.server.criticalevents.nano.CriticalEventProto.AppNotResponding; 31 import com.android.server.criticalevents.nano.CriticalEventProto.HalfWatchdog; 32 import com.android.server.criticalevents.nano.CriticalEventProto.InstallPackages; 33 import com.android.server.criticalevents.nano.CriticalEventProto.ExcessiveBinderCalls; 34 import com.android.server.criticalevents.nano.CriticalEventProto.JavaCrash; 35 import com.android.server.criticalevents.nano.CriticalEventProto.NativeCrash; 36 import com.android.server.criticalevents.nano.CriticalEventProto.SystemServerStarted; 37 import com.android.server.criticalevents.nano.CriticalEventProto.Watchdog; 38 39 import java.io.File; 40 import java.io.FileOutputStream; 41 import java.io.IOException; 42 import java.nio.file.Files; 43 import java.nio.file.Paths; 44 import java.time.Duration; 45 import java.util.Arrays; 46 import java.util.UUID; 47 48 /** 49 * Log of recent critical events such as Watchdogs. 50 * 51 * For use in ANR reports to show recent events that may help to debug the ANR. In particular, 52 * the presence of recent critical events signal that the device was already in a had state. 53 * 54 * This class needs to be thread safe since it's used as a singleton. 55 */ 56 public class CriticalEventLog { 57 private static final String TAG = CriticalEventLog.class.getSimpleName(); 58 59 /** UID for system_server. */ 60 private static final int AID_SYSTEM = 1000; 61 62 private static CriticalEventLog sInstance; 63 64 /** Name of the file the log is saved to. */ 65 @VisibleForTesting 66 static final String FILENAME = "critical_event_log.pb"; 67 68 /** Timestamp when the log was last saved (or attempted to be saved) to disk. */ 69 private long mLastSaveAttemptMs = 0; 70 71 /** File the log is saved to. */ 72 private final File mLogFile; 73 74 /** Ring buffer containing the log events. */ 75 private final ThreadSafeRingBuffer<CriticalEventProto> mEvents; 76 77 /** Max age of events to include in the output log proto. */ 78 private final int mWindowMs; 79 80 /** Minimum time between consecutive saves of the log to disk. */ 81 private final long mMinTimeBetweenSavesMs; 82 83 /** Whether to load and save the log synchronously with no delay. Only set to true in tests. */ 84 private final boolean mLoadAndSaveImmediately; 85 86 private final Handler mHandler; 87 88 private final Runnable mSaveRunnable = this::saveLogToFileNow; 89 90 @VisibleForTesting CriticalEventLog(String logDir, int capacity, int windowMs, long minTimeBetweenSavesMs, boolean loadAndSaveImmediately, ILogLoader logLoader)91 CriticalEventLog(String logDir, int capacity, int windowMs, long minTimeBetweenSavesMs, 92 boolean loadAndSaveImmediately, ILogLoader logLoader) { 93 mLogFile = Paths.get(logDir, FILENAME).toFile(); 94 mWindowMs = windowMs; 95 mMinTimeBetweenSavesMs = minTimeBetweenSavesMs; 96 mLoadAndSaveImmediately = loadAndSaveImmediately; 97 98 mEvents = new ThreadSafeRingBuffer<>(CriticalEventProto.class, capacity); 99 100 HandlerThread thread = new HandlerThread("CriticalEventLogIO"); 101 thread.start(); 102 mHandler = new Handler(thread.getLooper()); 103 104 final Runnable loadEvents = () -> logLoader.load(mLogFile, mEvents); 105 if (!mLoadAndSaveImmediately) { 106 mHandler.post(loadEvents); 107 } else { 108 loadEvents.run(); 109 } 110 } 111 112 /** Returns a new instance with production settings. */ CriticalEventLog()113 private CriticalEventLog() { 114 this( 115 /* logDir= */"/data/misc/critical-events", 116 /* capacity= */ 20, 117 /* windowMs= */ (int) Duration.ofMinutes(5).toMillis(), 118 /* minTimeBetweenSavesMs= */ Duration.ofSeconds(2).toMillis(), 119 /* loadAndSaveImmediately= */ false, 120 new LogLoader()); 121 } 122 123 /** Returns the singleton instance. */ getInstance()124 public static CriticalEventLog getInstance() { 125 if (sInstance == null) { 126 sInstance = new CriticalEventLog(); 127 } 128 return sInstance; 129 } 130 131 /** 132 * Ensures the singleton instance has been instantiated. 133 * 134 * Use this to eagerly instantiate the log (which loads the previous events from disk). 135 * Otherwise this will occur lazily when the first event is logged, by which time the device may 136 * be under load. 137 */ init()138 public static void init() { 139 getInstance(); 140 } 141 142 @VisibleForTesting getWallTimeMillis()143 protected long getWallTimeMillis() { 144 return System.currentTimeMillis(); 145 } 146 147 /** Logs when a uid sends an excessive number of binder calls. */ logExcessiveBinderCalls(int uid)148 public void logExcessiveBinderCalls(int uid) { 149 CriticalEventProto event = new CriticalEventProto(); 150 ExcessiveBinderCalls calls = new ExcessiveBinderCalls(); 151 calls.uid = uid; 152 event.setExcessiveBinderCalls(calls); 153 log(event); 154 } 155 156 /** Logs when one or more packages are installed. */ logInstallPackagesStarted()157 public void logInstallPackagesStarted() { 158 CriticalEventProto event = new CriticalEventProto(); 159 event.setInstallPackages(new InstallPackages()); 160 log(event); 161 } 162 163 /** Logs when system server started. */ logSystemServerStarted()164 public void logSystemServerStarted() { 165 CriticalEventProto event = new CriticalEventProto(); 166 event.setSystemServerStarted(new SystemServerStarted()); 167 log(event); 168 } 169 170 /** Logs a watchdog. */ logWatchdog(String subject, UUID uuid)171 public void logWatchdog(String subject, UUID uuid) { 172 Watchdog watchdog = new Watchdog(); 173 watchdog.subject = subject; 174 watchdog.uuid = uuid.toString(); 175 CriticalEventProto event = new CriticalEventProto(); 176 event.setWatchdog(watchdog); 177 log(event); 178 } 179 180 /** Logs a half-watchdog. */ logHalfWatchdog(String subject)181 public void logHalfWatchdog(String subject) { 182 HalfWatchdog halfWatchdog = new HalfWatchdog(); 183 halfWatchdog.subject = subject; 184 CriticalEventProto event = new CriticalEventProto(); 185 event.setHalfWatchdog(halfWatchdog); 186 log(event); 187 } 188 189 /** 190 * Logs an ANR. 191 * 192 * @param subject the ANR subject line. 193 * @param processClassEnum {@link android.server.ServerProtoEnums} value for the ANRing process. 194 * @param processName name of the ANRing process. 195 * @param uid uid of the ANRing process. 196 * @param pid pid of the ANRing process. 197 */ logAnr(String subject, int processClassEnum, String processName, int uid, int pid)198 public void logAnr(String subject, int processClassEnum, String processName, int uid, int pid) { 199 AppNotResponding anr = new AppNotResponding(); 200 anr.subject = subject; 201 anr.processClass = processClassEnum; 202 anr.process = processName; 203 anr.uid = uid; 204 anr.pid = pid; 205 CriticalEventProto event = new CriticalEventProto(); 206 event.setAnr(anr); 207 log(event); 208 } 209 210 /** 211 * Logs a java crash. 212 * 213 * @param exceptionClass the crash exception class. 214 * @param processClassEnum {@link android.server.ServerProtoEnums} value for the crashed 215 * process. 216 * @param processName name of the crashed process. 217 * @param uid uid of the crashed process. 218 * @param pid pid of the crashed process. 219 */ logJavaCrash(String exceptionClass, int processClassEnum, String processName, int uid, int pid)220 public void logJavaCrash(String exceptionClass, int processClassEnum, String processName, 221 int uid, int pid) { 222 JavaCrash crash = new JavaCrash(); 223 crash.exceptionClass = exceptionClass; 224 crash.processClass = processClassEnum; 225 crash.process = processName; 226 crash.uid = uid; 227 crash.pid = pid; 228 CriticalEventProto event = new CriticalEventProto(); 229 event.setJavaCrash(crash); 230 log(event); 231 } 232 233 /** 234 * Logs a native crash. 235 * 236 * @param processClassEnum {@link android.server.ServerProtoEnums} value for the crashed 237 * process. 238 * @param processName name of the crashed process. 239 * @param uid uid of the crashed process. 240 * @param pid pid of the crashed process. 241 */ logNativeCrash(int processClassEnum, String processName, int uid, int pid)242 public void logNativeCrash(int processClassEnum, String processName, int uid, int pid) { 243 NativeCrash crash = new NativeCrash(); 244 crash.processClass = processClassEnum; 245 crash.process = processName; 246 crash.uid = uid; 247 crash.pid = pid; 248 CriticalEventProto event = new CriticalEventProto(); 249 event.setNativeCrash(crash); 250 log(event); 251 } 252 log(CriticalEventProto event)253 private void log(CriticalEventProto event) { 254 event.timestampMs = getWallTimeMillis(); 255 appendAndSave(event); 256 } 257 258 @VisibleForTesting appendAndSave(CriticalEventProto event)259 void appendAndSave(CriticalEventProto event) { 260 mEvents.append(event); 261 saveLogToFile(); 262 } 263 264 /** 265 * Returns recent critical events in text format to include in system server ANR stack trace 266 * file. 267 * 268 * Includes all events in the ring buffer with age less than or equal to {@code mWindowMs}. 269 */ logLinesForSystemServerTraceFile()270 public String logLinesForSystemServerTraceFile() { 271 return logLinesForTraceFile(ServerProtoEnums.SYSTEM_SERVER, "AID_SYSTEM", AID_SYSTEM); 272 } 273 274 /** 275 * Returns recent critical events in text format to include in logs such as ANR stack trace 276 * files. 277 * 278 * Includes all events in the ring buffer with age less than or equal to {@code mWindowMs}. 279 * 280 * Some data in the returned log may be redacted for privacy. For example, a log for a data 281 * app will not include specific crash information for a different data app. See 282 * {@link LogSanitizer} for more information. 283 * 284 * @param traceProcessClassEnum {@link android.server.ServerProtoEnums} value for the process 285 * the ANR trace file is for. 286 * @param traceProcessName name of the process the ANR trace file is for. 287 * @param traceUid uid of the process the ANR trace file is for. 288 */ logLinesForTraceFile(int traceProcessClassEnum, String traceProcessName, int traceUid)289 public String logLinesForTraceFile(int traceProcessClassEnum, String traceProcessName, 290 int traceUid) { 291 CriticalEventLogProto outputLogProto = getOutputLogProto(traceProcessClassEnum, 292 traceProcessName, traceUid); 293 return new StringBuilder() 294 .append("--- CriticalEventLog ---\n") 295 .append(MessageNanoPrinter.print(outputLogProto)) 296 .append('\n').toString(); 297 } 298 299 /** 300 * Returns a proto containing recent critical events. 301 * 302 * Includes all events in the ring buffer with age less than or equal to {@code mWindowMs}. 303 */ 304 @VisibleForTesting getOutputLogProto(int traceProcessClassEnum, String traceProcessName, int traceUid)305 protected CriticalEventLogProto getOutputLogProto(int traceProcessClassEnum, 306 String traceProcessName, int traceUid) { 307 CriticalEventLogProto log = new CriticalEventLogProto(); 308 log.timestampMs = getWallTimeMillis(); 309 log.windowMs = mWindowMs; 310 log.capacity = mEvents.capacity(); 311 312 CriticalEventProto[] events = recentEventsWithMinTimestamp(log.timestampMs - mWindowMs); 313 LogSanitizer sanitizer = new LogSanitizer(traceProcessClassEnum, traceProcessName, 314 traceUid); 315 for (int i = 0; i < events.length; i++) { 316 events[i] = sanitizer.process(events[i]); 317 } 318 log.events = events; 319 320 return log; 321 } 322 323 /** 324 * Returns the most recent logged events, starting with the first event that has a timestamp 325 * greater than or equal to {@code minTimestampMs}. 326 * 327 * If no events have a timestamp greater than or equal to {@code minTimestampMs}, returns an 328 * empty array. 329 */ recentEventsWithMinTimestamp(long minTimestampMs)330 private CriticalEventProto[] recentEventsWithMinTimestamp(long minTimestampMs) { 331 // allEvents are in insertion order, i.e. in order of when the relevant log___() function 332 // was called. 333 // This means that if the system clock changed (e.g. a NITZ update) allEvents may not be 334 // strictly ordered by timestamp. In this case we are permissive and start with the 335 // first event that has a timestamp in the desired range. 336 CriticalEventProto[] allEvents = mEvents.toArray(); 337 for (int i = 0; i < allEvents.length; i++) { 338 if (allEvents[i].timestampMs >= minTimestampMs) { 339 return Arrays.copyOfRange(allEvents, i, allEvents.length); 340 } 341 } 342 return new CriticalEventProto[]{}; 343 } 344 saveLogToFile()345 private void saveLogToFile() { 346 if (mLoadAndSaveImmediately) { 347 saveLogToFileNow(); 348 return; 349 } 350 if (mHandler.hasCallbacks(mSaveRunnable)) { 351 // An earlier save is already scheduled so don't need to schedule an additional one. 352 return; 353 } 354 355 if (!mHandler.postDelayed(mSaveRunnable, saveDelayMs())) { 356 Slog.w(TAG, "Error scheduling save"); 357 } 358 } 359 360 /** 361 * Returns the delay in milliseconds when scheduling a save on the handler thread. 362 * 363 * Returns a value in the range [0, {@code minTimeBetweenSavesMs}] such that the time between 364 * consecutive saves does not exceed {@code minTimeBetweenSavesMs}. 365 * 366 * This means that if the last save occurred a long time ago, or if no previous saves 367 * have occurred then the returned delay will be zero. 368 */ 369 @VisibleForTesting saveDelayMs()370 protected long saveDelayMs() { 371 final long nowMs = getWallTimeMillis(); 372 return Math.max(0, 373 mLastSaveAttemptMs + mMinTimeBetweenSavesMs - nowMs); 374 } 375 376 @VisibleForTesting saveLogToFileNow()377 protected void saveLogToFileNow() { 378 mLastSaveAttemptMs = getWallTimeMillis(); 379 380 File logDir = mLogFile.getParentFile(); 381 if (!logDir.exists()) { 382 if (!logDir.mkdir()) { 383 Slog.e(TAG, "Error creating log directory: " + logDir.getPath()); 384 return; 385 } 386 } 387 388 if (!mLogFile.exists()) { 389 try { 390 mLogFile.createNewFile(); 391 } catch (IOException e) { 392 Slog.e(TAG, "Error creating log file", e); 393 return; 394 } 395 } 396 397 CriticalEventLogStorageProto logProto = new CriticalEventLogStorageProto(); 398 logProto.events = mEvents.toArray(); 399 400 final byte[] bytes = CriticalEventLogStorageProto.toByteArray(logProto); 401 try (FileOutputStream stream = new FileOutputStream(mLogFile, false)) { 402 stream.write(bytes); 403 } catch (IOException e) { 404 Slog.e(TAG, "Error saving log to disk.", e); 405 } 406 } 407 408 @VisibleForTesting 409 protected static class ThreadSafeRingBuffer<T> { 410 private final int mCapacity; 411 private final RingBuffer<T> mBuffer; 412 ThreadSafeRingBuffer(Class<T> clazz, int capacity)413 ThreadSafeRingBuffer(Class<T> clazz, int capacity) { 414 this.mCapacity = capacity; 415 this.mBuffer = new RingBuffer<>(clazz, capacity); 416 } 417 append(T t)418 synchronized void append(T t) { 419 mBuffer.append(t); 420 } 421 toArray()422 synchronized T[] toArray() { 423 return mBuffer.toArray(); 424 } 425 capacity()426 int capacity() { 427 return mCapacity; 428 } 429 } 430 431 /** Loads log events from disk into a ring buffer. */ 432 protected interface ILogLoader { load(File logFile, ThreadSafeRingBuffer<CriticalEventProto> buffer)433 void load(File logFile, ThreadSafeRingBuffer<CriticalEventProto> buffer); 434 } 435 436 /** Loads log events from disk into a ring buffer. */ 437 static class LogLoader implements ILogLoader { 438 @Override load(File logFile, ThreadSafeRingBuffer<CriticalEventProto> buffer)439 public void load(File logFile, 440 ThreadSafeRingBuffer<CriticalEventProto> buffer) { 441 for (CriticalEventProto event : loadLogFromFile(logFile).events) { 442 buffer.append(event); 443 } 444 } 445 loadLogFromFile(File logFile)446 private static CriticalEventLogStorageProto loadLogFromFile(File logFile) { 447 if (!logFile.exists()) { 448 Slog.i(TAG, "No log found, returning empty log proto."); 449 return new CriticalEventLogStorageProto(); 450 } 451 452 try { 453 return CriticalEventLogStorageProto.parseFrom( 454 Files.readAllBytes(logFile.toPath())); 455 } catch (IOException e) { 456 Slog.e(TAG, "Error reading log from disk.", e); 457 return new CriticalEventLogStorageProto(); 458 } 459 } 460 } 461 462 /** 463 * Redacts private data app fields from the critical event protos. 464 * 465 * When a critical event log is requested, this class is used to redact specific information 466 * so that the trace file for a data app does not leak private information about other data 467 * apps. 468 */ 469 private static class LogSanitizer { 470 /** 471 * The {@link CriticalEventProto.ProcessClass} of the process the output trace file is for. 472 */ 473 int mTraceProcessClassEnum; 474 475 /** The name of the process that the output trace file is for. */ 476 String mTraceProcessName; 477 478 /** The uid of the process that the output trace file is for. */ 479 int mTraceUid; 480 LogSanitizer(int traceProcessClassEnum, String traceProcessName, int traceUid)481 LogSanitizer(int traceProcessClassEnum, String traceProcessName, int traceUid) { 482 mTraceProcessClassEnum = traceProcessClassEnum; 483 mTraceProcessName = traceProcessName; 484 mTraceUid = traceUid; 485 } 486 487 /** 488 * Redacts information from a critical event proto where necessary. 489 * 490 * This function does not mutate its input. If redaction happens, it returns a new proto. 491 * Otherwise, it returns the original proto. 492 */ process(CriticalEventProto event)493 CriticalEventProto process(CriticalEventProto event) { 494 if (event.hasAnr()) { 495 AppNotResponding anr = event.getAnr(); 496 if (shouldSanitize(anr.processClass, anr.process, anr.uid)) { 497 return sanitizeAnr(event); 498 } 499 } else if (event.hasJavaCrash()) { 500 JavaCrash crash = event.getJavaCrash(); 501 if (shouldSanitize(crash.processClass, crash.process, crash.uid)) { 502 return sanitizeJavaCrash(event); 503 } 504 } else if (event.hasNativeCrash()) { 505 NativeCrash crash = event.getNativeCrash(); 506 if (shouldSanitize(crash.processClass, crash.process, crash.uid)) { 507 return sanitizeNativeCrash(event); 508 } 509 } 510 // No redaction needed. 511 return event; 512 } 513 shouldSanitize(int processClassEnum, String processName, int uid)514 private boolean shouldSanitize(int processClassEnum, String processName, int uid) { 515 boolean sameApp = processName != null && processName.equals(mTraceProcessName) 516 && mTraceUid == uid; 517 518 // Only sanitize when both the critical event and trace file are for different data 519 // apps. 520 return processClassEnum == CriticalEventProto.DATA_APP 521 && mTraceProcessClassEnum == CriticalEventProto.DATA_APP 522 && !sameApp; 523 } 524 sanitizeAnr(CriticalEventProto base)525 private static CriticalEventProto sanitizeAnr(CriticalEventProto base) { 526 AppNotResponding anr = new AppNotResponding(); 527 // Do not set subject and process. 528 anr.processClass = base.getAnr().processClass; 529 anr.uid = base.getAnr().uid; 530 anr.pid = base.getAnr().pid; 531 532 CriticalEventProto sanitized = sanitizeCriticalEventProto(base); 533 sanitized.setAnr(anr); 534 return sanitized; 535 } 536 sanitizeJavaCrash(CriticalEventProto base)537 private static CriticalEventProto sanitizeJavaCrash(CriticalEventProto base) { 538 JavaCrash crash = new JavaCrash(); 539 // Do not set exceptionClass and process. 540 crash.processClass = base.getJavaCrash().processClass; 541 crash.uid = base.getJavaCrash().uid; 542 crash.pid = base.getJavaCrash().pid; 543 544 CriticalEventProto sanitized = sanitizeCriticalEventProto(base); 545 sanitized.setJavaCrash(crash); 546 return sanitized; 547 } 548 sanitizeNativeCrash(CriticalEventProto base)549 private static CriticalEventProto sanitizeNativeCrash(CriticalEventProto base) { 550 NativeCrash crash = new NativeCrash(); 551 // Do not set process. 552 crash.processClass = base.getNativeCrash().processClass; 553 crash.uid = base.getNativeCrash().uid; 554 crash.pid = base.getNativeCrash().pid; 555 556 CriticalEventProto sanitized = sanitizeCriticalEventProto(base); 557 sanitized.setNativeCrash(crash); 558 return sanitized; 559 } 560 sanitizeCriticalEventProto(CriticalEventProto base)561 private static CriticalEventProto sanitizeCriticalEventProto(CriticalEventProto base) { 562 CriticalEventProto sanitized = new CriticalEventProto(); 563 sanitized.timestampMs = base.timestampMs; 564 return sanitized; 565 } 566 } 567 } 568