#!/usr/bin/env python3
#
# Copyright (C) 2013 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""stack symbolizes native crash dumps."""

import os
import re
import subprocess
import symbol
import tempfile
import unittest

import example_crashes


def ConvertTrace(lines):
  """Symbolize the crash dump in `lines` and print the result to stdout.

  Args:
    lines: iterable of str, the raw lines of a tombstone / logcat dump.
  """
  tracer = TraceConverter()
  print("Reading symbols from", symbol.SYMBOLS_DIR)
  tracer.ConvertTrace(lines)


class TraceConverter:
  """Parses crash dump lines, symbolizes them and prints the result.

  Lines are fed through ProcessLine() one at a time. Header lines (process
  info, signal, registers, ...) are echoed immediately; trace and stack-data
  lines are accumulated in `trace_lines` / `value_lines` and printed as a
  table whenever a new section starts or when PrintOutput() is called.
  """

  process_info_line = re.compile(r"(pid: [0-9]+, tid: [0-9]+.*)")
  revision_line = re.compile(r"(Revision: '(.*)')")
  signal_line = re.compile(r"(signal [0-9]+ \(.*\).*)")
  abort_message_line = re.compile(r"(Abort message: '.*')")
  thread_line = re.compile(r"(.*)(--- ){15}---")
  dalvik_jni_thread_line = re.compile("(\".*\" prio=[0-9]+ tid=[0-9]+ NATIVE.*)")
  dalvik_native_thread_line = re.compile("(\".*\" sysTid=[0-9]+ nice=[0-9]+.*)")
  # "$a" can never match. These are placeholders until UpdateAbiRegexes()
  # installs the real, ABI-dependent patterns.
  register_line = re.compile("$a")
  trace_line = re.compile("$a")
  sanitizer_trace_line = re.compile("$a")
  value_line = re.compile("$a")
  code_line = re.compile("$a")
  zipinfo_central_directory_line = re.compile(r"Central\s+directory\s+entry")
  zipinfo_central_info_match = re.compile(
      r"^\s*(\S+)$\s*offset of local header from start of archive:\s*(\d+)"
      r".*^\s*compressed size:\s+(\d+)", re.M | re.S)
  unreachable_line = re.compile(r"((\d+ bytes in \d+ unreachable allocations)|"
                                r"(\d+ bytes unreachable at [0-9a-f]+)|"
                                r"(referencing \d+ unreachable bytes in \d+ allocation(s)?)|"
                                r"(and \d+ similar unreachable bytes in \d+ allocation(s)?))")
  # Class-level defaults, kept so the attributes exist even on instances that
  # bypass __init__. __init__ rebinds the mutable ones per instance so that
  # state is never shared between converters.
  trace_lines = []
  value_lines = []
  last_frame = -1
  width = "{8}"
  spacing = ""
  apk_info = dict()

  register_names = {
    "arm": "r0|r1|r2|r3|r4|r5|r6|r7|r8|r9|sl|fp|ip|sp|lr|pc|cpsr",
    "arm64": "x0|x1|x2|x3|x4|x5|x6|x7|x8|x9|x10|x11|x12|x13|x14|x15|x16|x17|x18|x19|x20|x21|x22|x23|x24|x25|x26|x27|x28|x29|x30|sp|pc|pstate",
    "mips": "zr|at|v0|v1|a0|a1|a2|a3|t0|t1|t2|t3|t4|t5|t6|t7|s0|s1|s2|s3|s4|s5|s6|s7|t8|t9|k0|k1|gp|sp|s8|ra|hi|lo|bva|epc",
    "mips64": "zr|at|v0|v1|a0|a1|a2|a3|a4|a5|a6|a7|t0|t1|t2|t3|s0|s1|s2|s3|s4|s5|s6|s7|t8|t9|k0|k1|gp|sp|s8|ra|hi|lo|bva|epc",
    "x86": "eax|ebx|ecx|edx|esi|edi|x?cs|x?ds|x?es|x?fs|x?ss|eip|ebp|esp|flags",
    "x86_64": "rax|rbx|rcx|rdx|rsi|rdi|r8|r9|r10|r11|r12|r13|r14|r15|cs|ss|rip|rbp|rsp|eflags",
  }

  # ABIs whose addresses are printed as 16 hex digits instead of 8.
  _ARCHS_64BIT = ("arm64", "mips64", "x86_64")

  # Translation table (for str.translate) that deletes zero-width code points.
  _ZERO_WIDTH_CHARS = dict.fromkeys(map(ord, "\u200b\u200c\u200d\u2060\ufeff"))

  def __init__(self):
    """Give this converter its own accumulators and APK cache."""
    # Tuples of (addr, symbol_with_offset, location) for the current stack.
    self.trace_lines = []
    # Tuples of (addr, value, symbol_with_offset, location) for stack data.
    self.value_lines = []
    # Frame number of the most recently processed trace line.
    self.last_frame = -1
    # Maps apk name -> [apk_full_path, offset_list, tmp_files], where
    # offset_list holds [name, start, end] entries and tmp_files maps an
    # entry name to the temporary file it was extracted to.
    self.apk_info = dict()

  def UpdateAbiRegexes(self):
    """Rebuild the ABI-dependent regexes from the current symbol.ARCH.

    Raises:
      KeyError: if symbol.ARCH has no entry in register_names.
    """
    if symbol.ARCH in self._ARCHS_64BIT:
      self.width = "{16}"
      self.spacing = " "
    else:
      self.width = "{8}"
      self.spacing = ""

    self.register_line = re.compile("(([ ]*\\b(" + self.register_names[symbol.ARCH] + ")\\b +[0-9a-f]" + self.width + "){2,5})")

    # Note that both trace and value line matching allow for variable amounts of
    # whitespace (e.g. \t). This is because we want to allow for the stack
    # tool to operate on AndroidFeedback provided system logs. AndroidFeedback
    # strips out double spaces that are found in tombstone files and logcat output.
    #
    # Examples of matched trace lines include lines from tombstone files like:
    # #00 pc 001cf42e /data/data/com.my.project/lib/libmyproject.so
    #
    # Or lines from AndroidFeedback crash report system logs like:
    # 03-25 00:51:05.520 I/DEBUG ( 65): #00 pc 001cf42e /data/data/com.my.project/lib/libmyproject.so
    # Please note the spacing differences.
    self.trace_line = re.compile(
        r".*"                                            # Random start stuff.
        r"\#(?P<frame>[0-9]+)"                           # Frame number.
        r"[ \t]+..[ \t]+"                                # (space)pc(space).
        r"(?P<offset>[0-9a-f]" + self.width + ")[ \t]+"  # Offset (hex number given without
                                                         # 0x prefix).
        r"(?P<dso>\[[^\]]+\]|[^\r\n \t]*)"               # Library name.
        r"( \(offset (?P<so_offset>0x[0-9a-fA-F]+)\))?"  # Offset into the file to find the start of the shared so.
        r"(?P<symbolpresent> \((?P<symbol>.*)\))?")      # Is the symbol there?
    # pylint: disable-msg=C6310
    # Sanitizer output. This is different from debuggerd output, and it is easier to handle this as
    # its own regex. Example:
    # 08-19 05:29:26.283 397 403 I : #0 0xb6a15237 (/system/lib/libclang_rt.asan-arm-android.so+0x4f237)
    self.sanitizer_trace_line = re.compile(
        r".*"                        # Random start stuff.
        r"\#(?P<frame>[0-9]+)"       # Frame number.
        r"[ \t]+0x[0-9a-f]+[ \t]+"   # PC, not interesting to us.
        r"\("                        # Opening paren.
        r"(?P<dso>[^+]+)"            # Library name.
        r"\+"                        # '+'
        r"0x(?P<offset>[0-9a-f]+)"   # Offset (hex number given with
                                     # 0x prefix).
        r"\)")                       # Closing paren.
    # pylint: disable-msg=C6310
    # Examples of matched value lines include:
    # bea4170c 8018e4e9 /data/data/com.my.project/lib/libmyproject.so
    # bea4170c 8018e4e9 /data/data/com.my.project/lib/libmyproject.so (symbol)
    # 03-25 00:51:05.530 I/DEBUG ( 65): bea4170c 8018e4e9 /data/data/com.my.project/lib/libmyproject.so
    # Again, note the spacing differences.
    self.value_line = re.compile(r"(.*)([0-9a-f]" + self.width + r")[ \t]+([0-9a-f]" + self.width + r")[ \t]+([^\r\n \t]*)( \((.*)\))?")
    # Lines from 'code around' sections of the output will be matched before
    # value lines because otherwise the 'code around' sections will be confused as
    # value lines.
    #
    # Examples include:
    # 801cf40c ffffc4cc 00b2f2c5 00b2f1c7 00c1e1a8
    # 03-25 00:51:05.530 I/DEBUG ( 65): 801cf40c ffffc4cc 00b2f2c5 00b2f1c7 00c1e1a8
    self.code_line = re.compile(r"(.*)[ \t]*[a-f0-9]" + self.width +
                                r"[ \t]*[a-f0-9]" + self.width +
                                r"[ \t]*[a-f0-9]" + self.width +
                                r"[ \t]*[a-f0-9]" + self.width +
                                r"[ \t]*[a-f0-9]" + self.width +
                                r"[ \t]*[ \r\n]")  # pylint: disable-msg=C6310

  def CleanLine(self, ln):
    """Return `ln` with zero-width and unencodable characters removed.

    AndroidFeedback adds zero width spaces into its crash reports. These
    have to be removed or the regular expressions will fail to match.

    Args:
      ln: str, one raw input line.
    """
    # The round trip drops characters that cannot be encoded (e.g. lone
    # surrogates) instead of raising; translate() then deletes the
    # zero-width code points, which a UTF-8 round trip alone leaves intact.
    cleaned = ln.encode("utf8", errors="ignore").decode("utf8", errors="ignore")
    return cleaned.translate(self._ZERO_WIDTH_CHARS)

  def PrintTraceLines(self, trace_lines):
    """Print back trace.

    Args:
      trace_lines: sequence of (addr, symbol_with_offset, location) tuples.
    """
    maxlen = max((len(tl[1]) for tl in trace_lines), default=0)
    print("\nStack Trace:")
    print(" RELADDR " + self.spacing + "FUNCTION".ljust(maxlen) + " FILE:LINE")
    for (addr, symbol_with_offset, location) in trace_lines:
      print(" %8s %s %s" % (addr, symbol_with_offset.ljust(maxlen), location))

  def PrintValueLines(self, value_lines):
    """Print stack data values.

    Args:
      value_lines: sequence of (addr, value, symbol_with_offset, location)
        tuples.
    """
    maxlen = max((len(vl[2]) for vl in value_lines), default=0)
    print("\nStack Data:")
    print(" ADDR " + self.spacing + "VALUE " + "FUNCTION".ljust(maxlen) + " FILE:LINE")
    for (addr, value, symbol_with_offset, location) in value_lines:
      print(" %8s %8s %s %s" % (addr, value, symbol_with_offset.ljust(maxlen), location))

  def PrintOutput(self, trace_lines, value_lines):
    """Print the trace table and the stack-data table, skipping empty ones."""
    if trace_lines:
      self.PrintTraceLines(trace_lines)
    if value_lines:
      self.PrintValueLines(value_lines)

  def PrintDivider(self):
    """Print the separator line emitted between two dumps."""
    print("\n-----------------------------------------------------\n")

  def DeleteApkTmpFiles(self):
    """Delete every temporary file extracted from an apk so far."""
    for _, _, tmp_files in self.apk_info.values():
      for tmp_file in tmp_files.values():
        os.unlink(tmp_file)
      # Forget the deleted paths so later lookups re-extract instead of
      # returning (or re-deleting) files that no longer exist.
      tmp_files.clear()

  def ConvertTrace(self, lines):
    """Symbolize all of `lines` and print the result.

    Args:
      lines: iterable of str, the raw lines of the dump.
    """
    lines = [self.CleanLine(line) for line in lines]
    try:
      if not symbol.ARCH:
        symbol.SetAbi(lines)
      self.UpdateAbiRegexes()
      for line in lines:
        self.ProcessLine(line)
      self.PrintOutput(self.trace_lines, self.value_lines)
    finally:
      # Delete any temporary files created while processing the lines.
      self.DeleteApkTmpFiles()

  def MatchTraceLine(self, line):
    """Parse a backtrace frame line.

    Both the debuggerd format and the sanitizer format are recognized.

    Returns:
      A dict with the keys "frame", "offset", "so_offset", "dso",
      "symbol_present" and "symbol_name", or None if `line` is not a frame.
    """
    match = self.trace_line.match(line)
    if match:
      return {"frame": match.group("frame"),
              "offset": match.group("offset"),
              "so_offset": match.group("so_offset"),
              "dso": match.group("dso"),
              "symbol_present": bool(match.group("symbolpresent")),
              "symbol_name": match.group("symbol")}
    match = self.sanitizer_trace_line.match(line)
    if match:
      return {"frame": match.group("frame"),
              "offset": match.group("offset"),
              "so_offset": None,
              "dso": match.group("dso"),
              "symbol_present": False,
              "symbol_name": None}
    return None

  def ExtractLibFromApk(self, apk, shared_lib_name):
    """Extract one entry of an apk into a temporary file using `unzip -p`.

    Args:
      apk: path of the apk on the host.
      shared_lib_name: name of the entry inside the apk.

    Returns:
      The path of the temporary file, or None if unzip exited non-zero. The
      caller owns the returned file.
    """
    tmp_fd, tmp_file = tempfile.mkstemp()
    extracted = False
    try:
      extracted = subprocess.call(["unzip", "-p", apk, shared_lib_name], stdout=tmp_fd) == 0
    finally:
      # Runs on success, on failure and when subprocess.call() raises, so
      # neither the descriptor nor a useless temp file is ever leaked.
      os.close(tmp_fd)
      if not extracted:
        os.unlink(tmp_file)
    return tmp_file if extracted else None

  def ProcessCentralInfo(self, offset_list, central_info):
    """Parse one `zipinfo -v` central directory entry.

    Appends [name, start, end] to `offset_list`, where start is the offset of
    the local header and end is start plus the compressed size.

    Returns:
      The (name, start, end) tuple of the entry.

    Raises:
      Exception: if `central_info` lacks any of the expected fields.
    """
    match = self.zipinfo_central_info_match.search(central_info)
    if not match:
      raise Exception("Cannot find all info from zipinfo\n" + central_info)
    name = match.group(1)
    start = int(match.group(2))
    end = start + int(match.group(3))

    offset_list.append([name, start, end])
    return name, start, end

  def GetLibFromApk(self, apk, offset):
    """Find and extract the apk entry that contains file offset `offset`.

    The apk is looked up below $ANDROID_PRODUCT_OUT. Its central directory
    is parsed once and cached in apk_info; extracted files are cached too.

    Args:
      apk: apk path as it appears in the trace.
      offset: hex string, offset into the apk.

    Returns:
      (entry_name, tmp_file_path), or (None, None) if nothing was found.
    """
    # Convert the string to hex.
    offset = int(offset, 16)

    # Check if we already have information about this offset.
    if apk in self.apk_info:
      apk_full_path, offset_list, tmp_files = self.apk_info[apk]
      for file_name, start, end in offset_list:
        if start <= offset < end:
          if file_name in tmp_files:
            return file_name, tmp_files[file_name]
          tmp_file = self.ExtractLibFromApk(apk_full_path, file_name)
          if tmp_file:
            tmp_files[file_name] = tmp_file
            return file_name, tmp_file
          break
      return None, None

    if "ANDROID_PRODUCT_OUT" not in os.environ:
      print("ANDROID_PRODUCT_OUT environment variable not set.")
      return None, None
    out_dir = os.environ["ANDROID_PRODUCT_OUT"]
    if not os.path.exists(out_dir):
      print("ANDROID_PRODUCT_OUT", out_dir, "does not exist.")
      return None, None
    if apk.startswith("/"):
      # os.path.join() would discard out_dir for an absolute apk path.
      apk_full_path = out_dir + apk
    else:
      apk_full_path = os.path.join(out_dir, apk)
    if not os.path.exists(apk_full_path):
      print("Cannot find apk", apk)
      return None, None

    central_info = ""
    file_name = None
    offset_list = []
    # Text mode is required: the lines are searched with str patterns and
    # concatenated to a str. The context manager closes the pipe and waits
    # for zipinfo even if parsing raises.
    with subprocess.Popen(["zipinfo", "-v", apk_full_path],
                          stdout=subprocess.PIPE,
                          encoding="utf8", errors="replace") as cmd:
      zipinfo_output = cmd.stdout
      # Find the first central info marker.
      for line in zipinfo_output:
        if self.zipinfo_central_directory_line.search(line):
          break

      # Each further marker terminates the entry collected so far.
      for line in zipinfo_output:
        if self.zipinfo_central_directory_line.search(line):
          cur_name, start, end = self.ProcessCentralInfo(offset_list, central_info)
          if not file_name and start <= offset < end:
            file_name = cur_name
          central_info = ""
        else:
          central_info += line
    if central_info:
      # The last entry is not followed by another marker.
      cur_name, start, end = self.ProcessCentralInfo(offset_list, central_info)
      if not file_name and start <= offset < end:
        file_name = cur_name

    # Save the information from the zip.
    tmp_files = dict()
    self.apk_info[apk] = [apk_full_path, offset_list, tmp_files]
    if not file_name:
      return None, None
    tmp_shared_lib = self.ExtractLibFromApk(apk_full_path, file_name)
    if tmp_shared_lib:
      tmp_files[file_name] = tmp_shared_lib
      return file_name, tmp_shared_lib
    return None, None

  def ProcessLine(self, line):
    """Process one line of the dump.

    Header lines are printed immediately, after flushing any accumulated
    tables. Trace and value lines are symbolized and accumulated.

    Returns:
      True if the line was recognized as a header, trace or value line.
    """
    ret = False
    process_header = self.process_info_line.search(line)
    signal_header = self.signal_line.search(line)
    abort_message_header = self.abort_message_line.search(line)
    thread_header = self.thread_line.search(line)
    register_header = self.register_line.search(line)
    revision_header = self.revision_line.search(line)
    dalvik_jni_thread_header = self.dalvik_jni_thread_line.search(line)
    dalvik_native_thread_header = self.dalvik_native_thread_line.search(line)
    unreachable_header = self.unreachable_line.search(line)
    if process_header or signal_header or abort_message_header or thread_header or \
        register_header or dalvik_jni_thread_header or dalvik_native_thread_header or \
        revision_header or unreachable_header:
      ret = True
      # A header starts a new section: flush what belongs to the previous one.
      if self.trace_lines or self.value_lines:
        self.PrintOutput(self.trace_lines, self.value_lines)
        self.PrintDivider()
        self.trace_lines = []
        self.value_lines = []
        self.last_frame = -1
      if process_header:
        print(process_header.group(1))
      if signal_header:
        print(signal_header.group(1))
      if abort_message_header:
        print(abort_message_header.group(1))
      if register_header:
        print(register_header.group(1))
      if thread_header:
        print(thread_header.group(1))
      if dalvik_jni_thread_header:
        print(dalvik_jni_thread_header.group(1))
      if dalvik_native_thread_header:
        print(dalvik_native_thread_header.group(1))
      if revision_header:
        print(revision_header.group(1))
      if unreachable_header:
        print(unreachable_header.group(1))
      return True
    trace_line_dict = self.MatchTraceLine(line)
    if trace_line_dict is not None:
      ret = True
      # Compared numerically so that frame #10 following #9 does not look
      # like the start of a new stack.
      frame = int(trace_line_dict["frame"])
      code_addr = trace_line_dict["offset"]
      area = trace_line_dict["dso"]
      so_offset = trace_line_dict["so_offset"]
      symbol_present = trace_line_dict["symbol_present"]
      symbol_name = trace_line_dict["symbol_name"]

      # A frame number that does not increase means a new stack has begun.
      if frame <= self.last_frame and (self.trace_lines or self.value_lines):
        self.PrintOutput(self.trace_lines, self.value_lines)
        self.PrintDivider()
        self.trace_lines = []
        self.value_lines = []
      self.last_frame = frame

      if area == "<unknown>" or area == "[heap]" or area == "[stack]":
        self.trace_lines.append((code_addr, "", area))
      else:
        # If this is an apk, it usually means that there is actually
        # a shared so that was loaded directly out of it. In that case,
        # extract the shared library and the name of the shared library.
        lib = None
        # The format of the map name:
        # Some.apk!libshared.so
        # or
        # Some.apk
        if so_offset:
          # If it ends in apk, we are done.
          apk = None
          if area.endswith(".apk"):
            apk = area
          else:
            index = area.rfind(".so!")
            if index != -1:
              # Sometimes we'll see something like:
              # #01 pc abcd libart.so!libart.so (offset 0x134000)
              # Remove everything after the ! and zero the offset value.
              area = area[0:index + 3]
              so_offset = 0
            else:
              index = area.rfind(".apk!")
              if index != -1:
                apk = area[0:index + 4]
          if apk:
            lib_name, lib = self.GetLibFromApk(apk, so_offset)
        if not lib:
          lib = area
          lib_name = None

        # When using atest, test paths are different between the out/ directory
        # and device. Apply fixups.
        if lib.startswith("/data/local/tests/") or lib.startswith("/data/local/tmp/"):
          test_name = lib.rsplit("/", 1)[-1]
          prefix = "/data/nativetest"
          if symbol.ARCH.endswith("64"):
            prefix += "64"
          if lib.startswith("/data/local/tests/vendor/"):
            prefix += "/vendor"
          lib = prefix + "/" + test_name + "/" + test_name

        # If a calls b which further calls c and c is inlined to b, we want to
        # display "a -> b -> c" in the stack trace instead of just "a -> c"
        info = symbol.SymbolInformation(lib, code_addr)
        nest_count = len(info) - 1
        for (source_symbol, source_location, object_symbol_with_offset) in info:
          if not source_symbol:
            if symbol_present:
              source_symbol = symbol.CallCppFilt(symbol_name)
            else:
              source_symbol = "<unknown>"
          if not source_location:
            source_location = area
            if lib_name:
              source_location += "(" + lib_name + ")"
          if nest_count > 0:
            # Every entry but the last is shown with an arrow in place of
            # the address.
            nest_count = nest_count - 1
            arrow = "v------>"
            if symbol.ARCH in self._ARCHS_64BIT:
              arrow = "v-------------->"
            self.trace_lines.append((arrow, source_symbol, source_location))
          else:
            if not object_symbol_with_offset:
              object_symbol_with_offset = source_symbol
            self.trace_lines.append((code_addr,
                                     object_symbol_with_offset,
                                     source_location))
    if self.code_line.match(line):
      # Code lines should be ignored. If this were excluded the 'code around'
      # sections would trigger value_line matches.
      return ret
    value_match = self.value_line.match(line)
    if value_match:
      ret = True
      (unused_, addr, value, area, symbol_present, symbol_name) = value_match.groups()
      if area == "<unknown>" or area == "[heap]" or area == "[stack]" or not area:
        self.value_lines.append((addr, value, "", area))
      else:
        info = symbol.SymbolInformation(area, value)
        (source_symbol, source_location, object_symbol_with_offset) = info.pop()
        # If there is no information, skip this.
        if source_symbol or source_location or object_symbol_with_offset:
          if not source_symbol:
            if symbol_present:
              source_symbol = symbol.CallCppFilt(symbol_name)
            else:
              source_symbol = "<unknown>"
          if not source_location:
            source_location = area
          if not object_symbol_with_offset:
            object_symbol_with_offset = source_symbol
          self.value_lines.append((addr,
                                   value,
                                   object_symbol_with_offset,
                                   source_location))

    return ret


class RegisterPatternTests(unittest.TestCase):
  """Checks register_line against a simpler per-ABI reference pattern."""

  def assert_register_matches(self, abi, example_crash, stupid_pattern):
    """Assert register_line matches exactly the lines `stupid_pattern` does."""
    tc = TraceConverter()
    lines = example_crash.split('\n')
    symbol.SetAbi(lines)
    tc.UpdateAbiRegexes()
    for line in lines:
      tc.ProcessLine(line)
      is_register = (re.search(stupid_pattern, line) is not None)
      matched = (tc.register_line.search(line) is not None)
      self.assertEqual(matched, is_register, line)
    tc.PrintOutput(tc.trace_lines, tc.value_lines)

  def test_arm_registers(self):
    self.assert_register_matches("arm", example_crashes.arm, '\\b(r0|r4|r8|ip)\\b')

  def test_arm64_registers(self):
    self.assert_register_matches("arm64", example_crashes.arm64, '\\b(x0|x4|x8|x12|x16|x20|x24|x28|sp)\\b')

  def test_mips_registers(self):
    self.assert_register_matches("mips", example_crashes.mips, '\\b(zr|a0|t0|t4|s0|s4|t8|gp|hi)\\b')

  def test_mips64_registers(self):
    self.assert_register_matches("mips64", example_crashes.mips64, '\\b(zr|a0|a4|t0|s0|s4|t8|gp|hi)\\b')

  def test_x86_registers(self):
    self.assert_register_matches("x86", example_crashes.x86, '\\b(eax|esi|xcs|eip)\\b')

  def test_x86_64_registers(self):
    self.assert_register_matches("x86_64", example_crashes.x86_64, '\\b(rax|rsi|r8|r12|cs|rip)\\b')


class LibmemunreachablePatternTests(unittest.TestCase):
  """Checks header and trace line detection on libmemunreachable output."""

  def test_libmemunreachable(self):
    tc = TraceConverter()
    lines = example_crashes.libmemunreachable.split('\n')

    symbol.SetAbi(lines)
    self.assertEqual(symbol.ARCH, "arm")

    tc.UpdateAbiRegexes()
    header_lines = 0
    trace_lines = 0
    for line in lines:
      tc.ProcessLine(line)
      if re.search(tc.unreachable_line, line) is not None:
        header_lines += 1
      if tc.MatchTraceLine(line) is not None:
        trace_lines += 1
    self.assertEqual(header_lines, 3)
    self.assertEqual(trace_lines, 2)
    tc.PrintOutput(tc.trace_lines, tc.value_lines)


class LongASANStackTests(unittest.TestCase):
  # Test that a long ASAN-style (non-padded frame numbers) stack trace is not split into two
  # when the frame number becomes two digits. This happened before as the frame number was
  # handled as a string and not converted to an integral.
  def test_long_asan_crash(self):
    tc = TraceConverter()
    lines = example_crashes.long_asan_crash.splitlines()
    symbol.SetAbi(lines)
    tc.UpdateAbiRegexes()
    # Test by making sure trace_line_count is monotonically non-decreasing. If the stack trace
    # is split, a separator is printed and trace_lines is flushed.
    trace_line_count = 0
    for line in lines:
      tc.ProcessLine(line)
      self.assertLessEqual(trace_line_count, len(tc.trace_lines))
      trace_line_count = len(tc.trace_lines)
    # The split happened at transition of frame #9 -> #10. Make sure we have parsed (and stored)
    # more than ten frames.
    self.assertGreater(trace_line_count, 10)
    tc.PrintOutput(tc.trace_lines, tc.value_lines)


class ValueLinesTest(unittest.TestCase):
  """Checks that a value line without symbol information is not stored."""

  def test_value_line_skipped(self):
    tc = TraceConverter()
    symbol.SetAbi(["ABI: 'arm'"])
    tc.UpdateAbiRegexes()
    tc.ProcessLine(" 12345678 00001000 .")
    self.assertEqual([], tc.value_lines)


if __name__ == '__main__':
  unittest.main(verbosity=2)