• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python3
2#
3# Copyright (C) 2013 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16
17"""stack symbolizes native crash dumps."""
18
19import collections
20import functools
21import os
22import pathlib
23import re
24import subprocess
25import symbol
26import tempfile
27import unittest
28
29import example_crashes
30
def ConvertTrace(lines):
  """Symbolize the crash dump contained in `lines` and print the result.

  A fresh TraceConverter is used for the conversion; the symbols directory
  in use is announced on stdout before any processing starts.

  Args:
    lines: iterable of text lines making up the crash dump / log.
  """
  converter = TraceConverter()
  print("Reading symbols from", symbol.SYMBOLS_DIR)
  converter.ConvertTrace(lines)
35
class TraceConverter:
  """Parses native crash output line by line and prints a symbolized view.

  The class attributes below hold the line-matching regexes and the default
  values of the parsing state. Every instance gets its own mutable state
  containers in __init__ so that converters never share accumulated lines,
  apk bookkeeping or path caches through the class object.
  """

  # Header lines that are echoed verbatim and that start a new output section.
  process_info_line = re.compile(r"(pid: [0-9]+, tid: [0-9]+.*)")
  revision_line = re.compile(r"(Revision: '(.*)')")
  signal_line = re.compile(r"(signal [0-9]+ \(.*\).*)")
  abort_message_line = re.compile(r"(Abort message: '.*')")
  thread_line = re.compile(r"(.*)(--- ){15}---")
  dalvik_jni_thread_line = re.compile("(\".*\" prio=[0-9]+ tid=[0-9]+ NATIVE.*)")
  dalvik_native_thread_line = re.compile("(\".*\" sysTid=[0-9]+ nice=[0-9]+.*)")

  # Placeholders that can never match ("$a" needs a character after the end of
  # the input). UpdateBitnessRegexes() replaces them with the real patterns
  # once the address width is known.
  register_line = re.compile("$a")
  trace_line = re.compile("$a")
  sanitizer_trace_line = re.compile("$a")
  value_line = re.compile("$a")
  code_line = re.compile("$a")
  mte_sync_line = re.compile("$a")
  mte_stack_record_line = re.compile("$a")

  # Patterns used to pick entries out of `zipinfo -v` output.
  zipinfo_central_directory_line = re.compile(r"Central\s+directory\s+entry")
  zipinfo_central_info_match = re.compile(
      r"^\s*(\S+)$\s*offset of local header from start of archive:\s*(\d+)"
      r".*^\s*compressed size:\s+(\d+)", re.M | re.S)
  unreachable_line = re.compile(r"((\d+ bytes in \d+ unreachable allocations)|"
                                r"(\d+ bytes unreachable at [0-9a-f]+)|"
                                r"(referencing \d+ unreachable bytes in \d+ allocation(s)?)|"
                                r"(and \d+ similar unreachable bytes in \d+ allocation(s)?))")

  # Class-level defaults, kept for backward compatibility with code that reads
  # them from the class. Instances shadow the mutable ones in __init__.
  trace_lines = []
  value_lines = []
  last_frame = -1
  width = "{8}"
  spacing = ""
  apk_info = dict()
  lib_to_path = dict()
  mte_fault_address = None
  mte_stack_records = []

  # We use the llvm-readelf tool to extract bitness and BuildId from ELF files.
  ElfInfo = collections.namedtuple("ElfInfo", ["bitness", "build_id"])
  readelf_output = re.compile(r"Class:\s*ELF(?P<bitness>32|64).*"
                              r"Build ID:\s*(?P<build_id>[0-9a-f]+)",
                              flags=re.DOTALL)

  def __init__(self):
    """Give this converter its own, initially empty, mutable state."""
    # Without these, list.append() / dict item assignment on `self.<name>`
    # would mutate the class-level objects shared by every instance.
    self.trace_lines = []
    self.value_lines = []
    self.apk_info = {}
    self.lib_to_path = {}
    self.mte_stack_records = []
72
73  def UpdateBitnessRegexes(self):
74    if symbol.ARCH_IS_32BIT:
75      self.width = "{8}"
76      self.spacing = ""
77    else:
78      self.width = "{16}"
79      self.spacing = "        "
80    self.register_line = re.compile("    (([ ]*\\b(\S*)\\b +[0-9a-f]" + self.width + "){1,5}$)")
81
82    # Note that both trace and value line matching allow for variable amounts of
83    # whitespace (e.g. \t). This is because the we want to allow for the stack
84    # tool to operate on AndroidFeedback provided system logs. AndroidFeedback
85    # strips out double spaces that are found in tombsone files and logcat output.
86    #
87    # Examples of matched trace lines include lines from tombstone files like:
88    #   #00  pc 001cf42e  /data/data/com.my.project/lib/libmyproject.so
89    #
90    # Or lines from AndroidFeedback crash report system logs like:
91    #   03-25 00:51:05.520 I/DEBUG ( 65): #00 pc 001cf42e /data/data/com.my.project/lib/libmyproject.so
92    # Please note the spacing differences.
93    self.trace_line = re.compile(
94        r".*"                                                 # Random start stuff.
95        r"\#(?P<frame>[0-9]+)"                                # Frame number.
96        r"[ \t]+..[ \t]+"                                     # (space)pc(space).
97        r"(?P<offset>[0-9a-f]" + self.width + ")[ \t]+"       # Offset (hex number given without
98                                                              #         0x prefix).
99        r"(?P<dso>\[[^\]]+\]|[^\r\n \t]*)"                    # Library name.
100        r"( \(offset (?P<so_offset>0x[0-9a-fA-F]+)\))?"       # Offset into the file to find the start of the shared so.
101        r"(?P<symbolpresent> \((?P<symbol>.*?)\))?"           # Is the symbol there? (non-greedy)
102        r"( \(BuildId: (?P<build_id>.*)\))?"                  # Optional build-id of the ELF file.
103        r"[ \t]*$")                                           # End of line (to expand non-greedy match).
104                                                              # pylint: disable-msg=C6310
105    # Sanitizer output. This is different from debuggerd output, and it is easier to handle this as
106    # its own regex. Example:
107    # 08-19 05:29:26.283   397   403 I         :     #0 0xb6a15237  (/system/lib/libclang_rt.asan-arm-android.so+0x4f237)
108    self.sanitizer_trace_line = re.compile(
109        r".*"                                                 # Random start stuff.
110        r"\#(?P<frame>[0-9]+)"                                # Frame number.
111        r"[ \t]+0x[0-9a-f]+[ \t]+"                            # PC, not interesting to us.
112        r"\("                                                 # Opening paren.
113        r"(?P<dso>[^+]+)"                                     # Library name.
114        r"\+"                                                 # '+'
115        r"0x(?P<offset>[0-9a-f]+)"                            # Offset (hex number given with
116                                                              #         0x prefix).
117        r"\)")                                                # Closing paren.
118                                                              # pylint: disable-msg=C6310
119    # Examples of matched value lines include:
120    #   bea4170c  8018e4e9  /data/data/com.my.project/lib/libmyproject.so
121    #   bea4170c  8018e4e9  /data/data/com.my.project/lib/libmyproject.so (symbol)
122    #   03-25 00:51:05.530 I/DEBUG ( 65): bea4170c 8018e4e9 /data/data/com.my.project/lib/libmyproject.so
123    # Again, note the spacing differences.
124    self.value_line = re.compile(r"(.*)([0-9a-f]" + self.width + r")[ \t]+([0-9a-f]" + self.width + r")[ \t]+([^\r\n \t]*)( \((.*)\))?")
125    # Lines from 'code around' sections of the output will be matched before
126    # value lines because otheriwse the 'code around' sections will be confused as
127    # value lines.
128    #
129    # Examples include:
130    #   801cf40c ffffc4cc 00b2f2c5 00b2f1c7 00c1e1a8
131    #   03-25 00:51:05.530 I/DEBUG ( 65): 801cf40c ffffc4cc 00b2f2c5 00b2f1c7 00c1e1a8
132    self.code_line = re.compile(r"(.*)[ \t]*[a-f0-9]" + self.width +
133                                r"[ \t]*[a-f0-9]" + self.width +
134                                r"[ \t]*[a-f0-9]" + self.width +
135                                r"[ \t]*[a-f0-9]" + self.width +
136                                r"[ \t]*[a-f0-9]" + self.width +
137                                r"[ \t]*[ \r\n]")  # pylint: disable-msg=C6310
138    self.mte_sync_line = re.compile(r".*signal 11 \(SIGSEGV\), code 9 \(SEGV_MTESERR\), fault addr 0x(?P<address>[0-9a-f]+)")
139    self.mte_stack_record_line = re.compile(r".*stack_record fp:0x(?P<fp>[0-9a-f]+) "
140                                            r"tag:0x(?P<tag>[0-9a-f]+) "
141                                            r"pc:(?P<object>[^+]+)\+0x(?P<offset>[0-9a-f]+)"
142                                            r"(?: \(BuildId: (?P<buildid>[A-Za-z0-9]+)\))?")
143
144  def CleanLine(self, ln):
145    # AndroidFeedback adds zero width spaces into its crash reports. These
146    # should be removed or the regular expresssions will fail to match.
147    return ln.encode().decode(encoding='utf8', errors='ignore')
148
149  def PrintTraceLines(self, trace_lines):
150    """Print back trace."""
151    maxlen = max(len(tl[1]) for tl in trace_lines)
152    print("\nStack Trace:")
153    print("  RELADDR   " + self.spacing + "FUNCTION".ljust(maxlen) + "  FILE:LINE")
154    for tl in self.trace_lines:
155      (addr, symbol_with_offset, location) = tl
156      print("  %8s  %s  %s" % (addr, symbol_with_offset.ljust(maxlen), location))
157
158  def PrintValueLines(self, value_lines):
159    """Print stack data values."""
160    maxlen = max(len(tl[2]) for tl in self.value_lines)
161    print("\nStack Data:")
162    print("  ADDR      " + self.spacing + "VALUE     " + "FUNCTION".ljust(maxlen) + "  FILE:LINE")
163    for vl in self.value_lines:
164      (addr, value, symbol_with_offset, location) = vl
165      print("  %8s  %8s  %s  %s" % (addr, value, symbol_with_offset.ljust(maxlen), location))
166
167  def MatchStackRecords(self):
168    if self.mte_fault_address is None:
169      return
170    fault_tag = (self.mte_fault_address >> 56) & 0xF
171    untagged_fault_address = self.mte_fault_address & ~(0xF << 56)
172    build_id_to_lib = {}
173    record_for_lib = collections.defaultdict(lambda: collections.defaultdict(set))
174    for i, (lib, buildid, offset, fp, tag) in enumerate(self.mte_stack_records):
175      if buildid is not None:
176        if buildid not in build_id_to_lib:
177          basename = os.path.basename(lib).split("!")[-1]
178          newlib = self.GetLibraryByBuildId(symbol.SYMBOLS_DIR, basename, buildid)
179          if newlib is not None:
180            build_id_to_lib[buildid] = newlib
181            lib = newlib
182        else:
183          lib = build_id_to_lib[buildid]
184      record_for_lib[lib][offset].add((fp, tag, i))
185
186    closest_match = None
187    # This order is load-bearing to make inside sort before 0-byte after, and
188    # 1-byte before sort before 1-byte after (which is actually *two*
189    # bytes OOB).
190    INSIDE = 0
191    BEFORE = 1
192    AFTER = 2
193
194    for lib, values in record_for_lib.items():
195      records = symbol.GetStackRecordsForSet(lib, values.keys()) or []
196      for (addr, function_name, local_name, file_line, frame_offset, size,
197           tag_offset) in records:
198        if frame_offset is None or size is None or tag_offset is None:
199          continue
200        for fp, tag, i in values[addr]:
201          obj_offset = untagged_fault_address - fp - frame_offset
202          if tag + tag_offset == fault_tag:
203            if obj_offset >= 0 and obj_offset < size:
204              distance = 0
205              whence = INSIDE
206            elif obj_offset >= 0:
207              distance = obj_offset - size
208              whence = AFTER
209            else: # obj_offset < 0
210              distance = -obj_offset
211              whence = BEFORE
212            # We prefer the closest, and if multiple objects match the most
213            # recent one (lowest i).
214            candidate = (distance, whence, i, obj_offset, local_name,
215                         function_name, file_line)
216            if closest_match is None or candidate < closest_match:
217              closest_match = candidate
218
219    if closest_match is None:
220      return
221
222    distance, whence, _, obj_offset, local_name, function_name, file_line = closest_match
223    if whence == INSIDE:
224      distance = obj_offset
225      whence_str = "inside"
226    elif whence == AFTER:
227      whence_str = "after"
228    else: # whence == BEFORE
229      whence_str = "before"
230
231    print('')
232    print('Potentially referenced stack object:')
233    print('  %d bytes %s a variable "%s" in stack frame of function "%s"'%
234          (distance, whence_str, local_name, function_name))
235    print('  at %s' % file_line)
236
237  def PrintOutput(self, trace_lines, value_lines):
238    if self.trace_lines:
239      self.PrintTraceLines(self.trace_lines)
240    if self.value_lines:
241      self.PrintValueLines(self.value_lines)
242    if self.mte_stack_records:
243      self.MatchStackRecords()
244
245  def PrintDivider(self):
246    print("\n-----------------------------------------------------\n")
247
248  def DeleteApkTmpFiles(self):
249    for _, _, tmp_files in self.apk_info.values():
250      for tmp_file in tmp_files.values():
251        os.unlink(tmp_file)
252
253  def ConvertTrace(self, lines):
254    lines = [self.CleanLine(line) for line in lines]
255    try:
256      if symbol.ARCH_IS_32BIT is None:
257        symbol.SetBitness(lines)
258      self.UpdateBitnessRegexes()
259      for line in lines:
260        self.ProcessLine(line)
261      self.PrintOutput(self.trace_lines, self.value_lines)
262    finally:
263      # Delete any temporary files created while processing the lines.
264      self.DeleteApkTmpFiles()
265
266  def MatchTraceLine(self, line):
267    match = self.trace_line.match(line)
268    if match:
269      return {"frame": match.group("frame"),
270              "offset": match.group("offset"),
271              "so_offset": match.group("so_offset"),
272              "dso": match.group("dso"),
273              "symbol_present": bool(match.group("symbolpresent")),
274              "symbol_name": match.group("symbol"),
275              "build_id": match.group("build_id")}
276    match = self.sanitizer_trace_line.match(line)
277    if match:
278      return {"frame": match.group("frame"),
279              "offset": match.group("offset"),
280              "so_offset": None,
281              "dso": match.group("dso"),
282              "symbol_present": False,
283              "symbol_name": None,
284              "build_id": None}
285    return None
286
287  def ExtractLibFromApk(self, apk, shared_lib_name):
288    # Create a temporary file containing the shared library from the apk.
289    tmp_file = None
290    try:
291      tmp_fd, tmp_file = tempfile.mkstemp()
292      if subprocess.call(["unzip", "-p", apk, shared_lib_name], stdout=tmp_fd) == 0:
293        os.close(tmp_fd)
294        shared_file = tmp_file
295        tmp_file = None
296        return shared_file
297    finally:
298      if tmp_file:
299        os.close(tmp_fd)
300        os.unlink(tmp_file)
301    return None
302
303  def ProcessCentralInfo(self, offset_list, central_info):
304    match = self.zipinfo_central_info_match.search(central_info)
305    if not match:
306      raise Exception("Cannot find all info from zipinfo\n" + central_info)
307    name = match.group(1)
308    start = int(match.group(2))
309    end = start + int(match.group(3))
310    # When the actual apk data is mapped in to the process, it will be
311    # mapped in on a page boundary. This means the header data can start
312    # after the actual offset and the code will get the wrong file.
313    # Rounding down to a page boundary (assumes 4096 page size) fixes
314    # this problem.
315    start = start & ~0xfff
316
317    offset_list.append([name, start, end])
318    return name, start, end
319
320  def GetLibFromApk(self, apk, offset):
321    # Convert the string to hex.
322    offset = int(offset, 16)
323
324    # Check if we already have information about this offset.
325    if apk in self.apk_info:
326      apk_full_path, offset_list, tmp_files = self.apk_info[apk]
327      for file_name, start, end in offset_list:
328        if offset >= start and offset < end:
329          if file_name in tmp_files:
330            return file_name, tmp_files[file_name]
331          tmp_file = self.ExtractLibFromApk(apk_full_path, file_name)
332          if tmp_file:
333            tmp_files[file_name] = tmp_file
334            return file_name, tmp_file
335          break
336      return None, None
337
338    if not "ANDROID_PRODUCT_OUT" in os.environ:
339      print("ANDROID_PRODUCT_OUT environment variable not set.")
340      return None, None
341    out_dir = os.environ["ANDROID_PRODUCT_OUT"]
342    if not os.path.exists(out_dir):
343      print("ANDROID_PRODUCT_OUT", out_dir, "does not exist.")
344      return None, None
345    if apk.startswith("/"):
346      apk_full_path = out_dir + apk
347    else:
348      apk_full_path = os.path.join(out_dir, apk)
349    if not os.path.exists(apk_full_path):
350      print("Cannot find apk", apk)
351      return None, None
352
353    cmd = subprocess.Popen(["zipinfo", "-v", apk_full_path], stdout=subprocess.PIPE,
354                           encoding='utf8')
355    # Find the first central info marker.
356    for line in cmd.stdout:
357      if self.zipinfo_central_directory_line.search(line):
358        break
359
360    central_info = ""
361    file_name = None
362    offset_list = []
363    for line in cmd.stdout:
364      match = self.zipinfo_central_directory_line.search(line)
365      if match:
366        cur_name, start, end = self.ProcessCentralInfo(offset_list, central_info)
367        if not file_name and offset >= start and offset < end:
368          file_name = cur_name
369        central_info = ""
370      else:
371        central_info += line
372    if central_info:
373      cur_name, start, end = self.ProcessCentralInfo(offset_list, central_info)
374      if not file_name and offset >= start and offset < end:
375        file_name = cur_name
376
377    # Make sure the offset_list is sorted, the zip file does not guarantee
378    # that the entries are in order.
379    offset_list = sorted(offset_list, key=lambda entry: entry[1])
380
381    # Save the information from the zip.
382    tmp_files = dict()
383    self.apk_info[apk] = [apk_full_path, offset_list, tmp_files]
384    if not file_name:
385      return None, None
386    tmp_shared_lib = self.ExtractLibFromApk(apk_full_path, file_name)
387    if tmp_shared_lib:
388      tmp_files[file_name] = tmp_shared_lib
389      return file_name, tmp_shared_lib
390    return None, None
391
392  # Find all files in the symbols directory and group them by basename (without directory).
393  @functools.lru_cache(maxsize=None)
394  def GlobSymbolsDir(self, symbols_dir):
395    files_by_basename = {}
396    for path in sorted(pathlib.Path(symbols_dir).glob("**/*")):
397      if os.path.isfile(path):
398        files_by_basename.setdefault(path.name, []).append(path)
399    return files_by_basename
400
401  # Use the "file" command line tool to find the bitness and build_id of given ELF file.
402  @functools.lru_cache(maxsize=None)
403  def GetLibraryInfo(self, lib):
404    stdout = subprocess.check_output([symbol.ToolPath("llvm-readelf"), "-h", "-n", lib], text=True)
405    match = self.readelf_output.search(stdout)
406    if match:
407      return self.ElfInfo(bitness=match.group("bitness"), build_id=match.group("build_id"))
408    return None
409
410  # Search for a library with the given basename and build_id anywhere in the symbols directory.
411  @functools.lru_cache(maxsize=None)
412  def GetLibraryByBuildId(self, symbols_dir, basename, build_id):
413    for candidate in self.GlobSymbolsDir(symbols_dir).get(basename, []):
414      info = self.GetLibraryInfo(candidate)
415      if info and info.build_id == build_id:
416        return "/" + str(candidate.relative_to(symbols_dir))
417    return None
418
419  def GetLibPath(self, lib):
420    if lib in self.lib_to_path:
421      return self.lib_to_path[lib]
422
423    lib_path = self.FindLibPath(lib)
424    self.lib_to_path[lib] = lib_path
425    return lib_path
426
427  def FindLibPath(self, lib):
428    symbol_dir = symbol.SYMBOLS_DIR
429    if os.path.isfile(symbol_dir + lib):
430      return lib
431
432    # Try and rewrite any apex files if not found in symbols.
433    # For some reason, the directory in symbols does not match
434    # the path on system.
435    # The path is com.android.<directory> on device, but
436    # com.google.android.<directory> in symbols.
437    new_lib = lib.replace("/com.android.", "/com.google.android.")
438    if os.path.isfile(symbol_dir + new_lib):
439      return new_lib
440
441    # When using atest, test paths are different between the out/ directory
442    # and device. Apply fixups.
443    if not lib.startswith("/data/local/tests/") and not lib.startswith("/data/local/tmp/"):
444      print("WARNING: Cannot find %s in symbol directory" % lib)
445      return lib
446
447    test_name = lib.rsplit("/", 1)[-1]
448    test_dir = "/data/nativetest"
449    test_dir_bitness = ""
450    if symbol.ARCH_IS_32BIT:
451      bitness = "32"
452    else:
453      bitness = "64"
454      test_dir_bitness = "64"
455
456    # Unfortunately, the location of the real symbol file is not
457    # standardized, so we need to go hunting for it.
458
459    # This is in vendor, look for the value in:
460    #   /data/nativetest{64}/vendor/test_name/test_name
461    if lib.startswith("/data/local/tests/vendor/"):
462      lib_path = os.path.join(test_dir + test_dir_bitness, "vendor", test_name, test_name)
463      if os.path.isfile(symbol_dir + lib_path):
464        return lib_path
465
466    # Look for the path in:
467    #   /data/nativetest{64}/test_name/test_name
468    lib_path = os.path.join(test_dir + test_dir_bitness, test_name, test_name)
469    if os.path.isfile(symbol_dir + lib_path):
470      return lib_path
471
472    # CtsXXX tests are in really non-standard locations try:
473    #  /data/nativetest/{test_name}
474    lib_path = os.path.join(test_dir, test_name)
475    if os.path.isfile(symbol_dir + lib_path):
476      return lib_path
477    # Try:
478    #   /data/nativetest/{test_name}{32|64}
479    lib_path += bitness
480    if os.path.isfile(symbol_dir + lib_path):
481      return lib_path
482
483    # Cannot find location, give up and return the original path
484    print("WARNING: Cannot find %s in symbol directory" % lib)
485    return lib
486
487
488  def ProcessLine(self, line):
489    ret = False
490    process_header = self.process_info_line.search(line)
491    signal_header = self.signal_line.search(line)
492    abort_message_header = self.abort_message_line.search(line)
493    thread_header = self.thread_line.search(line)
494    register_header = self.register_line.search(line)
495    revision_header = self.revision_line.search(line)
496    dalvik_jni_thread_header = self.dalvik_jni_thread_line.search(line)
497    dalvik_native_thread_header = self.dalvik_native_thread_line.search(line)
498    unreachable_header = self.unreachable_line.search(line)
499    if process_header or signal_header or abort_message_header or thread_header or \
500        register_header or dalvik_jni_thread_header or dalvik_native_thread_header or \
501        revision_header or unreachable_header:
502      ret = True
503      if self.trace_lines or self.value_lines or self.mte_stack_records:
504        self.PrintOutput(self.trace_lines, self.value_lines)
505        self.PrintDivider()
506        self.trace_lines = []
507        self.value_lines = []
508        self.mte_fault_address = None
509        self.mte_stack_records = []
510        self.last_frame = -1
511      if self.mte_sync_line.match(line):
512        match = self.mte_sync_line.match(line)
513        self.mte_fault_address = int(match.group("address"), 16)
514      if process_header:
515        print(process_header.group(1))
516      if signal_header:
517        print(signal_header.group(1))
518      if abort_message_header:
519        print(abort_message_header.group(1))
520      if register_header:
521        print(register_header.group(1))
522      if thread_header:
523        print(thread_header.group(1))
524      if dalvik_jni_thread_header:
525        print(dalvik_jni_thread_header.group(1))
526      if dalvik_native_thread_header:
527        print(dalvik_native_thread_header.group(1))
528      if revision_header:
529        print(revision_header.group(1))
530      if unreachable_header:
531        print(unreachable_header.group(1))
532      return True
533    trace_line_dict = self.MatchTraceLine(line)
534    if trace_line_dict is not None:
535      ret = True
536      frame = int(trace_line_dict["frame"])
537      code_addr = trace_line_dict["offset"]
538      area = trace_line_dict["dso"]
539      so_offset = trace_line_dict["so_offset"]
540      symbol_present = trace_line_dict["symbol_present"]
541      symbol_name = trace_line_dict["symbol_name"]
542      build_id = trace_line_dict["build_id"]
543
544      if frame <= self.last_frame and (self.trace_lines or self.value_lines):
545        self.PrintOutput(self.trace_lines, self.value_lines)
546        self.PrintDivider()
547        self.trace_lines = []
548        self.value_lines = []
549      self.last_frame = frame
550
551      if area == "<unknown>" or area == "[heap]" or area == "[stack]":
552        self.trace_lines.append((code_addr, "", area))
553      else:
554        # If this is an apk, it usually means that there is actually
555        # a shared so that was loaded directly out of it. In that case,
556        # extract the shared library and the name of the shared library.
557        lib = None
558        # The format of the map name:
559        #   Some.apk!libshared.so
560        # or
561        #   Some.apk
562        lib_extracted = False
563        if so_offset:
564          # If it ends in apk, we are done.
565          apk = None
566          if area.endswith(".apk"):
567            apk = area
568          else:
569            index = area.rfind(".so!")
570            if index != -1:
571              # Sometimes we'll see something like:
572              #   #01 pc abcd  libart.so!libart.so (offset 0x134000)
573              # Remove everything after the ! and zero the offset value.
574              area = area[0:index + 3]
575              so_offset = 0
576            else:
577              index = area.rfind(".apk!")
578              if index != -1:
579                apk = area[0:index + 4]
580          if apk:
581            lib_name, lib = self.GetLibFromApk(apk, so_offset)
582            lib_extracted = lib != None
583        else:
584          # Sometimes we'll see something like:
585          #   #01 pc abcd  libart.so!libart.so
586          # Remove everything after the !.
587          index = area.rfind(".so!")
588          if index != -1:
589            area = area[0:index + 3]
590        if not lib:
591          lib = area
592          lib_name = None
593
594        if not lib_extracted:
595          if build_id:
596            # If we have the build_id, do a brute-force search of the symbols directory.
597            basename = os.path.basename(lib).split("!")[-1]
598            lib = self.GetLibraryByBuildId(symbol.SYMBOLS_DIR, basename, build_id)
599            if not lib:
600              print("WARNING: Cannot find {} with build id {} in symbols directory."
601                    .format(basename, build_id))
602          else:
603            # When using atest, test paths are different between the out/ directory
604            # and device. Apply fixups.
605            lib = self.GetLibPath(lib)
606
607        # If a calls b which further calls c and c is inlined to b, we want to
608        # display "a -> b -> c" in the stack trace instead of just "a -> c"
609        info = symbol.SymbolInformation(lib, code_addr)
610        nest_count = len(info) - 1
611        for (source_symbol, source_location, symbol_with_offset) in info:
612          if not source_symbol:
613            if symbol_present:
614              source_symbol = symbol.CallCppFilt(symbol_name)
615            else:
616              source_symbol = "<unknown>"
617          if not symbol.VERBOSE:
618            source_symbol = symbol.FormatSymbolWithoutParameters(source_symbol)
619            symbol_with_offset = symbol.FormatSymbolWithoutParameters(symbol_with_offset)
620          if not source_location:
621            source_location = area
622            if lib_name:
623              source_location += "(" + lib_name + ")"
624          if nest_count > 0:
625            nest_count = nest_count - 1
626            arrow = "v------>"
627            if not symbol.ARCH_IS_32BIT:
628              arrow = "v-------------->"
629            self.trace_lines.append((arrow, source_symbol, source_location))
630          else:
631            if not symbol_with_offset:
632              symbol_with_offset = source_symbol
633            self.trace_lines.append((code_addr, symbol_with_offset, source_location))
634    if self.code_line.match(line):
635      # Code lines should be ignored. If this were exluded the 'code around'
636      # sections would trigger value_line matches.
637      return ret
638    if self.value_line.match(line):
639      ret = True
640      match = self.value_line.match(line)
641      (unused_, addr, value, area, symbol_present, symbol_name) = match.groups()
642      if area == "<unknown>" or area == "[heap]" or area == "[stack]" or not area:
643        self.value_lines.append((addr, value, "", area))
644      else:
645        info = symbol.SymbolInformation(area, value)
646        (source_symbol, source_location, object_symbol_with_offset) = info.pop()
647        # If there is no information, skip this.
648        if source_symbol or source_location or object_symbol_with_offset:
649          if not source_symbol:
650            if symbol_present:
651              source_symbol = symbol.CallCppFilt(symbol_name)
652            else:
653              source_symbol = "<unknown>"
654          if not source_location:
655            source_location = area
656          if not object_symbol_with_offset:
657            object_symbol_with_offset = source_symbol
658          self.value_lines.append((addr,
659                                   value,
660                                   object_symbol_with_offset,
661                                   source_location))
662    if self.mte_stack_record_line.match(line):
663      ret = True
664      match = self.mte_stack_record_line.match(line)
665      if self.mte_fault_address is not None:
666        self.mte_stack_records.append(
667          (match.group("object"),
668           match.group("buildid"),
669           int(match.group("offset"), 16),
670           int(match.group("fp"), 16),
671           int(match.group("tag"), 16)))
672
673    return ret
674
675
class RegisterPatternTests(unittest.TestCase):
  """Checks that register_line matches exactly the register dump lines."""

  def assert_register_matches(self, abi, example_crash, stupid_pattern):
    """Compare register_line against a simple reference pattern, line by line.

    Args:
      abi: name of the architecture under test (informational only).
      example_crash: full text of an example crash dump.
      stupid_pattern: regex that finds register lines by register name.
    """
    converter = TraceConverter()
    crash_lines = example_crash.split('\n')
    symbol.SetBitness(crash_lines)
    converter.UpdateBitnessRegexes()
    for crash_line in crash_lines:
      converter.ProcessLine(crash_line)
      expected = re.search(stupid_pattern, crash_line) is not None
      actual = converter.register_line.search(crash_line) is not None
      self.assertEqual(actual, expected, crash_line)
    converter.PrintOutput(converter.trace_lines, converter.value_lines)

  def test_arm_registers(self):
    pattern = '\\b(r0|r4|r8|ip|scr)\\b'
    self.assert_register_matches("arm", example_crashes.arm, pattern)

  def test_arm64_registers(self):
    pattern = '\\b(x0|x4|x8|x12|x16|x20|x24|x28|sp|v[1-3]?[0-9])\\b'
    self.assert_register_matches("arm64", example_crashes.arm64, pattern)

  def test_x86_registers(self):
    pattern = '\\b(eax|esi|xcs|eip)\\b'
    self.assert_register_matches("x86", example_crashes.x86, pattern)

  def test_x86_64_registers(self):
    pattern = '\\b(rax|rsi|r8|r12|cs|rip)\\b'
    self.assert_register_matches("x86_64", example_crashes.x86_64, pattern)

  def test_riscv64_registers(self):
    pattern = '\\b(gp|t2|t6|s3|s7|s11|a3|a7|sp)\\b'
    self.assert_register_matches("riscv64", example_crashes.riscv64, pattern)
703
class LibmemunreachablePatternTests(unittest.TestCase):
  """Checks header and frame detection on libmemunreachable output."""

  def test_libmemunreachable(self):
    converter = TraceConverter()
    report_lines = example_crashes.libmemunreachable.split('\n')

    symbol.SetBitness(report_lines)
    self.assertTrue(symbol.ARCH_IS_32BIT)
    converter.UpdateBitnessRegexes()

    header_count = 0
    frame_count = 0
    for report_line in report_lines:
      converter.ProcessLine(report_line)
      header_count += int(re.search(converter.unreachable_line, report_line) is not None)
      frame_count += int(converter.MatchTraceLine(report_line) is not None)

    self.assertEqual(header_count, 3)
    self.assertEqual(frame_count, 2)
    converter.PrintOutput(converter.trace_lines, converter.value_lines)
723
class LongASANStackTests(unittest.TestCase):
  """Regression test for ASAN traces with more than ten frames."""

  # ASAN prints frame numbers without padding. When frame numbers were
  # compared as strings, the step from #9 to #10 looked like the start of a
  # new trace and split the stack in two.
  def test_long_asan_crash(self):
    converter = TraceConverter()
    crash_lines = example_crashes.long_asan_crash.splitlines()
    symbol.SetBitness(crash_lines)
    converter.UpdateBitnessRegexes()
    # A split prints a separator and flushes trace_lines, so the number of
    # stored frames must never go down while the trace is being read.
    frames_so_far = 0
    for crash_line in crash_lines:
      converter.ProcessLine(crash_line)
      frames_now = len(converter.trace_lines)
      self.assertLessEqual(frames_so_far, frames_now)
      frames_so_far = frames_now
    # The split happened at the #9 -> #10 transition, so more than ten
    # frames must have been parsed and stored.
    self.assertGreater(frames_so_far, 10)
    converter.PrintOutput(converter.trace_lines, converter.value_lines)
744
class ValueLinesTest(unittest.TestCase):
  """Checks that value lines without symbol information are not stored."""

  def test_value_line_skipped(self):
    # Put the module-wide bitness back afterwards so that forcing 32-bit
    # here cannot leak into whatever runs next.
    self.addCleanup(setattr, symbol, "ARCH_IS_32BIT", symbol.ARCH_IS_32BIT)
    tc = TraceConverter()
    symbol.ARCH_IS_32BIT = True
    tc.UpdateBitnessRegexes()
    tc.ProcessLine("    12345678  00001000  .")
    self.assertEqual([], tc.value_lines)
752
# Run the unit tests above when this file is executed as a script.
if __name__ == '__main__':
  unittest.main(verbosity=2)
755