• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python3
2#
3# Copyright (C) 2013 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16
17"""Module for looking up symbolic debugging information.
18
19The information can include symbol names, offsets, and source locations.
20"""
21
22import atexit
23import json
24import glob
25import os
26import platform
27import re
28import shutil
29import signal
30import subprocess
31import unittest
32
# Root of the Android source checkout, taken from the environment; falls back
# to the current directory when ANDROID_BUILD_TOP is not set.
ANDROID_BUILD_TOP = os.getenv("ANDROID_BUILD_TOP", ".")
34
35
def FindClangDir():
  """Return the prebuilt clang directory for this tree, or None.

  The clang version is obtained by running
  build/soong/scripts/get_clang_version.py under ANDROID_BUILD_TOP.

  Returns:
    Path of the matching prebuilts/clang/host/linux-x86/<version> directory,
    or None when the version script does not exist.

  Raises:
    subprocess.CalledProcessError: the script exists but exits non-zero.
  """
  version_script = ANDROID_BUILD_TOP + "/build/soong/scripts/get_clang_version.py"
  if not os.path.exists(version_script):
    return None

  # Deliberately unguarded: if the script is present but cannot determine the
  # clang version we want the failure to propagate.
  clang_version = subprocess.check_output(version_script, text=True).strip()
  return ANDROID_BUILD_TOP + "/prebuilts/clang/host/linux-x86/" + clang_version
45
46
def FindSymbolsDir():
  """Ask the build system where unstripped binaries live.

  Runs soong_ui.bash in dumpvar mode from ANDROID_BUILD_TOP and returns the
  absolute TARGET_OUT_UNSTRIPPED path it prints.

  Returns:
    The command's stdout with surrounding whitespace removed. This is the
    empty string when the command prints nothing (for example when it cannot
    be run outside an Android tree); the exit status is intentionally not
    checked so that importing this module keeps working there.
  """
  cmd = "build/soong/soong_ui.bash --dumpvar-mode --abs TARGET_OUT_UNSTRIPPED"
  # Use cwd= instead of os.chdir() so the caller's working directory is never
  # modified, and subprocess.run() so the child is always waited on and its
  # pipe closed (the previous Popen was never reaped).
  completed = subprocess.run(
      cmd,
      cwd=ANDROID_BUILD_TOP,
      stdout=subprocess.PIPE,
      universal_newlines=True,
      shell=True)
  return str(completed.stdout.strip())
59
# Directory holding unstripped binaries, resolved once at import time.
SYMBOLS_DIR = FindSymbolsDir()

# None until SetBitness() has inspected a trace; afterwards True or False.
ARCH_IS_32BIT = None

VERBOSE = False

# Private memoized tool locations. Do not access them from other modules.
_CACHED_TOOLCHAIN, _CACHED_CXX_FILT = None, None

# Memoized symbolization results: the first two are keyed by library and then
# by address, the last one by mangled symbol name.
_SYMBOL_INFORMATION_ADDR2LINE_CACHE = dict()
_SYMBOL_INFORMATION_OBJDUMP_CACHE = dict()
_SYMBOL_DEMANGLING_CACHE = dict()
74
75# Caches for pipes to subprocesses.
76
class ProcessCache:
  """Least-recently-used cache of helper subprocesses keyed by command line.

  Each distinct command is spawned once with stdin/stdout pipes and then
  reused. When more than _PIPE_MAX_OPEN processes would be open, the least
  recently used ones are terminated first.
  """

  # Max number of open pipes.
  _PIPE_MAX_OPEN = 10

  def __init__(self):
    # State is per instance. These used to be class attributes, which made
    # every ProcessCache share one command->pipe dictionary.
    self._cmd2pipe = {}
    # (cmd_tuple, pipe) pairs, most recently used first.
    self._lru = []

  def GetProcess(self, cmd):
    """Return a running process for cmd, spawning it if necessary.

    Args:
      cmd: command line as a list of strings.

    Returns:
      The (possibly reused) subprocess.Popen object for that command.
    """
    cmd_tuple = tuple(cmd)  # Need to use a tuple as lists can't be dict keys.
    # Pipe already available?
    if cmd_tuple in self._cmd2pipe:
      pipe = self._cmd2pipe[cmd_tuple]
      # Move the entry to the front of the LRU list.
      self._lru = [(cmd_tuple, pipe)] + [i for i in self._lru if i[0] != cmd_tuple]
      return pipe

    # Not cached yet. Make room first: evict from the tail (oldest) end.
    while len(self._lru) >= self._PIPE_MAX_OPEN:
      open_cmd, open_pipe = self._lru.pop()
      del self._cmd2pipe[open_cmd]
      self.TerminateProcess(open_pipe)

    # Create and put into cache.
    pipe = self.SpawnProcess(cmd)
    self._cmd2pipe[cmd_tuple] = pipe
    self._lru = [(cmd_tuple, pipe)] + self._lru
    return pipe

  def SpawnProcess(self, cmd):
    """Start cmd with text-mode stdin and stdout pipes."""
    return subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, universal_newlines=True)

  def TerminateProcess(self, pipe):
    """Stop the given process (if still running) and reap it."""
    if pipe.poll() is None:
      # Process is still running.
      pipe.stdin.close()
      pipe.stdout.close()
      pipe.terminate()
    pipe.wait()

  def KillAllProcesses(self):
    """Terminate every cached process and empty the cache."""
    for _, open_pipe in self._lru:
      self.TerminateProcess(open_pipe)
    # Reset the instance state. Previously these were assignments to local
    # variables, so terminated processes stayed cached and were handed out
    # again by GetProcess().
    self._cmd2pipe = {}
    self._lru = []
123
124
# Separate process caches for the symbolizer and the demangler helpers.
_PIPE_ADDR2LINE_CACHE = ProcessCache()
_PIPE_CPPFILT_CACHE = ProcessCache()


def CloseAllPipes():
  """Terminate every helper subprocess held by the module-level caches."""
  for cache in (_PIPE_ADDR2LINE_CACHE, _PIPE_CPPFILT_CACHE):
    cache.KillAllProcesses()


# Clean up the helper processes on normal interpreter shutdown.
atexit.register(CloseAllPipes)
137
138
def PipeTermHandler(signum, frame):
  """Signal handler: shut down the helper subprocesses, then exit.

  Args:
    signum: number of the delivered signal (unused).
    frame: interrupted stack frame (unused).
  """
  CloseAllPipes()
  # Exit immediately with status 0, without running further Python cleanup.
  os._exit(0)


# Install the handler for the signals that normally end the process.
for sig in (signal.SIGABRT, signal.SIGINT, signal.SIGTERM):
  signal.signal(sig, PipeTermHandler)
146
147
148
149
def ToolPath(tool, toolchain=None):
  """Return how to invoke tool: its bare name if on PATH, else a full path.

  Args:
    tool: executable name, e.g. "llvm-symbolizer".
    toolchain: (optional) directory to look in when the tool is not on PATH;
      FindToolchain() is consulted when this is empty or None.

  Returns:
    tool unchanged when shutil.which() finds it, otherwise tool joined onto
    the toolchain directory.
  """
  if shutil.which(tool):
    return tool
  base_dir = toolchain or FindToolchain()
  return os.path.join(base_dir, tool)
157
158
def FindToolchain():
  """Return the llvm-binutils directory of the tree, caching the answer.

  Returns:
    The prebuilts/clang/host/linux-x86/llvm-binutils-stable/ directory under
    ANDROID_BUILD_TOP (with trailing slash).

  Raises:
    Exception: the directory does not exist.
  """
  global _CACHED_TOOLCHAIN

  if not _CACHED_TOOLCHAIN:
    binutils_dir = ANDROID_BUILD_TOP + "/prebuilts/clang/host/linux-x86/llvm-binutils-stable/"
    if not os.path.exists(binutils_dir):
      raise Exception("Could not find llvm tool chain directory %s" % (binutils_dir))
    _CACHED_TOOLCHAIN = binutils_dir
    # Only announced the first time, when the directory is actually resolved.
    print("Using toolchain from:", _CACHED_TOOLCHAIN)

  return _CACHED_TOOLCHAIN
173
174
def SymbolInformation(lib, addr):
  """Look up symbol information for a single address.

  Args:
    lib: library (or executable) pathname containing symbols
    addr: string hexadecimal address

  Returns:
    A list of (source_symbol, source_location, object_symbol_with_offset)
    tuples. Inlined functions produce several entries, innermost inlined
    location first. The list is never empty: when nothing is known it is
    [(None, None, None)].

    Usually you want to display the source_location and
    object_symbol_with_offset from the last element in the list.
  """
  by_addr = SymbolInformationForSet(lib, {addr})
  frames = by_addr.get(addr) if by_addr else None
  if frames:
    return frames
  return [(None, None, None)]
196
197
def SymbolInformationForSet(lib, unique_addrs):
  """Look up symbol information for a set of addresses in one library.

  Args:
    lib: library (or executable) pathname containing symbols
    unique_addrs: set of hexadecimal address strings

  Returns:
    None when lib is empty or when either the symbolizer or the objdump
    lookup yields nothing. Otherwise a dictionary
    {addr: [(source_symbol, source_location, object_symbol_with_offset)]}
    with a non-empty list for every requested address.

    Inlined functions produce several list entries, innermost inlined
    location first. Usually you want to display the source_location and
    object_symbol_with_offset from the last element in the list.
  """
  if not lib:
    return None

  line_info = CallLlvmSymbolizerForSet(lib, unique_addrs)
  if not line_info:
    return None

  objdump_info = CallObjdumpForSet(lib, unique_addrs)
  if not objdump_info:
    return None

  combined = {}
  for addr in unique_addrs:
    # Fall back to a single empty frame so the list is never empty.
    frames = line_info.get(addr) or [(None, None)]

    object_symbol_with_offset = None
    if addr in objdump_info:
      object_symbol, object_offset = objdump_info[addr]
      object_symbol_with_offset = FormatSymbolWithOffset(object_symbol,
                                                         object_offset)

    # The same object-level symbol is attached to every (inlined) frame.
    combined[addr] = [
        (source_symbol, source_location, object_symbol_with_offset)
        for source_symbol, source_location in frames]

  return combined
244
245
def _OptionalStackRecordField(json_result, field):
  """Read an optional numeric field from an llvm-symbolizer FRAME record.

  Some parts of the FRAME output are emitted as a string containing a hex
  integer, or as the empty string when the value is missing.

  Args:
    json_result: dictionary containing the Frame response
    field: name of the field we want to read

  Returns:
    The field value as an integer, or None if it is absent or empty.
  """
  raw = json_result.get(field, "")
  if isinstance(raw, int):
    # Already numeric: kept so a future symbolizer that emits real integers
    # keeps working without changes here.
    return raw
  return None if raw == "" else int(raw, 16)
267
268
def _GetJSONSymbolizerForLib(lib, args=None):
  """Find the symbol file for lib and return an llvm-symbolizer for it.

  The file is looked up under SYMBOLS_DIR first and then at lib itself.

  Args:
    lib: library (or executable) pathname containing symbols
    args: (optional) list of arguments to pass to llvm-symbolizer

  Returns:
    child process emitting JSON, or None if no symbol file exists or the
    first existing candidate is a directory
  """
  extra_args = [] if args is None else args

  # Take the first candidate that exists on disk.
  for symbols in (SYMBOLS_DIR + lib, lib):
    if os.path.exists(symbols):
      break
  else:
    return None

  # Make sure the symbols path is not a directory.
  if os.path.isdir(symbols):
    return None

  cmd = [ToolPath("llvm-symbolizer"), "--output-style=JSON"]
  cmd = cmd + extra_args + ["--obj=" + symbols]
  return _PIPE_ADDR2LINE_CACHE.GetProcess(cmd)
293
294
def GetStackRecordsForSet(lib, unique_addrs):
  """Look up stack frame (local variable) records for a set of addresses.

  Args:
    lib: library (or executable) pathname containing symbols
    unique_addrs: set of integer addresses to look up.

  Returns:
    None if no running symbolizer is available for lib, otherwise a list of
    tuples
    (addr, function_name, local_name, file_line, frame_offset, size, tag_offset)
    describing the local variables of the stack frame.
    frame_offset, size, tag_offset may be None.
  """
  symbolizer = _GetJSONSymbolizerForLib(lib)
  if symbolizer is None or symbolizer.poll() is not None:
    return None

  records = []
  for addr in unique_addrs:
    # One FRAME query per address; the reply is a single JSON line.
    symbolizer.stdin.write("FRAME 0x%x\n" % addr)
    symbolizer.stdin.flush()
    reply = json.loads(symbolizer.stdout.readline().strip())
    records.extend(
        (addr,
         frame["FunctionName"],
         frame["Name"],
         frame["DeclFile"] + ":" + str(frame["DeclLine"]),
         frame.get("FrameOffset"),
         _OptionalStackRecordField(frame, "Size"),
         _OptionalStackRecordField(frame, "TagOffset"))
        for frame in reply["Frame"])
  return records
326
327
def CallLlvmSymbolizerForSet(lib, unique_addrs):
  """Look up line and symbol information for a set of addresses.

  Results are memoized per library, so only addresses not seen before are
  sent to llvm-symbolizer.

  Args:
    lib: library (or executable) pathname containing symbols
    unique_addrs: set of hexadecimal address strings to look up.

  Returns:
    None if lib is empty or no symbolizer could be started. Otherwise a
    dictionary {addr: [(symbol, file:line)]} where each address has a list
    of associated symbols and locations, or an empty list if no symbol
    information was found.

    If the function has been inlined then the list may contain more than one
    element, with the most deeply nested inlined location appearing first.
  """
  if not lib:
    return None

  ordered_addrs = sorted(unique_addrs)
  addr_cache = _SYMBOL_INFORMATION_ADDR2LINE_CACHE.get(lib)
  if addr_cache is None:
    # First query for this library: start an empty cache for it.
    addr_cache = {}
    _SYMBOL_INFORMATION_ADDR2LINE_CACHE[lib] = addr_cache
    result = {}
    pending = ordered_addrs
  else:
    # Serve what is already known; the rest still needs to be symbolized.
    result = {a: addr_cache[a] for a in ordered_addrs if a in addr_cache}
    pending = [a for a in ordered_addrs if a not in addr_cache]
    if not pending:
      # Everything was cached, we're done.
      return result

  child = _GetJSONSymbolizerForLib(
    lib, ["--functions", "--inlines", "--demangle"])
  if child is None:
    return None

  for addr in pending:
    try:
      child.stdin.write("0x%s\n" % addr)
      child.stdin.flush()
      reply = json.loads(child.stdout.readline().strip())
      symbols = reply["Symbol"] if "Symbol" in reply else ()
      # GNU style location: file_name:line_num
      records = [
          (symbol["FunctionName"],
           "%s:%s" % (symbol["FileName"], symbol["Line"]))
          for symbol in symbols]
    except IOError as e:
      # Remove the / in front of the library name to match other output.
      records = [(None, lib[1:] + "  ***Error: " + str(e))]
    result[addr] = records
    addr_cache[addr] = records
  return result
391
392
def CallObjdumpForSet(lib, unique_addrs):
  """Use objdump to find out the names of the containing functions.

  Results are memoized per library; only addresses not seen before are
  looked up by disassembling the .text range that covers them.

  Args:
    lib: library (or executable) pathname containing symbols
    unique_addrs: set of hexadecimal address strings to find the functions
      for.

  Returns:
    None if lib is empty or its symbol file cannot be found. Otherwise a
    dictionary of the form {addr: (string symbol, offset)} holding the
    addresses that could be resolved.
  """
  if not lib:
    return None

  result = {}
  addrs = sorted(unique_addrs)

  addr_cache = _SYMBOL_INFORMATION_OBJDUMP_CACHE.get(lib)
  if addr_cache is None:
    addr_cache = {}
    _SYMBOL_INFORMATION_OBJDUMP_CACHE[lib] = addr_cache
  else:
    # Go through and handle all known addresses.
    for known in addrs:
      if known in addr_cache:
        result[known] = addr_cache[known]
    addrs = [a for a in addrs if a not in addr_cache]
    if not addrs:
      # Everything was cached, we're done.
      return result

  symbols = SYMBOLS_DIR + lib
  if not os.path.exists(symbols):
    symbols = lib
    if not os.path.exists(symbols):
      return None

  if not addrs:
    # Nothing to look up; indexing addrs[0] below would raise IndexError.
    return result

  # Order by numeric value. A plain string sort puts "ff0" after "1000",
  # which produced a wrong (possibly empty) start/stop range whenever the
  # address strings were not all the same width.
  addrs.sort(key=lambda a: int(a, 16))
  addr_values = [int(a, 16) for a in addrs]

  start_addr_dec = str(addr_values[0])
  stop_addr_dec = str(addr_values[-1] + 8)
  cmd = [ToolPath("llvm-objdump"),
         "--section=.text",
         "--demangle",
         "--disassemble",
         "--start-address=" + start_addr_dec,
         "--stop-address=" + stop_addr_dec,
         symbols]

  # Function lines look like:
  #   000177b0 <android::IBinder::~IBinder()+0x2c>:
  # We pull out the address and function first. Then we check for an optional
  # offset. This is tricky due to functions that look like "operator+(..)+0x2c"
  func_regexp = re.compile(r"(^[a-f0-9]*) <(.*)>:$")
  offset_regexp = re.compile(r"(.*)\+0x([a-f0-9]*)")

  # A disassembly line looks like:
  #   177b2:	b510      	push	{r4, lr}
  asm_regexp = re.compile(r"(^[ a-f0-9]*):[ a-f0-9]*.*$")

  current_symbol = None    # The current function symbol in the disassembly.
  current_symbol_addr = 0  # The address of the current function.
  addr_index = 0  # Index of the address that we are currently looking for.
  addr_count = len(addrs)

  # The context manager closes the pipe and waits for objdump, so the child
  # is reaped even when we stop reading early.
  with subprocess.Popen(cmd, stdout=subprocess.PIPE, universal_newlines=True) as proc:
    for line in proc.stdout:
      # Is it a function line like:
      #   000177b0 <android::IBinder::~IBinder()>:
      components = func_regexp.match(line)
      if components:
        # This is a new function, so record the current function and its
        # address.
        current_symbol_addr = int(components.group(1), 16)
        current_symbol = components.group(2)

        # Does it have an optional offset like: "foo(..)+0x2c"?
        components = offset_regexp.match(current_symbol)
        if components:
          current_symbol = components.group(1)
          offset = components.group(2)
          if offset:
            current_symbol_addr -= int(offset, 16)

      # Is it a disassembly line like:
      #   177b2:	b510      	push	{r4, lr}
      components = asm_regexp.match(line)
      if not components:
        continue
      i_addr = int(components.group(1), 16)

      # Targets the disassembly has already moved past can never match any
      # more; skip them so they do not block every later address.
      while addr_index < addr_count and addr_values[addr_index] < i_addr:
        addr_index += 1

      # Record every target equal to this instruction address (different
      # spellings of the same value, e.g. "10" and "010", are all served).
      while addr_index < addr_count and addr_values[addr_index] == i_addr:
        target_addr = addrs[addr_index]
        result[target_addr] = (current_symbol, i_addr - current_symbol_addr)
        addr_cache[target_addr] = result[target_addr]
        addr_index += 1

      if addr_index >= addr_count:
        break

  return result
495
496
def CallCppFilt(mangled_symbol):
  """Demangle a C++ symbol with llvm-cxxfilt, memoizing the answer.

  The llvm-cxxfilt binary is located once: inside the directory reported by
  FindClangDir(), or, when that is unavailable, via the relative glob
  ./clang-r*/bin/llvm-cxxfilt.

  Args:
    mangled_symbol: the mangled name to translate.

  Returns:
    The line llvm-cxxfilt prints for the symbol, stripped of whitespace.

  Raises:
    Exception: llvm-cxxfilt is missing, or the glob matched several copies.
  """
  global _CACHED_CXX_FILT

  try:
    return _SYMBOL_DEMANGLING_CACHE[mangled_symbol]
  except KeyError:
    pass

  if not _CACHED_CXX_FILT:
    clang_dir = FindClangDir()
    if clang_dir:
      cxxfilt = clang_dir + "/bin/llvm-cxxfilt"
      if not os.path.exists(cxxfilt):
        raise Exception("bin/llvm-cxxfilt missing from " + clang_dir)
      candidates = [cxxfilt]
    else:
      # When run in CI, we don't have a way to find the clang version.  But
      # llvm-cxxfilt should be available in the following relative path.
      candidates = glob.glob("./clang-r*/bin/llvm-cxxfilt")
      if len(candidates) > 1:
        raise Exception("Expected one llvm-cxxfilt but found many: " + \
                        ", ".join(candidates))
    if not candidates:
      raise Exception("Could not find llvm-cxxfilt tool")
    _CACHED_CXX_FILT = sorted(candidates)[-1]

  # One request line in, one reply line out, over the cached pipe.
  process = _PIPE_CPPFILT_CACHE.GetProcess([_CACHED_CXX_FILT])
  process.stdin.write(mangled_symbol + "\n")
  process.stdin.flush()
  demangled_symbol = process.stdout.readline().strip()

  _SYMBOL_DEMANGLING_CACHE[mangled_symbol] = demangled_symbol
  return demangled_symbol
532
533
def FormatSymbolWithOffset(symbol, offset):
  """Return "symbol+offset" (decimal), or just symbol when offset is 0."""
  return symbol if offset == 0 else "%s+%d" % (symbol, offset)
538
def FormatSymbolWithoutParameters(symbol):
  """Strip the parameter lists from a demangled function name.

  No attempt is made to parse the C++ signature: every top-level "(...)"
  group with something inside it is cut out. Template arguments "<...>" are
  tracked for nesting but kept.

  Args:
    symbol: demangled symbol, possibly None or empty.

  Returns:
    The shortened symbol, or the input unchanged when it is empty or its
    brackets do not pair up.
  """
  if not symbol:
    return symbol

  text = symbol.replace(") const", ")")  # Strip const keyword.
  # Swap operators containing '<' or '>' for placeholders so they are not
  # mistaken for unmatched brackets.
  for spelled, placeholder in (("operator<<", "operator\u00AB"),
                               ("operator>>", "operator\u00BB"),
                               ("operator->", "operator\u2192")):
    text = text.replace(spelled, placeholder)

  opener_for = {')': '(', '>': '<'}
  pending = []  # Closing brackets still waiting for their opener.
  index = len(text)
  # Walk backward so that cutting a group never shifts unvisited indices.
  while index > 0:
    index -= 1
    ch = text[index]
    if ch in opener_for:
      if not pending:
        group_end = index + 1  # One past the end of a top-level pair.
      pending.append(ch)
    elif ch == '(' or ch == '<':
      if not pending or opener_for[pending.pop()] != ch:
        return symbol  # Malformed: character does not match its pair.
      if not pending and ch == '(' and (group_end - index) > 2:
        text = text[:index] + text[group_end:]  # Drop the whole group.
  if pending:
    return symbol  # Malformed: missing pair.

  return text.strip()
570
def SetBitness(lines):
  """Guess from stack trace lines whether the traced process was 32-bit.

  Sets the module-level ARCH_IS_32BIT flag. A regular trace line such as
  "#00 pc 000374e0" decides immediately from the width of its address
  (8 or 16 hex digits). An ASAN-style line such as "#10 0xb5eeba5d  (...)"
  proves 64-bit when the address has more than 8 digits; a shorter address
  only suggests 32-bit, so scanning continues. With no matching line the
  flag ends up False.

  Args:
    lines: iterable of trace line strings.
  """
  global ARCH_IS_32BIT

  # Raw strings: "\#" in a normal string literal is an invalid escape
  # sequence and makes newer Python versions emit a warning at compile time.
  trace_line = re.compile(r"\#[0-9]+[ \t]+..[ \t]+([0-9a-f]{8}|[0-9a-f]{16})([ \t]+|$)")
  asan_trace_line = re.compile(r"\#[0-9]+[ \t]+0x([0-9a-f]+)[ \t]+")

  ARCH_IS_32BIT = False
  for line in lines:
    trace_match = trace_line.search(line)
    if trace_match:
      # The address is zero-padded, so its width tells us the bitness.
      ARCH_IS_32BIT = len(trace_match.group(1)) != 16
      break
    asan_trace_match = asan_trace_line.search(line)
    if asan_trace_match:
      # We might be able to guess the bitness by the length of the address.
      if len(asan_trace_match.group(1)) > 8:
        ARCH_IS_32BIT = False
        # We know for a fact this is 64 bit, so we are done.
        break
      # This might be 32 bit, or just a small address. Keep going in this
      # case, but if we couldn't figure anything else out, go with 32 bit.
      ARCH_IS_32BIT = True
598
class FindClangDirTests(unittest.TestCase):
  # Only meaningful inside an Android checkout; skipped when ANDROID_BUILD_TOP
  # fell back to ".".
  @unittest.skipIf(ANDROID_BUILD_TOP == '.', 'Test only supported in an Android tree.')
  def test_clang_dir_found(self):
    clang_dir = FindClangDir()
    self.assertIsNotNone(clang_dir)
603
class SetBitnessTests(unittest.TestCase):
  # Each test feeds trace lines to SetBitness() and then reads the
  # module-level ARCH_IS_32BIT flag it sets.

  def test_32bit_check(self):
    trace = ["#00 pc 000374e0"]
    SetBitness(trace)
    self.assertTrue(ARCH_IS_32BIT)

  def test_64bit_check(self):
    trace = ["#00 pc 00000000000374e0"]
    SetBitness(trace)
    self.assertFalse(ARCH_IS_32BIT)

  def test_32bit_asan_trace_line_toolchain(self):
    trace = ["#10 0xb5eeba5d  (/system/vendor/lib/egl/libGLESv1_CM_adreno.so+0xfa5d)"]
    SetBitness(trace)
    self.assertTrue(ARCH_IS_32BIT)

  def test_64bit_asan_trace_line_toolchain(self):
    # A short address first, then one that is too wide for 32 bits.
    trace = [
        "#12 0x5d33bf  (/system/lib/libclang_rt.asan-arm-android.so+0x823bf)",
        "#12 0x11b35d33bf  (/system/lib/libclang_rt.asan-arm-android.so+0x823bf)",
    ]
    SetBitness(trace)
    self.assertFalse(ARCH_IS_32BIT)
629
class FormatSymbolWithoutParametersTests(unittest.TestCase):
  def _check(self, cases):
    # cases: iterable of (input symbol, expected output) pairs.
    for symbol, expected in cases:
      self.assertEqual(FormatSymbolWithoutParameters(symbol), expected)

  def test_c(self):
    self._check([
        ("foo", "foo"),
        ("foo+42", "foo+42"),
    ])

  def test_simple(self):
    self._check([
        ("foo(int i)", "foo"),
        ("foo(int i)+42", "foo+42"),
        ("bar::foo(int i)+42", "bar::foo+42"),
        ("operator()", "operator()"),
    ])

  def test_templates(self):
    self._check([
        ("bar::foo<T>(vector<T>& v)", "bar::foo<T>"),
        ("bar<T>::foo(vector<T>& v)", "bar<T>::foo"),
        ("bar::foo<T>(vector<T<U>>& v)", "bar::foo<T>"),
        ("bar::foo<(EnumType)0>(vector<(EnumType)0>& v)", "bar::foo<(EnumType)0>"),
    ])

  def test_nested(self):
    self._check([
        ("foo(int i)::bar(int j)", "foo::bar"),
    ])

  def test_unbalanced(self):
    # Malformed input must come back untouched.
    self._check([
        ("foo(bar(int i)", "foo(bar(int i)"),
        ("foo)bar(int i)", "foo)bar(int i)"),
        ("foo<bar(int i)", "foo<bar(int i)"),
        ("foo>bar(int i)", "foo>bar(int i)"),
    ])
656
# Run the unit tests above when executed as a script.
if __name__ == "__main__":
  unittest.main(verbosity=2)
659