• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python3
2#
3# Copyright (C) 2013 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16
17"""Module for looking up symbolic debugging information.
18
19The information can include symbol names, offsets, and source locations.
20"""
21
22import atexit
23import glob
24import os
25import platform
26import re
27import shutil
28import signal
29import subprocess
30import unittest
31
# Root of the Android source tree. Falls back to the current directory when
# the ANDROID_BUILD_TOP environment variable is not set.
ANDROID_BUILD_TOP = os.environ.get("ANDROID_BUILD_TOP", ".")
33
34
def FindClangDir():
  """Return the prebuilt clang directory for this source tree, or None.

  The clang version is the stripped output of
  build/soong/scripts/get_clang_version.py under ANDROID_BUILD_TOP.

  Returns:
    ANDROID_BUILD_TOP + "/prebuilts/clang/host/linux-x86/" + <version>, or
    None when the version script does not exist.

  Raises:
    subprocess.CalledProcessError: if the script exists but exits non-zero.
  """
  version_script = ANDROID_BUILD_TOP + "/build/soong/scripts/get_clang_version.py"
  if not os.path.exists(version_script):
    return None

  # Intentionally not guarded: when the script is present but cannot report
  # a version, the failure should propagate to the caller.
  clang_version = subprocess.check_output(version_script, text=True).strip()
  return ANDROID_BUILD_TOP + "/prebuilts/clang/host/linux-x86/" + clang_version
44
45
def FindSymbolsDir():
  """Return the unstripped-symbols output directory reported by soong_ui.

  Runs "soong_ui.bash --dumpvar-mode --abs TARGET_OUT_UNSTRIPPED" through the
  shell with ANDROID_BUILD_TOP as the child's working directory. The child is
  waited for before returning, and the working directory of this process is
  left untouched.

  Returns:
    The command's standard output with surrounding whitespace stripped. The
    exit status is not checked, so the result is an empty string when the
    command prints nothing on stdout.

  Raises:
    OSError: if ANDROID_BUILD_TOP cannot be used as a working directory.
  """
  cmd = "build/soong/soong_ui.bash --dumpvar-mode --abs TARGET_OUT_UNSTRIPPED"
  completed = subprocess.run(cmd,
                             cwd=ANDROID_BUILD_TOP,
                             stdout=subprocess.PIPE,
                             universal_newlines=True,
                             shell=True)
  return completed.stdout.strip()
58
# Directory prefix prepended to library paths when looking for unstripped
# binaries. Computed once, at import time, by running FindSymbolsDir().
SYMBOLS_DIR = FindSymbolsDir()

# None until SetBitness() runs; afterwards True for 32-bit traces and False
# otherwise.
ARCH_IS_32BIT = None

# Not read anywhere in this module.
# NOTE(review): presumably consumed by other modules -- confirm.
VERBOSE = False

# These are private. Do not access them from other modules.
# Filled in lazily by FindToolchain() and CallCppFilt() respectively.
_CACHED_TOOLCHAIN = None
_CACHED_CXX_FILT = None

# Caches for symbolized information.
# {lib: {addr: [(symbol, location)]}}, filled by CallLlvmSymbolizerForSet().
_SYMBOL_INFORMATION_ADDR2LINE_CACHE = {}
# {lib: {addr: (symbol, offset)}}, filled by CallObjdumpForSet().
_SYMBOL_INFORMATION_OBJDUMP_CACHE = {}
# {mangled_symbol: demangled_symbol}, filled by CallCppFilt().
_SYMBOL_DEMANGLING_CACHE = {}
73
74# Caches for pipes to subprocesses.
75
class ProcessCache:
  """Cache of running helper subprocesses, keyed by their command line.

  At most _PIPE_MAX_OPEN processes are kept alive per cache instance; when a
  new one is needed beyond that, the least recently used process is
  terminated first.
  """

  # Max number of open pipes.
  _PIPE_MAX_OPEN = 10

  def __init__(self):
    # State is per instance so that separate caches never see (or evict)
    # each other's processes.
    self._cmd2pipe = {}  # tuple(cmd) -> subprocess.Popen
    self._lru = []       # [(tuple(cmd), Popen)], most recently used first.

  def GetProcess(self, cmd):
    """Return a running process for cmd, spawning it if necessary.

    Args:
      cmd: the command line as a list of strings.

    Returns:
      The subprocess.Popen object for cmd, with text-mode stdin/stdout pipes.
    """
    cmd_tuple = tuple(cmd)  # Need to use a tuple as lists can't be dict keys.
    # Pipe already available?
    if cmd_tuple in self._cmd2pipe:
      pipe = self._cmd2pipe[cmd_tuple]
      # Update LRU: move this entry to the front.
      self._lru = [(cmd_tuple, pipe)] + [i for i in self._lru if i[0] != cmd_tuple]
      return pipe

    # Not cached, yet. Open a new one.

    # Check if too many are open, close the old ones (taken from the tail).
    while len(self._lru) >= self._PIPE_MAX_OPEN:
      open_cmd, open_pipe = self._lru.pop()
      del self._cmd2pipe[open_cmd]
      self.TerminateProcess(open_pipe)

    # Create and put into cache.
    pipe = self.SpawnProcess(cmd)
    self._cmd2pipe[cmd_tuple] = pipe
    self._lru = [(cmd_tuple, pipe)] + self._lru
    return pipe

  def SpawnProcess(self, cmd):
    """Start cmd with text-mode pipes attached to its stdin and stdout."""
    return subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, universal_newlines=True)

  def TerminateProcess(self, pipe):
    """Close the pipes of the given process, terminate it and reap it."""
    pipe.stdin.close()
    pipe.stdout.close()
    pipe.terminate()
    pipe.wait()

  def KillAllProcesses(self):
    """Terminate every cached process and empty the cache."""
    for _, open_pipe in self._lru:
      self.TerminateProcess(open_pipe)
    # Reset the instance state; assigning to bare local names here would
    # leave the terminated processes in the cache.
    self._cmd2pipe = {}
    self._lru = []
120
121
# One process cache per helper tool: llvm-symbolizer processes are obtained
# from the first (see CallLlvmSymbolizerForSet), llvm-cxxfilt processes from
# the second (see CallCppFilt).
_PIPE_ADDR2LINE_CACHE = ProcessCache()
_PIPE_CPPFILT_CACHE = ProcessCache()
124
125
126# Process cache cleanup on shutdown.
127
def CloseAllPipes():
  """Terminate every helper process held by the module's process caches."""
  for cache in (_PIPE_ADDR2LINE_CACHE, _PIPE_CPPFILT_CACHE):
    cache.KillAllProcesses()
131
132
# Shut the helper processes down when the interpreter exits normally.
atexit.register(CloseAllPipes)
134
135
def PipeTermHandler(signum, frame):
  """Signal handler: close all helper pipes, then exit immediately.

  Args:
    signum: number of the signal being handled (unused).
    frame: the interrupted stack frame (unused).
  """
  CloseAllPipes()
  # os._exit() ends the process without running atexit handlers; the helper
  # processes were already shut down by the call above.
  # NOTE(review): the exit status is 0 even though the process was
  # signalled -- confirm that callers expect this.
  os._exit(0)
139
140
# Install PipeTermHandler for abort, interrupt and terminate signals so the
# helper processes are closed before this process goes away.
for sig in (signal.SIGABRT, signal.SIGINT, signal.SIGTERM):
  signal.signal(sig, PipeTermHandler)
143
144
145
146
def ToolPath(tool, toolchain=None):
  """Return how to invoke the given tool.

  Args:
    tool: name of the tool binary.
    toolchain: optional directory to look in when the tool is not on PATH;
      FindToolchain() is consulted when this is not given.

  Returns:
    The bare tool name when it can be found on PATH, otherwise the tool name
    joined onto the toolchain directory.
  """
  on_path = shutil.which(tool)
  if on_path:
    return tool
  base_dir = toolchain if toolchain else FindToolchain()
  return os.path.join(base_dir, tool)
154
155
def FindToolchain():
  """Return the llvm binutils directory, locating it on first use.

  The directory is looked up once under ANDROID_BUILD_TOP and remembered in
  _CACHED_TOOLCHAIN; the chosen path is printed when it is first found.

  Raises:
    Exception: if the llvm-binutils-stable directory does not exist.
  """
  global _CACHED_TOOLCHAIN
  if not _CACHED_TOOLCHAIN:
    candidate = ANDROID_BUILD_TOP + "/prebuilts/clang/host/linux-x86/llvm-binutils-stable/"
    if not os.path.exists(candidate):
      raise Exception("Could not find llvm tool chain directory %s" % (candidate))
    _CACHED_TOOLCHAIN = candidate
    print("Using toolchain from:", _CACHED_TOOLCHAIN)
  return _CACHED_TOOLCHAIN
170
171
def SymbolInformation(lib, addr):
  """Look up symbol information for a single address.

  Args:
    lib: library (or executable) pathname containing symbols
    addr: string hexadecimal address

  Returns:
    A list of (source_symbol, source_location, object_symbol_with_offset)
    tuples.

    When the function has been inlined the list can hold several entries,
    the most deeply nested inlined location first. The list is never empty:
    a single (None, None, None) entry is returned when nothing is known.

    Usually you want to display the source_location and
    object_symbol_with_offset from the last element in the list.
  """
  by_addr = SymbolInformationForSet(lib, {addr})
  if by_addr:
    entries = by_addr.get(addr)
    if entries:
      return entries
  return [(None, None, None)]
193
194
def SymbolInformationForSet(lib, unique_addrs):
  """Look up symbol information for a set of addresses in one library.

  Combines the source-level records from CallLlvmSymbolizerForSet with the
  containing-function records from CallObjdumpForSet.

  Args:
    lib: library (or executable) pathname containing symbols
    unique_addrs: set of hexadecimal addresses

  Returns:
    None when lib is empty or when either lookup produced nothing.
    Otherwise a dictionary of the form {addr: [(source_symbol,
    source_location, object_symbol_with_offset)]} with a non-empty list for
    every requested address.

    When the function has been inlined a list can hold several entries, the
    most deeply nested inlined location first. Missing pieces of information
    are reported as None inside the tuples.

    Usually you want to display the source_location and
    object_symbol_with_offset from the last element in the list.
  """
  if not lib:
    return None

  source_by_addr = CallLlvmSymbolizerForSet(lib, unique_addrs)
  if not source_by_addr:
    return None

  objdump_by_addr = CallObjdumpForSet(lib, unique_addrs)
  if not objdump_by_addr:
    return None

  symbolized = {}
  for addr in unique_addrs:
    # Fall back to one empty frame so that every address gets an entry.
    frames = source_by_addr.get(addr) or [(None, None)]

    if addr in objdump_by_addr:
      object_symbol, object_offset = objdump_by_addr[addr]
      with_offset = FormatSymbolWithOffset(object_symbol, object_offset)
    else:
      with_offset = None

    # The same object-level symbol is attached to every (inlined) frame.
    symbolized[addr] = [(source_symbol, source_location, with_offset)
                        for source_symbol, source_location in frames]

  return symbolized
241
242
def CallLlvmSymbolizerForSet(lib, unique_addrs):
  """Look up line and symbol information for a set of addresses.

  Results are remembered per library in _SYMBOL_INFORMATION_ADDR2LINE_CACHE,
  so only addresses not seen before are sent to llvm-symbolizer.

  Args:
    lib: library (or executable) pathname containing symbols
    unique_addrs: set of string hexadecimal addresses to look up.

  Returns:
    None when lib is empty, when no symbol file can be found for it, or when
    the symbol path is a directory. Otherwise a dictionary of the form
    {addr: [(symbol, file:line)]} where each address has a list of
    associated symbols and locations, or an empty list if no symbol
    information was found.

    If the function has been inlined then the list may contain
    more than one element with the symbols for the most deeply
    nested inlined location appearing first.
  """
  if not lib:
    return None

  result = {}

  addr_cache = _SYMBOL_INFORMATION_ADDR2LINE_CACHE.get(lib)
  if addr_cache is None:
    # First request for this library: everything needs to be symbolized.
    addr_cache = {}
    _SYMBOL_INFORMATION_ADDR2LINE_CACHE[lib] = addr_cache
    addrs = sorted(unique_addrs)
  else:
    # Answer known addresses from the cache in a single pass and keep the
    # rest, still in sorted order, for the symbolizer.
    addrs = []
    for addr in sorted(unique_addrs):
      if addr in addr_cache:
        result[addr] = addr_cache[addr]
      else:
        addrs.append(addr)

    if not addrs:
      # Everything was cached, we're done.
      return result

  # Prefer the unstripped copy under SYMBOLS_DIR, fall back to lib itself.
  symbols = SYMBOLS_DIR + lib
  if not os.path.exists(symbols):
    symbols = lib
    if not os.path.exists(symbols):
      return None

  # Make sure the symbols path is not a directory.
  if os.path.isdir(symbols):
    return None

  cmd = [ToolPath("llvm-symbolizer"), "--functions", "--inlines",
         "--demangle", "--obj=" + symbols, "--output-style=GNU"]
  child = _PIPE_ADDR2LINE_CACHE.GetProcess(cmd)

  for addr in addrs:
    try:
      child.stdin.write("0x%s\n" % addr)
      child.stdin.flush()
      records = []
      first = True
      while True:
        # Each record is a symbol line followed by a location line.
        symbol = child.stdout.readline().strip()
        if not symbol:
          break
        location = child.stdout.readline().strip()
        records.append((symbol, location))
        if first:
          # Write a blank line as a sentinel so we know when to stop
          # reading inlines from the output.
          # The blank line will cause llvm-symbolizer to emit a blank line.
          child.stdin.write("\n")
          child.stdin.flush()
          first = False
    except IOError as e:
      # Remove the / in front of the library name to match other output.
      records = [(None, lib[1:] + "  ***Error: " + str(e))]
    result[addr] = records
    addr_cache[addr] = records
  return result
323
324
def CallObjdumpForSet(lib, unique_addrs):
  """Use objdump to find out the names of the containing functions.

  Results are remembered per library in _SYMBOL_INFORMATION_OBJDUMP_CACHE,
  so only addresses not seen before are looked up in a fresh disassembly.

  Args:
    lib: library (or executable) pathname containing symbols
    unique_addrs: set of string hexadecimal addresses to find the functions for.

  Returns:
    None when lib is empty or when no symbol file can be found for it.
    Otherwise a dictionary of the form {addr: (string symbol, offset)}.
    Addresses that do not show up as an instruction address in the
    disassembly are left out of the dictionary.
  """
  if not lib:
    return None

  addr_cache = _SYMBOL_INFORMATION_OBJDUMP_CACHE.get(lib)
  if addr_cache is None:
    addr_cache = {}
    _SYMBOL_INFORMATION_OBJDUMP_CACHE[lib] = addr_cache

  # Answer known addresses from the cache; the rest need a disassembly.
  result = {}
  pending = []
  for addr in unique_addrs:
    if addr in addr_cache:
      result[addr] = addr_cache[addr]
    else:
      pending.append(addr)

  if not pending:
    # Everything was cached (or nothing was asked for), we're done.
    return result

  # Prefer the unstripped copy under SYMBOLS_DIR, fall back to lib itself.
  symbols = SYMBOLS_DIR + lib
  if not os.path.exists(symbols):
    symbols = lib
    if not os.path.exists(symbols):
      return None

  # Order the targets by numeric value. Sorting the hex strings themselves
  # would misorder addresses of different lengths (e.g. "1000" < "fff"),
  # which breaks both the address range below and the in-order scan.
  targets = sorted((int(addr, 16), addr) for addr in pending)

  start_addr_dec = str(targets[0][0])
  stop_addr_dec = str(targets[-1][0] + 8)
  cmd = [ToolPath("llvm-objdump"),
         "--section=.text",
         "--demangle",
         "--disassemble",
         "--start-address=" + start_addr_dec,
         "--stop-address=" + stop_addr_dec,
         symbols]

  # Function lines look like:
  #   000177b0 <android::IBinder::~IBinder()+0x2c>:
  # We pull out the address and function first. Then we check for an optional
  # offset. This is tricky due to functions that look like "operator+(..)+0x2c"
  func_regexp = re.compile(r"(^[a-f0-9]*) <(.*)>:$")
  offset_regexp = re.compile(r"(.*)\+0x([a-f0-9]*)")

  # A disassembly line looks like:
  #   177b2:	b510      	push	{r4, lr}
  asm_regexp = re.compile(r"(^[ a-f0-9]*):[ a-f0-9]*.*$")

  current_symbol = None    # The current function symbol in the disassembly.
  current_symbol_addr = 0  # The address of the current function.
  target_index = 0  # Index of the target that we are currently looking for.

  # The context manager closes the pipe and waits for objdump, so the child
  # is always reaped, even when the scan stops early or raises.
  with subprocess.Popen(cmd, stdout=subprocess.PIPE, universal_newlines=True) as objdump:
    for line in objdump.stdout:
      # Is it a function line like:
      #   000177b0 <android::IBinder::~IBinder()>:
      components = func_regexp.match(line)
      if components:
        # This is a new function, so record the current function and its address.
        current_symbol_addr = int(components.group(1), 16)
        current_symbol = components.group(2)

        # Does it have an optional offset like: "foo(..)+0x2c"?
        components = offset_regexp.match(current_symbol)
        if components:
          current_symbol = components.group(1)
          offset = components.group(2)
          if offset:
            current_symbol_addr -= int(offset, 16)

      # Is it an disassembly line like:
      #   177b2:	b510      	push	{r4, lr}
      components = asm_regexp.match(line)
      if not components:
        continue

      line_addr = int(components.group(1), 16)
      # Consume every target at or before this instruction. Targets that
      # the disassembly has already passed can never match a later line, so
      # they are skipped instead of blocking all the targets behind them.
      while target_index < len(targets) and targets[target_index][0] <= line_addr:
        target_value, target_addr = targets[target_index]
        if target_value == line_addr:
          result[target_addr] = (current_symbol, target_value - current_symbol_addr)
          addr_cache[target_addr] = result[target_addr]
        target_index += 1

      if target_index >= len(targets):
        break

  return result
427
428
def CallCppFilt(mangled_symbol):
  """Demangle a C++ symbol with llvm-cxxfilt.

  Answers are remembered in _SYMBOL_DEMANGLING_CACHE. The llvm-cxxfilt binary
  is located on first use and remembered in _CACHED_CXX_FILT: it is taken
  from the directory returned by FindClangDir(), or, when that returns
  nothing, from a single ./clang-r*/bin/llvm-cxxfilt match.

  Args:
    mangled_symbol: the symbol text to send to llvm-cxxfilt.

  Returns:
    The stripped line that llvm-cxxfilt printed in response.

  Raises:
    Exception: if no llvm-cxxfilt binary, or more than one candidate, is found.
  """
  global _CACHED_CXX_FILT

  try:
    return _SYMBOL_DEMANGLING_CACHE[mangled_symbol]
  except KeyError:
    pass

  if not _CACHED_CXX_FILT:
    clang_dir = FindClangDir()
    if clang_dir:
      bundled = clang_dir + "/bin/llvm-cxxfilt"
      if not os.path.exists(bundled):
        raise Exception("bin/llvm-cxxfilt missing from " + clang_dir)
      candidates = [bundled]
    else:
      # When run in CI, we don't have a way to find the clang version.  But
      # llvm-cxxfilt should be available in the following relative path.
      candidates = glob.glob("./clang-r*/bin/llvm-cxxfilt")
      if len(candidates) > 1:
        raise Exception("Expected one llvm-cxxfilt but found many: " +
                        ", ".join(candidates))
      if not candidates:
        raise Exception("Could not find llvm-cxxfilt tool")
    _CACHED_CXX_FILT = max(candidates)

  process = _PIPE_CPPFILT_CACHE.GetProcess([_CACHED_CXX_FILT])
  # One symbol per line in, one demangled line out.
  process.stdin.write(mangled_symbol + "\n")
  process.stdin.flush()
  demangled_symbol = process.stdout.readline().strip()

  _SYMBOL_DEMANGLING_CACHE[mangled_symbol] = demangled_symbol
  return demangled_symbol
464
465
def FormatSymbolWithOffset(symbol, offset):
  """Return "symbol+offset" (decimal offset), or just symbol for offset 0."""
  return symbol if offset == 0 else "%s+%d" % (symbol, offset)
470
def FormatSymbolWithoutParameters(symbol):
  """Strip the parameter lists from a demangled function name.

  No attempt is made to parse the C++ signature: every top-level pair of
  parentheses with something between them is cut out. A symbol whose
  brackets do not pair up is returned unchanged.
  """
  if not symbol:
    return symbol

  stripped = symbol.replace(") const", ")")  # Strip const keyword.
  # Hide operator spellings that would look like unmatched angle brackets.
  for spelled, placeholder in (("operator<<", "operator\u00AB"),
                               ("operator>>", "operator\u00BB"),
                               ("operator->", "operator\u2192")):
    stripped = stripped.replace(spelled, placeholder)

  opener_for = {")": "(", ">": "<"}
  pending = []  # Closing brackets still waiting for their opening partner.

  # Walk backward so that cutting text never shifts the part still to scan.
  index = len(stripped)
  while index > 0:
    index -= 1
    char = stripped[index]
    if char in opener_for:
      if not pending:
        cut_end = index + 1  # One past the closer of a top-level pair.
      pending.append(char)
    elif char == "(" or char == "<":
      if not pending or opener_for[pending.pop()] != char:
        return symbol  # Malformed: character does not match its pair.
      if not pending and char == "(" and cut_end - index > 2:
        # Drop a non-empty top-level "(...)" group.
        stripped = stripped[:index] + stripped[cut_end:]

  if pending:
    return symbol  # Malformed: missing pair.

  return stripped.strip()
502
def SetBitness(lines):
  """Guess from stack trace lines whether the trace is 32-bit or 64-bit.

  The answer is stored in the module-level ARCH_IS_32BIT; nothing is
  returned. ARCH_IS_32BIT ends up False when no line is recognized.

  Args:
    lines: iterable of strings, the lines of the trace.
  """
  global ARCH_IS_32BIT

  # Raw strings keep the regex escapes out of Python's own string escape
  # processing.
  # A frame line whose address is exactly 8 or 16 hex digits wide.
  trace_line = re.compile(r"\#[0-9]+[ \t]+..[ \t]+([0-9a-f]{8}|[0-9a-f]{16})([ \t]+|$)")
  # A frame line whose address is written as 0x followed by hex digits.
  asan_trace_line = re.compile(r"\#[0-9]+[ \t]+0x([0-9a-f]+)[ \t]+")

  ARCH_IS_32BIT = False
  for line in lines:
    trace_match = trace_line.search(line)
    if trace_match:
      # Try to guess the arch, we know the bitness.
      ARCH_IS_32BIT = len(trace_match.group(1)) != 16
      break
    asan_trace_match = asan_trace_line.search(line)
    if asan_trace_match:
      # We might be able to guess the bitness by the length of the address.
      if len(asan_trace_match.group(1)) > 8:
        ARCH_IS_32BIT = False
        # We know for a fact this is 64 bit, so we are done.
        break
      # This might be 32 bit, or just a small address. Keep going in this
      # case, but if we couldn't figure anything else out, go with 32 bit.
      ARCH_IS_32BIT = True
530
class FindClangDirTests(unittest.TestCase):
  """Checks that FindClangDir() finds a clang directory in an Android tree."""

  # Skipped when ANDROID_BUILD_TOP is at its "." fallback value.
  @unittest.skipIf(ANDROID_BUILD_TOP == '.', 'Test only supported in an Android tree.')
  def test_clang_dir_found(self):
    self.assertIsNotNone(FindClangDir())
535
class SetBitnessTests(unittest.TestCase):
  """Checks the value SetBitness() stores in ARCH_IS_32BIT."""

  # An 8-digit address in a frame line means 32 bit.
  def test_32bit_check(self):
    global ARCH_IS_32BIT

    SetBitness(["#00 pc 000374e0"])
    self.assertTrue(ARCH_IS_32BIT)

  # A 16-digit address in a frame line means 64 bit.
  def test_64bit_check(self):
    global ARCH_IS_32BIT

    SetBitness(["#00 pc 00000000000374e0"])
    self.assertFalse(ARCH_IS_32BIT)

  # A lone 0x-style address of at most 8 digits falls back to 32 bit.
  def test_32bit_asan_trace_line_toolchain(self):
    global ARCH_IS_32BIT

    SetBitness(["#10 0xb5eeba5d  (/system/vendor/lib/egl/libGLESv1_CM_adreno.so+0xfa5d)"])
    self.assertTrue(ARCH_IS_32BIT)

  # A later 0x-style address longer than 8 digits overrides the 32 bit guess.
  def test_64bit_asan_trace_line_toolchain(self):
    global ARCH_IS_32BIT

    SetBitness(["#12 0x5d33bf  (/system/lib/libclang_rt.asan-arm-android.so+0x823bf)",
                "#12 0x11b35d33bf  (/system/lib/libclang_rt.asan-arm-android.so+0x823bf)"])
    self.assertFalse(ARCH_IS_32BIT)
561
class FormatSymbolWithoutParametersTests(unittest.TestCase):
  """Checks parameter stripping done by FormatSymbolWithoutParameters()."""

  # Symbols without any parentheses come back unchanged.
  def test_c(self):
    self.assertEqual(FormatSymbolWithoutParameters("foo"), "foo")
    self.assertEqual(FormatSymbolWithoutParameters("foo+42"), "foo+42")

  # A parameter list is removed; an empty "()" pair is kept.
  def test_simple(self):
    self.assertEqual(FormatSymbolWithoutParameters("foo(int i)"), "foo")
    self.assertEqual(FormatSymbolWithoutParameters("foo(int i)+42"), "foo+42")
    self.assertEqual(FormatSymbolWithoutParameters("bar::foo(int i)+42"), "bar::foo+42")
    self.assertEqual(FormatSymbolWithoutParameters("operator()"), "operator()")

  # Template arguments are kept, including parentheses nested inside them.
  def test_templates(self):
    self.assertEqual(FormatSymbolWithoutParameters("bar::foo<T>(vector<T>& v)"), "bar::foo<T>")
    self.assertEqual(FormatSymbolWithoutParameters("bar<T>::foo(vector<T>& v)"), "bar<T>::foo")
    self.assertEqual(FormatSymbolWithoutParameters("bar::foo<T>(vector<T<U>>& v)"), "bar::foo<T>")
    self.assertEqual(FormatSymbolWithoutParameters("bar::foo<(EnumType)0>(vector<(EnumType)0>& v)"),
                                                   "bar::foo<(EnumType)0>")

  # Every top-level parameter list in the symbol is removed.
  def test_nested(self):
    self.assertEqual(FormatSymbolWithoutParameters("foo(int i)::bar(int j)"), "foo::bar")

  # Symbols with unbalanced brackets come back unchanged.
  def test_unbalanced(self):
    self.assertEqual(FormatSymbolWithoutParameters("foo(bar(int i)"), "foo(bar(int i)")
    self.assertEqual(FormatSymbolWithoutParameters("foo)bar(int i)"), "foo)bar(int i)")
    self.assertEqual(FormatSymbolWithoutParameters("foo<bar(int i)"), "foo<bar(int i)")
    self.assertEqual(FormatSymbolWithoutParameters("foo>bar(int i)"), "foo>bar(int i)")
588
# Run the unit tests in this file when it is executed as a script.
if __name__ == '__main__':
    unittest.main(verbosity=2)
591