• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/python
2#
3# Copyright (C) 2013 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16
17"""Module for looking up symbolic debugging information.
18
19The information can include symbol names, offsets, and source locations.
20"""
21
22import glob
23import os
24import platform
25import re
26import subprocess
27import unittest
28
# Root of the Android source tree, taken from the environment. Fall back to
# the current directory when the variable is unset *or* empty; indexing
# os.environ directly would raise KeyError before the fallback could apply.
ANDROID_BUILD_TOP = os.environ.get("ANDROID_BUILD_TOP") or "."
32
def FindSymbolsDir():
  """Return the build's TARGET_OUT_UNSTRIPPED directory.

  Runs make's "dumpvar-abs-TARGET_OUT_UNSTRIPPED" target with
  ANDROID_BUILD_TOP as the working directory and joins the
  whitespace-stripped output onto ANDROID_BUILD_TOP.

  Returns:
    The path printed by make, joined onto ANDROID_BUILD_TOP.
  """
  cmd = ("CALLED_FROM_SETUP=true BUILD_SYSTEM=build/core "
         "SRC_TARGET_DIR=build/target make -f build/core/config.mk "
         "dumpvar-abs-TARGET_OUT_UNSTRIPPED")
  # Pass cwd= rather than calling os.chdir() so this process's own working
  # directory is never touched, and use communicate() so the pipe is closed
  # and the child is waited for instead of being left behind.
  child = subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True,
                           cwd=ANDROID_BUILD_TOP)
  output = child.communicate()[0]
  return os.path.join(ANDROID_BUILD_TOP, output.strip())
44
# Directory that library paths are appended to when locating symbol files.
# Computed once, at import time, by FindSymbolsDir() (which runs make).
SYMBOLS_DIR = FindSymbolsDir()

# Target architecture. FindToolchain() picks the toolchain from it and
# StripPC() only clears the low address bit when it is "arm".
ARCH = "arm"


# These are private. Do not access them from other modules.
# FindToolchain() stores its last result here together with the ARCH value it
# was computed for, so the search is repeated only when ARCH changes.
_CACHED_TOOLCHAIN = None
_CACHED_TOOLCHAIN_ARCH = None
53
54
def ToolPath(tool, toolchain=None):
  """Return a fully-qualified path to the specified tool.

  Args:
    tool: unprefixed tool name, e.g. "addr2line" or "objdump"
    toolchain: directory to search; FindToolchain() is used when omitted

  Returns:
    The first path inside the toolchain directory that matches "*-<tool>".

  Raises:
    IndexError: if nothing in the toolchain directory matches.
  """
  if not toolchain:
    toolchain = FindToolchain()
  matches = glob.glob(os.path.join(toolchain, "*-" + tool))
  if not matches:
    # Indexing the empty glob result used to raise a bare "list index out of
    # range". Keep the exception type so existing handlers still work, but
    # say which tool was missing and where we looked.
    raise IndexError("No %s tool found in %s" % (tool, toolchain))
  return matches[0]
60
61
def FindToolchain():
  """Returns the toolchain matching ARCH.

  The result is cached together with the ARCH value it was found for, so the
  prebuilts directory is only searched again after ARCH changes.

  Returns:
    The path of the selected toolchain's bin directory.

  Raises:
    Exception: if no toolchain directory exists for ARCH, or if the selected
      toolchain does not contain an addr2line binary.
  """
  global _CACHED_TOOLCHAIN, _CACHED_TOOLCHAIN_ARCH
  if _CACHED_TOOLCHAIN is not None and _CACHED_TOOLCHAIN_ARCH == ARCH:
    return _CACHED_TOOLCHAIN

  # We use slightly different names from GCC, and there's only one toolchain
  # for x86/x86_64. Note that these are the names of the top-level directory
  # rather than the _different_ names used lower down the directory hierarchy!
  gcc_dir_overrides = {"arm64": "aarch64", "mips64": "mips", "x86_64": "x86"}
  gcc_dir = gcc_dir_overrides.get(ARCH, ARCH)

  os_name = platform.system().lower()

  available_toolchains = glob.glob("%s/prebuilts/gcc/%s-x86/%s/*-linux-*/bin/" % (ANDROID_BUILD_TOP, os_name, gcc_dir))
  if not available_toolchains:
    raise Exception("Could not find tool chain for %s" % (ARCH))

  # Pick the match that sorts last (string order).
  toolchain = sorted(available_toolchains)[-1]

  # ToolPath() raises IndexError when nothing matches, which used to escape
  # before the existence check below could report the intended error.
  try:
    addr2line = ToolPath("addr2line", toolchain)
  except IndexError:
    addr2line = None
  if not addr2line or not os.path.exists(addr2line):
    raise Exception("No addr2line for %s" % (toolchain))

  _CACHED_TOOLCHAIN = toolchain
  _CACHED_TOOLCHAIN_ARCH = ARCH
  # Parenthesized single-argument form prints the same text whether print is
  # a statement or a function.
  print("Using %s toolchain from: %s" % (_CACHED_TOOLCHAIN_ARCH, _CACHED_TOOLCHAIN))
  return _CACHED_TOOLCHAIN
94
95
def SymbolInformation(lib, addr):
  """Look up symbol information about a single address.

  Args:
    lib: library (or executable) pathname containing symbols
    addr: string hexadecimal address

  Returns:
    A list of (source_symbol, source_location, object_symbol_with_offset)
    tuples.

    When the function has been inlined the list can hold several entries,
    ordered from the most deeply nested inlined location outwards.  When
    nothing could be looked up the list is [(None, None, None)], so it is
    never empty.

    Usually you want to display the source_location and
    object_symbol_with_offset from the last element in the list.
  """
  lookup = SymbolInformationForSet(lib, set([addr]))
  if lookup:
    records = lookup.get(addr)
    if records:
      return records
  return [(None, None, None)]
117
118
def SymbolInformationForSet(lib, unique_addrs):
  """Look up symbol information for a set of addresses from the given library.

  Args:
    lib: library (or executable) pathname containing symbols
    unique_addrs: set of hexadecimal addresses

  Returns:
    None if lib is empty or if either the addr2line or the objdump lookup
    produced nothing.  Otherwise a dictionary of the form
    {addr: [(source_symbol, source_location, object_symbol_with_offset)]}
    with one non-empty list per requested address.

    When the function has been inlined a list can hold several entries,
    ordered from the most deeply nested inlined location outwards.  An
    address with no source information gets a single entry whose
    source_symbol and source_location are None.

    Usually you want to display the source_location and
    object_symbol_with_offset from the last element in the list.
  """
  if not lib:
    return None

  line_info = CallAddr2LineForSet(lib, unique_addrs)
  if not line_info:
    return None

  objdump_info = CallObjdumpForSet(lib, unique_addrs)
  if not objdump_info:
    return None

  result = {}
  for addr in unique_addrs:
    # Fall back to one placeholder record so every address gets an entry.
    source_records = line_info.get(addr) or [(None, None)]

    symbol_with_offset = None
    if addr in objdump_info:
      symbol, offset = objdump_info[addr]
      symbol_with_offset = FormatSymbolWithOffset(symbol, offset)

    merged = []
    for src_symbol, src_location in source_records:
      merged.append((src_symbol, src_location, symbol_with_offset))
    result[addr] = merged

  return result
165
166
def CallAddr2LineForSet(lib, unique_addrs):
  """Look up line and symbol information for a set of addresses.

  Args:
    lib: library (or executable) pathname containing symbols
    unique_addrs: set of string hexadecimal addresses to look up.

  Returns:
    None if lib is empty or its symbols file does not exist.  Otherwise a
    dictionary of the form {addr: [(symbol, file:line)]} where
    each address has a list of associated symbols and locations
    or an empty list if no symbol information was found.

    If the function has been inlined then the list may contain
    more than one element with the symbols for the most deeply
    nested inlined location appearing first.
  """
  if not lib:
    return None

  symbols = SYMBOLS_DIR + lib
  if not os.path.exists(symbols):
    return None

  cmd = [ToolPath("addr2line"), "--functions", "--inlines",
      "--demangle", "--exe=" + symbols]
  child = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)

  result = {}
  child_exited = False
  try:
    for addr in sorted(unique_addrs):
      records = []
      if not child_exited:
        child.stdin.write("0x%s\n" % addr)
        child.stdin.flush()
        first = True
        while True:
          symbol_line = child.stdout.readline()
          location_line = child.stdout.readline()
          if not symbol_line or not location_line:
            # readline() only returns an empty string at end-of-file, i.e.
            # addr2line has gone away.  Without this check the loop would
            # append empty records forever.
            child_exited = True
            break
          symbol = symbol_line.strip()
          if symbol == "??":
            symbol = None
          location = location_line.strip()
          if location == "??:0":
            location = None
          if symbol is None and location is None:
            break
          records.append((symbol, location))
          if first:
            # Write a blank line as a sentinel so we know when to stop
            # reading inlines from the output.
            # The blank line will cause addr2line to emit "??\n??:0\n".
            child.stdin.write("\n")
            child.stdin.flush()
            first = False
      # Addresses left over after the child exited get an empty list.
      result[addr] = records
  finally:
    # Always release both pipes and reap the child, even on error.
    child.stdin.close()
    child.stdout.close()
    child.wait()
  return result
221
222
def StripPC(addr):
  """Strips the Thumb bit from a program counter address when appropriate.

  Only done when ARCH is "arm"; for every other architecture the address is
  returned unchanged.

  Args:
    addr: the program counter address (an integer)

  Returns:
    The program counter address with its lowest bit cleared, or addr itself.
  """
  return addr & ~1 if ARCH == "arm" else addr
236
237
def CallObjdumpForSet(lib, unique_addrs):
  """Use objdump to find out the names of the containing functions.

  Args:
    lib: library (or executable) pathname containing symbols
    unique_addrs: set of string hexadecimal addresses to find the functions for.

  Returns:
    None if lib is empty or its symbols file does not exist.  Otherwise a
    dictionary of the form {addr: (string symbol, offset)}.  Addresses that
    do not coincide with the address of a disassembled line are left out, and
    an empty dictionary is returned for an empty address set.
  """
  if not lib:
    return None

  symbols = SYMBOLS_DIR + lib
  if not os.path.exists(symbols):
    return None

  # Pair every address string with its numeric (PC-stripped) value and order
  # by that value.  Sorting the strings themselves mis-orders hex numbers of
  # different lengths ("10000" < "a000"), which breaks both the start/stop
  # range below and the in-order matching against the disassembly.
  targets = sorted((StripPC(int(addr, 16)), addr) for addr in unique_addrs)
  if not targets:
    return {}

  start_addr_dec = str(targets[0][0])
  stop_addr_dec = str(targets[-1][0] + 8)
  cmd = [ToolPath("objdump"),
         "--section=.text",
         "--demangle",
         "--disassemble",
         "--start-address=" + start_addr_dec,
         "--stop-address=" + stop_addr_dec,
         symbols]

  # Function lines look like:
  #   000177b0 <android::IBinder::~IBinder()+0x2c>:
  # We pull out the address and function first. Then we check for an optional
  # offset. This is tricky due to functions that look like "operator+(..)+0x2c"
  func_regexp = re.compile(r"(^[a-f0-9]*) <(.*)>:$")
  offset_regexp = re.compile(r"(.*)\+0x([a-f0-9]*)")

  # A disassembly line looks like:
  #   177b2:	b510      	push	{r4, lr}
  asm_regexp = re.compile(r"(^[ a-f0-9]*):[ a-f0-9]*.*$")

  current_symbol = None    # The current function symbol in the disassembly.
  current_symbol_addr = 0  # The address of the current function.
  target_index = 0  # Index of the next entry of targets still to be matched.

  child = subprocess.Popen(cmd, stdout=subprocess.PIPE)
  stream = child.stdout
  result = {}
  try:
    for line in stream:
      # Is it a function line like:
      #   000177b0 <android::IBinder::~IBinder()>:
      components = func_regexp.match(line)
      if components:
        # This is a new function, so record the current function and its
        # address.
        current_symbol_addr = int(components.group(1), 16)
        current_symbol = components.group(2)

        # Does it have an optional offset like: "foo(..)+0x2c"?
        components = offset_regexp.match(current_symbol)
        if components:
          current_symbol = components.group(1)
          offset = components.group(2)
          if offset:
            current_symbol_addr -= int(offset, 16)

      # Is it a disassembly line like:
      #   177b2:	b510      	push	{r4, lr}
      components = asm_regexp.match(line)
      if components:
        i_addr = int(components.group(1), 16)
        # Consume every target at or below this line's address.  Equal ones
        # are recorded (several address strings can strip to the same PC);
        # smaller ones never matched a line and are dropped so that they
        # cannot block the targets that follow.
        while (target_index < len(targets) and
               targets[target_index][0] <= i_addr):
          i_target, target_addr = targets[target_index]
          if i_target == i_addr:
            result[target_addr] = (current_symbol,
                                   i_target - current_symbol_addr)
          target_index += 1
        if target_index >= len(targets):
          break
  finally:
    # Close the pipe and reap objdump even if we stopped reading early.
    stream.close()
    child.wait()

  return result
320
321
def CallCppFilt(mangled_symbol):
  """Run a symbol name through the toolchain's c++filt.

  Args:
    mangled_symbol: the symbol name to pass to c++filt

  Returns:
    The first line of c++filt's output with surrounding whitespace stripped.
  """
  cmd = [ToolPath("c++filt")]
  process = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
  # communicate() sends the input, closes stdin, drains stdout and waits for
  # the child, so neither the pipes nor the process are left behind.
  output = process.communicate(mangled_symbol + "\n")[0]
  return output.split("\n", 1)[0].strip()
331
332
def FormatSymbolWithOffset(symbol, offset):
  """Format a symbol and an offset as "symbol+offset".

  Args:
    symbol: the symbol name
    offset: integer offset from the start of the symbol

  Returns:
    symbol itself when offset is zero, otherwise the string "symbol+offset"
    with the offset written in decimal.
  """
  if offset != 0:
    return "%s+%d" % (symbol, offset)
  return symbol
337
338
339
class FindToolchainTests(unittest.TestCase):
  """Checks that FindToolchain() succeeds for each supported ARCH value."""

  def setUp(self):
    # The tests reassign the module-level ARCH. Remember the current value so
    # tearDown can restore it instead of leaving the last ABI tested in place.
    self._saved_arch = ARCH

  def tearDown(self):
    global ARCH
    ARCH = self._saved_arch

  def assert_toolchain_found(self, abi):
    """Point ARCH at abi and look up its toolchain."""
    global ARCH
    ARCH = abi
    FindToolchain() # Will throw on failure.

  def test_toolchains_found(self):
    # mips64 is included because FindToolchain() has an explicit mapping
    # for it.
    for abi in ("arm", "arm64", "mips", "mips64", "x86", "x86_64"):
      self.assert_toolchain_found(abi)
352
353
# Run the unit tests defined in this file when it is executed as a script.
if __name__ == '__main__':
    unittest.main()
356