• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/python
2#
3# Copyright (C) 2013 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16
17"""Module for looking up symbolic debugging information.
18
19The information can include symbol names, offsets, and source locations.
20"""
21
22import glob
23import os
24import re
25import subprocess
26
# Root of the Android source tree. Fall back to the current directory when the
# variable is unset or empty: indexing os.environ directly raises KeyError for
# an unset variable, which made the "." fallback unreachable in that case.
ANDROID_BUILD_TOP = os.environ.get("ANDROID_BUILD_TOP") or "."
30
def FindSymbolsDir():
  """Ask the build system for the unstripped-symbols output directory.

  Runs make against build/core/config.mk, with ANDROID_BUILD_TOP as the
  working directory, to dump the TARGET_OUT_UNSTRIPPED variable.

  Returns:
    The make output with surrounding whitespace stripped, joined onto
    ANDROID_BUILD_TOP (os.path.join returns an absolute second component
    unchanged).

  Raises:
    OSError: if the shell cannot be started in ANDROID_BUILD_TOP.
  """
  cmd = ("CALLED_FROM_SETUP=true BUILD_SYSTEM=build/core "
         "SRC_TARGET_DIR=build/target make -f build/core/config.mk "
         "dumpvar-abs-TARGET_OUT_UNSTRIPPED")
  # Pass the directory via cwd= rather than os.chdir() so the working
  # directory of this (importing) process is never touched.
  child = subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True,
                           cwd=ANDROID_BUILD_TOP)
  # communicate() reads everything, closes the pipe and reaps the child, so
  # neither a file descriptor nor a zombie process is left behind.
  output = child.communicate()[0]
  return os.path.join(ANDROID_BUILD_TOP, output.strip())
42
# Symbols directory reported by the build system (TARGET_OUT_UNSTRIPPED),
# resolved once at import time by running make via FindSymbolsDir(). Library
# pathnames are appended to it directly (SYMBOLS_DIR + lib).
SYMBOLS_DIR = FindSymbolsDir()

# Target architecture name. Read by FindToolchain() to pick a toolchain and by
# StripPC() to decide whether bit 0 of an address is cleared.
# NOTE(review): presumably reassigned by importing scripts for non-arm
# targets -- confirm against callers.
ARCH = "arm"

# Cache for the toolchain directory selected by FindToolchain(); stays None
# until the first successful lookup.
TOOLCHAIN = None
48
def ToolPath(tool, toolchain=None):
  """Return a fully-qualified path to the specified tool.

  Args:
    tool: tool name without the target prefix, e.g. "addr2line" or "objdump".
    toolchain: directory to search. When empty or None, the directory
      returned by FindToolchain() is used.

  Returns:
    The path of an entry in the toolchain directory matching "*-<tool>".
    If several entries match, the lexicographically first one is returned.

  Raises:
    IndexError: if no entry in the toolchain directory matches.
  """
  if not toolchain:
    toolchain = FindToolchain()
  # glob order follows directory order, which is arbitrary; sort so that the
  # choice is deterministic when more than one prefixed tool is present.
  candidates = sorted(glob.glob(os.path.join(toolchain, "*-" + tool)))
  if not candidates:
    # Keep IndexError (what indexing the empty glob result used to raise) but
    # say which tool and directory were involved.
    raise IndexError("No '*-%s' tool found in %s" % (tool, toolchain))
  return candidates[0]
54
def FindToolchain():
  """Returns the toolchain directory matching ARCH.

  Assumes that you're lunched such that the necessary toolchain is either
  your primary (ANDROID_TOOLCHAIN) or secondary (ANDROID_TOOLCHAIN_2ND_ARCH).
  The result is cached in the module-level TOOLCHAIN, so the environment is
  only inspected until the first successful lookup.

  TODO: we could make this 'just work' for most users by just globbing the
  newest toolchains for every architecture out of prebuilts/, but other
  parts of this tool assume you're lunched correctly anyway.

  Returns:
    The toolchain directory path taken from the environment.

  Raises:
    Exception: if neither environment variable names a toolchain for ARCH,
      or if the selected toolchain has no addr2line.
  """
  global TOOLCHAIN
  if TOOLCHAIN is not None:
    return TOOLCHAIN

  # We use slightly different names from GCC, and there's only one toolchain
  # for x86/x86_64.
  gcc_names = {"arm64": "aarch64", "mips": "mipsel", "x86": "x86_64"}
  gcc_arch = gcc_names.get(ARCH, ARCH)

  # Either variable may legitimately be unset (e.g. no secondary
  # architecture); treat that as "no match" instead of failing with KeyError.
  tc1 = os.environ.get("ANDROID_TOOLCHAIN", "")
  tc2 = os.environ.get("ANDROID_TOOLCHAIN_2ND_ARCH", "")

  marker = "/" + gcc_arch + "-linux-"
  if marker in tc1:
    toolchain = tc1
  elif marker in tc2:
    toolchain = tc2
  else:
    raise Exception("Could not find tool chain for %s" % (gcc_arch))

  # ToolPath() raises IndexError when nothing matches, which used to bypass
  # the intended "No addr2line" diagnostic below.
  try:
    addr2line = ToolPath("addr2line", toolchain)
  except IndexError:
    addr2line = None
  if not addr2line or not os.path.exists(addr2line):
    raise Exception("No addr2line for %s" % (toolchain))

  TOOLCHAIN = toolchain
  # Parenthesised single-argument form prints the same text under the
  # Python 2 print statement and the Python 3 print function.
  print("Using toolchain from: %s" % TOOLCHAIN)
  return TOOLCHAIN
91
def SymbolInformation(lib, addr):
  """Look up symbol information for a single address.

  Thin wrapper around SymbolInformationForSet() for one address.

  Args:
    lib: library (or executable) pathname containing symbols
    addr: string hexadecimal address

  Returns:
    A list of (source_symbol, source_location, object_symbol_with_offset)
    tuples. With inlining there may be several entries, the most deeply
    nested inlined location first. The list is never empty: when nothing
    is known it is [(None, None, None)].

    Callers usually display source_location and object_symbol_with_offset
    from the last entry.
  """
  lookup = SymbolInformationForSet(lib, set([addr]))
  if lookup:
    entries = lookup.get(addr)
    if entries:
      return entries
  # No result at all, or no (non-empty) entry for this address.
  return [(None, None, None)]
113
114
def SymbolInformationForSet(lib, unique_addrs):
  """Look up symbol information for a set of addresses in one library.

  Combines the source-level view from CallAddr2LineForSet() with the
  object-level view from CallObjdumpForSet().

  Args:
    lib: library (or executable) pathname containing symbols
    unique_addrs: set of hexadecimal address strings

  Returns:
    None when lib is empty or when either tool lookup yields nothing.
    Otherwise a dictionary {addr: [(source_symbol, source_location,
    object_symbol_with_offset)]} with a non-empty list per address; an
    address without source information gets a single entry whose first two
    fields are None.

    With inlining a list may hold several entries, the most deeply nested
    inlined location first. Callers usually display source_location and
    object_symbol_with_offset from the last entry.
  """
  if not lib:
    return None

  line_info = CallAddr2LineForSet(lib, unique_addrs)
  if not line_info:
    return None

  objdump_info = CallObjdumpForSet(lib, unique_addrs)
  if not objdump_info:
    return None

  combined = {}
  for address in unique_addrs:
    # Fall back to one placeholder record when addr2line had nothing.
    sources = line_info.get(address) or [(None, None)]

    symbol_with_offset = None
    if address in objdump_info:
      name, delta = objdump_info[address]
      symbol_with_offset = FormatSymbolWithOffset(name, delta)

    # The same object-level symbol is attached to every inline frame.
    combined[address] = [(src_symbol, src_location, symbol_with_offset)
                         for (src_symbol, src_location) in sources]

  return combined
161
162
def CallAddr2LineForSet(lib, unique_addrs):
  """Look up line and symbol information for a set of addresses.

  All addresses are fed, one at a time, to a single addr2line process over
  its stdin; the answers are read back from its stdout.

  Args:
    lib: library (or executable) pathname containing symbols
    unique_addrs: set of string hexadecimal addresses to look up.

  Returns:
    None if lib is empty or SYMBOLS_DIR + lib does not exist. Otherwise a
    dictionary of the form {addr: [(symbol, file:line)]} where each address
    has a list of associated symbols and locations or an empty list if no
    symbol information was found. If addr2line stops producing output, the
    addresses not yet processed are absent from the dictionary.

    If the function has been inlined then the list may contain
    more than one element with the symbols for the most deeply
    nested inlined location appearing first.
  """
  if not lib:
    return None

  symbols = SYMBOLS_DIR + lib
  if not os.path.exists(symbols):
    return None

  cmd = [ToolPath("addr2line"), "--functions", "--inlines",
         "--demangle", "--exe=" + symbols]
  child = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)

  result = {}
  try:
    for addr in sorted(unique_addrs):
      child.stdin.write("0x%s\n" % addr)
      child.stdin.flush()
      records = []
      first = True
      hit_eof = False
      while True:
        symbol_line = child.stdout.readline()
        location_line = child.stdout.readline()
        # readline() returns "" only at end of file. Without this check a
        # dead addr2line made the loop append ("", "") records forever.
        if not symbol_line or not location_line:
          hit_eof = True
          break
        symbol = symbol_line.strip()
        if symbol == "??":
          symbol = None
        location = location_line.strip()
        if location == "??:0":
          location = None
        if symbol is None and location is None:
          break
        records.append((symbol, location))
        if first:
          # Write a blank line as a sentinel so we know when to stop
          # reading inlines from the output.
          # The blank line will cause addr2line to emit "??\n??:0\n".
          child.stdin.write("\n")
          # Flush explicitly so the sentinel reaches the child even if the
          # pipe is buffered; otherwise the next readline() could block.
          child.stdin.flush()
          first = False
      result[addr] = records
      if hit_eof:
        # The child is gone; further writes could only fail.
        break
  finally:
    child.stdin.close()
    child.stdout.close()
    # Reap the child so it does not linger as a zombie process.
    child.wait()
  return result
218
219
def StripPC(addr):
  """Strips the Thumb bit from a program counter address when appropriate.

  Args:
    addr: the program counter address, as an integer

  Returns:
    addr with bit 0 cleared when the module-level ARCH is "arm"; addr
    unchanged for every other architecture.
  """
  # ARCH is only read here, so no `global` declaration is required.
  return (addr & ~1) if ARCH == "arm" else addr
234
def CallObjdumpForSet(lib, unique_addrs):
  """Use objdump to find out the names of the containing functions.

  Disassembles the .text range spanning the requested addresses and walks the
  output once, remembering the most recent function header and recording it
  for every requested address that coincides with an instruction address.

  Args:
    lib: library (or executable) pathname containing symbols
    unique_addrs: set of string hexadecimal addresses to find the functions
      for.

  Returns:
    None if lib is empty or SYMBOLS_DIR + lib does not exist. Otherwise a
    dictionary of the form {addr: (string symbol, offset)}, keyed by the
    original address strings; addresses that do not fall on a disassembled
    instruction are omitted. An empty address set yields an empty dictionary.
  """
  if not lib:
    return None

  symbols = SYMBOLS_DIR + lib
  if not os.path.exists(symbols):
    return None

  # (stripped pc, original string) pairs in ascending *numeric* order. Sorting
  # the raw strings would misorder addresses of different widths ("9f0" sorts
  # after "1000"), breaking both the address range and the single forward
  # pass over the disassembly below.
  targets = sorted((StripPC(int(addr, 16)), addr) for addr in unique_addrs)
  if not targets:
    return {}

  start_addr_dec = str(targets[0][0])
  stop_addr_dec = str(targets[-1][0] + 8)
  cmd = [ToolPath("objdump"),
         "--section=.text",
         "--demangle",
         "--disassemble",
         "--start-address=" + start_addr_dec,
         "--stop-address=" + stop_addr_dec,
         symbols]

  # Function lines look like:
  #   000177b0 <android::IBinder::~IBinder()+0x2c>:
  # We pull out the address and function first. Then we check for an optional
  # offset. This is tricky due to functions that look like "operator+(..)+0x2c"
  func_regexp = re.compile(r"(^[a-f0-9]*) \<(.*)\>:$")
  offset_regexp = re.compile(r"(.*)\+0x([a-f0-9]*)")

  # A disassembly line looks like:
  #   177b2:	b510      	push	{r4, lr}
  asm_regexp = re.compile(r"(^[ a-f0-9]*):[ a-f0-9]*.*$")

  current_symbol = None    # The current function symbol in the disassembly.
  current_symbol_addr = 0  # The address of the current function.
  target_index = 0  # Index of the next target we are looking for.

  child = subprocess.Popen(cmd, stdout=subprocess.PIPE)
  stream = child.stdout
  result = {}
  try:
    for line in stream:
      # Is it a function line like:
      #   000177b0 <android::IBinder::~IBinder()>:
      components = func_regexp.match(line)
      if components:
        # This is a new function, so record the current function and its
        # address.
        current_symbol_addr = int(components.group(1), 16)
        current_symbol = components.group(2)

        # Does it have an optional offset like: "foo(..)+0x2c"?
        components = offset_regexp.match(current_symbol)
        if components:
          current_symbol = components.group(1)
          offset = components.group(2)
          if offset:
            current_symbol_addr -= int(offset, 16)

      # Is it a disassembly line like:
      #   177b2:	b510      	push	{r4, lr}
      components = asm_regexp.match(line)
      if not components:
        continue
      i_addr = int(components.group(1), 16)

      # Consume every pending target at or below this instruction address.
      # Targets equal to it are recorded (several strings may strip to the
      # same pc); targets below it were passed without an exact match and
      # can never match later, so they are skipped rather than allowed to
      # block all following targets.
      while target_index < len(targets) and targets[target_index][0] <= i_addr:
        i_target, target_addr = targets[target_index]
        if i_target == i_addr:
          result[target_addr] = (current_symbol,
                                 i_target - current_symbol_addr)
        target_index += 1
      if target_index >= len(targets):
        break
  finally:
    stream.close()
    # Reap the child so it does not linger as a zombie process.
    child.wait()

  return result
317
318
def CallCppFilt(mangled_symbol):
  """Demangle a symbol by piping it through the toolchain's c++filt.

  Args:
    mangled_symbol: the symbol name to hand to c++filt.

  Returns:
    The first line c++filt prints, with surrounding whitespace stripped
    (an empty string if it prints nothing).
  """
  cmd = [ToolPath("c++filt")]
  process = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
  # communicate() sends the input, closes stdin, drains stdout and waits for
  # the child, so the process is reaped instead of being left as a zombie.
  output = process.communicate(mangled_symbol + "\n")[0]
  # Same text a single readline() would have returned, minus the newline.
  return output.split("\n", 1)[0].strip()
328
def FormatSymbolWithOffset(symbol, offset):
  """Render a symbol and an integer offset as "symbol+offset".

  Args:
    symbol: the symbol name
    offset: integer offset from the start of the symbol

  Returns:
    symbol itself when offset is 0, otherwise the string
    "<symbol>+<offset>" with the offset in decimal.
  """
  return symbol if offset == 0 else ("%s+%d" % (symbol, offset))
333