• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/python
2#
3# Copyright (C) 2013 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16
17"""Module for looking up symbolic debugging information.
18
19The information can include symbol names, offsets, and source locations.
20"""
21
22import os
23import re
24import subprocess
25
# Root of the Android source tree.  Fall back to the current directory when
# the variable is unset *or* empty; indexing os.environ directly would raise
# KeyError for the unset case before the fallback could ever apply.
ANDROID_BUILD_TOP = os.environ.get("ANDROID_BUILD_TOP") or "."
29
def FindSymbolsDir():
  """Ask the build system for the value of TARGET_OUT_UNSTRIPPED.

  Runs make against build/core/config.mk with ANDROID_BUILD_TOP as the
  working directory and captures what the dumpvar target prints.

  Returns:
    The stripped make output joined onto ANDROID_BUILD_TOP.  If make prints
    an absolute path (presumably the case for a "dumpvar-abs-" target; not
    verifiable from this file) os.path.join returns that path unchanged.  If
    make prints nothing the result is just ANDROID_BUILD_TOP with a trailing
    separator.
  """
  cmd = ("CALLED_FROM_SETUP=true BUILD_SYSTEM=build/core "
         "SRC_TARGET_DIR=build/target make -f build/core/config.mk "
         "dumpvar-abs-TARGET_OUT_UNSTRIPPED")
  # cwd= runs only the child in the build tree, so this process's working
  # directory is never disturbed.  universal_newlines yields text output so
  # it can be joined with the str ANDROID_BUILD_TOP on any Python version.
  child = subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True,
                           cwd=ANDROID_BUILD_TOP, universal_newlines=True)
  # communicate() drains and closes the pipe and reaps the child, so neither
  # a file descriptor nor a zombie process is left behind.
  output = child.communicate()[0]
  return os.path.join(ANDROID_BUILD_TOP, output.strip())
41
# Directory that symbol files are looked up under (library paths are appended
# to it directly).  Resolved once, at import time, by running make through
# FindSymbolsDir().
SYMBOLS_DIR = FindSymbolsDir()

# Target architecture.  Selects the candidate toolchains in FindToolchain()
# and decides whether StripPC() clears the low address bit.
ARCH = "arm"

# Cache for the (label, platform, target) tuple found by FindToolchain();
# None until the first successful lookup.
TOOLCHAIN_INFO = None
47
def Uname():
  """Return the host tag used in prebuilt/<...> and out/host/<...> paths.

  Returns:
    "linux-x86" on Linux; "darwin-x86" on Darwin when the machine field is
    "i386" or "x86_64", otherwise "darwin-ppc"; on any other system the raw
    system name reported by os.uname().
  """
  system_name = os.uname()[0]
  if system_name == "Linux":
    return "linux-x86"
  if system_name != "Darwin":
    return system_name
  # Darwin: tell Intel hosts apart using the machine field.
  machine = os.uname()[-1]
  if machine in ("i386", "x86_64"):
    return "darwin-x86"
  return "darwin-ppc"
59
def ToolPath(tool, toolchain_info=None):
  """Build the full path of a prebuilt toolchain binary.

  Args:
    tool: bare tool name, e.g. "addr2line" or "objdump".
    toolchain_info: optional (label, platform, target) tuple; when omitted
      (or falsy) the result of FindToolchain() is used.

  Returns:
    ANDROID_BUILD_TOP/prebuilts/gcc/<Uname()>/<platform>/<label>/bin/
    <target>-<tool>
  """
  chain = toolchain_info or FindToolchain()
  label, platform, target = chain
  binary_name = "-".join((target, tool))
  return os.path.join(ANDROID_BUILD_TOP, "prebuilts/gcc", Uname(), platform,
                      label, "bin", binary_name)
67
def FindToolchain():
  """Locate an installed toolchain for ARCH, caching it in TOOLCHAIN_INFO.

  A candidate counts as installed when its addr2line binary exists at the
  path computed by ToolPath().

  Args:
    None

  Returns:
    A (label, platform, target) tuple of strings: the toolchain directory
    label, the platform directory, and the tool name prefix.

  Raises:
    KeyError: ARCH is "arm" and TARGET_GCC_VERSION is not in the environment.
    Exception: no candidate toolchain has an addr2line binary.
  """
  global TOOLCHAIN_INFO
  if TOOLCHAIN_INFO is not None:
    return TOOLCHAIN_INFO

  # Candidate toolchains for the current architecture, newest first.
  candidates = []
  if ARCH == "arm":
    gcc_version = os.environ["TARGET_GCC_VERSION"]
    candidates.append(("arm-linux-androideabi-" + gcc_version,
                       "arm",
                       "arm-linux-androideabi"))
  elif ARCH == "x86":
    candidates.append(("i686-android-linux-4.4.3",
                       "x86",
                       "i686-android-linux"))

  # The first candidate whose addr2line exists on disk wins and is cached.
  for candidate in candidates:
    if os.path.exists(ToolPath("addr2line", candidate)):
      TOOLCHAIN_INFO = candidate
      return candidate

  raise Exception("Could not find tool chain")
102
def SymbolInformation(lib, addr):
  """Look up symbol information for a single address.

  Args:
    lib: library (or executable) pathname containing symbols
    addr: string hexadecimal address

  Returns:
    A list of (source_symbol, source_location, object_symbol_with_offset)
    tuples.  When the function was inlined the list may hold several
    entries, the most deeply nested inlined location first.  The list is
    never empty: if nothing could be resolved it is [(None, None, None)].

    Usually you want to display the source_location and
    object_symbol_with_offset from the last element in the list.
  """
  by_addr = SymbolInformationForSet(lib, {addr})
  if by_addr:
    entries = by_addr.get(addr)
    if entries:
      return entries
  return [(None, None, None)]
124
125
def SymbolInformationForSet(lib, unique_addrs):
  """Look up symbol information for a set of addresses in one library.

  Combines the source-level records from CallAddr2LineForSet() with the
  containing-function data from CallObjdumpForSet().

  Args:
    lib: library (or executable) pathname containing symbols
    unique_addrs: set of string hexadecimal addresses

  Returns:
    None when lib is empty or when either lookup returns nothing.
    Otherwise a dictionary of the form {addr: [(source_symbol,
    source_location, object_symbol_with_offset)]} with a non-empty list for
    every address; fields that could not be resolved are None.

    When the function was inlined the list may hold several entries, the
    most deeply nested inlined location first.  Usually you want to display
    the source_location and object_symbol_with_offset from the last element.
  """
  if not lib:
    return None

  source_by_addr = CallAddr2LineForSet(lib, unique_addrs)
  if not source_by_addr:
    return None

  object_by_addr = CallObjdumpForSet(lib, unique_addrs)
  if not object_by_addr:
    return None

  combined = {}
  for addr in unique_addrs:
    # Fall back to a single empty frame so every address gets an entry.
    frames = source_by_addr.get(addr) or [(None, None)]

    decorated_symbol = None
    if addr in object_by_addr:
      (object_symbol, object_offset) = object_by_addr[addr]
      decorated_symbol = FormatSymbolWithOffset(object_symbol, object_offset)

    entries = []
    for (source_symbol, source_location) in frames:
      entries.append((source_symbol, source_location, decorated_symbol))
    combined[addr] = entries

  return combined
172
173
def CallAddr2LineForSet(lib, unique_addrs):
  """Look up line and symbol information for a set of addresses.

  Feeds the addresses one at a time to a single addr2line process and reads
  back the (function, file:line) pairs it prints for each.

  Args:
    lib: library (or executable) pathname containing symbols
    unique_addrs: set of string hexadecimal addresses to look up.

  Returns:
    None if lib is empty or SYMBOLS_DIR + lib does not exist.  Otherwise a
    dictionary of the form {addr: [(symbol, file:line)]} where each address
    has a list of associated symbols and locations, or an empty list if no
    symbol information was found.  If addr2line stops producing output
    early, addresses that were not reached are absent from the dictionary.

    If the function has been inlined then the list may contain
    more than one element with the symbols for the most deeply
    nested inlined location appearing first.
  """
  if not lib:
    return None

  symbols = SYMBOLS_DIR + lib
  if not os.path.exists(symbols):
    return None

  cmd = [ToolPath("addr2line"), "--functions", "--inlines",
         "--demangle", "--exe=" + symbols]
  # Text-mode pipes so the str reads and writes below behave the same on
  # every Python version.
  child = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                           universal_newlines=True)

  result = {}
  try:
    for addr in sorted(unique_addrs):
      child.stdin.write("0x%s\n" % addr)
      child.stdin.flush()
      records = []
      first = True
      hit_eof = False
      while True:
        symbol_line = child.stdout.readline()
        location_line = child.stdout.readline()
        if not symbol_line or not location_line:
          # readline() returns "" only at EOF, i.e. addr2line went away.
          # Without this check the loop would append empty records forever.
          hit_eof = True
          break
        symbol = symbol_line.strip()
        if symbol == "??":
          symbol = None
        location = location_line.strip()
        if location == "??:0":
          location = None
        if symbol is None and location is None:
          break
        records.append((symbol, location))
        if first:
          # Write a blank line as a sentinel so we know when to stop
          # reading inlines from the output.
          # The blank line will cause addr2line to emit "??\n??:0\n".
          child.stdin.write("\n")
          # Flush so the sentinel reaches addr2line even when the pipe is
          # buffered; otherwise the next readline() could block forever.
          child.stdin.flush()
          first = False
      result[addr] = records
      if hit_eof:
        break
  finally:
    # Always release both pipes and reap the child so no descriptors or
    # zombie processes are left behind, even when an exception propagates.
    child.stdin.close()
    child.stdout.close()
    child.wait()
  return result
230
231
def StripPC(addr):
  """Strip the Thumb bit from a program counter address when appropriate.

  Args:
    addr: the program counter address (an integer)

  Returns:
    addr with its lowest bit cleared when ARCH is "arm"; otherwise addr
    unchanged.
  """
  if ARCH != "arm":
    return addr
  return addr & ~1
246
def CallObjdumpForSet(lib, unique_addrs):
  """Use objdump to find out the names of the containing functions.

  Disassembles the part of .text spanning the requested addresses and, for
  every requested address that appears as an instruction address, records
  the function it belongs to and its offset inside that function.

  Args:
    lib: library (or executable) pathname containing symbols
    unique_addrs: set of string hexadecimal addresses to find the functions
      for.

  Returns:
    None if lib is empty or SYMBOLS_DIR + lib does not exist; an empty
    dictionary if unique_addrs is empty.  Otherwise a dictionary of the form
    {addr: (string symbol, offset)} containing only the addresses that were
    found in the disassembly.
  """
  if not lib:
    return None

  symbols = SYMBOLS_DIR + lib
  if not os.path.exists(symbols):
    return None

  # Order by numeric value.  A plain string sort puts "1000" before "ff",
  # which would give objdump a wrong range and break the in-order matching
  # against the (ascending) disassembly below.
  addrs = sorted(unique_addrs, key=lambda hex_addr: int(hex_addr, 16))
  if not addrs:
    return {}

  start_addr_dec = str(StripPC(int(addrs[0], 16)))
  stop_addr_dec = str(StripPC(int(addrs[-1], 16)) + 8)
  cmd = [ToolPath("objdump"),
         "--section=.text",
         "--demangle",
         "--disassemble",
         "--start-address=" + start_addr_dec,
         "--stop-address=" + stop_addr_dec,
         symbols]

  # Function lines look like:
  #   000177b0 <android::IBinder::~IBinder()+0x2c>:
  # We pull out the address and function first. Then we check for an optional
  # offset. This is tricky due to functions that look like "operator+(..)+0x2c"
  func_regexp = re.compile(r"(^[a-f0-9]*) <(.*)>:$")
  offset_regexp = re.compile(r"(.*)\+0x([a-f0-9]*)")

  # A disassembly line looks like:
  #   177b2:	b510      	push	{r4, lr}
  asm_regexp = re.compile(r"(^[ a-f0-9]*):[ a-f0-9]*.*$")

  current_symbol = None    # The current function symbol in the disassembly.
  current_symbol_addr = 0  # The address of the current function.
  addr_index = 0  # Index into addrs of the address we are looking for.

  # Text-mode pipe so the str regexps above match on every Python version.
  child = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                           universal_newlines=True)
  result = {}
  try:
    for line in child.stdout:
      # Is it a function line like:
      #   000177b0 <android::IBinder::~IBinder()>:
      components = func_regexp.match(line)
      if components:
        # This is a new function, so record the current function and its
        # address.
        current_symbol_addr = int(components.group(1), 16)
        current_symbol = components.group(2)

        # Does it have an optional offset like: "foo(..)+0x2c"?
        components = offset_regexp.match(current_symbol)
        if components:
          current_symbol = components.group(1)
          offset = components.group(2)
          if offset:
            current_symbol_addr -= int(offset, 16)

      # Is it a disassembly line like:
      #   177b2:	b510      	push	{r4, lr}
      components = asm_regexp.match(line)
      if not components:
        continue
      i_addr = int(components.group(1), 16)

      # Consume every pending target at or below this instruction address.
      # Targets equal to it are recorded (several input strings can strip to
      # the same PC); targets below it were never at an instruction boundary
      # and are skipped so they cannot block the addresses that follow.
      while addr_index < len(addrs):
        target_addr = addrs[addr_index]
        i_target = StripPC(int(target_addr, 16))
        if i_target > i_addr:
          break
        if i_target == i_addr:
          result[target_addr] = (current_symbol,
                                 i_target - current_symbol_addr)
        addr_index += 1
      if addr_index >= len(addrs):
        break
  finally:
    # Close the pipe and reap objdump so no descriptor or zombie is leaked.
    child.stdout.close()
    child.wait()

  return result
329
330
def CallCppFilt(mangled_symbol):
  """Demangle a symbol name by piping it through the toolchain's c++filt.

  Args:
    mangled_symbol: the mangled symbol name (a string).

  Returns:
    The first line c++filt prints, with surrounding whitespace removed; an
    empty string if it prints nothing.
  """
  cmd = [ToolPath("c++filt")]
  # Text-mode pipes so str input and output work on every Python version.
  process = subprocess.Popen(cmd, stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE, universal_newlines=True)
  # communicate() sends the input, closes stdin, drains stdout and waits for
  # the child, so no pipe or zombie process is left behind.
  output = process.communicate(mangled_symbol + "\n")[0]
  return output.split("\n", 1)[0].strip()
340
def FormatSymbolWithOffset(symbol, offset):
  """Render a symbol together with its offset.

  Args:
    symbol: the symbol name.
    offset: integer offset from the start of the symbol.

  Returns:
    symbol itself when offset is 0, otherwise the string "symbol+offset"
    with the offset written in decimal.
  """
  return symbol if offset == 0 else "%s+%d" % (symbol, offset)
345