• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/python
2#
3# Copyright (C) 2013 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16
17"""Module for looking up symbolic debugging information.
18
19The information can include symbol names, offsets, and source locations.
20"""
21
22import glob
23import itertools
24import os
25import re
26import subprocess
27import zipfile
28
# Root directory that the other paths in this module are resolved against:
# four directory levels above the directory that contains this file.
CHROME_SRC = os.path.join(os.path.realpath(os.path.dirname(__file__)),
                          os.pardir, os.pardir, os.pardir, os.pardir)
# The three names below are currently plain aliases of CHROME_SRC.
ANDROID_BUILD_TOP = CHROME_SRC
SYMBOLS_DIR = CHROME_SRC
CHROME_SYMBOLS_DIR = CHROME_SRC

# Target architecture consulted by ToolPath, FindToolchain and StripPC.
# Values those functions recognise: "arm", "arm64", "x86", "x86_64", "mips".
ARCH = "arm"

# Cache for the toolchain tuple selected by FindToolchain; None until the
# first successful lookup.
TOOLCHAIN_INFO = None
38
def Uname():
  """Return the host tag used in prebuilt/<...> and out/host/<...> paths.

  Returns:
    "linux-x86" on Linux; "darwin-x86" or "darwin-ppc" on Darwin, chosen
    from the last field of os.uname(); otherwise the raw system name.
  """
  system = os.uname()[0]
  if system == "Linux":
    return "linux-x86"
  if system != "Darwin":
    return system
  # On Darwin the last uname field distinguishes Intel from PowerPC hosts.
  machine = os.uname()[-1]
  return "darwin-x86" if machine in ("i386", "x86_64") else "darwin-ppc"
50
def ToolPath(tool, toolchain_info=None):
  """Return the fully qualified path of a toolchain binary for ARCH.

  The path points into the android_tools NDK checked in under CHROME_SRC.

  Args:
    tool: tool name without the target prefix, e.g. "addr2line".
    toolchain_info: accepted for compatibility; not used by this function.

  Returns:
    CHROME_SRC/<ndk toolchain bin dir>/<target prefix>-<tool>.

  Raises:
    Exception: if ARCH is not one of the known architectures.
  """
  # ARCH -> (toolchain directory name, binary name prefix).
  toolchains = {
      "arm": ("arm-linux-androideabi-4.6", "arm-linux-androideabi"),
      "arm64": ("aarch64-linux-android-4.9", "aarch64-linux-android"),
      "x86": ("x86-4.6", "i686-android-linux"),
      "x86_64": ("x86_64-4.9", "x86_64-linux-android"),
      "mips": ("mipsel-linux-android-4.6", "mipsel-linux-android"),
  }
  if ARCH not in toolchains:
    raise Exception("Could not find tool chain")
  source_dir, binary_prefix = toolchains[ARCH]

  bin_dir = (
      "third_party/android_tools/%s/toolchains/%s/prebuilt/linux-x86_64/bin" %
      ("ndk", source_dir))
  binary_name = binary_prefix + "-" + tool
  return os.path.join(CHROME_SRC, bin_dir, binary_name)
85
def FindToolchain():
  """Look for an available toolchain for ARCH and cache the choice.

  A toolchain counts as available when ToolPath("addr2line", ...) exists.
  The first hit is stored in TOOLCHAIN_INFO and returned on later calls
  without touching the filesystem again.

  Args:
    None

  Returns:
    A (label, platform, target) tuple of strings describing the toolchain.

  Raises:
    Exception: if ARCH is unknown or no candidate's addr2line exists.
  """
  global TOOLCHAIN_INFO
  if TOOLCHAIN_INFO is not None:
    return TOOLCHAIN_INFO

  # Known toolchains per architecture, newer ones in the front.
  toolchains_by_arch = {
      "arm64": [
          ("aarch64-linux-android-4.9", "aarch64", "aarch64-linux-android"),
      ],
      "arm": [
          ("arm-linux-androideabi-4.6", "arm", "arm-linux-androideabi"),
      ],
      "x86": [
          ("i686-android-linux-4.4.3", "x86", "i686-android-linux"),
      ],
      "x86_64": [
          ("x86_64-linux-android-4.9", "x86_64", "x86_64-linux-android"),
      ],
      "mips": [
          ("mipsel-linux-android-4.6", "mips", "mipsel-linux-android"),
      ],
  }

  # Look for addr2line to check for valid toolchain path.
  for toolchain_info in toolchains_by_arch.get(ARCH, []):
    if os.path.exists(ToolPath("addr2line", toolchain_info)):
      TOOLCHAIN_INFO = toolchain_info
      # Single-argument call form prints the same text under the Python 2
      # print statement and the Python 3 print function.
      print("Using toolchain from :" + ToolPath("", TOOLCHAIN_INFO))
      return toolchain_info

  raise Exception("Could not find tool chain")
135
def GetAapt():
  """Locate the most recently modified 'aapt' executable.

  The SDK root is taken from the SDK_HOME environment variable when set,
  otherwise the relative path third_party/android_tools/sdk is used.

  Args:
    None

  Returns:
    Pathname of the newest <sdk>/build-tools/*/aapt, or None if none exists.
  """
  default_sdk = os.path.join('third_party', 'android_tools', 'sdk')
  sdk_root = os.environ.get('SDK_HOME', default_sdk)
  pattern = os.path.join(sdk_root, 'build-tools', '*', 'aapt')
  found = glob.glob(pattern)
  if found:
    # Newest first, so the head of the list is the one to use.
    found.sort(key=os.path.getmtime, reverse=True)
    return found[0]
  return None
151
def ApkMatchPackageName(aapt, apk_path, package_name):
  """Returns true if the APK's package name matches package_name.

  Runs "<aapt> dump badging <apk_path>" and compares the name found on the
  first line that starts with "package: ".

  Args:
    aapt: pathname for the 'aapt' executable.
    apk_path: pathname of the APK file.
    package_name: package name to match.

  Returns:
    True if the package name matches or aapt is None, False otherwise
    (including when no package line is present in the output).

  Raises:
    subprocess.CalledProcessError: if aapt exits with a non-zero status.
  """
  if not aapt:
    # Allow false positives
    return True
  # universal_newlines=True makes check_output return text rather than
  # bytes, so the str split and regex below work on every Python version.
  aapt_output = subprocess.check_output(
      [aapt, 'dump', 'badging', apk_path],
      universal_newlines=True).split('\n')
  package_name_re = re.compile(r'package: .*name=\'(\S*)\'')
  for line in aapt_output:
    match = package_name_re.match(line)
    if match:
      return package_name == match.group(1)
  return False
174
def PathListJoin(prefix_list, suffix_list):
  """Join every prefix with every suffix.

  Args:
    prefix_list: list of path prefixes.
    suffix_list: list of path suffixes.

  Returns:
    List of os.path.join(prefix, suffix) results, ordered prefix-major:
    all suffixes for the first prefix, then all for the second, and so on.
  """
  joined = []
  for prefix in prefix_list:
    for suffix in suffix_list:
      joined.append(os.path.join(prefix, suffix))
  return joined
188
def GetCandidates(dirs, filepart, candidate_fun):
  """Collect candidate files from the build output and symbol directories.

  Search roots are <CHROME_SYMBOLS_DIR>/<out>/<buildtype> for each build
  type, followed by CHROME_SYMBOLS_DIR itself.  <out> comes from the
  CHROMIUM_OUT_DIR environment variable (default 'out'); the build type
  comes from BUILDTYPE, falling back to both 'Debug' and 'Release'.

  Args:
    dirs: a list of the directory part of the pathname.
    filepart: the file part of the pathname.
    candidate_fun: a function to apply to each candidate, returns a list.

  Returns:
    A list of candidate files ordered by modification time, newest first.
  """
  buildtype = os.environ.get('BUILDTYPE')
  buildtypes = [buildtype] if buildtype else ['Debug', 'Release']
  out_root = os.path.join(CHROME_SYMBOLS_DIR,
                          os.environ.get('CHROMIUM_OUT_DIR', 'out'))

  search_roots = PathListJoin([out_root], buildtypes)
  search_roots.append(CHROME_SYMBOLS_DIR)
  patterns = PathListJoin(PathListJoin(search_roots, dirs), [filepart])

  # Expand each pattern through candidate_fun and flatten the results.
  found = []
  for pattern in patterns:
    found.extend(candidate_fun(pattern))
  found.sort(key=os.path.getmtime, reverse=True)
  return found
215
def GetCandidateApks():
  """Find APK files which could contain the library.

  Args:
    None

  Returns:
    List of '*.apk' filenames found in the 'apks' candidate directories,
    as produced by GetCandidates.
  """
  apk_dirs = ['apks']
  apk_pattern = '*.apk'
  return GetCandidates(apk_dirs, apk_pattern, glob.glob)
226
def GetCrazyLib(apk_filename):
  """Returns the name of the first crazy library from this APK.

  A crazy library is an archive member matching lib/<abi>/crazy.lib*.so;
  the returned name has the leading "crazy." removed.

  Args:
    apk_filename: name of an APK file.

  Returns:
    Name of the first library which would be crazy loaded from this APK,
    or None if the APK contains no such member.
  """
  # The context manager closes the archive's file handle on every exit
  # path, including the early return below.
  with zipfile.ZipFile(apk_filename, 'r') as zip_file:
    for filename in zip_file.namelist():
      match = re.match('lib/[^/]*/crazy.(lib.*[.]so)', filename)
      if match:
        return match.group(1)
  return None
241
def GetMatchingApks(device_apk_name):
  """Find any APKs which match the package indicated by the device_apk_name.

  The package name is the part of device_apk_name that precedes the
  trailing "-<number>.apk".

  Args:
     device_apk_name: name of the APK on the device.

  Returns:
     A list of APK filenames which could contain the desired library.  The
     list is empty when device_apk_name is not of the form
     <package_name>-<number>.apk or when no candidate matches.
  """
  match = re.match('(.*)-[0-9]+[.]apk$', device_apk_name)
  if not match:
    # Always hand back a list so callers can iterate the result safely.
    return []
  package_name = match.group(1)
  # Resolve aapt once instead of once per candidate APK.
  aapt = GetAapt()
  return [candidate_apk for candidate_apk in GetCandidateApks()
          if ApkMatchPackageName(aapt, candidate_apk, package_name)]
259
def MapDeviceApkToLibrary(device_apk_name):
  """Provide a library name which corresponds with device_apk_name.

  Args:
    device_apk_name: name of the APK on the device.

  Returns:
    Name of the library which corresponds to that APK, or None when no
    matching APK contains a crazy library.
  """
  # GetMatchingApks may report "no match" with a non-iterable None; treat
  # that the same as an empty result instead of raising TypeError.
  matching_apks = GetMatchingApks(device_apk_name) or []
  for matching_apk in matching_apks:
    crazy_lib = GetCrazyLib(matching_apk)
    if crazy_lib:
      return crazy_lib
  return None
274
def GetCandidateLibraries(library_name):
  """Find existing library files named library_name.

  Args:
    library_name: basename of the library to match.

  Returns:
    A list of matching library filenames found in the 'lib' and
    'lib.target' candidate directories, as ordered by GetCandidates.
  """
  def existing_only(filename):
    # Keep the path only if it exists on disk.
    return filter(os.path.exists, [filename])

  library_dirs = ['lib', 'lib.target']
  return GetCandidates(library_dirs, library_name, existing_only)
287
def TranslateLibPath(lib):
  """Translate a symbols-root-relative path to the matching .so path.

  SymbolInformation(lib, addr) receives lib as the path from the symbols
  root to the symbols file.  The most recently updated library with the same
  basename in one of the known directories is chosen.  If no such .so is
  found somewhere in CHROME_SYMBOLS_DIR, lib is left untranslated in case it
  is an Android symbol in SYMBOLS_DIR.

  Args:
    lib: library path as it appears in the stack trace.

  Returns:
    '/' plus the chosen library's path relative to SYMBOLS_DIR, or lib
    unchanged when there is no candidate.
  """
  library_name = os.path.basename(lib)

  # The filename in the stack trace may be an APK name rather than a library
  # name.  This happens when the library was loaded directly from inside the
  # APK.  In that case try to work out the library name by looking for a
  # matching APK file and finding the name of the library it contains.
  # The name of the APK file on the device is of the form
  # <package_name>-<number>.apk.  The APK file on the host may have any name,
  # so the APK badging is inspected to see if the package name matches.
  if re.search('-[0-9]+[.]apk$', library_name):
    library_name = MapDeviceApkToLibrary(library_name) or library_name

  candidates = GetCandidateLibraries(library_name)
  if candidates:
    return '/' + os.path.relpath(candidates[0], SYMBOLS_DIR)
  return lib
315
def SymbolInformation(lib, addr, get_detailed_info):
  """Look up symbol information about a single address.

  Args:
    lib: library (or executable) pathname containing symbols
    addr: string hexadecimal address
    get_detailed_info: passed through to SymbolInformationForSet, where a
      true value adds the objdump-based symbol-with-offset lookup.

  Returns:
    A list of the form [(source_symbol, source_location,
    object_symbol_with_offset)].

    If the function has been inlined then the list may contain
    more than one element with the symbols for the most deeply
    nested inlined location appearing first.  The list is
    always non-empty: [(None, None, None)] is returned when no
    information is available.

    Usually you want to display the source_location and
    object_symbol_with_offset from the last element in the list.
  """
  translated = TranslateLibPath(lib)
  info = SymbolInformationForSet(translated, set([addr]), get_detailed_info)
  if info:
    entries = info.get(addr)
    if entries:
      return entries
  return [(None, None, None)]
338
339
def SymbolInformationForSet(lib, unique_addrs, get_detailed_info):
  """Look up symbol information for a set of addresses from the given library.

  Args:
    lib: library (or executable) pathname containing symbols
    unique_addrs: set of hexadecimal addresses
    get_detailed_info: when true, CallObjdumpForSet is used to resolve the
      containing function and offset; when false every address gets an
      empty object symbol instead.

  Returns:
    None if lib is empty, if addr2line yields nothing, or if detailed info
    was requested and objdump yields nothing.  Otherwise a dictionary of
    the form {addr: [(source_symbol, source_location,
    object_symbol_with_offset)]} where each address has a non-empty list
    of associated symbols and locations.

    If the function has been inlined then the list may contain
    more than one element with the symbols for the most deeply
    nested inlined location appearing first.

    Usually you want to display the source_location and
    object_symbol_with_offset from the last element in the list.
  """
  if not lib:
    return None

  addr_to_line = CallAddr2LineForSet(lib, unique_addrs)
  if not addr_to_line:
    return None

  if not get_detailed_info:
    # Placeholder entries: empty symbol, zero offset.
    addr_to_objdump = dict((addr, ("", 0)) for addr in unique_addrs)
  else:
    addr_to_objdump = CallObjdumpForSet(lib, unique_addrs)
    if not addr_to_objdump:
      return None

  result = {}
  for addr in unique_addrs:
    source_info = addr_to_line.get(addr) or [(None, None)]
    if addr in addr_to_objdump:
      object_symbol, object_offset = addr_to_objdump[addr]
      symbol_with_offset = FormatSymbolWithOffset(object_symbol,
                                                  object_offset)
    else:
      symbol_with_offset = None
    entries = []
    for source_symbol, source_location in source_info:
      entries.append((source_symbol, source_location, symbol_with_offset))
    result[addr] = entries

  return result
389
390
class MemoizedForSet(object):
  """Decorator caching per-library, per-address results of a set lookup.

  The wrapped function takes (lib, addrs) and returns a dictionary keyed by
  address (or a falsy value).  Each address is passed to the wrapped
  function at most once per library; addresses it returned nothing for are
  remembered as None and are not retried.
  """

  def __init__(self, fn):
    """Wraps fn, a callable of the form fn(lib, addrs) -> dict or None."""
    self.fn = fn
    # {lib: {addr: cached result, or None if the lookup found nothing}}
    self.cache = {}

  def __call__(self, lib, unique_addrs):
    """Returns {addr: result} for the addresses that have a truthy result.

    Args:
      lib: library pathname, used as the first-level cache key.
      unique_addrs: collection of addresses to look up.

    Returns:
      A dictionary containing only those addresses from unique_addrs whose
      cached result is truthy.
    """
    lib_cache = self.cache.setdefault(lib, {})

    # Build a real list: a lazy filter() object is always truthy and would
    # already be exhausted by the update() below before fn gets to see it.
    no_cache = [addr for addr in unique_addrs if addr not in lib_cache]
    if no_cache:
      # Pre-seed with None so addresses fn does not report are not retried.
      lib_cache.update((addr, None) for addr in no_cache)
      result = self.fn(lib, no_cache)
      if result:
        lib_cache.update(result)

    return dict((k, lib_cache[k]) for k in unique_addrs if lib_cache[k])
407
408
@MemoizedForSet
def CallAddr2LineForSet(lib, unique_addrs):
  """Look up line and symbol information for a set of addresses.

  A single addr2line process is started and fed one address at a time.

  Args:
    lib: library (or executable) pathname containing symbols
    unique_addrs: set of string hexadecimal addresses to look up.

  Returns:
    None if lib is empty or SYMBOLS_DIR + lib is not a file.  Otherwise a
    dictionary of the form {addr: [(symbol, file:line)]} where
    each address has a list of associated symbols and locations
    or an empty list if no symbol information was found.

    If the function has been inlined then the list may contain
    more than one element with the symbols for the most deeply
    nested inlined location appearing first.
  """
  if not lib:
    return None

  symbols = SYMBOLS_DIR + lib
  if not os.path.isfile(symbols):
    return None

  # Called for its effects only: it raises when no toolchain is available
  # and caches its choice.  The returned tuple itself is not needed here.
  FindToolchain()
  cmd = [ToolPath("addr2line"), "--functions", "--inlines",
         "--demangle", "--exe=" + symbols]
  # Text-mode pipes keep the str-based protocol below valid even where
  # subprocess pipes carry bytes by default.
  child = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                           universal_newlines=True)

  result = {}
  try:
    for addr in sorted(unique_addrs):
      child.stdin.write("0x%s\n" % addr)
      child.stdin.flush()
      records = []
      first = True
      reached_eof = False
      while True:
        symbol_line = child.stdout.readline()
        location_line = child.stdout.readline()
        if not symbol_line or not location_line:
          # addr2line closed its output.  Without this check the empty
          # reads would never equal the "??" sentinel and the loop would
          # append ('', '') records forever.
          reached_eof = True
          break
        symbol = symbol_line.strip()
        if symbol == "??":
          symbol = None
        location = location_line.strip()
        if location == "??:0":
          location = None
        if symbol is None and location is None:
          break
        records.append((symbol, location))
        if first:
          # Write a blank line as a sentinel so we know when to stop
          # reading inlines from the output.
          # The blank line will cause addr2line to emit "??\n??:0\n".
          child.stdin.write("\n")
          child.stdin.flush()
          first = False
      result[addr] = records
      if reached_eof:
        # Nothing further can be read from the child; stop feeding it.
        break
  finally:
    # Close both pipes and reap the child so it does not linger as a
    # zombie process.
    try:
      child.stdin.close()
    except (IOError, OSError):
      # The child may already have exited; closing a broken pipe can fail.
      pass
    child.stdout.close()
    child.wait()
  return result
466
467
def StripPC(addr):
  """Clear the Thumb bit of a program counter address when ARCH is "arm".

  Args:
    addr: the program counter address

  Returns:
    addr with bit 0 cleared when ARCH is "arm", otherwise addr unchanged.
  """
  return (addr & ~1) if ARCH == "arm" else addr
482
@MemoizedForSet
def CallObjdumpForSet(lib, unique_addrs):
  """Use objdump to find out the names of the containing functions.

  objdump is run once per address over the 8 bytes starting at the address
  (after StripPC), and its disassembly output is parsed.

  Args:
    lib: library (or executable) pathname containing symbols
    unique_addrs: set of string hexadecimal addresses to find the functions
      for.

  Returns:
    None if lib is empty or SYMBOLS_DIR + lib does not exist.  Otherwise a
    dictionary of the form {addr: (string symbol, offset)}; addresses for
    which no disassembly line was found are absent.
  """
  if not lib:
    return None

  symbols = SYMBOLS_DIR + lib
  if not os.path.exists(symbols):
    return None

  result = {}

  # Function lines look like:
  #   000177b0 <android::IBinder::~IBinder()+0x2c>:
  # We pull out the address and function first. Then we check for an optional
  # offset. This is tricky due to functions that look like "operator+(..)+0x2c"
  func_regexp = re.compile(r"(^[a-f0-9]*) <(.*)>:$")
  offset_regexp = re.compile(r"(.*)\+0x([a-f0-9]*)")

  # A disassembly line looks like:
  #   177b2:  b510        push  {r4, lr}
  asm_regexp = re.compile(r"(^[ a-f0-9]*):[ a-f0-9]*.*$")

  for target_addr in unique_addrs:
    # Numeric form of the address, computed once per target.
    i_target = StripPC(int(target_addr, 16))
    cmd = [ToolPath("objdump"),
           "--section=.text",
           "--demangle",
           "--disassemble",
           "--start-address=" + str(i_target),
           "--stop-address=" + str(i_target + 8),
           symbols]

    current_symbol = None    # The current function symbol in the disassembly.
    current_symbol_addr = 0  # The address of the current function.

    # Text mode so the str regexes above can be applied to each line.
    child = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                             universal_newlines=True)
    try:
      for line in child.stdout:
        # Is it a function line like:
        #   000177b0 <android::IBinder::~IBinder()>:
        components = func_regexp.match(line)
        if components:
          # This is a new function, so record the current function and its
          # address.
          current_symbol_addr = int(components.group(1), 16)
          current_symbol = components.group(2)

          # Does it have an optional offset like: "foo(..)+0x2c"?
          components = offset_regexp.match(current_symbol)
          if components:
            current_symbol = components.group(1)
            offset = components.group(2)
            if offset:
              # The header address is inside the function; step back to
              # the function's start.
              current_symbol_addr -= int(offset, 16)

        # Is it a disassembly line like:
        #   177b2:  b510        push  {r4, lr}
        components = asm_regexp.match(line)
        if components:
          i_addr = int(components.group(1), 16)
          if i_addr == i_target:
            result[target_addr] = (current_symbol,
                                   i_target - current_symbol_addr)
    finally:
      # Close the pipe and reap the child so it does not linger as a zombie.
      child.stdout.close()
      child.wait()

  return result
562
563
def CallCppFilt(mangled_symbol):
  """Demangle a C++ symbol by piping it through the toolchain's c++filt.

  Args:
    mangled_symbol: the mangled symbol name as a string.

  Returns:
    The first line that c++filt prints, stripped of surrounding whitespace,
    or an empty string if it prints nothing.
  """
  cmd = [ToolPath("c++filt")]
  process = subprocess.Popen(cmd, stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE, universal_newlines=True)
  # communicate() sends the input, closes stdin, reads all output and waits
  # for the child to exit, so no process or pipe is left behind.
  output, _ = process.communicate(mangled_symbol + "\n")
  lines = output.splitlines()
  return lines[0].strip() if lines else ""
573
def FormatSymbolWithOffset(symbol, offset):
  """Format a symbol with its decimal offset as "symbol+offset".

  Args:
    symbol: the symbol name.
    offset: numeric offset from the start of the symbol.

  Returns:
    symbol unchanged when offset is 0, otherwise "<symbol>+<offset>".
  """
  if offset != 0:
    return "%s+%d" % (symbol, offset)
  return symbol
578