• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python
2#===- lib/asan/scripts/asan_symbolize.py -----------------------------------===#
3#
4#                     The LLVM Compiler Infrastructure
5#
6# This file is distributed under the University of Illinois Open Source
7# License. See LICENSE.TXT for details.
8#
9#===------------------------------------------------------------------------===#
10import bisect
11import os
12import re
13import sys
14import subprocess
15
# Cache of symbolizer objects keyed by binary path; symbolize_line() fills it
# in the first time a binary is seen.
symbolizers = {}
# Not referenced anywhere in the visible part of this file.
filetypes = {}
# Not referenced anywhere in the visible part of this file.
vmaddrs = {}
# When True, the code below prints the external commands it runs and the raw
# lines it processes.
DEBUG = False
20
21
def fix_filename(file_name):
  """Shorten a "file:line" location string for display.

  Every command-line argument (sys.argv[1:]) is treated as a regular
  expression; everything up to and including its last match is cut from
  `file_name`.  Afterwards locations inside asan_*.cc files collapse to
  "_asan_rtl_" and "crtstuff.c:0" becomes "???:0".

  Args:
    file_name: location string, e.g. "/path/to/foo.cc:12".

  Returns:
    The rewritten location string.
  """
  for path_to_cut in sys.argv[1:]:
    # path_to_cut is deliberately used as a regex, not as a literal path.
    file_name = re.sub(".*" + path_to_cut, "", file_name)
  # The dots in ".cc" and ".c" are escaped so they match a literal dot only.
  file_name = re.sub(r".*asan_[a-z_]*\.cc:[0-9]*", "_asan_rtl_", file_name)
  file_name = re.sub(r".*crtstuff\.c:0", "???:0", file_name)
  return file_name
28
29
class Symbolizer(object):
  """Common base class of the symbolizers defined in this script."""

  def __init__(self):
    """Nothing to initialize; present so subclasses can chain via super()."""
33
34
class LinuxSymbolizer(Symbolizer):
  """Symbolizes offsets by talking to one long-lived addr2line process."""

  def __init__(self, binary):
    """Remember `binary` and start the addr2line child that serves it."""
    super(LinuxSymbolizer, self).__init__()
    self.binary = binary
    self.pipe = self.open_addr2line()

  def open_addr2line(self):
    """Spawn "addr2line -f -e <binary>" with piped stdin and stdout.

    Returns:
      The subprocess.Popen object for the child.
    """
    cmd = ["addr2line", "-f", "-e", self.binary]
    if DEBUG:
      print(' '.join(cmd))
    # universal_newlines=True opens the pipes in text mode, so str can be
    # written and read on both Python 2 and Python 3.
    return subprocess.Popen(cmd,
                            stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                            universal_newlines=True)

  def symbolize(self, prefix, addr, offset):
    """Return "<prefix><addr> in <function> <file:line>" for `offset`.

    Args:
      prefix: text to emit before the address (the frame-number part).
      addr: address string copied verbatim into the result.
      offset: offset string sent to addr2line.

    If talking to addr2line fails, the function and file parts are empty.
    """
    try:
      self.pipe.stdin.write("%s\n" % offset)
      # Flush explicitly: the pipe may be buffered, and addr2line only
      # answers once it has actually received the line.
      self.pipe.stdin.flush()
      # With -f addr2line answers with two lines: function, then file:line.
      function_name = self.pipe.stdout.readline().rstrip()
      file_name = self.pipe.stdout.readline().rstrip()
    except Exception:
      # Deliberate best effort: an unsymbolized frame is still printed.
      function_name = ""
      file_name = ""
    file_name = fix_filename(file_name)
    return "%s%s in %s %s" % (prefix, addr, function_name, file_name)
56
57
class DarwinSymbolizer(Symbolizer):
  """Symbolizes offsets with atos, using otool to find the __TEXT vmaddr."""

  def __init__(self, addr, binary):
    """Record `binary` and guess the architecture from the width of `addr`."""
    super(DarwinSymbolizer, self).__init__()
    self.binary = binary
    # Guess which arch we're running. 10 = len("0x") + 8 hex digits.
    if len(addr) > 10:
      self.arch = "x86_64"
    else:
      self.arch = "i386"
    self.vmaddr = None
    self.pipe = None

  def get_binary_vmaddr(self):
    """
    Get the slide value to be added to the address.
    We're looking for the following piece in otool -l output:
      Load command 0
      cmd LC_SEGMENT
      cmdsize 736
      segname __TEXT
      vmaddr 0x00000000

    The value is computed once and cached in self.vmaddr; 0 is returned when
    no __TEXT segment is found.
    """
    # Compare against None: a vmaddr of 0 is a valid cached value and must
    # not trigger another otool run.
    if self.vmaddr is not None:
      return self.vmaddr
    cmdline = ["otool", "-l", self.binary]
    pipe = subprocess.Popen(cmdline,
                            stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE,
                            universal_newlines=True)
    # communicate() closes the pipes and reaps the child.
    output, _ = pipe.communicate()
    is_text = False
    vmaddr = 0
    for line in output.splitlines():
      line = line.strip()
      if line.startswith('segname'):
        is_text = (line == 'segname __TEXT')
        continue
      if line.startswith('vmaddr') and is_text:
        vmaddr = int(line.split()[-1], 16)
        break
    self.vmaddr = vmaddr
    return self.vmaddr

  def write_addr_to_pipe(self, offset):
    """Send `offset` (hex string) plus the binary's vmaddr to atos."""
    slide = self.get_binary_vmaddr()
    self.pipe.stdin.write("0x%x\n" % (int(offset, 16) + slide))
    self.pipe.stdin.flush()

  def open_atos(self):
    """Start a fresh atos process for this binary and store it in self.pipe."""
    if DEBUG:
      print("atos -o %s -arch %s" % (self.binary, self.arch))
    cmdline = ["atos", "-o", self.binary, "-arch", self.arch]
    # Text-mode pipes so str can be written/read on Python 2 and Python 3.
    self.pipe = subprocess.Popen(cmdline,
                                 stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 universal_newlines=True)

  def symbolize(self, prefix, addr, offset):
    """Return the symbolized frame text for `offset`.

    Args:
      prefix: text to emit before the address (the frame-number part).
      addr: address string copied verbatim into the result.
      offset: hex offset string within the binary.

    Returns:
      "<prefix><addr> in <function> <file:line>" when the atos answer is
      well-formed, otherwise "<prefix><addr> in <raw atos answer>".
    """
    self.open_atos()
    self.write_addr_to_pipe(offset)
    # communicate() closes stdin, drains stdout and stderr and waits for the
    # child, so the per-address atos process does not linger.
    output, _ = self.pipe.communicate()
    # Only the first line of the answer is used.
    atos_line = output.split("\n", 1)[0].rstrip()
    # A well-formed atos response looks like this:
    #   foo(type1, type2) (in object.name) (filename.cc:80)
    match = re.match(r'^(.*) \(in (.*)\) \((.*:\d*)\)$', atos_line)
    if DEBUG:
      print("atos_line:  %s" % atos_line)
    if match:
      function_name = match.group(1)
      # Drop the parenthesized argument list from the function name.
      function_name = re.sub(r"\(.*?\)", "", function_name)
      file_name = fix_filename(match.group(3))
      return "%s%s in %s %s" % (prefix, addr, function_name, file_name)
    else:
      return "%s%s in %s" % (prefix, addr, atos_line)
126
127
128# Chain two symbolizers so that the second one is called if the first fails.
class ChainSymbolizer(Symbolizer):
  """Asks a primary symbolizer first and a secondary one if that yields None."""

  def __init__(self, symbolizer1, symbolizer2):
    """Store the primary (`symbolizer1`) and fallback (`symbolizer2`)."""
    super(ChainSymbolizer, self).__init__()
    self.symbolizer1, self.symbolizer2 = symbolizer1, symbolizer2

  def symbolize(self, prefix, addr, offset):
    """Return the primary's answer, or the fallback's when it is None."""
    primary = self.symbolizer1.symbolize(prefix, addr, offset)
    if primary is not None:
      return primary
    return self.symbolizer2.symbolize(prefix, addr, offset)
139
140
def BreakpadSymbolizerFactory(addr, binary):
  """Build a BreakpadSymbolizer for `binary` if a symbol file is available.

  The symbol file name is `binary` followed by the value of the
  BREAKPAD_SUFFIX environment variable.  Returns None when the variable is
  unset or empty, or when that file does not exist.  `addr` is not used.
  """
  suffix = os.getenv("BREAKPAD_SUFFIX")
  if not suffix:
    return None
  candidate = binary + suffix
  if not os.access(candidate, os.F_OK):
    return None
  return BreakpadSymbolizer(candidate)
148
149
def SystemSymbolizerFactory(system, addr, binary):
  """Create the symbolizer backed by the platform's native tools.

  Returns a LinuxSymbolizer for 'Linux', a DarwinSymbolizer for 'Darwin',
  and None for any other `system` value.
  """
  if system == 'Linux':
    return LinuxSymbolizer(binary)
  if system == 'Darwin':
    return DarwinSymbolizer(addr, binary)
  return None
155
156
class BreakpadSymbolizer(Symbolizer):
  """Symbolizes offsets from a Breakpad text symbol file held in memory."""

  def __init__(self, filename):
    """Load and index the symbol file `filename`.

    The first line must be the MODULE record; the remaining lines are handed
    to parse_lines().
    """
    super(BreakpadSymbolizer, self).__init__()
    self.filename = filename
    # Close the file deterministically instead of leaking the handle.
    with open(filename) as sym_file:
      lines = sym_file.readlines()
    self.files = []         # file names, indexed by FILE record number
    self.symbols = {}       # function start address -> function name
    self.address_list = []  # sorted start addresses of all line records
    self.addresses = {}     # line record address -> (func addr, size, line, file)
    # MODULE mac x86_64 A7001116478B33F18FF9BEDE9F615F190 t
    fragments = lines[0].rstrip().split()
    self.arch = fragments[2]
    self.debug_id = fragments[3]
    self.binary = ' '.join(fragments[4:])
    self.parse_lines(lines[1:])

  def parse_lines(self, lines):
    """Fill the lookup tables from the records in `lines`.

    FILE, PUBLIC and FUNC records and address-prefixed line records are
    indexed; CFI/STACK records and blank lines are ignored.
    """
    cur_function_addr = ''
    for line in lines:
      fragments = line.split()
      if not fragments:
        # Blank line: nothing to index.
        continue
      record = fragments[0]
      if record == 'FILE':
        assert int(fragments[1]) == len(self.files)
        self.files.append(' '.join(fragments[2:]))
      elif record == 'PUBLIC':
        self.symbols[int(fragments[1], 16)] = ' '.join(fragments[3:])
      elif record in ('CFI', 'STACK'):
        pass
      elif record == 'FUNC':
        cur_function_addr = int(fragments[1], 16)
        # A name recorded earlier for this address is kept.
        if cur_function_addr not in self.symbols:
          self.symbols[cur_function_addr] = ' '.join(fragments[4:])
      else:
        # Line starting with an address.
        addr = int(record, 16)
        self.address_list.append(addr)
        # Tuple of symbol address, size, line, file number.
        self.addresses[addr] = (cur_function_addr,
                                int(fragments[1], 16),
                                int(fragments[2]),
                                int(fragments[3]))
    self.address_list.sort()

  def get_sym_file_line(self, addr):
    """Look up the line record covering the integer address `addr`.

    Returns:
      A (symbol, filename, line_no) tuple, or None when no line record
      covers `addr`.
    """
    if addr in self.addresses:
      key = addr
    else:
      # Closest line record that starts below addr.
      index = bisect.bisect_left(self.address_list, addr)
      if index == 0:
        return None
      key = self.address_list[index - 1]
    sym_id, size, line_no, file_no = self.addresses[key]
    symbol = self.symbols[sym_id]
    filename = self.files[file_no]
    if addr < key + size:
      return symbol, filename, line_no
    else:
      return None

  def symbolize(self, prefix, addr, offset):
    """Return "<prefix><addr> in <function> <file>:<line>" for `offset`.

    Args:
      prefix: text to emit before the address (the frame-number part).
      addr: address string copied verbatim into the result.
      offset: hex offset string looked up in the symbol tables.

    Returns:
      The formatted string, or None when the offset is not covered.
    """
    res = self.get_sym_file_line(int(offset, 16))
    if res:
      function_name, file_name, line_no = res
      result = "%s%s in %s %s:%d" % (
          prefix, addr, function_name, file_name, line_no)
      # Diagnostic echo only; the caller prints the returned string itself.
      if DEBUG:
        print(result)
      return result
    else:
      return None
224
225
def symbolize_line(system, line):
  """Symbolize one line of input if it is a stack frame.

  A frame line looks like:
    #0 0x7f6e35cf2e45  (/blah/foo.so+0x11fe45)

  Args:
    system: platform name, passed on to SystemSymbolizerFactory.
    line: one line of input text.

  Returns:
    The symbolized frame text for frame lines; any other line is returned
    unchanged.
  """
  match = re.match(r'^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)',
                   line)
  if not match:
    return line
  if DEBUG:
    print(line)
  prefix = match.group(1)
  # frameno = match.group(2)
  addr = match.group(3)
  binary = match.group(4)
  offset = match.group(5)
  if binary not in symbolizers:
    # Prefer Breakpad symbols when available, else the platform tool.
    symbolizer = BreakpadSymbolizerFactory(addr, binary)
    if not symbolizer:
      symbolizer = SystemSymbolizerFactory(system, addr, binary)
    symbolizers[binary] = symbolizer
  symbolizer = symbolizers[binary]
  result = symbolizer.symbolize(prefix, addr, offset)
  if result is None:
    # The cached symbolizer could not resolve this frame: chain the system
    # symbolizer behind it for this binary and retry.  The retry happens
    # only on failure, so a successful lookup costs a single symbolize().
    symbolizer = ChainSymbolizer(symbolizer,
        SystemSymbolizerFactory(system, addr, binary))
    symbolizers[binary] = symbolizer
    result = symbolizer.symbolize(prefix, addr, offset)
  return result
251
252
def main():
  """Read a report from stdin and print it with stack frames symbolized.

  Only 'Linux' and 'Darwin' (as reported by os.uname()) are handled; on any
  other system a message is printed and no input is read.
  """
  system = os.uname()[0]
  if system not in ('Linux', 'Darwin'):
    # Two spaces after the colon keep the output identical to the former
    # two-argument print statement.
    print('Unknown system:  %s' % system)
    return
  for line in sys.stdin:
    # Single-argument print(...) emits the same bytes on Python 2 and 3.
    print(symbolize_line(system, line).rstrip())
261
262
# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
  main()
265