#!/usr/bin/env python
#===- lib/asan/scripts/asan_symbolize.py -----------------------------------===#
#
#                     The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
#===------------------------------------------------------------------------===#
"""Symbolize AddressSanitizer stack traces read from standard input.

Every input line that looks like a stack frame, e.g.

    #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45)

is rewritten with the function name and source location; every other line is
echoed unchanged.  Command-line arguments are regular expressions; everything
up to and including a match is cut from reported file names.  If the
BREAKPAD_SUFFIX environment variable is set and "<binary><suffix>" exists,
that Breakpad symbol file is consulted before the system symbolizer.

The script runs under both Python 2 and Python 3.
"""
import bisect
import os
import re
import sys
import subprocess

# Cache of symbolizer objects, keyed by the binary path found in the trace.
symbolizers = {}
filetypes = {}
vmaddrs = {}
DEBUG = False

# "   #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45)"
_FRAME_RE = re.compile(
    r'^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)')
# A well-formed atos response looks like this:
#   foo(type1, type2) (in object.name) (filename.cc:80)
_ATOS_RE = re.compile(r'^(.*) \(in (.*)\) \((.*:\d*)\)$')
_ARGLIST_RE = re.compile(r"\(.*?\)")


def fix_filename(file_name):
    """Return file_name shortened for display.

    Each command-line argument is treated as a regular expression; the text
    up to and including its match is removed.  Locations inside the ASan
    runtime collapse to "_asan_rtl_" and "crtstuff.c:0" becomes "???:0".
    """
    for path_to_cut in sys.argv[1:]:
        file_name = re.sub(".*" + path_to_cut, "", file_name)
    file_name = re.sub(".*asan_[a-z_]*.cc:[0-9]*", "_asan_rtl_", file_name)
    file_name = re.sub(".*crtstuff.c:0", "???:0", file_name)
    return file_name


class Symbolizer(object):
    """Base class: subclasses provide symbolize(prefix, addr, offset)."""

    def __init__(self):
        pass


class LinuxSymbolizer(Symbolizer):
    """Symbolizes offsets through one long-lived addr2line process."""

    def __init__(self, binary):
        super(LinuxSymbolizer, self).__init__()
        self.binary = binary
        self.pipe = self.open_addr2line()

    def open_addr2line(self):
        """Start "addr2line -f -e <binary>" and return the Popen object."""
        cmd = ["addr2line", "-f", "-e", self.binary]
        if DEBUG:
            print(' '.join(cmd))
        # Text mode so that the pipes carry str on Python 3 as well.
        return subprocess.Popen(cmd,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                universal_newlines=True)

    def symbolize(self, prefix, addr, offset):
        """Return "<prefix><addr> in <function> <file:line>" for offset.

        Any failure while talking to addr2line is deliberately swallowed:
        the frame is then reported with empty function and file names.
        """
        try:
            self.pipe.stdin.write("%s\n" % offset)
            # Python 3 pipes are buffered; push the query out before reading.
            self.pipe.stdin.flush()
            function_name = self.pipe.stdout.readline().rstrip()
            file_name = self.pipe.stdout.readline().rstrip()
        except Exception:
            function_name = ""
            file_name = ""
        file_name = fix_filename(file_name)
        return "%s%s in %s %s" % (prefix, addr, function_name, file_name)


class DarwinSymbolizer(Symbolizer):
    """Symbolizes offsets by running atos once per frame."""

    def __init__(self, addr, binary):
        super(DarwinSymbolizer, self).__init__()
        self.binary = binary
        # Guess which arch we're running. 10 = len("0x") + 8 hex digits.
        if len(addr) > 10:
            self.arch = "x86_64"
        else:
            self.arch = "i386"
        self.vmaddr = None
        self.pipe = None

    def get_binary_vmaddr(self):
        """
        Get the slide value to be added to the address.
        We're looking for the following piece in otool -l output:
          Load command 0
          cmd LC_SEGMENT
          cmdsize 736
          segname __TEXT
          vmaddr 0x00000000
        """
        # Compare against None: a slide of 0 is a valid cached value and must
        # not trigger another otool run for every frame.
        if self.vmaddr is not None:
            return self.vmaddr
        cmdline = ["otool", "-l", self.binary]
        pipe = subprocess.Popen(cmdline,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                universal_newlines=True)
        # communicate() closes stdin, reads all output and reaps the child.
        output, _ = pipe.communicate()
        is_text = False
        vmaddr = 0
        for line in output.splitlines():
            line = line.strip()
            if line.startswith('segname'):
                is_text = (line == 'segname __TEXT')
                continue
            if line.startswith('vmaddr') and is_text:
                sv = line.split(' ')
                vmaddr = int(sv[-1], 16)
                break
        self.vmaddr = vmaddr
        return self.vmaddr

    def write_addr_to_pipe(self, offset):
        """Send offset plus the binary's slide to the running atos."""
        slide = self.get_binary_vmaddr()
        self.pipe.stdin.write("0x%x\n" % (int(offset, 16) + slide))
        self.pipe.stdin.flush()

    def open_atos(self):
        """Start a fresh atos process for this binary in self.pipe."""
        if DEBUG:
            print("atos -o %s -arch %s" % (self.binary, self.arch))
        cmdline = ["atos", "-o", self.binary, "-arch", self.arch]
        self.pipe = subprocess.Popen(cmdline,
                                     stdin=subprocess.PIPE,
                                     stdout=subprocess.PIPE,
                                     stderr=subprocess.PIPE,
                                     universal_newlines=True)

    def symbolize(self, prefix, addr, offset):
        """Return the symbolized frame, or the raw atos line if unparsable."""
        self.open_atos()
        self.write_addr_to_pipe(offset)
        # communicate() closes stdin (so atos answers and exits), drains both
        # stdout and stderr, and waits for the child so it is not left behind.
        stdout_data, _ = self.pipe.communicate()
        response = stdout_data.splitlines()
        atos_line = response[0].rstrip() if response else ""
        match = _ATOS_RE.match(atos_line)
        if DEBUG:
            print("atos_line:  %s" % atos_line)
        if match:
            # Drop the argument list from the demangled name.
            function_name = _ARGLIST_RE.sub("", match.group(1))
            file_name = fix_filename(match.group(3))
            return "%s%s in %s %s" % (prefix, addr, function_name, file_name)
        return "%s%s in %s" % (prefix, addr, atos_line)


# Chain two symbolizers so that the second one is called if the first fails.
class ChainSymbolizer(Symbolizer):
    """Tries symbolizer1 and falls back to symbolizer2 when it returns None."""

    def __init__(self, symbolizer1, symbolizer2):
        super(ChainSymbolizer, self).__init__()
        self.symbolizer1 = symbolizer1
        self.symbolizer2 = symbolizer2

    def symbolize(self, prefix, addr, offset):
        result = self.symbolizer1.symbolize(prefix, addr, offset)
        if result is None:
            result = self.symbolizer2.symbolize(prefix, addr, offset)
        return result


def BreakpadSymbolizerFactory(addr, binary):
    """Return a BreakpadSymbolizer for binary, or None if unavailable.

    The symbol file is looked up as binary + $BREAKPAD_SUFFIX.  addr is
    accepted for signature parity with the other factory and is unused.
    """
    suffix = os.getenv("BREAKPAD_SUFFIX")
    if suffix:
        filename = binary + suffix
        if os.access(filename, os.F_OK):
            return BreakpadSymbolizer(filename)
    return None


def SystemSymbolizerFactory(system, addr, binary):
    """Return the native symbolizer for system, or None if unsupported."""
    if system == 'Darwin':
        return DarwinSymbolizer(addr, binary)
    elif system == 'Linux':
        return LinuxSymbolizer(binary)
    return None


class BreakpadSymbolizer(Symbolizer):
    """Symbolizes offsets from a Breakpad text symbol file."""

    def __init__(self, filename):
        super(BreakpadSymbolizer, self).__init__()
        self.filename = filename
        with open(filename) as sym_file:
            lines = sym_file.readlines()
        self.files = []
        self.symbols = {}
        self.address_list = []
        self.addresses = {}
        # MODULE mac x86_64 A7001116478B33F18FF9BEDE9F615F190 t
        fragments = lines[0].rstrip().split()
        self.arch = fragments[2]
        self.debug_id = fragments[3]
        self.binary = ' '.join(fragments[4:])
        self.parse_lines(lines[1:])

    def parse_lines(self, lines):
        """Fill the file, symbol and address tables from symbol-file lines."""
        cur_function_addr = ''
        for line in lines:
            fragments = line.split()
            if not fragments:
                # Tolerate blank lines instead of failing on fragments[0].
                continue
            record = fragments[0]
            if record == 'FILE':
                assert int(fragments[1]) == len(self.files)
                self.files.append(' '.join(fragments[2:]))
            elif record == 'PUBLIC':
                self.symbols[int(fragments[1], 16)] = ' '.join(fragments[3:])
            elif record in ('CFI', 'STACK'):
                pass
            elif record == 'FUNC':
                cur_function_addr = int(fragments[1], 16)
                # A PUBLIC name recorded earlier for this address wins.
                if cur_function_addr not in self.symbols:
                    self.symbols[cur_function_addr] = ' '.join(fragments[4:])
            else:
                # Line starting with an address.
                addr = int(record, 16)
                self.address_list.append(addr)
                # Tuple of symbol address, size, line, file number.
                self.addresses[addr] = (cur_function_addr,
                                        int(fragments[1], 16),
                                        int(fragments[2]),
                                        int(fragments[3]))
        self.address_list.sort()

    def get_sym_file_line(self, addr):
        """Return (symbol, filename, line) covering addr, or None."""
        # The rightmost record starting at or before addr is the only one
        # that can cover it.
        index = bisect.bisect_right(self.address_list, addr)
        if index == 0:
            return None
        key = self.address_list[index - 1]
        sym_id, size, line_no, file_no = self.addresses[key]
        if addr >= key + size:
            return None
        return self.symbols[sym_id], self.files[file_no], line_no

    def symbolize(self, prefix, addr, offset):
        """Return the symbolized frame, or None if offset is not covered."""
        res = self.get_sym_file_line(int(offset, 16))
        if not res:
            return None
        function_name, file_name, line_no = res
        result = "%s%s in %s %s:%d" % (
            prefix, addr, function_name, file_name, line_no)
        # The caller prints the returned frame; echoing it here as well is
        # only useful as a debugging aid.
        if DEBUG:
            print(result)
        return result


def symbolize_line(system, line):
    """Return line with its stack frame symbolized, or line unchanged.

    Symbolizers are created lazily and cached per binary in `symbolizers`.
    A Breakpad symbolizer is preferred when available; the first time it
    cannot resolve an offset it is chained with the system symbolizer.
    """
    #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45)
    match = _FRAME_RE.match(line)
    if not match:
        return line
    if DEBUG:
        print(line)
    prefix = match.group(1)
    # frameno = match.group(2)
    addr = match.group(3)
    binary = match.group(4)
    offset = match.group(5)
    if binary not in symbolizers:
        symbolizers[binary] = (
            BreakpadSymbolizerFactory(addr, binary) or
            SystemSymbolizerFactory(system, addr, binary))
    symbolizer = symbolizers[binary]
    if symbolizer is None:
        # No symbolizer exists for this system; leave the frame as it is.
        return line
    # Query once and reuse the answer: asking again would double the
    # addr2line traffic and spawn a second atos for every frame.
    result = symbolizer.symbolize(prefix, addr, offset)
    if result is None:
        fallback = SystemSymbolizerFactory(system, addr, binary)
        if fallback is None:
            return line
        symbolizers[binary] = ChainSymbolizer(symbolizer, fallback)
        result = fallback.symbolize(prefix, addr, offset)
    return result


def main():
    """Symbolize stdin line by line and print the result to stdout."""
    system = os.uname()[0]
    if system not in ('Linux', 'Darwin'):
        print('Unknown system:  %s' % system)
        return
    for line in sys.stdin:
        print(symbolize_line(system, line).rstrip())


if __name__ == '__main__':
    main()