1# Copyright (c) 2012 The Chromium Authors. All rights reserved. 2# Use of this source code is governed by a BSD-style license that can be 3# found in the LICENSE file. 4 5import bisect 6import re 7 8 9_ARGUMENT_TYPE_PATTERN = re.compile('\([^()]*\)(\s*const)?') 10_TEMPLATE_ARGUMENT_PATTERN = re.compile('<[^<>]*>') 11_LEADING_TYPE_PATTERN = re.compile('^.*\s+(\w+::)') 12_READELF_SECTION_HEADER_PATTER = re.compile( 13 '^\s*\[\s*(Nr|\d+)\]\s+(|\S+)\s+([A-Z_]+)\s+([0-9a-f]+)\s+' 14 '([0-9a-f]+)\s+([0-9a-f]+)\s+([0-9]+)\s+([WAXMSILGxOop]*)\s+' 15 '([0-9]+)\s+([0-9]+)\s+([0-9]+)') 16 17 18class ParsingException(Exception): 19 def __str__(self): 20 return repr(self.args[0]) 21 22 23class AddressMapping(object): 24 def __init__(self): 25 self._symbol_map = {} 26 27 def append(self, start, entry): 28 self._symbol_map[start] = entry 29 30 def find(self, address): 31 return self._symbol_map.get(address) 32 33 34class RangeAddressMapping(AddressMapping): 35 def __init__(self): 36 super(RangeAddressMapping, self).__init__() 37 self._sorted_start_list = [] 38 self._is_sorted = True 39 40 def append(self, start, entry): 41 if self._sorted_start_list: 42 if self._sorted_start_list[-1] > start: 43 self._is_sorted = False 44 elif self._sorted_start_list[-1] == start: 45 return 46 self._sorted_start_list.append(start) 47 self._symbol_map[start] = entry 48 49 def find(self, address): 50 if not self._sorted_start_list: 51 return None 52 if not self._is_sorted: 53 self._sorted_start_list.sort() 54 self._is_sorted = True 55 found_index = bisect.bisect_left(self._sorted_start_list, address) 56 found_start_address = self._sorted_start_list[found_index - 1] 57 return self._symbol_map[found_start_address] 58 59 60class Procedure(object): 61 """A class for a procedure symbol and an address range for the symbol.""" 62 63 def __init__(self, start, end, name): 64 self.start = start 65 self.end = end 66 self.name = name 67 68 def __eq__(self, other): 69 return (self.start == other.start and 70 self.end == other.end and 71 self.name == other.name) 72 73 def __ne__(self, other): 74 return not self.__eq__(other) 75 76 def __str__(self): 77 return '%x-%x: %s' % (self.start, self.end, self.name) 78 79 80class ElfSection(object): 81 """A class for an elf section header.""" 82 83 def __init__( 84 self, number, name, stype, address, offset, size, es, flg, lk, inf, al): 85 self.number = number 86 self.name = name 87 self.stype = stype 88 self.address = address 89 self.offset = offset 90 self.size = size 91 self.es = es 92 self.flg = flg 93 self.lk = lk 94 self.inf = inf 95 self.al = al 96 97 def __eq__(self, other): 98 return (self.number == other.number and 99 self.name == other.name and 100 self.stype == other.stype and 101 self.address == other.address and 102 self.offset == other.offset and 103 self.size == other.size and 104 self.es == other.es and 105 self.flg == other.flg and 106 self.lk == other.lk and 107 self.inf == other.inf and 108 self.al == other.al) 109 110 def __ne__(self, other): 111 return not self.__eq__(other) 112 113 def __str__(self): 114 return '%x+%x(%x) %s' % (self.address, self.size, self.offset, self.name) 115 116 117class StaticSymbolsInFile(object): 118 """Represents static symbol information in a binary file.""" 119 120 def __init__(self, my_name): 121 self.my_name = my_name 122 self._elf_sections = [] 123 self._procedures = RangeAddressMapping() 124 self._sourcefiles = RangeAddressMapping() 125 self._typeinfos = AddressMapping() 126 127 def _append_elf_section(self, elf_section): 128 self._elf_sections.append(elf_section) 129 130 def _append_procedure(self, start, procedure): 131 self._procedures.append(start, procedure) 132 133 def _append_sourcefile(self, start, sourcefile): 134 self._sourcefiles.append(start, sourcefile) 135 136 def _append_typeinfo(self, start, typeinfo): 137 self._typeinfos.append(start, typeinfo) 138 139 def _find_symbol_by_runtime_address(self, address, vma, target): 140 if not (vma.begin <= address < vma.end): 141 return None 142 143 if vma.name != self.my_name: 144 return None 145 146 file_offset = address - (vma.begin - vma.offset) 147 elf_address = None 148 for section in self._elf_sections: 149 if section.offset <= file_offset < (section.offset + section.size): 150 elf_address = section.address + file_offset - section.offset 151 if not elf_address: 152 return None 153 154 return target.find(elf_address) 155 156 def find_procedure_by_runtime_address(self, address, vma): 157 return self._find_symbol_by_runtime_address(address, vma, self._procedures) 158 159 def find_sourcefile_by_runtime_address(self, address, vma): 160 return self._find_symbol_by_runtime_address(address, vma, self._sourcefiles) 161 162 def find_typeinfo_by_runtime_address(self, address, vma): 163 return self._find_symbol_by_runtime_address(address, vma, self._typeinfos) 164 165 def load_readelf_ew(self, f): 166 found_header = False 167 for line in f: 168 if line.rstrip() == 'Section Headers:': 169 found_header = True 170 break 171 if not found_header: 172 return None 173 174 for line in f: 175 line = line.rstrip() 176 matched = _READELF_SECTION_HEADER_PATTER.match(line) 177 if matched: 178 self._append_elf_section(ElfSection( 179 int(matched.group(1), 10), # number 180 matched.group(2), # name 181 matched.group(3), # stype 182 int(matched.group(4), 16), # address 183 int(matched.group(5), 16), # offset 184 int(matched.group(6), 16), # size 185 matched.group(7), # es 186 matched.group(8), # flg 187 matched.group(9), # lk 188 matched.group(10), # inf 189 matched.group(11) # al 190 )) 191 else: 192 if line in ('Key to Flags:', 'Program Headers:'): 193 break 194 195 def load_readelf_debug_decodedline_file(self, input_file): 196 for line in input_file: 197 splitted = line.rstrip().split(None, 2) 198 self._append_sourcefile(int(splitted[0], 16), splitted[1]) 199 200 @staticmethod 201 def _parse_nm_bsd_line(line): 202 if line[8] == ' ': 203 return line[0:8], line[9], line[11:] 204 elif line[16] == ' ': 205 return line[0:16], line[17], line[19:] 206 raise ParsingException('Invalid nm output.') 207 208 @staticmethod 209 def _get_short_function_name(function): 210 while True: 211 function, number = _ARGUMENT_TYPE_PATTERN.subn('', function) 212 if not number: 213 break 214 while True: 215 function, number = _TEMPLATE_ARGUMENT_PATTERN.subn('', function) 216 if not number: 217 break 218 return _LEADING_TYPE_PATTERN.sub('\g<1>', function) 219 220 def load_nm_bsd(self, f, mangled=False): 221 last_start = 0 222 routine = '' 223 224 for line in f: 225 line = line.rstrip() 226 sym_value, sym_type, sym_name = self._parse_nm_bsd_line(line) 227 228 if sym_value[0] == ' ': 229 continue 230 231 start_val = int(sym_value, 16) 232 233 if (sym_type in ('r', 'R', 'D', 'U', 'd', 'V') and 234 (not mangled and sym_name.startswith('typeinfo'))): 235 self._append_typeinfo(start_val, sym_name) 236 237 # It's possible for two symbols to share the same address, if 238 # one is a zero-length variable (like __start_google_malloc) or 239 # one symbol is a weak alias to another (like __libc_malloc). 240 # In such cases, we want to ignore all values except for the 241 # actual symbol, which in nm-speak has type "T". The logic 242 # below does this, though it's a bit tricky: what happens when 243 # we have a series of lines with the same address, is the first 244 # one gets queued up to be processed. However, it won't 245 # *actually* be processed until later, when we read a line with 246 # a different address. That means that as long as we're reading 247 # lines with the same address, we have a chance to replace that 248 # item in the queue, which we do whenever we see a 'T' entry -- 249 # that is, a line with type 'T'. If we never see a 'T' entry, 250 # we'll just go ahead and process the first entry (which never 251 # got touched in the queue), and ignore the others. 252 if start_val == last_start and (sym_type == 't' or sym_type == 'T'): 253 # We are the 'T' symbol at this address, replace previous symbol. 254 routine = sym_name 255 continue 256 elif start_val == last_start: 257 # We're not the 'T' symbol at this address, so ignore us. 258 continue 259 260 # Tag this routine with the starting address in case the image 261 # has multiple occurrences of this routine. We use a syntax 262 # that resembles template paramters that are automatically 263 # stripped out by ShortFunctionName() 264 sym_name += "<%016x>" % start_val 265 266 if not mangled: 267 routine = self._get_short_function_name(routine) 268 self._append_procedure( 269 last_start, Procedure(last_start, start_val, routine)) 270 271 last_start = start_val 272 routine = sym_name 273 274 if not mangled: 275 routine = self._get_short_function_name(routine) 276 self._append_procedure( 277 last_start, Procedure(last_start, last_start, routine)) 278