• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright (c) 2012 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import bisect
6import re
7
8
9_ARGUMENT_TYPE_PATTERN = re.compile('\([^()]*\)(\s*const)?')
10_TEMPLATE_ARGUMENT_PATTERN = re.compile('<[^<>]*>')
11_LEADING_TYPE_PATTERN = re.compile('^.*\s+(\w+::)')
12_READELF_SECTION_HEADER_PATTER = re.compile(
13    '^\s*\[\s*(Nr|\d+)\]\s+(|\S+)\s+([A-Z_]+)\s+([0-9a-f]+)\s+'
14    '([0-9a-f]+)\s+([0-9a-f]+)\s+([0-9]+)\s+([WAXMSILGxOop]*)\s+'
15    '([0-9]+)\s+([0-9]+)\s+([0-9]+)')
16
17
18class ParsingException(Exception):
19  def __str__(self):
20    return repr(self.args[0])
21
22
23class AddressMapping(object):
24  def __init__(self):
25    self._symbol_map = {}
26
27  def append(self, start, entry):
28    self._symbol_map[start] = entry
29
30  def find(self, address):
31    return self._symbol_map.get(address)
32
33
34class RangeAddressMapping(AddressMapping):
35  def __init__(self):
36    super(RangeAddressMapping, self).__init__()
37    self._sorted_start_list = []
38    self._is_sorted = True
39
40  def append(self, start, entry):
41    if self._sorted_start_list:
42      if self._sorted_start_list[-1] > start:
43        self._is_sorted = False
44      elif self._sorted_start_list[-1] == start:
45        return
46    self._sorted_start_list.append(start)
47    self._symbol_map[start] = entry
48
49  def find(self, address):
50    if not self._sorted_start_list:
51      return None
52    if not self._is_sorted:
53      self._sorted_start_list.sort()
54      self._is_sorted = True
55    found_index = bisect.bisect_left(self._sorted_start_list, address)
56    found_start_address = self._sorted_start_list[found_index - 1]
57    return self._symbol_map[found_start_address]
58
59
60class Procedure(object):
61  """A class for a procedure symbol and an address range for the symbol."""
62
63  def __init__(self, start, end, name):
64    self.start = start
65    self.end = end
66    self.name = name
67
68  def __eq__(self, other):
69    return (self.start == other.start and
70            self.end == other.end and
71            self.name == other.name)
72
73  def __ne__(self, other):
74    return not self.__eq__(other)
75
76  def __str__(self):
77    return '%x-%x: %s' % (self.start, self.end, self.name)
78
79
80class ElfSection(object):
81  """A class for an elf section header."""
82
83  def __init__(
84      self, number, name, stype, address, offset, size, es, flg, lk, inf, al):
85    self.number = number
86    self.name = name
87    self.stype = stype
88    self.address = address
89    self.offset = offset
90    self.size = size
91    self.es = es
92    self.flg = flg
93    self.lk = lk
94    self.inf = inf
95    self.al = al
96
97  def __eq__(self, other):
98    return (self.number == other.number and
99            self.name == other.name and
100            self.stype == other.stype and
101            self.address == other.address and
102            self.offset == other.offset and
103            self.size == other.size and
104            self.es == other.es and
105            self.flg == other.flg and
106            self.lk == other.lk and
107            self.inf == other.inf and
108            self.al == other.al)
109
110  def __ne__(self, other):
111    return not self.__eq__(other)
112
113  def __str__(self):
114    return '%x+%x(%x) %s' % (self.address, self.size, self.offset, self.name)
115
116
117class StaticSymbolsInFile(object):
118  """Represents static symbol information in a binary file."""
119
120  def __init__(self, my_name):
121    self.my_name = my_name
122    self._elf_sections = []
123    self._procedures = RangeAddressMapping()
124    self._sourcefiles = RangeAddressMapping()
125    self._typeinfos = AddressMapping()
126
127  def _append_elf_section(self, elf_section):
128    self._elf_sections.append(elf_section)
129
130  def _append_procedure(self, start, procedure):
131    self._procedures.append(start, procedure)
132
133  def _append_sourcefile(self, start, sourcefile):
134    self._sourcefiles.append(start, sourcefile)
135
136  def _append_typeinfo(self, start, typeinfo):
137    self._typeinfos.append(start, typeinfo)
138
139  def _find_symbol_by_runtime_address(self, address, vma, target):
140    if not (vma.begin <= address < vma.end):
141      return None
142
143    if vma.name != self.my_name:
144      return None
145
146    file_offset = address - (vma.begin - vma.offset)
147    elf_address = None
148    for section in self._elf_sections:
149      if section.offset <= file_offset < (section.offset + section.size):
150        elf_address = section.address + file_offset - section.offset
151    if not elf_address:
152      return None
153
154    return target.find(elf_address)
155
156  def find_procedure_by_runtime_address(self, address, vma):
157    return self._find_symbol_by_runtime_address(address, vma, self._procedures)
158
159  def find_sourcefile_by_runtime_address(self, address, vma):
160    return self._find_symbol_by_runtime_address(address, vma, self._sourcefiles)
161
162  def find_typeinfo_by_runtime_address(self, address, vma):
163    return self._find_symbol_by_runtime_address(address, vma, self._typeinfos)
164
165  def load_readelf_ew(self, f):
166    found_header = False
167    for line in f:
168      if line.rstrip() == 'Section Headers:':
169        found_header = True
170        break
171    if not found_header:
172      return None
173
174    for line in f:
175      line = line.rstrip()
176      matched = _READELF_SECTION_HEADER_PATTER.match(line)
177      if matched:
178        self._append_elf_section(ElfSection(
179            int(matched.group(1), 10), # number
180            matched.group(2), # name
181            matched.group(3), # stype
182            int(matched.group(4), 16), # address
183            int(matched.group(5), 16), # offset
184            int(matched.group(6), 16), # size
185            matched.group(7), # es
186            matched.group(8), # flg
187            matched.group(9), # lk
188            matched.group(10), # inf
189            matched.group(11) # al
190            ))
191      else:
192        if line in ('Key to Flags:', 'Program Headers:'):
193          break
194
195  def load_readelf_debug_decodedline_file(self, input_file):
196    for line in input_file:
197      splitted = line.rstrip().split(None, 2)
198      self._append_sourcefile(int(splitted[0], 16), splitted[1])
199
200  @staticmethod
201  def _parse_nm_bsd_line(line):
202    if line[8] == ' ':
203      return line[0:8], line[9], line[11:]
204    elif line[16] == ' ':
205      return line[0:16], line[17], line[19:]
206    raise ParsingException('Invalid nm output.')
207
208  @staticmethod
209  def _get_short_function_name(function):
210    while True:
211      function, number = _ARGUMENT_TYPE_PATTERN.subn('', function)
212      if not number:
213        break
214    while True:
215      function, number = _TEMPLATE_ARGUMENT_PATTERN.subn('', function)
216      if not number:
217        break
218    return _LEADING_TYPE_PATTERN.sub('\g<1>', function)
219
220  def load_nm_bsd(self, f, mangled=False):
221    last_start = 0
222    routine = ''
223
224    for line in f:
225      line = line.rstrip()
226      sym_value, sym_type, sym_name = self._parse_nm_bsd_line(line)
227
228      if sym_value[0] == ' ':
229        continue
230
231      start_val = int(sym_value, 16)
232
233      if (sym_type in ('r', 'R', 'D', 'U', 'd', 'V') and
234          (not mangled and sym_name.startswith('typeinfo'))):
235        self._append_typeinfo(start_val, sym_name)
236
237      # It's possible for two symbols to share the same address, if
238      # one is a zero-length variable (like __start_google_malloc) or
239      # one symbol is a weak alias to another (like __libc_malloc).
240      # In such cases, we want to ignore all values except for the
241      # actual symbol, which in nm-speak has type "T".  The logic
242      # below does this, though it's a bit tricky: what happens when
243      # we have a series of lines with the same address, is the first
244      # one gets queued up to be processed.  However, it won't
245      # *actually* be processed until later, when we read a line with
246      # a different address.  That means that as long as we're reading
247      # lines with the same address, we have a chance to replace that
248      # item in the queue, which we do whenever we see a 'T' entry --
249      # that is, a line with type 'T'.  If we never see a 'T' entry,
250      # we'll just go ahead and process the first entry (which never
251      # got touched in the queue), and ignore the others.
252      if start_val == last_start and (sym_type == 't' or sym_type == 'T'):
253        # We are the 'T' symbol at this address, replace previous symbol.
254        routine = sym_name
255        continue
256      elif start_val == last_start:
257        # We're not the 'T' symbol at this address, so ignore us.
258        continue
259
260      # Tag this routine with the starting address in case the image
261      # has multiple occurrences of this routine.  We use a syntax
262      # that resembles template paramters that are automatically
263      # stripped out by ShortFunctionName()
264      sym_name += "<%016x>" % start_val
265
266      if not mangled:
267        routine = self._get_short_function_name(routine)
268      self._append_procedure(
269          last_start, Procedure(last_start, start_val, routine))
270
271      last_start = start_val
272      routine = sym_name
273
274    if not mangled:
275      routine = self._get_short_function_name(routine)
276    self._append_procedure(
277        last_start, Procedure(last_start, last_start, routine))
278