# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import copy
import datetime
import logging
import os
import re
import time

from lib.bucket import BUCKET_ID
from lib.exceptions import EmptyDumpException, InvalidDumpException
from lib.exceptions import ObsoleteDumpVersionException, ParsingException
from lib.pageframe import PageFrame
from lib.range_dict import ExclusiveRangeDict
from lib.symbol import procfs


LOGGER = logging.getLogger('dmprof')


# Heap Profile Dump versions

# DUMP_DEEP_[1-4] are obsolete.
# DUMP_DEEP_2+ distinguish mmap regions and malloc chunks.
# DUMP_DEEP_3+ don't include allocation functions in their stack dumps.
# DUMP_DEEP_4+ support comments with '#' and global stats "nonprofiled-*".
# DUMP_DEEP_[1-2] should be processed by POLICY_DEEP_1.
# DUMP_DEEP_[3-4] should be processed by POLICY_DEEP_2 or POLICY_DEEP_3.
DUMP_DEEP_1 = 'DUMP_DEEP_1'
DUMP_DEEP_2 = 'DUMP_DEEP_2'
DUMP_DEEP_3 = 'DUMP_DEEP_3'
DUMP_DEEP_4 = 'DUMP_DEEP_4'

DUMP_DEEP_OBSOLETE = (DUMP_DEEP_1, DUMP_DEEP_2, DUMP_DEEP_3, DUMP_DEEP_4)

# DUMP_DEEP_5 doesn't separate sections for malloc and mmap.
# malloc and mmap are identified in bucket files.
# DUMP_DEEP_5 should be processed by POLICY_DEEP_4.
DUMP_DEEP_5 = 'DUMP_DEEP_5'

# DUMP_DEEP_6 adds a mmap list to DUMP_DEEP_5.
DUMP_DEEP_6 = 'DUMP_DEEP_6'


class Dump(object):
  """Represents a heap profile dump."""

  _PATH_PATTERN = re.compile(r'^(.*)\.([0-9]+)\.([0-9]+)\.heap$')
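  # A dump path like 'chrome.12345.0002.heap' is expected to match, giving
  # pid 12345 and dump count 2 (an illustrative name, not taken from this
  # file).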

  _HOOK_PATTERN = re.compile(
      r'^ ([ \(])([a-f0-9]+)([ \)])-([ \(])([a-f0-9]+)([ \)])\s+'
      r'(hooked|unhooked)\s+(.+)$', re.IGNORECASE)

  _HOOKED_PATTERN = re.compile(r'(?P<TYPE>.+ )?(?P<COMMITTED>[0-9]+) / '
                               r'(?P<RESERVED>[0-9]+) @ (?P<BUCKETID>[0-9]+)')
  _UNHOOKED_PATTERN = re.compile(r'(?P<TYPE>.+ )?(?P<COMMITTED>[0-9]+) / '
                                 r'(?P<RESERVED>[0-9]+)')

  _OLD_HOOKED_PATTERN = re.compile(r'(?P<TYPE>.+) @ (?P<BUCKETID>[0-9]+)')
  _OLD_UNHOOKED_PATTERN = re.compile(r'(?P<TYPE>.+) (?P<COMMITTED>[0-9]+)')

  _TIME_PATTERN_FORMAT = re.compile(
      r'^Time: ([0-9]+/[0-9]+/[0-9]+ [0-9]+:[0-9]+:[0-9]+)(\.[0-9]+)?')
  _TIME_PATTERN_SECONDS = re.compile(r'^Time: ([0-9]+)$')
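  # The time patterns are meant to match META lines such as
  # 'Time: 2013/03/25 12:34:56.789' or 'Time: 1364185200' (illustrative
  # values).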

  def __init__(self, path, modified_time):
    self._path = path
    matched = self._PATH_PATTERN.match(path)
    self._pid = int(matched.group(2))
    self._count = int(matched.group(3))
    self._time = modified_time
    self._map = {}
    self._procmaps = ExclusiveRangeDict(ProcMapsEntryAttribute)
    self._stacktrace_lines = []
    self._global_stats = {}  # used only in apply_policy

    self._run_id = ''
    self._pagesize = 4096
    self._pageframe_length = 0
    self._pageframe_encoding = ''
    self._has_pagecount = False

    self._version = ''
    self._lines = []

  @property
  def path(self):
    return self._path

  @property
  def count(self):
    return self._count

  @property
  def time(self):
    return self._time

  @property
  def iter_map(self):
    for region in sorted(self._map.iteritems()):
      yield region[0], region[1]

  def iter_procmaps(self):
    for begin, end, attr in self._procmaps.iter_range():
      yield begin, end, attr

  @property
  def iter_stacktrace(self):
    for line in self._stacktrace_lines:
      yield line

  def global_stat(self, name):
    return self._global_stats[name]

  @property
  def run_id(self):
    return self._run_id

  @property
  def pagesize(self):
    return self._pagesize

  @property
  def pageframe_length(self):
    return self._pageframe_length

  @property
  def pageframe_encoding(self):
    return self._pageframe_encoding

  @property
  def has_pagecount(self):
    return self._has_pagecount

  @staticmethod
  def load(path, log_header='Loading a heap profile dump: '):
    """Loads a heap profile dump.

    Args:
        path: A file path string to load.
        log_header: A preceding string for log messages.

    Returns:
        A loaded Dump object.

    Raises:
        ParsingException for invalid heap profile dumps.
    """
    dump = Dump(path, os.stat(path).st_mtime)
    with open(path, 'r') as f:
      dump.load_file(f, log_header)
    return dump

  def load_file(self, f, log_header):
    self._lines = [line for line in f
                   if line and not line.startswith('#')]

    try:
      self._version, ln = self._parse_version()
      self._parse_meta_information()
      if self._version == DUMP_DEEP_6:
        self._parse_mmap_list()
      self._parse_global_stats()
      self._extract_stacktrace_lines(ln)
    except EmptyDumpException:
      LOGGER.info('%s%s ...ignored an empty dump.' % (log_header, self._path))
    except ParsingException as e:
      LOGGER.error('%s%s ...error %s' % (log_header, self._path, e))
      raise
    else:
      LOGGER.info('%s%s (version:%s)' % (log_header, self._path, self._version))

  def _parse_version(self):
    """Parses a version string in self._lines.

    Returns:
        A pair of (a string representing a version of the stacktrace dump,
        and an integer indicating a line number next to the version string).

    Raises:
        ParsingException for invalid dump versions.
    """
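    # A modern dump is expected to begin with a line like
    # 'heap profile: DUMP_DEEP_6' (an illustrative header, inferred from the
    # checks below).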
    version = ''

    # Skip until an identifiable line.
    headers = ('STACKTRACES:\n', 'MMAP_STACKTRACES:\n', 'heap profile: ')
    if not self._lines:
      raise EmptyDumpException('Empty heap dump file.')
    (ln, found) = skip_while(
        0, len(self._lines),
        lambda n: not self._lines[n].startswith(headers))
    if not found:
      raise InvalidDumpException('No version header.')

    # Identify a version.
    if self._lines[ln].startswith('heap profile: '):
      version = self._lines[ln][13:].strip()
      if version in (DUMP_DEEP_5, DUMP_DEEP_6):
        (ln, _) = skip_while(
            ln, len(self._lines),
            lambda n: self._lines[n] != 'STACKTRACES:\n')
      elif version in DUMP_DEEP_OBSOLETE:
        raise ObsoleteDumpVersionException(version)
      else:
        raise InvalidDumpException('Invalid version: %s' % version)
    elif self._lines[ln] == 'STACKTRACES:\n':
      raise ObsoleteDumpVersionException(DUMP_DEEP_1)
    elif self._lines[ln] == 'MMAP_STACKTRACES:\n':
      raise ObsoleteDumpVersionException(DUMP_DEEP_2)

    return (version, ln)

  def _parse_global_stats(self):
    """Parses lines in self._lines as global stats."""
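    # Each stat line is expected to end with two integer columns, e.g.
    # '   total      4294967296   123456789' for (virtual, committed);
    # illustrative values.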
    (ln, _) = skip_while(
        0, len(self._lines),
        lambda n: self._lines[n] != 'GLOBAL_STATS:\n')

    global_stat_names = [
        'total', 'absent', 'file-exec', 'file-nonexec', 'anonymous', 'stack',
        'other', 'nonprofiled-absent', 'nonprofiled-anonymous',
        'nonprofiled-file-exec', 'nonprofiled-file-nonexec',
        'nonprofiled-stack', 'nonprofiled-other',
        'profiled-mmap', 'profiled-malloc']

    for prefix in global_stat_names:
      (ln, _) = skip_while(
          ln, len(self._lines),
          lambda n: self._lines[n].split()[0] != prefix)
      words = self._lines[ln].split()
      self._global_stats[prefix + '_virtual'] = int(words[-2])
      self._global_stats[prefix + '_committed'] = int(words[-1])

  def _parse_meta_information(self):
    """Parses lines in self._lines for meta information."""
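    # The META section is expected to look like (illustrative values):
    #   META:
    #   Time: 2013/03/25 12:34:56.789
    #   PageSize: 4096
    #   PageFrame: 24,Base64,PageCount
    #   RunID: 20130325-1234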
    (ln, found) = skip_while(
        0, len(self._lines),
        lambda n: self._lines[n] != 'META:\n')
    if not found:
      return
    ln += 1

    while True:
      if self._lines[ln].startswith('Time:'):
        matched_seconds = self._TIME_PATTERN_SECONDS.match(self._lines[ln])
        matched_format = self._TIME_PATTERN_FORMAT.match(self._lines[ln])
        if matched_format:
          self._time = time.mktime(datetime.datetime.strptime(
              matched_format.group(1), '%Y/%m/%d %H:%M:%S').timetuple())
          if matched_format.group(2):
            self._time += float(matched_format.group(2)[1:]) / 1000.0
        elif matched_seconds:
          self._time = float(matched_seconds.group(1))
      elif self._lines[ln].startswith('Reason:'):
        pass  # Nothing to do for 'Reason:'
      elif self._lines[ln].startswith('PageSize: '):
        self._pagesize = int(self._lines[ln][10:])
      elif self._lines[ln].startswith('CommandLine:'):
        pass
      elif (self._lines[ln].startswith('PageFrame: ') or
            self._lines[ln].startswith('PFN: ')):
        if self._lines[ln].startswith('PageFrame: '):
          words = self._lines[ln][11:].split(',')
        else:
          words = self._lines[ln][5:].split(',')
        for word in words:
          if word == '24':
            self._pageframe_length = 24
          elif word == 'Base64':
            self._pageframe_encoding = 'base64'
          elif word == 'PageCount':
            self._has_pagecount = True
      elif self._lines[ln].startswith('RunID: '):
        self._run_id = self._lines[ln][7:].strip()
      elif (self._lines[ln].startswith('MMAP_LIST:') or
            self._lines[ln].startswith('GLOBAL_STATS:')):
        # Stop parsing the META section when "MMAP_LIST:" or "GLOBAL_STATS:"
        # is found.
        break
      else:
        pass
      ln += 1

  def _parse_mmap_list(self):
    """Parses lines in self._lines as a mmap list."""
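    # The MMAP_LIST section interleaves /proc/maps-style entries, optional
    # '  PF: ' pageframe lines, and region lines matching _HOOK_PATTERN, e.g.
    # '  7f2e4c000000 - 7f2e4c021000   hooked unknown 135168 / 135168 @ 123'
    # (illustrative values).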
    (ln, found) = skip_while(
        0, len(self._lines),
        lambda n: self._lines[n] != 'MMAP_LIST:\n')
    if not found:
      return {}

    ln += 1
    self._map = {}
    current_vma = {}
    pageframe_list = []
    while True:
      entry = procfs.ProcMaps.parse_line(self._lines[ln])
      if entry:
        current_vma = {}
        for _, _, attr in self._procmaps.iter_range(entry.begin, entry.end):
          for key, value in entry.as_dict().iteritems():
            attr[key] = value
            current_vma[key] = value
        ln += 1
        continue

      if self._lines[ln].startswith('  PF: '):
        for pageframe in self._lines[ln][5:].split():
          pageframe_list.append(PageFrame.parse(pageframe, self._pagesize))
        ln += 1
        continue

      matched = self._HOOK_PATTERN.match(self._lines[ln])
      if not matched:
        break
      # 2: starting address
      # 5: end address
      # 7: hooked or unhooked
      # 8: additional information
      if matched.group(7) == 'hooked':
        submatched = self._HOOKED_PATTERN.match(matched.group(8))
        if not submatched:
          submatched = self._OLD_HOOKED_PATTERN.match(matched.group(8))
      elif matched.group(7) == 'unhooked':
        submatched = self._UNHOOKED_PATTERN.match(matched.group(8))
        if not submatched:
          submatched = self._OLD_UNHOOKED_PATTERN.match(matched.group(8))
      else:
        assert matched.group(7) in ['hooked', 'unhooked']

      submatched_dict = submatched.groupdict()
      region_info = {'vma': current_vma}
      if submatched_dict.get('TYPE'):
        region_info['type'] = submatched_dict['TYPE'].strip()
      if submatched_dict.get('COMMITTED'):
        region_info['committed'] = int(submatched_dict['COMMITTED'])
      if submatched_dict.get('RESERVED'):
        region_info['reserved'] = int(submatched_dict['RESERVED'])
      if submatched_dict.get('BUCKETID'):
        region_info['bucket_id'] = int(submatched_dict['BUCKETID'])

      if matched.group(1) == '(':
        start = current_vma['begin']
      else:
        start = int(matched.group(2), 16)
      if matched.group(4) == '(':
        end = current_vma['end']
      else:
        end = int(matched.group(5), 16)

      if pageframe_list and pageframe_list[0].start_truncated:
        pageframe_list[0].set_size(
            pageframe_list[0].size - start % self._pagesize)
      if pageframe_list and pageframe_list[-1].end_truncated:
        pageframe_list[-1].set_size(
            pageframe_list[-1].size - (self._pagesize - end % self._pagesize))
      region_info['pageframe'] = pageframe_list
      pageframe_list = []

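      # Each entry maps an address range to ('hooked'|'unhooked', region_info);
      # region_info may carry 'type', 'committed', 'reserved', 'bucket_id',
      # 'vma' and 'pageframe' as collected above.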
      self._map[(start, end)] = (matched.group(7), region_info)
      ln += 1

  def _extract_stacktrace_lines(self, line_number):
    """Extracts stacktrace lines from self._lines.

    Valid stacktrace lines are stored into self._stacktrace_lines.

    Args:
        line_number: A line number to start parsing in lines.

    Raises:
        ParsingException for invalid dump versions.
    """
    if self._version in (DUMP_DEEP_5, DUMP_DEEP_6):
      (line_number, _) = skip_while(
          line_number, len(self._lines),
          lambda n: not self._lines[n].split()[0].isdigit())
      stacktrace_start = line_number
      (line_number, _) = skip_while(
          line_number, len(self._lines),
          lambda n: self._check_stacktrace_line(self._lines[n]))
      self._stacktrace_lines = self._lines[stacktrace_start:line_number]

    elif self._version in DUMP_DEEP_OBSOLETE:
      raise ObsoleteDumpVersionException(self._version)

    else:
      raise InvalidDumpException('Invalid version: %s' % self._version)

  @staticmethod
  def _check_stacktrace_line(stacktrace_line):
    """Checks if a given stacktrace_line is valid as stacktrace.

    Args:
        stacktrace_line: A string to be checked.

    Returns:
        True if the given stacktrace_line is valid.
    """
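    # A valid line is numeric columns followed by '@' and a bucket id, e.g.
    # '1024 512 16 8 @ 123' (illustrative values; the position of '@' is
    # fixed by BUCKET_ID from lib.bucket).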
    words = stacktrace_line.split()
    if len(words) < BUCKET_ID + 1:
      return False
    if words[BUCKET_ID - 1] != '@':
      return False
    return True


class DumpList(object):
  """Represents a sequence of heap profile dumps."""

  def __init__(self, dump_list):
    self._dump_list = dump_list

  @staticmethod
  def load(path_list):
    LOGGER.info('Loading heap dump profiles.')
    dump_list = []
    for path in path_list:
      dump_list.append(Dump.load(path, '  '))
    return DumpList(dump_list)

  def __len__(self):
    return len(self._dump_list)

  def __iter__(self):
    for dump in self._dump_list:
      yield dump

  def __getitem__(self, index):
    return self._dump_list[index]

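# A minimal usage sketch (the file names are hypothetical):
#   dumps = DumpList.load(['chrome.12345.0001.heap', 'chrome.12345.0002.heap'])
#   for dump in dumps:
#     print dump.path, dump.count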

class ProcMapsEntryAttribute(ExclusiveRangeDict.RangeAttribute):
  """Represents an entry of /proc/maps in range_dict.ExclusiveRangeDict."""
  _DUMMY_ENTRY = procfs.ProcMapsEntry(
      0,     # begin
      0,     # end
      '-',   # readable
      '-',   # writable
      '-',   # executable
      '-',   # private
      0,     # offset
      '00',  # major
      '00',  # minor
      0,     # inode
      ''     # name
      )

  def __init__(self):
    super(ProcMapsEntryAttribute, self).__init__()
    self._entry = self._DUMMY_ENTRY.as_dict()

  def __str__(self):
    return str(self._entry)

  def __repr__(self):
    return 'ProcMapsEntryAttribute' + str(self._entry)

  def __getitem__(self, key):
    return self._entry[key]

  def __setitem__(self, key, value):
    if key not in self._entry:
      raise KeyError(key)
    self._entry[key] = value

  def copy(self):
    new_entry = ProcMapsEntryAttribute()
    for key, value in self._entry.iteritems():
      new_entry[key] = copy.deepcopy(value)
    return new_entry


def skip_while(index, max_index, skipping_condition):
  """Increments |index| until |skipping_condition|(|index|) is False.

  Returns:
      A pair of (the resulting index, found), where found is True if an index
      for which skipping_condition is False was reached before hitting
      max_index, and False otherwise.
  """
  while skipping_condition(index):
    index += 1
    if index >= max_index:
      return index, False
  return index, True
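
# Example (hypothetical input): with lines == ['a\n', 'GLOBAL_STATS:\n'],
# skip_while(0, 2, lambda n: lines[n] != 'GLOBAL_STATS:\n') returns (1, True);
# if no line matched, it would return (len(lines), False).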