• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2014 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5"""This module classifies NativeHeap objects filtering their allocations.
6
7The only filter currently available is 'stacktrace', which works as follows:
8
9{'name': 'rule-1', 'stacktrace': 'foo' }
10{'name': 'rule-2', 'stacktrace': ['foo', r'bar\s+baz']}
11{'name': 'rule-3', 'source_path': 'sk.*allocator'}
12{'name': 'rule-3', 'source_path': 'sk', 'stacktrace': 'SkAllocator'}
13
14
15rule-1 will match any allocation that has 'foo' in one of its  stack frames.
16rule-2 will match any allocation that has a stack frame matching 'foo' AND a
17followed by a stack frame matching 'bar baz'. Note that order matters, so rule-2
18will not match a stacktrace like ['bar baz', 'foo'].
19rule-3 will match any allocation in which at least one of the source paths in
20its stack frames matches the regex sk.*allocator.
21rule-4 will match any allocation which satisfies both the conditions.
22
23TODO(primiano): introduce more filters after the first prototype with UI, for
24instance, filter by library file name or by allocation size.
25"""
26
27import collections
28import posixpath
29import re
30
31from memory_inspector.classification import results
32from memory_inspector.classification import rules
33from memory_inspector.core import exceptions
34from memory_inspector.core import native_heap
35
36
37_RESULT_KEYS = ['bytes_allocated']
38
39
40def LoadRules(content):
41  """Loads and parses a native-heap rule tree from a content (string).
42
43  Returns:
44    An instance of |rules.Rule|, which nodes are |_NHeapRule| instances.
45  """
46  return rules.Load(content, _NHeapRule)
47
48
49def Classify(nativeheap, rule_tree):
50  """Creates aggregated results of native heaps using the provided rules.
51
52  Args:
53    nativeheap: the heap dump being processed (a |NativeHeap| instance).
54    rule_tree: the user-defined rules that define the filtering categories.
55
56  Returns:
57    An instance of |AggreatedResults|.
58  """
59  assert(isinstance(nativeheap, native_heap.NativeHeap))
60  assert(isinstance(rule_tree, rules.Rule))
61
62  res = results.AggreatedResults(rule_tree, _RESULT_KEYS)
63  for allocation in nativeheap.allocations:
64    res.AddToMatchingNodes(allocation, [allocation.total_size])
65  return res
66
67
68def InferHeuristicRulesFromHeap(nheap, max_depth=3, threshold=0.02):
69  """Infers the rules tree from a symbolized heap snapshot.
70
71  In lack of a specific set of rules, this method can be invoked to infer a
72  meaningful rule tree starting from a heap snapshot. It will build a compact
73  radix tree from the source paths of the stack traces, which height is at most
74  |max_depth|, selecting only those nodes which contribute for at least
75  |threshold| (1.0 = 100%) w.r.t. the total allocation of the heap snapshot.
76  """
77  assert(isinstance(nheap, native_heap.NativeHeap))
78
79  def RadixTreeInsert(node, path):
80    """Inserts a string (path) into a radix tree (a python recursive dict).
81
82    e.g.: [/a/b/c, /a/b/d, /z/h] -> {'/a/b/': {'c': {}, 'd': {}}, '/z/h': {}}
83    """
84
85    def GetCommonPrefix(args):
86      """Returns the common prefix between two paths (no partial paths).
87
88      e.g.: /tmp/bar, /tmp/baz will return /tmp/ (and not /tmp/ba as the dumb
89      posixpath.commonprefix implementation would do)
90      """
91      parts = posixpath.commonprefix(args).rpartition(posixpath.sep)[0]
92      return parts + posixpath.sep if parts else ''
93
94    for node_path in node.iterkeys():
95      pfx = GetCommonPrefix([node_path, path])
96      if not pfx:
97        continue
98      if len(pfx) < len(node_path):
99        node[pfx] = {node_path[len(pfx):] : node[node_path]}
100        del node[node_path]
101      if len(path) > len(pfx):
102        RadixTreeInsert(node[pfx], path[len(pfx):])
103      return
104    node[path] = {}  # No common prefix, create new child in current node.
105
106  # Given an allocation of size N and its stack trace, heuristically determines
107  # the source directory to be blamed for the N bytes.
108  # The blamed_dir is the one which appears more times in the top 8 stack frames
109  # (excluding the first 2, which usually are just the (m|c)alloc call sites).
110  # At the end, this will generate a *leaderboard* (|blamed_dirs|) which
111  # associates, to each source path directory, the number of bytes allocated.
112
113  blamed_dirs = collections.Counter()  # '/s/path' : bytes_from_this_path (int)
114  total_allocated = 0
115  for alloc in nheap.allocations:
116    dir_histogram = collections.Counter()
117    for frame in alloc.stack_trace.frames[2:10]:
118      # Compute a histogram (for each allocation) of the top source dirs.
119      if not frame.symbol or not frame.symbol.source_info:
120        continue
121      src_file = frame.symbol.source_info[0].source_file_path
122      src_dir = posixpath.dirname(src_file.replace('\\', '/')) + '/'
123      dir_histogram.update([src_dir])
124    if not dir_histogram:
125      continue
126    # Add the blamed dir to the leaderboard.
127    blamed_dir = dir_histogram.most_common()[0][0]
128    blamed_dirs.update({blamed_dir : alloc.total_size})
129    total_allocated += alloc.total_size
130
131  # Select only the top paths from the leaderboard which contribute for more
132  # than |threshold| and make a radix tree out of them.
133  radix_tree = {}
134  for blamed_dir, alloc_size in blamed_dirs.most_common():
135    if (1.0 * alloc_size / total_allocated) < threshold:
136      break
137    RadixTreeInsert(radix_tree, blamed_dir)
138
139  # The final step consists in generating a rule tree from the radix tree. This
140  # is a pretty straightforward tree-clone operation, they have the same shape.
141  def GenRulesFromRadixTree(radix_tree_node, max_depth, parent_path=''):
142    children = []
143    if max_depth > 0:
144      for node_path, node_children in radix_tree_node.iteritems():
145        child_rule = {
146            'name': node_path[-16:],
147            'source_path': '^' + re.escape(parent_path + node_path),
148            'children': GenRulesFromRadixTree(
149                node_children, max_depth - 1, parent_path + node_path)}
150        children += [child_rule]
151    return children
152
153  rules_tree = GenRulesFromRadixTree(radix_tree, max_depth)
154  return LoadRules(str(rules_tree))
155
156
157class _NHeapRule(rules.Rule):
158  def __init__(self, name, filters):
159    super(_NHeapRule, self).__init__(name)
160
161    # The 'stacktrace' filter can be either a string (simple case, one regex) or
162    # a list of strings (complex case, see doc in the header of this file).
163    stacktrace_regexs = filters.get('stacktrace', [])
164    if isinstance(stacktrace_regexs, basestring):
165      stacktrace_regexs = [stacktrace_regexs]
166    self._stacktrace_regexs = []
167    for regex in stacktrace_regexs:
168      try:
169        self._stacktrace_regexs.append(re.compile(regex))
170      except re.error, descr:
171        raise exceptions.MemoryInspectorException(
172            'Stacktrace regex error "%s" : %s' % (regex, descr))
173
174    # The 'source_path' regex, instead, simply matches the source file path.
175    self._path_regex = None
176    path_regex = filters.get('source_path')
177    if path_regex:
178      try:
179        self._path_regex = re.compile(path_regex)
180      except re.error, descr:
181        raise exceptions.MemoryInspectorException(
182            'Path regex error "%s" : %s' % (path_regex, descr))
183
184  def Match(self, allocation):
185    # Match the source file path, if the 'source_path' filter is specified.
186    if self._path_regex:
187      path_matches = False
188      for frame in allocation.stack_trace.frames:
189        if frame.symbol and frame.symbol.source_info:
190          if self._path_regex.search(
191              frame.symbol.source_info[0].source_file_path):
192            path_matches = True
193            break
194      if not path_matches:
195        return False
196
197    # Match the stack traces symbols, if the 'stacktrace' filter is specified.
198    if not self._stacktrace_regexs:
199      return True
200    cur_regex_idx = 0
201    cur_regex = self._stacktrace_regexs[0]
202    for frame in allocation.stack_trace.frames:
203      if frame.symbol and cur_regex.search(frame.symbol.name):
204        # The current regex has been matched.
205        if cur_regex_idx == len(self._stacktrace_regexs) - 1:
206          return True  # All the provided regexs have been matched, we're happy.
207        cur_regex_idx += 1
208        cur_regex = self._stacktrace_regexs[cur_regex_idx]
209
210    return False  # Not all the provided regexs have been matched.