1# Copyright 2014 The Chromium Authors. All rights reserved. 2# Use of this source code is governed by a BSD-style license that can be 3# found in the LICENSE file. 4 5"""This module classifies NativeHeap objects filtering their allocations. 6 7The only filter currently available is 'stacktrace', which works as follows: 8 9{'name': 'rule-1', 'stacktrace': 'foo' } 10{'name': 'rule-2', 'stacktrace': ['foo', r'bar\s+baz']} 11{'name': 'rule-3', 'source_path': 'sk.*allocator'} 12{'name': 'rule-3', 'source_path': 'sk', 'stacktrace': 'SkAllocator'} 13 14 15rule-1 will match any allocation that has 'foo' in one of its stack frames. 16rule-2 will match any allocation that has a stack frame matching 'foo' AND a 17followed by a stack frame matching 'bar baz'. Note that order matters, so rule-2 18will not match a stacktrace like ['bar baz', 'foo']. 19rule-3 will match any allocation in which at least one of the source paths in 20its stack frames matches the regex sk.*allocator. 21rule-4 will match any allocation which satisfies both the conditions. 22 23TODO(primiano): introduce more filters after the first prototype with UI, for 24instance, filter by library file name or by allocation size. 25""" 26 27import collections 28import posixpath 29import re 30 31from memory_inspector.classification import results 32from memory_inspector.classification import rules 33from memory_inspector.core import exceptions 34from memory_inspector.core import native_heap 35 36 37_RESULT_KEYS = ['bytes_allocated'] 38 39 40def LoadRules(content): 41 """Loads and parses a native-heap rule tree from a content (string). 42 43 Returns: 44 An instance of |rules.Rule|, which nodes are |_NHeapRule| instances. 45 """ 46 return rules.Load(content, _NHeapRule) 47 48 49def Classify(nativeheap, rule_tree): 50 """Creates aggregated results of native heaps using the provided rules. 51 52 Args: 53 nativeheap: the heap dump being processed (a |NativeHeap| instance). 54 rule_tree: the user-defined rules that define the filtering categories. 55 56 Returns: 57 An instance of |AggreatedResults|. 58 """ 59 assert(isinstance(nativeheap, native_heap.NativeHeap)) 60 assert(isinstance(rule_tree, rules.Rule)) 61 62 res = results.AggreatedResults(rule_tree, _RESULT_KEYS) 63 for allocation in nativeheap.allocations: 64 res.AddToMatchingNodes(allocation, [allocation.total_size]) 65 return res 66 67 68def InferHeuristicRulesFromHeap(nheap, max_depth=3, threshold=0.02): 69 """Infers the rules tree from a symbolized heap snapshot. 70 71 In lack of a specific set of rules, this method can be invoked to infer a 72 meaningful rule tree starting from a heap snapshot. It will build a compact 73 radix tree from the source paths of the stack traces, which height is at most 74 |max_depth|, selecting only those nodes which contribute for at least 75 |threshold| (1.0 = 100%) w.r.t. the total allocation of the heap snapshot. 76 """ 77 assert(isinstance(nheap, native_heap.NativeHeap)) 78 79 def RadixTreeInsert(node, path): 80 """Inserts a string (path) into a radix tree (a python recursive dict). 81 82 e.g.: [/a/b/c, /a/b/d, /z/h] -> {'/a/b/': {'c': {}, 'd': {}}, '/z/h': {}} 83 """ 84 85 def GetCommonPrefix(args): 86 """Returns the common prefix between two paths (no partial paths). 87 88 e.g.: /tmp/bar, /tmp/baz will return /tmp/ (and not /tmp/ba as the dumb 89 posixpath.commonprefix implementation would do) 90 """ 91 parts = posixpath.commonprefix(args).rpartition(posixpath.sep)[0] 92 return parts + posixpath.sep if parts else '' 93 94 for node_path in node.iterkeys(): 95 pfx = GetCommonPrefix([node_path, path]) 96 if not pfx: 97 continue 98 if len(pfx) < len(node_path): 99 node[pfx] = {node_path[len(pfx):] : node[node_path]} 100 del node[node_path] 101 if len(path) > len(pfx): 102 RadixTreeInsert(node[pfx], path[len(pfx):]) 103 return 104 node[path] = {} # No common prefix, create new child in current node. 105 106 # Given an allocation of size N and its stack trace, heuristically determines 107 # the source directory to be blamed for the N bytes. 108 # The blamed_dir is the one which appears more times in the top 8 stack frames 109 # (excluding the first 2, which usually are just the (m|c)alloc call sites). 110 # At the end, this will generate a *leaderboard* (|blamed_dirs|) which 111 # associates, to each source path directory, the number of bytes allocated. 112 113 blamed_dirs = collections.Counter() # '/s/path' : bytes_from_this_path (int) 114 total_allocated = 0 115 for alloc in nheap.allocations: 116 dir_histogram = collections.Counter() 117 for frame in alloc.stack_trace.frames[2:10]: 118 # Compute a histogram (for each allocation) of the top source dirs. 119 if not frame.symbol or not frame.symbol.source_info: 120 continue 121 src_file = frame.symbol.source_info[0].source_file_path 122 src_dir = posixpath.dirname(src_file.replace('\\', '/')) + '/' 123 dir_histogram.update([src_dir]) 124 if not dir_histogram: 125 continue 126 # Add the blamed dir to the leaderboard. 127 blamed_dir = dir_histogram.most_common()[0][0] 128 blamed_dirs.update({blamed_dir : alloc.total_size}) 129 total_allocated += alloc.total_size 130 131 # Select only the top paths from the leaderboard which contribute for more 132 # than |threshold| and make a radix tree out of them. 133 radix_tree = {} 134 for blamed_dir, alloc_size in blamed_dirs.most_common(): 135 if (1.0 * alloc_size / total_allocated) < threshold: 136 break 137 RadixTreeInsert(radix_tree, blamed_dir) 138 139 # The final step consists in generating a rule tree from the radix tree. This 140 # is a pretty straightforward tree-clone operation, they have the same shape. 141 def GenRulesFromRadixTree(radix_tree_node, max_depth, parent_path=''): 142 children = [] 143 if max_depth > 0: 144 for node_path, node_children in radix_tree_node.iteritems(): 145 child_rule = { 146 'name': node_path[-16:], 147 'source_path': '^' + re.escape(parent_path + node_path), 148 'children': GenRulesFromRadixTree( 149 node_children, max_depth - 1, parent_path + node_path)} 150 children += [child_rule] 151 return children 152 153 rules_tree = GenRulesFromRadixTree(radix_tree, max_depth) 154 return LoadRules(str(rules_tree)) 155 156 157class _NHeapRule(rules.Rule): 158 def __init__(self, name, filters): 159 super(_NHeapRule, self).__init__(name) 160 161 # The 'stacktrace' filter can be either a string (simple case, one regex) or 162 # a list of strings (complex case, see doc in the header of this file). 163 stacktrace_regexs = filters.get('stacktrace', []) 164 if isinstance(stacktrace_regexs, basestring): 165 stacktrace_regexs = [stacktrace_regexs] 166 self._stacktrace_regexs = [] 167 for regex in stacktrace_regexs: 168 try: 169 self._stacktrace_regexs.append(re.compile(regex)) 170 except re.error, descr: 171 raise exceptions.MemoryInspectorException( 172 'Stacktrace regex error "%s" : %s' % (regex, descr)) 173 174 # The 'source_path' regex, instead, simply matches the source file path. 175 self._path_regex = None 176 path_regex = filters.get('source_path') 177 if path_regex: 178 try: 179 self._path_regex = re.compile(path_regex) 180 except re.error, descr: 181 raise exceptions.MemoryInspectorException( 182 'Path regex error "%s" : %s' % (path_regex, descr)) 183 184 def Match(self, allocation): 185 # Match the source file path, if the 'source_path' filter is specified. 186 if self._path_regex: 187 path_matches = False 188 for frame in allocation.stack_trace.frames: 189 if frame.symbol and frame.symbol.source_info: 190 if self._path_regex.search( 191 frame.symbol.source_info[0].source_file_path): 192 path_matches = True 193 break 194 if not path_matches: 195 return False 196 197 # Match the stack traces symbols, if the 'stacktrace' filter is specified. 198 if not self._stacktrace_regexs: 199 return True 200 cur_regex_idx = 0 201 cur_regex = self._stacktrace_regexs[0] 202 for frame in allocation.stack_trace.frames: 203 if frame.symbol and cur_regex.search(frame.symbol.name): 204 # The current regex has been matched. 205 if cur_regex_idx == len(self._stacktrace_regexs) - 1: 206 return True # All the provided regexs have been matched, we're happy. 207 cur_regex_idx += 1 208 cur_regex = self._stacktrace_regexs[cur_regex_idx] 209 210 return False # Not all the provided regexs have been matched.