• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2013 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import json
6import logging
7import os
8import re
9
10
# Module-wide logger, registered under the name 'dmprof'.
LOGGER = logging.getLogger('dmprof')

# Parent of the directory that contains this file; policy file names are
# resolved against it.
BASE_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
# The "default policy directory": a JSON file mapping labels to policy files.
POLICIES_JSON_PATH = os.path.join(BASE_PATH, 'policies.json')

# Heap Profile Policy versions

# POLICY_DEEP_1 DOES NOT include allocation_type columns.
# mmap regions are distinguished by mmap frames in the pattern column.
POLICY_DEEP_1 = 'POLICY_DEEP_1'

# POLICY_DEEP_2 DOES include allocation_type columns.
# mmap regions are distinguished by the allocation_type column.
POLICY_DEEP_2 = 'POLICY_DEEP_2'

# POLICY_DEEP_3 is in JSON format.
POLICY_DEEP_3 = 'POLICY_DEEP_3'

# POLICY_DEEP_4 contains typeinfo.
POLICY_DEEP_4 = 'POLICY_DEEP_4'
31
32
class Rule(object):
  """A single matching rule read from a policy file.

  Every *_pattern argument is a regular expression source string.  A
  non-empty pattern is compiled with a trailing r'\Z', so that applying
  match() to the compiled pattern requires the match to extend to the end
  of the string.  A missing or empty pattern is stored as None.
  """

  def __init__(self,
               name,
               allocator_type,
               stackfunction_pattern=None,
               stacksourcefile_pattern=None,
               typeinfo_pattern=None,
               mappedpathname_pattern=None,
               mappedpermission_pattern=None,
               sharedwith=None):
    self._name = name
    self._allocator_type = allocator_type

    anchored = self._compile_anchored
    self._stackfunction_pattern = anchored(stackfunction_pattern)
    self._stacksourcefile_pattern = anchored(stacksourcefile_pattern)
    self._typeinfo_pattern = anchored(typeinfo_pattern)
    self._mappedpathname_pattern = anchored(mappedpathname_pattern)
    self._mappedpermission_pattern = anchored(mappedpermission_pattern)

    # A falsy |sharedwith| (None or empty) is normalized to a fresh list.
    self._sharedwith = sharedwith if sharedwith else []

  @staticmethod
  def _compile_anchored(pattern):
    """Compiles |pattern| with r'\Z' appended; returns None if it is falsy."""
    if not pattern:
      return None
    return re.compile(pattern + r'\Z')

  @property
  def name(self):
    return self._name

  @property
  def allocator_type(self):
    return self._allocator_type

  @property
  def stackfunction_pattern(self):
    return self._stackfunction_pattern

  @property
  def stacksourcefile_pattern(self):
    return self._stacksourcefile_pattern

  @property
  def typeinfo_pattern(self):
    return self._typeinfo_pattern

  @property
  def mappedpathname_pattern(self):
    return self._mappedpathname_pattern

  @property
  def mappedpermission_pattern(self):
    return self._mappedpermission_pattern

  @property
  def sharedwith(self):
    return self._sharedwith
106
107
class Policy(object):
  """Represents a policy, i.e. the parsed content of a policy file.

  A policy holds an ordered list of rules.  The find_* methods walk the rules
  in order and return the name of the first rule that matches.
  """

  # Naive "//" comment matcher.  No longer used for parsing because it also
  # matches "//" inside JSON string values; kept so that existing references
  # to this attribute keep working.
  JSON_COMMENT_REGEX = re.compile(r'//.*')

  # Matches either a complete JSON string literal or a "//" comment running to
  # the end of the line.  String literals are matched first so that a "//"
  # inside a string is never mistaken for a comment.
  _JSON_STRING_OR_COMMENT_REGEX = re.compile(r'"(?:\\.|[^"\\])*"|//.*')

  def __init__(self, rules, version, components):
    self._rules = rules
    self._version = version
    self._components = components

  @property
  def rules(self):
    return self._rules

  @property
  def version(self):
    return self._version

  @property
  def components(self):
    return self._components

  def find_rule(self, component_name):
    """Finds a rule whose name is |component_name|.

    Returns:
        The first matching rule, or None if no rule has that name.
    """
    for rule in self._rules:
      if rule.name == component_name:
        return rule
    return None

  def find_malloc(self, bucket):
    """Finds a matching component name which a given |bucket| belongs to.

    The result is stored in |bucket.component_cache| and reused on later calls.

    Args:
        bucket: A Bucket object to be searched for.  May be falsy.

    Returns:
        A string representing a component name, or 'no-bucket' if |bucket| is
        falsy.

    Raises:
        AssertionError: If |bucket| is not a malloc bucket, or if no rule
            matches it.
    """
    assert not bucket or bucket.allocator_type == 'malloc'

    if not bucket:
      return 'no-bucket'
    if bucket.component_cache:
      return bucket.component_cache

    stackfunction = bucket.symbolized_joined_stackfunction
    stacksourcefile = bucket.symbolized_joined_stacksourcefile
    typeinfo = bucket.symbolized_typeinfo
    # A symbolized typeinfo starting with '0x' is replaced by the bucket's
    # typeinfo_name before matching.
    if typeinfo.startswith('0x'):
      typeinfo = bucket.typeinfo_name

    for rule in self._rules:
      if (rule.allocator_type == 'malloc' and
          self._stack_matches(rule, stackfunction, stacksourcefile) and
          (not rule.typeinfo_pattern or rule.typeinfo_pattern.match(typeinfo))):
        bucket.component_cache = rule.name
        return rule.name

    # Raised explicitly (instead of "assert False") so that the failure is
    # still reported when Python runs with assertions disabled (-O).
    raise AssertionError('No malloc rule matches the given bucket.')

  def find_mmap(self, region, bucket_set,
                pageframe=None, group_pfn_counts=None):
    """Finds a matching component which a given mmap |region| belongs to.

    It uses |bucket_set| to match with backtraces.  If |pageframe| is given,
    it considers memory sharing among processes.

    NOTE: Don't use Bucket's |component_cache| for mmap regions because they're
    classified not only with bucket information (mappedpathname for example).

    Args:
        region: A tuple representing a memory region.
        bucket_set: A BucketSet object to look up backtraces.
        pageframe: A PageFrame object representing a pageframe maybe including
            a pagecount.
        group_pfn_counts: A dict mapping a PFN to the number of times the
            pageframe is mapped by the known "group (Chrome)" processes.

    Returns:
        A tuple (component name, bucket).  It is ('no-bucket', None) if the
        region's bucket is not found in |bucket_set|.

    Raises:
        AssertionError: If |region| is not a 'hooked' region, if its bucket is
            not an mmap bucket, or if no rule matches.
    """
    assert region[0] == 'hooked'
    bucket = bucket_set.get(region[1]['bucket_id'])
    assert not bucket or bucket.allocator_type == 'mmap'

    if not bucket:
      return 'no-bucket', None

    stackfunction = bucket.symbolized_joined_stackfunction
    stacksourcefile = bucket.symbolized_joined_stacksourcefile
    sharedwith = self._categorize_pageframe(pageframe, group_pfn_counts)

    for rule in self._rules:
      if (rule.allocator_type == 'mmap' and
          self._stack_matches(rule, stackfunction, stacksourcefile) and
          self._vma_matches(rule, region) and
          self._sharing_matches(rule, pageframe, sharedwith)):
        return rule.name, bucket

    # See find_malloc: must not be silently skipped under -O.
    raise AssertionError('No mmap rule matches the given region.')

  def find_unhooked(self, region, pageframe=None, group_pfn_counts=None):
    """Finds a matching component which a given unhooked |region| belongs to.

    If |pageframe| is given, it considers memory sharing among processes.

    Args:
        region: A tuple representing a memory region.
        pageframe: A PageFrame object representing a pageframe maybe including
            a pagecount.
        group_pfn_counts: A dict mapping a PFN to the number of times the
            pageframe is mapped by the known "group (Chrome)" processes.

    Returns:
        A string representing a component name.

    Raises:
        AssertionError: If |region| is not an 'unhooked' region, or if no rule
            matches.
    """
    assert region[0] == 'unhooked'
    sharedwith = self._categorize_pageframe(pageframe, group_pfn_counts)

    for rule in self._rules:
      if (rule.allocator_type == 'unhooked' and
          self._vma_matches(rule, region) and
          self._sharing_matches(rule, pageframe, sharedwith)):
        return rule.name

    # See find_malloc: must not be silently skipped under -O.
    raise AssertionError('No unhooked rule matches the given region.')

  @staticmethod
  def _stack_matches(rule, stackfunction, stacksourcefile):
    """Checks |rule|'s stack patterns; an unset pattern matches anything."""
    return ((not rule.stackfunction_pattern or
             rule.stackfunction_pattern.match(stackfunction)) and
            (not rule.stacksourcefile_pattern or
             rule.stacksourcefile_pattern.match(stacksourcefile)))

  @staticmethod
  def _vma_matches(rule, region):
    """Checks |rule|'s mapped pathname / permission patterns for |region|.

    |region|'s 'vma' entry is read only when the rule actually has the
    corresponding pattern; an unset pattern matches anything.
    """
    if (rule.mappedpathname_pattern and
        not rule.mappedpathname_pattern.match(region[1]['vma']['name'])):
      return False
    if rule.mappedpermission_pattern:
      vma = region[1]['vma']
      permission = (vma['readable'] + vma['writable'] +
                    vma['executable'] + vma['private'])
      if not rule.mappedpermission_pattern.match(permission):
        return False
    return True

  @staticmethod
  def _sharing_matches(rule, pageframe, sharedwith):
    """Checks |rule|'s sharedwith list against a sharing category.

    The check passes when the rule has no sharedwith restriction or when no
    |pageframe| was given.
    """
    return (not rule.sharedwith or
            not pageframe or
            sharedwith in rule.sharedwith)

  @staticmethod
  def load(filename, filetype):
    """Loads a policy file of |filename| in the format |filetype|.

    Args:
        filename: A filename to be loaded.  It is joined onto BASE_PATH.
        filetype: A string to specify a type of the file.  Only 'json' is
            supported for now.

    Returns:
        A loaded Policy object, or None if |filetype| is not supported.
    """
    with open(os.path.join(BASE_PATH, filename)) as policy_f:
      return Policy.parse(policy_f, filetype)

  @staticmethod
  def parse(policy_f, filetype):
    """Parses a policy file content in the format |filetype|.

    Args:
        policy_f: An IO object to be loaded.
        filetype: A string to specify a type of the file.  Only 'json' is
            supported for now.

    Returns:
        A loaded Policy object, or None if |filetype| is not supported.
    """
    if filetype == 'json':
      return Policy._parse_json(policy_f)
    return None

  @staticmethod
  def _strip_json_comments(text):
    """Removes "//" line comments from |text|, leaving string literals intact."""
    def keep_strings(match):
      token = match.group(0)
      # String literals are returned unchanged; comments are dropped.
      return token if token.startswith('"') else ''
    return Policy._JSON_STRING_OR_COMMENT_REGEX.sub(keep_strings, text)

  @staticmethod
  def _parse_json(policy_f):
    """Parses policy file in json format.

    A policy file contains component's names and their stacktrace pattern
    written in regular expression.  Those patterns are matched against each
    symbols of each stacktraces in the order written in the policy file.

    "//" line comments are stripped before the content is parsed as JSON.

    Args:
         policy_f: A File/IO object to read.

    Returns:
         A loaded policy object.
    """
    policy = json.loads(Policy._strip_json_comments(policy_f.read()))

    rules = []
    for rule in policy['rules']:
      # 'stacktrace' is accepted as a fallback key for 'stackfunction'.
      stackfunction = rule.get('stackfunction') or rule.get('stacktrace')
      rules.append(Rule(
          rule['name'],
          rule['allocator'],  # allocator_type
          stackfunction,
          rule.get('stacksourcefile'),
          rule.get('typeinfo'),
          rule.get('mappedpathname'),
          rule.get('mappedpermission'),
          rule.get('sharedwith')))

    return Policy(rules, policy['version'], policy['components'])

  @staticmethod
  def _categorize_pageframe(pageframe, group_pfn_counts):
    """Categorizes a pageframe based on its sharing status.

    Args:
        pageframe: A PageFrame object, or a falsy value if unavailable.
        group_pfn_counts: A dict mapping a PFN to the number of times the
            pageframe is mapped by group processes.  None is treated as an
            empty dict.

    Returns:
        'private' if |pageframe| is not shared with other processes.  'group'
        if |pageframe| is shared only with group (Chrome-related) processes.
        'others' if |pageframe| is shared with non-group processes.
    """
    if not pageframe:
      return 'private'

    # Callers may pass a pageframe without group counts (the default is None).
    if group_pfn_counts is None:
      group_pfn_counts = {}

    if pageframe.pagecount:
      if pageframe.pagecount == 1:
        return 'private'
      if pageframe.pagecount <= group_pfn_counts.get(pageframe.pfn, 0) + 1:
        return 'group'
      return 'others'

    # No pagecount available: fall back to membership in the group counts.
    if pageframe.pfn in group_pfn_counts:
      return 'group'
    return 'private'
347
348
class PolicySet(object):
  """Represents a set of policies, keyed by label.

  Supports len(), iteration over labels, and lookup of a policy by label.
  """

  def __init__(self, policy_directory):
    # A dict-like object mapping a label to its loaded policy.
    self._policy_directory = policy_directory

  @staticmethod
  def load(labels=None):
    """Loads a set of policies via the "default policy directory".

    The "default policy directory" contains pairs of policies and their labels.
    For example, a policy "policy.l0.json" is labeled "l0" in the default
    policy directory "policies.json".

    All policies in the directory are loaded by default.  Policies can be
    limited by |labels|.  Labels that are not in the default policy directory
    are skipped with a warning.

    Args:
        labels: An array that contains policy labels to be loaded.

    Returns:
        A PolicySet object.
    """
    default_policy_directory = PolicySet._load_default_policy_directory()
    if not labels:
      return PolicySet._load_policies(default_policy_directory)

    specified_policy_directory = {}
    for label in labels:
      if label in default_policy_directory:
        specified_policy_directory[label] = default_policy_directory[label]
      else:
        # TODO(dmikurube): Load an un-labeled policy file.
        LOGGER.warning('Ignoring unknown policy label: %s', label)
    return PolicySet._load_policies(specified_policy_directory)

  def __len__(self):
    return len(self._policy_directory)

  def __iter__(self):
    return iter(self._policy_directory)

  def __getitem__(self, label):
    return self._policy_directory[label]

  @staticmethod
  def _load_default_policy_directory():
    """Reads POLICIES_JSON_PATH and returns its parsed JSON content."""
    with open(POLICIES_JSON_PATH, mode='r') as policies_f:
      return json.load(policies_f)

  @staticmethod
  def _load_policies(directory):
    """Loads every policy listed in |directory|.

    Args:
        directory: A dict mapping a label to a dict that has 'file' and
            'format' entries.

    Returns:
        A PolicySet of the policies that could be loaded.  Entries for which
        Policy.load returns a falsy value are left out.
    """
    LOGGER.info('Loading policy files.')
    policies = {}
    for label, entry in directory.items():
      # Arguments are passed separately so that formatting happens only when
      # the record is actually emitted.
      LOGGER.info('  %s: %s', label, entry['file'])
      loaded = Policy.load(entry['file'], entry['format'])
      if loaded:
        policies[label] = loaded
    return PolicySet(policies)
409