# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import json
import logging
import os
import re


LOGGER = logging.getLogger('dmprof')

BASE_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
POLICIES_JSON_PATH = os.path.join(BASE_PATH, 'policies.json')

# Heap Profile Policy versions

# POLICY_DEEP_1 DOES NOT include allocation_type columns.
# mmap regions are distinguished by mmap frames in the pattern column.
POLICY_DEEP_1 = 'POLICY_DEEP_1'

# POLICY_DEEP_2 DOES include allocation_type columns.
# mmap regions are distinguished by the allocation_type column.
POLICY_DEEP_2 = 'POLICY_DEEP_2'

# POLICY_DEEP_3 is in JSON format.
POLICY_DEEP_3 = 'POLICY_DEEP_3'

# POLICY_DEEP_4 contains typeinfo.
POLICY_DEEP_4 = 'POLICY_DEEP_4'


def _compile_anchored(pattern):
    """Compiles |pattern| so that a match has to reach the end of the input.

    Args:
        pattern: A regular expression string, or None/empty for "no pattern".

    Returns:
        A compiled pattern object for |pattern| followed by r'\\Z', or None
        when |pattern| is None or empty.
    """
    if not pattern:
        return None
    return re.compile(pattern + r'\Z')


class Rule(object):
    """Represents one matching rule in a policy file.

    Every *_pattern property is either None (the rule does not constrain that
    field) or a compiled regular expression anchored at the end with r'\\Z'.
    """

    def __init__(self, name, allocator_type, stackfunction_pattern=None,
                 stacksourcefile_pattern=None, typeinfo_pattern=None,
                 mappedpathname_pattern=None, mappedpermission_pattern=None,
                 sharedwith=None):
        self._name = name
        self._allocator_type = allocator_type

        self._stackfunction_pattern = _compile_anchored(stackfunction_pattern)
        self._stacksourcefile_pattern = _compile_anchored(
            stacksourcefile_pattern)
        self._typeinfo_pattern = _compile_anchored(typeinfo_pattern)
        self._mappedpathname_pattern = _compile_anchored(
            mappedpathname_pattern)
        self._mappedpermission_pattern = _compile_anchored(
            mappedpermission_pattern)

        # A missing or empty |sharedwith| means "no sharing constraint".
        self._sharedwith = sharedwith or []

    @property
    def name(self):
        return self._name

    @property
    def allocator_type(self):
        return self._allocator_type

    @property
    def stackfunction_pattern(self):
        return self._stackfunction_pattern

    @property
    def stacksourcefile_pattern(self):
        return self._stacksourcefile_pattern

    @property
    def typeinfo_pattern(self):
        return self._typeinfo_pattern

    @property
    def mappedpathname_pattern(self):
        return self._mappedpathname_pattern

    @property
    def mappedpermission_pattern(self):
        return self._mappedpermission_pattern

    @property
    def sharedwith(self):
        return self._sharedwith


class Policy(object):
    """Represents a policy, a content of a policy file."""

    def __init__(self, rules, version, components):
        self._rules = rules
        self._version = version
        self._components = components

    @property
    def rules(self):
        return self._rules

    @property
    def version(self):
        return self._version

    @property
    def components(self):
        return self._components

    def find_rule(self, component_name):
        """Finds a rule whose name is |component_name|.

        Returns:
            The first matching Rule object in policy order, or None if no
            rule has that name.
        """
        for rule in self._rules:
            if rule.name == component_name:
                return rule
        return None

    def find_malloc(self, bucket):
        """Finds a matching component name which a given |bucket| belongs to.

        The result is stored in |bucket.component_cache| and reused on later
        calls for the same bucket.

        Args:
            bucket: A Bucket object to be searched for.

        Returns:
            A string representing a component name, or 'no-bucket' when
            |bucket| is None (or otherwise false).

        Raises:
            AssertionError: If no 'malloc' rule in the policy matches.
        """
        assert not bucket or bucket.allocator_type == 'malloc'

        if not bucket:
            return 'no-bucket'
        if bucket.component_cache:
            return bucket.component_cache

        stackfunction = bucket.symbolized_joined_stackfunction
        stacksourcefile = bucket.symbolized_joined_stacksourcefile
        typeinfo = bucket.symbolized_typeinfo
        # A typeinfo that still starts with '0x' is replaced by the bucket's
        # typeinfo_name before matching.
        if typeinfo.startswith('0x'):
            typeinfo = bucket.typeinfo_name

        for rule in self._rules:
            if rule.allocator_type != 'malloc':
                continue
            if (rule.stackfunction_pattern and
                    not rule.stackfunction_pattern.match(stackfunction)):
                continue
            if (rule.stacksourcefile_pattern and
                    not rule.stacksourcefile_pattern.match(stacksourcefile)):
                continue
            if (rule.typeinfo_pattern and
                    not rule.typeinfo_pattern.match(typeinfo)):
                continue
            bucket.component_cache = rule.name
            return rule.name

        # Raised explicitly (instead of "assert False") so that the failure
        # is not silently dropped when Python runs with -O.
        raise AssertionError('No malloc rule matches the given bucket.')

    def find_mmap(self, region, bucket_set,
                  pageframe=None, group_pfn_counts=None):
        """Finds a matching component which a given mmap |region| belongs to.

        It uses |bucket_set| to match with backtraces.  If |pageframe| is
        given, it considers memory sharing among processes.

        NOTE: Don't use Bucket's |component_cache| for mmap regions because
        they're classified not only with bucket information (mappedpathname
        for example).

        Args:
            region: A tuple representing a memory region.
            bucket_set: A BucketSet object to look up backtraces.
            pageframe: A PageFrame object representing a pageframe maybe
                including a pagecount.
            group_pfn_counts: A dict mapping a PFN to the number of times the
                pageframe is mapped by the known "group (Chrome)" processes.
                None is treated as an empty dict.

        Returns:
            A tuple of (component name, bucket).  It is ('no-bucket', None)
            when |bucket_set| has no bucket for the region.

        Raises:
            AssertionError: If no 'mmap' rule in the policy matches.
        """
        assert region[0] == 'hooked'
        bucket = bucket_set.get(region[1]['bucket_id'])
        assert not bucket or bucket.allocator_type == 'mmap'

        if not bucket:
            return 'no-bucket', None

        stackfunction = bucket.symbolized_joined_stackfunction
        stacksourcefile = bucket.symbolized_joined_stacksourcefile
        sharedwith = self._categorize_pageframe(pageframe, group_pfn_counts)

        for rule in self._rules:
            if rule.allocator_type != 'mmap':
                continue
            if (rule.stackfunction_pattern and
                    not rule.stackfunction_pattern.match(stackfunction)):
                continue
            if (rule.stacksourcefile_pattern and
                    not rule.stacksourcefile_pattern.match(stacksourcefile)):
                continue
            if not self._matches_mapping(rule, region, pageframe, sharedwith):
                continue
            return rule.name, bucket

        # Raised explicitly (instead of "assert False") so that the failure
        # is not silently dropped when Python runs with -O.
        raise AssertionError('No mmap rule matches the given region.')

    def find_unhooked(self, region, pageframe=None, group_pfn_counts=None):
        """Finds a matching component which an unhooked |region| belongs to.

        If |pageframe| is given, it considers memory sharing among processes.

        Args:
            region: A tuple representing a memory region.
            pageframe: A PageFrame object representing a pageframe maybe
                including a pagecount.
            group_pfn_counts: A dict mapping a PFN to the number of times the
                pageframe is mapped by the known "group (Chrome)" processes.
                None is treated as an empty dict.

        Returns:
            A string representing a component name.

        Raises:
            AssertionError: If no 'unhooked' rule in the policy matches.
        """
        assert region[0] == 'unhooked'
        sharedwith = self._categorize_pageframe(pageframe, group_pfn_counts)

        for rule in self._rules:
            if rule.allocator_type != 'unhooked':
                continue
            if not self._matches_mapping(rule, region, pageframe, sharedwith):
                continue
            return rule.name

        # Raised explicitly (instead of "assert False") so that the failure
        # is not silently dropped when Python runs with -O.
        raise AssertionError('No unhooked rule matches the given region.')

    @staticmethod
    def _matches_mapping(rule, region, pageframe, sharedwith):
        """Checks the mapping-related constraints of |rule| against |region|.

        The region's 'vma' entry is read only when |rule| actually has a
        pathname or permission pattern.

        Args:
            rule: A Rule object.
            region: A tuple representing a memory region.
            pageframe: A PageFrame object, or None.  The sharing constraint
                is checked only when this is given.
            sharedwith: The sharing category computed for |pageframe|.

        Returns:
            True if the mapped pathname, mapped permission and sharing
            constraints of |rule| are all satisfied.
        """
        if (rule.mappedpathname_pattern and
                not rule.mappedpathname_pattern.match(
                    region[1]['vma']['name'])):
            return False

        if rule.mappedpermission_pattern:
            vma = region[1]['vma']
            permission = (vma['readable'] + vma['writable'] +
                          vma['executable'] + vma['private'])
            if not rule.mappedpermission_pattern.match(permission):
                return False

        if rule.sharedwith and pageframe and sharedwith not in rule.sharedwith:
            return False
        return True

    @staticmethod
    def load(filename, filetype):
        """Loads a policy file of |filename| in a |filetype| format.

        Args:
            filename: A filename to be loaded, joined onto BASE_PATH.
            filetype: A string to specify a type of the file.  Only 'json' is
                supported for now.

        Returns:
            A loaded Policy object, or None for an unsupported |filetype|.
        """
        with open(os.path.join(BASE_PATH, filename)) as policy_f:
            return Policy.parse(policy_f, filetype)

    @staticmethod
    def parse(policy_f, filetype):
        """Parses a policy file content in a |filetype| format.

        Args:
            policy_f: An IO object to be loaded.
            filetype: A string to specify a type of the file.  Only 'json' is
                supported for now.

        Returns:
            A loaded Policy object, or None for an unsupported |filetype|.
        """
        if filetype == 'json':
            return Policy._parse_json(policy_f)
        return None

    # Kept for backward compatibility with code that refers to it.
    JSON_COMMENT_REGEX = re.compile(r'//.*')

    # Matches either a complete JSON string literal or a '//' comment running
    # to the end of the line.  String literals are matched first so that a
    # '//' inside a string value is not mistaken for a comment.
    _JSON_STRING_OR_COMMENT_REGEX = re.compile(
        r'"(?:\\.|[^"\\\n])*"|//.*')

    @staticmethod
    def _strip_json_comments(text):
        """Removes '//' comments from |text|, leaving string literals intact."""
        def keep_strings(match):
            token = match.group(0)
            return token if token.startswith('"') else ''
        return Policy._JSON_STRING_OR_COMMENT_REGEX.sub(keep_strings, text)

    @staticmethod
    def _parse_json(policy_f):
        """Parses policy file in json format.

        A policy file contains component's names and their stacktrace pattern
        written in regular expression.  Those patterns are matched against
        each symbols of each stacktraces in the order written in the policy
        file.  '//' comments are removed before the content is decoded.

        Args:
            policy_f: A File/IO object to read.

        Returns:
            A loaded policy object.
        """
        policy = json.loads(Policy._strip_json_comments(policy_f.read()))

        rules = []
        for rule in policy['rules']:
            # 'stacktrace' is accepted as a fallback key for 'stackfunction'.
            stackfunction = rule.get('stackfunction') or rule.get('stacktrace')
            rules.append(Rule(
                rule['name'],
                rule['allocator'],  # allocator_type
                stackfunction,
                rule.get('stacksourcefile'),
                rule.get('typeinfo'),
                rule.get('mappedpathname'),
                rule.get('mappedpermission'),
                rule.get('sharedwith')))

        return Policy(rules, policy['version'], policy['components'])

    @staticmethod
    def _categorize_pageframe(pageframe, group_pfn_counts):
        """Categorizes a pageframe based on its sharing status.

        Args:
            pageframe: A PageFrame object, or None.
            group_pfn_counts: A dict mapping a PFN to the number of times the
                pageframe is mapped by the group processes.  None is treated
                as an empty dict.

        Returns:
            'private' if |pageframe| is not shared with other processes.
            'group' if |pageframe| is shared only with group (Chrome-related)
                processes.
            'others' if |pageframe| is shared with non-group processes.
        """
        if not pageframe:
            return 'private'

        # Callers default |group_pfn_counts| to None; without this the
        # lookups below would fail on it.
        if group_pfn_counts is None:
            group_pfn_counts = {}

        if pageframe.pagecount:
            if pageframe.pagecount == 1:
                return 'private'
            if pageframe.pagecount <= group_pfn_counts.get(pageframe.pfn, 0) + 1:
                return 'group'
            return 'others'

        # No pagecount is available: fall back to whether the PFN is known to
        # be mapped by the group processes at all.
        if pageframe.pfn in group_pfn_counts:
            return 'group'
        return 'private'


class PolicySet(object):
    """Represents a set of policies, keyed by their labels."""

    def __init__(self, policy_directory):
        self._policy_directory = policy_directory

    @staticmethod
    def load(labels=None):
        """Loads a set of policies via the "default policy directory".

        The "default policy directory" contains pairs of policies and their
        labels.  For example, a policy "policy.l0.json" is labeled "l0" in
        the default policy directory "policies.json".

        All policies in the directory are loaded by default.  Policies can be
        limited by |labels|; labels that are not in the directory are
        ignored.

        Args:
            labels: An array that contains policy labels to be loaded.

        Returns:
            A PolicySet object.
        """
        default_policy_directory = PolicySet._load_default_policy_directory()
        if not labels:
            return PolicySet._load_policies(default_policy_directory)

        specified_policy_directory = {}
        for label in labels:
            if label in default_policy_directory:
                specified_policy_directory[label] = (
                    default_policy_directory[label])
            # TODO(dmikurube): Load an un-labeled policy file.
        return PolicySet._load_policies(specified_policy_directory)

    def __len__(self):
        return len(self._policy_directory)

    def __iter__(self):
        return iter(self._policy_directory)

    def __getitem__(self, label):
        return self._policy_directory[label]

    @staticmethod
    def _load_default_policy_directory():
        """Reads and decodes the JSON file at POLICIES_JSON_PATH."""
        with open(POLICIES_JSON_PATH, mode='r') as policies_f:
            return json.load(policies_f)

    @staticmethod
    def _load_policies(directory):
        """Loads every policy listed in |directory| into a PolicySet.

        Args:
            directory: A dict mapping a label to a dict that has 'file' and
                'format' entries.

        Returns:
            A PolicySet holding the policies that were loaded; entries for
            which Policy.load returns nothing are left out.
        """
        LOGGER.info('Loading policy files.')
        policies = {}
        for label in directory:
            entry = directory[label]
            LOGGER.info(' %s: %s', label, entry['file'])
            loaded = Policy.load(entry['file'], entry['format'])
            if loaded:
                policies[label] = loaded
        return PolicySet(policies)