# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Policy rules that classify memory regions and buckets into components."""

import json
import logging
import os
import re


LOGGER = logging.getLogger('dmprof')

BASE_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
POLICIES_JSON_PATH = os.path.join(BASE_PATH, 'policies.json')

# Heap Profile Policy versions

# POLICY_DEEP_1 DOES NOT include allocation_type columns.
# mmap regions are distinguished by mmap frames in the pattern column.
POLICY_DEEP_1 = 'POLICY_DEEP_1'

# POLICY_DEEP_2 DOES include allocation_type columns.
# mmap regions are distinguished by the allocation_type column.
POLICY_DEEP_2 = 'POLICY_DEEP_2'

# POLICY_DEEP_3 is in JSON format.
POLICY_DEEP_3 = 'POLICY_DEEP_3'

# POLICY_DEEP_4 contains typeinfo.
POLICY_DEEP_4 = 'POLICY_DEEP_4'


class Rule(object):
  """Represents one matching rule in a policy file.

  Each *_pattern argument is a regular expression source string.  A non-empty
  pattern is compiled with a trailing r'\\Z' and exposed as a compiled regex;
  an empty or missing pattern is exposed as None, which the Policy matchers
  treat as "matches anything".
  """

  def __init__(self,
               name,
               allocator_type,
               stackfunction_pattern=None,
               stacksourcefile_pattern=None,
               typeinfo_pattern=None,
               mappedpathname_pattern=None,
               mappedpermission_pattern=None,
               sharedwith=None):
    """Builds a rule.

    Args:
        name: A component name returned when this rule matches.
        allocator_type: A string compared against 'malloc', 'mmap' or
            'unhooked' by the Policy matchers.
        stackfunction_pattern: A regex source for joined stack functions.
        stacksourcefile_pattern: A regex source for joined stack source files.
        typeinfo_pattern: A regex source for typeinfo.
        mappedpathname_pattern: A regex source for a mapped path name.
        mappedpermission_pattern: A regex source for the concatenated
            permission flags of a mapping.
        sharedwith: A list of sharing categories this rule accepts.  An empty
            or missing list is stored as [].
    """
    self._name = name
    self._allocator_type = allocator_type

    self._stackfunction_pattern = Rule._compile(stackfunction_pattern)
    self._stacksourcefile_pattern = Rule._compile(stacksourcefile_pattern)
    self._typeinfo_pattern = Rule._compile(typeinfo_pattern)
    self._mappedpathname_pattern = Rule._compile(mappedpathname_pattern)
    self._mappedpermission_pattern = Rule._compile(mappedpermission_pattern)

    self._sharedwith = sharedwith or []

  @staticmethod
  def _compile(pattern):
    """Compiles |pattern| anchored at the end, or returns None if empty."""
    if not pattern:
      return None
    # NOTE(review): r'\Z' is appended without grouping, so for a top-level
    # alternation such as 'a|b' only the last branch is end-anchored.  Kept
    # as is because existing policy files may depend on it.
    return re.compile(pattern + r'\Z')

  @property
  def name(self):
    return self._name

  @property
  def allocator_type(self):
    return self._allocator_type

  @property
  def stackfunction_pattern(self):
    return self._stackfunction_pattern

  @property
  def stacksourcefile_pattern(self):
    return self._stacksourcefile_pattern

  @property
  def typeinfo_pattern(self):
    return self._typeinfo_pattern

  @property
  def mappedpathname_pattern(self):
    return self._mappedpathname_pattern

  @property
  def mappedpermission_pattern(self):
    return self._mappedpermission_pattern

  @property
  def sharedwith(self):
    return self._sharedwith


class Policy(object):
  """Represents a policy, a content of a policy file."""

  def __init__(self, rules, version, components):
    self._rules = rules
    self._version = version
    self._components = components

  @property
  def rules(self):
    return self._rules

  @property
  def version(self):
    return self._version

  @property
  def components(self):
    return self._components

  def find_rule(self, component_name):
    """Finds a rule whose name is |component_name|.

    Returns:
        The first matching Rule, or None if no rule has that name.
    """
    for rule in self._rules:
      if rule.name == component_name:
        return rule
    return None

  def find_malloc(self, bucket):
    """Finds a matching component name which a given |bucket| belongs to.

    The result is stored in |bucket.component_cache| and reused on later
    calls for the same bucket.

    Args:
        bucket: A Bucket object to be searched for.

    Returns:
        A string representing a component name, or 'no-bucket' if |bucket|
        is falsy.

    Raises:
        AssertionError: If no 'malloc' rule matches |bucket|.
    """
    assert not bucket or bucket.allocator_type == 'malloc'

    if not bucket:
      return 'no-bucket'
    if bucket.component_cache:
      return bucket.component_cache

    stackfunction = bucket.symbolized_joined_stackfunction
    stacksourcefile = bucket.symbolized_joined_stacksourcefile
    typeinfo = bucket.symbolized_typeinfo
    # A typeinfo starting with '0x' is replaced by the bucket's typeinfo name.
    if typeinfo.startswith('0x'):
      typeinfo = bucket.typeinfo_name

    for rule in self._rules:
      if (rule.allocator_type == 'malloc' and
          Policy._matches(rule.stackfunction_pattern, stackfunction) and
          Policy._matches(rule.stacksourcefile_pattern, stacksourcefile) and
          Policy._matches(rule.typeinfo_pattern, typeinfo)):
        bucket.component_cache = rule.name
        return rule.name

    # Raised explicitly (rather than "assert False") so that an unmatched
    # bucket is never silently mapped to None under "python -O".
    raise AssertionError('No malloc rule matches the given bucket.')

  def find_mmap(self, region, bucket_set,
                pageframe=None, group_pfn_counts=None):
    """Finds a matching component which a given mmap |region| belongs to.

    It uses |bucket_set| to match with backtraces.  If |pageframe| is given,
    it considers memory sharing among processes.

    NOTE: Don't use Bucket's |component_cache| for mmap regions because they're
    classified not only with bucket information (mappedpathname for example).

    Args:
        region: A tuple representing a memory region.
        bucket_set: A BucketSet object to look up backtraces.
        pageframe: A PageFrame object representing a pageframe maybe including
            a pagecount.
        group_pfn_counts: A dict mapping a PFN to the number of times the
            the pageframe is mapped by the known "group (Chrome)" processes.

    Returns:
        A tuple (component name, bucket).  It is ('no-bucket', None) if the
        region's bucket is not found in |bucket_set|.

    Raises:
        AssertionError: If no 'mmap' rule matches |region|.
    """
    assert region[0] == 'hooked'
    bucket = bucket_set.get(region[1]['bucket_id'])
    assert not bucket or bucket.allocator_type == 'mmap'

    if not bucket:
      return 'no-bucket', None

    stackfunction = bucket.symbolized_joined_stackfunction
    stacksourcefile = bucket.symbolized_joined_stacksourcefile
    sharedwith = self._categorize_pageframe(pageframe, group_pfn_counts)

    for rule in self._rules:
      if (rule.allocator_type == 'mmap' and
          Policy._matches(rule.stackfunction_pattern, stackfunction) and
          Policy._matches(rule.stacksourcefile_pattern, stacksourcefile) and
          Policy._matches_mapping(rule, region, pageframe, sharedwith)):
        return rule.name, bucket

    raise AssertionError('No mmap rule matches the given region.')

  def find_unhooked(self, region, pageframe=None, group_pfn_counts=None):
    """Finds a matching component which a given unhooked |region| belongs to.

    If |pageframe| is given, it considers memory sharing among processes.

    Args:
        region: A tuple representing a memory region.
        pageframe: A PageFrame object representing a pageframe maybe including
            a pagecount.
        group_pfn_counts: A dict mapping a PFN to the number of times the
            the pageframe is mapped by the known "group (Chrome)" processes.

    Returns:
        A string representing a component name.

    Raises:
        AssertionError: If no 'unhooked' rule matches |region|.
    """
    assert region[0] == 'unhooked'
    sharedwith = self._categorize_pageframe(pageframe, group_pfn_counts)

    for rule in self._rules:
      if (rule.allocator_type == 'unhooked' and
          Policy._matches_mapping(rule, region, pageframe, sharedwith)):
        return rule.name

    raise AssertionError('No unhooked rule matches the given region.')

  @staticmethod
  def _matches(pattern, text):
    """Returns True if |pattern| is unset (None) or matches |text|."""
    return not pattern or bool(pattern.match(text))

  @staticmethod
  def _matches_mapping(rule, region, pageframe, sharedwith):
    """Checks the mapping-related conditions of |rule| against |region|.

    The region's 'vma' entry is only read for conditions that |rule| actually
    specifies.  The sharing condition is skipped when |pageframe| is not given.
    """
    if (rule.mappedpathname_pattern and
        not rule.mappedpathname_pattern.match(region[1]['vma']['name'])):
      return False

    if rule.mappedpermission_pattern:
      vma = region[1]['vma']
      permission = (vma['readable'] + vma['writable'] +
                    vma['executable'] + vma['private'])
      if not rule.mappedpermission_pattern.match(permission):
        return False

    if rule.sharedwith and pageframe and sharedwith not in rule.sharedwith:
      return False
    return True

  @staticmethod
  def load(filename, filetype):
    """Loads a policy file of |filename| in a |filetype|.

    Args:
        filename: A filename to be loaded.  It is joined onto BASE_PATH.
        filetype: A string to specify a type of the file.  Only 'json' is
            supported for now.

    Returns:
        A loaded Policy object, or None if |filetype| is not supported.
    """
    with open(os.path.join(BASE_PATH, filename)) as policy_f:
      return Policy.parse(policy_f, filetype)

  @staticmethod
  def parse(policy_f, filetype):
    """Parses a policy file content in a |filetype|.

    Args:
        policy_f: An IO object to be loaded.
        filetype: A string to specify a type of the file.  Only 'json' is
            supported for now.

    Returns:
        A loaded Policy object, or None if |filetype| is not supported.
    """
    if filetype == 'json':
      return Policy._parse_json(policy_f)
    return None

  # Kept for backward compatibility; _parse_json no longer uses it because it
  # also strips '//' sequences that appear inside JSON strings.
  JSON_COMMENT_REGEX = re.compile(r'//.*')

  # Matches either a whole JSON string literal (captured as group 1) or a
  # '//' comment running to the end of the line.
  _JSON_STRING_OR_COMMENT_REGEX = re.compile(r'("(?:\\.|[^"\\\n])*")|//.*')

  @staticmethod
  def _strip_json_comments(text):
    """Removes '//' comments from |text|, leaving string literals intact."""
    return Policy._JSON_STRING_OR_COMMENT_REGEX.sub(
        lambda matched: matched.group(1) or '', text)

  @staticmethod
  def _parse_json(policy_f):
    """Parses policy file in json format.

    A policy file contains component's names and their stacktrace pattern
    written in regular expression.  Those patterns are matched against each
    symbols of each stacktraces in the order written in the policy file.

    '//' comments are removed before the content is parsed as JSON.  A '//'
    inside a JSON string is kept as part of the string.

    Args:
        policy_f: A File/IO object to read.

    Returns:
        A loaded policy object.
    """
    policy = json.loads(Policy._strip_json_comments(policy_f.read()))

    rules = []
    for rule in policy['rules']:
      # 'stacktrace' is accepted as a fallback key for 'stackfunction'.
      stackfunction = rule.get('stackfunction') or rule.get('stacktrace')
      rules.append(Rule(
          rule['name'],
          rule['allocator'],  # allocator_type
          stackfunction,
          rule.get('stacksourcefile'),
          rule.get('typeinfo'),
          rule.get('mappedpathname'),
          rule.get('mappedpermission'),
          rule.get('sharedwith')))

    return Policy(rules, policy['version'], policy['components'])

  @staticmethod
  def _categorize_pageframe(pageframe, group_pfn_counts):
    """Categorizes a pageframe based on its sharing status.

    Args:
        pageframe: A PageFrame object, or a falsy value if unknown.
        group_pfn_counts: A dict mapping a PFN to a count, or None which is
            treated as an empty dict.

    Returns:
        'private' if |pageframe| is not shared with other processes.  'group'
        if |pageframe| is shared only with group (Chrome-related) processes.
        'others' if |pageframe| is shared with non-group processes.
    """
    if not pageframe:
      return 'private'

    # Callers default |group_pfn_counts| to None; treat it as "no group info".
    if group_pfn_counts is None:
      group_pfn_counts = {}

    if pageframe.pagecount:
      if pageframe.pagecount == 1:
        return 'private'
      if pageframe.pagecount <= group_pfn_counts.get(pageframe.pfn, 0) + 1:
        return 'group'
      return 'others'

    # Without a pagecount, only membership in |group_pfn_counts| is known.
    if pageframe.pfn in group_pfn_counts:
      return 'group'
    return 'private'


class PolicySet(object):
  """Represents a set of policies."""

  def __init__(self, policy_directory):
    self._policy_directory = policy_directory

  @staticmethod
  def load(labels=None):
    """Loads a set of policies via the "default policy directory".

    The "default policy directory" contains pairs of policies and their labels.
    For example, a policy "policy.l0.json" is labeled "l0" in the default
    policy directory "policies.json".

    All policies in the directory are loaded by default.  Policies can be
    limited by |labels|.  Labels missing from the directory are ignored.

    Args:
        labels: An array that contains policy labels to be loaded.

    Returns:
        A PolicySet object.
    """
    default_policy_directory = PolicySet._load_default_policy_directory()
    if not labels:
      return PolicySet._load_policies(default_policy_directory)

    specified_policy_directory = {}
    for label in labels:
      if label in default_policy_directory:
        specified_policy_directory[label] = default_policy_directory[label]
      # TODO(dmikurube): Load an un-labeled policy file.
    return PolicySet._load_policies(specified_policy_directory)

  def __len__(self):
    return len(self._policy_directory)

  def __iter__(self):
    for label in self._policy_directory:
      yield label

  def __getitem__(self, label):
    return self._policy_directory[label]

  @staticmethod
  def _load_default_policy_directory():
    """Reads POLICIES_JSON_PATH and returns its parsed JSON content."""
    with open(POLICIES_JSON_PATH, mode='r') as policies_f:
      default_policy_directory = json.load(policies_f)
    return default_policy_directory

  @staticmethod
  def _load_policies(directory):
    """Loads every policy file listed in |directory| into a PolicySet.

    Entries whose file type is not supported (Policy.load returns None) are
    left out of the result.
    """
    LOGGER.info('Loading policy files.')
    policies = {}
    for label in directory:
      entry = directory[label]
      LOGGER.info(' %s: %s', label, entry['file'])
      loaded = Policy.load(entry['file'], entry['format'])
      if loaded:
        policies[label] = loaded
    return PolicySet(policies)