1#!/usr/bin/env python 2# Copyright 2014 The Chromium Authors. All rights reserved. 3# Use of this source code is governed by a BSD-style license that can be 4# found in the LICENSE file. 5 6import argparse, os, sys, json, subprocess, pickle, StringIO 7 8parser = argparse.ArgumentParser( 9 description = 10 "Process the Blink points-to graph generated by the Blink GC plugin.") 11 12parser.add_argument( 13 '-', dest='use_stdin', action='store_true', 14 help='Read JSON graph files from stdin') 15 16parser.add_argument( 17 '-c', '--detect-cycles', action='store_true', 18 help='Detect cycles containing GC roots') 19 20parser.add_argument( 21 '-s', '--print-stats', action='store_true', 22 help='Statistics about ref-counted and traced objects') 23 24parser.add_argument( 25 '-v', '--verbose', action='store_true', 26 help='Verbose output') 27 28parser.add_argument( 29 '--ignore-cycles', default=None, metavar='FILE', 30 help='File with cycles to ignore') 31 32parser.add_argument( 33 '--ignore-classes', nargs='*', default=[], metavar='CLASS', 34 help='Classes to ignore when detecting cycles') 35 36parser.add_argument( 37 '--pickle-graph', default=None, metavar='FILE', 38 help='File to read/save the graph from/to') 39 40parser.add_argument( 41 'files', metavar='FILE_OR_DIR', nargs='*', default=[], 42 help='JSON graph files or directories containing them') 43 44# Command line args after parsing. 45args = None 46 47# Map from node labels to nodes. 48graph = {} 49 50# Set of root nodes. 51roots = [] 52 53# List of cycles to ignore. 54ignored_cycles = [] 55 56# Global flag to determine exit code. 
global_reported_error = False

def set_reported_error(value):
  """Record whether an error was reported (drives the process exit code)."""
  global global_reported_error
  global_reported_error = value

def reported_error():
  """Return True if any error has been reported so far."""
  return global_reported_error

def log(msg):
  """Print a progress message when --verbose is set."""
  if args.verbose:
    # Parenthesized single-argument form prints identically under
    # Python 2 and is also valid Python 3 syntax.
    print(msg)

# Number of super-class edge sets copied down to sub-classes.
global_inc_copy = 0
def inc_copy():
  global global_inc_copy
  global_inc_copy += 1

def get_node(name):
  """Return the graph node for |name|, creating it on first use."""
  return graph.setdefault(name, Node(name))

# Pointer kinds counted per node: raw pointers, ref-counted pointers and
# traced (Member) pointers.
ptr_types = ('raw', 'ref', 'mem')

def inc_ptr(dst, ptr):
  """Bump the |ptr| pointer-kind counter on the node named |dst|.

  No-op for unknown pointer kinds or destinations not in the graph.
  """
  if ptr in ptr_types:
    node = graph.get(dst)
    if not node: return
    node.counts[ptr] += 1

def add_counts(s1, s2):
  """Add the counters of |s2| into |s1| in place.

  Every key of |s2| must already be present in |s1|.
  """
  for (k, v) in s2.items():
    # Bug fix: use the iterated value instead of the redundant second
    # lookup s2[k]. (items() behaves like iteritems() for this use and
    # is also valid Python 3.)
    s1[k] += v

# Representation of graph nodes. Basically a map of directed edges.
class Node:
  """A class in the points-to graph.

  Holds outgoing edges keyed by Edge.key, per-pointer-kind counters, and
  scratch state (cost/visited/path) used by shortest_path().
  """
  def __init__(self, name):
    self.name = name
    self.edges = {}
    self.reset()
  def __repr__(self):
    return "%s(%s) %s" % (self.name, self.visited, self.edges)
  def update_node(self, decl):
    # Currently we don't track any node info besides its edges.
    pass
  def update_edge(self, e):
    """Add the edge declaration |e|; if an edge with the same key already
    exists, keep the strongest kind of the two."""
    new_edge = Edge(**e)
    edge = self.edges.get(new_edge.key)
    if edge:
      # If an edge exists, its kind is the strongest of the two.
      edge.kind = max(edge.kind, new_edge.kind)
    else:
      self.edges[new_edge.key] = new_edge
  def super_edges(self):
    return [ e for e in self.edges.values() if e.is_super() ]
  def subclass_edges(self):
    return [ e for e in self.edges.values() if e.is_subclass() ]
  def reset(self):
    """Reset the per-traversal search state used by shortest_path()."""
    self.cost = sys.maxint  # "infinite" distance sentinel (Python 2 only).
    self.visited = False
    self.path = None
    self.counts = {}
    for ptr in ptr_types:
      self.counts[ptr] = 0
  def update_counts(self):
    """Project this node's outgoing edges onto the per-destination
    pointer-kind counters."""
    for e in self.edges.values():
      inc_ptr(e.dst, e.ptr)

# Representation of directed graph edges.
class Edge:
  """A directed points-to edge between two classes in the graph."""
  def __init__(self, **decl):
    self.src = decl['src']
    self.dst = decl['dst']
    self.lbl = decl['lbl']
    self.ptr = decl['ptr']
    self.kind = decl['kind'] # 0 = weak, 1 = strong, 2 = root
    self.loc = decl['loc']
    # The label does not uniquely determine an edge from a node. We
    # define the semi-unique key to be the concatenation of the
    # label and dst name. This is sufficient to track the strongest
    # edge to a particular type. For example, if the field A::m_f
    # has type HashMap<WeakMember<B>, Member<B>> we will have a
    # strong edge with key m_f#B from A to B.
    self.key = '%s#%s' % (self.lbl, self.dst)
  def __repr__(self):
    return '%s (%s) => %s' % (self.src, self.lbl, self.dst)
  def is_root(self):
    return self.kind == 2
  def is_weak(self):
    return self.kind == 0
  def keeps_alive(self):
    # Strong and root edges keep their destination alive.
    return self.kind > 0
  def is_subclass(self):
    return self.lbl.startswith('<subclass>')
  def is_super(self):
    return self.lbl.startswith('<super>')

def parse_file(filename):
  """Parse one .graph.json file into a list of node/edge declarations."""
  # Bug fix: close the file deterministically instead of leaking the
  # handle until garbage collection.
  with open(filename) as f:
    return json.load(f)

def build_graphs_in_dir(dirname):
  """Build graph entries from every *.graph.json file under |dirname|."""
  # TODO: Use platform independent code, e.g., os.walk
  files = subprocess.check_output(
      ['find', dirname, '-name', '*.graph.json']).split('\n')
  log("Found %d files" % len(files))
  for f in files:
    # Bug fix: str.strip() returns a new string; the original discarded
    # the result, so the filename was used unstripped.
    f = f.strip()
    if len(f) < 1:
      continue
    build_graph(f)

def build_graph(filename):
  """Merge the declarations from one JSON graph file into the graph."""
  for decl in parse_file(filename):
    # 'in' replaces the deprecated dict.has_key() (same behavior).
    if 'name' in decl:
      # Add/update a node entry
      name = decl['name']
      node = get_node(name)
      node.update_node(decl)
    else:
      # Add/update an edge entry
      name = decl['src']
      node = get_node(name)
      node.update_edge(decl)

# Copy all non-weak edges from super classes to their subclasses.
# This causes all fields of a super to be considered fields of a
# derived class without transitively relating derived classes with
# each other.
# For example, if B <: A, C <: A, and for some D, D => B,
# we don't want that to entail that D => C.
def copy_super_edges(edge):
  """Copy the strong edges of the super class reached via |edge| down to
  its sub class and add a strong <subclass> edge from super to sub."""
  if edge.is_weak() or not edge.is_super():
    return
  inc_copy()
  # Make the super-class edge weak (prohibits processing twice).
  edge.kind = 0
  # If the super class is not in our graph exit early.
  super_node = graph.get(edge.dst)
  if super_node is None: return
  # Recursively copy all super-class edges.
  for e in super_node.super_edges():
    copy_super_edges(e)
  # Copy strong super-class edges (ignoring sub-class edges) to the sub class.
  sub_node = graph[edge.src]
  for e in super_node.edges.values():
    if e.keeps_alive() and not e.is_subclass():
      new_edge = Edge(
        src = sub_node.name,
        dst = e.dst,
        lbl = '%s <: %s' % (super_node.name, e.lbl),
        ptr = e.ptr,
        kind = e.kind,
        loc = e.loc,
      )
      sub_node.edges[new_edge.key] = new_edge
  # Add a strong sub-class edge.
  sub_edge = Edge(
    src = super_node.name,
    dst = sub_node.name,
    lbl = '<subclass>',
    ptr = edge.ptr,
    kind = 1,
    loc = edge.loc,
  )
  super_node.edges[sub_edge.key] = sub_edge

def complete_graph():
  """Propagate super-class edges down to sub-classes and collect all
  root edges into the module-level |roots| list."""
  for node in graph.values():
    for edge in node.super_edges():
      copy_super_edges(edge)
    for edge in node.edges.values():
      if edge.is_root():
        roots.append(edge)
  log("Copied edges down <super> edges for %d graph nodes" % global_inc_copy)

def reset_graph():
  """Clear per-traversal search state on every node before a new search."""
  for n in graph.values():
    n.reset()

def shortest_path(start, end):
  """Search from |start| towards |end| over keeps-alive edges, with unit
  edge weights.  On success end.cost < sys.maxint and following the
  node.path edges backwards from |end| reconstructs the path."""
  start.cost = 0
  minlist = [start]
  while len(minlist) > 0:
    # Pop the node with the smallest cost (list is sorted descending).
    minlist.sort(key=lambda n: -n.cost)
    current = minlist.pop()
    current.visited = True
    # Stop when the target is reached or cannot be improved further.
    if current == end or current.cost >= end.cost + 1:
      return
    for e in current.edges.values():
      if not e.keeps_alive():
        continue
      dst = graph.get(e.dst)
      if dst is None or dst.visited:
        continue
      if current.cost < dst.cost:
        dst.cost = current.cost + 1
        dst.path = e
        minlist.append(dst)

def detect_cycles():
  """Report every GC root whose target can reach back to the class that
  hosts the root (a potentially leaking cycle)."""
  for root_edge in roots:
    reset_graph()
    # Mark ignored classes as already visited
    for ignore in args.ignore_classes:
      # Unqualified names are assumed to live in the blink namespace.
      # (Conditional expression replaces the fragile and/or idiom.)
      name = ignore if ignore.find("::") > 0 else "blink::" + ignore
      node = graph.get(name)
      if node:
        node.visited = True
    src = graph[root_edge.src]
    dst = graph.get(root_edge.dst)
    if src.visited:
      continue
    # Roots to WTF::String are deliberately skipped.
    if root_edge.dst == "WTF::String":
      continue
    if dst is None:
      print("\nPersistent root to incomplete destination object:")
      print(root_edge)
      set_reported_error(True)
      continue
    # Find the shortest path from the root target (dst) to its host (src)
    shortest_path(dst, src)
    if src.cost < sys.maxint:
      report_cycle(root_edge)

def is_ignored_cycle(cycle):
  """Return True when |cycle| (a formatted cycle string) matches one of
  the blocks read from the --ignore-cycles file."""
  for block in ignored_cycles:
    if block_match(cycle, block):
      return True
  # Explicit False instead of falling off the end (still falsy).
  return False

def block_match(b1, b2):
  """Element-wise equality of two sequences (False on length mismatch)."""
  if len(b1) != len(b2):
    return False
  return all(l1 == l2 for (l1, l2) in zip(b1, b2))

def report_cycle(root_edge):
  """Pretty-print the cycle found for |root_edge| unless it is ignored."""
  dst = graph[root_edge.dst]
  path = []
  edge = root_edge
  dst.path = None
  while edge:
    path.append(edge)
    edge = graph[edge.src].path
  path.append(root_edge)
  path.reverse()
  # Find the max loc length for pretty printing (path is never empty:
  # it contains root_edge at least twice).
  max_loc = max(len(p.loc) for p in path)
  out = StringIO.StringIO()
  for p in path[:-1]:
    # Same bytes as the Python 2 statement: print >>out, loc, p
    out.write('%s %s\n' % ((p.loc + ':').ljust(max_loc + 1), p))
  sout = out.getvalue()
  if not is_ignored_cycle(sout):
    print("\nFound a potentially leaking cycle starting from a GC root:\n" + sout)
    set_reported_error(True)

def load_graph():
  """Load a previously pickled (graph, roots) pair from --pickle-graph."""
  global graph
  global roots
  log("Reading graph from pickled file: " + args.pickle_graph)
  # NOTE(review): pickle is only safe on trusted input; this file is
  # expected to be one produced by save_graph() below.
  with open(args.pickle_graph, 'rb') as f:
    dump = pickle.load(f)
  graph = dump[0]
  roots = dump[1]

def save_graph():
  """Pickle the (graph, roots) pair to --pickle-graph."""
  log("Saving graph to pickle file: " + args.pickle_graph)
  dump = (graph, roots)
  with open(args.pickle_graph, 'wb') as f:
    pickle.dump(dump, f)

def read_ignored_cycles():
  """Read cycle blocks to ignore from the --ignore-cycles file.

  Blocks are separated by blank lines or by the 'Found ...' header lines
  that report_cycle() prints.  Each block is stored as the raw text of
  its lines (newlines included) so it compares equal, character by
  character, to report_cycle()'s formatted output.  (Bug fix: the
  original did `block += l`, splatting each line into a list of single
  characters; joining into one string has identical matching semantics
  and is intentional rather than accidental.)
  """
  global ignored_cycles
  if not args.ignore_cycles:
    return
  log("Reading ignored cycles from file: " + args.ignore_cycles)
  block = []
  with open(args.ignore_cycles) as f:
    for l in f:
      line = l.strip()
      if not line or line.startswith('Found'):
        if len(block) > 0:
          ignored_cycles.append(''.join(block))
        block = []
      else:
        # Keep the raw line (including its newline) for exact matching.
        block.append(l)
  if len(block) > 0:
    ignored_cycles.append(''.join(block))

# Base classes that make a hierarchy garbage collected (traced).
gc_bases = (
  'blink::GarbageCollected',
  'blink::GarbageCollectedFinalized',
  'blink::GarbageCollectedMixin',
)
# Base classes that make a hierarchy ref counted.
ref_bases = (
  'WTF::RefCounted',
  'WTF::ThreadSafeRefCounted',
)
# Base classes of hierarchies in transition from ref counting to GC.
gcref_bases = (
  'blink::RefCountedGarbageCollected',
  'blink::ThreadSafeRefCountedGarbageCollected',
)
# Mixin bases counted as 'ref-mixins' in the statistics below.
ref_mixins = (
  'blink::EventTarget',
  'blink::EventTargetWithInlineData',
  'blink::ActiveDOMObject',
)

def print_stats():
  """Print statistics about GC-managed, ref-counted and transitional
  class hierarchies found in the graph."""
  gcref_managed = []
  ref_managed = []
  gc_managed = []
  hierarchies = []

  for node in graph.values():
    node.update_counts()
    for sup in node.super_edges():
      if sup.dst in gcref_bases:
        gcref_managed.append(node)
      elif sup.dst in ref_bases:
        ref_managed.append(node)
      elif sup.dst in gc_bases:
        gc_managed.append(node)

  # Typo fix in output: "GC manged" -> "GC managed".
  groups = [("GC managed ", gc_managed),
            ("ref counted ", ref_managed),
            ("in transition", gcref_managed)]
  total = sum([len(g) for (s, g) in groups])
  for (s, g) in groups:
    # Robustness fix: avoid ZeroDivisionError on an empty graph.
    # (// is identical to / for Python 2 ints and Python 3 compatible.)
    percent = len(g) * 100 // total if total else 0
    print("%2d%% is %s (%d hierarchies)" % (percent, s, len(g)))

  for base in gcref_managed:
    stats = dict({ 'classes': 0, 'ref-mixins': 0 })
    for ptr in ptr_types: stats[ptr] = 0
    hierarchy_stats(base, stats)
    hierarchies.append((base, stats))

  print("\nHierarchies in transition (RefCountedGarbageCollected):")
  # Indexed key instead of Python 2-only tuple parameter unpacking.
  hierarchies.sort(key=lambda entry: -entry[1]['classes'])
  for (node, stats) in hierarchies:
    total = stats['mem'] + stats['ref'] + stats['raw']
    print (
      "%s %3d%% of %-30s: %3d cls, %3d mem, %3d ref, %3d raw, %3d ref-mixins" %
      ("*" if stats['ref'] == 0 and stats['ref-mixins'] == 0 else " ",
       100 if total == 0 else stats['mem'] * 100 // total,
       node.name.replace('blink::', ''),
       stats['classes'],
       stats['mem'],
       stats['ref'],
       stats['raw'],
       stats['ref-mixins'],
      ))

def hierarchy_stats(node, stats):
  """Recursively accumulate pointer counts of |node| and all of its
  sub-classes into |stats|."""
  if not node: return
  stats['classes'] += 1
  add_counts(stats, node.counts)
  for edge in node.super_edges():
    if edge.dst in ref_mixins:
      stats['ref-mixins'] += 1
  for edge in node.subclass_edges():
    # NOTE(review): assumes subclass edges form a tree; shared sub-trees
    # would be counted more than once -- confirm against plugin output.
    hierarchy_stats(graph.get(edge.dst), stats)

def main():
  """Entry point: build or load the graph, then run the requested
  analyses.  Returns the process exit code."""
  global args
  args = parser.parse_args()
  if not (args.detect_cycles or args.print_stats):
    print("Please select an operation to perform (eg, -c to detect cycles)")
    parser.print_help()
    return 1
  if args.pickle_graph and os.path.isfile(args.pickle_graph):
    load_graph()
  else:
    if args.use_stdin:
      log("Reading files from stdin")
      for f in sys.stdin:
        build_graph(f.strip())
    else:
      log("Reading files and directories from command line")
      if len(args.files) == 0:
        # Typo fix in output: "directores" -> "directories".
        print("Please provide files or directories for building the graph")
        parser.print_help()
        return 1
      for f in args.files:
        if os.path.isdir(f):
          log("Building graph from files in directory: " + f)
          build_graphs_in_dir(f)
        else:
          log("Building graph from file: " + f)
          build_graph(f)
    log("Completing graph construction (%d graph nodes)" % len(graph))
    complete_graph()
    if args.pickle_graph:
      save_graph()
  if args.detect_cycles:
    read_ignored_cycles()
    # Typo fix: "containg" -> "containing".
    log("Detecting cycles containing GC roots")
    detect_cycles()
  if args.print_stats:
    log("Printing statistics")
    print_stats()
  if reported_error():
    return 1
  return 0

if __name__ == '__main__':
  sys.exit(main())