#!/usr/bin/python
#
# Copyright 2013 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Turn `nm` / `objdump -h` output into JSON trees for a treemap view.

Run as a script with one MODE argument (`syms`, `dump` or `sections`); see
the `usage` string below for the expected input files.  The code is written
to run unchanged on Python 2.7 and Python 3.
"""

import fileinput
import json
import operator
import optparse
import os
import pprint
import re
import subprocess
import sys


def format_bytes(bytes):
    """Pretty-print a number of bytes.

    Values above 1e6 are shown in units of 1e6 with an 'm' suffix, values
    above 1e3 in units of 1e3 with a 'k' suffix (one decimal place each);
    anything else is returned via str().
    """
    if bytes > 1e6:
        return '%.1fm' % (bytes / 1.0e6)
    if bytes > 1e3:
        return '%.1fk' % (bytes / 1.0e3)
    return str(bytes)


# Lower-cased nm symbol type letter -> name used in the emitted JSON.
_SYMBOL_TYPE_NAMES = {
    'b': 'bss',
    'd': 'data',
    'r': 'read-only data',
    't': 'code',
    'u': 'weak symbol',  # Unique global.
    'w': 'weak symbol',
    'v': 'weak symbol',
}


def symbol_type_to_human(type):
    """Convert a symbol type as printed by nm into a human-readable name.

    Raises KeyError for a type letter that is not in the table above.
    """
    return _SYMBOL_TYPE_NAMES[type]


# Match lines with size + symbol + optional filename.
_NM_SYM_RE = re.compile(
    r'^[0-9a-f]+ ([0-9a-f]+) (.) ([^\t]+)(?:\t(.*):\d+)?$')
# Match lines with addr but no size.
_NM_ADDR_RE = re.compile(r'^[0-9a-f]+ (.) ([^\t]+)(?:\t.*)?$')
# Match lines that don't have an address at all -- typically external symbols.
_NM_NOADDR_RE = re.compile(r'^ + (.) (.*)$')


def parse_nm(input):
    """Parse nm output.

    Argument: an iterable over lines of nm output.

    Yields: (symbol name, symbol type, symbol size, source file path).
    The type is lower-cased and 'u'/'v' are folded into 'w'; BSS symbols
    are skipped.  Path may be None if nm couldn't figure out the source
    file.  Lines that match none of the known layouts are reported on
    stderr and otherwise ignored.
    """
    for line in input:
        line = line.rstrip()

        match = _NM_SYM_RE.match(line)
        if match:
            size, sym_type, sym = match.groups()[0:3]
            sym_type = sym_type.lower()
            if sym_type in ('u', 'v'):
                sym_type = 'w'  # just call them all weak
            if sym_type == 'b':
                continue  # skip all BSS for now
            yield sym, sym_type, int(size, 16), match.group(4)
            continue

        if _NM_ADDR_RE.match(line):
            # No size == we don't care.
            continue

        match = _NM_NOADDR_RE.match(line)
        if match and match.group(1) in ('U', 'w'):
            # external or weak symbol
            continue

        sys.stderr.write('unparsed: %r\n' % (line,))


def demangle(ident, cppfilt):
    """Return `ident` demangled by the `cppfilt` program when applicable.

    Only identifiers starting with '_Z' are passed to `cppfilt`; if
    `cppfilt` is falsy the identifier is returned unchanged.
    """
    if cppfilt and ident.startswith('_Z'):
        # Demangle names when possible. Mangled names all start with _Z.
        # universal_newlines=True makes the output text (not bytes) on
        # Python 3 so it can be concatenated and JSON-encoded later.
        ident = subprocess.check_output(
            [cppfilt, ident], universal_newlines=True).strip()
    return ident


class Suffix(object):
    """A compiled '<anything><suffix><anything>' pattern plus its label."""

    def __init__(self, suffix, replacement):
        self.pattern = '^(.*)' + suffix + '(.*)$'
        self.re = re.compile(self.pattern)
        self.replacement = replacement


class SuffixCleanup(object):
    """Pre-compile suffix regular expressions."""

    def __init__(self):
        self.suffixes = [
            Suffix(r'\.part\.([0-9]+)', 'part'),
            Suffix(r'\.constprop\.([0-9]+)', 'constprop'),
            Suffix(r'\.isra\.([0-9]+)', 'isra'),
        ]

    def cleanup(self, ident, cppfilt):
        """Cleanup identifiers that have suffixes preventing demangling,
        and demangle if possible.

        Each recognised suffix is cut out of `ident` and re-appended at the
        end as ' [<label>.<number>]'.
        """
        to_append = []
        for s in self.suffixes:
            found = s.re.match(ident)
            if not found:
                continue
            to_append.append(
                ' [' + s.replacement + '.' + found.group(2) + ']')
            ident = found.group(1) + found.group(3)
        if to_append:
            # Only try to demangle if there were suffixes.
            ident = demangle(ident, cppfilt)
            ident += ''.join(to_append)
        return ident


suffix_cleanup = SuffixCleanup()

# Turn prefixes into suffixes so namespacing works.  Applied in this order.
_NAME_PREFIXES = [
    ['bool ', ''],
    ['construction vtable for ', ' [construction vtable]'],
    ['global constructors keyed to ', ' [global constructors]'],
    ['guard variable for ', ' [guard variable]'],
    ['int ', ''],
    ['non-virtual thunk to ', ' [non-virtual thunk]'],
    ['typeinfo for ', ' [typeinfo]'],
    ['typeinfo name for ', ' [typeinfo name]'],
    ['virtual thunk to ', ' [virtual thunk]'],
    ['void ', ''],
    ['vtable for ', ' [vtable]'],
    ['VTT for ', ' [VTT]'],
]

# Simplify parenthesis parsing.
_NAME_REPLACEMENTS = [
    ['(anonymous namespace)', '[anonymous namespace]'],
]


def _matching_angle(val, start):
    """Return the index of the '>' closing the '<' at `start`, or -1."""
    depth = 0
    for i in range(start, len(val)):
        ch = val[i]
        if ch == '<':
            depth += 1
        elif ch == '>':
            depth -= 1
            if depth == 0:
                return i
    return -1


def _parse_one(val):
    """Returns (leftmost-part, remaining)."""
    # len() guard: a name that is exactly 'operator' has no 9th character.
    if (val.startswith('operator') and len(val) > 8 and
            not (val[8].isalnum() or val[8] == '_')):
        # Operator overload function, terminate.
        return (val, '')
    co = val.find('::')
    lt = val.find('<')
    pa = val.find('(')
    co = len(val) if co == -1 else co
    lt = len(val) if lt == -1 else lt
    pa = len(val) if pa == -1 else pa
    if co < lt and co < pa:
        # Namespace or type name.
        return (val[:co], val[co + 2:])
    if lt < pa:
        # Template. Make sure we capture nested templates too.
        gt = _matching_angle(val, lt)
        if gt == -1:
            # Unbalanced '<': keep the rest as one terminal part instead of
            # indexing past the end of the string.
            return (val, '')
        ret = val[gt + 1:]
        if ret.startswith('::'):
            ret = ret[2:]
        if ret.startswith('('):
            # Template function, terminate.
            return (val, '')
        return (val[:gt + 1], ret)
    # Terminate with any function name, identifier, or unmangled name.
    return (val, '')


def parse_cpp_name(name, cppfilt):
    """Split a C++ symbol name into its namespace/class/function parts.

    The name first goes through suffix cleanup (which may demangle it via
    `cppfilt`), known descriptive prefixes are moved to the end as
    '[...]' tags, and the result is split on top-level '::'.

    Returns a list of non-empty strings; the last one is the leaf name.
    """
    name = suffix_cleanup.cleanup(name, cppfilt)

    for prefix, replacement in _NAME_PREFIXES:
        if name.startswith(prefix):
            name = name[len(prefix):] + replacement
    for value, replacement in _NAME_REPLACEMENTS:
        name = name.replace(value, replacement)

    parts = []
    while len(name) > 0:
        (part, name) = _parse_one(name)
        assert len(part) > 0
        parts.append(part)
    return parts


# Prefixes of path-less, namespace-less symbols and the group they go into.
_UNGROUPED_REGROUPS = [
    ['.L.str', '[str]'],
    ['.L__PRETTY_FUNCTION__.', '[__PRETTY_FUNCTION__]'],
    ['.L__func__.', '[__func__]'],
    ['.Lswitch.table', '[switch table]'],
]


def treeify_syms(symbols, strip_prefix=None, cppfilt=None):
    """Build a nested dict of symbol sizes from parse_nm()-style tuples.

    Arguments:
      symbols: iterable of (name, type, size, path) tuples.
      strip_prefix: optional leading path to remove from source paths.
      cppfilt: optional c++filt program used for demangling.

    Returns a dict in which inner nodes are dicts carrying a
    '$bloat_symbols' entry ({type: count of symbols below}) and leaves are
    (total size, {type: count}) tuples.  Symbols without namespaces are
    grouped under '[path]' + source path components, or under
    '[ungrouped]' when no path is known.
    """
    dirs = {}
    for sym, sym_type, size, path in symbols:
        if path:
            path = os.path.normpath(path)
            if strip_prefix and path.startswith(strip_prefix):
                path = path[len(strip_prefix):]
            # Drop leading separators in every case: a prefix given without
            # a trailing '/' would otherwise leave an empty first component
            # and trip the assertion below.
            path = path.lstrip('/')
            path = ['[path]'] + path.split('/')

        parts = parse_cpp_name(sym, cppfilt)
        if len(parts) == 1:
            if path:
                # No namespaces, group with path.
                parts = path + parts
            else:
                new_prefix = ['[ungrouped]']
                for prefix, group in _UNGROUPED_REGROUPS:
                    if parts[0].startswith(prefix):
                        parts[0] = parts[0][len(prefix):]
                        parts[0] = demangle(parts[0], cppfilt)
                        new_prefix += [group]
                        break
                parts = new_prefix + parts

        key = parts.pop()
        tree = dirs
        try:
            for part in parts:
                assert part != '', path
                if part not in tree:
                    tree[part] = {'$bloat_symbols': {}}
                counts = tree[part]['$bloat_symbols']
                counts[sym_type] = counts.get(sym_type, 0) + 1
                tree = tree[part]
            old_size, old_symbols = tree.get(key, (0, {}))
            old_symbols[sym_type] = old_symbols.get(sym_type, 0) + 1
            tree[key] = (old_size + size, old_symbols)
        except Exception:
            # Say which symbol broke the tree before propagating the error.
            sys.stderr.write(
                'sym `%s`\tparts `%s`\tkey `%s`\n' % (sym, parts, key))
            raise
    return dirs


def jsonify_tree(tree, name):
    """Convert a treeify_syms() tree into a JSON-serialisable dict.

    Every node gets a 'name' ("<name> <formatted size>") and a 'data' dict
    with '$area'; leaves also carry '$symbol', inner nodes
    '$dominant_symbol' and their 'children' sorted by decreasing size.
    """
    children = []
    total = 0

    for key, val in tree.items():
        if key == '$bloat_symbols':
            continue
        if isinstance(val, dict):
            subtree = jsonify_tree(val, key)
            total += subtree['data']['$area']
            children.append(subtree)
        else:
            (size, symbols) = val
            total += size
            # A leaf is expected to have been seen with one symbol type only.
            assert len(symbols) == 1, symbols
            symbol = symbol_type_to_human(next(iter(symbols)))
            children.append({
                'name': key + ' ' + format_bytes(size),
                'data': {
                    '$area': size,
                    '$symbol': symbol,
                }
            })

    children.sort(key=lambda child: -child['data']['$area'])
    dominant_symbol = ''
    # .get() also covers an empty count table, which max() would reject.
    if tree.get('$bloat_symbols'):
        dominant_symbol = symbol_type_to_human(
            max(tree['$bloat_symbols'].items(),
                key=operator.itemgetter(1))[0])
    return {
        'name': name + ' ' + format_bytes(total),
        'data': {
            '$area': total,
            '$dominant_symbol': dominant_symbol,
        },
        'children': children,
    }


def dump_nm(nmfile, strip_prefix, cppfilt):
    """Print the symbol tree for the nm output in `nmfile` as 'var kTree'."""
    dirs = treeify_syms(parse_nm(nmfile), strip_prefix, cppfilt)
    print('var kTree = ' +
          json.dumps(jsonify_tree(dirs, '[everything]'), indent=2))


def parse_objdump(input):
    """Parse objdump -h output.

    Returns (sections, debug_sections), each a list of (name, size) with
    the leading '.' and, for debug sections, the 'debug_' prefix removed.
    """
    sec_re = re.compile(r'^\d+ (\S+) +([0-9a-z]+)')
    sections = []
    debug_sections = []

    for line in input:
        match = sec_re.match(line.strip())
        if not match:
            continue
        name, size = match.groups()
        if name.startswith('.'):
            name = name[1:]
        if name.startswith('debug_'):
            debug_sections.append((name[len('debug_'):], int(size, 16)))
        else:
            sections.append((name, int(size, 16)))
    return sections, debug_sections


def jsonify_sections(name, sections):
    """Build a treemap node named `name` from (section, size) pairs."""
    children = [
        {
            'name': section + ' ' + format_bytes(size),
            'data': {'$area': size},
        }
        for section, size in sections
    ]
    total = sum(size for _, size in sections)

    children.sort(key=lambda child: -child['data']['$area'])

    return {
        'name': name + ' ' + format_bytes(total),
        'data': {'$area': total},
        'children': children
    }


def dump_sections(objdump):
    """Print the section tree for objdump -h output as 'var kTree'."""
    sections, debug_sections = parse_objdump(objdump)
    sections = jsonify_sections('sections', sections)
    debug_sections = jsonify_sections('debug', debug_sections)
    size = sections['data']['$area'] + debug_sections['data']['$area']
    print('var kTree = ' + json.dumps({
        'name': 'top ' + format_bytes(size),
        'data': {'$area': size},
        'children': [debug_sections, sections]}))


usage="""%prog [options] MODE

Modes are:
  syms: output symbols json suitable for a treemap
  dump: print symbols sorted by size (pipe to head for best output)
  sections: output binary sections json suitable for a treemap

nm output passed to --nm-output should from running a command
like the following (note, can take a long time -- 30 minutes):
  nm -C -S -l /path/to/binary > nm.out

objdump output passed to --objdump-output should be from a command
like:
  objdump -h /path/to/binary > objdump.out"""
parser = optparse.OptionParser(usage=usage)
parser.add_option('--nm-output', action='store', dest='nmpath',
                  metavar='PATH', default='nm.out',
                  help='path to nm output [default=nm.out]')
parser.add_option('--objdump-output', action='store', dest='objdumppath',
                  metavar='PATH', default='objdump.out',
                  help='path to objdump output [default=objdump.out]')
parser.add_option('--strip-prefix', metavar='PATH', action='store',
                  help='strip PATH prefix from paths; e.g. /path/to/src/root')
parser.add_option('--filter', action='store',
                  help='include only symbols/files matching FILTER')
parser.add_option('--c++filt', action='store', metavar='PATH', dest='cppfilt',
                  default='c++filt', help="Path to c++filt, used to demangle "
                  "symbols that weren't handled by nm. Set to an invalid path "
                  "to disable.")


def _usable_cppfilt(cppfilt):
    """Return `cppfilt` if it runs and echoes 'main' unchanged, else None."""
    try:
        res = subprocess.check_output([cppfilt, 'main'],
                                      universal_newlines=True)
    except (OSError, subprocess.CalledProcessError):
        sys.stderr.write("Could not find c++filt at %s, "
                         "output won't be demangled.\n" % cppfilt)
        return None
    if res.strip() != 'main':
        sys.stderr.write("%s failed demangling, "
                         "output won't be demangled.\n" % cppfilt)
        return None
    return cppfilt


def _print_symbols_by_size(nmfile, name_filter):
    """Print non-bss, non-weak symbols from `nmfile`, largest first."""
    # a list of (sym, type, size, path); sort by size.
    syms = list(parse_nm(nmfile))
    syms.sort(key=lambda x: -x[2])
    total = 0
    for sym, sym_type, size, path in syms:
        if sym_type in ('b', 'w'):
            continue  # skip bss and weak symbols
        if path is None:
            path = ''
        if name_filter and not (name_filter in sym or name_filter in path):
            continue
        print('%6s %s (%s) %s' % (format_bytes(size), sym,
                                  symbol_type_to_human(sym_type), path))
        total += size
    print('%6s %s' % (format_bytes(total), 'total'))


def main(argv=None):
    """Run the tool; `argv` defaults to sys.argv[1:].

    Returns the process exit status: 0 on success, 1 for a wrong number of
    arguments or an unknown mode.
    """
    opts, args = parser.parse_args(argv)

    if len(args) != 1:
        parser.print_usage()
        return 1

    mode = args[0]
    if mode == 'syms':
        with open(opts.nmpath, 'r') as nmfile:
            cppfilt = _usable_cppfilt(opts.cppfilt)
            dump_nm(nmfile, strip_prefix=opts.strip_prefix, cppfilt=cppfilt)
    elif mode == 'sections':
        with open(opts.objdumppath, 'r') as objdumpfile:
            dump_sections(objdumpfile)
    elif mode == 'dump':
        with open(opts.nmpath, 'r') as nmfile:
            _print_symbols_by_size(nmfile, opts.filter)
    else:
        print('unknown mode')
        parser.print_usage()
        return 1
    return 0


if __name__ == '__main__':
    sys.exit(main())