#!/usr/bin/env python
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Dump functions called by static initializers in a Linux Release binary.

Usage example:
  tools/linux/dump-static-initializers.py out/Release/chrome

A brief overview of static initialization:
1) the compiler writes out, per object file, a function that contains
   the static initializers for that file.
2) the compiler also writes out a pointer to that function in a special
   section.
3) at link time, the linker concatenates the function pointer sections
   into a single list of all initializers.
4) at run time, on startup the binary runs all function pointers.

The functions in (1) all have mangled names of the form
  _GLOBAL__I_foobar.cc
using objdump, we can disassemble those functions and dump all symbols that
they reference.
"""

import optparse
import re
import subprocess
import sys

# A map of symbol => informative text about it.
NOTES = {
  '__cxa_atexit@plt': 'registers a dtor to run at exit',
  'std::__ioinit': '#includes <iostream>, use <ostream> instead',
}


def _IsGitWorkspace():
  """Return True if the current directory is inside a git checkout.

  Runs `git rev-parse` and reports whether it exited with status 0. A missing
  git executable is treated as "not a git workspace" rather than an error, so
  that merely importing this module never fails on machines without git.
  """
  try:
    git = subprocess.Popen(['git', 'rev-parse'], stderr=subprocess.PIPE)
  except OSError:
    return False
  # communicate() drains and closes the stderr pipe before reaping the child;
  # a bare wait() would leave the pipe unread.
  git.communicate()
  return git.returncode == 0


# Determine whether this is a git checkout (as opposed to e.g. svn).
IS_GIT_WORKSPACE = _IsGitWorkspace()


class Demangler(object):
  """A wrapper around c++filt to provide a function to demangle symbols.

  A single long-lived c++filt child process is started on construction and
  fed one symbol per line; each reply line is read back synchronously.
  """

  def __init__(self):
    # universal_newlines=True puts the pipes in text mode on Python 3, so the
    # str values handled by Demangle() work there as well as on Python 2.
    self.cppfilt = subprocess.Popen(['c++filt'],
                                    stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE,
                                    universal_newlines=True)

  def Demangle(self, sym):
    """Given mangled symbol |sym|, return its demangled form.

    The reply is returned with surrounding whitespace stripped.
    """
    self.cppfilt.stdin.write(sym + '\n')
    # The pipe may be buffered (it is by default on Python 3); without the
    # flush c++filt would never see the request and readline() would block.
    self.cppfilt.stdin.flush()
    return self.cppfilt.stdout.readline().strip()


# Matches for example: "cert_logger.pb.cc", capturing "cert_logger".
protobuf_filename_re = re.compile(r'(.*)\.pb\.cc$')


def _SingleGitHit(git_args, fallback):
  """Run `git <git_args>` and return its output if it is exactly one line.

  Args:
    git_args: list of arguments to pass after `git`.
    fallback: value returned when git prints zero lines or more than one.

  Returns:
    The single output line with surrounding whitespace stripped, or
    |fallback| when the result is missing or ambiguous.
  """
  git = subprocess.Popen(['git'] + git_args, stdout=subprocess.PIPE,
                         universal_newlines=True)
  # communicate() reads all output and reaps the child, so no git process is
  # left behind when there are multiple hits.
  output = git.communicate()[0]
  hits = [line.strip() for line in output.splitlines()]
  if len(hits) != 1:
    return fallback
  return hits[0]


def QualifyFilenameAsProto(filename):
  """Attempt to qualify a bare |filename| with a src-relative path, assuming it
  is a protoc-generated file.

  "foo.pb.cc" is mapped to the tracked "foo.proto" it would be generated
  from. If a single match is found, that path is returned. Otherwise (not a
  git workspace, not a .pb.cc name, no match, or several matches) the
  original filename is returned."""
  if not IS_GIT_WORKSPACE:
    return filename
  match = protobuf_filename_re.match(filename)
  if not match:
    return filename
  # group(1) is the captured stem as a string; groups(0) would return a tuple.
  basename = match.group(1)
  return _SingleGitHit(['ls-files', '--', '*/%s.proto' % basename], filename)


# Regex matching the substring of a symbol's demangled text representation most
# likely to appear in a source file.
# Example: "v8::internal::Builtins::InitBuiltinFunctionTable()" becomes
# "InitBuiltinFunctionTable", since the first (optional & non-capturing) group
# picks up any ::-qualification and the last fragment picks up a suffix that
# starts with an opener.
# The '[' inside the character classes is escaped only to avoid the "possible
# nested set" FutureWarning from newer versions of re; the match is unchanged.
symbol_code_name_re = re.compile(r'^(?:[^(<\[]*::)?([^:(<\[]*).*?$')


def QualifyFilename(filename, symbol):
  """Given a bare filename and a symbol that occurs in it, attempt to qualify
  it with a src-relative path.

  Uses `git grep -l` to find tracked files named |filename| that mention the
  unqualified name of |symbol|. If exactly one file matches, its path is
  returned. If none or more than one file matches, or this is not a git
  workspace, the original filename is returned."""
  if not IS_GIT_WORKSPACE:
    return filename
  match = symbol_code_name_re.match(symbol)
  if not match:
    return filename
  code_name = match.group(1)
  return _SingleGitHit(
      ['grep', '-l', code_name, '--', '*/%s' % filename], filename)


# Regex matching nm output for the symbols we're interested in.
# See test_ParseNmLine for examples.
nm_re = re.compile(r'(\S+) (\S+) t (?:_ZN12)?_GLOBAL__(?:sub_)?I_(.*)')


def ParseNmLine(line):
  """Given a line of `nm -S` output, parse a static initializer symbol.

  Returns:
    A (file, start, size) tuple, where |file| is the source filename embedded
    in the symbol name and |start| / |size| are integers parsed from the
    hexadecimal address and size columns. Returns None if the line does not
    describe a static initializer function.
  """
  match = nm_re.match(line)
  if not match:
    return None
  addr, size, filename = match.groups()
  return (filename, int(addr, 16), int(size, 16))


def test_ParseNmLine():
  """Verify the nm_re regex matches some sample lines."""
  parse = ParseNmLine(
      '0000000001919920 0000000000000008 t '
      '_ZN12_GLOBAL__I_safe_browsing_service.cc')
  assert parse == ('safe_browsing_service.cc', 26319136, 8), parse

  parse = ParseNmLine(
      '00000000026b9eb0 0000000000000024 t '
      '_GLOBAL__sub_I_extension_specifics.pb.cc')
  assert parse == ('extension_specifics.pb.cc', 40607408, 36), parse

# Just always run the test; it is fast enough.
test_ParseNmLine()


def ParseNm(binary):
  """Given a binary, yield static initializers as (file, start, size) tuples.

  Runs `nm -S` on |binary| and yields one tuple per line that ParseNmLine
  recognizes; all other lines are skipped.
  """
  # universal_newlines=True makes the pipe yield str rather than bytes on
  # Python 3, which is what the str-pattern nm_re requires.
  nm = subprocess.Popen(['nm', '-S', binary], stdout=subprocess.PIPE,
                        universal_newlines=True)
  try:
    for line in nm.stdout:
      parse = ParseNmLine(line)
      if parse:
        yield parse
  finally:
    # Close the pipe and reap nm even if the consumer stops iterating early.
    nm.stdout.close()
    nm.wait()


# Regex matching objdump output for the symbols we're interested in.
# Example line:
#     12354ab:  (disassembly, including <FunctionReference>)
disassembly_re = re.compile(r'^\s+[0-9a-f]+:.*<(\S+)>')

# Matches a reference that points back into a static initializer function.
# Accepts the same symbol spellings as nm_re (optional "_ZN12" prefix and
# optional "sub_"), so every initializer ParseNm finds is also filtered here.
initializer_symbol_re = re.compile(r'(?:_ZN12)?_GLOBAL__(?:sub_)?I_')


def ExtractSymbolReferences(binary, start, end):
  """Given a span of addresses, returns symbol references from disassembly.

  Args:
    binary: path of the binary to disassemble with objdump.
    start: first address of the span (inclusive), as an integer.
    end: address passed to objdump as --stop-address, as an integer.

  Returns:
    A sorted list of the distinct symbol names referenced by the disassembled
    instructions, excluding uninformative references (.LC*, _DYNAMIC*) and
    references back into a static initializer function.

  Raises:
    RuntimeError: if the disassembly mentions
      __static_initialization_and_destruction, which suggests the binary is
      a Debug build.
  """
  cmd = ['objdump', binary, '--disassemble',
         '--start-address=0x%x' % start, '--stop-address=0x%x' % end]
  # universal_newlines=True yields str lines on both Python 2 and 3.
  objdump = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                             universal_newlines=True)

  refs = set()
  try:
    for line in objdump.stdout:
      if '__static_initialization_and_destruction' in line:
        raise RuntimeError('code mentions '
                           '__static_initialization_and_destruction; '
                           'did you accidentally run this on a Debug binary?')
      match = disassembly_re.search(line)
      if not match:
        continue
      ref = match.group(1)
      if ref.startswith('.LC') or ref.startswith('_DYNAMIC'):
        # Ignore these, they are uninformative.
        continue
      if initializer_symbol_re.match(ref):
        # Probably a relative jump within this function.
        continue
      refs.add(ref)
  finally:
    # Always close the pipe and reap objdump, including on the error path.
    objdump.stdout.close()
    objdump.wait()

  return sorted(refs)


def main():
  """Print the symbols referenced by each static initializer of a binary.

  Expects exactly one positional command-line argument, the binary to
  inspect. With -d/--diffable the files are processed in sorted order and
  every output line is prefixed with '# ' and the file name.

  Returns:
    0 on success. A wrong number of arguments is reported through
    parser.error(), which exits the process.
  """
  parser = optparse.OptionParser(usage='%prog [option] filename')
  parser.add_option('-d', '--diffable', dest='diffable',
                    action='store_true', default=False,
                    help='Prints the filename on each line, for more easily '
                         'diff-able output. (Used by sizes.py)')
  opts, args = parser.parse_args()
  if len(args) != 1:
    # parser.error() prints the usage message and exits; it does not return.
    parser.error('missing filename argument')
  binary = args[0]

  demangler = Demangler()
  file_count = 0
  initializer_count = 0

  files = ParseNm(binary)
  if opts.diffable:
    files = sorted(files)
  for filename, addr, size in files:
    file_count += 1
    ref_output = []

    qualified_filename = QualifyFilenameAsProto(filename)

    if size == 2:
      # gcc generates a two-byte 'repz retq' initializer when there is a
      # ctor even when the ctor is empty.  This is fixed in gcc 4.6, but
      # Android uses gcc 4.4.
      ref_output.append('[empty ctor, but it still has cost on gcc <4.6]')
    else:
      for ref in ExtractSymbolReferences(binary, addr, addr + size):
        initializer_count += 1

        ref = demangler.Demangle(ref)
        # Keep trying to qualify the file name until some symbol pins it
        # down to a single source file.
        if qualified_filename == filename:
          qualified_filename = QualifyFilename(filename, ref)

        note = ''
        if ref in NOTES:
          note = NOTES[ref]
        elif ref.endswith('_2eproto()'):
          note = 'protocol compiler bug: crbug.com/105626'

        if note:
          ref_output.append('%s [%s]' % (ref, note))
        else:
          ref_output.append(ref)

    # Each print() below takes a single argument, so it behaves identically
    # as a Python 2 print statement and as the Python 3 print function.
    if opts.diffable:
      if ref_output:
        print('\n'.join('# ' + qualified_filename + ' ' + r
                        for r in ref_output))
      else:
        print('# %s: (empty initializer list)' % qualified_filename)
    else:
      print('%s (initializer offset 0x%x size 0x%x)' % (qualified_filename,
                                                        addr, size))
      print(''.join(' %s\n' % r for r in ref_output))

  summary = 'Found %d static initializers in %d files.' % (initializer_count,
                                                           file_count)
  if opts.diffable:
    summary = '# ' + summary
  print(summary)

  return 0


if '__main__' == __name__:
  sys.exit(main())