#! /usr/bin/python
"""Report symbols in a shared object that look unreachable from its exports.

The script disassembles the object with ``objdump -D --demangle -l``,
records which symbol references which other symbol, takes the ``T`` entries
of ``nm -D`` (plus a hardcoded list of vtable-only functions) as roots,
marks everything reachable from the roots, and prints what is left over,
grouped by source file.

Each symbol is represented by an 8-element list:

    0 symname, 1 address (hex string), 2 references (names),
    3 filename, 4 reached flag, 5 referenced-by (names),
    6 backlinked (unused), 7 approx size in bytes

The code avoids Python-2-only syntax so it runs on Python 2.7 and Python 3.
"""

import functools
import os
import re
import string
import subprocess
import sys

## hash from symbol name to list of symbols with that name,
## where the list of symbols contains a list representing each symbol
symbols = {}
roots = {}

## 0001aa44 <_dbus_message_get_network_data>:
_SYM_RE = re.compile(r'([0-9a-f]+) <([^>]+)>:')
##    1aa49:       e8 00 00 00 00  call   1aa4e <_dbus_message_get_network_data+0xa>
_REF_RE = re.compile(r' <([^>]+)> *$')
## /home/hp/dbus-cvs/dbus/dbus/dbus-message.c:139
## (optionally followed by " (discriminator N)")
_FILE_RE = re.compile(r'^(/[^:].*):[0-9]+(?: \(discriminator [0-9]+\))?$')
## _dbus_message_get_network_data+0xa
_FUNCNAME_RE = re.compile(r'([^+]+)\+[0-9a-fx]+')
## 00005410 T dbus_address_entries_free
_DYNSYM_RE = re.compile(r'T ([^ \n]+)$')

## these functions are used only indirectly, so we don't
## notice they are used. Manually add them as roots...
_VTABLE_ROOTS = ['unix_finalize',
                 'unix_handle_watch',
                 'unix_disconnect',
                 'unix_connection_set',
                 'unix_do_iteration',
                 'unix_live_messages_changed',
                 'unix_get_unix_fd',
                 'handle_client_data_cookie_sha1_mech',
                 'handle_client_data_external_mech',
                 'handle_server_data_cookie_sha1_mech',
                 'handle_server_data_external_mech',
                 'handle_client_initial_response_cookie_sha1_mech',
                 'handle_client_initial_response_external_mech',
                 'handle_client_shutdown_cookie_sha1_mech',
                 'handle_client_shutdown_external_mech',
                 'handle_server_shutdown_cookie_sha1_mech',
                 'handle_server_shutdown_external_mech'
                 ]

_BANNER = """

The symbols mentioned below don't appear to be reachable starting from
the dynamic exports of the library. However, this program is pretty
dumb; a limitation that creates false positives is that it can only
trace 'reachable' through hardcoded function calls, if a function is
called only through a vtable, it won't be marked reachable (and
neither will its children in the call graph).

Also, the sizes mentioned are more or less completely bogus.

"""


def createBacklinks(name, syms):
    """Record `name` as a referencer on every symbol that `syms` refer to.

    name -- the symbol name shared by all entries of `syms`
    syms -- list of symbol records (see module docstring) for that name

    References to names that are not in `symbols` are ignored.
    """
    for s in syms:
        for r in s[2]:
            ## for each ref, add ourselves as a referencer
            for t in symbols.get(r, ()):
                if name not in t[5]:
                    t[5].append(name)


def markSymbol(frm, name):
    """Mark `name`, and everything reachable from it, as reached.

    frm  -- name of the referencing symbol (only used in diagnostics)
    name -- name of the symbol to mark

    A name that is missing from `symbols` is reported and skipped instead
    of raising KeyError.  The walk uses an explicit worklist rather than
    recursion so that long call chains cannot exhaust the interpreter's
    recursion limit.
    """
    pending = [(frm, name)]
    while pending:
        (referrer, target) = pending.pop()
        if target not in symbols:
            print("%s referenced but was not in the objdump" % target)
            continue
        syms = symbols[target]
        ## print ambiguous references unless they are internal noise like ".L129"
        if len(syms) > 1 and target[0] != '.':
            print("Reference to symbol '%s' from '%s' is ambiguous, marking all '%s'" % (target, referrer, target))
            print(syms)
        newly_found = []
        for s in syms:
            if s[4]:
                continue  ## already marked
            s[4] = 1
            for r in s[2]:
                newly_found.append((s[0], r))
        ## reversed so that the first reference is popped (visited) first
        pending.extend(reversed(newly_found))


def _compare(x, y):
    """Three-way comparison returning -1, 0 or 1 (portable `cmp`)."""
    return (x > y) - (x < y)


def cmpFilename(a, b):
    """Order two (name, filename, ...) tuples by filename, then by name."""
    v = _compare(a[1], b[1])
    if v == 0:
        v = _compare(a[0], b[0])
    return v


def sizeAsString(bytes):
    """Format a byte count as "N bytes", "N.NK" or "N.NM".

    Fixed-point formatting is used for the K and M forms so that values of
    100 and above are not printed in exponent notation.
    """
    if bytes < 1024:
        return "%d bytes" % bytes
    elif bytes < 1024*1024:
        return "%.1fK" % (bytes / 1024.0)
    else:
        return "%.1fM" % (bytes / 1024.0 / 1024.0)


def printLost():
    """Print every unreached symbol grouped by file, then size summaries.

    Symbols whose name starts with '.' (such as ".L129") are skipped.
    Only the first record for each name is inspected, because marking
    always affects all records sharing a name.
    """
    lost = []
    for (name, syms) in symbols.items():
        s = syms[0]  ## we always mark all or none for now
        if not s[4] and name[0] != '.':  ## skip .L129 type symbols
            filename = s[3] or "unknown file"
            lost.append((name, filename, s[5], s[7]))

    file_summaries = []
    total_unused = 0
    total_this_file = 0
    current_file = None
    lost.sort(key=functools.cmp_to_key(cmpFilename))
    for (name, filename, referrers, size) in lost:
        if filename != current_file:
            if total_this_file > 0:
                file_summaries.append(" %s may be unused in %s" % (sizeAsString(total_this_file), current_file))
            print("%s has these symbols not reachable from exported symbols:" % filename)
            current_file = filename
            total_this_file = 0
        print(" %s %s" % (name, sizeAsString(size)))
        total_unused = total_unused + size
        total_this_file = total_this_file + size
        for trace in referrers:
            print(" referenced from %s" % trace)

    ## the loop above only flushes a summary when the file changes, so the
    ## final file has to be flushed here
    if total_this_file > 0:
        file_summaries.append(" %s may be unused in %s" % (sizeAsString(total_this_file), current_file))

    for fs in file_summaries:
        print(fs)
    print("%s total may be unused" % sizeAsString(total_unused))


def _runCommand(argv):
    """Run `argv` without a shell and return its stdout as a list of lines.

    A non-zero exit status is reported on stderr but does not abort.
    """
    print("Running: %s" % " ".join(argv))
    proc = subprocess.Popen(argv, stdout=subprocess.PIPE,
                            universal_newlines=True)
    try:
        lines = proc.stdout.readlines()
    finally:
        proc.stdout.close()
        status = proc.wait()
    if status != 0:
        sys.stderr.write("Warning: %s exited with status %d\n" % (argv[0], status))
    return lines


def _parseDisassembly(lines):
    """Fill `symbols` from `objdump -D --demangle -l` output lines.

    Raises Exception if one symbol is attributed to two different source
    files (files under /usr/include and .debug* symbols are ignored).
    """
    ## first we find which functions reference which other functions
    current_sym = None
    for l in lines:
        match = _SYM_RE.match(l)
        if match:
            addr = match.group(1)
            name = match.group(2)
            ## 0 symname, 1 address, 2 references, 3 filename, 4 reached,
            ## 5 referenced-by 6 backlinked 7 approx size
            item = [name, addr, [], None, 0, [], 0, 0]
            symbols.setdefault(name, []).append(item)

            if current_sym is not None:
                ## the distance to the next symbol approximates the size
                ## of the *previous* symbol, so store it there
                size = int(addr, 16) - int(current_sym[1], 16)
                if size < 0:
                    print("Computed negative size %d for %s" % (size, current_sym[0]))
                    size = 0
                current_sym[7] = size

            current_sym = item
            continue

        if current_sym is None:
            continue

        match = _REF_RE.search(l)
        if match:
            target = match.group(1)
            match = _FUNCNAME_RE.match(target)
            if match:
                ## dump the "+address"
                target = match.group(1)
            if target != current_sym[0]:  ## skip self-references
                current_sym[2].append(target)
            continue

        match = _FILE_RE.match(l)
        if match:
            source_file = match.group(1)
            if source_file.startswith('/usr/include'):
                ## inlined libc thingy
                pass
            elif current_sym[0].startswith('.debug'):
                ## debug info
                pass
            elif current_sym[3] and current_sym[3] != source_file:
                raise Exception("%s in both %s and %s" % (current_sym[0], current_sym[3], source_file))
            else:
                current_sym[3] = source_file


def _findRoots(lines):
    """Fill `roots` with the 'T' symbols found in `nm -D` output lines.

    Raises Exception if the same name is exported twice.
    """
    for l in lines:
        match = _DYNSYM_RE.search(l)
        if match:
            name = match.group(1)
            if name in roots:
                raise Exception("symbol %s exported twice?" % name)
            roots[name] = 1


def main():
    """Analyse the object file named by sys.argv[1] and print the report."""
    if len(sys.argv) < 2:
        sys.stderr.write("Usage: %s OBJECT-FILE\n" % sys.argv[0])
        sys.exit(1)

    filename = sys.argv[1]

    _parseDisassembly(_runCommand(["objdump", "-D", "--demangle", "-l", filename]))

    ## now we need to find the roots (exported symbols)
    _findRoots(_runCommand(["nm", "-D", filename]))

    print("%d symbols exported from this object" % len(roots))

    vtable_roots = _VTABLE_ROOTS
    for vr in vtable_roots:
        if vr in roots:
            raise Exception("%s is already a root" % vr)
        roots[vr] = 1

    for k in list(roots):
        markSymbol("root", k)

    for (k, v) in symbols.items():
        createBacklinks(k, v)

    print(_BANNER)

    print("The following are hardcoded in as vtable roots: %s" % vtable_roots)

    printLost()


if __name__ == "__main__":
    main()