• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#! /usr/bin/python
2
3import os
4import sys
5import string
6import re
7
## Global symbol table: maps a symbol name to the list of records seen
## under that name (one name can occur more than once in the dump).
## Each record is itself a list:
##   0 symname, 1 address, 2 references, 3 filename, 4 reached,
##   5 referenced-by, 6 backlinked, 7 approx size
symbols = dict()
## Names of the symbols used as starting points for the reachability
## walk; only the keys matter, every value is 1.
roots = dict()
12
def createBacklinks(name, syms):
    """Record `name` as a referencer of every symbol that `syms` refer to.

    name -- the symbol name shared by all records in `syms`
    syms -- list of symbol records; index 2 of each record holds the
            names that record references

    For each referenced name present in the global `symbols` table,
    `name` is appended (at most once) to index 5, the "referenced-by"
    list, of every record stored under that name.  References to names
    that are not in the table are silently skipped.
    """
    for sym in syms:
        for ref in sym[2]:
            ## .get() replaces the Python-2-only dict.has_key() test and
            ## yields nothing to iterate when the reference is unknown
            for target in symbols.get(ref, ()):
                referenced_by = target[5]
                if name not in referenced_by:
                    referenced_by.append(name)
23
def markSymbol(frm, name):
    """Mark `name` and everything it transitively references as reached.

    frm  -- name of the referencing symbol (or "root"); used only in the
            diagnostic printed for ambiguous references
    name -- name of the symbol to mark

    Every record stored under `name` in the global `symbols` table gets
    its "reached" flag (index 4) set, and the walk then recurses into the
    names listed in its references (index 2).  Records that are already
    marked are not revisited, which also terminates cycles.

    A name missing from the table is reported and skipped.
    """
    if name not in symbols:
        ## the message needs its argument, and there is nothing to mark:
        ## falling through would raise KeyError on the lookup below
        print("%s referenced but was not in the objdump" % name)
        return
    syms = symbols[name]
    ## print ambiguous references unless they are internal noise like ".L129"
    if len(syms) > 1 and name[0] != '.':
        print("Reference to symbol '%s' from '%s' is ambiguous, marking all '%s'" % (name, frm, name))
        print(syms)
    for s in syms:
        if s[4]:
            continue ## already marked
        s[4] = 1
        for r in s[2]:
            markSymbol(s[0], r)
40
def cmpFilename(a, b):
    """Old-style comparison function ordering entries by file, then name.

    a, b -- sequences whose index 1 is a filename and index 0 a symbol name

    Returns -1, 0 or 1 depending on whether `a` sorts before, equal to or
    after `b`.  Filenames are compared first; the symbol name only breaks
    ties between entries from the same file.
    """
    key_a = (a[1], a[0])
    key_b = (b[1], b[0])
    ## the cmp() builtin no longer exists in Python 3; subtracting the two
    ## boolean comparisons gives the same -1/0/1 on every Python version
    return (key_a > key_b) - (key_a < key_b)
46
def _twoDigits(value):
    """Format `value` (expected to be >= 1) without exponent notation.

    Values that still print as plain digits with two significant figures
    use "%.2g"; from 99.5 upwards "%.2g" would switch to exponent form
    (e.g. "1e+02"), so those are printed as a rounded whole number.
    """
    if value < 99.5:
        return "%.2g" % value
    return "%.0f" % value

def sizeAsString(bytes):
    """Return a short human-readable rendering of a byte count.

    bytes -- size in bytes

    Sizes below 1024 are returned as "<n> bytes"; below 1024*1024 they are
    scaled to kilobytes with a "K" suffix, otherwise to megabytes with an
    "M" suffix.
    """
    if bytes < 1024:
        return "%d bytes" % bytes
    if bytes < 1024*1024:
        return _twoDigits(bytes / 1024.0) + "K"
    return _twoDigits(bytes / 1024.0 / 1024.0) + "M"
54
def printLost():
    """Print every symbol that was never marked reached, grouped by file.

    Walks the global `symbols` table and, for each name whose first record
    has a clear "reached" flag (index 4), prints the symbol, its size
    (index 7) and the names referencing it (index 5) under the file it
    belongs to (index 3, or "unknown file").  Names starting with '.' are
    skipped.  Per-file totals for files with a non-zero total, followed by
    a grand total, are printed after the listing.
    """
    lost = []
    for (name, syms) in symbols.items():
        s = syms[0] ## we always mark all or none for now
        if not s[4] and name[0] != '.': ## skip .L129 type symbols
            lost.append((name, s[3] or "unknown file", s[5], s[7]))

    ## order by filename, then symbol name; a key function is used because
    ## passing a comparison function to sort() only works on Python 2
    lost.sort(key=lambda entry: (entry[1], entry[0]))

    summary_format = "  %s may be unused in %s"
    file_summaries = []
    total_unused = 0
    total_this_file = 0
    filename = None
    for (name, next_filename, referenced_by, size) in lost:
        if next_filename != filename:
            if total_this_file > 0:
                file_summaries.append(summary_format % (sizeAsString(total_this_file), filename))
            print("%s has these symbols not reachable from exported symbols:" % next_filename)
            filename = next_filename
            total_this_file = 0
        print("    %s %s" % (name, sizeAsString(size)))
        total_unused = total_unused + size
        total_this_file = total_this_file + size
        for trace in referenced_by:
            print("       referenced from %s" % trace)

    ## summaries are otherwise only flushed when the filename changes, so
    ## the last file has to be added here or it would never be reported
    if total_this_file > 0:
        file_summaries.append(summary_format % (sizeAsString(total_this_file), filename))

    for fs in file_summaries:
        print(fs)
    print("%s total may be unused" % sizeAsString(total_unused))
88
def _runCommand(argv):
    """Run the command `argv` (a list of strings) and return its stdout lines.

    The command is started directly, without a shell, so a filename that
    contains spaces or shell metacharacters is passed through untouched.
    A non-zero exit status is reported on stderr but is not fatal.
    """
    import subprocess ## local import: only this helper needs it

    print("Running: %s" % " ".join(argv))
    proc = subprocess.Popen(argv, stdout=subprocess.PIPE,
                            universal_newlines=True)
    try:
        lines = proc.stdout.readlines()
    finally:
        ## always release the pipe and reap the child
        proc.stdout.close()
        status = proc.wait()
    if status != 0:
        sys.stderr.write("Warning: '%s' exited with status %d\n" % (argv[0], status))
    return lines

def _parseObjdump(lines):
    """Fill the global `symbols` table from lines of `objdump -D -l` output."""
    ## 0001aa44 <_dbus_message_get_network_data>:
    sym_re = re.compile(r'([0-9a-f]+) <([^>]+)>:')
    ## 1aa49:       e8 00 00 00 00          call   1aa4e <_dbus_message_get_network_data+0xa>
    ref_re = re.compile(r' <([^>]+)> *$')
    ## /home/hp/dbus-cvs/dbus/dbus/dbus-message.c:139
    file_re = re.compile(r'^(\/[^:].*):[0-9]+$')
    ## _dbus_message_get_network_data+0xa
    funcname_re = re.compile(r'([^+]+)\+[0-9a-fx]+')

    ## first we find which functions reference which other functions
    current_sym = None
    for line in lines:
        match = sym_re.match(line)
        if match:
            addr = match.group(1)
            name = match.group(2)
            ## 0 symname, 1 address, 2 references, 3 filename, 4 reached, 5 referenced-by 6 backlinked 7 approx size
            item = [name, addr, [], None, 0, [], 0, 0]
            symbols.setdefault(name, []).append(item)

            if current_sym:
                ## the distance between two consecutive symbol addresses
                ## is the extent of the EARLIER symbol, so that is the
                ## record the size is stored on
                size = int(addr, 16) - int(current_sym[1], 16)
                if size < 0:
                    print("Computed negative size %d for %s" % (size, current_sym[0]))
                    size = 0
                current_sym[7] = size

            current_sym = item
            continue

        if not current_sym:
            ## references and file markers before the first symbol
            ## have nothing to attach to
            continue

        match = ref_re.search(line)
        if match:
            target = match.group(1)
            stripped = funcname_re.match(target)
            if stripped:
                ## dump the "+address"
                target = stripped.group(1)
            if target != current_sym[0]: ## skip self-references
                current_sym[2].append(target)
            continue

        match = file_re.match(line)
        if match:
            src_file = match.group(1)
            if src_file.startswith('/usr/include'):
                ## inlined libc thingy
                pass
            elif current_sym[0].startswith('.debug'):
                ## debug info
                pass
            elif current_sym[3] and current_sym[3] != src_file:
                raise Exception("%s in both %s and %s" % (current_sym[0], current_sym[3], src_file))
            else:
                current_sym[3] = src_file

def _collectExports(lines):
    """Add every 'T' symbol found in lines of `nm -D` output to the global `roots`."""
    ## 00005410 T dbus_address_entries_free
    dynsym_re = re.compile(r'T ([^ \n]+)$')
    for line in lines:
        match = dynsym_re.search(line)
        if match:
            name = match.group(1)
            if name in roots:
                raise Exception("symbol %s exported twice?" % name)
            roots[name] = 1

def main():
    """Report symbols of an object file that no exported symbol reaches.

    The object file is named by the first command-line argument.  Its
    disassembly (objdump) provides the reference graph, its dynamic
    symbols (nm -D) plus a hardcoded vtable list provide the roots; all
    symbols reachable from the roots are marked and the rest is printed.

    Exits with status 1 if no file was given.  Raises Exception when a
    symbol is attributed to two source files, is exported twice, or a
    hardcoded vtable root is also a dynamic export.
    """
    if len(sys.argv) < 2:
        sys.stderr.write("Usage: %s OBJECT-FILE\n" % sys.argv[0])
        sys.exit(1)

    filename = sys.argv[1]

    _parseObjdump(_runCommand(["objdump", "-D", "--demangle", "-l", filename]))

    ## now we need to find the roots (exported symbols)
    _collectExports(_runCommand(["nm", "-D", filename]))

    print("%d symbols exported from this object" % len(roots))

    ## these functions are used only indirectly, so we don't
    ## notice they are used. Manually add them as roots...
    vtable_roots = ['unix_finalize',
                    'unix_handle_watch',
                    'unix_disconnect',
                    'unix_connection_set',
                    'unix_do_iteration',
                    'unix_live_messages_changed',
                    'unix_get_unix_fd',
                    'handle_client_data_cookie_sha1_mech',
                    'handle_client_data_external_mech',
                    'handle_server_data_cookie_sha1_mech',
                    'handle_server_data_external_mech',
                    'handle_client_initial_response_cookie_sha1_mech',
                    'handle_client_initial_response_external_mech',
                    'handle_client_shutdown_cookie_sha1_mech',
                    'handle_client_shutdown_external_mech',
                    'handle_server_shutdown_cookie_sha1_mech',
                    'handle_server_shutdown_external_mech'
                    ]

    for vr in vtable_roots:
        if vr in roots:
            raise Exception("%s is already a root" % vr)
        roots[vr] = 1

    for k in roots:
        markSymbol("root", k)

    for (k, v) in symbols.items():
        createBacklinks(k, v)

    print("""

The symbols mentioned below don't appear to be reachable starting from
the dynamic exports of the library. However, this program is pretty
dumb; a limitation that creates false positives is that it can only
trace 'reachable' through hardcoded function calls, if a function is
called only through a vtable, it won't be marked reachable (and
neither will its children in the call graph).

Also, the sizes mentioned are more or less completely bogus.

""")

    print("The following are hardcoded in as vtable roots: %s" % vtable_roots)

    printLost()
240
## Run the analysis only when this file is executed as a script,
## not when it is imported as a module.
if __name__ == "__main__":
    main()
243