• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python3
2#
3# Script to find struct sizes.
4#
5
import collections as co
import csv
import glob
import itertools as it
import os
import re
import shlex
import subprocess as sp
import sys
14
15
# Default glob patterns for locating object files; used as the default
# value of the obj_paths command-line argument.
OBJ_PATHS = ['*.o']
17
def _objdump_lines(args, dwarf, path):
    """Run objdump with --dwarf=<dwarf> on path and yield its stdout lines.

    args is the keyword dict given to collect(): args['objdump_tool'] is
    the command prefix as a list, and a truthy args.get('verbose') echoes
    the command and leaves stderr attached to the terminal.

    If objdump exits non-zero, its captured stderr is written to stdout
    (unless verbose) and the process exits with -1 via sys.exit.
    """
    # note objdump-tool may contain extra args
    cmd = args['objdump_tool'] + ['--dwarf=%s' % dwarf, path]
    verbose = args.get('verbose')
    if verbose:
        print(' '.join(shlex.quote(c) for c in cmd))
    # the context manager closes the pipes and reaps the child for us,
    # including when we bail out through sys.exit below
    with sp.Popen(cmd,
            stdout=sp.PIPE,
            stderr=sp.PIPE if not verbose else None,
            universal_newlines=True,
            errors='replace') as proc:
        yield from proc.stdout
        proc.wait()
        if proc.returncode != 0:
            if not verbose:
                for line in proc.stderr:
                    sys.stdout.write(line)
            sys.exit(-1)


def collect(paths, **args):
    """Collect struct sizes from the DWARF info of the given object files.

    For each path, objdump is run twice: once with --dwarf=rawline to map
    file numbers to file names, and once with --dwarf=info to find
    structure_type entries with a name, a decl_file and a byte_size.

    Keyword arguments used:
        objdump_tool: list with the objdump command and any extra args
            (required).
        verbose: echo the objdump commands that are run.
        build_dir: prefix stripped from the reported file names.
        everything: include structs from any file, not only those whose
            declaring file ends in '.h'.

    Returns a list of (file, struct, size) tuples. Exits the process with
    -1 if objdump fails.
    """
    decl_pattern = re.compile(
        r'^\s+(?P<no>[0-9]+)'
            r'\s+(?P<dir>[0-9]+)'
            r'\s+.*'
            r'\s+(?P<file>[^\s]+)$')
    struct_pattern = re.compile(
        r'^(?:.*DW_TAG_(?P<tag>[a-z_]+).*'
            r'|^.*DW_AT_name.*:\s*(?P<name>[^:\s]+)\s*'
            r'|^.*DW_AT_decl_file.*:\s*(?P<decl>[0-9]+)\s*'
            r'|^.*DW_AT_byte_size.*:\s*(?P<size>[0-9]+)\s*)$')

    # (file, struct) -> size, later entries overwrite earlier ones
    results = {}
    for path in paths:
        # find decl, we want to filter by structs in .h files
        decls = {}
        for line in _objdump_lines(args, 'rawline', path):
            # find file numbers
            m = decl_pattern.match(line)
            if m:
                decls[int(m.group('no'))] = m.group('file')

        # collect structs as we parse dwarf info
        found = False
        name = None
        decl = None
        size = None

        for line in _objdump_lines(args, 'info', path):
            # state machine here to find structs
            m = struct_pattern.match(line)
            if not m:
                continue
            if m.group('tag'):
                # a new tag ends the previous entry, record it if complete
                if (name is not None
                        and decl is not None
                        and size is not None):
                    results[(decls.get(decl, '?'), name)] = size
                found = (m.group('tag') == 'structure_type')
                name = None
                decl = None
                size = None
            elif found and m.group('name'):
                name = m.group('name')
            elif found and name and m.group('decl'):
                decl = int(m.group('decl'))
            elif found and name and m.group('size'):
                size = int(m.group('size'))

        # the last struct has no following tag to trigger the flush above
        if (name is not None
                and decl is not None
                and size is not None):
            results[(decls.get(decl, '?'), name)] = size

    flat_results = []
    for (file, struct), size in results.items():
        # map to source files
        if args.get('build_dir'):
            file = re.sub('%s/*' % re.escape(args['build_dir']), '', file)
        # only include structs declared in header files in the current
        # directory, ignore internal-only structs (these are represented
        # in other measurements)
        if not args.get('everything'):
            if not file.endswith('.h'):
                continue
        # replace .o with .c, different scripts report .o/.c, we need to
        # choose one if we want to deduplicate csv files
        file = re.sub(r'\.o$', '.c', file)

        flat_results.append((file, struct, size))

    return flat_results
115
116
def main(**args):
    """Find struct sizes, optionally write them to CSV, and print a report.

    Sizes come either from collect() on the object files matched by
    args['obj_paths'], or, when args['use'] is set, from an existing CSV
    file with 'file', 'name' and 'struct_size' columns.

    Keyword arguments used (all optional unless noted):
        obj_paths: list of globs or directories (required without 'use').
        use: CSV file to read sizes from instead of running objdump.
        diff: CSV file with previous sizes to diff against; a missing
            file is treated as empty.
        output: CSV file to write results to.
        merge: existing CSV file whose other columns are carried over
            into the output; a missing file is ignored.
        quiet, summary, files: select what is printed.
        all: in diff mode, also show unchanged entries.
        size_sort, reverse_size_sort: ordering of the printed entries.
    Any path may be '-' for stdin/stdout. Remaining keyword arguments are
    passed through to collect().

    Returns None. Exits with -1 if no object files are found.
    """
    def openio(path, mode='r'):
        # newline='' leaves line-ending handling to the csv module, as its
        # documentation requires for both readers and writers
        if path == '-':
            if 'r' in mode:
                return os.fdopen(os.dup(sys.stdin.fileno()), 'r',
                    newline='')
            else:
                return os.fdopen(os.dup(sys.stdout.fileno()), 'w',
                    newline='')
        else:
            return open(path, mode, newline='')

    def read_sizes(path):
        # rows without a struct_size belong to other measurements
        with openio(path) as f:
            return [
                (   row['file'],
                    row['name'],
                    int(row['struct_size']))
                for row in csv.DictReader(f)
                if row.get('struct_size') not in {None, ''}]

    # find sizes
    if not args.get('use', None):
        # find .o files
        paths = []
        for path in args['obj_paths']:
            if os.path.isdir(path):
                path = path + '/*.o'
            paths.extend(glob.glob(path))

        if not paths:
            # wrap in a tuple so a tuple of paths formats correctly too
            print('no .obj files found in %r?' % (args['obj_paths'],))
            sys.exit(-1)

        results = collect(paths, **args)
    else:
        results = read_sizes(args['use'])

    total = sum(size for _, _, size in results)

    # find previous results?
    if args.get('diff'):
        try:
            prev_results = read_sizes(args['diff'])
        except FileNotFoundError:
            prev_results = []

        prev_total = sum(size for _, _, size in prev_results)

    # write results to CSV
    if args.get('output'):
        merged_results = co.defaultdict(lambda: {})
        other_fields = []

        # merge?
        if args.get('merge'):
            try:
                with openio(args['merge']) as f:
                    r = csv.DictReader(f)
                    for result in r:
                        file = result.pop('file', '')
                        struct = result.pop('name', '')
                        # our own column is rewritten from fresh results
                        result.pop('struct_size', None)
                        merged_results[(file, struct)] = result
                        other_fields = result.keys()
            except FileNotFoundError:
                pass

        for file, struct, size in results:
            merged_results[(file, struct)]['struct_size'] = size

        with openio(args['output'], 'w') as f:
            w = csv.DictWriter(f,
                ['file', 'name', *other_fields, 'struct_size'])
            w.writeheader()
            for (file, struct), result in sorted(merged_results.items()):
                w.writerow({'file': file, 'name': struct, **result})

    # print results
    def dedup_entries(results, by='name'):
        # sum sizes per file or per struct name
        entries = co.defaultdict(lambda: 0)
        for file, struct, size in results:
            entry = (file if by == 'file' else struct)
            entries[entry] += size
        return entries

    def diff_entries(olds, news):
        # name -> (old, new, diff, ratio), ratio is 1.0 for added entries
        diff = co.defaultdict(lambda: (0, 0, 0, 0))
        for name, new in news.items():
            diff[name] = (0, new, new, 1.0)
        for name, old in olds.items():
            _, new, _, _ = diff[name]
            diff[name] = (old, new, new-old, (new-old)/old if old else 1.0)
        return diff

    def sorted_entries(entries):
        if args.get('size_sort'):
            return sorted(entries, key=lambda x: (-x[1], x))
        elif args.get('reverse_size_sort'):
            return sorted(entries, key=lambda x: (+x[1], x))
        else:
            return sorted(entries)

    def sorted_diff_entries(entries):
        if args.get('size_sort'):
            return sorted(entries, key=lambda x: (-x[1][1], x))
        elif args.get('reverse_size_sort'):
            return sorted(entries, key=lambda x: (+x[1][1], x))
        else:
            # by default show the largest relative change first
            return sorted(entries, key=lambda x: (-x[1][3], x))

    def print_header(by=''):
        if not args.get('diff'):
            print('%-36s %7s' % (by, 'size'))
        else:
            print('%-36s %7s %7s %7s' % (by, 'old', 'new', 'diff'))

    def print_entry(name, size):
        print("%-36s %7d" % (name, size))

    def print_diff_entry(name, old, new, diff, ratio):
        print("%-36s %7s %7s %+7d%s" % (name,
            old or "-",
            new or "-",
            diff,
            ' (%+.1f%%)' % (100*ratio) if ratio else ''))

    def print_entries(by='name'):
        entries = dedup_entries(results, by=by)

        if not args.get('diff'):
            print_header(by=by)
            for name, size in sorted_entries(entries.items()):
                print_entry(name, size)
        else:
            prev_entries = dedup_entries(prev_results, by=by)
            diffs = diff_entries(prev_entries, entries)
            print_header(by='%s (%d added, %d removed)' % (by,
                sum(1 for old, _, _, _ in diffs.values() if not old),
                sum(1 for _, new, _, _ in diffs.values() if not new)))
            for name, (old, new, delta, ratio) in sorted_diff_entries(
                    diffs.items()):
                # unchanged entries have a zero ratio
                if ratio or args.get('all'):
                    print_diff_entry(name, old, new, delta, ratio)

    def print_totals():
        if not args.get('diff'):
            print_entry('TOTAL', total)
        else:
            ratio = (0.0 if not prev_total and not total
                else 1.0 if not prev_total
                else (total-prev_total)/prev_total)
            print_diff_entry('TOTAL',
                prev_total, total,
                total-prev_total,
                ratio)

    if args.get('quiet'):
        pass
    elif args.get('summary'):
        print_header()
        print_totals()
    elif args.get('files'):
        print_entries(by='file')
        print_totals()
    else:
        print_entries(by='name')
        print_totals()
293
# Command-line entry point: parse the arguments and hand them to main() as
# keyword arguments (argparse turns '--size-sort' into 'size_sort', etc).
if __name__ == "__main__":
    import argparse
    import sys
    parser = argparse.ArgumentParser(
        description="Find struct sizes.")
    parser.add_argument('obj_paths', nargs='*', default=OBJ_PATHS,
        help="Description of where to find *.o files. May be a directory "
            "or a list of paths. Defaults to %r." % OBJ_PATHS)
    parser.add_argument('-v', '--verbose', action='store_true',
        help="Output commands that run behind the scenes.")
    parser.add_argument('-q', '--quiet', action='store_true',
        help="Don't show anything, useful with -o.")
    parser.add_argument('-o', '--output',
        help="Specify CSV file to store results.")
    parser.add_argument('-u', '--use',
        help="Don't compile and find struct sizes, instead use this CSV file.")
    parser.add_argument('-d', '--diff',
        help="Specify CSV file to diff struct size against.")
    parser.add_argument('-m', '--merge',
        help="Merge with an existing CSV file when writing to output.")
    parser.add_argument('-a', '--all', action='store_true',
        help="Show all structs, not just the ones that changed.")
    parser.add_argument('-A', '--everything', action='store_true',
        help="Include all structs, not just the ones declared in header "
            "files.")
    parser.add_argument('-s', '--size-sort', action='store_true',
        help="Sort by size.")
    parser.add_argument('-S', '--reverse-size-sort', action='store_true',
        help="Sort by size, but backwards.")
    parser.add_argument('-F', '--files', action='store_true',
        help="Show file-level struct sizes.")
    parser.add_argument('-Y', '--summary', action='store_true',
        help="Only show the total struct size.")
    # split on whitespace so extra objdump arguments can be given inline
    parser.add_argument('--objdump-tool', default=['objdump'],
        type=lambda x: x.split(),
        help="Path to the objdump tool to use.")
    parser.add_argument('--build-dir',
        help="Specify the relative build directory. Used to map object "
            "files to the correct source files.")
    sys.exit(main(**vars(parser.parse_args())))
332