#!/usr/bin/env python3
#
# Script to find struct sizes.
#

import os
import sys
import glob
import itertools as it
import subprocess as sp
import shlex
import re
import csv
import collections as co


OBJ_PATHS = ['*.o']

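# Note on the DWARF parsing below: collect() runs objdump twice per object
# file. The first pass (--dwarf=rawline) builds a table mapping DWARF file
# numbers to file names; the second pass (--dwarf=info) walks the debug info
# with a small state machine, picking up DW_TAG_structure_type entries along
# with their DW_AT_name, DW_AT_decl_file, and DW_AT_byte_size attributes.
# Roughly, the relevant --dwarf=info lines look like this (illustrative only,
# exact formatting varies across binutils versions):
#
#    <1><2d>: Abbrev Number: 2 (DW_TAG_structure_type)
#       <2e>   DW_AT_name        : my_struct
#       <33>   DW_AT_byte_size   : 16
#       <34>   DW_AT_decl_file   : 3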
def collect(paths, **args):
    decl_pattern = re.compile(
        r'^\s+(?P<no>[0-9]+)'
            r'\s+(?P<dir>[0-9]+)'
            r'\s+.*'
            r'\s+(?P<file>[^\s]+)$')
    struct_pattern = re.compile(
        r'^(?:.*DW_TAG_(?P<tag>[a-z_]+).*'
            r'|^.*DW_AT_name.*:\s*(?P<name>[^:\s]+)\s*'
            r'|^.*DW_AT_decl_file.*:\s*(?P<decl>[0-9]+)\s*'
            r'|^.*DW_AT_byte_size.*:\s*(?P<size>[0-9]+)\s*)$')

    results = co.defaultdict(lambda: 0)
    for path in paths:
        # find decls, we want to filter by structs in .h files
        decls = {}
        # note objdump-tool may contain extra args
        cmd = args['objdump_tool'] + ['--dwarf=rawline', path]
        if args.get('verbose'):
            print(' '.join(shlex.quote(c) for c in cmd))
        proc = sp.Popen(cmd,
            stdout=sp.PIPE,
            stderr=sp.PIPE if not args.get('verbose') else None,
            universal_newlines=True,
            errors='replace')
        for line in proc.stdout:
            # find file numbers
            m = decl_pattern.match(line)
            if m:
                decls[int(m.group('no'))] = m.group('file')
        proc.wait()
        if proc.returncode != 0:
            if not args.get('verbose'):
                for line in proc.stderr:
                    sys.stdout.write(line)
            sys.exit(-1)

        # collect structs as we parse dwarf info
        found = False
        name = None
        decl = None
        size = None

        # note objdump-tool may contain extra args
        cmd = args['objdump_tool'] + ['--dwarf=info', path]
        if args.get('verbose'):
            print(' '.join(shlex.quote(c) for c in cmd))
        proc = sp.Popen(cmd,
            stdout=sp.PIPE,
            stderr=sp.PIPE if not args.get('verbose') else None,
            universal_newlines=True,
            errors='replace')
        for line in proc.stdout:
            # state machine here to find structs
            m = struct_pattern.match(line)
            if m:
                if m.group('tag'):
                    if (name is not None
                            and decl is not None
                            and size is not None):
                        decl = decls.get(decl, '?')
                        results[(decl, name)] = size
                    found = (m.group('tag') == 'structure_type')
                    name = None
                    decl = None
                    size = None
                elif found and m.group('name'):
                    name = m.group('name')
                elif found and name and m.group('decl'):
                    decl = int(m.group('decl'))
                elif found and name and m.group('size'):
                    size = int(m.group('size'))
        proc.wait()
        if proc.returncode != 0:
            if not args.get('verbose'):
                for line in proc.stderr:
                    sys.stdout.write(line)
            sys.exit(-1)

    flat_results = []
    for (file, struct), size in results.items():
        # map to source files
        if args.get('build_dir'):
            file = re.sub('%s/*' % re.escape(args['build_dir']), '', file)
        # only include structs declared in header files in the current
        # directory, ignore internal-only structs (these are represented
        # in other measurements)
        if not args.get('everything'):
            if not file.endswith('.h'):
                continue
        # replace .o with .c, different scripts report .o/.c, we need to
        # choose one if we want to deduplicate csv files
        file = re.sub(r'\.o$', '.c', file)

        flat_results.append((file, struct, size))

    return flat_results


def main(**args):
    def openio(path, mode='r'):
        if path == '-':
            if 'r' in mode:
                return os.fdopen(os.dup(sys.stdin.fileno()), 'r')
            else:
                return os.fdopen(os.dup(sys.stdout.fileno()), 'w')
        else:
            return open(path, mode)

    # find sizes
    if not args.get('use', None):
        # find .o files
        paths = []
        for path in args['obj_paths']:
            if os.path.isdir(path):
                path = path + '/*.o'

            for path in glob.glob(path):
                paths.append(path)

        if not paths:
            print('no .o files found in %r?' % args['obj_paths'])
            sys.exit(-1)

        results = collect(paths, **args)
    else:
        with openio(args['use']) as f:
            r = csv.DictReader(f)
            results = [
                (   result['file'],
                    result['name'],
                    int(result['struct_size']))
                for result in r
                if result.get('struct_size') not in {None, ''}]

    total = 0
    for _, _, size in results:
        total += size

    # find previous results?
    if args.get('diff'):
        try:
            with openio(args['diff']) as f:
                r = csv.DictReader(f)
                prev_results = [
                    (   result['file'],
                        result['name'],
                        int(result['struct_size']))
                    for result in r
                    if result.get('struct_size') not in {None, ''}]
        except FileNotFoundError:
            prev_results = []

        prev_total = 0
        for _, _, size in prev_results:
            prev_total += size

    # write results to CSV
    if args.get('output'):
        merged_results = co.defaultdict(lambda: {})
        other_fields = []

        # merge?
        if args.get('merge'):
            try:
                with openio(args['merge']) as f:
                    r = csv.DictReader(f)
                    for result in r:
                        file = result.pop('file', '')
                        struct = result.pop('name', '')
                        result.pop('struct_size', None)
                        merged_results[(file, struct)] = result
                        other_fields = result.keys()
            except FileNotFoundError:
                pass

        for file, struct, size in results:
            merged_results[(file, struct)]['struct_size'] = size

        with openio(args['output'], 'w') as f:
            w = csv.DictWriter(f, ['file', 'name', *other_fields, 'struct_size'])
            w.writeheader()
            for (file, struct), result in sorted(merged_results.items()):
                w.writerow({'file': file, 'name': struct, **result})
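
    # note: the CSV written above always has 'file', 'name', and 'struct_size'
    # columns; any extra columns found in a --merge file are carried over
    # unchanged. A minimal example of the output (values illustrative only):
    #
    #   file,name,struct_size
    #   example.h,my_struct,16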

    # print results
    def dedup_entries(results, by='name'):
        entries = co.defaultdict(lambda: 0)
        for file, struct, size in results:
            entry = (file if by == 'file' else struct)
            entries[entry] += size
        return entries

    # each diff entry is a tuple of (old_size, new_size, new_size-old_size,
    # relative change); new structs get a ratio of +1.0, removed ones -1.0
    def diff_entries(olds, news):
        diff = co.defaultdict(lambda: (0, 0, 0, 0))
        for name, new in news.items():
            diff[name] = (0, new, new, 1.0)
        for name, old in olds.items():
            _, new, _, _ = diff[name]
            diff[name] = (old, new, new-old, (new-old)/old if old else 1.0)
        return diff

    def sorted_entries(entries):
        if args.get('size_sort'):
            return sorted(entries, key=lambda x: (-x[1], x))
        elif args.get('reverse_size_sort'):
            return sorted(entries, key=lambda x: (+x[1], x))
        else:
            return sorted(entries)

    def sorted_diff_entries(entries):
        if args.get('size_sort'):
            return sorted(entries, key=lambda x: (-x[1][1], x))
        elif args.get('reverse_size_sort'):
            return sorted(entries, key=lambda x: (+x[1][1], x))
        else:
            return sorted(entries, key=lambda x: (-x[1][3], x))

    def print_header(by=''):
        if not args.get('diff'):
            print('%-36s %7s' % (by, 'size'))
        else:
            print('%-36s %7s %7s %7s' % (by, 'old', 'new', 'diff'))

    def print_entry(name, size):
        print("%-36s %7d" % (name, size))

    def print_diff_entry(name, old, new, diff, ratio):
        print("%-36s %7s %7s %+7d%s" % (name,
            old or "-",
            new or "-",
            diff,
            ' (%+.1f%%)' % (100*ratio) if ratio else ''))

    def print_entries(by='name'):
        entries = dedup_entries(results, by=by)

        if not args.get('diff'):
            print_header(by=by)
            for name, size in sorted_entries(entries.items()):
                print_entry(name, size)
        else:
            prev_entries = dedup_entries(prev_results, by=by)
            diff = diff_entries(prev_entries, entries)
            print_header(by='%s (%d added, %d removed)' % (by,
                sum(1 for old, _, _, _ in diff.values() if not old),
                sum(1 for _, new, _, _ in diff.values() if not new)))
            for name, (old, new, diff, ratio) in sorted_diff_entries(
                    diff.items()):
                if ratio or args.get('all'):
                    print_diff_entry(name, old, new, diff, ratio)

    def print_totals():
        if not args.get('diff'):
            print_entry('TOTAL', total)
        else:
            ratio = (0.0 if not prev_total and not total
                else 1.0 if not prev_total
                else (total-prev_total)/prev_total)
            print_diff_entry('TOTAL',
                prev_total, total,
                total-prev_total,
                ratio)

    if args.get('quiet'):
        pass
    elif args.get('summary'):
        print_header()
        print_totals()
    elif args.get('files'):
        print_entries(by='file')
        print_totals()
    else:
        print_entries(by='name')
        print_totals()

if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser(
        description="Find struct sizes.")
    parser.add_argument('obj_paths', nargs='*', default=OBJ_PATHS,
        help="Description of where to find *.o files. May be a directory \
            or a list of paths. Defaults to %r." % OBJ_PATHS)
    parser.add_argument('-v', '--verbose', action='store_true',
        help="Output commands that run behind the scenes.")
    parser.add_argument('-q', '--quiet', action='store_true',
        help="Don't show anything, useful with -o.")
    parser.add_argument('-o', '--output',
        help="Specify CSV file to store results.")
    parser.add_argument('-u', '--use',
        help="Don't compile and find struct sizes, instead use this CSV file.")
    parser.add_argument('-d', '--diff',
        help="Specify CSV file to diff struct size against.")
    parser.add_argument('-m', '--merge',
        help="Merge with an existing CSV file when writing to output.")
    parser.add_argument('-a', '--all', action='store_true',
        help="Show all structs, not just the ones that changed.")
    parser.add_argument('-A', '--everything', action='store_true',
        help="Include builtin and libc specific symbols.")
    parser.add_argument('-s', '--size-sort', action='store_true',
        help="Sort by size.")
    parser.add_argument('-S', '--reverse-size-sort', action='store_true',
        help="Sort by size, but backwards.")
    parser.add_argument('-F', '--files', action='store_true',
        help="Show file-level struct sizes.")
    parser.add_argument('-Y', '--summary', action='store_true',
        help="Only show the total struct size.")
    parser.add_argument('--objdump-tool', default=['objdump'], type=lambda x: x.split(),
        help="Path to the objdump tool to use.")
    parser.add_argument('--build-dir',
        help="Specify the relative build directory. Used to map object files \
            to the correct source files.")
    sys.exit(main(**vars(parser.parse_args())))
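
# Example invocations, assuming this script is saved as structs.py and run
# from a directory containing .o files compiled with debug info (-g):
#
#   ./structs.py                             # report struct sizes by name
#   ./structs.py -s                          # sort by size, largest first
#   ./structs.py -o structs.csv              # also write results to a CSV file
#   ./structs.py -u structs.csv -d old.csv   # diff two saved CSV files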