• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python3
2#
3# Script to find stack usage at the function level. Will detect recursion and
4# report as infinite stack usage.
5#
6
7import os
8import glob
9import itertools as it
10import re
11import csv
12import collections as co
13import math as m
14
15
16CI_PATHS = ['*.ci']
17
18def collect(paths, **args):
19    # parse the vcg format
20    k_pattern = re.compile('([a-z]+)\s*:', re.DOTALL)
21    v_pattern = re.compile('(?:"(.*?)"|([a-z]+))', re.DOTALL)
22    def parse_vcg(rest):
23        def parse_vcg(rest):
24            node = []
25            while True:
26                rest = rest.lstrip()
27                m = k_pattern.match(rest)
28                if not m:
29                    return (node, rest)
30                k, rest = m.group(1), rest[m.end(0):]
31
32                rest = rest.lstrip()
33                if rest.startswith('{'):
34                    v, rest = parse_vcg(rest[1:])
35                    assert rest[0] == '}', "unexpected %r" % rest[0:1]
36                    rest = rest[1:]
37                    node.append((k, v))
38                else:
39                    m = v_pattern.match(rest)
40                    assert m, "unexpected %r" % rest[0:1]
41                    v, rest = m.group(1) or m.group(2), rest[m.end(0):]
42                    node.append((k, v))
43
44        node, rest = parse_vcg(rest)
45        assert rest == '', "unexpected %r" % rest[0:1]
46        return node
47
48    # collect into functions
49    results = co.defaultdict(lambda: (None, None, 0, set()))
50    f_pattern = re.compile(
51        r'([^\\]*)\\n([^:]*)[^\\]*\\n([0-9]+) bytes \((.*)\)')
52    for path in paths:
53        with open(path) as f:
54            vcg = parse_vcg(f.read())
55        for k, graph in vcg:
56            if k != 'graph':
57                continue
58            for k, info in graph:
59                if k == 'node':
60                    info = dict(info)
61                    m = f_pattern.match(info['label'])
62                    if m:
63                        function, file, size, type = m.groups()
64                        if not args.get('quiet') and type != 'static':
65                            print('warning: found non-static stack for %s (%s)'
66                                % (function, type))
67                        _, _, _, targets = results[info['title']]
68                        results[info['title']] = (
69                            file, function, int(size), targets)
70                elif k == 'edge':
71                    info = dict(info)
72                    _, _, _, targets = results[info['sourcename']]
73                    targets.add(info['targetname'])
74                else:
75                    continue
76
77    if not args.get('everything'):
78        for source, (s_file, s_function, _, _) in list(results.items()):
79            # discard internal functions
80            if s_file.startswith('<') or s_file.startswith('/usr/include'):
81                del results[source]
82
83    # find maximum stack size recursively, this requires also detecting cycles
84    # (in case of recursion)
85    def find_limit(source, seen=None):
86        seen = seen or set()
87        if source not in results:
88            return 0
89        _, _, frame, targets = results[source]
90
91        limit = 0
92        for target in targets:
93            if target in seen:
94                # found a cycle
95                return float('inf')
96            limit_ = find_limit(target, seen | {target})
97            limit = max(limit, limit_)
98
99        return frame + limit
100
101    def find_deps(targets):
102        deps = set()
103        for target in targets:
104            if target in results:
105                t_file, t_function, _, _ = results[target]
106                deps.add((t_file, t_function))
107        return deps
108
109    # flatten into a list
110    flat_results = []
111    for source, (s_file, s_function, frame, targets) in results.items():
112        limit = find_limit(source)
113        deps = find_deps(targets)
114        flat_results.append((s_file, s_function, frame, limit, deps))
115
116    return flat_results
117
118def main(**args):
119    def openio(path, mode='r'):
120        if path == '-':
121            if 'r' in mode:
122                return os.fdopen(os.dup(sys.stdin.fileno()), 'r')
123            else:
124                return os.fdopen(os.dup(sys.stdout.fileno()), 'w')
125        else:
126            return open(path, mode)
127
128    # find sizes
129    if not args.get('use', None):
130        # find .ci files
131        paths = []
132        for path in args['ci_paths']:
133            if os.path.isdir(path):
134                path = path + '/*.ci'
135
136            for path in glob.glob(path):
137                paths.append(path)
138
139        if not paths:
140            print('no .ci files found in %r?' % args['ci_paths'])
141            sys.exit(-1)
142
143        results = collect(paths, **args)
144    else:
145        with openio(args['use']) as f:
146            r = csv.DictReader(f)
147            results = [
148                (   result['file'],
149                    result['name'],
150                    int(result['stack_frame']),
151                    float(result['stack_limit']), # note limit can be inf
152                    set())
153                for result in r
154                if result.get('stack_frame') not in {None, ''}
155                if result.get('stack_limit') not in {None, ''}]
156
157    total_frame = 0
158    total_limit = 0
159    for _, _, frame, limit, _ in results:
160        total_frame += frame
161        total_limit = max(total_limit, limit)
162
163    # find previous results?
164    if args.get('diff'):
165        try:
166            with openio(args['diff']) as f:
167                r = csv.DictReader(f)
168                prev_results = [
169                    (   result['file'],
170                        result['name'],
171                        int(result['stack_frame']),
172                        float(result['stack_limit']),
173                        set())
174                    for result in r
175                    if result.get('stack_frame') not in {None, ''}
176                    if result.get('stack_limit') not in {None, ''}]
177        except FileNotFoundError:
178            prev_results = []
179
180        prev_total_frame = 0
181        prev_total_limit = 0
182        for _, _, frame, limit, _ in prev_results:
183            prev_total_frame += frame
184            prev_total_limit = max(prev_total_limit, limit)
185
186    # write results to CSV
187    if args.get('output'):
188        merged_results = co.defaultdict(lambda: {})
189        other_fields = []
190
191        # merge?
192        if args.get('merge'):
193            try:
194                with openio(args['merge']) as f:
195                    r = csv.DictReader(f)
196                    for result in r:
197                        file = result.pop('file', '')
198                        func = result.pop('name', '')
199                        result.pop('stack_frame', None)
200                        result.pop('stack_limit', None)
201                        merged_results[(file, func)] = result
202                        other_fields = result.keys()
203            except FileNotFoundError:
204                pass
205
206        for file, func, frame, limit, _ in results:
207            merged_results[(file, func)]['stack_frame'] = frame
208            merged_results[(file, func)]['stack_limit'] = limit
209
210        with openio(args['output'], 'w') as f:
211            w = csv.DictWriter(f, ['file', 'name', *other_fields, 'stack_frame', 'stack_limit'])
212            w.writeheader()
213            for (file, func), result in sorted(merged_results.items()):
214                w.writerow({'file': file, 'name': func, **result})
215
216    # print results
217    def dedup_entries(results, by='name'):
218        entries = co.defaultdict(lambda: (0, 0, set()))
219        for file, func, frame, limit, deps in results:
220            entry = (file if by == 'file' else func)
221            entry_frame, entry_limit, entry_deps = entries[entry]
222            entries[entry] = (
223                entry_frame + frame,
224                max(entry_limit, limit),
225                entry_deps | {file if by == 'file' else func
226                    for file, func in deps})
227        return entries
228
229    def diff_entries(olds, news):
230        diff = co.defaultdict(lambda: (None, None, None, None, 0, 0, 0, set()))
231        for name, (new_frame, new_limit, deps) in news.items():
232            diff[name] = (
233                None, None,
234                new_frame, new_limit,
235                new_frame, new_limit,
236                1.0,
237                deps)
238        for name, (old_frame, old_limit, _) in olds.items():
239            _, _, new_frame, new_limit, _, _, _, deps = diff[name]
240            diff[name] = (
241                old_frame, old_limit,
242                new_frame, new_limit,
243                (new_frame or 0) - (old_frame or 0),
244                0 if m.isinf(new_limit or 0) and m.isinf(old_limit or 0)
245                    else (new_limit or 0) - (old_limit or 0),
246                0.0 if m.isinf(new_limit or 0) and m.isinf(old_limit or 0)
247                    else +float('inf') if m.isinf(new_limit or 0)
248                    else -float('inf') if m.isinf(old_limit or 0)
249                    else +0.0 if not old_limit and not new_limit
250                    else +1.0 if not old_limit
251                    else ((new_limit or 0) - (old_limit or 0))/(old_limit or 0),
252                deps)
253        return diff
254
255    def sorted_entries(entries):
256        if args.get('limit_sort'):
257            return sorted(entries, key=lambda x: (-x[1][1], x))
258        elif args.get('reverse_limit_sort'):
259            return sorted(entries, key=lambda x: (+x[1][1], x))
260        elif args.get('frame_sort'):
261            return sorted(entries, key=lambda x: (-x[1][0], x))
262        elif args.get('reverse_frame_sort'):
263            return sorted(entries, key=lambda x: (+x[1][0], x))
264        else:
265            return sorted(entries)
266
267    def sorted_diff_entries(entries):
268        if args.get('limit_sort'):
269            return sorted(entries, key=lambda x: (-(x[1][3] or 0), x))
270        elif args.get('reverse_limit_sort'):
271            return sorted(entries, key=lambda x: (+(x[1][3] or 0), x))
272        elif args.get('frame_sort'):
273            return sorted(entries, key=lambda x: (-(x[1][2] or 0), x))
274        elif args.get('reverse_frame_sort'):
275            return sorted(entries, key=lambda x: (+(x[1][2] or 0), x))
276        else:
277            return sorted(entries, key=lambda x: (-x[1][6], x))
278
279    def print_header(by=''):
280        if not args.get('diff'):
281            print('%-36s %7s %7s' % (by, 'frame', 'limit'))
282        else:
283            print('%-36s %15s %15s %15s' % (by, 'old', 'new', 'diff'))
284
285    def print_entry(name, frame, limit):
286        print("%-36s %7d %7s" % (name,
287            frame, '∞' if m.isinf(limit) else int(limit)))
288
289    def print_diff_entry(name,
290            old_frame, old_limit,
291            new_frame, new_limit,
292            diff_frame, diff_limit,
293            ratio):
294        print('%-36s %7s %7s %7s %7s %+7d %7s%s' % (name,
295            old_frame if old_frame is not None else "-",
296            ('∞' if m.isinf(old_limit) else int(old_limit))
297                if old_limit is not None else "-",
298            new_frame if new_frame is not None else "-",
299            ('∞' if m.isinf(new_limit) else int(new_limit))
300                if new_limit is not None else "-",
301            diff_frame,
302            ('+∞' if diff_limit > 0 and m.isinf(diff_limit)
303                else '-∞' if diff_limit < 0 and m.isinf(diff_limit)
304                else '%+d' % diff_limit),
305            '' if not ratio
306                else ' (+∞%)' if ratio > 0 and m.isinf(ratio)
307                else ' (-∞%)' if ratio < 0 and m.isinf(ratio)
308                else ' (%+.1f%%)' % (100*ratio)))
309
310    def print_entries(by='name'):
311        # build optional tree of dependencies
312        def print_deps(entries, depth, print,
313                filter=lambda _: True,
314                prefixes=('', '', '', '')):
315            entries = entries if isinstance(entries, list) else list(entries)
316            filtered_entries = [(name, entry)
317                for name, entry in entries
318                if filter(name)]
319            for i, (name, entry) in enumerate(filtered_entries):
320                last = (i == len(filtered_entries)-1)
321                print(prefixes[0+last] + name, entry)
322
323                if depth > 0:
324                    deps = entry[-1]
325                    print_deps(entries, depth-1, print,
326                        lambda name: name in deps,
327                        (   prefixes[2+last] + "|-> ",
328                            prefixes[2+last] + "'-> ",
329                            prefixes[2+last] + "|   ",
330                            prefixes[2+last] + "    "))
331
332        entries = dedup_entries(results, by=by)
333
334        if not args.get('diff'):
335            print_header(by=by)
336            print_deps(
337                sorted_entries(entries.items()),
338                args.get('depth') or 0,
339                lambda name, entry: print_entry(name, *entry[:-1]))
340        else:
341            prev_entries = dedup_entries(prev_results, by=by)
342            diff = diff_entries(prev_entries, entries)
343
344            print_header(by='%s (%d added, %d removed)' % (by,
345                sum(1 for _, old, _, _, _, _, _, _ in diff.values() if old is None),
346                sum(1 for _, _, _, new, _, _, _, _ in diff.values() if new is None)))
347            print_deps(
348                filter(
349                    lambda x: x[1][6] or args.get('all'),
350                    sorted_diff_entries(diff.items())),
351                args.get('depth') or 0,
352                lambda name, entry: print_diff_entry(name, *entry[:-1]))
353
354    def print_totals():
355        if not args.get('diff'):
356            print_entry('TOTAL', total_frame, total_limit)
357        else:
358            diff_frame = total_frame - prev_total_frame
359            diff_limit = (
360                0 if m.isinf(total_limit or 0) and m.isinf(prev_total_limit or 0)
361                    else (total_limit or 0) - (prev_total_limit or 0))
362            ratio = (
363                0.0 if m.isinf(total_limit or 0) and m.isinf(prev_total_limit or 0)
364                    else +float('inf') if m.isinf(total_limit or 0)
365                    else -float('inf') if m.isinf(prev_total_limit or 0)
366                    else 0.0 if not prev_total_limit and not total_limit
367                    else 1.0 if not prev_total_limit
368                    else ((total_limit or 0) - (prev_total_limit or 0))/(prev_total_limit or 0))
369            print_diff_entry('TOTAL',
370                prev_total_frame, prev_total_limit,
371                total_frame, total_limit,
372                diff_frame, diff_limit,
373                ratio)
374
375    if args.get('quiet'):
376        pass
377    elif args.get('summary'):
378        print_header()
379        print_totals()
380    elif args.get('files'):
381        print_entries(by='file')
382        print_totals()
383    else:
384        print_entries(by='name')
385        print_totals()
386
387
388if __name__ == "__main__":
389    import argparse
390    import sys
391    parser = argparse.ArgumentParser(
392        description="Find stack usage at the function level.")
393    parser.add_argument('ci_paths', nargs='*', default=CI_PATHS,
394        help="Description of where to find *.ci files. May be a directory \
395            or a list of paths. Defaults to %r." % CI_PATHS)
396    parser.add_argument('-v', '--verbose', action='store_true',
397        help="Output commands that run behind the scenes.")
398    parser.add_argument('-q', '--quiet', action='store_true',
399        help="Don't show anything, useful with -o.")
400    parser.add_argument('-o', '--output',
401        help="Specify CSV file to store results.")
402    parser.add_argument('-u', '--use',
403        help="Don't parse callgraph files, instead use this CSV file.")
404    parser.add_argument('-d', '--diff',
405        help="Specify CSV file to diff against.")
406    parser.add_argument('-m', '--merge',
407        help="Merge with an existing CSV file when writing to output.")
408    parser.add_argument('-a', '--all', action='store_true',
409        help="Show all functions, not just the ones that changed.")
410    parser.add_argument('-A', '--everything', action='store_true',
411        help="Include builtin and libc specific symbols.")
412    parser.add_argument('-s', '--limit-sort', action='store_true',
413        help="Sort by stack limit.")
414    parser.add_argument('-S', '--reverse-limit-sort', action='store_true',
415        help="Sort by stack limit, but backwards.")
416    parser.add_argument('--frame-sort', action='store_true',
417        help="Sort by stack frame size.")
418    parser.add_argument('--reverse-frame-sort', action='store_true',
419        help="Sort by stack frame size, but backwards.")
420    parser.add_argument('-L', '--depth', default=0, type=lambda x: int(x, 0),
421        nargs='?', const=float('inf'),
422        help="Depth of dependencies to show.")
423    parser.add_argument('-F', '--files', action='store_true',
424        help="Show file-level calls.")
425    parser.add_argument('-Y', '--summary', action='store_true',
426        help="Only show the total stack size.")
427    parser.add_argument('--build-dir',
428        help="Specify the relative build directory. Used to map object files \
429            to the correct source files.")
430    sys.exit(main(**vars(parser.parse_args())))
431