#!/usr/bin/env python3
#
# Script to find code size at the function level. Basically just a big wrapper
# around nm with some extra conveniences for comparing builds. Heavily inspired
# by Linux's Bloat-O-Meter.
#

import os
import glob
import itertools as it
import subprocess as sp
import shlex
import re
import csv
import collections as co


# default glob patterns for object files, used when no obj_paths are given on
# the command line
OBJ_PATHS = ['*.o', 'bd/*.o']

def collect(paths, **args):
    """Run nm on each object file and gather per-symbol sizes.

    Args:
        paths: iterable of object file paths handed to nm one at a time.
        **args: options, of which the following are read:
            type: string of single-character nm symbol types to keep
                (required).
            nm_tool: list holding the nm command and any extra arguments
                (required).
            verbose: if truthy, print each command before running it and
                let nm's stderr pass straight through.
            build_dir: if set, this prefix (and any following slashes) is
                removed from the reported file names.

    Returns:
        A list of (file, func, size) tuples. Symbols starting with '__' are
        dropped and numeric suffixes such as '.8449' are removed from names.

    Raises:
        SystemExit: with code -1 if nm exits with a non-zero status. Unless
            verbose is set, nm's captured stderr is written to stdout first.
    """
    # imported here so the error path also works when this file is imported
    # as a module, the module-level import only exists when run as a script
    import sys

    results = co.defaultdict(int)
    pattern = re.compile(
        '^(?P<size>[0-9a-fA-F]+)' +
        ' (?P<type>[%s])' % re.escape(args['type']) +
        ' (?P<func>.+?)$')
    for path in paths:
        # note nm-tool may contain extra args
        cmd = args['nm_tool'] + ['--size-sort', path]
        if args.get('verbose'):
            print(' '.join(shlex.quote(c) for c in cmd))
        proc = sp.Popen(cmd,
            stdout=sp.PIPE,
            stderr=sp.PIPE if not args.get('verbose') else None,
            universal_newlines=True)
        # communicate drains stdout and stderr together, reading stdout to
        # the end first could hang if nm filled the stderr pipe
        stdout, stderr = proc.communicate()
        for line in stdout.splitlines():
            m = pattern.match(line)
            if m:
                results[(path, m.group('func'))] += int(m.group('size'), 16)
        if proc.returncode != 0:
            if not args.get('verbose'):
                sys.stdout.write(stderr)
            sys.exit(-1)

    # compiled once, raw string avoids the invalid '\.' escape sequence
    suffix = re.compile(r'\.[0-9]+')

    flat_results = []
    for (file, func), size in results.items():
        # map to source files
        if args.get('build_dir'):
            file = re.sub('%s/*' % re.escape(args['build_dir']), '', file)
        # discard internal functions
        if func.startswith('__'):
            continue
        # discard .8449 suffixes created by optimizer
        func = suffix.sub('', func)
        flat_results.append((file, func, size))

    return flat_results

def main(**args):
    """Find code sizes, optionally store and diff them, and print a report.

    Sizes come either from running collect over the object files matched by
    obj_paths, or from a previously written CSV file when use is given.

    Args:
        **args: options, of which the following are read here (all of them
            are also forwarded to collect):
            obj_paths: list of globs or directories to search for object
                files, a directory is searched for '*.o'. Required unless
                use is given.
            use: path of a CSV file to read results from instead of
                running collect.
            diff: path of a CSV file with previous results to compare
                against.
            output: path of a CSV file to write the results to.
            quiet: print nothing.
            summary: print only the header and totals.
            files: report sizes per file instead of per function.
            all: when diffing, also show entries whose size did not change.

    Returns:
        None.

    Raises:
        SystemExit: with code -1 if no object files match obj_paths.
    """
    # imported here so the error path also works when this file is imported
    # as a module, the module-level import only exists when run as a script
    import sys

    def load_csv(path):
        # read (file, function, size) rows back from a CSV written by -o,
        # newline='' is how the csv module expects its files to be opened
        with open(path, newline='') as f:
            return [
                (row['file'], row['function'], int(row['size']))
                for row in csv.DictReader(f)]

    # find sizes
    if not args.get('use'):
        # find .o files
        paths = []
        for obj_path in args['obj_paths']:
            if os.path.isdir(obj_path):
                obj_path = obj_path + '/*.o'
            paths.extend(glob.glob(obj_path))

        if not paths:
            print('no .obj files found in %r?' % args['obj_paths'])
            sys.exit(-1)

        results = collect(paths, **args)
    else:
        results = load_csv(args['use'])

    total = sum(size for _, _, size in results)

    # find previous results?
    if args.get('diff'):
        prev_results = load_csv(args['diff'])
        prev_total = sum(size for _, _, size in prev_results)

    # write results to CSV
    if args.get('output'):
        # newline='' keeps the csv writer's own line endings untouched
        with open(args['output'], 'w', newline='') as f:
            w = csv.writer(f)
            w.writerow(['file', 'function', 'size'])
            for file, func, size in sorted(results):
                w.writerow((file, func, size))

    # print results
    def dedup_entries(results, by='function'):
        # sum sizes per file or per function name
        entries = co.defaultdict(int)
        for file, func, size in results:
            entry = (file if by == 'file' else func)
            entries[entry] += size
        return entries

    def diff_entries(olds, news):
        # map name -> (old, new, new-old, ratio), a name missing on one
        # side counts as size 0 there, and ratio is 1.0 when old is 0
        diff = co.defaultdict(lambda: (0, 0, 0, 0))
        for name, new in news.items():
            diff[name] = (0, new, new, 1.0)
        for name, old in olds.items():
            _, new, _, _ = diff[name]
            diff[name] = (old, new, new-old, (new-old)/old if old else 1.0)
        return diff

    def print_header(by=''):
        if not args.get('diff'):
            print('%-36s %7s' % (by, 'size'))
        else:
            print('%-36s %7s %7s %7s' % (by, 'old', 'new', 'diff'))

    def print_entries(by='function'):
        entries = dedup_entries(results, by=by)

        if not args.get('diff'):
            print_header(by=by)
            for name, size in sorted(entries.items()):
                print("%-36s %7d" % (name, size))
        else:
            prev_entries = dedup_entries(prev_results, by=by)
            changes = diff_entries(prev_entries, entries)
            print_header(by='%s (%d added, %d removed)' % (by,
                sum(1 for old, _, _, _ in changes.values() if not old),
                sum(1 for _, new, _, _ in changes.values() if not new)))
            # largest relative growth first, ties broken by name
            for name, (old, new, delta, ratio) in sorted(changes.items(),
                    key=lambda x: (-x[1][3], x)):
                if ratio or args.get('all'):
                    print("%-36s %7s %7s %+7d%s" % (name,
                        old or "-",
                        new or "-",
                        delta,
                        ' (%+.1f%%)' % (100*ratio) if ratio else ''))

    def print_totals():
        if not args.get('diff'):
            print("%-36s %7d" % ('TOTAL', total))
        else:
            ratio = (total-prev_total)/prev_total if prev_total else 1.0
            print("%-36s %7s %7s %+7d%s" % (
                'TOTAL',
                prev_total if prev_total else '-',
                total if total else '-',
                total-prev_total,
                ' (%+.1f%%)' % (100*ratio) if ratio else ''))

    if args.get('quiet'):
        pass
    elif args.get('summary'):
        print_header()
        print_totals()
    elif args.get('files'):
        print_entries(by='file')
        print_totals()
    else:
        print_entries(by='function')
        print_totals()

if __name__ == "__main__":
    import argparse
    import sys

    # command line interface, every parsed option is passed to main as a
    # keyword argument of the same name
    cli = argparse.ArgumentParser(
        description="Find code size at the function level.")

    # where to look for object files
    cli.add_argument(
        'obj_paths', nargs='*', default=OBJ_PATHS,
        help="Description of where to find *.o files. May be a directory \
            or a list of paths. Defaults to %r." % OBJ_PATHS)

    # input/output files
    cli.add_argument(
        '-v', '--verbose', action='store_true',
        help="Output commands that run behind the scenes.")
    cli.add_argument(
        '-o', '--output',
        help="Specify CSV file to store results.")
    cli.add_argument(
        '-u', '--use',
        help="Don't compile and find code sizes, instead use this CSV file.")
    cli.add_argument(
        '-d', '--diff',
        help="Specify CSV file to diff code size against.")

    # report shape
    cli.add_argument(
        '-a', '--all', action='store_true',
        help="Show all functions, not just the ones that changed.")
    cli.add_argument(
        '--files', action='store_true',
        help="Show file-level code sizes. Note this does not include padding! "
            "So sizes may differ from other tools.")
    cli.add_argument(
        '-s', '--summary', action='store_true',
        help="Only show the total code size.")
    cli.add_argument(
        '-q', '--quiet', action='store_true',
        help="Don't show anything, useful with -o.")

    # how nm is invoked and interpreted
    cli.add_argument(
        '--type', default='tTrRdDbB',
        help="Type of symbols to report, this uses the same single-character "
            "type-names emitted by nm. Defaults to %(default)r.")
    cli.add_argument(
        '--nm-tool', default=['nm'], type=lambda x: x.split(),
        help="Path to the nm tool to use.")
    cli.add_argument(
        '--build-dir',
        help="Specify the relative build directory. Used to map object files \
            to the correct source files.")

    options = vars(cli.parse_args())
    sys.exit(main(**options))