• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python3
2
3"""
4strip_asm.py - Cleanup ASM output for the specified file
5"""
6
7from argparse import ArgumentParser
8import sys
9import os
10import re
11
def find_used_labels(asm):
    """
    find_used_labels - collect the local labels targeted by jump instructions.

    Every line of `asm` is checked for a jump mnemonic (a `j` followed by
    lowercase letters) whose operand is a `.L`-prefixed label. The return
    value is the set of those labels, each including its `.L` prefix.
    Lines that are not such jumps contribute nothing.
    """
    # Raw string: `\s` and `\.` are regex escapes, not Python string escapes
    # (a non-raw literal triggers an invalid-escape warning on newer Pythons).
    label_re = re.compile(r"\s*j[a-z]+\s+\.L([a-zA-Z0-9][a-zA-Z0-9_]*)")
    matches = (label_re.match(line) for line in asm.splitlines())
    return {'.L%s' % m.group(1) for m in matches if m}
20
21
def normalize_labels(asm):
    """
    normalize_labels - give every declared local label a leading dot.

    A label declaration is an `L<name>:` or `.L<name>:` at the very start of
    a line. Each label declared WITHOUT the dot is rewritten to `.L<name>`,
    both at its declaration and wherever it appears as an operand that is
    preceded by whitespace and followed by `:`, whitespace, or the end of the
    input. Labels that already carry the dot are left alone, and input that
    declares no undotted labels is returned unchanged.
    """
    label_decl = re.compile(r"^[.]{0,1}L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)")
    decls = set()
    for line in asm.splitlines():
        m = label_decl.match(line)
        if m:
            decls.add(m.group(0))

    # Decide per label rather than by inspecting one arbitrary set element:
    # that keeps the result deterministic when dotted and undotted labels are
    # mixed, and never adds a second dot to an already-dotted label.
    undotted = sorted(d for d in decls if not d.startswith('.'))
    for label in undotted:
        # The lookahead makes `L1` a whole-token match (so `L10` is not
        # touched); `$` also covers an operand at the very end of the input
        # when there is no trailing newline.
        pattern = r"(^|\s+)" + re.escape(label) + r"(?=:|\s|$)"
        asm = re.sub(pattern, r"\1." + label, asm)
    return asm
37
38
def transform_labels(asm):
    """
    transform_labels - normalize local labels and drop the unused ones.

    The input is first passed through normalize_labels, then the labels
    reported by find_used_labels are collected. A line that declares a
    `.L<name>:` label is kept only when that label is in the used set; every
    other line is kept as is. Each kept line is written back followed by a
    newline, so a non-empty result always ends with a newline.
    """
    asm = normalize_labels(asm)
    used_decls = find_used_labels(asm)
    # Raw string so `\.` reaches the regex engine as written.
    label_decl = re.compile(r"^\.L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)")
    kept = []
    for line in asm.splitlines():
        m = label_decl.match(line)
        if m is None or m.group(0) in used_decls:
            kept.append(line + '\n')
    # Join once instead of growing a string line by line.
    return ''.join(kept)
50
51
def is_identifier(tk):
    """
    is_identifier - report whether `tk` has the shape of an identifier.

    Returns True when `tk` is non-empty, its first character is a letter or
    an underscore, and every following character is alphanumeric or an
    underscore. Returns False otherwise.
    """
    if not tk:
        return False
    head, tail = tk[0], tk[1:]
    if not (head.isalpha() or head == '_'):
        return False
    return all(ch.isalnum() or ch == '_' for ch in tail)
63
def process_identifiers(l):
    """
    process_identifiers - rewrite the identifiers on one line so that their
    names are consistent across platforms, specifically across ELF and MachO.
    MachO inserts an additional underscore at the beginning of names; this
    function removes it.

    The line is split into runs of word characters and the text between
    them. A run accepted by is_identifier loses its first character when it
    starts with `__Z`, or when it starts with an underscore followed by a
    letter other than `Z`. Everything else is passed through unchanged and
    the pieces are joined back in their original order.
    """
    def strip_extra_underscore(tk):
        # Non-identifier pieces (separators, numbers) are never modified.
        if not is_identifier(tk):
            return tk
        if tk.startswith('__Z'):
            return tk[1:]
        # `_Z...` names are deliberately left alone by the `!= 'Z'` test.
        if tk.startswith('_') and len(tk) > 1 \
                and tk[1].isalpha() and tk[1] != 'Z':
            return tk[1:]
        return tk

    pieces = re.split(r'([a-zA-Z0-9_]+)', l)
    return ''.join(strip_extra_underscore(tk) for tk in pieces)
82
83
def process_asm(asm):
    """
    Strip the ASM of unwanted directives and lines

    The input is first run through transform_labels. Each resulting line then
    has any `@GOTPCREL` text removed and is dropped when it matches one of
    the discard patterns, unless a keep pattern matches (the keep list is
    currently empty). Surviving lines are passed through process_identifiers
    and written out followed by a newline. A blank line is inserted before a
    surviving line that matches `fn_label_def`, except when it would be the
    first line of the output.
    """
    asm = transform_labels(asm)

    # Patterns are raw strings so that `\s`, `\.` and friends are regex
    # escapes rather than (invalid) Python string escapes.
    # TODO: Add more things we want to remove
    discard_regexes = [
        re.compile(r"\s+\..*$"),  # directive
        re.compile(r"\s*#(NO_APP|APP)$"),  # inline ASM
        re.compile(r"\s*#.*$"),  # comment line
        re.compile(r"\s*\.globa?l\s*([.a-zA-Z_][a-zA-Z0-9$_.]*)"),  # global directive
        re.compile(r"\s*\.(string|asciz|ascii|[1248]?byte|short|word|long|quad|value|zero)"),
    ]
    keep_regexes = [

    ]
    fn_label_def = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_.]*:")

    kept = []
    for line in asm.splitlines():
        # Remove Mach-O attribute
        line = line.replace('@GOTPCREL', '')
        add_line = not any(reg.match(line) for reg in discard_regexes)
        # A keep pattern overrides a discard match.
        if any(reg.match(line) for reg in keep_regexes):
            add_line = True
        if not add_line:
            continue
        # The label test runs on the line before its identifiers are
        # rewritten; `kept` being non-empty means output already exists.
        if fn_label_def.match(line) and kept:
            kept.append('\n')
        kept.append(process_identifiers(line) + '\n')
    # Join once instead of growing a string line by line.
    return ''.join(kept)
122
def main():
    """
    Command-line entry point: read an assembly file, strip it, write it out.

    Takes two positional arguments, the input assembly file and the output
    file. Unrecognized extra arguments are accepted and ignored. If the input
    path is not an existing file, an error message is printed and the process
    exits with status 1. Otherwise the input is read in full, passed through
    process_asm, and the result is written to the output path.
    """
    parser = ArgumentParser(description='generate a stripped assembly file')
    parser.add_argument('input', metavar='input', type=str, nargs=1,
                        help='An input assembly file')
    parser.add_argument('out', metavar='output', type=str, nargs=1,
                        help='The output file')
    # parse_known_args (rather than parse_args) tolerates extra arguments.
    args, _ = parser.parse_known_args()
    # nargs=1 yields one-element lists.
    src_path = args.input[0]
    dst_path = args.out[0]

    if not os.path.isfile(src_path):
        print(("ERROR: input file '%s' does not exist") % src_path)
        sys.exit(1)

    with open(src_path, 'r') as src:
        raw_asm = src.read()
    # Process before opening the output so a failure does not truncate it.
    stripped = process_asm(raw_asm)
    with open(dst_path, 'w') as dst:
        dst.write(stripped)
144
145
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()
148
149# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
150# kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off;
151# kate: indent-mode python; remove-trailing-spaces modified;
152