# Capstone Disassembler Engine
# By Dang Hoang Vu, 2013
from __future__ import print_function

import re
import sys

# Directory holding the C headers to scan; prepended verbatim to each name
# in `include`.
INCL_DIR = '../include/capstone/'

# Headers processed for every binding; a header without a prefix entry in a
# binding's template is skipped with a warning.
include = [
    'arm.h', 'arm64.h', 'm68k.h', 'mips.h', 'x86.h', 'ppc.h', 'sparc.h',
    'systemz.h', 'xcore.h', 'tms320c64x.h', 'm680x.h', 'evm.h', 'mos65xx.h',
]

# Per-binding output description. Besides the formatting keys ('header',
# 'footer', 'line_format', 'out_file', 'comment_open', 'comment_close'),
# each template maps a header name to the prefix used both for the output
# file name and (upper-cased) to recognise the constants in that header.
template = {
    'java': {
        'header': "// For Capstone Engine. AUTO-GENERATED FILE, DO NOT EDIT\npackage capstone;\n\npublic class %s_const {\n",
        'footer': "}",
        'line_format': '\tpublic static final int %s = %s;\n',
        'out_file': './java/capstone/%s_const.java',
        # prefixes for constant filenames of all archs - case sensitive
        'arm.h': 'Arm',
        'arm64.h': 'Arm64',
        'm68k.h': 'M68k',
        'mips.h': 'Mips',
        'x86.h': 'X86',
        'ppc.h': 'Ppc',
        'sparc.h': 'Sparc',
        'systemz.h': 'Sysz',
        'xcore.h': 'Xcore',
        'tms320c64x.h': 'TMS320C64x',
        'm680x.h': 'M680x',
        'evm.h': 'Evm',
        'comment_open': '\t//',
        'comment_close': '',
    },
    'python': {
        'header': "# For Capstone Engine. AUTO-GENERATED FILE, DO NOT EDIT [%s_const.py]\n",
        'footer': "",
        'line_format': '%s = %s\n',
        'out_file': './python/capstone/%s_const.py',
        # prefixes for constant filenames of all archs - case sensitive
        'arm.h': 'arm',
        'arm64.h': 'arm64',
        'm68k.h': 'm68k',
        'mips.h': 'mips',
        'x86.h': 'x86',
        'ppc.h': 'ppc',
        'sparc.h': 'sparc',
        'systemz.h': 'sysz',
        'xcore.h': 'xcore',
        'tms320c64x.h': 'tms320c64x',
        'm680x.h': 'm680x',
        'evm.h': 'evm',
        'mos65xx.h': 'mos65xx',
        'comment_open': '#',
        'comment_close': '',
    },
    'ocaml': {
        'header': "(* For Capstone Engine. AUTO-GENERATED FILE, DO NOT EDIT [%s_const.ml] *)\n",
        'footer': "",
        'line_format': 'let _%s = %s;;\n',
        'out_file': './ocaml/%s_const.ml',
        # prefixes for constant filenames of all archs - case sensitive
        'arm.h': 'arm',
        'arm64.h': 'arm64',
        'mips.h': 'mips',
        'm68k.h': 'm68k',
        'x86.h': 'x86',
        'ppc.h': 'ppc',
        'sparc.h': 'sparc',
        'systemz.h': 'sysz',
        'xcore.h': 'xcore',
        'tms320c64x.h': 'tms320c64x',
        'm680x.h': 'm680x',
        'evm.h': 'evm',
        'comment_open': '(*',
        'comment_close': ' *)',
    },
}

# markup for comments to be added to autogen files
MARKUP = '//>'

# Compiled once; raw strings avoid the invalid-escape warning of '\s+'.
_WHITESPACE = re.compile(r'\s+')
_ULL_SHIFT = re.compile(r'\((\d+)ULL << (\d+)\)')


def _convert_header(lines, templ, prefix, lang):
    """Yield the text chunks that make up the body of one constants file.

    Args:
        lines: the lines of a C header.
        templ: the entry of ``template`` for the target binding.
        prefix: the binding's prefix for this header; only identifiers
            starting with ``prefix.upper()`` are converted.
        lang: binding name; 'ocaml' gets its operators rewritten.
    """
    const_prefix = prefix.upper()
    count = 0  # value given to the next constant that has no explicit '='

    for line in lines:
        line = line.strip()

        if line.startswith(MARKUP):  # markup for comments
            yield "\n%s%s%s\n" % (templ['comment_open'],
                                  line.replace(MARKUP, ''),
                                  templ['comment_close'])
            continue

        if line == '' or line.startswith('//'):
            continue

        if line.startswith('#define '):
            line = line[8:]  # cut off define
            xline = _WHITESPACE.split(line, 1)  # split to at most 2 express
            if len(xline) != 2:
                continue
            if '(' in xline[0] or ')' in xline[0]:  # does it look like a function
                continue
            # insert an = so the expression below can parse it
            xline.insert(1, '=')
            line = ' '.join(xline)

        if not line.startswith(const_prefix):
            continue

        for t in line.strip().split(','):
            t = t.strip()
            if not t or t.startswith('//'):
                continue
            # hacky: remove type cast (uint64_t)
            t = t.replace('(uint64_t)', '')
            t = _ULL_SHIFT.sub(r'\1 << \2', t)  # (1ULL<<1) to 1 << 1
            f = _WHITESPACE.split(t)

            if not f[0].startswith(const_prefix):
                continue

            if len(f) > 1 and f[1] not in ('//', '///<', '='):
                print("Error: Unable to convert %s" % f)
                continue
            elif len(f) > 1 and f[1] == '=':
                rhs = ''.join(f[2:])
            else:
                rhs = str(count)
                count += 1

            try:
                # an integer value restarts the implicit numbering after it
                count = int(rhs) + 1
                if count == 1:
                    # a value of 0 is preceded by a blank line in the output
                    yield "\n"
            except ValueError:
                if lang == 'ocaml':
                    # ocaml uses lsl for '<<', lor for '|'
                    rhs = rhs.replace('<<', ' lsl ')
                    rhs = rhs.replace('|', ' lor ')
                    # ocaml variable has _ as prefix; the slice (rather than
                    # rhs[0]) tolerates an empty right-hand side
                    if rhs[:1].isalpha():
                        rhs = '_' + rhs

            yield templ['line_format'] % (f[0].strip(), rhs)


def gen(lang):
    """Write the constants files of binding ``lang`` for every known header.

    For each header in ``include`` that has a prefix in ``template[lang]``,
    the header is read from ``INCL_DIR`` and the converted constants are
    written to the path given by the template's 'out_file'.

    Raises:
        KeyError: if ``lang`` is not a key of ``template``.
        IOError/OSError: if a header or an output file cannot be opened.
    """
    print('Generating bindings for', lang)
    templ = template[lang]
    for target in include:
        if target not in templ:
            print("Warning: No binding found for %s" % target)
            continue
        prefix = templ[target]

        # open as binary prevents windows newlines
        with open(templ['out_file'] % prefix, 'wb') as outfile:
            outfile.write((templ['header'] % prefix).encode("utf-8"))

            with open(INCL_DIR + target) as header:
                lines = header.readlines()

            for chunk in _convert_header(lines, templ, prefix, lang):
                outfile.write(chunk.encode("utf-8"))

            outfile.write(templ['footer'].encode("utf-8"))


def main():
    """Generate the binding named by ``sys.argv[1]`` ('all' means every one).

    Raises:
        RuntimeError: if the requested binding has no entry in ``template``.
            Other failures (e.g. a missing header) propagate unchanged
            instead of being reported as an unsupported binding.
    """
    binding = sys.argv[1]
    if binding == 'all':
        for key in template:
            gen(key)
    elif binding in template:
        gen(binding)
    else:
        raise RuntimeError("Unsupported binding %s" % binding)


if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("Usage:", sys.argv[0], " <bindings: java|python|ocaml|all>")
        sys.exit(1)
    main()