#!/usr/bin/python
# Test tool to disassemble MC files. By Nguyen Anh Quynh, 2017
import array
import binascii
import os.path
import re
import sys

from capstone import *


# Architecture names as they appear in the header line of an input file,
# mapped to the matching Capstone constants.
_ARCHS = {
    "CS_ARCH_ARM": CS_ARCH_ARM,
    "CS_ARCH_ARM64": CS_ARCH_ARM64,
    "CS_ARCH_MIPS": CS_ARCH_MIPS,
    "CS_ARCH_PPC": CS_ARCH_PPC,
    "CS_ARCH_SPARC": CS_ARCH_SPARC,
    "CS_ARCH_SYSZ": CS_ARCH_SYSZ,
    "CS_ARCH_X86": CS_ARCH_X86,
    "CS_ARCH_XCORE": CS_ARCH_XCORE,
    "CS_ARCH_M68K": CS_ARCH_M68K,
}

# Mode strings as they appear in the header line (with all spaces removed),
# mapped to the matching Capstone mode value.
_MODES = {
    "CS_MODE_16": CS_MODE_16,
    "CS_MODE_32": CS_MODE_32,
    "CS_MODE_64": CS_MODE_64,
    "CS_MODE_MIPS32": CS_MODE_MIPS32,
    "CS_MODE_MIPS64": CS_MODE_MIPS64,
    "0": CS_MODE_ARM,
    "CS_MODE_ARM": CS_MODE_ARM,
    "CS_MODE_THUMB": CS_MODE_THUMB,
    "CS_MODE_ARM+CS_MODE_V8": CS_MODE_ARM + CS_MODE_V8,
    "CS_MODE_THUMB+CS_MODE_V8": CS_MODE_THUMB + CS_MODE_V8,
    "CS_MODE_THUMB+CS_MODE_MCLASS": CS_MODE_THUMB + CS_MODE_MCLASS,
    "CS_MODE_LITTLE_ENDIAN": CS_MODE_LITTLE_ENDIAN,
    "CS_MODE_BIG_ENDIAN": CS_MODE_BIG_ENDIAN,
    "CS_MODE_64+CS_MODE_LITTLE_ENDIAN": CS_MODE_64 + CS_MODE_LITTLE_ENDIAN,
    "CS_MODE_64+CS_MODE_BIG_ENDIAN": CS_MODE_64 + CS_MODE_BIG_ENDIAN,
    "CS_MODE_MIPS32+CS_MODE_MICRO": CS_MODE_MIPS32 + CS_MODE_MICRO,
    "CS_MODE_MIPS32+CS_MODE_MICRO+CS_MODE_BIG_ENDIAN":
        CS_MODE_MIPS32 + CS_MODE_MICRO + CS_MODE_BIG_ENDIAN,
    "CS_MODE_MIPS32+CS_MODE_BIG_ENDIAN+CS_MODE_MICRO":
        CS_MODE_MIPS32 + CS_MODE_MICRO + CS_MODE_BIG_ENDIAN,
    "CS_MODE_BIG_ENDIAN+CS_MODE_V9": CS_MODE_BIG_ENDIAN + CS_MODE_V9,
    "CS_MODE_MIPS32+CS_MODE_BIG_ENDIAN": CS_MODE_MIPS32 + CS_MODE_BIG_ENDIAN,
    "CS_MODE_MIPS32+CS_MODE_LITTLE_ENDIAN":
        CS_MODE_MIPS32 + CS_MODE_LITTLE_ENDIAN,
    "CS_MODE_MIPS64+CS_MODE_LITTLE_ENDIAN":
        CS_MODE_MIPS64 + CS_MODE_LITTLE_ENDIAN,
    "CS_MODE_MIPS64+CS_MODE_BIG_ENDIAN": CS_MODE_MIPS64 + CS_MODE_BIG_ENDIAN,
}

# A '0x' prefix followed by at least one hex digit.
_HEX_NUMBER = re.compile(r'0x[0-9a-fA-F]+')


def normalize_hex(a):
    """Convert every hex number in a text to its decimal form.

    Each maximal run of '0x' followed by one or more hex digits is replaced
    by the decimal spelling of that number.  A bare '0x' with no hex digit
    after it is left untouched.

    Args:
        a: the text to convert.

    Returns:
        The text with all hex numbers rewritten in decimal.
    """
    # Substitute match by match rather than with str.replace(): replacing the
    # substring '0x1' globally would also corrupt a later '0x10'.
    return _HEX_NUMBER.sub(lambda m: str(int(m.group(0), 16)), a)


def test_file(fname):
    """Disassemble every encoding listed in one MC input file and print it.

    The first line of the file must look like '# <arch>, <mode>, <option>'.
    Every following line that does not start with '#' is expected to look
    like '0x..,0x.. = <assembly>'; the bytes on the left of ' = ' are
    disassembled and the first resulting instruction is printed as
    '<hexcode> = <mnemonic> <operands>'.

    Args:
        fname: path of the input file.

    Raises:
        IOError/OSError: if the file cannot be opened.
        TypeError/ValueError: if a data line does not hold valid hex bytes.
    """
    print("Test %s" % fname)
    with open(fname) as f:
        lines = f.readlines()

    if not lines or not lines[0].startswith('# '):
        print("ERROR: decoding information is missing")
        return

    # Skip '# ' at the front, then split the header into its three fields.
    # Note: the third field (option) can be '', or 'None'; this tool does
    # not use it.
    fields = lines[0][2:].split(', ')
    if len(fields) != 3:
        print("ERROR: malformed decoding information")
        return
    arch = fields[0]
    mode = fields[1].replace(' ', '')

    if arch not in _ARCHS or mode not in _MODES:
        print("ERROR: unsupported arch/mode: %s, %s" % (arch, mode))
        return

    md = Cs(_ARCHS[arch], _MODES[mode])

    if arch in ('CS_ARCH_ARM', 'CS_ARCH_PPC'):
        md.syntax = CS_OPT_SYNTAX_NOREGNAME

    if fname.endswith('3DNow.s.cs'):
        md.syntax = CS_OPT_SYNTAX_ATT

    for line in lines[1:]:
        # Ignore all the input lines having # in front, and blank lines.
        if line.startswith('#') or not line.strip():
            continue

        # Left of ' = ' is the encoding, e.g. '0x0f,0x0f' -> '0f0f'.
        code = line.split(' = ')[0].strip()
        hex_code = code.replace('0x', '').replace(',', '')
        # unhexlify works on both Python 2 and 3 (str.decode('hex') is
        # Python 2 only).
        hex_data = binascii.unhexlify(hex_code)

        # Only the first decoded instruction is reported.
        insn = next(iter(md.disasm(hex_data, 0)), None)
        if insn is None:
            cs_output = 'FAILED to disassemble'
        elif insn.op_str != '':
            cs_output = "%s %s" % (insn.mnemonic, insn.op_str)
        else:
            cs_output = insn.mnemonic

        print("\t%s = %s" % (hex_code, cs_output))


def main():
    """Run test_file on argv[1], or on each file name read from stdin."""
    if len(sys.argv) == 1:
        # No argument: take one file name per line from stdin.
        for fname in sys.stdin:
            fname = fname.strip()
            if fname:
                test_file(fname)
    else:
        test_file(sys.argv[1])


if __name__ == '__main__':
    main()