# Capstone Python bindings, by Nguyen Anh Quynnh <aquynh@gmail.com>
import os, sys
from platform import system
_python2 = sys.version_info[0] < 3
if _python2:
    # Python 2: use the lazy xrange everywhere this module calls range().
    range = xrange

# Public API of the package (what `from capstone import *` exports).
# Every public constant defined in this module is listed so that star-imports
# see the full set of option types and error codes.
__all__ = [
    'Cs',
    'CsInsn',

    'cs_disasm_quick',
    'cs_disasm_lite',
    'cs_version',
    'cs_support',
    'version_bind',
    'debug',

    'CS_API_MAJOR',
    'CS_API_MINOR',

    'CS_VERSION_MAJOR',
    'CS_VERSION_MINOR',
    'CS_VERSION_EXTRA',

    'CS_ARCH_ARM',
    'CS_ARCH_ARM64',
    'CS_ARCH_MIPS',
    'CS_ARCH_X86',
    'CS_ARCH_PPC',
    'CS_ARCH_SPARC',
    'CS_ARCH_SYSZ',
    'CS_ARCH_XCORE',
    'CS_ARCH_M68K',
    'CS_ARCH_TMS320C64X',
    'CS_ARCH_M680X',
    'CS_ARCH_EVM',
    'CS_ARCH_MOS65XX',
    'CS_ARCH_MAX',
    'CS_ARCH_ALL',

    'CS_MODE_LITTLE_ENDIAN',
    'CS_MODE_BIG_ENDIAN',
    'CS_MODE_16',
    'CS_MODE_32',
    'CS_MODE_64',
    'CS_MODE_ARM',
    'CS_MODE_THUMB',
    'CS_MODE_MCLASS',
    'CS_MODE_MICRO',
    'CS_MODE_MIPS3',
    'CS_MODE_MIPS32R6',
    'CS_MODE_MIPS2',
    'CS_MODE_V8',
    'CS_MODE_V9',
    'CS_MODE_QPX',
    'CS_MODE_M68K_000',
    'CS_MODE_M68K_010',
    'CS_MODE_M68K_020',
    'CS_MODE_M68K_030',
    'CS_MODE_M68K_040',
    'CS_MODE_M68K_060',
    'CS_MODE_MIPS32',
    'CS_MODE_MIPS64',
    'CS_MODE_M680X_6301',
    'CS_MODE_M680X_6309',
    'CS_MODE_M680X_6800',
    'CS_MODE_M680X_6801',
    'CS_MODE_M680X_6805',
    'CS_MODE_M680X_6808',
    'CS_MODE_M680X_6809',
    'CS_MODE_M680X_6811',
    'CS_MODE_M680X_CPU12',
    'CS_MODE_M680X_HCS08',

    'CS_OPT_SYNTAX',
    'CS_OPT_SYNTAX_DEFAULT',
    'CS_OPT_SYNTAX_INTEL',
    'CS_OPT_SYNTAX_ATT',
    'CS_OPT_SYNTAX_NOREGNAME',
    'CS_OPT_SYNTAX_MASM',

    'CS_OPT_DETAIL',
    'CS_OPT_MODE',
    'CS_OPT_MEM',
    'CS_OPT_SKIPDATA',
    'CS_OPT_SKIPDATA_SETUP',
    'CS_OPT_MNEMONIC',
    'CS_OPT_UNSIGNED',
    'CS_OPT_ON',
    'CS_OPT_OFF',

    'CS_ERR_OK',
    'CS_ERR_MEM',
    'CS_ERR_ARCH',
    'CS_ERR_HANDLE',
    'CS_ERR_CSH',
    'CS_ERR_MODE',
    'CS_ERR_OPTION',
    'CS_ERR_DETAIL',
    'CS_ERR_VERSION',
    'CS_ERR_MEMSETUP',
    'CS_ERR_DIET',
    'CS_ERR_SKIPDATA',
    'CS_ERR_X86_ATT',
    'CS_ERR_X86_INTEL',
    'CS_ERR_X86_MASM',

    'CS_SUPPORT_DIET',
    'CS_SUPPORT_X86_REDUCE',
    'CS_SKIPDATA_CALLBACK',

    'CS_OP_INVALID',
    'CS_OP_REG',
    'CS_OP_IMM',
    'CS_OP_MEM',
    'CS_OP_FP',

    'CS_GRP_INVALID',
    'CS_GRP_JUMP',
    'CS_GRP_CALL',
    'CS_GRP_RET',
    'CS_GRP_INT',
    'CS_GRP_IRET',
    'CS_GRP_PRIVILEGE',

    'CS_AC_INVALID',
    'CS_AC_READ',
    'CS_AC_WRITE',

    'CsError',

    '__version__',
]

# Capstone C interface

# API version
CS_API_MAJOR = 5
CS_API_MINOR = 0

# Package version
CS_VERSION_MAJOR = CS_API_MAJOR
CS_VERSION_MINOR = CS_API_MINOR
CS_VERSION_EXTRA = 0

__version__ = "%u.%u.%u" %(CS_VERSION_MAJOR, CS_VERSION_MINOR, CS_VERSION_EXTRA)

# architectures
CS_ARCH_ARM = 0
CS_ARCH_ARM64 = 1
CS_ARCH_MIPS = 2
CS_ARCH_X86 = 3
CS_ARCH_PPC = 4
CS_ARCH_SPARC = 5
CS_ARCH_SYSZ = 6
CS_ARCH_XCORE = 7
CS_ARCH_M68K = 8
CS_ARCH_TMS320C64X = 9
CS_ARCH_M680X = 10
CS_ARCH_EVM = 11
CS_ARCH_MOS65XX = 12
CS_ARCH_MAX = 13
CS_ARCH_ALL = 0xFFFF

# disasm mode
# NOTE: mode bits are reused across architectures (e.g. 1 << 4 means THUMB,
# MICRO, V9, QPX or M68K_030 depending on the selected architecture).
CS_MODE_LITTLE_ENDIAN = 0 # little-endian mode (default mode)
CS_MODE_ARM = 0 # ARM mode
CS_MODE_16 = (1 << 1) # 16-bit mode (for X86)
CS_MODE_32 = (1 << 2) # 32-bit mode (for X86)
CS_MODE_64 = (1 << 3) # 64-bit mode (for X86, PPC)
CS_MODE_THUMB = (1 << 4) # ARM's Thumb mode, including Thumb-2
CS_MODE_MCLASS = (1 << 5) # ARM's Cortex-M series
CS_MODE_V8 = (1 << 6) # ARMv8 A32 encodings for ARM
CS_MODE_MICRO = (1 << 4) # MicroMips mode (MIPS architecture)
CS_MODE_MIPS3 = (1 << 5) # Mips III ISA
CS_MODE_MIPS32R6 = (1 << 6) # Mips32r6 ISA
CS_MODE_MIPS2 = (1 << 7) # Mips II ISA
CS_MODE_V9 = (1 << 4) # Sparc V9 mode (for Sparc)
CS_MODE_QPX = (1 << 4) # Quad Processing eXtensions mode (PPC)
CS_MODE_M68K_000 = (1 << 1) # M68K 68000 mode
CS_MODE_M68K_010 = (1 << 2) # M68K 68010 mode
CS_MODE_M68K_020 = (1 << 3) # M68K 68020 mode
CS_MODE_M68K_030 = (1 << 4) # M68K 68030 mode
CS_MODE_M68K_040 = (1 << 5) # M68K 68040 mode
CS_MODE_M68K_060 = (1 << 6) # M68K 68060 mode
CS_MODE_BIG_ENDIAN = (1 << 31) # big-endian mode
CS_MODE_MIPS32 = CS_MODE_32 # Mips32 ISA
CS_MODE_MIPS64 = CS_MODE_64 # Mips64 ISA
CS_MODE_M680X_6301 = (1 << 1) # M680X HD6301/3 mode
CS_MODE_M680X_6309 = (1 << 2) # M680X HD6309 mode
CS_MODE_M680X_6800 = (1 << 3) # M680X M6800/2 mode
CS_MODE_M680X_6801 = (1 << 4) # M680X M6801/3 mode
CS_MODE_M680X_6805 = (1 << 5) # M680X M6805 mode
CS_MODE_M680X_6808 = (1 << 6) # M680X M68HC08 mode
CS_MODE_M680X_6809 = (1 << 7) # M680X M6809 mode
CS_MODE_M680X_6811 = (1 << 8) # M680X M68HC11 mode
CS_MODE_M680X_CPU12 = (1 << 9) # M680X CPU12 mode
CS_MODE_M680X_HCS08 = (1 << 10) # M680X HCS08 mode

# Capstone option type (first argument of cs_option())
CS_OPT_SYNTAX = 1 # Assembly output syntax (value is one of CS_OPT_SYNTAX_*)
CS_OPT_DETAIL = 2 # Break down instruction structure into details
CS_OPT_MODE = 3 # Change engine's mode at run-time
CS_OPT_MEM = 4 # User-defined dynamic memory related functions (per capstone.h; not used by this module)
CS_OPT_SKIPDATA = 5 # Skip data when disassembling
CS_OPT_SKIPDATA_SETUP = 6 # Setup user-defined function for SKIPDATA option
CS_OPT_MNEMONIC = 7 # Customize instruction mnemonic
CS_OPT_UNSIGNED = 8 # Print immediate in unsigned form

# Capstone option value
CS_OPT_OFF = 0 # Turn OFF an option - default option of CS_OPT_DETAIL
CS_OPT_ON = 3 # Turn ON an option (CS_OPT_DETAIL, CS_OPT_SKIPDATA, CS_OPT_UNSIGNED)

# Common instruction operand types - to be consistent across all architectures.
CS_OP_INVALID = 0
CS_OP_REG = 1
CS_OP_IMM = 2
CS_OP_MEM = 3
CS_OP_FP = 4

# Common instruction groups - to be consistent across all architectures.
CS_GRP_INVALID = 0 # uninitialized/invalid group.
CS_GRP_JUMP = 1 # all jump instructions (conditional+direct+indirect jumps)
CS_GRP_CALL = 2 # all call instructions
CS_GRP_RET = 3 # all return instructions
CS_GRP_INT = 4 # all interrupt instructions (int+syscall)
CS_GRP_IRET = 5 # all interrupt return instructions
CS_GRP_PRIVILEGE = 6 # all privileged instructions

# Access types for instruction operands.
CS_AC_INVALID = 0 # Invalid/uninitialized access type.
CS_AC_READ = (1 << 0) # Operand that is read from.
CS_AC_WRITE = (1 << 1) # Operand that is written to.

# Capstone syntax value
CS_OPT_SYNTAX_DEFAULT = 0 # Default assembly syntax of all platforms (CS_OPT_SYNTAX)
CS_OPT_SYNTAX_INTEL = 1 # Intel X86 asm syntax - default syntax on X86 (CS_OPT_SYNTAX, CS_ARCH_X86)
CS_OPT_SYNTAX_ATT = 2 # ATT asm syntax (CS_OPT_SYNTAX, CS_ARCH_X86)
CS_OPT_SYNTAX_NOREGNAME = 3 # Asm syntax prints register name with only number - (CS_OPT_SYNTAX, CS_ARCH_PPC, CS_ARCH_ARM)
CS_OPT_SYNTAX_MASM = 4 # MASM syntax (CS_OPT_SYNTAX, CS_ARCH_X86)

# Capstone error type
CS_ERR_OK = 0 # No error: everything was fine
CS_ERR_MEM = 1 # Out-Of-Memory error: cs_open(), cs_disasm()
CS_ERR_ARCH = 2 # Unsupported architecture: cs_open()
CS_ERR_HANDLE = 3 # Invalid handle: cs_op_count(), cs_op_index()
CS_ERR_CSH = 4 # Invalid csh argument: cs_close(), cs_errno(), cs_option()
CS_ERR_MODE = 5 # Invalid/unsupported mode: cs_open()
CS_ERR_OPTION = 6 # Invalid/unsupported option: cs_option()
CS_ERR_DETAIL = 7 # Information is unavailable because detail option is OFF
CS_ERR_MEMSETUP = 8 # Dynamic memory management uninitialized (see CS_OPT_MEM), per capstone.h
CS_ERR_VERSION = 9 # Unsupported version (bindings)
CS_ERR_DIET = 10 # Information irrelevant in diet engine
CS_ERR_SKIPDATA = 11 # Access irrelevant data for "data" instruction in SKIPDATA mode
CS_ERR_X86_ATT = 12 # X86 AT&T syntax is unsupported (opt-out at compile time)
CS_ERR_X86_INTEL = 13 # X86 Intel syntax is unsupported (opt-out at compile time)
CS_ERR_X86_MASM = 14 # X86 MASM syntax is unsupported (opt-out at compile time)

# query id for cs_support()
CS_SUPPORT_DIET = CS_ARCH_ALL + 1
CS_SUPPORT_X86_REDUCE = CS_ARCH_ALL+2

# Capstone reverse lookup (value -> constant name).
# NOTE: several constants share a value (e.g. most CS_MODE_* bits), so each
# of these maps keeps only one name per value.
CS_AC = {v:k for k,v in locals().items() if k.startswith('CS_AC_')}
CS_ARCH = {v:k for k,v in locals().items() if k.startswith('CS_ARCH_')}
CS_ERR = {v:k for k,v in locals().items() if k.startswith('CS_ERR_')}
CS_GRP = {v:k for k,v in locals().items() if k.startswith('CS_GRP_')}
CS_MODE = {v:k for k,v in locals().items() if k.startswith('CS_MODE_')}
CS_OP = {v:k for k,v in locals().items() if k.startswith('CS_OP_')}
CS_OPT = {v:k for k,v in locals().items() if k.startswith('CS_OPT_')}

import ctypes, ctypes.util
from os.path import split, join, dirname
try:
    import distutils.sysconfig
except ImportError:
    # distutils was removed from the standard library in Python 3.12;
    # _python_lib_dir() falls back to the sysconfig module in that case.
    distutils = None
try:
    import pkg_resources
except ImportError:
    # pkg_resources ships with setuptools, which is not always installed;
    # the package-resource lookup is simply skipped without it.
    pkg_resources = None

import inspect
if not hasattr(sys.modules[__name__], '__file__'):
    __file__ = inspect.getfile(inspect.currentframe())

# File name of the shared library on the current platform.
if sys.platform == 'darwin':
    _lib = "libcapstone.dylib"
elif sys.platform in ('win32', 'cygwin'):
    _lib = "capstone.dll"
else:
    _lib = "libcapstone.so"

_found = False

def _load_lib(path):
    """Try to load the Capstone shared library from directory *path*.

    Looks for the platform library name (``_lib``) inside *path*; for a
    ``.so`` name it also tries the versioned file ``<name>.so.<major>``,
    where ``<major>`` is CS_VERSION_MAJOR, so that the library matching
    these bindings is the one picked up.

    Returns the loaded ``ctypes.CDLL`` object, or None when no candidate
    file exists in *path*.
    """
    lib_file = join(path, _lib)
    if os.path.exists(lib_file):
        return ctypes.cdll.LoadLibrary(lib_file)

    # if we're on linux, try again with the versioned .so.<major> extension
    if lib_file.endswith('.so'):
        versioned = '%s.%u' % (lib_file, CS_VERSION_MAJOR)
        if os.path.exists(versioned):
            return ctypes.cdll.LoadLibrary(versioned)
    return None


def _python_lib_dir():
    """Return the interpreter's site-packages directory.

    Uses distutils when it is importable (the historical behaviour) and the
    standard-library ``sysconfig`` module otherwise.
    """
    if distutils is not None:
        return distutils.sysconfig.get_python_lib()
    import sysconfig
    return sysconfig.get_path('purelib')

_cs = None

# Loading attempts, in order
# - user-provided environment variable
# - pkg_resources can get us the path to the local libraries
# - we can get the path to the local libraries by parsing our filename
# - global load
# - python's lib directory
# - last-gasp attempt at some hardcoded paths on darwin and linux
# NOTE(review): the '' entry only matches a library file in the current
# working directory (it is checked with os.path.exists) - confirm whether a
# real dynamic-linker lookup was intended for the "global load" step.

_path_list = [os.getenv('LIBCAPSTONE_PATH', None),
              pkg_resources.resource_filename(__name__, 'lib') if pkg_resources is not None else None,
              join(split(__file__)[0], 'lib'),
              '',
              _python_lib_dir(),
              "/usr/local/lib/" if sys.platform == 'darwin' else '/usr/lib64']

for _path in _path_list:
    if _path is None: continue
    _cs = _load_lib(_path)
    if _cs is not None: break
else:
    # no candidate directory contained the library
    raise ImportError("ERROR: fail to load the dynamic library.")


# low-level structure for C code

def copy_ctypes(src):
    """Returns a new ctypes object which is a bitwise copy of an existing one"""
    dst = type(src)()
    ctypes.memmove(ctypes.byref(dst), ctypes.byref(src), ctypes.sizeof(type(src)))
    return dst

def copy_ctypes_list(src):
    """Returns a list holding a bitwise copy of every ctypes object in *src*"""
    return [copy_ctypes(n) for n in src]

# Weird import placement because these modules are needed by the below code but need the above functions
from . import arm, arm64, m68k, mips, ppc, sparc, systemz, x86, xcore, tms320c64x, m680x, evm, mos65xx

class _cs_arch(ctypes.Union):
    """Union of the per-architecture detail structures (one member per arch)."""
    _fields_ = (
        ('arm64', arm64.CsArm64),
        ('arm', arm.CsArm),
        ('m68k', m68k.CsM68K),
        ('mips', mips.CsMips),
        ('x86', x86.CsX86),
        ('ppc', ppc.CsPpc),
        ('sparc', sparc.CsSparc),
        ('sysz', systemz.CsSysz),
        ('xcore', xcore.CsXcore),
        ('tms320c64x', tms320c64x.CsTMS320C64x),
        ('m680x', m680x.CsM680x),
        ('evm', evm.CsEvm),
        ('mos65xx', mos65xx.CsMOS65xx),
    )

class _cs_detail(ctypes.Structure):
    """Instruction detail record: implicit registers, groups and arch-specific info."""
    _fields_ = (
        ('regs_read', ctypes.c_uint16 * 16),
        ('regs_read_count', ctypes.c_ubyte),
        ('regs_write', ctypes.c_uint16 * 20),
        ('regs_write_count', ctypes.c_ubyte),
        ('groups', ctypes.c_ubyte * 8),
        ('groups_count', ctypes.c_ubyte),
        ('arch', _cs_arch),
    )

class _cs_insn(ctypes.Structure):
    """One disassembled instruction as laid out by the C library."""
    _fields_ = (
        ('id', ctypes.c_uint),
        ('address', ctypes.c_uint64),
        ('size', ctypes.c_uint16),
        ('bytes', ctypes.c_ubyte * 24),
        ('mnemonic', ctypes.c_char * 32),
        ('op_str', ctypes.c_char * 160),
        ('detail', ctypes.POINTER(_cs_detail)),
    )

# callback for SKIPDATA option
CS_SKIPDATA_CALLBACK = ctypes.CFUNCTYPE(ctypes.c_size_t, ctypes.POINTER(ctypes.c_char), ctypes.c_size_t, ctypes.c_size_t, ctypes.c_void_p)

class _cs_opt_skipdata(ctypes.Structure):
    """Argument structure passed to cs_option() for CS_OPT_SKIPDATA_SETUP."""
    _fields_ = (
        ('mnemonic', ctypes.c_char_p),
        ('callback', CS_SKIPDATA_CALLBACK),
        ('user_data', ctypes.c_void_p),
    )

class _cs_opt_mnem(ctypes.Structure):
    """Argument structure passed to cs_option() for CS_OPT_MNEMONIC."""
    _fields_ = (
        ('id', ctypes.c_uint),
        ('mnemonic', ctypes.c_char_p),
    )

# setup all the function prototype
def _setup_prototype(lib, fname, restype, *argtypes):
    """Declare the return type and argument types of C function *fname* in *lib*."""
    func = getattr(lib, fname)
    func.restype = restype
    func.argtypes = argtypes

_setup_prototype(_cs, "cs_open", ctypes.c_int, ctypes.c_uint, ctypes.c_uint, ctypes.POINTER(ctypes.c_size_t))
_setup_prototype(_cs, "cs_disasm", ctypes.c_size_t, ctypes.c_size_t, ctypes.POINTER(ctypes.c_char), ctypes.c_size_t, \
        ctypes.c_uint64, ctypes.c_size_t, ctypes.POINTER(ctypes.POINTER(_cs_insn)))
_setup_prototype(_cs, "cs_free", None, ctypes.c_void_p, ctypes.c_size_t)
_setup_prototype(_cs, "cs_close", ctypes.c_int, ctypes.POINTER(ctypes.c_size_t))
_setup_prototype(_cs, "cs_reg_name", ctypes.c_char_p, ctypes.c_size_t, ctypes.c_uint)
_setup_prototype(_cs, "cs_insn_name", ctypes.c_char_p, ctypes.c_size_t, ctypes.c_uint)
_setup_prototype(_cs, "cs_group_name", ctypes.c_char_p, ctypes.c_size_t, ctypes.c_uint)
_setup_prototype(_cs, "cs_op_count", ctypes.c_int, ctypes.c_size_t, ctypes.POINTER(_cs_insn), ctypes.c_uint)
_setup_prototype(_cs, "cs_op_index", ctypes.c_int, ctypes.c_size_t, ctypes.POINTER(_cs_insn), ctypes.c_uint, ctypes.c_uint)
_setup_prototype(_cs, "cs_errno", ctypes.c_int, ctypes.c_size_t)
_setup_prototype(_cs, "cs_option", ctypes.c_int, ctypes.c_size_t, ctypes.c_int, ctypes.c_void_p)
_setup_prototype(_cs, "cs_version", ctypes.c_int, ctypes.POINTER(ctypes.c_int), ctypes.POINTER(ctypes.c_int))
_setup_prototype(_cs, "cs_support", ctypes.c_bool, ctypes.c_int)
_setup_prototype(_cs, "cs_strerror", ctypes.c_char_p, ctypes.c_int)
_setup_prototype(_cs, "cs_regs_access", ctypes.c_int, ctypes.c_size_t, ctypes.POINTER(_cs_insn), ctypes.POINTER(ctypes.c_uint16*64), ctypes.POINTER(ctypes.c_uint8), ctypes.POINTER(ctypes.c_uint16*64), ctypes.POINTER(ctypes.c_uint8))


# access to error code via @errno of CsError
class CsError(Exception):
    """Error reported by the Capstone engine.

    The numeric CS_ERR_* code is available as ``errno``; ``str()`` of the
    exception is the message returned by cs_strerror() for that code.
    """

    def __init__(self, errno):
        # Initialise the base class too, so that ``args`` carries the code.
        Exception.__init__(self, errno)
        self.errno = errno

    if _python2:
        def __str__(self):
            return _cs.cs_strerror(self.errno)

    else:
        def __str__(self):
            # cs_strerror() returns bytes on Python 3
            return _cs.cs_strerror(self.errno).decode()


# return the core's version
def cs_version():
    """Return the C library version as a (major, minor, combined) tuple."""
    major = ctypes.c_int()
    minor = ctypes.c_int()
    combined = _cs.cs_version(ctypes.byref(major), ctypes.byref(minor))
    return (major.value, minor.value, combined)


# return the binding's version
def version_bind():
    """Return the binding version as (major, minor, (major << 8) + minor)."""
    return (CS_API_MAJOR, CS_API_MINOR, (CS_API_MAJOR << 8) + CS_API_MINOR)


def cs_support(query):
    """Ask the C library whether it supports *query* (a CS_ARCH_* or CS_SUPPORT_* id)."""
    return _cs.cs_support(query)


# dummy class resembling Cs class, just for cs_disasm_quick()
# CsInsn reads the fields @csh, @arch, @_detail and @_diet of its engine
# object, so all of them must exist here.
class _dummy_cs(object):
    def __init__(self, csh, arch):
        self.csh = csh
        self.arch = arch
        self._detail = False
        # CsInsn.mnemonic / op_str / reg_name() test ``_diet`` before use;
        # without this attribute they fail for cs_disasm_quick() results.
        self._diet = cs_support(CS_SUPPORT_DIET)


# Quick & dirty Python function to disasm raw binary code
# This function return CsInsn objects
# NOTE: you might want to use more efficient Cs class & its methods.
def cs_disasm_quick(arch, mode, code, offset, count=0):
    """Disassemble *code* with a throw-away engine, yielding CsInsn objects.

    This is a generator: the engine is opened when iteration starts and is
    closed when iteration ends, whether it ends normally, through an error,
    or because the caller stopped consuming the generator.

    arch, mode -- CS_ARCH_* / CS_MODE_* values passed to cs_open()
    code       -- the raw bytes to disassemble
    offset     -- address assigned to the first instruction
    count      -- value passed to cs_disasm() as its count argument

    Raises CsError when the library version does not match the bindings,
    when the engine cannot be opened, when disassembly reports an error, or
    when closing the engine fails after a complete run.
    """
    # verify version compatibility with the core before doing anything
    (major, minor, _combined) = cs_version()
    if major != CS_API_MAJOR or minor != CS_API_MINOR:
        # our binding version is different from the core's API version
        raise CsError(CS_ERR_VERSION)

    csh = ctypes.c_size_t()
    status = _cs.cs_open(arch, mode, ctypes.byref(csh))
    if status != CS_ERR_OK:
        raise CsError(status)

    finished = False
    try:
        all_insn = ctypes.POINTER(_cs_insn)()
        res = _cs.cs_disasm(csh, code, len(code), offset, count, ctypes.byref(all_insn))
        if res > 0:
            # every instruction shares the same handle, so one engine
            # stand-in is enough for all of them
            engine = _dummy_cs(csh, arch)
            try:
                for i in range(res):
                    yield CsInsn(engine, all_insn[i])
            finally:
                _cs.cs_free(all_insn, res)
        else:
            status = _cs.cs_errno(csh)
            if status != CS_ERR_OK:
                raise CsError(status)
        finished = True
    finally:
        # always release the handle; only report a close failure when no
        # other exception is already propagating
        status = _cs.cs_close(ctypes.byref(csh))
        if finished and status != CS_ERR_OK:
            raise CsError(status)


# Another quick, but lighter function to disasm raw binary code.
# This function is faster than cs_disasm_quick() around 20% because
# cs_disasm_lite() only return tuples of (address, size, mnemonic, op_str),
# rather than CsInsn objects.
# NOTE: you might want to use more efficient Cs class & its methods.
def cs_disasm_lite(arch, mode, code, offset, count=0):
    """Disassemble *code* with a throw-away engine, yielding plain tuples.

    Yields (address, size, mnemonic, op_str) for each instruction. This is a
    generator: the engine is opened when iteration starts and is closed when
    iteration ends, whether normally, through an error, or because the
    caller stopped consuming the generator.

    Raises CsError on a library/binding version mismatch, on a diet engine
    (which cannot provide mnemonic and op_str), when the engine cannot be
    opened, when disassembly reports an error, or when closing the engine
    fails after a complete run.
    """
    # verify version compatibility with the core before doing anything
    (major, minor, _combined) = cs_version()
    if major != CS_API_MAJOR or minor != CS_API_MINOR:
        # our binding version is different from the core's API version
        raise CsError(CS_ERR_VERSION)

    if cs_support(CS_SUPPORT_DIET):
        # Diet engine cannot provide @mnemonic & @op_str
        raise CsError(CS_ERR_DIET)

    csh = ctypes.c_size_t()
    status = _cs.cs_open(arch, mode, ctypes.byref(csh))
    if status != CS_ERR_OK:
        raise CsError(status)

    finished = False
    try:
        all_insn = ctypes.POINTER(_cs_insn)()
        res = _cs.cs_disasm(csh, code, len(code), offset, count, ctypes.byref(all_insn))
        if res > 0:
            try:
                for i in range(res):
                    insn = all_insn[i]
                    yield (insn.address, insn.size, insn.mnemonic.decode('ascii'), insn.op_str.decode('ascii'))
            finally:
                _cs.cs_free(all_insn, res)
        else:
            status = _cs.cs_errno(csh)
            if status != CS_ERR_OK:
                raise CsError(status)
        finished = True
    finally:
        # always release the handle; only report a close failure when no
        # other exception is already propagating
        status = _cs.cs_close(ctypes.byref(csh))
        if finished and status != CS_ERR_OK:
            raise CsError(status)

def _ascii_name_or_default(name, default):
    """Decode the C string *name* as ASCII, or return *default* when it is None."""
    return default if name is None else name.decode('ascii')


def _code_buffer(code):
    """Return (buffer, size) for handing *code* to cs_disasm().

    A bytearray is passed by reference (no copy). An empty bytearray cannot
    be wrapped that way, so it is replaced by an empty bytes object.
    """
    size = len(code)
    if isinstance(code, bytearray):
        if size == 0:
            return b'', 0
        return ctypes.byref(ctypes.c_char.from_buffer(code)), size
    return code, size


# Python-style class to disasm code
class CsInsn(object):
    """One disassembled instruction.

    Wraps a private copy of the C instruction record. Architecture-specific
    fields (``operands`` and friends) are generated lazily on first access
    and require detail mode to be enabled on the engine.
    """

    def __init__(self, cs, all_info):
        # keep our own copy: the caller frees the C array after iteration
        self._raw = copy_ctypes(all_info)
        self._cs = cs
        if self._cs._detail and self._raw.id != 0:
            # save detail
            self._raw.detail = ctypes.pointer(all_info.detail._type_())
            ctypes.memmove(ctypes.byref(self._raw.detail[0]), ctypes.byref(all_info.detail[0]), ctypes.sizeof(type(all_info.detail[0])))

    def __repr__(self):
        # format the bytes by hand: bytearray.hex() does not exist on Python 2
        hex_bytes = ''.join('%02x' % b for b in self.bytes)
        return '<CsInsn 0x%x [%s]: %s %s>' % (self.address, hex_bytes, self.mnemonic, self.op_str)

    # return instruction's ID.
    @property
    def id(self):
        return self._raw.id

    # return instruction's address.
    @property
    def address(self):
        return self._raw.address

    # return instruction's size.
    @property
    def size(self):
        return self._raw.size

    # return instruction's machine bytes (which should have @size bytes).
    @property
    def bytes(self):
        return bytearray(self._raw.bytes)[:self._raw.size]

    # return instruction's mnemonic.
    @property
    def mnemonic(self):
        if self._cs._diet:
            # Diet engine cannot provide @mnemonic.
            raise CsError(CS_ERR_DIET)

        return self._raw.mnemonic.decode('ascii')

    # return instruction's operands (in string).
    @property
    def op_str(self):
        if self._cs._diet:
            # Diet engine cannot provide @op_str.
            raise CsError(CS_ERR_DIET)

        return self._raw.op_str.decode('ascii')

    # return list of all implicit registers being read.
    @property
    def regs_read(self):
        if self._raw.id == 0:
            raise CsError(CS_ERR_SKIPDATA)

        if self._cs._diet:
            # Diet engine cannot provide @regs_read.
            raise CsError(CS_ERR_DIET)

        if self._cs._detail:
            detail = self._raw.detail.contents
            return detail.regs_read[:detail.regs_read_count]

        raise CsError(CS_ERR_DETAIL)

    # return list of all implicit registers being modified
    @property
    def regs_write(self):
        if self._raw.id == 0:
            raise CsError(CS_ERR_SKIPDATA)

        if self._cs._diet:
            # Diet engine cannot provide @regs_write
            raise CsError(CS_ERR_DIET)

        if self._cs._detail:
            detail = self._raw.detail.contents
            return detail.regs_write[:detail.regs_write_count]

        raise CsError(CS_ERR_DETAIL)

    # return list of semantic groups this instruction belongs to.
    @property
    def groups(self):
        if self._raw.id == 0:
            raise CsError(CS_ERR_SKIPDATA)

        if self._cs._diet:
            # Diet engine cannot provide @groups
            raise CsError(CS_ERR_DIET)

        if self._cs._detail:
            detail = self._raw.detail.contents
            return detail.groups[:detail.groups_count]

        raise CsError(CS_ERR_DETAIL)

    # populate the architecture-specific attributes from the detail record
    def __gen_detail(self):
        if self._raw.id == 0:
            # do nothing in skipdata mode
            return

        arch = self._cs.arch
        if arch == CS_ARCH_ARM:
            (self.usermode, self.vector_size, self.vector_data, self.cps_mode, self.cps_flag, self.cc, self.update_flags, \
            self.writeback, self.mem_barrier, self.operands) = arm.get_arch_info(self._raw.detail.contents.arch.arm)
        elif arch == CS_ARCH_ARM64:
            (self.cc, self.update_flags, self.writeback, self.operands) = \
                arm64.get_arch_info(self._raw.detail.contents.arch.arm64)
        elif arch == CS_ARCH_X86:
            (self.prefix, self.opcode, self.rex, self.addr_size, \
                self.modrm, self.sib, self.disp, \
                self.sib_index, self.sib_scale, self.sib_base, self.xop_cc, self.sse_cc, \
                self.avx_cc, self.avx_sae, self.avx_rm, self.eflags, \
                self.modrm_offset, self.disp_offset, self.disp_size, self.imm_offset, self.imm_size, \
                self.operands) = x86.get_arch_info(self._raw.detail.contents.arch.x86)
        elif arch == CS_ARCH_M68K:
            (self.operands, self.op_size) = m68k.get_arch_info(self._raw.detail.contents.arch.m68k)
        elif arch == CS_ARCH_MIPS:
            self.operands = mips.get_arch_info(self._raw.detail.contents.arch.mips)
        elif arch == CS_ARCH_PPC:
            (self.bc, self.bh, self.update_cr0, self.operands) = \
                ppc.get_arch_info(self._raw.detail.contents.arch.ppc)
        elif arch == CS_ARCH_SPARC:
            (self.cc, self.hint, self.operands) = sparc.get_arch_info(self._raw.detail.contents.arch.sparc)
        elif arch == CS_ARCH_SYSZ:
            (self.cc, self.operands) = systemz.get_arch_info(self._raw.detail.contents.arch.sysz)
        elif arch == CS_ARCH_XCORE:
            (self.operands) = xcore.get_arch_info(self._raw.detail.contents.arch.xcore)
        elif arch == CS_ARCH_TMS320C64X:
            (self.condition, self.funit, self.parallel, self.operands) = tms320c64x.get_arch_info(self._raw.detail.contents.arch.tms320c64x)
        elif arch == CS_ARCH_M680X:
            (self.flags, self.operands) = m680x.get_arch_info(self._raw.detail.contents.arch.m680x)
        elif arch == CS_ARCH_EVM:
            (self.pop, self.push, self.fee) = evm.get_arch_info(self._raw.detail.contents.arch.evm)
        elif arch == CS_ARCH_MOS65XX:
            (self.am, self.modifies_flags, self.operands) = mos65xx.get_arch_info(self._raw.detail.contents.arch.mos65xx)


    # Called only for attributes not found the normal way: generate the
    # architecture-specific fields on demand.
    # Raises CsError(CS_ERR_DETAIL) when detail mode is off,
    # CsError(CS_ERR_SKIPDATA) for a "data" instruction, AttributeError otherwise.
    def __getattr__(self, name):
        # Special (dunder) names are probed by the interpreter and by
        # modules such as copy; answer them with a plain AttributeError.
        if name.startswith('__') and name.endswith('__'):
            raise AttributeError(name)

        attr = object.__getattribute__
        try:
            # bypass __getattr__ so a missing '_cs' cannot recurse
            engine = attr(self, '_cs')
        except AttributeError:
            raise AttributeError(name)
        if not engine._detail:
            raise CsError(CS_ERR_DETAIL)

        _dict = attr(self, '__dict__')
        if 'operands' not in _dict:
            self.__gen_detail()
        if name not in _dict:
            if self._raw.id == 0:
                raise CsError(CS_ERR_SKIPDATA)
            raise AttributeError(name)
        return _dict[name]

    # get the last error code
    def errno(self):
        return _cs.cs_errno(self._cs.csh)

    # get the register name, given the register ID
    def reg_name(self, reg_id, default=None):
        if self._cs._diet:
            # Diet engine cannot provide register name
            raise CsError(CS_ERR_DIET)

        return _ascii_name_or_default(_cs.cs_reg_name(self._cs.csh, reg_id), default)

    # get the instruction name
    def insn_name(self, default=None):
        if self._cs._diet:
            # Diet engine cannot provide instruction name
            raise CsError(CS_ERR_DIET)

        if self._raw.id == 0:
            return default

        return _ascii_name_or_default(_cs.cs_insn_name(self._cs.csh, self.id), default)

    # get the group name
    def group_name(self, group_id, default=None):
        if self._cs._diet:
            # Diet engine cannot provide group name
            raise CsError(CS_ERR_DIET)

        return _ascii_name_or_default(_cs.cs_group_name(self._cs.csh, group_id), default)


    # verify if this insn belong to group with id as @group_id
    def group(self, group_id):
        if self._raw.id == 0:
            raise CsError(CS_ERR_SKIPDATA)

        if self._cs._diet:
            # Diet engine cannot provide group information
            raise CsError(CS_ERR_DIET)

        return group_id in self.groups

    # verify if this instruction implicitly read register @reg_id
    def reg_read(self, reg_id):
        if self._raw.id == 0:
            raise CsError(CS_ERR_SKIPDATA)

        if self._cs._diet:
            # Diet engine cannot provide regs_read information
            raise CsError(CS_ERR_DIET)

        return reg_id in self.regs_read

    # verify if this instruction implicitly modified register @reg_id
    def reg_write(self, reg_id):
        if self._raw.id == 0:
            raise CsError(CS_ERR_SKIPDATA)

        if self._cs._diet:
            # Diet engine cannot provide regs_write information
            raise CsError(CS_ERR_DIET)

        return reg_id in self.regs_write

    # return number of operands having same operand type @op_type
    def op_count(self, op_type):
        if self._raw.id == 0:
            raise CsError(CS_ERR_SKIPDATA)

        return sum(1 for op in self.operands if op.type == op_type)

    # get the operand at position @position of all operands having the same type @op_type
    # (@position counts from 1; returns None when there is no such operand)
    def op_find(self, op_type, position):
        if self._raw.id == 0:
            raise CsError(CS_ERR_SKIPDATA)

        c = 0
        for op in self.operands:
            if op.type == op_type:
                c += 1
            if c == position:
                return op

    # Return (list-of-registers-read, list-of-registers-modified) by this instructions.
    # This includes all the implicit & explicit registers.
    def regs_access(self):
        if self._raw.id == 0:
            raise CsError(CS_ERR_SKIPDATA)

        regs_read = (ctypes.c_uint16 * 64)()
        regs_read_count = ctypes.c_uint8()
        regs_write = (ctypes.c_uint16 * 64)()
        regs_write_count = ctypes.c_uint8()

        status = _cs.cs_regs_access(self._cs.csh, self._raw, ctypes.byref(regs_read), ctypes.byref(regs_read_count), ctypes.byref(regs_write), ctypes.byref(regs_write_count))
        if status != CS_ERR_OK:
            raise CsError(status)

        # an empty result is reported as an empty tuple, otherwise as a list
        if regs_read_count.value > 0:
            regs_read = regs_read[:regs_read_count.value]
        else:
            regs_read = ()

        if regs_write_count.value > 0:
            regs_write = regs_write[:regs_write_count.value]
        else:
            regs_write = ()

        return (regs_read, regs_write)



class Cs(object):
    """Disassembler engine for one architecture/mode pair.

    Raises CsError from the constructor when the library version does not
    match the bindings or when cs_open() fails.
    """

    def __init__(self, arch, mode):
        # verify version compatibility with the core before doing anything
        (major, minor, _combined) = cs_version()
        if major != CS_API_MAJOR or minor != CS_API_MINOR:
            self.csh = None
            # our binding version is different from the core's API version
            raise CsError(CS_ERR_VERSION)

        self.arch, self._mode = arch, mode
        self.csh = ctypes.c_size_t()
        status = _cs.cs_open(arch, mode, ctypes.byref(self.csh))
        if status != CS_ERR_OK:
            self.csh = None
            raise CsError(status)

        try:
            import ccapstone
            # rewire disasm to use the faster version
            self.disasm = ccapstone.Cs(self).disasm
        except Exception:
            # optional accelerator is unavailable: keep the pure-Python disasm()
            pass

        if arch == CS_ARCH_X86:
            # Intel syntax is default for X86
            self._syntax = CS_OPT_SYNTAX_INTEL
        else:
            self._syntax = None

        self._detail = False  # by default, do not produce instruction details
        self._imm_unsigned = False  # by default, print immediate operands as signed numbers
        self._diet = cs_support(CS_SUPPORT_DIET)
        self._x86reduce = cs_support(CS_SUPPORT_X86_REDUCE)

        # default mnemonic for SKIPDATA
        self._skipdata_mnem = ".byte"
        self._skipdata_cb = (None, None)
        # store reference to option object to avoid it being freed
        # because C code uses it by reference
        self._skipdata_opt = _cs_opt_skipdata()
        self._skipdata = False



    # destructor to be called automatically when object is destroyed.
    def __del__(self):
        # __init__ may have failed before 'csh' was ever assigned
        csh = getattr(self, 'csh', None)
        if csh:
            try:
                _cs.cs_close(ctypes.byref(csh))
            except Exception:  # _cs might be pulled from under our feet
                pass


    # is this a diet engine?
    @property
    def diet(self):
        return self._diet


    # is this engine compiled with X86-reduce option?
    @property
    def x86_reduce(self):
        return self._x86reduce


    # return assembly syntax.
    @property
    def syntax(self):
        return self._syntax


    # syntax setter: modify assembly syntax.
    @syntax.setter
    def syntax(self, style):
        status = _cs.cs_option(self.csh, CS_OPT_SYNTAX, style)
        if status != CS_ERR_OK:
            raise CsError(status)
        # save syntax
        self._syntax = style


    # return current skipdata status
    @property
    def skipdata(self):
        return self._skipdata


    # setter: modify skipdata status
    @skipdata.setter
    def skipdata(self, opt):
        if opt == False:
            status = _cs.cs_option(self.csh, CS_OPT_SKIPDATA, CS_OPT_OFF)
        else:
            status = _cs.cs_option(self.csh, CS_OPT_SKIPDATA, CS_OPT_ON)
        if status != CS_ERR_OK:
            raise CsError(status)

        # save this option
        self._skipdata = opt


    # return the (mnemonic, callback, user_data) triple used for SKIPDATA
    @property
    def skipdata_setup(self):
        return (self._skipdata_mnem,) + self._skipdata_cb


    # setter: @opt is a (mnemonic, callback, user_data) triple
    @skipdata_setup.setter
    def skipdata_setup(self, opt):
        _mnem, _cb, _ud = opt
        self._skipdata_opt.mnemonic = _mnem.encode()
        self._skipdata_opt.callback = CS_SKIPDATA_CALLBACK(_cb or 0)
        self._skipdata_opt.user_data = ctypes.cast(_ud, ctypes.c_void_p)
        status = _cs.cs_option(self.csh, CS_OPT_SKIPDATA_SETUP, ctypes.cast(ctypes.byref(self._skipdata_opt), ctypes.c_void_p))
        if status != CS_ERR_OK:
            raise CsError(status)

        self._skipdata_mnem = _mnem
        self._skipdata_cb = (_cb, _ud)


    # return the mnemonic used for skipped data
    @property
    def skipdata_mnem(self):
        return self._skipdata_mnem


    # setter: change only the mnemonic, keeping the current callback
    @skipdata_mnem.setter
    def skipdata_mnem(self, mnem):
        self.skipdata_setup = (mnem,) + self._skipdata_cb


    # return the (callback, user_data) pair used for SKIPDATA
    @property
    def skipdata_callback(self):
        return self._skipdata_cb


    # setter: accepts either a callback or a (callback, user_data) tuple
    @skipdata_callback.setter
    def skipdata_callback(self, val):
        if not isinstance(val, tuple):
            val = (val, None)
        func, data = val
        self.skipdata_setup = (self._skipdata_mnem, func, data)


    # customize instruction mnemonic
    def mnemonic_setup(self, id, mnem):
        _mnem_opt = _cs_opt_mnem()
        _mnem_opt.id = id
        if mnem:
            _mnem_opt.mnemonic = mnem.encode()
        else:
            _mnem_opt.mnemonic = mnem
        status = _cs.cs_option(self.csh, CS_OPT_MNEMONIC, ctypes.cast(ctypes.byref(_mnem_opt), ctypes.c_void_p))
        if status != CS_ERR_OK:
            raise CsError(status)


    # check to see if this engine supports a particular arch,
    # or diet mode (depending on @query).
    def support(self, query):
        return cs_support(query)


    # is detail mode enable?
    @property
    def detail(self):
        return self._detail


    # modify detail mode.
    @detail.setter
    def detail(self, opt):  # opt is boolean type, so must be either 'True' or 'False'
        if opt == False:
            status = _cs.cs_option(self.csh, CS_OPT_DETAIL, CS_OPT_OFF)
        else:
            status = _cs.cs_option(self.csh, CS_OPT_DETAIL, CS_OPT_ON)
        if status != CS_ERR_OK:
            raise CsError(status)
        # save detail
        self._detail = opt


    # are immediate operands printed in unsigned form?
    @property
    def imm_unsigned(self):
        return self._imm_unsigned


    # modify the unsigned-immediate printing mode.
    @imm_unsigned.setter
    def imm_unsigned(self, opt):  # opt is boolean type, so must be either 'True' or 'False'
        if opt == False:
            status = _cs.cs_option(self.csh, CS_OPT_UNSIGNED, CS_OPT_OFF)
        else:
            status = _cs.cs_option(self.csh, CS_OPT_UNSIGNED, CS_OPT_ON)
        if status != CS_ERR_OK:
            raise CsError(status)
        # save the setting
        self._imm_unsigned = opt


    # return disassembly mode of this engine.
    @property
    def mode(self):
        return self._mode


    # modify engine's mode at run-time.
    @mode.setter
    def mode(self, opt):  # opt is new disasm mode, of int type
        status = _cs.cs_option(self.csh, CS_OPT_MODE, opt)
        if status != CS_ERR_OK:
            raise CsError(status)
        # save mode
        self._mode = opt

    # get the last error code
    def errno(self):
        return _cs.cs_errno(self.csh)

    # get the register name, given the register ID
    def reg_name(self, reg_id, default=None):
        if self._diet:
            # Diet engine cannot provide register name
            raise CsError(CS_ERR_DIET)

        return _ascii_name_or_default(_cs.cs_reg_name(self.csh, reg_id), default)

    # get the instruction name, given the instruction ID
    def insn_name(self, insn_id, default=None):
        if self._diet:
            # Diet engine cannot provide instruction name
            raise CsError(CS_ERR_DIET)

        return _ascii_name_or_default(_cs.cs_insn_name(self.csh, insn_id), default)

    # get the group name
    def group_name(self, group_id, default=None):
        if self._diet:
            # Diet engine cannot provide group name
            raise CsError(CS_ERR_DIET)

        return _ascii_name_or_default(_cs.cs_group_name(self.csh, group_id), default)

    # Disassemble binary & return disassembled instructions in CsInsn objects
    # (generator; raises CsError when the engine reports an error)
    def disasm(self, code, offset, count=0):
        all_insn = ctypes.POINTER(_cs_insn)()
        # Pass a bytearray by reference
        code, size = _code_buffer(code)
        res = _cs.cs_disasm(self.csh, code, size, offset, count, ctypes.byref(all_insn))
        if res > 0:
            try:
                for i in range(res):
                    yield CsInsn(self, all_insn[i])
            finally:
                # free the C array even if the caller stops iterating early
                _cs.cs_free(all_insn, res)
        else:
            status = _cs.cs_errno(self.csh)
            if status != CS_ERR_OK:
                raise CsError(status)


    # Light function to disassemble binary. This is about 20% faster than disasm() because
    # unlike disasm(), disasm_lite() only return tuples of (address, size, mnemonic, op_str),
    # rather than CsInsn objects.
    def disasm_lite(self, code, offset, count=0):
        if self._diet:
            # Diet engine cannot provide @mnemonic & @op_str
            raise CsError(CS_ERR_DIET)

        all_insn = ctypes.POINTER(_cs_insn)()
        # Pass a bytearray by reference
        code, size = _code_buffer(code)
        res = _cs.cs_disasm(self.csh, code, size, offset, count, ctypes.byref(all_insn))
        if res > 0:
            try:
                for i in range(res):
                    insn = all_insn[i]
                    yield (insn.address, insn.size, insn.mnemonic.decode('ascii'), insn.op_str.decode('ascii'))
            finally:
                # free the C array even if the caller stops iterating early
                _cs.cs_free(all_insn, res)
        else:
            status = _cs.cs_errno(self.csh)
            if status != CS_ERR_OK:
                raise CsError(status)


# print out debugging info
def debug():
    """Return a string describing the engine build.

    The string names the engine flavour (diet/standard), the supported
    architectures, the core version and the binding version. When the
    optional ccapstone module is importable its own debug() result is
    returned instead.
    """
    # is Cython there?
    try:
        from . import ccapstone
        return ccapstone.debug()
    except Exception:
        # no Cython, fallback to Python code below
        pass

    diet = "diet" if cs_support(CS_SUPPORT_DIET) else "standard"

    archs = { "arm": CS_ARCH_ARM, "arm64": CS_ARCH_ARM64, "m68k": CS_ARCH_M68K, \
        "mips": CS_ARCH_MIPS, "ppc": CS_ARCH_PPC, "sparc": CS_ARCH_SPARC, \
        "sysz": CS_ARCH_SYSZ, 'xcore': CS_ARCH_XCORE, "tms320c64x": CS_ARCH_TMS320C64X, \
        "m680x": CS_ARCH_M680X, 'evm': CS_ARCH_EVM, 'mos65xx': CS_ARCH_MOS65XX }

    # supported architectures, in alphabetical order
    all_archs = "".join("-%s" % k for k in sorted(archs) if cs_support(archs[k]))

    # x86 is listed last so the "_reduce" suffix can follow it
    if cs_support(CS_ARCH_X86):
        all_archs += "-x86"
        if cs_support(CS_SUPPORT_X86_REDUCE):
            all_archs += "_reduce"

    (major, minor, _combined) = cs_version()

    return "python-%s%s-c%u.%u-b%u.%u" % (diet, all_archs, major, minor, CS_API_MAJOR, CS_API_MINOR)