• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Capstone Python bindings, by Nguyen Anh Quynnh <aquynh@gmail.com>
2import os, sys
3from platform import system
# True when this module is being imported by a Python 2 interpreter.
_python2 = sys.version_info < (3,)
if _python2:
    # Python 2 only: alias range to the lazy xrange for the rest of the module
    range = xrange
7
# Public names exported by ``from <package> import *``.
# Every CS_ERR_* code defined below is listed, including CS_ERR_X86_MASM.
__all__ = [
    'Cs',
    'CsInsn',

    'cs_disasm_quick',
    'cs_disasm_lite',
    'cs_version',
    'cs_support',
    'version_bind',
    'debug',

    'CS_API_MAJOR',
    'CS_API_MINOR',

    'CS_VERSION_MAJOR',
    'CS_VERSION_MINOR',
    'CS_VERSION_EXTRA',

    'CS_ARCH_ARM',
    'CS_ARCH_ARM64',
    'CS_ARCH_MIPS',
    'CS_ARCH_X86',
    'CS_ARCH_PPC',
    'CS_ARCH_SPARC',
    'CS_ARCH_SYSZ',
    'CS_ARCH_XCORE',
    'CS_ARCH_M68K',
    'CS_ARCH_TMS320C64X',
    'CS_ARCH_M680X',
    'CS_ARCH_EVM',
    'CS_ARCH_MOS65XX',
    'CS_ARCH_ALL',

    'CS_MODE_LITTLE_ENDIAN',
    'CS_MODE_BIG_ENDIAN',
    'CS_MODE_16',
    'CS_MODE_32',
    'CS_MODE_64',
    'CS_MODE_ARM',
    'CS_MODE_THUMB',
    'CS_MODE_MCLASS',
    'CS_MODE_MICRO',
    'CS_MODE_MIPS3',
    'CS_MODE_MIPS32R6',
    'CS_MODE_MIPS2',
    'CS_MODE_V8',
    'CS_MODE_V9',
    'CS_MODE_QPX',
    'CS_MODE_M68K_000',
    'CS_MODE_M68K_010',
    'CS_MODE_M68K_020',
    'CS_MODE_M68K_030',
    'CS_MODE_M68K_040',
    'CS_MODE_M68K_060',
    'CS_MODE_MIPS32',
    'CS_MODE_MIPS64',
    'CS_MODE_M680X_6301',
    'CS_MODE_M680X_6309',
    'CS_MODE_M680X_6800',
    'CS_MODE_M680X_6801',
    'CS_MODE_M680X_6805',
    'CS_MODE_M680X_6808',
    'CS_MODE_M680X_6809',
    'CS_MODE_M680X_6811',
    'CS_MODE_M680X_CPU12',
    'CS_MODE_M680X_HCS08',

    'CS_OPT_SYNTAX',
    'CS_OPT_SYNTAX_DEFAULT',
    'CS_OPT_SYNTAX_INTEL',
    'CS_OPT_SYNTAX_ATT',
    'CS_OPT_SYNTAX_NOREGNAME',
    'CS_OPT_SYNTAX_MASM',

    'CS_OPT_DETAIL',
    'CS_OPT_MODE',
    'CS_OPT_ON',
    'CS_OPT_OFF',

    'CS_ERR_OK',
    'CS_ERR_MEM',
    'CS_ERR_ARCH',
    'CS_ERR_HANDLE',
    'CS_ERR_CSH',
    'CS_ERR_MODE',
    'CS_ERR_OPTION',
    'CS_ERR_DETAIL',
    'CS_ERR_VERSION',
    'CS_ERR_MEMSETUP',
    'CS_ERR_DIET',
    'CS_ERR_SKIPDATA',
    'CS_ERR_X86_ATT',
    'CS_ERR_X86_INTEL',
    'CS_ERR_X86_MASM',

    'CS_SUPPORT_DIET',
    'CS_SUPPORT_X86_REDUCE',
    'CS_SKIPDATA_CALLBACK',

    'CS_OP_INVALID',
    'CS_OP_REG',
    'CS_OP_IMM',
    'CS_OP_MEM',
    'CS_OP_FP',

    'CS_GRP_INVALID',
    'CS_GRP_JUMP',
    'CS_GRP_CALL',
    'CS_GRP_RET',
    'CS_GRP_INT',
    'CS_GRP_IRET',
    'CS_GRP_PRIVILEGE',

    'CS_AC_INVALID',
    'CS_AC_READ',
    'CS_AC_WRITE',

    'CsError',

    '__version__',
]
128
129# Capstone C interface
130
# API version implemented by these bindings; compared against the core's
# version before an engine is opened.
CS_API_MAJOR = 5
CS_API_MINOR = 0

# Package version: major/minor follow the API version, extra is the patch level
CS_VERSION_MAJOR, CS_VERSION_MINOR, CS_VERSION_EXTRA = CS_API_MAJOR, CS_API_MINOR, 0

_version_parts = (CS_VERSION_MAJOR, CS_VERSION_MINOR, CS_VERSION_EXTRA)
__version__ = "%u.%u.%u" % _version_parts
del _version_parts
141
# architectures
# Identifiers passed as the `arch` argument when opening an engine (see Cs
# and the cs_disasm_* helpers, which forward it to cs_open()).
CS_ARCH_ARM = 0
CS_ARCH_ARM64 = 1
CS_ARCH_MIPS = 2
CS_ARCH_X86 = 3
CS_ARCH_PPC = 4
CS_ARCH_SPARC = 5
CS_ARCH_SYSZ = 6
CS_ARCH_XCORE = 7
CS_ARCH_M68K = 8
CS_ARCH_TMS320C64X = 9
CS_ARCH_M680X = 10
CS_ARCH_EVM = 11
CS_ARCH_MOS65XX = 12
# One past the last real architecture id.
CS_ARCH_MAX = 13
# Not a real architecture; also the base value for the CS_SUPPORT_* query ids.
CS_ARCH_ALL = 0xFFFF
158
# disasm mode
# NOTE: bit values are reused across architectures (for example
# CS_MODE_THUMB, CS_MODE_MICRO, CS_MODE_V9, CS_MODE_QPX and CS_MODE_M68K_030
# are all 1 << 4), so a mode value is only meaningful together with the
# architecture it is used with.
CS_MODE_LITTLE_ENDIAN = 0      # little-endian mode (default mode)
CS_MODE_ARM = 0                # ARM mode
CS_MODE_16 = (1 << 1)          # 16-bit mode (for X86)
CS_MODE_32 = (1 << 2)          # 32-bit mode (for X86)
CS_MODE_64 = (1 << 3)          # 64-bit mode (for X86, PPC)
CS_MODE_THUMB = (1 << 4)       # ARM's Thumb mode, including Thumb-2
CS_MODE_MCLASS = (1 << 5)      # ARM's Cortex-M series
CS_MODE_V8 = (1 << 6)          # ARMv8 A32 encodings for ARM
CS_MODE_MICRO = (1 << 4)       # MicroMips mode (MIPS architecture)
CS_MODE_MIPS3 = (1 << 5)       # Mips III ISA
CS_MODE_MIPS32R6 = (1 << 6)    # Mips32r6 ISA
CS_MODE_MIPS2 = (1 << 7)       # Mips II ISA
CS_MODE_V9 = (1 << 4)          # Sparc V9 mode (for Sparc)
CS_MODE_QPX = (1 << 4)         # Quad Processing eXtensions mode (PPC)
CS_MODE_M68K_000 = (1 << 1)    # M68K 68000 mode
CS_MODE_M68K_010 = (1 << 2)    # M68K 68010 mode
CS_MODE_M68K_020 = (1 << 3)    # M68K 68020 mode
CS_MODE_M68K_030 = (1 << 4)    # M68K 68030 mode
CS_MODE_M68K_040 = (1 << 5)    # M68K 68040 mode
CS_MODE_M68K_060 = (1 << 6)    # M68K 68060 mode
CS_MODE_BIG_ENDIAN = (1 << 31) # big-endian mode
CS_MODE_MIPS32 = CS_MODE_32    # Mips32 ISA
CS_MODE_MIPS64 = CS_MODE_64    # Mips64 ISA
CS_MODE_M680X_6301 = (1 << 1)  # M680X HD6301/3 mode
CS_MODE_M680X_6309 = (1 << 2)  # M680X HD6309 mode
CS_MODE_M680X_6800 = (1 << 3)  # M680X M6800/2 mode
CS_MODE_M680X_6801 = (1 << 4)  # M680X M6801/3 mode
CS_MODE_M680X_6805 = (1 << 5)  # M680X M6805 mode
CS_MODE_M680X_6808 = (1 << 6)  # M680X M68HC08 mode
CS_MODE_M680X_6809 = (1 << 7)  # M680X M6809 mode
CS_MODE_M680X_6811 = (1 << 8)  # M680X M68HC11 mode
CS_MODE_M680X_CPU12 = (1 << 9)  # M680X CPU12 mode
CS_MODE_M680X_HCS08 = (1 << 10)  # M680X HCS08 mode
193
# Capstone option type (second argument of cs_option())
CS_OPT_SYNTAX = 1    # Assembly output syntax (value: one of CS_OPT_SYNTAX_*)
CS_OPT_DETAIL = 2    # Break down instruction structure into details
CS_OPT_MODE = 3      # Change engine's mode at run-time
CS_OPT_MEM = 4       # User-defined dynamic memory related functions (per capstone.h)
CS_OPT_SKIPDATA = 5  # Skip data when disassembling
CS_OPT_SKIPDATA_SETUP = 6      # Setup user-defined function for SKIPDATA option
CS_OPT_MNEMONIC = 7  # Customize instruction mnemonic
CS_OPT_UNSIGNED = 8  # Print immediate in unsigned form

# Capstone option value
CS_OPT_OFF = 0             # Turn OFF an option - default option of CS_OPT_DETAIL
CS_OPT_ON = 3              # Turn ON an option (CS_OPT_DETAIL)
207
# Common instruction operand types - to be consistent across all architectures.
CS_OP_INVALID = 0
CS_OP_REG = 1
CS_OP_IMM = 2
CS_OP_MEM = 3
CS_OP_FP  = 4

# Common instruction groups - to be consistent across all architectures.
CS_GRP_INVALID = 0  # uninitialized/invalid group.
CS_GRP_JUMP    = 1  # all jump instructions (conditional+direct+indirect jumps)
CS_GRP_CALL    = 2  # all call instructions
CS_GRP_RET     = 3  # all return instructions
CS_GRP_INT     = 4  # all interrupt instructions (int+syscall)
CS_GRP_IRET    = 5  # all interrupt return instructions
CS_GRP_PRIVILEGE = 6  # all privileged instructions

# Access types for instruction operands (bit flags).
CS_AC_INVALID  = 0        # Invalid/uninitialized access type.
CS_AC_READ     = (1 << 0) # Operand that is read from.
CS_AC_WRITE    = (1 << 1) # Operand that is written to.
228
# Capstone syntax value (the value passed together with CS_OPT_SYNTAX)
CS_OPT_SYNTAX_DEFAULT = 0    # Default assembly syntax of all platforms (CS_OPT_SYNTAX)
CS_OPT_SYNTAX_INTEL = 1    # Intel X86 asm syntax - default syntax on X86 (CS_OPT_SYNTAX, CS_ARCH_X86)
CS_OPT_SYNTAX_ATT = 2      # ATT asm syntax (CS_OPT_SYNTAX, CS_ARCH_X86)
CS_OPT_SYNTAX_NOREGNAME = 3   # Asm syntax prints register name with only number - (CS_OPT_SYNTAX, CS_ARCH_PPC, CS_ARCH_ARM)
CS_OPT_SYNTAX_MASM = 4      # MASM syntax (CS_OPT_SYNTAX, CS_ARCH_X86)
235
# Capstone error type
CS_ERR_OK = 0      # No error: everything was fine
CS_ERR_MEM = 1     # Out-Of-Memory error: cs_open(), cs_disasm()
CS_ERR_ARCH = 2    # Unsupported architecture: cs_open()
CS_ERR_HANDLE = 3  # Invalid handle: cs_op_count(), cs_op_index()
CS_ERR_CSH = 4     # Invalid csh argument: cs_close(), cs_errno(), cs_option()
CS_ERR_MODE = 5    # Invalid/unsupported mode: cs_open()
CS_ERR_OPTION = 6  # Invalid/unsupported option: cs_option()
CS_ERR_DETAIL = 7  # Information is unavailable because detail option is OFF
CS_ERR_MEMSETUP = 8 # Dynamic memory management uninitialized (see CS_OPT_MEM; per capstone.h)
CS_ERR_VERSION = 9 # Unsupported version (bindings)
CS_ERR_DIET = 10   # Information irrelevant in diet engine
CS_ERR_SKIPDATA = 11 # Access irrelevant data for "data" instruction in SKIPDATA mode
CS_ERR_X86_ATT = 12 # X86 AT&T syntax is unsupported (opt-out at compile time)
CS_ERR_X86_INTEL = 13 # X86 Intel syntax is unsupported (opt-out at compile time)
CS_ERR_X86_MASM = 14 # X86 MASM syntax is unsupported (opt-out at compile time)
252
# query id for cs_support()
# Numbered just above CS_ARCH_ALL, so they cannot collide with the
# architecture ids defined above.
CS_SUPPORT_DIET = CS_ARCH_ALL + 1
CS_SUPPORT_X86_REDUCE = CS_ARCH_ALL+2
256
# Capstone reverse lookup: value -> constant name, one table per name prefix.
# When several constants of a family share a value (common for CS_MODE_* and
# CS_OPT_*), the constant defined last wins, so these tables are best-effort
# helpers for pretty-printing rather than exact inverses.
#
# The module namespace is snapshotted once instead of iterating the live
# locals() inside each comprehension: on Python 3.12+ (inlined
# comprehensions) iterating locals() directly can raise
# "dictionary changed size during iteration" when tracing is active.
_constants = list(globals().items())
CS_AC    = {v:k for k,v in _constants if k.startswith('CS_AC_')}
CS_ARCH  = {v:k for k,v in _constants if k.startswith('CS_ARCH_')}
CS_ERR   = {v:k for k,v in _constants if k.startswith('CS_ERR_')}
CS_GRP   = {v:k for k,v in _constants if k.startswith('CS_GRP_')}
CS_MODE  = {v:k for k,v in _constants if k.startswith('CS_MODE_')}
CS_OP    = {v:k for k,v in _constants if k.startswith('CS_OP_')}
CS_OPT   = {v:k for k,v in _constants if k.startswith('CS_OPT_')}
del _constants
265
266import ctypes, ctypes.util
267from os.path import split, join, dirname
268import distutils.sysconfig
269import pkg_resources
270
271import inspect
# The library search below derives a directory from __file__.  If this module
# object has no __file__ attribute, recover a path from the current frame.
# (presumably happens in frozen/embedded interpreters — TODO confirm)
if not hasattr(sys.modules[__name__], '__file__'):
    __file__ = inspect.getfile(inspect.currentframe())
274
# File name of the Capstone shared library on the current platform; anything
# that is not macOS or Windows/Cygwin is assumed to use the ELF ".so" name.
_lib = {
    'darwin': "libcapstone.dylib",
    'win32': "capstone.dll",
    'cygwin': "capstone.dll",
}.get(sys.platform, "libcapstone.so")

_found = False
283
def _load_lib(path):
    """Try to load the Capstone shared library from directory *path*.

    Looks for the platform file name (``_lib``) inside *path*.  For ``.so``
    names it also tries the versioned file that Linux installs ship when the
    unversioned development symlink is absent: first the one matching this
    binding's major version, then the legacy ``.4`` name that was the only
    one tried before.

    Returns the loaded ``ctypes`` library object, or None when no candidate
    file exists.  Errors raised by ``LoadLibrary`` itself are not caught.
    """
    lib_file = join(path, _lib)
    if os.path.exists(lib_file):
        return ctypes.cdll.LoadLibrary(lib_file)

    if lib_file.endswith('.so'):
        # Prefer the soname that matches the API these bindings were written
        # for; keep ".4" as a fallback so existing setups behave as before.
        for suffix in ('.%u' % CS_VERSION_MAJOR, '.4'):
            versioned = lib_file + suffix
            if os.path.exists(versioned):
                return ctypes.cdll.LoadLibrary(versioned)
    return None
294
# Handle to the loaded Capstone shared library; set by the search loop below.
_cs = None

# Loading attempts, in order
# - user-provided environment variable
# - pkg_resources can get us the path to the local libraries
# - we can get the path to the local libraries by parsing our filename
# - global load
# - python's lib directory
# - last-gasp attempt at some hardcoded paths on darwin and linux
#
# NOTE: _load_lib() only loads files that os.path.exists() can see, so the ''
# entry ("global load") effectively means "the current working directory".

_path_list = [os.getenv('LIBCAPSTONE_PATH', None),
              pkg_resources.resource_filename(__name__, 'lib'),
              join(split(__file__)[0], 'lib'),
              '',
              distutils.sysconfig.get_python_lib(),
              "/usr/local/lib/" if sys.platform == 'darwin' else '/usr/lib64']

# Stop at the first directory that yields a library; the for/else raises
# ImportError only when every candidate was tried without success.
for _path in _path_list:
    if _path is None: continue  # LIBCAPSTONE_PATH not set
    _cs = _load_lib(_path)
    if _cs is not None: break
else:
    raise ImportError("ERROR: fail to load the dynamic library.")
318
319
320# low-level structure for C code
321
def copy_ctypes(src):
    """Return a fresh ctypes object of the same type holding a byte-for-byte copy of *src*.

    The copy is shallow: pointer fields in the clone still point at the same
    memory as those in *src*.
    """
    ctype = type(src)
    clone = ctype()
    ctypes.memmove(ctypes.byref(clone), ctypes.byref(src), ctypes.sizeof(ctype))
    return clone
327
def copy_ctypes_list(src):
    """Return a list holding an independent bitwise copy of every ctypes object in *src*."""
    copies = []
    for item in src:
        copies.append(copy_ctypes(item))
    return copies
330
331# Weird import placement because these modules are needed by the below code but need the above functions
332from . import arm, arm64, m68k, mips, ppc, sparc, systemz, x86, xcore, tms320c64x, m680x, evm, mos65xx
333
class _cs_arch(ctypes.Union):
    """Union of the per-architecture detail structures.

    Used as the ``arch`` field of ``_cs_detail``; CsInsn picks the member
    that matches the engine's architecture when it decodes details.
    Being a union, all members share the same storage.
    """
    _fields_ = (
        ('arm64', arm64.CsArm64),
        ('arm', arm.CsArm),
        ('m68k', m68k.CsM68K),
        ('mips', mips.CsMips),
        ('x86', x86.CsX86),
        ('ppc', ppc.CsPpc),
        ('sparc', sparc.CsSparc),
        ('sysz', systemz.CsSysz),
        ('xcore', xcore.CsXcore),
        ('tms320c64x', tms320c64x.CsTMS320C64x),
        ('m680x', m680x.CsM680x),
        ('evm', evm.CsEvm),
        ('mos65xx', mos65xx.CsMOS65xx),
    )
350
class _cs_detail(ctypes.Structure):
    """Detail record that ``_cs_insn.detail`` points to.

    Each fixed-size array is paired with a ``*_count`` field giving the
    number of valid leading entries; CsInsn slices the arrays by that count.
    Field order and sizes must match the C library — do not reorder.
    """
    _fields_ = (
        ('regs_read', ctypes.c_uint16 * 16),    # registers implicitly read
        ('regs_read_count', ctypes.c_ubyte),
        ('regs_write', ctypes.c_uint16 * 20),   # registers implicitly written
        ('regs_write_count', ctypes.c_ubyte),
        ('groups', ctypes.c_ubyte * 8),         # group ids the instruction belongs to
        ('groups_count', ctypes.c_ubyte),
        ('arch', _cs_arch),                     # architecture-specific part
    )
361
class _cs_insn(ctypes.Structure):
    """Raw instruction record produced by ``cs_disasm`` (see its prototype below).

    Field order and sizes must match the C library — do not reorder.
    """
    _fields_ = (
        ('id', ctypes.c_uint),                  # instruction id; 0 is treated as a SKIPDATA "data" entry
        ('address', ctypes.c_uint64),
        ('size', ctypes.c_uint16),              # number of valid bytes in `bytes`
        ('bytes', ctypes.c_ubyte * 24),
        ('mnemonic', ctypes.c_char * 32),
        ('op_str', ctypes.c_char * 160),
        ('detail', ctypes.POINTER(_cs_detail)), # only dereferenced when detail mode is on
    )
372
# callback for SKIPDATA option
# ctypes prototype: returns size_t and takes (char *, size_t, size_t, void *).
# Stored in _cs_opt_skipdata.callback; the last argument is presumably the
# `user_data` pointer from that structure — confirm against capstone.h.
CS_SKIPDATA_CALLBACK = ctypes.CFUNCTYPE(ctypes.c_size_t, ctypes.POINTER(ctypes.c_char), ctypes.c_size_t, ctypes.c_size_t, ctypes.c_void_p)
375
class _cs_opt_skipdata(ctypes.Structure):
    """Argument block passed by reference to ``cs_option`` for CS_OPT_SKIPDATA_SETUP.

    Cs keeps an instance alive on itself because the C side uses it by reference.
    """
    _fields_ = (
        ('mnemonic', ctypes.c_char_p),          # mnemonic shown for skipped data
        ('callback', CS_SKIPDATA_CALLBACK),     # user callback (may be a null function)
        ('user_data', ctypes.c_void_p),         # opaque pointer supplied by the user
    )
382
class _cs_opt_mnem(ctypes.Structure):
    """Pairs an instruction id with a mnemonic string.

    Presumably the argument block for the CS_OPT_MNEMONIC option; its use is
    not visible in this part of the file — confirm at the call site.
    """
    _fields_ = (
        ('id', ctypes.c_uint),
        ('mnemonic', ctypes.c_char_p),
    )
388
# Helper used below to declare the C signature of each library function.
def _setup_prototype(lib, fname, restype, *argtypes):
    """Set the return type and argument types of ``lib.<fname>`` for ctypes."""
    func = getattr(lib, fname)
    func.restype = restype
    func.argtypes = argtypes
393
# C signatures of every library function used by this module.
# Argument order is (library, function name, return type, *argument types).
# The engine handle (csh) is passed by value as c_size_t, and by reference as
# POINTER(c_size_t) to cs_open/cs_close.
_setup_prototype(_cs, "cs_open", ctypes.c_int, ctypes.c_uint, ctypes.c_uint, ctypes.POINTER(ctypes.c_size_t))
_setup_prototype(_cs, "cs_disasm", ctypes.c_size_t, ctypes.c_size_t, ctypes.POINTER(ctypes.c_char), ctypes.c_size_t, \
        ctypes.c_uint64, ctypes.c_size_t, ctypes.POINTER(ctypes.POINTER(_cs_insn)))
_setup_prototype(_cs, "cs_free", None, ctypes.c_void_p, ctypes.c_size_t)
_setup_prototype(_cs, "cs_close", ctypes.c_int, ctypes.POINTER(ctypes.c_size_t))
_setup_prototype(_cs, "cs_reg_name", ctypes.c_char_p, ctypes.c_size_t, ctypes.c_uint)
_setup_prototype(_cs, "cs_insn_name", ctypes.c_char_p, ctypes.c_size_t, ctypes.c_uint)
_setup_prototype(_cs, "cs_group_name", ctypes.c_char_p, ctypes.c_size_t, ctypes.c_uint)
_setup_prototype(_cs, "cs_op_count", ctypes.c_int, ctypes.c_size_t, ctypes.POINTER(_cs_insn), ctypes.c_uint)
_setup_prototype(_cs, "cs_op_index", ctypes.c_int, ctypes.c_size_t, ctypes.POINTER(_cs_insn), ctypes.c_uint, ctypes.c_uint)
_setup_prototype(_cs, "cs_errno", ctypes.c_int, ctypes.c_size_t)
_setup_prototype(_cs, "cs_option", ctypes.c_int, ctypes.c_size_t, ctypes.c_int, ctypes.c_void_p)
_setup_prototype(_cs, "cs_version", ctypes.c_int, ctypes.POINTER(ctypes.c_int), ctypes.POINTER(ctypes.c_int))
_setup_prototype(_cs, "cs_support", ctypes.c_bool, ctypes.c_int)
_setup_prototype(_cs, "cs_strerror", ctypes.c_char_p, ctypes.c_int)
_setup_prototype(_cs, "cs_regs_access", ctypes.c_int, ctypes.c_size_t, ctypes.POINTER(_cs_insn), ctypes.POINTER(ctypes.c_uint16*64), ctypes.POINTER(ctypes.c_uint8), ctypes.POINTER(ctypes.c_uint16*64), ctypes.POINTER(ctypes.c_uint8))
410
411
# access to error code via @errno of CsError
class CsError(Exception):
    """Exception raised for Capstone errors.

    ``errno`` holds the CS_ERR_* code.  ``str(error)`` asks the core library
    for the matching message via ``cs_strerror``.
    """

    def __init__(self, errno):
        # Forward errno to Exception so that e.args is populated; without
        # this the exception cannot be pickled/copied (re-creation calls
        # CsError(*args) and would miss the required argument).
        Exception.__init__(self, errno)
        self.errno = errno

    def __str__(self):
        message = _cs.cs_strerror(self.errno)
        # ctypes returns a native str on Python 2 but bytes on Python 3
        return message if isinstance(message, str) else message.decode()
424
425
# return the core's version
def cs_version():
    """Return ``(major, minor, combined)`` as reported by the loaded core library.

    ``major`` and ``minor`` are filled in by the C call through output
    arguments; ``combined`` is the integer the C call itself returns.
    """
    out_major, out_minor = ctypes.c_int(), ctypes.c_int()
    packed = _cs.cs_version(ctypes.byref(out_major), ctypes.byref(out_minor))
    return (out_major.value, out_minor.value, packed)
432
433
# return the binding's version
def version_bind():
    """Return ``(major, minor, combined)`` for these bindings, where combined = (major << 8) + minor."""
    combined = (CS_API_MAJOR << 8) + CS_API_MINOR
    return (CS_API_MAJOR, CS_API_MINOR, combined)
437
438
def cs_support(query):
    """Ask the core library whether *query* is supported.

    *query* is an architecture id (CS_ARCH_*) or one of the CS_SUPPORT_*
    ids; the result is the boolean returned by the C ``cs_support`` call.
    """
    supported = _cs.cs_support(query)
    return supported
441
442
# dummy class resembling Cs class, just for cs_disasm_quick()
# CsInsn reads four fields from its owner: @csh, @arch, @_detail and @_diet
class _dummy_cs(object):
    """Minimal stand-in for ``Cs`` used by the instructions of ``cs_disasm_quick``."""

    def __init__(self, csh, arch):
        self.csh = csh
        self.arch = arch
        # quick mode never enables instruction details
        self._detail = False
        # CsInsn.mnemonic/op_str/reg_name/... check _diet on their owner;
        # without this attribute those accessors fail on quick-mode insns.
        self._diet = cs_support(CS_SUPPORT_DIET)
450
451
# Quick & dirty Python function to disasm raw binary code
# This function return CsInsn objects
# NOTE: you might want to use more efficient Cs class & its methods.
def cs_disasm_quick(arch, mode, code, offset, count=0):
    """Generator yielding a ``CsInsn`` for each instruction decoded from *code*.

    A temporary engine is opened for (*arch*, *mode*), *code* is disassembled
    starting at address *offset* (*count* is forwarded to ``cs_disasm``), and
    the engine is closed again when the generator finishes, fails, or is
    closed early.  Nothing runs until the first item is requested.

    Raises CsError if the core's version differs from the bindings', if the
    engine cannot be opened, if the core reports an error when nothing was
    decoded, or if closing the engine fails after normal completion.
    """
    # verify version compatibility with the core before doing anything
    (major, minor, _combined) = cs_version()
    if major != CS_API_MAJOR or minor != CS_API_MINOR:
        # our binding version is different from the core's API version
        raise CsError(CS_ERR_VERSION)

    csh = ctypes.c_size_t()
    status = _cs.cs_open(arch, mode, ctypes.byref(csh))
    if status != CS_ERR_OK:
        raise CsError(status)

    # From here on the handle is open: the outer try/finally guarantees it is
    # closed on every exit path, including an error or an abandoned generator.
    try:
        all_insn = ctypes.POINTER(_cs_insn)()
        res = _cs.cs_disasm(csh, code, len(code), offset, count, ctypes.byref(all_insn))
        if res > 0:
            try:
                # one owner object is enough for all instructions of this run
                owner = _dummy_cs(csh, arch)
                for i in range(res):
                    yield CsInsn(owner, all_insn[i])
            finally:
                _cs.cs_free(all_insn, res)
        else:
            status = _cs.cs_errno(csh)
            if status != CS_ERR_OK:
                raise CsError(status)
    finally:
        close_status = _cs.cs_close(ctypes.byref(csh))

    # only reached on normal completion; an in-flight exception wins otherwise
    if close_status != CS_ERR_OK:
        raise CsError(close_status)
485
486
# Another quick, but lighter function to disasm raw binary code.
# This function is faster than cs_disasm_quick() around 20% because
# cs_disasm_lite() only return tuples of (address, size, mnemonic, op_str),
# rather than CsInsn objects.
# NOTE: you might want to use more efficient Cs class & its methods.
def cs_disasm_lite(arch, mode, code, offset, count=0):
    """Generator yielding ``(address, size, mnemonic, op_str)`` for each instruction in *code*.

    A temporary engine is opened for (*arch*, *mode*), *code* is disassembled
    starting at address *offset* (*count* is forwarded to ``cs_disasm``), and
    the engine is closed again when the generator finishes, fails, or is
    closed early.  Nothing runs until the first item is requested.

    Raises CsError if the core's version differs from the bindings', if the
    core is a diet build, if the engine cannot be opened, if the core reports
    an error when nothing was decoded, or if closing the engine fails after
    normal completion.
    """
    # verify version compatibility with the core before doing anything
    (major, minor, _combined) = cs_version()
    if major != CS_API_MAJOR or minor != CS_API_MINOR:
        # our binding version is different from the core's API version
        raise CsError(CS_ERR_VERSION)

    if cs_support(CS_SUPPORT_DIET):
        # Diet engine cannot provide @mnemonic & @op_str
        raise CsError(CS_ERR_DIET)

    csh = ctypes.c_size_t()
    status = _cs.cs_open(arch, mode, ctypes.byref(csh))
    if status != CS_ERR_OK:
        raise CsError(status)

    # From here on the handle is open: the outer try/finally guarantees it is
    # closed on every exit path, including an error or an abandoned generator.
    try:
        all_insn = ctypes.POINTER(_cs_insn)()
        res = _cs.cs_disasm(csh, code, len(code), offset, count, ctypes.byref(all_insn))
        if res > 0:
            try:
                for i in range(res):
                    insn = all_insn[i]
                    yield (insn.address, insn.size, insn.mnemonic.decode('ascii'), insn.op_str.decode('ascii'))
            finally:
                _cs.cs_free(all_insn, res)
        else:
            status = _cs.cs_errno(csh)
            if status != CS_ERR_OK:
                raise CsError(status)
    finally:
        close_status = _cs.cs_close(ctypes.byref(csh))

    # only reached on normal completion; an in-flight exception wins otherwise
    if close_status != CS_ERR_OK:
        raise CsError(close_status)
527
def _ascii_name_or_default(name, default):
    """Decode the byte string *name* as ASCII, or return *default* when *name* is None."""
    if name is None:
        return default
    return name.decode('ascii')
530
531
532# Python-style class to disasm code
class CsInsn(object):
    """One disassembled instruction.

    Wraps a private copy of the raw ``_cs_insn`` record (and of its detail
    record when the owning engine has detail enabled), so the object stays
    usable after the C array it came from has been freed.  ``cs`` is the
    owning engine; only its ``csh``, ``arch``, ``_detail`` and ``_diet``
    attributes are used here.

    Architecture-specific attributes (``operands``, ``cc``, ``prefix`` ...)
    are decoded lazily on first access through ``__getattr__``.
    An instruction whose id is 0 is a SKIPDATA "data" entry: most detail
    accessors raise ``CsError(CS_ERR_SKIPDATA)`` for it.
    """

    def __init__(self, cs, all_info):
        self._raw = copy_ctypes(all_info)
        self._cs = cs
        if self._cs._detail and self._raw.id != 0:
            # save detail: the bitwise copy above only duplicated the pointer,
            # so give this object its own copy of the pointed-to record
            self._raw.detail = ctypes.pointer(all_info.detail._type_())
            ctypes.memmove(ctypes.byref(self._raw.detail[0]), ctypes.byref(all_info.detail[0]), ctypes.sizeof(type(all_info.detail[0])))

    def __repr__(self):
        # format the bytes by hand: bytearray.hex() does not exist on Python 2
        hex_bytes = ''.join('%02x' % b for b in self.bytes)
        return '<CsInsn 0x%x [%s]: %s %s>' % (self.address, hex_bytes, self.mnemonic, self.op_str)

    # return instruction's ID.
    @property
    def id(self):
        return self._raw.id

    # return instruction's address.
    @property
    def address(self):
        return self._raw.address

    # return instruction's size.
    @property
    def size(self):
        return self._raw.size

    # return instruction's machine bytes (which should have @size bytes).
    @property
    def bytes(self):
        return bytearray(self._raw.bytes)[:self._raw.size]

    # return instruction's mnemonic.
    @property
    def mnemonic(self):
        if self._cs._diet:
            # Diet engine cannot provide @mnemonic.
            raise CsError(CS_ERR_DIET)

        return self._raw.mnemonic.decode('ascii')

    # return instruction's operands (in string).
    @property
    def op_str(self):
        if self._cs._diet:
            # Diet engine cannot provide @op_str.
            raise CsError(CS_ERR_DIET)

        return self._raw.op_str.decode('ascii')

    # return list of all implicit registers being read.
    @property
    def regs_read(self):
        if self._raw.id == 0:
            raise CsError(CS_ERR_SKIPDATA)

        if self._cs._diet:
            # Diet engine cannot provide @regs_read.
            raise CsError(CS_ERR_DIET)

        if self._cs._detail:
            return self._raw.detail.contents.regs_read[:self._raw.detail.contents.regs_read_count]

        raise CsError(CS_ERR_DETAIL)

    # return list of all implicit registers being modified
    @property
    def regs_write(self):
        if self._raw.id == 0:
            raise CsError(CS_ERR_SKIPDATA)

        if self._cs._diet:
            # Diet engine cannot provide @regs_write
            raise CsError(CS_ERR_DIET)

        if self._cs._detail:
            return self._raw.detail.contents.regs_write[:self._raw.detail.contents.regs_write_count]

        raise CsError(CS_ERR_DETAIL)

    # return list of semantic groups this instruction belongs to.
    @property
    def groups(self):
        if self._raw.id == 0:
            raise CsError(CS_ERR_SKIPDATA)

        if self._cs._diet:
            # Diet engine cannot provide @groups
            raise CsError(CS_ERR_DIET)

        if self._cs._detail:
            return self._raw.detail.contents.groups[:self._raw.detail.contents.groups_count]

        raise CsError(CS_ERR_DETAIL)

    # decode the architecture-specific detail record into instance attributes
    def __gen_detail(self):
        if self._raw.id == 0:
            # do nothing in skipdata mode
            return

        arch = self._cs.arch
        if arch == CS_ARCH_ARM:
            (self.usermode, self.vector_size, self.vector_data, self.cps_mode, self.cps_flag, self.cc, self.update_flags, \
            self.writeback, self.mem_barrier, self.operands) = arm.get_arch_info(self._raw.detail.contents.arch.arm)
        elif arch == CS_ARCH_ARM64:
            (self.cc, self.update_flags, self.writeback, self.operands) = \
                arm64.get_arch_info(self._raw.detail.contents.arch.arm64)
        elif arch == CS_ARCH_X86:
            (self.prefix, self.opcode, self.rex, self.addr_size, \
                self.modrm, self.sib, self.disp, \
                self.sib_index, self.sib_scale, self.sib_base, self.xop_cc, self.sse_cc, \
                self.avx_cc, self.avx_sae, self.avx_rm, self.eflags, \
                self.modrm_offset, self.disp_offset, self.disp_size, self.imm_offset, self.imm_size, \
                self.operands) = x86.get_arch_info(self._raw.detail.contents.arch.x86)
        elif arch == CS_ARCH_M68K:
            (self.operands, self.op_size) = m68k.get_arch_info(self._raw.detail.contents.arch.m68k)
        elif arch == CS_ARCH_MIPS:
            self.operands = mips.get_arch_info(self._raw.detail.contents.arch.mips)
        elif arch == CS_ARCH_PPC:
            (self.bc, self.bh, self.update_cr0, self.operands) = \
                ppc.get_arch_info(self._raw.detail.contents.arch.ppc)
        elif arch == CS_ARCH_SPARC:
            (self.cc, self.hint, self.operands) = sparc.get_arch_info(self._raw.detail.contents.arch.sparc)
        elif arch == CS_ARCH_SYSZ:
            (self.cc, self.operands) = systemz.get_arch_info(self._raw.detail.contents.arch.sysz)
        elif arch == CS_ARCH_XCORE:
            (self.operands) = xcore.get_arch_info(self._raw.detail.contents.arch.xcore)
        elif arch == CS_ARCH_TMS320C64X:
            (self.condition, self.funit, self.parallel, self.operands) = tms320c64x.get_arch_info(self._raw.detail.contents.arch.tms320c64x)
        elif arch == CS_ARCH_M680X:
            (self.flags, self.operands) = m680x.get_arch_info(self._raw.detail.contents.arch.m680x)
        elif arch == CS_ARCH_EVM:
            (self.pop, self.push, self.fee) = evm.get_arch_info(self._raw.detail.contents.arch.evm)
        elif arch == CS_ARCH_MOS65XX:
            (self.am, self.modifies_flags, self.operands) = mos65xx.get_arch_info(self._raw.detail.contents.arch.mos65xx)

    # Called only for attributes not found the normal way: lazily decodes
    # the architecture-specific details, then serves the attribute from them.
    def __getattr__(self, name):
        attr = object.__getattribute__
        _dict = attr(self, '__dict__')
        if '_cs' not in _dict or '_raw' not in _dict:
            # Instance not (yet) initialised, e.g. probed by copy/pickle right
            # after __new__: touching self._cs here would re-enter this
            # method and recurse without end.
            raise AttributeError(name)
        if name.startswith('__') and name.endswith('__'):
            # protocol probes (copy, pickle, hasattr ...) expect AttributeError
            raise AttributeError(name)

        if not _dict['_cs']._detail:
            raise CsError(CS_ERR_DETAIL)

        if 'operands' not in _dict:
            self.__gen_detail()
        if name not in _dict:
            if self._raw.id == 0:
                raise CsError(CS_ERR_SKIPDATA)
            raise AttributeError(name)
        return _dict[name]

    # get the last error code
    def errno(self):
        return _cs.cs_errno(self._cs.csh)

    # get the register name, given the register ID
    def reg_name(self, reg_id, default=None):
        if self._cs._diet:
            # Diet engine cannot provide register name
            raise CsError(CS_ERR_DIET)

        return _ascii_name_or_default(_cs.cs_reg_name(self._cs.csh, reg_id), default)

    # get the instruction name
    def insn_name(self, default=None):
        if self._cs._diet:
            # Diet engine cannot provide instruction name
            raise CsError(CS_ERR_DIET)

        if self._raw.id == 0:
            return default

        return _ascii_name_or_default(_cs.cs_insn_name(self._cs.csh, self.id), default)

    # get the group name
    def group_name(self, group_id, default=None):
        if self._cs._diet:
            # Diet engine cannot provide group name
            raise CsError(CS_ERR_DIET)

        return _ascii_name_or_default(_cs.cs_group_name(self._cs.csh, group_id), default)

    # verify if this insn belong to group with id as @group_id
    def group(self, group_id):
        if self._raw.id == 0:
            raise CsError(CS_ERR_SKIPDATA)

        if self._cs._diet:
            # Diet engine cannot provide group information
            raise CsError(CS_ERR_DIET)

        return group_id in self.groups

    # verify if this instruction implicitly read register @reg_id
    def reg_read(self, reg_id):
        if self._raw.id == 0:
            raise CsError(CS_ERR_SKIPDATA)

        if self._cs._diet:
            # Diet engine cannot provide regs_read information
            raise CsError(CS_ERR_DIET)

        return reg_id in self.regs_read

    # verify if this instruction implicitly modified register @reg_id
    def reg_write(self, reg_id):
        if self._raw.id == 0:
            raise CsError(CS_ERR_SKIPDATA)

        if self._cs._diet:
            # Diet engine cannot provide regs_write information
            raise CsError(CS_ERR_DIET)

        return reg_id in self.regs_write

    # return number of operands having same operand type @op_type
    def op_count(self, op_type):
        if self._raw.id == 0:
            raise CsError(CS_ERR_SKIPDATA)

        return sum(1 for op in self.operands if op.type == op_type)

    # get the operand at position @position of all operands having the same type @op_type
    # @position is 1-based; returns None when there is no such operand.
    def op_find(self, op_type, position):
        if self._raw.id == 0:
            raise CsError(CS_ERR_SKIPDATA)

        c = 0
        for op in self.operands:
            if op.type == op_type:
                c += 1
                # compare only after counting a matching operand, so that an
                # operand of another type can never be returned (position 0)
                if c == position:
                    return op
        return None

    # Return (list-of-registers-read, list-of-registers-modified) by this instructions.
    # This includes all the implicit & explicit registers.
    def regs_access(self):
        if self._raw.id == 0:
            raise CsError(CS_ERR_SKIPDATA)

        regs_read = (ctypes.c_uint16 * 64)()
        regs_read_count = ctypes.c_uint8()
        regs_write = (ctypes.c_uint16 * 64)()
        regs_write_count = ctypes.c_uint8()

        status = _cs.cs_regs_access(self._cs.csh, self._raw, ctypes.byref(regs_read), ctypes.byref(regs_read_count), ctypes.byref(regs_write), ctypes.byref(regs_write_count))
        if status != CS_ERR_OK:
            raise CsError(status)

        # keep only the entries the core filled in; an empty result is ()
        if regs_read_count.value > 0:
            regs_read = regs_read[:regs_read_count.value]
        else:
            regs_read = ()

        if regs_write_count.value > 0:
            regs_write = regs_write[:regs_write_count.value]
        else:
            regs_write = ()

        return (regs_read, regs_write)
800
801
802
803class Cs(object):
804    def __init__(self, arch, mode):
805        # verify version compatibility with the core before doing anything
806        (major, minor, _combined) = cs_version()
807        if major != CS_API_MAJOR or minor != CS_API_MINOR:
808            self.csh = None
809            # our binding version is different from the core's API version
810            raise CsError(CS_ERR_VERSION)
811
812        self.arch, self._mode = arch, mode
813        self.csh = ctypes.c_size_t()
814        status = _cs.cs_open(arch, mode, ctypes.byref(self.csh))
815        if status != CS_ERR_OK:
816            self.csh = None
817            raise CsError(status)
818
819        try:
820            import ccapstone
821            # rewire disasm to use the faster version
822            self.disasm = ccapstone.Cs(self).disasm
823        except:
824            pass
825
826        if arch == CS_ARCH_X86:
827            # Intel syntax is default for X86
828            self._syntax = CS_OPT_SYNTAX_INTEL
829        else:
830            self._syntax = None
831
832        self._detail = False  # by default, do not produce instruction details
833        self._imm_unsigned = False  # by default, print immediate operands as signed numbers
834        self._diet = cs_support(CS_SUPPORT_DIET)
835        self._x86reduce = cs_support(CS_SUPPORT_X86_REDUCE)
836
837        # default mnemonic for SKIPDATA
838        self._skipdata_mnem = ".byte"
839        self._skipdata_cb = (None, None)
840        # store reference to option object to avoid it being freed
841        # because C code uses it by reference
842        self._skipdata_opt = _cs_opt_skipdata()
843        self._skipdata = False
844
845
846
847    # destructor to be called automatically when object is destroyed.
848    def __del__(self):
849        if self.csh:
850            try:
851                status = _cs.cs_close(ctypes.byref(self.csh))
852                if status != CS_ERR_OK:
853                    raise CsError(status)
854            except: # _cs might be pulled from under our feet
855                pass
856
857
858    # def option(self, opt_type, opt_value):
859    #    return _cs.cs_option(self.csh, opt_type, opt_value)
860
861
862    # is this a diet engine?
863    @property
864    def diet(self):
865        return self._diet
866
867
868    # is this engine compiled with X86-reduce option?
869    @property
870    def x86_reduce(self):
871        return self._x86reduce
872
873
874    # return assembly syntax.
875    @property
876    def syntax(self):
877        return self._syntax
878
879
880    # syntax setter: modify assembly syntax.
881    @syntax.setter
882    def syntax(self, style):
883        status = _cs.cs_option(self.csh, CS_OPT_SYNTAX, style)
884        if status != CS_ERR_OK:
885            raise CsError(status)
886        # save syntax
887        self._syntax = style
888
889
890    # return current skipdata status
891    @property
892    def skipdata(self):
893        return self._skipdata
894
895
896    # setter: modify skipdata status
897    @skipdata.setter
898    def skipdata(self, opt):
899        if opt == False:
900            status = _cs.cs_option(self.csh, CS_OPT_SKIPDATA, CS_OPT_OFF)
901        else:
902            status = _cs.cs_option(self.csh, CS_OPT_SKIPDATA, CS_OPT_ON)
903        if status != CS_ERR_OK:
904            raise CsError(status)
905
906        # save this option
907        self._skipdata = opt
908
909
910    @property
911    def skipdata_setup(self):
912        return (self._skipdata_mnem,) + self._skipdata_cb
913
914
915    @skipdata_setup.setter
916    def skipdata_setup(self, opt):
917        _mnem, _cb, _ud = opt
918        self._skipdata_opt.mnemonic = _mnem.encode()
919        self._skipdata_opt.callback = CS_SKIPDATA_CALLBACK(_cb or 0)
920        self._skipdata_opt.user_data = ctypes.cast(_ud, ctypes.c_void_p)
921        status = _cs.cs_option(self.csh, CS_OPT_SKIPDATA_SETUP, ctypes.cast(ctypes.byref(self._skipdata_opt), ctypes.c_void_p))
922        if status != CS_ERR_OK:
923            raise CsError(status)
924
925        self._skipdata_mnem = _mnem
926        self._skipdata_cb = (_cb, _ud)
927
928
929    @property
930    def skipdata_mnem(self):
931        return self._skipdata_mnem
932
933
934    @skipdata_mnem.setter
935    def skipdata_mnem(self, mnem):
936        self.skipdata_setup = (mnem,) + self._skipdata_cb
937
938
939    @property
940    def skipdata_callback(self):
941        return self._skipdata_cb
942
943
944    @skipdata_callback.setter
945    def skipdata_callback(self, val):
946        if not isinstance(val, tuple):
947            val = (val, None)
948        func, data = val
949        self.skipdata_setup = (self._skipdata_mnem, func, data)
950
951
952    # customize instruction mnemonic
953    def mnemonic_setup(self, id, mnem):
954        _mnem_opt = _cs_opt_mnem()
955        _mnem_opt.id = id
956        if mnem:
957            _mnem_opt.mnemonic = mnem.encode()
958        else:
959            _mnem_opt.mnemonic = mnem
960        status = _cs.cs_option(self.csh, CS_OPT_MNEMONIC, ctypes.cast(ctypes.byref(_mnem_opt), ctypes.c_void_p))
961        if status != CS_ERR_OK:
962            raise CsError(status)
963
964
965    # check to see if this engine supports a particular arch,
966    # or diet mode (depending on @query).
967    def support(self, query):
968        return cs_support(query)
969
970
    # is detail mode enabled?
972    @property
973    def detail(self):
974        return self._detail
975
976
977    # modify detail mode.
978    @detail.setter
979    def detail(self, opt):  # opt is boolean type, so must be either 'True' or 'False'
980        if opt == False:
981            status = _cs.cs_option(self.csh, CS_OPT_DETAIL, CS_OPT_OFF)
982        else:
983            status = _cs.cs_option(self.csh, CS_OPT_DETAIL, CS_OPT_ON)
984        if status != CS_ERR_OK:
985            raise CsError(status)
986        # save detail
987        self._detail = opt
988
989
    # are immediate operands printed as unsigned numbers?
991    @property
992    def imm_unsigned(self):
993        return self._imm_unsigned
994
995
    # choose whether immediate operands are printed as unsigned numbers.
997    @imm_unsigned.setter
998    def imm_unsigned(self, opt):  # opt is boolean type, so must be either 'True' or 'False'
999        if opt == False:
1000            status = _cs.cs_option(self.csh, CS_OPT_UNSIGNED, CS_OPT_OFF)
1001        else:
1002            status = _cs.cs_option(self.csh, CS_OPT_UNSIGNED, CS_OPT_ON)
1003        if status != CS_ERR_OK:
1004            raise CsError(status)
1005        # save detail
1006        self._imm_unsigned = opt
1007
1008
1009    # return disassembly mode of this engine.
1010    @property
1011    def mode(self):
1012        return self._mode
1013
1014
1015    # modify engine's mode at run-time.
1016    @mode.setter
1017    def mode(self, opt):  # opt is new disasm mode, of int type
1018        status = _cs.cs_option(self.csh, CS_OPT_MODE, opt)
1019        if status != CS_ERR_OK:
1020            raise CsError(status)
1021        # save mode
1022        self._mode = opt
1023
1024    # get the last error code
1025    def errno(self):
1026        return _cs.cs_errno(self.csh)
1027
1028    # get the register name, given the register ID
1029    def reg_name(self, reg_id, default=None):
1030        if self._diet:
1031            # Diet engine cannot provide register name
1032            raise CsError(CS_ERR_DIET)
1033
1034        return _ascii_name_or_default(_cs.cs_reg_name(self.csh, reg_id), default)
1035
1036    # get the instruction name, given the instruction ID
1037    def insn_name(self, insn_id, default=None):
1038        if self._diet:
1039            # Diet engine cannot provide instruction name
1040            raise CsError(CS_ERR_DIET)
1041
1042        return _ascii_name_or_default(_cs.cs_insn_name(self.csh, insn_id), default)
1043
1044    # get the group name
1045    def group_name(self, group_id, default=None):
1046        if self._diet:
1047            # Diet engine cannot provide group name
1048            raise CsError(CS_ERR_DIET)
1049
1050        return _ascii_name_or_default(_cs.cs_group_name(self.csh, group_id), default)
1051
1052    # Disassemble binary & return disassembled instructions in CsInsn objects
1053    def disasm(self, code, offset, count=0):
1054        all_insn = ctypes.POINTER(_cs_insn)()
1055        '''if not _python2:
1056            print(code)
1057            code = code.encode()
1058            print(code)'''
1059        # Pass a bytearray by reference
1060        size = len(code)
1061        if isinstance(code, bytearray):
1062            code = ctypes.byref(ctypes.c_char.from_buffer(code))
1063        res = _cs.cs_disasm(self.csh, code, size, offset, count, ctypes.byref(all_insn))
1064        if res > 0:
1065            try:
1066                for i in range(res):
1067                    yield CsInsn(self, all_insn[i])
1068            finally:
1069                _cs.cs_free(all_insn, res)
1070        else:
1071            status = _cs.cs_errno(self.csh)
1072            if status != CS_ERR_OK:
1073                raise CsError(status)
1074            return
1075            yield
1076
1077
    # Light function to disassemble binary. This is about 20% faster than disasm() because
    # unlike disasm(), disasm_lite() only returns tuples of (address, size, mnemonic, op_str),
    # rather than CsInsn objects.
1081    def disasm_lite(self, code, offset, count=0):
1082        if self._diet:
1083            # Diet engine cannot provide @mnemonic & @op_str
1084            raise CsError(CS_ERR_DIET)
1085
1086        all_insn = ctypes.POINTER(_cs_insn)()
1087        size = len(code)
1088        # Pass a bytearray by reference
1089        if isinstance(code, bytearray):
1090            code = ctypes.byref(ctypes.c_char.from_buffer(code))
1091        res = _cs.cs_disasm(self.csh, code, size, offset, count, ctypes.byref(all_insn))
1092        if res > 0:
1093            try:
1094                for i in range(res):
1095                    insn = all_insn[i]
1096                    yield (insn.address, insn.size, insn.mnemonic.decode('ascii'), insn.op_str.decode('ascii'))
1097            finally:
1098                _cs.cs_free(all_insn, res)
1099        else:
1100            status = _cs.cs_errno(self.csh)
1101            if status != CS_ERR_OK:
1102                raise CsError(status)
1103            return
1104            yield
1105
1106
1107# print out debugging info
def debug():
    """Return a one-line description of this binding and the core library.

    When the compiled `ccapstone` module can be imported and its debug()
    succeeds, that result is returned. Otherwise the string is built here as
    "python-<diet|standard>[-<arch>...]-c<major>.<minor>-b<api major>.<api minor>",
    listing the supported architectures in sorted order with x86 last.
    """
    # is Cython there?
    try:
        from . import ccapstone
        return ccapstone.debug()
    except Exception:
        # no Cython, fallback to Python code below.
        # KeyboardInterrupt/SystemExit are deliberately not swallowed here.
        pass

    diet = "diet" if cs_support(CS_SUPPORT_DIET) else "standard"

    archs = {
        "arm": CS_ARCH_ARM, "arm64": CS_ARCH_ARM64, "m68k": CS_ARCH_M68K,
        "mips": CS_ARCH_MIPS, "ppc": CS_ARCH_PPC, "sparc": CS_ARCH_SPARC,
        "sysz": CS_ARCH_SYSZ, 'xcore': CS_ARCH_XCORE, "tms320c64x": CS_ARCH_TMS320C64X,
        "m680x": CS_ARCH_M680X, 'evm': CS_ARCH_EVM, 'mos65xx': CS_ARCH_MOS65XX,
    }

    # one "-<name>" fragment per supported architecture, in sorted order
    fragments = ["-%s" % name for name in sorted(archs) if cs_support(archs[name])]

    # x86 is reported last so that the "_reduce" suffix can follow it
    if cs_support(CS_ARCH_X86):
        fragments.append("-x86")
        if cs_support(CS_SUPPORT_X86_REDUCE):
            fragments.append("_reduce")

    all_archs = "".join(fragments)

    (major, minor, _combined) = cs_version()

    return "python-%s%s-c%u.%u-b%u.%u" % (diet, all_archs, major, minor, CS_API_MAJOR, CS_API_MINOR)
1141