1 // Capstone Java binding 2 // By Nguyen Anh Quynh & Dang Hoang Vu, 2013 3 4 package capstone; 5 6 import com.sun.jna.Library; 7 import com.sun.jna.Native; 8 import com.sun.jna.NativeLong; 9 import com.sun.jna.ptr.NativeLongByReference; 10 import com.sun.jna.Structure; 11 import com.sun.jna.Union; 12 import com.sun.jna.Pointer; 13 import com.sun.jna.ptr.PointerByReference; 14 import com.sun.jna.ptr.IntByReference; 15 16 import java.util.List; 17 import java.util.Arrays; 18 import java.lang.RuntimeException; 19 20 public class Capstone { 21 22 protected static abstract class OpInfo {}; 23 protected static abstract class UnionOpInfo extends Structure {}; 24 25 public static class UnionArch extends Union { 26 public static class ByValue extends UnionArch implements Union.ByValue {}; 27 28 public Arm.UnionOpInfo arm; 29 public Arm64.UnionOpInfo arm64; 30 public X86.UnionOpInfo x86; 31 public Mips.UnionOpInfo mips; 32 public Ppc.UnionOpInfo ppc; 33 public Sparc.UnionOpInfo sparc; 34 public Systemz.UnionOpInfo sysz; 35 public Xcore.UnionOpInfo xcore; 36 } 37 38 protected static class _cs_insn extends Structure { 39 // instruction ID. 40 public int id; 41 // instruction address. 42 public long address; 43 // instruction size. 44 public short size; 45 // machine bytes of instruction. 46 public byte[] bytes; 47 // instruction mnemonic. NOTE: irrelevant for diet engine. 48 public byte[] mnemonic; 49 // instruction operands. NOTE: irrelevant for diet engine. 50 public byte[] op_str; 51 // detail information of instruction. 52 public _cs_detail.ByReference cs_detail; 53 _cs_insn()54 public _cs_insn() { 55 bytes = new byte[16]; 56 mnemonic = new byte[32]; 57 op_str = new byte[160]; 58 java.util.Arrays.fill(mnemonic, (byte) 0); 59 java.util.Arrays.fill(op_str, (byte) 0); 60 } 61 _cs_insn(Pointer p)62 public _cs_insn(Pointer p) { 63 this(); 64 useMemory(p); 65 read(); 66 } 67 68 @Override getFieldOrder()69 public List getFieldOrder() { 70 return Arrays.asList("id", "address", "size", "bytes", "mnemonic", "op_str", "cs_detail"); 71 } 72 } 73 74 protected static class _cs_detail extends Structure { 75 public static class ByReference extends _cs_detail implements Structure.ByReference {}; 76 77 // list of all implicit registers being read. 78 public byte[] regs_read = new byte[12]; 79 public byte regs_read_count; 80 // list of all implicit registers being written. 81 public byte[] regs_write = new byte[20]; 82 public byte regs_write_count; 83 // list of semantic groups this instruction belongs to. 84 public byte[] groups = new byte[8]; 85 public byte groups_count; 86 87 public UnionArch arch; 88 89 @Override getFieldOrder()90 public List getFieldOrder() { 91 return Arrays.asList("regs_read", "regs_read_count", "regs_write", "regs_write_count", "groups", "groups_count", "arch"); 92 } 93 } 94 95 public static class CsInsn { 96 private NativeLong csh; 97 private CS cs; 98 private _cs_insn raw; 99 private int arch; 100 101 // instruction ID. 102 public int id; 103 // instruction address. 104 public long address; 105 // instruction size. 106 public short size; 107 // instruction mnemonic. NOTE: irrelevant for diet engine. 108 public String mnemonic; 109 // instruction operands. NOTE: irrelevant for diet engine. 110 public String opStr; 111 // list of all implicit registers being read. 112 public byte[] regsRead; 113 // list of all implicit registers being written. 114 public byte[] regsWrite; 115 // list of semantic groups this instruction belongs to. 116 public byte[] groups; 117 public OpInfo operands; 118 CsInsn(_cs_insn insn, int _arch, NativeLong _csh, CS _cs, boolean diet)119 public CsInsn (_cs_insn insn, int _arch, NativeLong _csh, CS _cs, boolean diet) { 120 id = insn.id; 121 address = insn.address; 122 size = insn.size; 123 124 if (!diet) { 125 int lm = 0; 126 while (insn.mnemonic[lm++] != 0); 127 int lo = 0; 128 while (insn.op_str[lo++] != 0); 129 mnemonic = new String(insn.mnemonic, 0, lm-1); 130 opStr = new String(insn.op_str, 0, lo-1); 131 } 132 133 cs = _cs; 134 arch = _arch; 135 raw = insn; 136 csh = _csh; 137 138 if (insn.cs_detail != null) { 139 if (!diet) { 140 regsRead = new byte[insn.cs_detail.regs_read_count]; 141 for (int i=0; i<regsRead.length; i++) 142 regsRead[i] = insn.cs_detail.regs_read[i]; 143 regsWrite = new byte[insn.cs_detail.regs_write_count]; 144 for (int i=0; i<regsWrite.length; i++) 145 regsWrite[i] = insn.cs_detail.regs_write[i]; 146 groups = new byte[insn.cs_detail.groups_count]; 147 for (int i=0; i<groups.length; i++) 148 groups[i] = insn.cs_detail.groups[i]; 149 } 150 151 operands = getOptInfo(insn.cs_detail); 152 } 153 } 154 getOptInfo(_cs_detail detail)155 private OpInfo getOptInfo(_cs_detail detail) { 156 OpInfo op_info = null; 157 158 switch (this.arch) { 159 case CS_ARCH_ARM: 160 detail.arch.setType(Arm.UnionOpInfo.class); 161 detail.arch.read(); 162 op_info = new Arm.OpInfo((Arm.UnionOpInfo) detail.arch.arm); 163 break; 164 case CS_ARCH_ARM64: 165 detail.arch.setType(Arm64.UnionOpInfo.class); 166 detail.arch.read(); 167 op_info = new Arm64.OpInfo((Arm64.UnionOpInfo) detail.arch.arm64); 168 break; 169 case CS_ARCH_MIPS: 170 detail.arch.setType(Mips.UnionOpInfo.class); 171 detail.arch.read(); 172 op_info = new Mips.OpInfo((Mips.UnionOpInfo) detail.arch.mips); 173 break; 174 case CS_ARCH_X86: 175 detail.arch.setType(X86.UnionOpInfo.class); 176 detail.arch.read(); 177 op_info = new X86.OpInfo((X86.UnionOpInfo) detail.arch.x86); 178 break; 179 case CS_ARCH_SPARC: 180 detail.arch.setType(Sparc.UnionOpInfo.class); 181 detail.arch.read(); 182 op_info = new Sparc.OpInfo((Sparc.UnionOpInfo) detail.arch.sparc); 183 break; 184 case CS_ARCH_SYSZ: 185 detail.arch.setType(Systemz.UnionOpInfo.class); 186 detail.arch.read(); 187 op_info = new Systemz.OpInfo((Systemz.UnionOpInfo) detail.arch.sysz); 188 break; 189 case CS_ARCH_PPC: 190 detail.arch.setType(Ppc.UnionOpInfo.class); 191 detail.arch.read(); 192 op_info = new Ppc.OpInfo((Ppc.UnionOpInfo) detail.arch.ppc); 193 break; 194 case CS_ARCH_XCORE: 195 detail.arch.setType(Xcore.UnionOpInfo.class); 196 detail.arch.read(); 197 op_info = new Xcore.OpInfo((Xcore.UnionOpInfo) detail.arch.xcore); 198 break; 199 default: 200 } 201 202 return op_info; 203 } 204 opCount(int type)205 public int opCount(int type) { 206 return cs.cs_op_count(csh, raw.getPointer(), type); 207 } 208 opIndex(int type, int index)209 public int opIndex(int type, int index) { 210 return cs.cs_op_index(csh, raw.getPointer(), type, index); 211 } 212 regRead(int reg_id)213 public boolean regRead(int reg_id) { 214 return cs.cs_reg_read(csh, raw.getPointer(), reg_id) != 0; 215 } 216 regWrite(int reg_id)217 public boolean regWrite(int reg_id) { 218 return cs.cs_reg_write(csh, raw.getPointer(), reg_id) != 0; 219 } 220 errno()221 public int errno() { 222 return cs.cs_errno(csh); 223 } 224 regName(int reg_id)225 public String regName(int reg_id) { 226 return cs.cs_reg_name(csh, reg_id); 227 } 228 insnName()229 public String insnName() { 230 return cs.cs_insn_name(csh, id); 231 } 232 groupName(int id)233 public String groupName(int id) { 234 return cs.cs_group_name(csh, id); 235 } 236 group(int gid)237 public boolean group(int gid) { 238 return cs.cs_insn_group(csh, raw.getPointer(), gid) != 0; 239 } 240 241 } 242 fromArrayRaw(_cs_insn[] arr_raw)243 private CsInsn[] fromArrayRaw(_cs_insn[] arr_raw) { 244 CsInsn[] arr = new CsInsn[arr_raw.length]; 245 246 for (int i = 0; i < arr_raw.length; i++) { 247 arr[i] = new CsInsn(arr_raw[i], this.arch, ns.csh, cs, this.diet); 248 } 249 250 return arr; 251 } 252 253 private interface CS extends Library { cs_open(int arch, int mode, NativeLongByReference handle)254 public int cs_open(int arch, int mode, NativeLongByReference handle); cs_disasm(NativeLong handle, byte[] code, NativeLong code_len, long addr, NativeLong count, PointerByReference insn)255 public NativeLong cs_disasm(NativeLong handle, byte[] code, NativeLong code_len, 256 long addr, NativeLong count, PointerByReference insn); cs_free(Pointer p, NativeLong count)257 public void cs_free(Pointer p, NativeLong count); cs_close(NativeLongByReference handle)258 public int cs_close(NativeLongByReference handle); cs_option(NativeLong handle, int option, NativeLong optionValue)259 public int cs_option(NativeLong handle, int option, NativeLong optionValue); 260 cs_reg_name(NativeLong csh, int id)261 public String cs_reg_name(NativeLong csh, int id); cs_op_count(NativeLong csh, Pointer insn, int type)262 public int cs_op_count(NativeLong csh, Pointer insn, int type); cs_op_index(NativeLong csh, Pointer insn, int type, int index)263 public int cs_op_index(NativeLong csh, Pointer insn, int type, int index); 264 cs_insn_name(NativeLong csh, int id)265 public String cs_insn_name(NativeLong csh, int id); cs_group_name(NativeLong csh, int id)266 public String cs_group_name(NativeLong csh, int id); cs_insn_group(NativeLong csh, Pointer insn, int id)267 public byte cs_insn_group(NativeLong csh, Pointer insn, int id); cs_reg_read(NativeLong csh, Pointer insn, int id)268 public byte cs_reg_read(NativeLong csh, Pointer insn, int id); cs_reg_write(NativeLong csh, Pointer insn, int id)269 public byte cs_reg_write(NativeLong csh, Pointer insn, int id); cs_errno(NativeLong csh)270 public int cs_errno(NativeLong csh); cs_version(IntByReference major, IntByReference minor)271 public int cs_version(IntByReference major, IntByReference minor); cs_support(int query)272 public boolean cs_support(int query); 273 } 274 275 // Capstone API version 276 public static final int CS_API_MAJOR = 3; 277 public static final int CS_API_MINOR = 0; 278 279 // architectures 280 public static final int CS_ARCH_ARM = 0; 281 public static final int CS_ARCH_ARM64 = 1; 282 public static final int CS_ARCH_MIPS = 2; 283 public static final int CS_ARCH_X86 = 3; 284 public static final int CS_ARCH_PPC = 4; 285 public static final int CS_ARCH_SPARC = 5; 286 public static final int CS_ARCH_SYSZ = 6; 287 public static final int CS_ARCH_XCORE = 7; 288 public static final int CS_ARCH_MAX = 8; 289 public static final int CS_ARCH_ALL = 0xFFFF; // query id for cs_support() 290 291 // disasm mode 292 public static final int CS_MODE_LITTLE_ENDIAN = 0; // little-endian mode (default mode) 293 public static final int CS_MODE_ARM = 0; // 32-bit ARM 294 public static final int CS_MODE_16 = 1 << 1; // 16-bit mode for X86 295 public static final int CS_MODE_32 = 1 << 2; // 32-bit mode for X86 296 public static final int CS_MODE_64 = 1 << 3; // 64-bit mode for X86, PPC 297 public static final int CS_MODE_THUMB = 1 << 4; // ARM's Thumb mode, including Thumb-2 298 public static final int CS_MODE_MCLASS = 1 << 5; // ARM's Cortex-M series 299 public static final int CS_MODE_V8 = 1 << 6; // ARMv8 A32 encodings for ARM 300 public static final int CS_MODE_MICRO = 1 << 4; // MicroMips mode (Mips arch) 301 public static final int CS_MODE_MIPS3 = 1 << 5; // Mips III ISA 302 public static final int CS_MODE_MIPS32R6 = 1 << 6; // Mips32r6 ISA 303 public static final int CS_MODE_MIPSGP64 = 1 << 7; // General Purpose Registers are 64-bit wide (MIPS arch) 304 public static final int CS_MODE_BIG_ENDIAN = 1 << 31; // big-endian mode 305 public static final int CS_MODE_V9 = 1 << 4; // SparcV9 mode (Sparc arch) 306 public static final int CS_MODE_MIPS32 = CS_MODE_32; // Mips32 ISA 307 public static final int CS_MODE_MIPS64 = CS_MODE_64; // Mips64 ISA 308 309 // Capstone error 310 public static final int CS_ERR_OK = 0; 311 public static final int CS_ERR_MEM = 1; // Out-Of-Memory error 312 public static final int CS_ERR_ARCH = 2; // Unsupported architecture 313 public static final int CS_ERR_HANDLE = 3; // Invalid handle 314 public static final int CS_ERR_CSH = 4; // Invalid csh argument 315 public static final int CS_ERR_MODE = 5; // Invalid/unsupported mode 316 public static final int CS_ERR_OPTION = 6; // Invalid/unsupported option: cs_option() 317 public static final int CS_ERR_DETAIL = 7; // Invalid/unsupported option: cs_option() 318 public static final int CS_ERR_MEMSETUP = 8; 319 public static final int CS_ERR_VERSION = 9; //Unsupported version (bindings) 320 public static final int CS_ERR_DIET = 10; //Information irrelevant in diet engine 321 public static final int CS_ERR_SKIPDATA = 11; //Access irrelevant data for "data" instruction in SKIPDATA mode 322 public static final int CS_ERR_X86_ATT = 12; //X86 AT&T syntax is unsupported (opt-out at compile time) 323 public static final int CS_ERR_X86_INTEL = 13; //X86 Intel syntax is unsupported (opt-out at compile time) 324 325 // Capstone option type 326 public static final int CS_OPT_SYNTAX = 1; // Intel X86 asm syntax (CS_ARCH_X86 arch) 327 public static final int CS_OPT_DETAIL = 2; // Break down instruction structure into details 328 public static final int CS_OPT_MODE = 3; // Change engine's mode at run-time 329 330 // Capstone option value 331 public static final int CS_OPT_OFF = 0; // Turn OFF an option - default option of CS_OPT_DETAIL 332 public static final int CS_OPT_SYNTAX_INTEL = 1; // Intel X86 asm syntax - default syntax on X86 (CS_OPT_SYNTAX, CS_ARCH_X86) 333 public static final int CS_OPT_SYNTAX_ATT = 2; // ATT asm syntax (CS_OPT_SYNTAX, CS_ARCH_X86) 334 public static final int CS_OPT_ON = 3; // Turn ON an option (CS_OPT_DETAIL) 335 public static final int CS_OPT_SYNTAX_NOREGNAME = 3; // PPC asm syntax: Prints register name with only number (CS_OPT_SYNTAX) 336 337 // Common instruction operand types - to be consistent across all architectures. 338 public static final int CS_OP_INVALID = 0; 339 public static final int CS_OP_REG = 1; 340 public static final int CS_OP_IMM = 2; 341 public static final int CS_OP_MEM = 3; 342 public static final int CS_OP_FP = 4; 343 344 // Common instruction groups - to be consistent across all architectures. 345 public static final int CS_GRP_INVALID = 0; // uninitialized/invalid group. 346 public static final int CS_GRP_JUMP = 1; // all jump instructions (conditional+direct+indirect jumps) 347 public static final int CS_GRP_CALL = 2; // all call instructions 348 public static final int CS_GRP_RET = 3; // all return instructions 349 public static final int CS_GRP_INT = 4; // all interrupt instructions (int+syscall) 350 public static final int CS_GRP_IRET = 5; // all interrupt return instructions 351 352 // Query id for cs_support() 353 public static final int CS_SUPPORT_DIET = CS_ARCH_ALL+1; // diet mode 354 public static final int CS_SUPPORT_X86_REDUCE = CS_ARCH_ALL+2; // X86 reduce mode 355 356 protected class NativeStruct { 357 private NativeLong csh; 358 private NativeLongByReference handleRef; 359 } 360 361 private static final CsInsn[] EMPTY_INSN = new CsInsn[0]; 362 363 protected NativeStruct ns; // for memory retention 364 private CS cs; 365 public int arch; 366 public int mode; 367 private int syntax; 368 private int detail; 369 private boolean diet; 370 Capstone(int arch, int mode)371 public Capstone(int arch, int mode) { 372 cs = (CS)Native.loadLibrary("capstone", CS.class); 373 int version = cs.cs_version(null, null); 374 if (version != (CS_API_MAJOR << 8) + CS_API_MINOR) { 375 throw new RuntimeException("Different API version between core & binding (CS_ERR_VERSION)"); 376 } 377 378 this.arch = arch; 379 this.mode = mode; 380 ns = new NativeStruct(); 381 ns.handleRef = new NativeLongByReference(); 382 if (cs.cs_open(arch, mode, ns.handleRef) != CS_ERR_OK) { 383 throw new RuntimeException("ERROR: Wrong arch or mode"); 384 } 385 ns.csh = ns.handleRef.getValue(); 386 this.detail = CS_OPT_OFF; 387 this.diet = cs.cs_support(CS_SUPPORT_DIET); 388 } 389 390 // return combined API version version()391 public int version() { 392 return cs.cs_version(null, null); 393 } 394 395 // set Assembly syntax setSyntax(int syntax)396 public void setSyntax(int syntax) { 397 if (cs.cs_option(ns.csh, CS_OPT_SYNTAX, new NativeLong(syntax)) == CS_ERR_OK) { 398 this.syntax = syntax; 399 } else { 400 throw new RuntimeException("ERROR: Failed to set assembly syntax"); 401 } 402 } 403 404 // set detail option at run-time setDetail(int opt)405 public void setDetail(int opt) { 406 if (cs.cs_option(ns.csh, CS_OPT_DETAIL, new NativeLong(opt)) == CS_ERR_OK) { 407 this.detail = opt; 408 } else { 409 throw new RuntimeException("ERROR: Failed to set detail option"); 410 } 411 } 412 413 // set mode option at run-time setMode(int opt)414 public void setMode(int opt) { 415 if (cs.cs_option(ns.csh, CS_OPT_MODE, new NativeLong(opt)) == CS_ERR_OK) { 416 this.mode = opt; 417 } else { 418 throw new RuntimeException("ERROR: Failed to set mode option"); 419 } 420 } 421 422 // destructor automatically caled at destroyed time. finalize()423 protected void finalize() { 424 // FIXME: crashed on Ubuntu 14.04 64bit, OpenJDK java 1.6.0_33 425 // cs.cs_close(ns.handleRef); 426 } 427 428 // destructor automatically caled at destroyed time. close()429 public int close() { 430 return cs.cs_close(ns.handleRef); 431 } 432 433 /** 434 * Disassemble instructions from @code assumed to be located at @address, 435 * stop when encountering first broken instruction. 436 * 437 * @param code The source machine code bytes. 438 * @param address The address of the first machine code byte. 439 * @return the array of successfully disassembled instructions, empty if no instruction could be disassembled. 440 */ disasm(byte[] code, long address)441 public CsInsn[] disasm(byte[] code, long address) { 442 return disasm(code, address, 0); 443 } 444 445 /** 446 * Disassemble up to @count instructions from @code assumed to be located at @address, 447 * stop when encountering first broken instruction. 448 * 449 * @param code The source machine code bytes. 450 * @param address The address of the first machine code byte. 451 * @param count The maximum number of instructions to disassemble, 0 for no maximum. 452 * @return the array of successfully disassembled instructions, empty if no instruction could be disassembled. 453 */ disasm(byte[] code, long address, long count)454 public CsInsn[] disasm(byte[] code, long address, long count) { 455 PointerByReference insnRef = new PointerByReference(); 456 457 NativeLong c = cs.cs_disasm(ns.csh, code, new NativeLong(code.length), address, new NativeLong(count), insnRef); 458 459 if (0 == c.intValue()) { 460 return EMPTY_INSN; 461 } 462 463 Pointer p = insnRef.getValue(); 464 _cs_insn byref = new _cs_insn(p); 465 466 CsInsn[] allInsn = fromArrayRaw((_cs_insn[]) byref.toArray(c.intValue())); 467 468 // free allocated memory 469 // cs.cs_free(p, c); 470 // FIXME(danghvu): Can't free because memory is still inside CsInsn 471 472 return allInsn; 473 } 474 } 475