1 /* 2 * Copyright (C) 2017 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #pragma once 18 19 #include "dex_format.h" 20 21 #include <iosfwd> 22 #include <stddef.h> 23 24 // .dex bytecode definitions and helpers: 25 // https://source.android.com/devices/tech/dalvik/dalvik-bytecode.html 26 27 namespace dex { 28 29 // The number of Dalvik opcodes 30 constexpr size_t kNumPackedOpcodes = 0x100; 31 32 // Switch table and array data signatures are a code unit consisting 33 // of "NOP" (0x00) in the low-order byte and a non-zero identifying 34 // code in the high-order byte. (A true NOP is 0x0000.) 35 constexpr u2 kPackedSwitchSignature = 0x0100; 36 constexpr u2 kSparseSwitchSignature = 0x0200; 37 constexpr u2 kArrayDataSignature = 0x0300; 38 39 // Include for DEX_INSTRUCTION_LIST and DEX_INSTRUCTION_FORMAT_LIST 40 #include "dex_instruction_list.h" 41 42 // Enumeration of all Dalvik opcodes 43 enum Opcode : u1 { 44 #define INSTRUCTION_ENUM(opcode, cname, ...) OP_##cname = (opcode), 45 DEX_INSTRUCTION_LIST(INSTRUCTION_ENUM) 46 #undef INSTRUCTION_ENUM 47 }; 48 49 // Instruction formats associated with Dalvik opcodes 50 enum InstructionFormat : u1 { 51 #define INSTRUCTION_FORMAT_ENUM(name) k##name, 52 #include "dex_instruction_list.h" 53 DEX_INSTRUCTION_FORMAT_LIST(INSTRUCTION_FORMAT_ENUM) 54 #undef INSTRUCTION_FORMAT_ENUM 55 }; 56 57 #undef DEX_INSTRUCTION_FORMAT_LIST 58 #undef DEX_INSTRUCTION_LIST 59 60 using OpcodeFlags = u1; 61 enum : OpcodeFlags { 62 kBranch = 0x01, // conditional or unconditional branch 63 kContinue = 0x02, // flow can continue to next statement 64 kSwitch = 0x04, // switch statement 65 kThrow = 0x08, // could cause an exception to be thrown 66 kReturn = 0x10, // returns, no additional statements 67 kInvoke = 0x20, // a flavor of invoke 68 kUnconditional = 0x40, // unconditional branch 69 kExperimental = 0x80, // is an experimental opcode 70 }; 71 72 using VerifyFlags = u4; 73 enum : VerifyFlags { 74 kVerifyNothing = 0x0000000, 75 kVerifyRegA = 0x0000001, 76 kVerifyRegAWide = 0x0000002, 77 kVerifyRegB = 0x0000004, 78 kVerifyRegBField = 0x0000008, 79 kVerifyRegBMethod = 0x0000010, 80 kVerifyRegBNewInstance = 0x0000020, 81 kVerifyRegBString = 0x0000040, 82 kVerifyRegBType = 0x0000080, 83 kVerifyRegBWide = 0x0000100, 84 kVerifyRegC = 0x0000200, 85 kVerifyRegCField = 0x0000400, 86 kVerifyRegCNewArray = 0x0000800, 87 kVerifyRegCType = 0x0001000, 88 kVerifyRegCWide = 0x0002000, 89 kVerifyArrayData = 0x0004000, 90 kVerifyBranchTarget = 0x0008000, 91 kVerifySwitchTargets = 0x0010000, 92 kVerifyVarArg = 0x0020000, 93 kVerifyVarArgNonZero = 0x0040000, 94 kVerifyVarArgRange = 0x0080000, 95 kVerifyVarArgRangeNonZero = 0x0100000, 96 kVerifyRuntimeOnly = 0x0200000, 97 kVerifyError = 0x0400000, 98 kVerifyRegHPrototype = 0x0800000, 99 kVerifyRegBCallSite = 0x1000000, 100 kVerifyRegBMethodHandle = 0x2000000, 101 kVerifyRegBPrototype = 0x4000000, 102 }; 103 104 // Types of indexed reference that are associated with opcodes whose 105 // formats include such an indexed reference (e.g., 21c and 35c). 106 enum InstructionIndexType : u1 { 107 kIndexUnknown = 0, 108 kIndexNone, // has no index 109 kIndexVaries, // "It depends." Used for throw-verification-error 110 kIndexTypeRef, // type reference index 111 kIndexStringRef, // string reference index 112 kIndexMethodRef, // method reference index 113 kIndexFieldRef, // field reference index 114 kIndexInlineMethod, // inline method index (for inline linked methods) 115 kIndexVtableOffset, // vtable offset (for static linked methods) 116 kIndexFieldOffset, // field offset (for static linked fields) 117 kIndexMethodAndProtoRef, // method index and proto index 118 kIndexCallSiteRef, // call site index 119 kIndexMethodHandleRef, // constant method handle reference index 120 kIndexProtoRef, // constant prototype reference index 121 }; 122 123 // Holds the contents of a decoded instruction. 124 struct Instruction { 125 u4 vA; // the A field of the instruction 126 u4 vB; // the B field of the instruction 127 u8 vB_wide; // 64bit version of the B field (for k51l) 128 u4 vC; // the C field of the instruction 129 u4 arg[5]; // vC/D/E/F/G in invoke or filled-new-array 130 Opcode opcode; // instruction opcode 131 }; 132 133 // "packed-switch-payload" format 134 struct PackedSwitchPayload { 135 u2 ident; 136 u2 size; 137 s4 first_key; 138 s4 targets[]; 139 }; 140 141 // "sparse-switch-payload" format 142 struct SparseSwitchPayload { 143 u2 ident; 144 u2 size; 145 s4 data[]; 146 }; 147 148 // "fill-array-data-payload" format 149 struct ArrayData { 150 u2 ident; 151 u2 element_width; 152 u4 size; 153 u1 data[]; 154 }; 155 156 // Collect the enums in a struct for better locality. 157 struct InstructionDescriptor { 158 u4 verify_flags; // Set of VerifyFlag. 159 InstructionFormat format; 160 InstructionIndexType index_type; 161 u1 flags; // Set of Flags. 162 }; 163 164 // Extracts the opcode from a Dalvik code unit (bytecode) 165 Opcode OpcodeFromBytecode(u2 bytecode); 166 167 // Returns the name of an opcode 168 const char* GetOpcodeName(Opcode opcode); 169 170 // Returns the index type associated with the specified opcode 171 InstructionIndexType GetIndexTypeFromOpcode(Opcode opcode); 172 173 // Returns the format associated with the specified opcode 174 InstructionFormat GetFormatFromOpcode(Opcode opcode); 175 176 // Returns the flags for the specified opcode 177 OpcodeFlags GetFlagsFromOpcode(Opcode opcode); 178 179 // Returns the verify flags for the specified opcode 180 VerifyFlags GetVerifyFlagsFromOpcode(Opcode opcode); 181 182 // Returns the instruction width for the specified opcode format 183 size_t GetWidthFromFormat(InstructionFormat format); 184 185 // Return the width of the specified instruction, or 0 if not defined. Also 186 // works for special OP_NOP entries, including switch statement data tables 187 // and array data. 188 size_t GetWidthFromBytecode(const u2* bytecode); 189 190 // Decode a .dex bytecode 191 Instruction DecodeInstruction(const u2* bytecode); 192 193 // Writes a hex formatted opcode to an output stream. 194 std::ostream& operator<<(std::ostream& os, Opcode opcode); 195 196 // Writes name of format to an outputstream. 197 std::ostream& operator<<(std::ostream& os, InstructionFormat format); 198 199 } // namespace dex 200