1 /*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "slicer/dex_bytecode.h"
18
19 #include "slicer/common.h"
20
21 #include <array>
22 #include <iomanip>
23 #include <sstream>
24
25 namespace dex {
26
OpcodeFromBytecode(u2 bytecode)27 Opcode OpcodeFromBytecode(u2 bytecode) {
28 Opcode opcode = Opcode(bytecode & 0xff);
29 return opcode;
30 }
31
32 // Table that maps each opcode to the index type implied by that opcode
33 static constexpr std::array<InstructionDescriptor, kNumPackedOpcodes>
34 gInstructionDescriptors = {{
35 #define INSTRUCTION_DESCR(o, c, p, format, index, flags, e, vflags) \
36 { \
37 vflags, \
38 format, \
39 index, \
40 flags, \
41 },
42 #include "export/slicer/dex_instruction_list.h"
43 DEX_INSTRUCTION_LIST(INSTRUCTION_DESCR)
44 #undef DEX_INSTRUCTION_LIST
45 #undef INSTRUCTION_DESCR
46 }};
47
GetIndexTypeFromOpcode(Opcode opcode)48 InstructionIndexType GetIndexTypeFromOpcode(Opcode opcode) {
49 return gInstructionDescriptors[opcode].index_type;
50 }
51
GetFormatFromOpcode(Opcode opcode)52 InstructionFormat GetFormatFromOpcode(Opcode opcode) {
53 return gInstructionDescriptors[opcode].format;
54 }
55
GetFlagsFromOpcode(Opcode opcode)56 OpcodeFlags GetFlagsFromOpcode(Opcode opcode) {
57 return gInstructionDescriptors[opcode].flags;
58 }
59
GetVerifyFlagsFromOpcode(Opcode opcode)60 VerifyFlags GetVerifyFlagsFromOpcode(Opcode opcode) {
61 return gInstructionDescriptors[opcode].verify_flags;
62 }
63
GetWidthFromFormat(InstructionFormat format)64 size_t GetWidthFromFormat(InstructionFormat format) {
65 switch (format) {
66 case k10x:
67 case k12x:
68 case k11n:
69 case k11x:
70 case k10t:
71 return 1;
72 case k20t:
73 case k20bc:
74 case k21c:
75 case k22x:
76 case k21s:
77 case k21t:
78 case k21h:
79 case k23x:
80 case k22b:
81 case k22s:
82 case k22t:
83 case k22c:
84 case k22cs:
85 return 2;
86 case k30t:
87 case k31t:
88 case k31c:
89 case k32x:
90 case k31i:
91 case k35c:
92 case k35ms:
93 case k35mi:
94 case k3rc:
95 case k3rms:
96 case k3rmi:
97 return 3;
98 case k45cc:
99 case k4rcc:
100 return 4;
101 case k51l:
102 return 5;
103 }
104 }
105
GetWidthFromBytecode(const u2 * bytecode)106 size_t GetWidthFromBytecode(const u2* bytecode) {
107 size_t width = 0;
108 if (*bytecode == kPackedSwitchSignature) {
109 width = 4 + bytecode[1] * 2;
110 } else if (*bytecode == kSparseSwitchSignature) {
111 width = 2 + bytecode[1] * 4;
112 } else if (*bytecode == kArrayDataSignature) {
113 u2 elemWidth = bytecode[1];
114 u4 len = bytecode[2] | (((u4)bytecode[3]) << 16);
115 // The plus 1 is to round up for odd size and width.
116 width = 4 + (elemWidth * len + 1) / 2;
117 } else {
118 width = GetWidthFromFormat(
119 GetFormatFromOpcode(OpcodeFromBytecode(bytecode[0])));
120 }
121 return width;
122 }
123
124 // Dalvik opcode names.
125 static constexpr std::array<const char*, kNumPackedOpcodes> gOpcodeNames = {
126 #define INSTRUCTION_NAME(o, c, pname, f, i, a, e, v) pname,
127 #include "export/slicer/dex_instruction_list.h"
128 DEX_INSTRUCTION_LIST(INSTRUCTION_NAME)
129 #undef DEX_INSTRUCTION_LIST
130 #undef INSTRUCTION_NAME
131 };
132
GetOpcodeName(Opcode opcode)133 const char* GetOpcodeName(Opcode opcode) { return gOpcodeNames[opcode]; }
134
135 // Helpers for DecodeInstruction()
InstA(u2 inst)136 static u4 InstA(u2 inst) { return (inst >> 8) & 0x0f; }
InstB(u2 inst)137 static u4 InstB(u2 inst) { return inst >> 12; }
InstAA(u2 inst)138 static u4 InstAA(u2 inst) { return inst >> 8; }
139
140 // Helper for DecodeInstruction()
FetchU4(const u2 * ptr)141 static u4 FetchU4(const u2* ptr) { return ptr[0] | (u4(ptr[1]) << 16); }
142
143 // Helper for DecodeInstruction()
FetchU8(const u2 * ptr)144 static u8 FetchU8(const u2* ptr) {
145 return FetchU4(ptr) | (u8(FetchU4(ptr + 2)) << 32);
146 }
147
148 // Decode a Dalvik bytecode and extract the individual fields
DecodeInstruction(const u2 * bytecode)149 Instruction DecodeInstruction(const u2* bytecode) {
150 u2 inst = bytecode[0];
151 Opcode opcode = OpcodeFromBytecode(inst);
152 InstructionFormat format = GetFormatFromOpcode(opcode);
153
154 Instruction dec = {};
155 dec.opcode = opcode;
156
157 switch (format) {
158 case k10x: // op
159 return dec;
160 case k12x: // op vA, vB
161 dec.vA = InstA(inst);
162 dec.vB = InstB(inst);
163 return dec;
164 case k11n: // op vA, #+B
165 dec.vA = InstA(inst);
166 dec.vB = s4(InstB(inst) << 28) >> 28; // sign extend 4-bit value
167 return dec;
168 case k11x: // op vAA
169 dec.vA = InstAA(inst);
170 return dec;
171 case k10t: // op +AA
172 dec.vA = s1(InstAA(inst)); // sign-extend 8-bit value
173 return dec;
174 case k20t: // op +AAAA
175 dec.vA = s2(bytecode[1]); // sign-extend 16-bit value
176 return dec;
177 case k20bc: // [opt] op AA, thing@BBBB
178 case k21c: // op vAA, thing@BBBB
179 case k22x: // op vAA, vBBBB
180 dec.vA = InstAA(inst);
181 dec.vB = bytecode[1];
182 return dec;
183 case k21s: // op vAA, #+BBBB
184 case k21t: // op vAA, +BBBB
185 dec.vA = InstAA(inst);
186 dec.vB = s2(bytecode[1]); // sign-extend 16-bit value
187 return dec;
188 case k21h: // op vAA, #+BBBB0000[00000000]
189 dec.vA = InstAA(inst);
190 // The value should be treated as right-zero-extended, but we don't
191 // actually do that here. Among other things, we don't know if it's
192 // the top bits of a 32- or 64-bit value.
193 dec.vB = bytecode[1];
194 return dec;
195 case k23x: // op vAA, vBB, vCC
196 dec.vA = InstAA(inst);
197 dec.vB = bytecode[1] & 0xff;
198 dec.vC = bytecode[1] >> 8;
199 return dec;
200 case k22b: // op vAA, vBB, #+CC
201 dec.vA = InstAA(inst);
202 dec.vB = bytecode[1] & 0xff;
203 dec.vC = s1(bytecode[1] >> 8); // sign-extend 8-bit value
204 return dec;
205 case k22s: // op vA, vB, #+CCCC
206 case k22t: // op vA, vB, +CCCC
207 dec.vA = InstA(inst);
208 dec.vB = InstB(inst);
209 dec.vC = s2(bytecode[1]); // sign-extend 16-bit value
210 return dec;
211 case k22c: // op vA, vB, thing@CCCC
212 case k22cs: // [opt] op vA, vB, field offset CCCC
213 dec.vA = InstA(inst);
214 dec.vB = InstB(inst);
215 dec.vC = bytecode[1];
216 return dec;
217 case k30t: // op +AAAAAAAA
218 dec.vA = FetchU4(bytecode + 1);
219 return dec;
220 case k31t: // op vAA, +BBBBBBBB
221 case k31c: // op vAA, string@BBBBBBBB
222 dec.vA = InstAA(inst);
223 dec.vB = FetchU4(bytecode + 1);
224 return dec;
225 case k32x: // op vAAAA, vBBBB
226 dec.vA = bytecode[1];
227 dec.vB = bytecode[2];
228 return dec;
229 case k31i: // op vAA, #+BBBBBBBB
230 dec.vA = InstAA(inst);
231 dec.vB = FetchU4(bytecode + 1);
232 return dec;
233 case k35c: // op {vC, vD, vE, vF, vG}, thing@BBBB
234 case k35ms: // [opt] invoke-virtual+super
235 case k35mi: { // [opt] inline invoke
236 dec.vA = InstB(inst); // This is labeled A in the spec.
237 dec.vB = bytecode[1];
238
239 u2 regList = bytecode[2];
240
241 // Copy the argument registers into the arg[] array, and
242 // also copy the first argument (if any) into vC. (The
243 // Instruction structure doesn't have separate
244 // fields for {vD, vE, vF, vG}, so there's no need to make
245 // copies of those.) Note that cases 5..2 fall through.
246 switch (dec.vA) {
247 case 5:
248 // A fifth arg is verboten for inline invokes
249 SLICER_CHECK_NE(format, k35mi);
250
251 // Per note at the top of this format decoder, the
252 // fifth argument comes from the A field in the
253 // instruction, but it's labeled G in the spec.
254 dec.arg[4] = InstA(inst);
255 FALLTHROUGH_INTENDED;
256 case 4:
257 dec.arg[3] = (regList >> 12) & 0x0f;
258 FALLTHROUGH_INTENDED;
259 case 3:
260 dec.arg[2] = (regList >> 8) & 0x0f;
261 FALLTHROUGH_INTENDED;
262 case 2:
263 dec.arg[1] = (regList >> 4) & 0x0f;
264 FALLTHROUGH_INTENDED;
265 case 1:
266 dec.vC = dec.arg[0] = regList & 0x0f;
267 FALLTHROUGH_INTENDED;
268 case 0:
269 // Valid, but no need to do anything
270 return dec;
271 }
272 }
273 SLICER_CHECK(!"Invalid arg count in 35c/35ms/35mi");
274 case k3rc: // op {vCCCC .. v(CCCC+AA-1)}, meth@BBBB
275 case k3rms: // [opt] invoke-virtual+super/range
276 case k3rmi: // [opt] execute-inline/range
277 dec.vA = InstAA(inst);
278 dec.vB = bytecode[1];
279 dec.vC = bytecode[2];
280 return dec;
281 case k45cc: {
282 // AG op BBBB FEDC HHHH
283 dec.vA = InstB(inst); // This is labelled A in the spec.
284 dec.vB = bytecode[1]; // vB meth@BBBB
285
286 u2 regList = bytecode[2];
287 dec.vC = regList & 0xf;
288 dec.arg[0] = (regList >> 4) & 0xf; // vD
289 dec.arg[1] = (regList >> 8) & 0xf; // vE
290 dec.arg[2] = (regList >> 12); // vF
291 dec.arg[3] = InstA(inst); // vG
292 dec.arg[4] = bytecode[3]; // vH proto@HHHH
293 }
294 return dec;
295 case k4rcc:
296 // AA op BBBB CCCC HHHH
297 dec.vA = InstAA(inst);
298 dec.vB = bytecode[1];
299 dec.vC = bytecode[2];
300 dec.arg[4] = bytecode[3]; // vH proto@HHHH
301 return dec;
302 case k51l: // op vAA, #+BBBBBBBBBBBBBBBB
303 dec.vA = InstAA(inst);
304 dec.vB_wide = FetchU8(bytecode + 1);
305 return dec;
306 }
307
308 std::stringstream ss;
309 ss << "Can't decode unexpected format " << format << " for " << opcode;
310 SLICER_FATAL(ss.str());
311 }
312
// Formats `value` as lowercase hexadecimal with a "0x" prefix, zero-padded to
// at least two digits (wider values are printed in full, not truncated).
static inline std::string HexByte(int value) {
  std::stringstream out;
  out << std::hex << std::setfill('0');
  out << "0x";
  // The field width applies to the next insertion only, i.e. to the digits.
  out << std::setw(2) << value;
  return out.str();
}
318
operator <<(std::ostream & os,Opcode opcode)319 std::ostream& operator<<(std::ostream& os, Opcode opcode) {
320 return os << "[" << HexByte(opcode) << "] " << gOpcodeNames[opcode];
321 }
322
operator <<(std::ostream & os,InstructionFormat format)323 std::ostream& operator<<(std::ostream& os, InstructionFormat format) {
324 switch (format) {
325 #define EMIT_INSTRUCTION_FORMAT_NAME(name) \
326 case InstructionFormat::k##name: return os << #name;
327 #include "export/slicer/dex_instruction_list.h"
328 DEX_INSTRUCTION_FORMAT_LIST(EMIT_INSTRUCTION_FORMAT_NAME)
329 #undef EMIT_INSTRUCTION_FORMAT_NAME
330 #undef DEX_INSTRUCTION_FORMAT_LIST
331 #undef DEX_INSTRUCTION_LIST
332 }
333 return os << "[" << HexByte(format) << "] " << "Unknown";
334 }
335
336 } // namespace dex
337