1 /*
2 * Copyright (C) 2016 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *
16 * Implementation file for control flow graph dumping for the dexdump utility.
17 */
18
19 #include "dexdump_cfg.h"
20
21 #include <inttypes.h>
22
23 #include <map>
24 #include <ostream>
25 #include <set>
26 #include <sstream>
27
28 #include "dex/code_item_accessors-inl.h"
29 #include "dex/dex_file-inl.h"
30 #include "dex/dex_file_exception_helpers.h"
31 #include "dex/dex_instruction-inl.h"
32
33 namespace art {
34
dumpMethodCFGImpl(const DexFile * dex_file,uint32_t dex_method_idx,const DexFile::CodeItem * code_item,std::ostream & os)35 static void dumpMethodCFGImpl(const DexFile* dex_file,
36 uint32_t dex_method_idx,
37 const DexFile::CodeItem* code_item,
38 std::ostream& os) {
39 os << "digraph {\n";
40 os << " # /* " << dex_file->PrettyMethod(dex_method_idx, true) << " */\n";
41
42 CodeItemDataAccessor accessor(*dex_file, code_item);
43
44 std::set<uint32_t> dex_pc_is_branch_target;
45 {
46 // Go and populate.
47 for (const DexInstructionPcPair& pair : accessor) {
48 const Instruction* inst = &pair.Inst();
49 if (inst->IsBranch()) {
50 dex_pc_is_branch_target.insert(pair.DexPc() + inst->GetTargetOffset());
51 } else if (inst->IsSwitch()) {
52 const uint16_t* insns = reinterpret_cast<const uint16_t*>(inst);
53 int32_t switch_offset = insns[1] | (static_cast<int32_t>(insns[2]) << 16);
54 const uint16_t* switch_insns = insns + switch_offset;
55 uint32_t switch_count = switch_insns[1];
56 int32_t targets_offset;
57 if ((*insns & 0xff) == Instruction::PACKED_SWITCH) {
58 /* 0=sig, 1=count, 2/3=firstKey */
59 targets_offset = 4;
60 } else {
61 /* 0=sig, 1=count, 2..count*2 = keys */
62 targets_offset = 2 + 2 * switch_count;
63 }
64 for (uint32_t targ = 0; targ < switch_count; targ++) {
65 int32_t offset =
66 static_cast<int32_t>(switch_insns[targets_offset + targ * 2]) |
67 static_cast<int32_t>(switch_insns[targets_offset + targ * 2 + 1] << 16);
68 dex_pc_is_branch_target.insert(pair.DexPc() + offset);
69 }
70 }
71 }
72 }
73
74 // Create nodes for "basic blocks."
75 std::map<uint32_t, uint32_t> dex_pc_to_node_id; // This only has entries for block starts.
76 std::map<uint32_t, uint32_t> dex_pc_to_incl_id; // This has entries for all dex pcs.
77
78 {
79 bool first_in_block = true;
80 bool force_new_block = false;
81 for (const DexInstructionPcPair& pair : accessor) {
82 const uint32_t dex_pc = pair.DexPc();
83 if (dex_pc == 0 ||
84 (dex_pc_is_branch_target.find(dex_pc) != dex_pc_is_branch_target.end()) ||
85 force_new_block) {
86 uint32_t id = dex_pc_to_node_id.size();
87 if (id > 0) {
88 // End last node.
89 os << "}\"];\n";
90 }
91 // Start next node.
92 os << " node" << id << " [shape=record,label=\"{";
93 dex_pc_to_node_id.insert(std::make_pair(dex_pc, id));
94 first_in_block = true;
95 force_new_block = false;
96 }
97
98 // Register instruction.
99 dex_pc_to_incl_id.insert(std::make_pair(dex_pc, dex_pc_to_node_id.size() - 1));
100
101 // Print instruction.
102 if (!first_in_block) {
103 os << " | ";
104 } else {
105 first_in_block = false;
106 }
107
108 // Dump the instruction. Need to escape '"', '<', '>', '{' and '}'.
109 os << "<" << "p" << dex_pc << ">";
110 os << " 0x" << std::hex << dex_pc << std::dec << ": ";
111 std::string inst_str = pair.Inst().DumpString(dex_file);
112 size_t cur_start = 0; // It's OK to start at zero, instruction dumps don't start with chars
113 // we need to escape.
114 while (cur_start != std::string::npos) {
115 size_t next_escape = inst_str.find_first_of("\"{}<>", cur_start + 1);
116 if (next_escape == std::string::npos) {
117 os << inst_str.substr(cur_start, inst_str.size() - cur_start);
118 break;
119 } else {
120 os << inst_str.substr(cur_start, next_escape - cur_start);
121 // Escape all necessary characters.
122 while (next_escape < inst_str.size()) {
123 char c = inst_str.at(next_escape);
124 if (c == '"' || c == '{' || c == '}' || c == '<' || c == '>') {
125 os << '\\' << c;
126 } else {
127 break;
128 }
129 next_escape++;
130 }
131 if (next_escape >= inst_str.size()) {
132 next_escape = std::string::npos;
133 }
134 cur_start = next_escape;
135 }
136 }
137
138 // Force a new block for some fall-throughs and some instructions that terminate the "local"
139 // control flow.
140 force_new_block = pair.Inst().IsSwitch() || pair.Inst().IsBasicBlockEnd();
141 }
142 // Close last node.
143 if (dex_pc_to_node_id.size() > 0) {
144 os << "}\"];\n";
145 }
146 }
147
148 // Create edges between them.
149 {
150 std::ostringstream regular_edges;
151 std::ostringstream taken_edges;
152 std::ostringstream exception_edges;
153
154 // Common set of exception edges.
155 std::set<uint32_t> exception_targets;
156
157 // These blocks (given by the first dex pc) need exception per dex-pc handling in a second
158 // pass. In the first pass we try and see whether we can use a common set of edges.
159 std::set<uint32_t> blocks_with_detailed_exceptions;
160
161 {
162 uint32_t last_node_id = std::numeric_limits<uint32_t>::max();
163 uint32_t old_dex_pc = 0;
164 uint32_t block_start_dex_pc = std::numeric_limits<uint32_t>::max();
165 for (const DexInstructionPcPair& pair : accessor) {
166 const Instruction* inst = &pair.Inst();
167 const uint32_t dex_pc = pair.DexPc();
168 {
169 auto it = dex_pc_to_node_id.find(dex_pc);
170 if (it != dex_pc_to_node_id.end()) {
171 if (!exception_targets.empty()) {
172 // It seems the last block had common exception handlers. Add the exception edges now.
173 uint32_t node_id = dex_pc_to_node_id.find(block_start_dex_pc)->second;
174 for (uint32_t handler_pc : exception_targets) {
175 auto node_id_it = dex_pc_to_incl_id.find(handler_pc);
176 if (node_id_it != dex_pc_to_incl_id.end()) {
177 exception_edges << " node" << node_id
178 << " -> node" << node_id_it->second << ":p" << handler_pc
179 << ";\n";
180 }
181 }
182 exception_targets.clear();
183 }
184
185 block_start_dex_pc = dex_pc;
186
187 // Seems to be a fall-through, connect to last_node_id. May be spurious edges for things
188 // like switch data.
189 uint32_t old_last = last_node_id;
190 last_node_id = it->second;
191 if (old_last != std::numeric_limits<uint32_t>::max()) {
192 regular_edges << " node" << old_last << ":p" << old_dex_pc
193 << " -> node" << last_node_id << ":p" << dex_pc
194 << ";\n";
195 }
196 }
197
198 // Look at the exceptions of the first entry.
199 CatchHandlerIterator catch_it(accessor, dex_pc);
200 for (; catch_it.HasNext(); catch_it.Next()) {
201 exception_targets.insert(catch_it.GetHandlerAddress());
202 }
203 }
204
205 // Handle instruction.
206
207 // Branch: something with at most two targets.
208 if (inst->IsBranch()) {
209 const int32_t offset = inst->GetTargetOffset();
210 const bool conditional = !inst->IsUnconditional();
211
212 auto target_it = dex_pc_to_node_id.find(dex_pc + offset);
213 if (target_it != dex_pc_to_node_id.end()) {
214 taken_edges << " node" << last_node_id << ":p" << dex_pc
215 << " -> node" << target_it->second << ":p" << (dex_pc + offset)
216 << ";\n";
217 }
218 if (!conditional) {
219 // No fall-through.
220 last_node_id = std::numeric_limits<uint32_t>::max();
221 }
222 } else if (inst->IsSwitch()) {
223 // TODO: Iterate through all switch targets.
224 const uint16_t* insns = reinterpret_cast<const uint16_t*>(inst);
225 /* make sure the start of the switch is in range */
226 int32_t switch_offset = insns[1] | (static_cast<int32_t>(insns[2]) << 16);
227 /* offset to switch table is a relative branch-style offset */
228 const uint16_t* switch_insns = insns + switch_offset;
229 uint32_t switch_count = switch_insns[1];
230 int32_t targets_offset;
231 if ((*insns & 0xff) == Instruction::PACKED_SWITCH) {
232 /* 0=sig, 1=count, 2/3=firstKey */
233 targets_offset = 4;
234 } else {
235 /* 0=sig, 1=count, 2..count*2 = keys */
236 targets_offset = 2 + 2 * switch_count;
237 }
238 /* make sure the end of the switch is in range */
239 /* verify each switch target */
240 for (uint32_t targ = 0; targ < switch_count; targ++) {
241 int32_t offset =
242 static_cast<int32_t>(switch_insns[targets_offset + targ * 2]) |
243 static_cast<int32_t>(switch_insns[targets_offset + targ * 2 + 1] << 16);
244 int32_t abs_offset = dex_pc + offset;
245 auto target_it = dex_pc_to_node_id.find(abs_offset);
246 if (target_it != dex_pc_to_node_id.end()) {
247 // TODO: value label.
248 taken_edges << " node" << last_node_id << ":p" << dex_pc
249 << " -> node" << target_it->second << ":p" << (abs_offset)
250 << ";\n";
251 }
252 }
253 }
254
255 // Exception edges. If this is not the first instruction in the block
256 if (block_start_dex_pc != dex_pc) {
257 std::set<uint32_t> current_handler_pcs;
258 CatchHandlerIterator catch_it(accessor, dex_pc);
259 for (; catch_it.HasNext(); catch_it.Next()) {
260 current_handler_pcs.insert(catch_it.GetHandlerAddress());
261 }
262 if (current_handler_pcs != exception_targets) {
263 exception_targets.clear(); // Clear so we don't do something at the end.
264 blocks_with_detailed_exceptions.insert(block_start_dex_pc);
265 }
266 }
267
268 if (inst->IsReturn() ||
269 (inst->Opcode() == Instruction::THROW) ||
270 (inst->IsBranch() && inst->IsUnconditional())) {
271 // No fall-through.
272 last_node_id = std::numeric_limits<uint32_t>::max();
273 }
274 old_dex_pc = pair.DexPc();
275 }
276 // Finish up the last block, if it had common exceptions.
277 if (!exception_targets.empty()) {
278 // It seems the last block had common exception handlers. Add the exception edges now.
279 uint32_t node_id = dex_pc_to_node_id.find(block_start_dex_pc)->second;
280 for (uint32_t handler_pc : exception_targets) {
281 auto node_id_it = dex_pc_to_incl_id.find(handler_pc);
282 if (node_id_it != dex_pc_to_incl_id.end()) {
283 exception_edges << " node" << node_id
284 << " -> node" << node_id_it->second << ":p" << handler_pc
285 << ";\n";
286 }
287 }
288 exception_targets.clear();
289 }
290 }
291
292 // Second pass for detailed exception blocks.
293 // TODO
294 // Exception edges. If this is not the first instruction in the block
295 for (uint32_t dex_pc : blocks_with_detailed_exceptions) {
296 const Instruction* inst = &accessor.InstructionAt(dex_pc);
297 uint32_t this_node_id = dex_pc_to_incl_id.find(dex_pc)->second;
298 while (true) {
299 CatchHandlerIterator catch_it(accessor, dex_pc);
300 if (catch_it.HasNext()) {
301 std::set<uint32_t> handled_targets;
302 for (; catch_it.HasNext(); catch_it.Next()) {
303 uint32_t handler_pc = catch_it.GetHandlerAddress();
304 auto it = handled_targets.find(handler_pc);
305 if (it == handled_targets.end()) {
306 auto node_id_it = dex_pc_to_incl_id.find(handler_pc);
307 if (node_id_it != dex_pc_to_incl_id.end()) {
308 exception_edges << " node" << this_node_id << ":p" << dex_pc
309 << " -> node" << node_id_it->second << ":p" << handler_pc
310 << ";\n";
311 }
312
313 // Mark as done.
314 handled_targets.insert(handler_pc);
315 }
316 }
317 }
318 if (inst->IsBasicBlockEnd()) {
319 break;
320 }
321
322 // Loop update. Have a break-out if the next instruction is a branch target and thus in
323 // another block.
324 dex_pc += inst->SizeInCodeUnits();
325 if (dex_pc >= accessor.InsnsSizeInCodeUnits()) {
326 break;
327 }
328 if (dex_pc_to_node_id.find(dex_pc) != dex_pc_to_node_id.end()) {
329 break;
330 }
331 inst = inst->Next();
332 }
333 }
334
335 // Write out the sub-graphs to make edges styled.
336 os << "\n";
337 os << " subgraph regular_edges {\n";
338 os << " edge [color=\"#000000\",weight=.3,len=3];\n\n";
339 os << " " << regular_edges.str() << "\n";
340 os << " }\n\n";
341
342 os << " subgraph taken_edges {\n";
343 os << " edge [color=\"#00FF00\",weight=.3,len=3];\n\n";
344 os << " " << taken_edges.str() << "\n";
345 os << " }\n\n";
346
347 os << " subgraph exception_edges {\n";
348 os << " edge [color=\"#FF0000\",weight=.3,len=3];\n\n";
349 os << " " << exception_edges.str() << "\n";
350 os << " }\n\n";
351 }
352
353 os << "}\n";
354 }
355
DumpMethodCFG(const DexFile * dex_file,uint32_t dex_method_idx,std::ostream & os)356 void DumpMethodCFG(const DexFile* dex_file, uint32_t dex_method_idx, std::ostream& os) {
357 // This is painful, we need to find the code item. That means finding the class, and then
358 // iterating the table.
359 if (dex_method_idx >= dex_file->NumMethodIds()) {
360 os << "Could not find method-idx.";
361 return;
362 }
363 const DexFile::MethodId& method_id = dex_file->GetMethodId(dex_method_idx);
364
365 const DexFile::ClassDef* class_def = dex_file->FindClassDef(method_id.class_idx_);
366 if (class_def == nullptr) {
367 os << "Could not find class-def.";
368 return;
369 }
370
371 const uint8_t* class_data = dex_file->GetClassData(*class_def);
372 if (class_data == nullptr) {
373 os << "No class data.";
374 return;
375 }
376
377 ClassDataItemIterator it(*dex_file, class_data);
378 it.SkipAllFields();
379
380 // Find method, and dump it.
381 while (it.HasNextMethod()) {
382 uint32_t method_idx = it.GetMemberIndex();
383 if (method_idx == dex_method_idx) {
384 dumpMethodCFGImpl(dex_file, dex_method_idx, it.GetMethodCodeItem(), os);
385 return;
386 }
387 it.Next();
388 }
389
390 // Otherwise complain.
391 os << "Something went wrong, didn't find the method in the class data.";
392 }
393
394 } // namespace art
395