1 /*
2 * Copyright (C) 2016 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *
16 * Implementation file for control flow graph dumping for the dexdump utility.
17 */
18
19 #include "dexdump_cfg.h"
20
21 #include <inttypes.h>
22 #include <ostream>
23 #include <map>
24 #include <set>
25
26 #include "dex_file-inl.h"
27 #include "dex_instruction-inl.h"
28
29 namespace art {
30
dumpMethodCFGImpl(const DexFile * dex_file,uint32_t dex_method_idx,const DexFile::CodeItem * code_item,std::ostream & os)31 static void dumpMethodCFGImpl(const DexFile* dex_file,
32 uint32_t dex_method_idx,
33 const DexFile::CodeItem* code_item,
34 std::ostream& os) {
35 os << "digraph {\n";
36 os << " # /* " << dex_file->PrettyMethod(dex_method_idx, true) << " */\n";
37
38 std::set<uint32_t> dex_pc_is_branch_target;
39 {
40 // Go and populate.
41 const Instruction* inst = Instruction::At(code_item->insns_);
42 for (uint32_t dex_pc = 0;
43 dex_pc < code_item->insns_size_in_code_units_;
44 dex_pc += inst->SizeInCodeUnits(), inst = inst->Next()) {
45 if (inst->IsBranch()) {
46 dex_pc_is_branch_target.insert(dex_pc + inst->GetTargetOffset());
47 } else if (inst->IsSwitch()) {
48 const uint16_t* insns = code_item->insns_ + dex_pc;
49 int32_t switch_offset = insns[1] | (static_cast<int32_t>(insns[2]) << 16);
50 const uint16_t* switch_insns = insns + switch_offset;
51 uint32_t switch_count = switch_insns[1];
52 int32_t targets_offset;
53 if ((*insns & 0xff) == Instruction::PACKED_SWITCH) {
54 /* 0=sig, 1=count, 2/3=firstKey */
55 targets_offset = 4;
56 } else {
57 /* 0=sig, 1=count, 2..count*2 = keys */
58 targets_offset = 2 + 2 * switch_count;
59 }
60 for (uint32_t targ = 0; targ < switch_count; targ++) {
61 int32_t offset =
62 static_cast<int32_t>(switch_insns[targets_offset + targ * 2]) |
63 static_cast<int32_t>(switch_insns[targets_offset + targ * 2 + 1] << 16);
64 dex_pc_is_branch_target.insert(dex_pc + offset);
65 }
66 }
67 }
68 }
69
70 // Create nodes for "basic blocks."
71 std::map<uint32_t, uint32_t> dex_pc_to_node_id; // This only has entries for block starts.
72 std::map<uint32_t, uint32_t> dex_pc_to_incl_id; // This has entries for all dex pcs.
73
74 {
75 const Instruction* inst = Instruction::At(code_item->insns_);
76 bool first_in_block = true;
77 bool force_new_block = false;
78 for (uint32_t dex_pc = 0;
79 dex_pc < code_item->insns_size_in_code_units_;
80 dex_pc += inst->SizeInCodeUnits(), inst = inst->Next()) {
81 if (dex_pc == 0 ||
82 (dex_pc_is_branch_target.find(dex_pc) != dex_pc_is_branch_target.end()) ||
83 force_new_block) {
84 uint32_t id = dex_pc_to_node_id.size();
85 if (id > 0) {
86 // End last node.
87 os << "}\"];\n";
88 }
89 // Start next node.
90 os << " node" << id << " [shape=record,label=\"{";
91 dex_pc_to_node_id.insert(std::make_pair(dex_pc, id));
92 first_in_block = true;
93 force_new_block = false;
94 }
95
96 // Register instruction.
97 dex_pc_to_incl_id.insert(std::make_pair(dex_pc, dex_pc_to_node_id.size() - 1));
98
99 // Print instruction.
100 if (!first_in_block) {
101 os << " | ";
102 } else {
103 first_in_block = false;
104 }
105
106 // Dump the instruction. Need to escape '"', '<', '>', '{' and '}'.
107 os << "<" << "p" << dex_pc << ">";
108 os << " 0x" << std::hex << dex_pc << std::dec << ": ";
109 std::string inst_str = inst->DumpString(dex_file);
110 size_t cur_start = 0; // It's OK to start at zero, instruction dumps don't start with chars
111 // we need to escape.
112 while (cur_start != std::string::npos) {
113 size_t next_escape = inst_str.find_first_of("\"{}<>", cur_start + 1);
114 if (next_escape == std::string::npos) {
115 os << inst_str.substr(cur_start, inst_str.size() - cur_start);
116 break;
117 } else {
118 os << inst_str.substr(cur_start, next_escape - cur_start);
119 // Escape all necessary characters.
120 while (next_escape < inst_str.size()) {
121 char c = inst_str.at(next_escape);
122 if (c == '"' || c == '{' || c == '}' || c == '<' || c == '>') {
123 os << '\\' << c;
124 } else {
125 break;
126 }
127 next_escape++;
128 }
129 if (next_escape >= inst_str.size()) {
130 next_escape = std::string::npos;
131 }
132 cur_start = next_escape;
133 }
134 }
135
136 // Force a new block for some fall-throughs and some instructions that terminate the "local"
137 // control flow.
138 force_new_block = inst->IsSwitch() || inst->IsBasicBlockEnd();
139 }
140 // Close last node.
141 if (dex_pc_to_node_id.size() > 0) {
142 os << "}\"];\n";
143 }
144 }
145
146 // Create edges between them.
147 {
148 std::ostringstream regular_edges;
149 std::ostringstream taken_edges;
150 std::ostringstream exception_edges;
151
152 // Common set of exception edges.
153 std::set<uint32_t> exception_targets;
154
155 // These blocks (given by the first dex pc) need exception per dex-pc handling in a second
156 // pass. In the first pass we try and see whether we can use a common set of edges.
157 std::set<uint32_t> blocks_with_detailed_exceptions;
158
159 {
160 uint32_t last_node_id = std::numeric_limits<uint32_t>::max();
161 uint32_t old_dex_pc = 0;
162 uint32_t block_start_dex_pc = std::numeric_limits<uint32_t>::max();
163 const Instruction* inst = Instruction::At(code_item->insns_);
164 for (uint32_t dex_pc = 0;
165 dex_pc < code_item->insns_size_in_code_units_;
166 old_dex_pc = dex_pc, dex_pc += inst->SizeInCodeUnits(), inst = inst->Next()) {
167 {
168 auto it = dex_pc_to_node_id.find(dex_pc);
169 if (it != dex_pc_to_node_id.end()) {
170 if (!exception_targets.empty()) {
171 // It seems the last block had common exception handlers. Add the exception edges now.
172 uint32_t node_id = dex_pc_to_node_id.find(block_start_dex_pc)->second;
173 for (uint32_t handler_pc : exception_targets) {
174 auto node_id_it = dex_pc_to_incl_id.find(handler_pc);
175 if (node_id_it != dex_pc_to_incl_id.end()) {
176 exception_edges << " node" << node_id
177 << " -> node" << node_id_it->second << ":p" << handler_pc
178 << ";\n";
179 }
180 }
181 exception_targets.clear();
182 }
183
184 block_start_dex_pc = dex_pc;
185
186 // Seems to be a fall-through, connect to last_node_id. May be spurious edges for things
187 // like switch data.
188 uint32_t old_last = last_node_id;
189 last_node_id = it->second;
190 if (old_last != std::numeric_limits<uint32_t>::max()) {
191 regular_edges << " node" << old_last << ":p" << old_dex_pc
192 << " -> node" << last_node_id << ":p" << dex_pc
193 << ";\n";
194 }
195 }
196
197 // Look at the exceptions of the first entry.
198 CatchHandlerIterator catch_it(*code_item, dex_pc);
199 for (; catch_it.HasNext(); catch_it.Next()) {
200 exception_targets.insert(catch_it.GetHandlerAddress());
201 }
202 }
203
204 // Handle instruction.
205
206 // Branch: something with at most two targets.
207 if (inst->IsBranch()) {
208 const int32_t offset = inst->GetTargetOffset();
209 const bool conditional = !inst->IsUnconditional();
210
211 auto target_it = dex_pc_to_node_id.find(dex_pc + offset);
212 if (target_it != dex_pc_to_node_id.end()) {
213 taken_edges << " node" << last_node_id << ":p" << dex_pc
214 << " -> node" << target_it->second << ":p" << (dex_pc + offset)
215 << ";\n";
216 }
217 if (!conditional) {
218 // No fall-through.
219 last_node_id = std::numeric_limits<uint32_t>::max();
220 }
221 } else if (inst->IsSwitch()) {
222 // TODO: Iterate through all switch targets.
223 const uint16_t* insns = code_item->insns_ + dex_pc;
224 /* make sure the start of the switch is in range */
225 int32_t switch_offset = insns[1] | (static_cast<int32_t>(insns[2]) << 16);
226 /* offset to switch table is a relative branch-style offset */
227 const uint16_t* switch_insns = insns + switch_offset;
228 uint32_t switch_count = switch_insns[1];
229 int32_t targets_offset;
230 if ((*insns & 0xff) == Instruction::PACKED_SWITCH) {
231 /* 0=sig, 1=count, 2/3=firstKey */
232 targets_offset = 4;
233 } else {
234 /* 0=sig, 1=count, 2..count*2 = keys */
235 targets_offset = 2 + 2 * switch_count;
236 }
237 /* make sure the end of the switch is in range */
238 /* verify each switch target */
239 for (uint32_t targ = 0; targ < switch_count; targ++) {
240 int32_t offset =
241 static_cast<int32_t>(switch_insns[targets_offset + targ * 2]) |
242 static_cast<int32_t>(switch_insns[targets_offset + targ * 2 + 1] << 16);
243 int32_t abs_offset = dex_pc + offset;
244 auto target_it = dex_pc_to_node_id.find(abs_offset);
245 if (target_it != dex_pc_to_node_id.end()) {
246 // TODO: value label.
247 taken_edges << " node" << last_node_id << ":p" << dex_pc
248 << " -> node" << target_it->second << ":p" << (abs_offset)
249 << ";\n";
250 }
251 }
252 }
253
254 // Exception edges. If this is not the first instruction in the block
255 if (block_start_dex_pc != dex_pc) {
256 std::set<uint32_t> current_handler_pcs;
257 CatchHandlerIterator catch_it(*code_item, dex_pc);
258 for (; catch_it.HasNext(); catch_it.Next()) {
259 current_handler_pcs.insert(catch_it.GetHandlerAddress());
260 }
261 if (current_handler_pcs != exception_targets) {
262 exception_targets.clear(); // Clear so we don't do something at the end.
263 blocks_with_detailed_exceptions.insert(block_start_dex_pc);
264 }
265 }
266
267 if (inst->IsReturn() ||
268 (inst->Opcode() == Instruction::THROW) ||
269 (inst->IsBranch() && inst->IsUnconditional())) {
270 // No fall-through.
271 last_node_id = std::numeric_limits<uint32_t>::max();
272 }
273 }
274 // Finish up the last block, if it had common exceptions.
275 if (!exception_targets.empty()) {
276 // It seems the last block had common exception handlers. Add the exception edges now.
277 uint32_t node_id = dex_pc_to_node_id.find(block_start_dex_pc)->second;
278 for (uint32_t handler_pc : exception_targets) {
279 auto node_id_it = dex_pc_to_incl_id.find(handler_pc);
280 if (node_id_it != dex_pc_to_incl_id.end()) {
281 exception_edges << " node" << node_id
282 << " -> node" << node_id_it->second << ":p" << handler_pc
283 << ";\n";
284 }
285 }
286 exception_targets.clear();
287 }
288 }
289
290 // Second pass for detailed exception blocks.
291 // TODO
292 // Exception edges. If this is not the first instruction in the block
293 for (uint32_t dex_pc : blocks_with_detailed_exceptions) {
294 const Instruction* inst = Instruction::At(&code_item->insns_[dex_pc]);
295 uint32_t this_node_id = dex_pc_to_incl_id.find(dex_pc)->second;
296 while (true) {
297 CatchHandlerIterator catch_it(*code_item, dex_pc);
298 if (catch_it.HasNext()) {
299 std::set<uint32_t> handled_targets;
300 for (; catch_it.HasNext(); catch_it.Next()) {
301 uint32_t handler_pc = catch_it.GetHandlerAddress();
302 auto it = handled_targets.find(handler_pc);
303 if (it == handled_targets.end()) {
304 auto node_id_it = dex_pc_to_incl_id.find(handler_pc);
305 if (node_id_it != dex_pc_to_incl_id.end()) {
306 exception_edges << " node" << this_node_id << ":p" << dex_pc
307 << " -> node" << node_id_it->second << ":p" << handler_pc
308 << ";\n";
309 }
310
311 // Mark as done.
312 handled_targets.insert(handler_pc);
313 }
314 }
315 }
316 if (inst->IsBasicBlockEnd()) {
317 break;
318 }
319
320 // Loop update. Have a break-out if the next instruction is a branch target and thus in
321 // another block.
322 dex_pc += inst->SizeInCodeUnits();
323 if (dex_pc >= code_item->insns_size_in_code_units_) {
324 break;
325 }
326 if (dex_pc_to_node_id.find(dex_pc) != dex_pc_to_node_id.end()) {
327 break;
328 }
329 inst = inst->Next();
330 }
331 }
332
333 // Write out the sub-graphs to make edges styled.
334 os << "\n";
335 os << " subgraph regular_edges {\n";
336 os << " edge [color=\"#000000\",weight=.3,len=3];\n\n";
337 os << " " << regular_edges.str() << "\n";
338 os << " }\n\n";
339
340 os << " subgraph taken_edges {\n";
341 os << " edge [color=\"#00FF00\",weight=.3,len=3];\n\n";
342 os << " " << taken_edges.str() << "\n";
343 os << " }\n\n";
344
345 os << " subgraph exception_edges {\n";
346 os << " edge [color=\"#FF0000\",weight=.3,len=3];\n\n";
347 os << " " << exception_edges.str() << "\n";
348 os << " }\n\n";
349 }
350
351 os << "}\n";
352 }
353
DumpMethodCFG(const DexFile * dex_file,uint32_t dex_method_idx,std::ostream & os)354 void DumpMethodCFG(const DexFile* dex_file, uint32_t dex_method_idx, std::ostream& os) {
355 // This is painful, we need to find the code item. That means finding the class, and then
356 // iterating the table.
357 if (dex_method_idx >= dex_file->NumMethodIds()) {
358 os << "Could not find method-idx.";
359 return;
360 }
361 const DexFile::MethodId& method_id = dex_file->GetMethodId(dex_method_idx);
362
363 const DexFile::ClassDef* class_def = dex_file->FindClassDef(method_id.class_idx_);
364 if (class_def == nullptr) {
365 os << "Could not find class-def.";
366 return;
367 }
368
369 const uint8_t* class_data = dex_file->GetClassData(*class_def);
370 if (class_data == nullptr) {
371 os << "No class data.";
372 return;
373 }
374
375 ClassDataItemIterator it(*dex_file, class_data);
376 it.SkipAllFields();
377
378 // Find method, and dump it.
379 while (it.HasNextDirectMethod() || it.HasNextVirtualMethod()) {
380 uint32_t method_idx = it.GetMemberIndex();
381 if (method_idx == dex_method_idx) {
382 dumpMethodCFGImpl(dex_file, dex_method_idx, it.GetMethodCodeItem(), os);
383 return;
384 }
385 it.Next();
386 }
387
388 // Otherwise complain.
389 os << "Something went wrong, didn't find the method in the class data.";
390 }
391
392 } // namespace art
393