1 /*
2 * Copyright (C) 2016 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *
16 * Implementation file of the dexlayout utility.
17 *
18 * This is a tool to read dex files into an internal representation,
19 * reorganize the representation, and emit dex files with a better
20 * file layout.
21 */
22
23 #include "dexlayout.h"
24
25 #include <inttypes.h>
26 #include <stdio.h>
27
28 #include <iostream>
29 #include <memory>
30 #include <sstream>
31 #include <unordered_set>
32 #include <vector>
33
34 #include "android-base/stringprintf.h"
35
36 #include "base/logging.h" // For VLOG_IS_ON.
37 #include "base/hiddenapi_flags.h"
38 #include "base/mem_map.h"
39 #include "base/mman.h" // For the PROT_* and MAP_* constants.
40 #include "base/os.h"
41 #include "base/utils.h"
42 #include "dex/art_dex_file_loader.h"
43 #include "dex/descriptors_names.h"
44 #include "dex/dex_file-inl.h"
45 #include "dex/dex_file_layout.h"
46 #include "dex/dex_file_loader.h"
47 #include "dex/dex_file_types.h"
48 #include "dex/dex_file_verifier.h"
49 #include "dex/dex_instruction-inl.h"
50 #include "dex_ir_builder.h"
51 #include "dex_verify.h"
52 #include "dex_visualize.h"
53 #include "dex_writer.h"
54 #include "profile/profile_compilation_info.h"
55
56 namespace art {
57
58 using android::base::StringPrintf;
59
/*
 * Selects which table of access-flag names CreateAccessFlagStr() uses.
 * kAccessForMAX is the number of tables, not a valid selector.
 */
enum AccessFor {
  kAccessForClass = 0,
  kAccessForMethod = 1,
  kAccessForField = 2,
  kAccessForMAX
};

// Number of low-order access-flag bits that have an entry in each name table.
const int kNumFlags = 18;
67
/*
 * Reads an unsigned 16-bit value stored least-significant byte first.
 *
 * src must point to at least two readable bytes.
 */
static inline uint16_t Get2LE(unsigned char const* src) {
  const unsigned int low_byte = src[0];
  const unsigned int high_byte = src[1];
  return static_cast<uint16_t>((high_byte << 8) | low_byte);
}
74
/*
 * Extracts the simple class name from a type descriptor: the text after the
 * last '/' (or after the leading 'L' when there is no package), without the
 * trailing ';'. For example, "Ljava/lang/String;" becomes "String".
 *
 * str must be a non-null, NUL-terminated string. Descriptors too short to
 * contain a name (including the empty string) yield an empty result.
 */
static std::string DescriptorClassToName(const char* str) {
  const std::string descriptor(str);

  // The name starts one past the last '/', or one past the 'L' if there is none.
  const size_t last_slash = descriptor.rfind('/');
  const size_t start = (last_slash == std::string::npos) ? 1 : last_slash + 1;

  // Nothing after the prefix: bail out instead of letting substr() throw on an
  // empty descriptor or relying on size_t wrap-around for the length.
  if (start >= descriptor.size()) {
    return std::string();
  }

  // Copy the class name, trimming the trailing ';'.
  return descriptor.substr(start, descriptor.size() - 1 - start);
}
95
/*
 * Maps a boolean to the literal text "true" or "false".
 */
static const char* StrBool(bool val) {
  if (val) {
    return "true";
  }
  return "false";
}
102
/*
 * Maps a boolean to its textual form wrapped in double quotes.
 */
static const char* QuotedBool(bool val) {
  if (val) {
    return "\"true\"";
  }
  return "\"false\"";
}
109
110 /*
111 * Returns a quoted string representing the access flags.
112 */
QuotedVisibility(uint32_t access_flags)113 static const char* QuotedVisibility(uint32_t access_flags) {
114 if (access_flags & kAccPublic) {
115 return "\"public\"";
116 } else if (access_flags & kAccProtected) {
117 return "\"protected\"";
118 } else if (access_flags & kAccPrivate) {
119 return "\"private\"";
120 } else {
121 return "\"package\"";
122 }
123 }
124
/*
 * Returns how many bits of val are set.
 */
static int CountOnes(uint32_t val) {
  int ones = 0;
  // Each iteration clears the lowest set bit, so the loop runs once per '1' bit.
  while (val != 0) {
    val &= val - 1;
    ++ones;
  }
  return ones;
}
133
134 /*
135 * Creates a new string with human-readable access flags.
136 *
137 * In the base language the access_flags fields are type uint16_t; in Dalvik they're uint32_t.
138 */
CreateAccessFlagStr(uint32_t flags,AccessFor for_what)139 static char* CreateAccessFlagStr(uint32_t flags, AccessFor for_what) {
140 static const char* kAccessStrings[kAccessForMAX][kNumFlags] = {
141 {
142 "PUBLIC", /* 0x00001 */
143 "PRIVATE", /* 0x00002 */
144 "PROTECTED", /* 0x00004 */
145 "STATIC", /* 0x00008 */
146 "FINAL", /* 0x00010 */
147 "?", /* 0x00020 */
148 "?", /* 0x00040 */
149 "?", /* 0x00080 */
150 "?", /* 0x00100 */
151 "INTERFACE", /* 0x00200 */
152 "ABSTRACT", /* 0x00400 */
153 "?", /* 0x00800 */
154 "SYNTHETIC", /* 0x01000 */
155 "ANNOTATION", /* 0x02000 */
156 "ENUM", /* 0x04000 */
157 "?", /* 0x08000 */
158 "VERIFIED", /* 0x10000 */
159 "OPTIMIZED", /* 0x20000 */
160 }, {
161 "PUBLIC", /* 0x00001 */
162 "PRIVATE", /* 0x00002 */
163 "PROTECTED", /* 0x00004 */
164 "STATIC", /* 0x00008 */
165 "FINAL", /* 0x00010 */
166 "SYNCHRONIZED", /* 0x00020 */
167 "BRIDGE", /* 0x00040 */
168 "VARARGS", /* 0x00080 */
169 "NATIVE", /* 0x00100 */
170 "?", /* 0x00200 */
171 "ABSTRACT", /* 0x00400 */
172 "STRICT", /* 0x00800 */
173 "SYNTHETIC", /* 0x01000 */
174 "?", /* 0x02000 */
175 "?", /* 0x04000 */
176 "MIRANDA", /* 0x08000 */
177 "CONSTRUCTOR", /* 0x10000 */
178 "DECLARED_SYNCHRONIZED", /* 0x20000 */
179 }, {
180 "PUBLIC", /* 0x00001 */
181 "PRIVATE", /* 0x00002 */
182 "PROTECTED", /* 0x00004 */
183 "STATIC", /* 0x00008 */
184 "FINAL", /* 0x00010 */
185 "?", /* 0x00020 */
186 "VOLATILE", /* 0x00040 */
187 "TRANSIENT", /* 0x00080 */
188 "?", /* 0x00100 */
189 "?", /* 0x00200 */
190 "?", /* 0x00400 */
191 "?", /* 0x00800 */
192 "SYNTHETIC", /* 0x01000 */
193 "?", /* 0x02000 */
194 "ENUM", /* 0x04000 */
195 "?", /* 0x08000 */
196 "?", /* 0x10000 */
197 "?", /* 0x20000 */
198 },
199 };
200
201 // Allocate enough storage to hold the expected number of strings,
202 // plus a space between each. We over-allocate, using the longest
203 // string above as the base metric.
204 const int kLongest = 21; // The strlen of longest string above.
205 const int count = CountOnes(flags);
206 char* str;
207 char* cp;
208 cp = str = reinterpret_cast<char*>(malloc(count * (kLongest + 1) + 1));
209
210 for (int i = 0; i < kNumFlags; i++) {
211 if (flags & 0x01) {
212 const char* accessStr = kAccessStrings[for_what][i];
213 const int len = strlen(accessStr);
214 if (cp != str) {
215 *cp++ = ' ';
216 }
217 memcpy(cp, accessStr, len);
218 cp += len;
219 }
220 flags >>= 1;
221 } // for
222
223 *cp = '\0';
224 return str;
225 }
226
GetHiddenapiFlagStr(uint32_t hiddenapi_flags)227 static std::string GetHiddenapiFlagStr(uint32_t hiddenapi_flags) {
228 std::stringstream ss;
229 hiddenapi::ApiList(hiddenapi_flags).Dump(ss);
230 std::string api_list = ss.str();
231 std::transform(api_list.begin(), api_list.end(), api_list.begin(), ::toupper);
232 return api_list;
233 }
234
GetSignatureForProtoId(const dex_ir::ProtoId * proto)235 static std::string GetSignatureForProtoId(const dex_ir::ProtoId* proto) {
236 if (proto == nullptr) {
237 return "<no signature>";
238 }
239
240 std::string result("(");
241 const dex_ir::TypeList* type_list = proto->Parameters();
242 if (type_list != nullptr) {
243 for (const dex_ir::TypeId* type_id : *type_list->GetTypeList()) {
244 result += type_id->GetStringId()->Data();
245 }
246 }
247 result += ")";
248 result += proto->ReturnType()->GetStringId()->Data();
249 return result;
250 }
251
/*
 * Copies "len" bytes from "data" to "out" in a printable form:
 *   - NUL becomes the two characters "\0" and newline becomes "\n",
 *   - any other byte below 0x20 becomes '.',
 *   - any byte at or above 0x80 becomes '?',
 *   - everything else is copied unchanged.
 *
 * The output buffer must be able to hold (2*len)+1 bytes. The result is
 * NUL-terminated.
 */
static void Asciify(char* out, const unsigned char* data, size_t len) {
  const unsigned char* const end = data + len;
  for (const unsigned char* cursor = data; cursor != end; ++cursor) {
    const unsigned char byte = *cursor;
    if (byte == '\0') {
      *out++ = '\\';
      *out++ = '0';
    } else if (byte == '\n') {
      *out++ = '\\';
      *out++ = 'n';
    } else if (byte < 0x20) {
      // Could do more here, but we don't need them yet.
      *out++ = '.';
    } else if (byte >= 0x80) {
      *out++ = '?';
    } else {
      *out++ = byte;
    }
  }
  *out = '\0';
}
/*
 * Number of characters escapeString() emits for each input byte value:
 *   1 - the byte is copied unchanged,
 *   2 - a backslash followed by one character (\b \t \n \f \r \" \\),
 *   4 - a backslash followed by three octal digits (other control bytes, DEL).
 */
/* clang-format off */
constexpr char kEscapedLength[256] = {
  4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 4, 2, 2, 4, 4,  // 0x00: \b, \t, \n, \f, \r
  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,  // 0x10
  1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 0x20: '"'
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 0x30: '0'..'9'
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 0x40: 'A'..'O'
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1,  // 0x50: 'P'..'Z', '\'
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 0x60: 'a'..'o'
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4,  // 0x70: 'p'..'z', DEL
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 0x80..0xff: non-ASCII, keep
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
};
/* clang-format on */

/*
 * Returns true if any byte of s has an escaped form, i.e. escapeString(s)
 * would differ from s.
 */
static bool needsEscape(std::string_view s) {
  for (unsigned char c : s) {
    if (kEscapedLength[c] != 1) {
      return true;
    }
  }
  return false;
}

/*
 * Returns a copy of s in which double quotes, backslashes and ASCII control
 * bytes are replaced by backslash escapes. Bytes with a short escape use it
 * (\b \t \n \f \r \" \\); the remaining control bytes and DEL are written as a
 * backslash followed by exactly three octal digits. All other bytes, including
 * those at or above 0x80, are copied unchanged.
 */
std::string escapeString(std::string_view s) {
  std::string escaped;
  escaped.reserve(s.size());
  for (unsigned char c : s) {
    switch (kEscapedLength[c]) {
      case 1:
        escaped.push_back(static_cast<char>(c));
        break;
      case 2:
        escaped.push_back('\\');
        switch (c) {
          case '\b':
            escaped.push_back('b');
            break;
          case '\f':
            escaped.push_back('f');
            break;
          case '\n':
            escaped.push_back('n');
            break;
          case '\r':
            escaped.push_back('r');
            break;
          case '\t':
            escaped.push_back('t');
            break;
          case '\"':
            escaped.push_back('"');
            break;
          case '\\':
            escaped.push_back('\\');
            break;
        }
        break;
      case 4:
        // Append the digits as characters: '0' + n has type int, so it must be
        // narrowed back to char or a decimal number would be emitted instead.
        escaped.push_back('\\');
        escaped.push_back(static_cast<char>('0' + (c / 64)));
        escaped.push_back(static_cast<char>('0' + ((c % 64) / 8)));
        escaped.push_back(static_cast<char>('0' + (c % 8)));
        break;
    }
  }
  return escaped;
}
357
358 /*
359 * Dumps a string value with some escape characters.
360 */
DumpEscapedString(std::string_view s,FILE * out_file)361 static void DumpEscapedString(std::string_view s, FILE* out_file) {
362 fputs("\"", out_file);
363 if (needsEscape(s)) {
364 std::string e = escapeString(s);
365 fputs(e.c_str(), out_file);
366 } else {
367 for (char c : s) {
368 fputc(c, out_file);
369 }
370 }
371 fputs("\"", out_file);
372 }
373
/*
 * Writes the NUL-terminated string p to out_file in a form that is safe inside
 * a double-quoted XML attribute value.
 *
 * The markup characters & < > " are replaced by their predefined entities, and
 * tab, newline and carriage return are written as numeric character
 * references. Every other character is written unchanged.
 */
static void DumpXmlAttribute(const char* p, FILE* out_file) {
  for (; *p != '\0'; ++p) {
    switch (*p) {
      case '&':
        fputs("&amp;", out_file);
        break;
      case '<':
        fputs("&lt;", out_file);
        break;
      case '>':
        fputs("&gt;", out_file);
        break;
      case '"':
        fputs("&quot;", out_file);
        break;
      case '\t':
        fputs("&#x9;", out_file);
        break;
      case '\n':
        fputs("&#xA;", out_file);
        break;
      case '\r':
        fputs("&#xD;", out_file);
        break;
      default:
        putc(*p, out_file);
        break;
    }  // switch
  }  // for
}
406
407 /*
408 * Helper for dumpInstruction(), which builds the string
409 * representation for the index in the given instruction.
410 * Returns a pointer to a buffer of sufficient size.
411 */
IndexString(dex_ir::Header * header,const Instruction * dec_insn,size_t buf_size)412 static std::unique_ptr<char[]> IndexString(dex_ir::Header* header,
413 const Instruction* dec_insn,
414 size_t buf_size) {
415 std::unique_ptr<char[]> buf(new char[buf_size]);
416 // Determine index and width of the string.
417 uint32_t index = 0;
418 uint32_t secondary_index = dex::kDexNoIndex;
419 uint32_t width = 4;
420 switch (Instruction::FormatOf(dec_insn->Opcode())) {
421 // SOME NOT SUPPORTED:
422 // case Instruction::k20bc:
423 case Instruction::k21c:
424 case Instruction::k35c:
425 // case Instruction::k35ms:
426 case Instruction::k3rc:
427 // case Instruction::k3rms:
428 // case Instruction::k35mi:
429 // case Instruction::k3rmi:
430 index = dec_insn->VRegB();
431 width = 4;
432 break;
433 case Instruction::k31c:
434 index = dec_insn->VRegB();
435 width = 8;
436 break;
437 case Instruction::k22c:
438 // case Instruction::k22cs:
439 index = dec_insn->VRegC();
440 width = 4;
441 break;
442 case Instruction::k45cc:
443 case Instruction::k4rcc:
444 index = dec_insn->VRegB();
445 secondary_index = dec_insn->VRegH();
446 width = 4;
447 break;
448 default:
449 break;
450 } // switch
451
452 // Determine index type.
453 size_t outSize = 0;
454 switch (Instruction::IndexTypeOf(dec_insn->Opcode())) {
455 case Instruction::kIndexUnknown:
456 // This function should never get called for this type, but do
457 // something sensible here, just to help with debugging.
458 outSize = snprintf(buf.get(), buf_size, "<unknown-index>");
459 break;
460 case Instruction::kIndexNone:
461 // This function should never get called for this type, but do
462 // something sensible here, just to help with debugging.
463 outSize = snprintf(buf.get(), buf_size, "<no-index>");
464 break;
465 case Instruction::kIndexTypeRef:
466 if (index < header->TypeIds().Size()) {
467 const char* tp = header->TypeIds()[index]->GetStringId()->Data();
468 outSize = snprintf(buf.get(), buf_size, "%s // type@%0*x", tp, width, index);
469 } else {
470 outSize = snprintf(buf.get(), buf_size, "<type?> // type@%0*x", width, index);
471 }
472 break;
473 case Instruction::kIndexStringRef:
474 if (index < header->StringIds().Size()) {
475 const char* st = header->StringIds()[index]->Data();
476 if (needsEscape(std::string_view(st))) {
477 std::string escaped = escapeString(st);
478 outSize =
479 snprintf(buf.get(), buf_size, "\"%s\" // string@%0*x", escaped.c_str(), width, index);
480 } else {
481 outSize = snprintf(buf.get(), buf_size, "\"%s\" // string@%0*x", st, width, index);
482 }
483 } else {
484 outSize = snprintf(buf.get(), buf_size, "<string?> // string@%0*x", width, index);
485 }
486 break;
487 case Instruction::kIndexMethodRef:
488 if (index < header->MethodIds().Size()) {
489 dex_ir::MethodId* method_id = header->MethodIds()[index];
490 const char* name = method_id->Name()->Data();
491 std::string type_descriptor = GetSignatureForProtoId(method_id->Proto());
492 const char* back_descriptor = method_id->Class()->GetStringId()->Data();
493 outSize = snprintf(buf.get(), buf_size, "%s.%s:%s // method@%0*x",
494 back_descriptor, name, type_descriptor.c_str(), width, index);
495 } else {
496 outSize = snprintf(buf.get(), buf_size, "<method?> // method@%0*x", width, index);
497 }
498 break;
499 case Instruction::kIndexFieldRef:
500 if (index < header->FieldIds().Size()) {
501 dex_ir::FieldId* field_id = header->FieldIds()[index];
502 const char* name = field_id->Name()->Data();
503 const char* type_descriptor = field_id->Type()->GetStringId()->Data();
504 const char* back_descriptor = field_id->Class()->GetStringId()->Data();
505 outSize = snprintf(buf.get(), buf_size, "%s.%s:%s // field@%0*x",
506 back_descriptor, name, type_descriptor, width, index);
507 } else {
508 outSize = snprintf(buf.get(), buf_size, "<field?> // field@%0*x", width, index);
509 }
510 break;
511 case Instruction::kIndexVtableOffset:
512 outSize = snprintf(buf.get(), buf_size, "[%0*x] // vtable #%0*x",
513 width, index, width, index);
514 break;
515 case Instruction::kIndexFieldOffset:
516 outSize = snprintf(buf.get(), buf_size, "[obj+%0*x]", width, index);
517 break;
518 case Instruction::kIndexMethodAndProtoRef: {
519 std::string method("<method?>");
520 std::string proto("<proto?>");
521 if (index < header->MethodIds().Size()) {
522 dex_ir::MethodId* method_id = header->MethodIds()[index];
523 const char* name = method_id->Name()->Data();
524 std::string type_descriptor = GetSignatureForProtoId(method_id->Proto());
525 const char* back_descriptor = method_id->Class()->GetStringId()->Data();
526 method = StringPrintf("%s.%s:%s", back_descriptor, name, type_descriptor.c_str());
527 }
528 if (secondary_index < header->ProtoIds().Size()) {
529 dex_ir::ProtoId* proto_id = header->ProtoIds()[secondary_index];
530 proto = GetSignatureForProtoId(proto_id);
531 }
532 outSize = snprintf(buf.get(), buf_size, "%s, %s // method@%0*x, proto@%0*x",
533 method.c_str(), proto.c_str(), width, index, width, secondary_index);
534 }
535 break;
536 // SOME NOT SUPPORTED:
537 // case Instruction::kIndexVaries:
538 // case Instruction::kIndexInlineMethod:
539 default:
540 outSize = snprintf(buf.get(), buf_size, "<?>");
541 break;
542 } // switch
543
544 // Determine success of string construction.
545 if (outSize >= buf_size) {
546 // The buffer wasn't big enough; retry with computed size. Note: snprintf()
547 // doesn't count/ the '\0' as part of its returned size, so we add explicit
548 // space for it here.
549 return IndexString(header, dec_insn, outSize + 1);
550 }
551 return buf;
552 }
553
554 /*
555 * Dumps encoded annotation.
556 */
DumpEncodedAnnotation(dex_ir::EncodedAnnotation * annotation)557 void DexLayout::DumpEncodedAnnotation(dex_ir::EncodedAnnotation* annotation) {
558 fputs(annotation->GetType()->GetStringId()->Data(), out_file_);
559 // Display all name=value pairs.
560 for (auto& subannotation : *annotation->GetAnnotationElements()) {
561 fputc(' ', out_file_);
562 fputs(subannotation->GetName()->Data(), out_file_);
563 fputc('=', out_file_);
564 DumpEncodedValue(subannotation->GetValue());
565 }
566 }
567 /*
568 * Dumps encoded value.
569 */
DumpEncodedValue(const dex_ir::EncodedValue * data)570 void DexLayout::DumpEncodedValue(const dex_ir::EncodedValue* data) {
571 switch (data->Type()) {
572 case DexFile::kDexAnnotationByte:
573 fprintf(out_file_, "%" PRId8, data->GetByte());
574 break;
575 case DexFile::kDexAnnotationShort:
576 fprintf(out_file_, "%" PRId16, data->GetShort());
577 break;
578 case DexFile::kDexAnnotationChar:
579 fprintf(out_file_, "%" PRIu16, data->GetChar());
580 break;
581 case DexFile::kDexAnnotationInt:
582 fprintf(out_file_, "%" PRId32, data->GetInt());
583 break;
584 case DexFile::kDexAnnotationLong:
585 fprintf(out_file_, "%" PRId64, data->GetLong());
586 break;
587 case DexFile::kDexAnnotationFloat: {
588 fprintf(out_file_, "%g", data->GetFloat());
589 break;
590 }
591 case DexFile::kDexAnnotationDouble: {
592 fprintf(out_file_, "%g", data->GetDouble());
593 break;
594 }
595 case DexFile::kDexAnnotationString: {
596 dex_ir::StringId* string_id = data->GetStringId();
597 if (options_.output_format_ == kOutputPlain) {
598 DumpEscapedString(string_id->Data(), out_file_);
599 } else {
600 DumpXmlAttribute(string_id->Data(), out_file_);
601 }
602 break;
603 }
604 case DexFile::kDexAnnotationType: {
605 dex_ir::TypeId* type_id = data->GetTypeId();
606 fputs(type_id->GetStringId()->Data(), out_file_);
607 break;
608 }
609 case DexFile::kDexAnnotationField:
610 case DexFile::kDexAnnotationEnum: {
611 dex_ir::FieldId* field_id = data->GetFieldId();
612 fputs(field_id->Name()->Data(), out_file_);
613 break;
614 }
615 case DexFile::kDexAnnotationMethod: {
616 dex_ir::MethodId* method_id = data->GetMethodId();
617 fputs(method_id->Name()->Data(), out_file_);
618 break;
619 }
620 case DexFile::kDexAnnotationArray: {
621 fputc('{', out_file_);
622 // Display all elements.
623 for (auto& value : *data->GetEncodedArray()->GetEncodedValues()) {
624 fputc(' ', out_file_);
625 DumpEncodedValue(value.get());
626 }
627 fputs(" }", out_file_);
628 break;
629 }
630 case DexFile::kDexAnnotationAnnotation: {
631 DumpEncodedAnnotation(data->GetEncodedAnnotation());
632 break;
633 }
634 case DexFile::kDexAnnotationNull:
635 fputs("null", out_file_);
636 break;
637 case DexFile::kDexAnnotationBoolean:
638 fputs(StrBool(data->GetBoolean()), out_file_);
639 break;
640 default:
641 fputs("????", out_file_);
642 break;
643 } // switch
644 }
645
646 /*
647 * Dumps the file header.
648 */
DumpFileHeader()649 void DexLayout::DumpFileHeader() {
650 char sanitized[8 * 2 + 1];
651 fprintf(out_file_, "DEX file header:\n");
652 Asciify(sanitized, header_->Magic(), 8);
653 fprintf(out_file_, "magic : '%s'\n", sanitized);
654 fprintf(out_file_, "checksum : %08x\n", header_->Checksum());
655 fprintf(out_file_, "signature : %02x%02x...%02x%02x\n",
656 header_->Signature()[0], header_->Signature()[1],
657 header_->Signature()[DexFile::kSha1DigestSize - 2],
658 header_->Signature()[DexFile::kSha1DigestSize - 1]);
659 fprintf(out_file_, "file_size : %d\n", header_->FileSize());
660 fprintf(out_file_, "header_size : %d\n", header_->HeaderSize());
661 fprintf(out_file_, "link_size : %d\n", header_->LinkSize());
662 fprintf(out_file_, "link_off : %d (0x%06x)\n",
663 header_->LinkOffset(), header_->LinkOffset());
664 fprintf(out_file_, "string_ids_size : %d\n", header_->StringIds().Size());
665 fprintf(out_file_, "string_ids_off : %d (0x%06x)\n",
666 header_->StringIds().GetOffset(), header_->StringIds().GetOffset());
667 fprintf(out_file_, "type_ids_size : %d\n", header_->TypeIds().Size());
668 fprintf(out_file_, "type_ids_off : %d (0x%06x)\n",
669 header_->TypeIds().GetOffset(), header_->TypeIds().GetOffset());
670 fprintf(out_file_, "proto_ids_size : %d\n", header_->ProtoIds().Size());
671 fprintf(out_file_, "proto_ids_off : %d (0x%06x)\n",
672 header_->ProtoIds().GetOffset(), header_->ProtoIds().GetOffset());
673 fprintf(out_file_, "field_ids_size : %d\n", header_->FieldIds().Size());
674 fprintf(out_file_, "field_ids_off : %d (0x%06x)\n",
675 header_->FieldIds().GetOffset(), header_->FieldIds().GetOffset());
676 fprintf(out_file_, "method_ids_size : %d\n", header_->MethodIds().Size());
677 fprintf(out_file_, "method_ids_off : %d (0x%06x)\n",
678 header_->MethodIds().GetOffset(), header_->MethodIds().GetOffset());
679 fprintf(out_file_, "class_defs_size : %d\n", header_->ClassDefs().Size());
680 fprintf(out_file_, "class_defs_off : %d (0x%06x)\n",
681 header_->ClassDefs().GetOffset(), header_->ClassDefs().GetOffset());
682 fprintf(out_file_, "data_size : %d\n", header_->DataSize());
683 fprintf(out_file_, "data_off : %d (0x%06x)\n\n",
684 header_->DataOffset(), header_->DataOffset());
685 }
686
687 /*
688 * Dumps a class_def_item.
689 */
DumpClassDef(int idx)690 void DexLayout::DumpClassDef(int idx) {
691 // General class information.
692 dex_ir::ClassDef* class_def = header_->ClassDefs()[idx];
693 fprintf(out_file_, "Class #%d header:\n", idx);
694 fprintf(out_file_, "class_idx : %d\n", class_def->ClassType()->GetIndex());
695 fprintf(out_file_, "access_flags : %d (0x%04x)\n",
696 class_def->GetAccessFlags(), class_def->GetAccessFlags());
697 uint32_t superclass_idx = class_def->Superclass() == nullptr ?
698 DexFile::kDexNoIndex16 : class_def->Superclass()->GetIndex();
699 fprintf(out_file_, "superclass_idx : %d\n", superclass_idx);
700 fprintf(out_file_, "interfaces_off : %d (0x%06x)\n",
701 class_def->InterfacesOffset(), class_def->InterfacesOffset());
702 uint32_t source_file_offset = 0xffffffffU;
703 if (class_def->SourceFile() != nullptr) {
704 source_file_offset = class_def->SourceFile()->GetIndex();
705 }
706 fprintf(out_file_, "source_file_idx : %d\n", source_file_offset);
707 uint32_t annotations_offset = 0;
708 if (class_def->Annotations() != nullptr) {
709 annotations_offset = class_def->Annotations()->GetOffset();
710 }
711 fprintf(out_file_, "annotations_off : %d (0x%06x)\n",
712 annotations_offset, annotations_offset);
713 if (class_def->GetClassData() == nullptr) {
714 fprintf(out_file_, "class_data_off : %d (0x%06x)\n", 0, 0);
715 } else {
716 fprintf(out_file_, "class_data_off : %d (0x%06x)\n",
717 class_def->GetClassData()->GetOffset(), class_def->GetClassData()->GetOffset());
718 }
719
720 // Fields and methods.
721 dex_ir::ClassData* class_data = class_def->GetClassData();
722 if (class_data != nullptr && class_data->StaticFields() != nullptr) {
723 fprintf(out_file_, "static_fields_size : %zu\n", class_data->StaticFields()->size());
724 } else {
725 fprintf(out_file_, "static_fields_size : 0\n");
726 }
727 if (class_data != nullptr && class_data->InstanceFields() != nullptr) {
728 fprintf(out_file_, "instance_fields_size: %zu\n", class_data->InstanceFields()->size());
729 } else {
730 fprintf(out_file_, "instance_fields_size: 0\n");
731 }
732 if (class_data != nullptr && class_data->DirectMethods() != nullptr) {
733 fprintf(out_file_, "direct_methods_size : %zu\n", class_data->DirectMethods()->size());
734 } else {
735 fprintf(out_file_, "direct_methods_size : 0\n");
736 }
737 if (class_data != nullptr && class_data->VirtualMethods() != nullptr) {
738 fprintf(out_file_, "virtual_methods_size: %zu\n", class_data->VirtualMethods()->size());
739 } else {
740 fprintf(out_file_, "virtual_methods_size: 0\n");
741 }
742 fprintf(out_file_, "\n");
743 }
744
745 /**
746 * Dumps an annotation set item.
747 */
DumpAnnotationSetItem(dex_ir::AnnotationSetItem * set_item)748 void DexLayout::DumpAnnotationSetItem(dex_ir::AnnotationSetItem* set_item) {
749 if (set_item == nullptr || set_item->GetItems()->size() == 0) {
750 fputs(" empty-annotation-set\n", out_file_);
751 return;
752 }
753 for (dex_ir::AnnotationItem* annotation : *set_item->GetItems()) {
754 if (annotation == nullptr) {
755 continue;
756 }
757 fputs(" ", out_file_);
758 switch (annotation->GetVisibility()) {
759 case DexFile::kDexVisibilityBuild: fputs("VISIBILITY_BUILD ", out_file_); break;
760 case DexFile::kDexVisibilityRuntime: fputs("VISIBILITY_RUNTIME ", out_file_); break;
761 case DexFile::kDexVisibilitySystem: fputs("VISIBILITY_SYSTEM ", out_file_); break;
762 default: fputs("VISIBILITY_UNKNOWN ", out_file_); break;
763 } // switch
764 DumpEncodedAnnotation(annotation->GetAnnotation());
765 fputc('\n', out_file_);
766 }
767 }
768
769 /*
770 * Dumps class annotations.
771 */
DumpClassAnnotations(int idx)772 void DexLayout::DumpClassAnnotations(int idx) {
773 dex_ir::ClassDef* class_def = header_->ClassDefs()[idx];
774 dex_ir::AnnotationsDirectoryItem* annotations_directory = class_def->Annotations();
775 if (annotations_directory == nullptr) {
776 return; // none
777 }
778
779 fprintf(out_file_, "Class #%d annotations:\n", idx);
780
781 dex_ir::AnnotationSetItem* class_set_item = annotations_directory->GetClassAnnotation();
782 dex_ir::FieldAnnotationVector* fields = annotations_directory->GetFieldAnnotations();
783 dex_ir::MethodAnnotationVector* methods = annotations_directory->GetMethodAnnotations();
784 dex_ir::ParameterAnnotationVector* parameters = annotations_directory->GetParameterAnnotations();
785
786 // Annotations on the class itself.
787 if (class_set_item != nullptr) {
788 fprintf(out_file_, "Annotations on class\n");
789 DumpAnnotationSetItem(class_set_item);
790 }
791
792 // Annotations on fields.
793 if (fields != nullptr) {
794 for (auto& field : *fields) {
795 const dex_ir::FieldId* field_id = field->GetFieldId();
796 const uint32_t field_idx = field_id->GetIndex();
797 const char* field_name = field_id->Name()->Data();
798 fprintf(out_file_, "Annotations on field #%u '%s'\n", field_idx, field_name);
799 DumpAnnotationSetItem(field->GetAnnotationSetItem());
800 }
801 }
802
803 // Annotations on methods.
804 if (methods != nullptr) {
805 for (auto& method : *methods) {
806 const dex_ir::MethodId* method_id = method->GetMethodId();
807 const uint32_t method_idx = method_id->GetIndex();
808 const char* method_name = method_id->Name()->Data();
809 fprintf(out_file_, "Annotations on method #%u '%s'\n", method_idx, method_name);
810 DumpAnnotationSetItem(method->GetAnnotationSetItem());
811 }
812 }
813
814 // Annotations on method parameters.
815 if (parameters != nullptr) {
816 for (auto& parameter : *parameters) {
817 const dex_ir::MethodId* method_id = parameter->GetMethodId();
818 const uint32_t method_idx = method_id->GetIndex();
819 const char* method_name = method_id->Name()->Data();
820 fprintf(out_file_, "Annotations on method #%u '%s' parameters\n", method_idx, method_name);
821 uint32_t j = 0;
822 for (dex_ir::AnnotationSetItem* annotation : *parameter->GetAnnotations()->GetItems()) {
823 fprintf(out_file_, "#%u\n", j);
824 DumpAnnotationSetItem(annotation);
825 ++j;
826 }
827 }
828 }
829
830 fputc('\n', out_file_);
831 }
832
833 /*
834 * Dumps an interface that a class declares to implement.
835 */
DumpInterface(const dex_ir::TypeId * type_item,int i)836 void DexLayout::DumpInterface(const dex_ir::TypeId* type_item, int i) {
837 const char* interface_name = type_item->GetStringId()->Data();
838 if (options_.output_format_ == kOutputPlain) {
839 fprintf(out_file_, " #%d : '%s'\n", i, interface_name);
840 } else {
841 std::string dot(DescriptorToDot(interface_name));
842 fprintf(out_file_, "<implements name=\"%s\">\n</implements>\n", dot.c_str());
843 }
844 }
845
846 /*
847 * Dumps the catches table associated with the code.
848 */
DumpCatches(const dex_ir::CodeItem * code)849 void DexLayout::DumpCatches(const dex_ir::CodeItem* code) {
850 const uint16_t tries_size = code->TriesSize();
851
852 // No catch table.
853 if (tries_size == 0) {
854 fprintf(out_file_, " catches : (none)\n");
855 return;
856 }
857
858 // Dump all table entries.
859 fprintf(out_file_, " catches : %d\n", tries_size);
860 std::vector<std::unique_ptr<const dex_ir::TryItem>>* tries = code->Tries();
861 for (uint32_t i = 0; i < tries_size; i++) {
862 const dex_ir::TryItem* try_item = (*tries)[i].get();
863 const uint32_t start = try_item->StartAddr();
864 const uint32_t end = start + try_item->InsnCount();
865 fprintf(out_file_, " 0x%04x - 0x%04x\n", start, end);
866 for (auto& handler : *try_item->GetHandlers()->GetHandlers()) {
867 const dex_ir::TypeId* type_id = handler->GetTypeId();
868 const char* descriptor = (type_id == nullptr) ? "<any>" : type_id->GetStringId()->Data();
869 fprintf(out_file_, " %s -> 0x%04x\n", descriptor, handler->GetAddress());
870 } // for
871 } // for
872 }
873
874 /*
875 * Dumps a single instruction.
876 */
DumpInstruction(const dex_ir::CodeItem * code,uint32_t code_offset,uint32_t insn_idx,uint32_t insn_width,const Instruction * dec_insn)877 void DexLayout::DumpInstruction(const dex_ir::CodeItem* code,
878 uint32_t code_offset,
879 uint32_t insn_idx,
880 uint32_t insn_width,
881 const Instruction* dec_insn) {
882 // Address of instruction (expressed as byte offset).
883 fprintf(out_file_, "%06x:", code_offset + 0x10 + insn_idx * 2);
884
885 // Dump (part of) raw bytes.
886 const uint16_t* insns = code->Insns();
887 for (uint32_t i = 0; i < 8; i++) {
888 if (i < insn_width) {
889 if (i == 7) {
890 fprintf(out_file_, " ... ");
891 } else {
892 // Print 16-bit value in little-endian order.
893 const uint8_t* bytePtr = (const uint8_t*) &insns[insn_idx + i];
894 fprintf(out_file_, " %02x%02x", bytePtr[0], bytePtr[1]);
895 }
896 } else {
897 fputs(" ", out_file_);
898 }
899 } // for
900
901 // Dump pseudo-instruction or opcode.
902 if (dec_insn->Opcode() == Instruction::NOP) {
903 const uint16_t instr = Get2LE((const uint8_t*) &insns[insn_idx]);
904 if (instr == Instruction::kPackedSwitchSignature) {
905 fprintf(out_file_, "|%04x: packed-switch-data (%d units)", insn_idx, insn_width);
906 } else if (instr == Instruction::kSparseSwitchSignature) {
907 fprintf(out_file_, "|%04x: sparse-switch-data (%d units)", insn_idx, insn_width);
908 } else if (instr == Instruction::kArrayDataSignature) {
909 fprintf(out_file_, "|%04x: array-data (%d units)", insn_idx, insn_width);
910 } else {
911 fprintf(out_file_, "|%04x: nop // spacer", insn_idx);
912 }
913 } else {
914 fprintf(out_file_, "|%04x: %s", insn_idx, dec_insn->Name());
915 }
916
917 // Set up additional argument.
918 std::unique_ptr<char[]> index_buf;
919 if (Instruction::IndexTypeOf(dec_insn->Opcode()) != Instruction::kIndexNone) {
920 index_buf = IndexString(header_, dec_insn, 200);
921 }
922
923 // Dump the instruction.
924 //
925 // NOTE: pDecInsn->DumpString(pDexFile) differs too much from original.
926 //
927 switch (Instruction::FormatOf(dec_insn->Opcode())) {
928 case Instruction::k10x: // op
929 break;
930 case Instruction::k12x: // op vA, vB
931 fprintf(out_file_, " v%d, v%d", dec_insn->VRegA(), dec_insn->VRegB());
932 break;
933 case Instruction::k11n: // op vA, #+B
934 fprintf(out_file_, " v%d, #int %d // #%x",
935 dec_insn->VRegA(), (int32_t) dec_insn->VRegB(), (uint8_t)dec_insn->VRegB());
936 break;
937 case Instruction::k11x: // op vAA
938 fprintf(out_file_, " v%d", dec_insn->VRegA());
939 break;
940 case Instruction::k10t: // op +AA
941 case Instruction::k20t: { // op +AAAA
942 const int32_t targ = (int32_t) dec_insn->VRegA();
943 fprintf(out_file_, " %04x // %c%04x",
944 insn_idx + targ,
945 (targ < 0) ? '-' : '+',
946 (targ < 0) ? -targ : targ);
947 break;
948 }
949 case Instruction::k22x: // op vAA, vBBBB
950 fprintf(out_file_, " v%d, v%d", dec_insn->VRegA(), dec_insn->VRegB());
951 break;
952 case Instruction::k21t: { // op vAA, +BBBB
953 const int32_t targ = (int32_t) dec_insn->VRegB();
954 fprintf(out_file_, " v%d, %04x // %c%04x", dec_insn->VRegA(),
955 insn_idx + targ,
956 (targ < 0) ? '-' : '+',
957 (targ < 0) ? -targ : targ);
958 break;
959 }
960 case Instruction::k21s: // op vAA, #+BBBB
961 fprintf(out_file_, " v%d, #int %d // #%x",
962 dec_insn->VRegA(), (int32_t) dec_insn->VRegB(), (uint16_t)dec_insn->VRegB());
963 break;
964 case Instruction::k21h: // op vAA, #+BBBB0000[00000000]
965 // The printed format varies a bit based on the actual opcode.
966 if (dec_insn->Opcode() == Instruction::CONST_HIGH16) {
967 const int32_t value = dec_insn->VRegB() << 16;
968 fprintf(out_file_, " v%d, #int %d // #%x",
969 dec_insn->VRegA(), value, (uint16_t) dec_insn->VRegB());
970 } else {
971 const int64_t value = ((int64_t) dec_insn->VRegB()) << 48;
972 fprintf(out_file_, " v%d, #long %" PRId64 " // #%x",
973 dec_insn->VRegA(), value, (uint16_t) dec_insn->VRegB());
974 }
975 break;
976 case Instruction::k21c: // op vAA, thing@BBBB
977 case Instruction::k31c: // op vAA, thing@BBBBBBBB
978 fprintf(out_file_, " v%d, %s", dec_insn->VRegA(), index_buf.get());
979 break;
980 case Instruction::k23x: // op vAA, vBB, vCC
981 fprintf(out_file_, " v%d, v%d, v%d",
982 dec_insn->VRegA(), dec_insn->VRegB(), dec_insn->VRegC());
983 break;
984 case Instruction::k22b: // op vAA, vBB, #+CC
985 fprintf(out_file_, " v%d, v%d, #int %d // #%02x",
986 dec_insn->VRegA(), dec_insn->VRegB(),
987 (int32_t) dec_insn->VRegC(), (uint8_t) dec_insn->VRegC());
988 break;
989 case Instruction::k22t: { // op vA, vB, +CCCC
990 const int32_t targ = (int32_t) dec_insn->VRegC();
991 fprintf(out_file_, " v%d, v%d, %04x // %c%04x",
992 dec_insn->VRegA(), dec_insn->VRegB(),
993 insn_idx + targ,
994 (targ < 0) ? '-' : '+',
995 (targ < 0) ? -targ : targ);
996 break;
997 }
998 case Instruction::k22s: // op vA, vB, #+CCCC
999 fprintf(out_file_, " v%d, v%d, #int %d // #%04x",
1000 dec_insn->VRegA(), dec_insn->VRegB(),
1001 (int32_t) dec_insn->VRegC(), (uint16_t) dec_insn->VRegC());
1002 break;
1003 case Instruction::k22c: // op vA, vB, thing@CCCC
1004 // NOT SUPPORTED:
1005 // case Instruction::k22cs: // [opt] op vA, vB, field offset CCCC
1006 fprintf(out_file_, " v%d, v%d, %s",
1007 dec_insn->VRegA(), dec_insn->VRegB(), index_buf.get());
1008 break;
1009 case Instruction::k30t:
1010 fprintf(out_file_, " #%08x", dec_insn->VRegA());
1011 break;
1012 case Instruction::k31i: { // op vAA, #+BBBBBBBB
1013 // This is often, but not always, a float.
1014 union {
1015 float f;
1016 uint32_t i;
1017 } conv;
1018 conv.i = dec_insn->VRegB();
1019 fprintf(out_file_, " v%d, #float %g // #%08x",
1020 dec_insn->VRegA(), conv.f, dec_insn->VRegB());
1021 break;
1022 }
1023 case Instruction::k31t: // op vAA, offset +BBBBBBBB
1024 fprintf(out_file_, " v%d, %08x // +%08x",
1025 dec_insn->VRegA(), insn_idx + dec_insn->VRegB(), dec_insn->VRegB());
1026 break;
1027 case Instruction::k32x: // op vAAAA, vBBBB
1028 fprintf(out_file_, " v%d, v%d", dec_insn->VRegA(), dec_insn->VRegB());
1029 break;
1030 case Instruction::k35c: // op {vC, vD, vE, vF, vG}, thing@BBBB
1031 case Instruction::k45cc: { // op {vC, vD, vE, vF, vG}, meth@BBBB, proto@HHHH
1032 // NOT SUPPORTED:
1033 // case Instruction::k35ms: // [opt] invoke-virtual+super
1034 // case Instruction::k35mi: // [opt] inline invoke
1035 uint32_t arg[Instruction::kMaxVarArgRegs];
1036 dec_insn->GetVarArgs(arg);
1037 fputs(" {", out_file_);
1038 for (int i = 0, n = dec_insn->VRegA(); i < n; i++) {
1039 if (i == 0) {
1040 fprintf(out_file_, "v%d", arg[i]);
1041 } else {
1042 fprintf(out_file_, ", v%d", arg[i]);
1043 }
1044 } // for
1045 fprintf(out_file_, "}, %s", index_buf.get());
1046 break;
1047 }
1048 case Instruction::k3rc: // op {vCCCC .. v(CCCC+AA-1)}, thing@BBBB
1049 case Instruction::k4rcc: // op {vCCCC .. v(CCCC+AA-1)}, meth@BBBB, proto@HHHH
1050 // NOT SUPPORTED:
1051 // case Instruction::k3rms: // [opt] invoke-virtual+super/range
1052 // case Instruction::k3rmi: // [opt] execute-inline/range
1053 {
1054 // This doesn't match the "dx" output when some of the args are
1055 // 64-bit values -- dx only shows the first register.
1056 fputs(" {", out_file_);
1057 for (int i = 0, n = dec_insn->VRegA(); i < n; i++) {
1058 if (i == 0) {
1059 fprintf(out_file_, "v%d", dec_insn->VRegC() + i);
1060 } else {
1061 fprintf(out_file_, ", v%d", dec_insn->VRegC() + i);
1062 }
1063 } // for
1064 fprintf(out_file_, "}, %s", index_buf.get());
1065 }
1066 break;
1067 case Instruction::k51l: { // op vAA, #+BBBBBBBBBBBBBBBB
1068 // This is often, but not always, a double.
1069 union {
1070 double d;
1071 uint64_t j;
1072 } conv;
1073 conv.j = dec_insn->WideVRegB();
1074 fprintf(out_file_, " v%d, #double %g // #%016" PRIx64,
1075 dec_insn->VRegA(), conv.d, dec_insn->WideVRegB());
1076 break;
1077 }
1078 // NOT SUPPORTED:
1079 // case Instruction::k00x: // unknown op or breakpoint
1080 // break;
1081 default:
1082 fprintf(out_file_, " ???");
1083 break;
1084 } // switch
1085
1086 fputc('\n', out_file_);
1087 }
1088
1089 /*
1090 * Dumps a bytecode disassembly.
1091 */
DumpBytecodes(uint32_t idx,const dex_ir::CodeItem * code,uint32_t code_offset)1092 void DexLayout::DumpBytecodes(uint32_t idx, const dex_ir::CodeItem* code, uint32_t code_offset) {
1093 dex_ir::MethodId* method_id = header_->MethodIds()[idx];
1094 const char* name = method_id->Name()->Data();
1095 std::string type_descriptor = GetSignatureForProtoId(method_id->Proto());
1096 const char* back_descriptor = method_id->Class()->GetStringId()->Data();
1097
1098 // Generate header.
1099 std::string dot(DescriptorToDot(back_descriptor));
1100 fprintf(out_file_, "%06x: |[%06x] %s.%s:%s\n",
1101 code_offset, code_offset, dot.c_str(), name, type_descriptor.c_str());
1102
1103 // Iterate over all instructions.
1104 for (const DexInstructionPcPair& inst : code->Instructions()) {
1105 const uint32_t insn_width = inst->SizeInCodeUnits();
1106 if (insn_width == 0) {
1107 LOG(WARNING) << "GLITCH: zero-width instruction at idx=0x" << std::hex << inst.DexPc();
1108 break;
1109 }
1110 DumpInstruction(code, code_offset, inst.DexPc(), insn_width, &inst.Inst());
1111 } // for
1112 }
1113
1114 /*
1115 * Lookup functions.
1116 */
StringDataByIdx(uint32_t idx,dex_ir::Header * header)1117 static const char* StringDataByIdx(uint32_t idx, dex_ir::Header* header) {
1118 dex_ir::StringId* string_id = header->GetStringIdOrNullPtr(idx);
1119 if (string_id == nullptr) {
1120 return nullptr;
1121 }
1122 return string_id->Data();
1123 }
1124
StringDataByTypeIdx(uint16_t idx,dex_ir::Header * header)1125 static const char* StringDataByTypeIdx(uint16_t idx, dex_ir::Header* header) {
1126 dex_ir::TypeId* type_id = header->GetTypeIdOrNullPtr(idx);
1127 if (type_id == nullptr) {
1128 return nullptr;
1129 }
1130 dex_ir::StringId* string_id = type_id->GetStringId();
1131 if (string_id == nullptr) {
1132 return nullptr;
1133 }
1134 return string_id->Data();
1135 }
1136
1137
1138 /*
1139 * Dumps code of a method.
1140 */
DumpCode(uint32_t idx,const dex_ir::CodeItem * code,uint32_t code_offset,const char * declaring_class_descriptor,const char * method_name,bool is_static,const dex_ir::ProtoId * proto)1141 void DexLayout::DumpCode(uint32_t idx,
1142 const dex_ir::CodeItem* code,
1143 uint32_t code_offset,
1144 const char* declaring_class_descriptor,
1145 const char* method_name,
1146 bool is_static,
1147 const dex_ir::ProtoId* proto) {
1148 fprintf(out_file_, " registers : %d\n", code->RegistersSize());
1149 fprintf(out_file_, " ins : %d\n", code->InsSize());
1150 fprintf(out_file_, " outs : %d\n", code->OutsSize());
1151 fprintf(out_file_, " insns size : %d 16-bit code units\n",
1152 code->InsnsSize());
1153
1154 // Bytecode disassembly, if requested.
1155 if (options_.disassemble_) {
1156 DumpBytecodes(idx, code, code_offset);
1157 }
1158
1159 // Try-catch blocks.
1160 DumpCatches(code);
1161
1162 // Positions and locals table in the debug info.
1163 dex_ir::DebugInfoItem* debug_info = code->DebugInfo();
1164 fprintf(out_file_, " positions :\n");
1165 if (debug_info != nullptr) {
1166 DexFile::DecodeDebugPositionInfo(debug_info->GetDebugInfo(),
1167 [this](uint32_t idx) {
1168 return StringDataByIdx(idx, this->header_);
1169 },
1170 [&](const DexFile::PositionInfo& entry) {
1171 fprintf(out_file_,
1172 " 0x%04x line=%d\n",
1173 entry.address_,
1174 entry.line_);
1175 return false;
1176 });
1177 }
1178 fprintf(out_file_, " locals :\n");
1179 if (debug_info != nullptr) {
1180 std::vector<const char*> arg_descriptors;
1181 const dex_ir::TypeList* parameters = proto->Parameters();
1182 if (parameters != nullptr) {
1183 const dex_ir::TypeIdVector* parameter_type_vector = parameters->GetTypeList();
1184 if (parameter_type_vector != nullptr) {
1185 for (const dex_ir::TypeId* type_id : *parameter_type_vector) {
1186 arg_descriptors.push_back(type_id->GetStringId()->Data());
1187 }
1188 }
1189 }
1190 DexFile::DecodeDebugLocalInfo(debug_info->GetDebugInfo(),
1191 "DexLayout in-memory",
1192 declaring_class_descriptor,
1193 arg_descriptors,
1194 method_name,
1195 is_static,
1196 code->RegistersSize(),
1197 code->InsSize(),
1198 code->InsnsSize(),
1199 [this](uint32_t idx) {
1200 return StringDataByIdx(idx, this->header_);
1201 },
1202 [this](uint32_t idx) {
1203 return
1204 StringDataByTypeIdx(dchecked_integral_cast<uint16_t>(idx),
1205 this->header_);
1206 },
1207 [&](const DexFile::LocalInfo& entry) {
1208 fprintf(out_file_,
1209 " 0x%04x - 0x%04x reg=%d %s %s",
1210 entry.start_address_,
1211 entry.end_address_,
1212 entry.reg_,
1213 entry.name_,
1214 entry.descriptor_);
1215 if (entry.signature_) {
1216 fputc(' ', out_file_);
1217 fputs(entry.signature_, out_file_);
1218 }
1219 fputc('\n', out_file_);
1220 });
1221 }
1222 }
1223
1224 /*
1225 * Dumps a method.
1226 */
DumpMethod(uint32_t idx,uint32_t flags,uint32_t hiddenapi_flags,const dex_ir::CodeItem * code,int i)1227 void DexLayout::DumpMethod(uint32_t idx,
1228 uint32_t flags,
1229 uint32_t hiddenapi_flags,
1230 const dex_ir::CodeItem* code,
1231 int i) {
1232 // Bail for anything private if export only requested.
1233 if (options_.exports_only_ && (flags & (kAccPublic | kAccProtected)) == 0) {
1234 return;
1235 }
1236
1237 dex_ir::MethodId* method_id = header_->MethodIds()[idx];
1238 const char* name = method_id->Name()->Data();
1239 char* type_descriptor = strdup(GetSignatureForProtoId(method_id->Proto()).c_str());
1240 const char* back_descriptor = method_id->Class()->GetStringId()->Data();
1241 char* access_str = CreateAccessFlagStr(flags, kAccessForMethod);
1242
1243 if (options_.output_format_ == kOutputPlain) {
1244 fprintf(out_file_, " #%d : (in %s)\n", i, back_descriptor);
1245 fprintf(out_file_, " name : '%s'\n", name);
1246 fprintf(out_file_, " type : '%s'\n", type_descriptor);
1247 fprintf(out_file_, " access : 0x%04x (%s)\n", flags, access_str);
1248 if (options_.show_section_headers_) {
1249 fprintf(out_file_, " method_idx : %d\n", method_id->GetIndex());
1250 }
1251 if (hiddenapi_flags != 0u) {
1252 fprintf(out_file_,
1253 " hiddenapi : 0x%04x (%s)\n",
1254 hiddenapi_flags,
1255 GetHiddenapiFlagStr(hiddenapi_flags).c_str());
1256 }
1257 if (code == nullptr) {
1258 fprintf(out_file_, " code : (none)\n");
1259 } else {
1260 fprintf(out_file_, " code -\n");
1261 DumpCode(idx,
1262 code,
1263 code->GetOffset(),
1264 back_descriptor,
1265 name,
1266 (flags & kAccStatic) != 0,
1267 method_id->Proto());
1268 }
1269 if (options_.disassemble_) {
1270 fputc('\n', out_file_);
1271 }
1272 } else if (options_.output_format_ == kOutputXml) {
1273 const bool constructor = (name[0] == '<');
1274
1275 // Method name and prototype.
1276 if (constructor) {
1277 std::string dot(DescriptorClassToName(back_descriptor));
1278 fprintf(out_file_, "<constructor name=\"%s\"\n", dot.c_str());
1279 dot = DescriptorToDot(back_descriptor);
1280 fprintf(out_file_, " type=\"%s\"\n", dot.c_str());
1281 } else {
1282 fprintf(out_file_, "<method name=\"%s\"\n", name);
1283 const char* return_type = strrchr(type_descriptor, ')');
1284 if (return_type == nullptr) {
1285 LOG(ERROR) << "bad method type descriptor '" << type_descriptor << "'";
1286 goto bail;
1287 }
1288 std::string dot(DescriptorToDot(return_type + 1));
1289 fprintf(out_file_, " return=\"%s\"\n", dot.c_str());
1290 fprintf(out_file_, " abstract=%s\n", QuotedBool((flags & kAccAbstract) != 0));
1291 fprintf(out_file_, " native=%s\n", QuotedBool((flags & kAccNative) != 0));
1292 fprintf(out_file_, " synchronized=%s\n", QuotedBool(
1293 (flags & (kAccSynchronized | kAccDeclaredSynchronized)) != 0));
1294 }
1295
1296 // Additional method flags.
1297 fprintf(out_file_, " static=%s\n", QuotedBool((flags & kAccStatic) != 0));
1298 fprintf(out_file_, " final=%s\n", QuotedBool((flags & kAccFinal) != 0));
1299 // The "deprecated=" not knowable w/o parsing annotations.
1300 fprintf(out_file_, " visibility=%s\n>\n", QuotedVisibility(flags));
1301
1302 // Parameters.
1303 if (type_descriptor[0] != '(') {
1304 LOG(ERROR) << "ERROR: bad descriptor '" << type_descriptor << "'";
1305 goto bail;
1306 }
1307 char* tmp_buf = reinterpret_cast<char*>(malloc(strlen(type_descriptor) + 1));
1308 const char* base = type_descriptor + 1;
1309 int arg_num = 0;
1310 while (*base != ')') {
1311 char* cp = tmp_buf;
1312 while (*base == '[') {
1313 *cp++ = *base++;
1314 }
1315 if (*base == 'L') {
1316 // Copy through ';'.
1317 do {
1318 *cp = *base++;
1319 } while (*cp++ != ';');
1320 } else {
1321 // Primitive char, copy it.
1322 if (strchr("ZBCSIFJD", *base) == nullptr) {
1323 LOG(ERROR) << "ERROR: bad method signature '" << base << "'";
1324 break; // while
1325 }
1326 *cp++ = *base++;
1327 }
1328 // Null terminate and display.
1329 *cp++ = '\0';
1330 std::string dot(DescriptorToDot(tmp_buf));
1331 fprintf(out_file_, "<parameter name=\"arg%d\" type=\"%s\">\n"
1332 "</parameter>\n", arg_num++, dot.c_str());
1333 } // while
1334 free(tmp_buf);
1335 if (constructor) {
1336 fprintf(out_file_, "</constructor>\n");
1337 } else {
1338 fprintf(out_file_, "</method>\n");
1339 }
1340 }
1341
1342 bail:
1343 free(type_descriptor);
1344 free(access_str);
1345 }
1346
1347 /*
1348 * Dumps a static (class) field.
1349 */
DumpSField(uint32_t idx,uint32_t flags,uint32_t hiddenapi_flags,int i,dex_ir::EncodedValue * init)1350 void DexLayout::DumpSField(uint32_t idx,
1351 uint32_t flags,
1352 uint32_t hiddenapi_flags,
1353 int i,
1354 dex_ir::EncodedValue* init) {
1355 // Bail for anything private if export only requested.
1356 if (options_.exports_only_ && (flags & (kAccPublic | kAccProtected)) == 0) {
1357 return;
1358 }
1359
1360 dex_ir::FieldId* field_id = header_->FieldIds()[idx];
1361 const char* name = field_id->Name()->Data();
1362 const char* type_descriptor = field_id->Type()->GetStringId()->Data();
1363 const char* back_descriptor = field_id->Class()->GetStringId()->Data();
1364 char* access_str = CreateAccessFlagStr(flags, kAccessForField);
1365
1366 if (options_.output_format_ == kOutputPlain) {
1367 fprintf(out_file_, " #%d : (in %s)\n", i, back_descriptor);
1368 fprintf(out_file_, " name : '%s'\n", name);
1369 fprintf(out_file_, " type : '%s'\n", type_descriptor);
1370 fprintf(out_file_, " access : 0x%04x (%s)\n", flags, access_str);
1371 if (hiddenapi_flags != 0u) {
1372 fprintf(out_file_,
1373 " hiddenapi : 0x%04x (%s)\n",
1374 hiddenapi_flags,
1375 GetHiddenapiFlagStr(hiddenapi_flags).c_str());
1376 }
1377 if (init != nullptr) {
1378 fputs(" value : ", out_file_);
1379 DumpEncodedValue(init);
1380 fputs("\n", out_file_);
1381 }
1382 } else if (options_.output_format_ == kOutputXml) {
1383 fprintf(out_file_, "<field name=\"%s\"\n", name);
1384 std::string dot(DescriptorToDot(type_descriptor));
1385 fprintf(out_file_, " type=\"%s\"\n", dot.c_str());
1386 fprintf(out_file_, " transient=%s\n", QuotedBool((flags & kAccTransient) != 0));
1387 fprintf(out_file_, " volatile=%s\n", QuotedBool((flags & kAccVolatile) != 0));
1388 // The "value=" is not knowable w/o parsing annotations.
1389 fprintf(out_file_, " static=%s\n", QuotedBool((flags & kAccStatic) != 0));
1390 fprintf(out_file_, " final=%s\n", QuotedBool((flags & kAccFinal) != 0));
1391 // The "deprecated=" is not knowable w/o parsing annotations.
1392 fprintf(out_file_, " visibility=%s\n", QuotedVisibility(flags));
1393 if (init != nullptr) {
1394 fputs(" value=\"", out_file_);
1395 DumpEncodedValue(init);
1396 fputs("\"\n", out_file_);
1397 }
1398 fputs(">\n</field>\n", out_file_);
1399 }
1400
1401 free(access_str);
1402 }
1403
1404 /*
1405 * Dumps an instance field.
1406 */
DumpIField(uint32_t idx,uint32_t flags,uint32_t hiddenapi_flags,int i)1407 void DexLayout::DumpIField(uint32_t idx,
1408 uint32_t flags,
1409 uint32_t hiddenapi_flags,
1410 int i) {
1411 DumpSField(idx, flags, hiddenapi_flags, i, nullptr);
1412 }
1413
1414 /*
1415 * Dumps the class.
1416 *
1417 * Note "idx" is a DexClassDef index, not a DexTypeId index.
1418 *
1419 * If "*last_package" is nullptr or does not match the current class' package,
1420 * the value will be replaced with a newly-allocated string.
1421 */
DumpClass(int idx,char ** last_package)1422 void DexLayout::DumpClass(int idx, char** last_package) {
1423 dex_ir::ClassDef* class_def = header_->ClassDefs()[idx];
1424 // Omitting non-public class.
1425 if (options_.exports_only_ && (class_def->GetAccessFlags() & kAccPublic) == 0) {
1426 return;
1427 }
1428
1429 if (options_.show_section_headers_) {
1430 DumpClassDef(idx);
1431 }
1432
1433 if (options_.show_annotations_) {
1434 DumpClassAnnotations(idx);
1435 }
1436
1437 // For the XML output, show the package name. Ideally we'd gather
1438 // up the classes, sort them, and dump them alphabetically so the
1439 // package name wouldn't jump around, but that's not a great plan
1440 // for something that needs to run on the device.
1441 const char* class_descriptor = header_->ClassDefs()[idx]->ClassType()->GetStringId()->Data();
1442 if (!(class_descriptor[0] == 'L' &&
1443 class_descriptor[strlen(class_descriptor)-1] == ';')) {
1444 // Arrays and primitives should not be defined explicitly. Keep going?
1445 LOG(ERROR) << "Malformed class name '" << class_descriptor << "'";
1446 } else if (options_.output_format_ == kOutputXml) {
1447 char* mangle = strdup(class_descriptor + 1);
1448 mangle[strlen(mangle)-1] = '\0';
1449
1450 // Reduce to just the package name.
1451 char* last_slash = strrchr(mangle, '/');
1452 if (last_slash != nullptr) {
1453 *last_slash = '\0';
1454 } else {
1455 *mangle = '\0';
1456 }
1457
1458 for (char* cp = mangle; *cp != '\0'; cp++) {
1459 if (*cp == '/') {
1460 *cp = '.';
1461 }
1462 } // for
1463
1464 if (*last_package == nullptr || strcmp(mangle, *last_package) != 0) {
1465 // Start of a new package.
1466 if (*last_package != nullptr) {
1467 fprintf(out_file_, "</package>\n");
1468 }
1469 fprintf(out_file_, "<package name=\"%s\"\n>\n", mangle);
1470 free(*last_package);
1471 *last_package = mangle;
1472 } else {
1473 free(mangle);
1474 }
1475 }
1476
1477 // General class information.
1478 char* access_str = CreateAccessFlagStr(class_def->GetAccessFlags(), kAccessForClass);
1479 const char* superclass_descriptor = nullptr;
1480 if (class_def->Superclass() != nullptr) {
1481 superclass_descriptor = class_def->Superclass()->GetStringId()->Data();
1482 }
1483 if (options_.output_format_ == kOutputPlain) {
1484 fprintf(out_file_, "Class #%d -\n", idx);
1485 fprintf(out_file_, " Class descriptor : '%s'\n", class_descriptor);
1486 fprintf(out_file_, " Access flags : 0x%04x (%s)\n",
1487 class_def->GetAccessFlags(), access_str);
1488 if (superclass_descriptor != nullptr) {
1489 fprintf(out_file_, " Superclass : '%s'\n", superclass_descriptor);
1490 }
1491 fprintf(out_file_, " Interfaces -\n");
1492 } else {
1493 std::string dot(DescriptorClassToName(class_descriptor));
1494 fprintf(out_file_, "<class name=\"%s\"\n", dot.c_str());
1495 if (superclass_descriptor != nullptr) {
1496 dot = DescriptorToDot(superclass_descriptor);
1497 fprintf(out_file_, " extends=\"%s\"\n", dot.c_str());
1498 }
1499 fprintf(out_file_, " interface=%s\n",
1500 QuotedBool((class_def->GetAccessFlags() & kAccInterface) != 0));
1501 fprintf(out_file_, " abstract=%s\n",
1502 QuotedBool((class_def->GetAccessFlags() & kAccAbstract) != 0));
1503 fprintf(out_file_, " static=%s\n", QuotedBool((class_def->GetAccessFlags() & kAccStatic) != 0));
1504 fprintf(out_file_, " final=%s\n", QuotedBool((class_def->GetAccessFlags() & kAccFinal) != 0));
1505 // The "deprecated=" not knowable w/o parsing annotations.
1506 fprintf(out_file_, " visibility=%s\n", QuotedVisibility(class_def->GetAccessFlags()));
1507 fprintf(out_file_, ">\n");
1508 }
1509
1510 // Interfaces.
1511 const dex_ir::TypeList* interfaces = class_def->Interfaces();
1512 if (interfaces != nullptr) {
1513 const dex_ir::TypeIdVector* interfaces_vector = interfaces->GetTypeList();
1514 for (uint32_t i = 0; i < interfaces_vector->size(); i++) {
1515 DumpInterface((*interfaces_vector)[i], i);
1516 } // for
1517 }
1518
1519 // Fields and methods.
1520 dex_ir::ClassData* class_data = class_def->GetClassData();
1521 // Prepare data for static fields.
1522 dex_ir::EncodedArrayItem* static_values = class_def->StaticValues();
1523 dex_ir::EncodedValueVector* encoded_values =
1524 static_values == nullptr ? nullptr : static_values->GetEncodedValues();
1525 const uint32_t encoded_values_size = (encoded_values == nullptr) ? 0 : encoded_values->size();
1526
1527 // Static fields.
1528 if (options_.output_format_ == kOutputPlain) {
1529 fprintf(out_file_, " Static fields -\n");
1530 }
1531 if (class_data != nullptr) {
1532 dex_ir::FieldItemVector* static_fields = class_data->StaticFields();
1533 if (static_fields != nullptr) {
1534 for (uint32_t i = 0; i < static_fields->size(); i++) {
1535 DumpSField((*static_fields)[i].GetFieldId()->GetIndex(),
1536 (*static_fields)[i].GetAccessFlags(),
1537 dex_ir::HiddenapiClassData::GetFlags(header_, class_def, &(*static_fields)[i]),
1538 i,
1539 i < encoded_values_size ? (*encoded_values)[i].get() : nullptr);
1540 } // for
1541 }
1542 }
1543
1544 // Instance fields.
1545 if (options_.output_format_ == kOutputPlain) {
1546 fprintf(out_file_, " Instance fields -\n");
1547 }
1548 if (class_data != nullptr) {
1549 dex_ir::FieldItemVector* instance_fields = class_data->InstanceFields();
1550 if (instance_fields != nullptr) {
1551 for (uint32_t i = 0; i < instance_fields->size(); i++) {
1552 DumpIField((*instance_fields)[i].GetFieldId()->GetIndex(),
1553 (*instance_fields)[i].GetAccessFlags(),
1554 dex_ir::HiddenapiClassData::GetFlags(header_, class_def, &(*instance_fields)[i]),
1555 i);
1556 } // for
1557 }
1558 }
1559
1560 // Direct methods.
1561 if (options_.output_format_ == kOutputPlain) {
1562 fprintf(out_file_, " Direct methods -\n");
1563 }
1564 if (class_data != nullptr) {
1565 dex_ir::MethodItemVector* direct_methods = class_data->DirectMethods();
1566 if (direct_methods != nullptr) {
1567 for (uint32_t i = 0; i < direct_methods->size(); i++) {
1568 DumpMethod((*direct_methods)[i].GetMethodId()->GetIndex(),
1569 (*direct_methods)[i].GetAccessFlags(),
1570 dex_ir::HiddenapiClassData::GetFlags(header_, class_def, &(*direct_methods)[i]),
1571 (*direct_methods)[i].GetCodeItem(),
1572 i);
1573 } // for
1574 }
1575 }
1576
1577 // Virtual methods.
1578 if (options_.output_format_ == kOutputPlain) {
1579 fprintf(out_file_, " Virtual methods -\n");
1580 }
1581 if (class_data != nullptr) {
1582 dex_ir::MethodItemVector* virtual_methods = class_data->VirtualMethods();
1583 if (virtual_methods != nullptr) {
1584 for (uint32_t i = 0; i < virtual_methods->size(); i++) {
1585 DumpMethod((*virtual_methods)[i].GetMethodId()->GetIndex(),
1586 (*virtual_methods)[i].GetAccessFlags(),
1587 dex_ir::HiddenapiClassData::GetFlags(header_, class_def, &(*virtual_methods)[i]),
1588 (*virtual_methods)[i].GetCodeItem(),
1589 i);
1590 } // for
1591 }
1592 }
1593
1594 // End of class.
1595 if (options_.output_format_ == kOutputPlain) {
1596 const char* file_name = "unknown";
1597 if (class_def->SourceFile() != nullptr) {
1598 file_name = class_def->SourceFile()->Data();
1599 }
1600 const dex_ir::StringId* source_file = class_def->SourceFile();
1601 fprintf(out_file_, " source_file_idx : %d (%s)\n\n",
1602 source_file == nullptr ? 0xffffffffU : source_file->GetIndex(), file_name);
1603 } else if (options_.output_format_ == kOutputXml) {
1604 fprintf(out_file_, "</class>\n");
1605 }
1606
1607 free(access_str);
1608 }
1609
DumpDexFile()1610 void DexLayout::DumpDexFile() {
1611 // Headers.
1612 if (options_.show_file_headers_) {
1613 DumpFileHeader();
1614 }
1615
1616 // Open XML context.
1617 if (options_.output_format_ == kOutputXml) {
1618 fprintf(out_file_, "<api>\n");
1619 }
1620
1621 // Iterate over all classes.
1622 char* package = nullptr;
1623 const uint32_t class_defs_size = header_->ClassDefs().Size();
1624 for (uint32_t i = 0; i < class_defs_size; i++) {
1625 DumpClass(i, &package);
1626 } // for
1627
1628 // Free the last package allocated.
1629 if (package != nullptr) {
1630 fprintf(out_file_, "</package>\n");
1631 free(package);
1632 }
1633
1634 // Close XML context.
1635 if (options_.output_format_ == kOutputXml) {
1636 fprintf(out_file_, "</api>\n");
1637 }
1638 }
1639
LayoutClassDefsAndClassData(const DexFile * dex_file)1640 void DexLayout::LayoutClassDefsAndClassData(const DexFile* dex_file) {
1641 std::vector<dex_ir::ClassDef*> new_class_def_order;
1642 for (auto& class_def : header_->ClassDefs()) {
1643 dex::TypeIndex type_idx(class_def->ClassType()->GetIndex());
1644 if (info_->ContainsClass(*dex_file, type_idx)) {
1645 new_class_def_order.push_back(class_def.get());
1646 }
1647 }
1648 for (auto& class_def : header_->ClassDefs()) {
1649 dex::TypeIndex type_idx(class_def->ClassType()->GetIndex());
1650 if (!info_->ContainsClass(*dex_file, type_idx)) {
1651 new_class_def_order.push_back(class_def.get());
1652 }
1653 }
1654 std::unordered_set<dex_ir::ClassData*> visited_class_data;
1655 size_t class_data_index = 0;
1656 auto& class_datas = header_->ClassDatas();
1657 for (dex_ir::ClassDef* class_def : new_class_def_order) {
1658 dex_ir::ClassData* class_data = class_def->GetClassData();
1659 if (class_data != nullptr && visited_class_data.find(class_data) == visited_class_data.end()) {
1660 visited_class_data.insert(class_data);
1661 // Overwrite the existing vector with the new ordering, note that the sets of objects are
1662 // equivalent, but the order changes. This is why this is not a memory leak.
1663 // TODO: Consider cleaning this up with a shared_ptr.
1664 class_datas[class_data_index].release(); // NOLINT b/117926937
1665 class_datas[class_data_index].reset(class_data);
1666 ++class_data_index;
1667 }
1668 }
1669 CHECK_EQ(class_data_index, class_datas.Size());
1670
1671 if (DexLayout::kChangeClassDefOrder) {
1672 // This currently produces dex files that violate the spec since the super class class_def is
1673 // supposed to occur before any subclasses.
1674 dex_ir::CollectionVector<dex_ir::ClassDef>& class_defs = header_->ClassDefs();
1675 CHECK_EQ(new_class_def_order.size(), class_defs.Size());
1676 for (size_t i = 0; i < class_defs.Size(); ++i) {
1677 // Overwrite the existing vector with the new ordering, note that the sets of objects are
1678 // equivalent, but the order changes. This is why this is not a memory leak.
1679 // TODO: Consider cleaning this up with a shared_ptr.
1680 class_defs[i].release(); // NOLINT b/117926937
1681 class_defs[i].reset(new_class_def_order[i]);
1682 }
1683 }
1684 }
1685
LayoutStringData(const DexFile * dex_file)1686 void DexLayout::LayoutStringData(const DexFile* dex_file) {
1687 const size_t num_strings = header_->StringIds().Size();
1688 std::vector<bool> is_shorty(num_strings, false);
1689 std::vector<bool> from_hot_method(num_strings, false);
1690 for (auto& class_def : header_->ClassDefs()) {
1691 // A name of a profile class is probably going to get looked up by ClassTable::Lookup, mark it
1692 // as hot. Add its super class and interfaces as well, which can be used during initialization.
1693 const bool is_profile_class =
1694 info_->ContainsClass(*dex_file, dex::TypeIndex(class_def->ClassType()->GetIndex()));
1695 if (is_profile_class) {
1696 from_hot_method[class_def->ClassType()->GetStringId()->GetIndex()] = true;
1697 const dex_ir::TypeId* superclass = class_def->Superclass();
1698 if (superclass != nullptr) {
1699 from_hot_method[superclass->GetStringId()->GetIndex()] = true;
1700 }
1701 const dex_ir::TypeList* interfaces = class_def->Interfaces();
1702 if (interfaces != nullptr) {
1703 for (const dex_ir::TypeId* interface_type : *interfaces->GetTypeList()) {
1704 from_hot_method[interface_type->GetStringId()->GetIndex()] = true;
1705 }
1706 }
1707 }
1708 dex_ir::ClassData* data = class_def->GetClassData();
1709 if (data == nullptr) {
1710 continue;
1711 }
1712 for (size_t i = 0; i < 2; ++i) {
1713 for (auto& method : *(i == 0 ? data->DirectMethods() : data->VirtualMethods())) {
1714 const dex_ir::MethodId* method_id = method.GetMethodId();
1715 dex_ir::CodeItem* code_item = method.GetCodeItem();
1716 if (code_item == nullptr) {
1717 continue;
1718 }
1719 const bool is_clinit = is_profile_class &&
1720 (method.GetAccessFlags() & kAccConstructor) != 0 &&
1721 (method.GetAccessFlags() & kAccStatic) != 0;
1722 const bool method_executed = is_clinit ||
1723 info_->GetMethodHotness(MethodReference(dex_file, method_id->GetIndex())).IsInProfile();
1724 if (!method_executed) {
1725 continue;
1726 }
1727 is_shorty[method_id->Proto()->Shorty()->GetIndex()] = true;
1728 dex_ir::CodeFixups* fixups = code_item->GetCodeFixups();
1729 if (fixups == nullptr) {
1730 continue;
1731 }
1732 // Add const-strings.
1733 for (dex_ir::StringId* id : fixups->StringIds()) {
1734 from_hot_method[id->GetIndex()] = true;
1735 }
1736 // Add field classes, names, and types.
1737 for (dex_ir::FieldId* id : fixups->FieldIds()) {
1738 // TODO: Only visit field ids from static getters and setters.
1739 from_hot_method[id->Class()->GetStringId()->GetIndex()] = true;
1740 from_hot_method[id->Name()->GetIndex()] = true;
1741 from_hot_method[id->Type()->GetStringId()->GetIndex()] = true;
1742 }
1743 // For clinits, add referenced method classes, names, and protos.
1744 if (is_clinit) {
1745 for (dex_ir::MethodId* id : fixups->MethodIds()) {
1746 from_hot_method[id->Class()->GetStringId()->GetIndex()] = true;
1747 from_hot_method[id->Name()->GetIndex()] = true;
1748 is_shorty[id->Proto()->Shorty()->GetIndex()] = true;
1749 }
1750 }
1751 }
1752 }
1753 }
1754 // Sort string data by specified order.
1755 std::vector<dex_ir::StringId*> string_ids;
1756 for (auto& string_id : header_->StringIds()) {
1757 string_ids.push_back(string_id.get());
1758 }
1759 std::sort(string_ids.begin(),
1760 string_ids.end(),
1761 [&is_shorty, &from_hot_method](const dex_ir::StringId* a,
1762 const dex_ir::StringId* b) {
1763 const bool a_is_hot = from_hot_method[a->GetIndex()];
1764 const bool b_is_hot = from_hot_method[b->GetIndex()];
1765 if (a_is_hot != b_is_hot) {
1766 return a_is_hot < b_is_hot;
1767 }
1768 // After hot methods are partitioned, subpartition shorties.
1769 const bool a_is_shorty = is_shorty[a->GetIndex()];
1770 const bool b_is_shorty = is_shorty[b->GetIndex()];
1771 if (a_is_shorty != b_is_shorty) {
1772 return a_is_shorty < b_is_shorty;
1773 }
1774 // Order by index by default.
1775 return a->GetIndex() < b->GetIndex();
1776 });
1777 auto& string_datas = header_->StringDatas();
1778 // Now we know what order we want the string data, reorder them.
1779 size_t data_index = 0;
1780 for (dex_ir::StringId* string_id : string_ids) {
1781 string_datas[data_index].release(); // NOLINT b/117926937
1782 string_datas[data_index].reset(string_id->DataItem());
1783 ++data_index;
1784 }
1785 if (kIsDebugBuild) {
1786 std::unordered_set<dex_ir::StringData*> visited;
1787 for (const std::unique_ptr<dex_ir::StringData>& data : string_datas) {
1788 visited.insert(data.get());
1789 }
1790 for (auto& string_id : header_->StringIds()) {
1791 CHECK(visited.find(string_id->DataItem()) != visited.end());
1792 }
1793 }
1794 CHECK_EQ(data_index, string_datas.Size());
1795 }
1796
1797 // Orders code items according to specified class data ordering.
LayoutCodeItems(const DexFile * dex_file)1798 void DexLayout::LayoutCodeItems(const DexFile* dex_file) {
1799 static constexpr InvokeType invoke_types[] = {
1800 kDirect,
1801 kVirtual
1802 };
1803
1804 std::unordered_map<dex_ir::CodeItem*, LayoutType>& code_item_layout =
1805 layout_hotness_info_.code_item_layout_;
1806
1807 // Assign hotness flags to all code items.
1808 for (InvokeType invoke_type : invoke_types) {
1809 for (auto& class_def : header_->ClassDefs()) {
1810 const bool is_profile_class =
1811 info_->ContainsClass(*dex_file, dex::TypeIndex(class_def->ClassType()->GetIndex()));
1812
1813 // Skip classes that are not defined in this dex file.
1814 dex_ir::ClassData* class_data = class_def->GetClassData();
1815 if (class_data == nullptr) {
1816 continue;
1817 }
1818 for (auto& method : *(invoke_type == InvokeType::kDirect
1819 ? class_data->DirectMethods()
1820 : class_data->VirtualMethods())) {
1821 const dex_ir::MethodId *method_id = method.GetMethodId();
1822 dex_ir::CodeItem *code_item = method.GetCodeItem();
1823 if (code_item == nullptr) {
1824 continue;
1825 }
1826 // Separate executed methods (clinits and profiled methods) from unexecuted methods.
1827 const bool is_clinit = (method.GetAccessFlags() & kAccConstructor) != 0 &&
1828 (method.GetAccessFlags() & kAccStatic) != 0;
1829 const bool is_startup_clinit = is_profile_class && is_clinit;
1830 using Hotness = ProfileCompilationInfo::MethodHotness;
1831 Hotness hotness = info_->GetMethodHotness(MethodReference(dex_file, method_id->GetIndex()));
1832 LayoutType state = LayoutType::kLayoutTypeUnused;
1833 if (hotness.IsHot()) {
1834 // Hot code is compiled, maybe one day it won't be accessed. So lay it out together for
1835 // now.
1836 state = LayoutType::kLayoutTypeHot;
1837 } else if (is_startup_clinit || hotness.GetFlags() == Hotness::kFlagStartup) {
1838 // Startup clinit or a method that only has the startup flag.
1839 state = LayoutType::kLayoutTypeStartupOnly;
1840 } else if (is_clinit) {
1841 state = LayoutType::kLayoutTypeUsedOnce;
1842 } else if (hotness.IsInProfile()) {
1843 state = LayoutType::kLayoutTypeSometimesUsed;
1844 }
1845 auto it = code_item_layout.emplace(code_item, state);
1846 if (!it.second) {
1847 LayoutType& layout_type = it.first->second;
1848 // Already exists, merge the hotness.
1849 layout_type = MergeLayoutType(layout_type, state);
1850 }
1851 }
1852 }
1853 }
1854
1855 const auto& code_items = header_->CodeItems();
1856 if (VLOG_IS_ON(dex)) {
1857 size_t layout_count[static_cast<size_t>(LayoutType::kLayoutTypeCount)] = {};
1858 for (const std::unique_ptr<dex_ir::CodeItem>& code_item : code_items) {
1859 auto it = code_item_layout.find(code_item.get());
1860 DCHECK(it != code_item_layout.end());
1861 ++layout_count[static_cast<size_t>(it->second)];
1862 }
1863 for (size_t i = 0; i < static_cast<size_t>(LayoutType::kLayoutTypeCount); ++i) {
1864 LOG(INFO) << "Code items in category " << i << " count=" << layout_count[i];
1865 }
1866 }
1867
1868 // Sort the code items vector by new layout. The writing process will take care of calculating
1869 // all the offsets. Stable sort to preserve any existing locality that might be there.
1870 std::stable_sort(code_items.begin(),
1871 code_items.end(),
1872 [&](const std::unique_ptr<dex_ir::CodeItem>& a,
1873 const std::unique_ptr<dex_ir::CodeItem>& b) {
1874 auto it_a = code_item_layout.find(a.get());
1875 auto it_b = code_item_layout.find(b.get());
1876 DCHECK(it_a != code_item_layout.end());
1877 DCHECK(it_b != code_item_layout.end());
1878 const LayoutType layout_type_a = it_a->second;
1879 const LayoutType layout_type_b = it_b->second;
1880 return layout_type_a < layout_type_b;
1881 });
1882 }
1883
LayoutOutputFile(const DexFile * dex_file)1884 void DexLayout::LayoutOutputFile(const DexFile* dex_file) {
1885 LayoutStringData(dex_file);
1886 LayoutClassDefsAndClassData(dex_file);
1887 LayoutCodeItems(dex_file);
1888 }
1889
OutputDexFile(const DexFile * input_dex_file,bool compute_offsets,std::unique_ptr<DexContainer> * dex_container,std::string * error_msg)1890 bool DexLayout::OutputDexFile(const DexFile* input_dex_file,
1891 bool compute_offsets,
1892 std::unique_ptr<DexContainer>* dex_container,
1893 std::string* error_msg) {
1894 const std::string& dex_file_location = input_dex_file->GetLocation();
1895 std::unique_ptr<File> new_file;
1896 // If options_.output_dex_directory_ is non null, we are outputting to a file.
1897 if (options_.output_dex_directory_ != nullptr) {
1898 std::string output_location(options_.output_dex_directory_);
1899 const size_t last_slash = dex_file_location.rfind('/');
1900 std::string dex_file_directory = dex_file_location.substr(0, last_slash + 1);
1901 if (output_location == dex_file_directory) {
1902 output_location = dex_file_location + ".new";
1903 } else {
1904 if (!output_location.empty() && output_location.back() != '/') {
1905 output_location += "/";
1906 }
1907 const size_t separator = dex_file_location.rfind('!');
1908 if (separator != std::string::npos) {
1909 output_location += dex_file_location.substr(separator + 1);
1910 } else {
1911 output_location += "classes.dex";
1912 }
1913 }
1914 new_file.reset(OS::CreateEmptyFile(output_location.c_str()));
1915 if (new_file == nullptr) {
1916 LOG(ERROR) << "Could not create dex writer output file: " << output_location;
1917 return false;
1918 }
1919 }
1920 if (!DexWriter::Output(this, dex_container, compute_offsets, error_msg)) {
1921 return false;
1922 }
1923 if (new_file != nullptr) {
1924 DexContainer* const container = dex_container->get();
1925 DexContainer::Section* const main_section = container->GetMainSection();
1926 if (!new_file->WriteFully(main_section->Begin(), main_section->Size())) {
1927 LOG(ERROR) << "Failed to write main section for dex file " << dex_file_location;
1928 new_file->Erase();
1929 return false;
1930 }
1931 DexContainer::Section* const data_section = container->GetDataSection();
1932 if (!new_file->WriteFully(data_section->Begin(), data_section->Size())) {
1933 LOG(ERROR) << "Failed to write data section for dex file " << dex_file_location;
1934 new_file->Erase();
1935 return false;
1936 }
1937 UNUSED(new_file->FlushCloseOrErase());
1938 }
1939 return true;
1940 }
1941
1942 /*
1943 * Dumps the requested sections of the file.
1944 */
ProcessDexFile(const char * file_name,const DexFile * dex_file,size_t dex_file_index,std::unique_ptr<DexContainer> * dex_container,std::string * error_msg)1945 bool DexLayout::ProcessDexFile(const char* file_name,
1946 const DexFile* dex_file,
1947 size_t dex_file_index,
1948 std::unique_ptr<DexContainer>* dex_container,
1949 std::string* error_msg) {
1950 const bool has_output_container = dex_container != nullptr;
1951 const bool output = options_.output_dex_directory_ != nullptr || has_output_container;
1952
1953 // Try to avoid eagerly assigning offsets to find bugs since Offset will abort if the offset
1954 // is unassigned.
1955 bool eagerly_assign_offsets = false;
1956 if (options_.visualize_pattern_ || options_.show_section_statistics_ || options_.dump_) {
1957 // These options required the offsets for dumping purposes.
1958 eagerly_assign_offsets = true;
1959 }
1960 std::unique_ptr<dex_ir::Header> header(dex_ir::DexIrBuilder(*dex_file,
1961 eagerly_assign_offsets,
1962 GetOptions()));
1963 SetHeader(header.get());
1964
1965 if (options_.verbose_) {
1966 fprintf(out_file_, "Opened '%s', DEX version '%.3s'\n",
1967 file_name, dex_file->GetHeader().magic_ + 4);
1968 }
1969
1970 if (options_.visualize_pattern_) {
1971 VisualizeDexLayout(header_, dex_file, dex_file_index, info_);
1972 return true;
1973 }
1974
1975 if (options_.show_section_statistics_) {
1976 ShowDexSectionStatistics(header_, dex_file_index);
1977 return true;
1978 }
1979
1980 // Dump dex file.
1981 if (options_.dump_) {
1982 DumpDexFile();
1983 }
1984
1985 // In case we are outputting to a file, keep it open so we can verify.
1986 if (output) {
1987 // Layout information about what strings and code items are hot. Used by the writing process
1988 // to generate the sections that are stored in the oat file.
1989 bool do_layout = info_ != nullptr && !info_->IsEmpty();
1990 if (do_layout) {
1991 LayoutOutputFile(dex_file);
1992 }
1993 // The output needs a dex container, use a temporary one.
1994 std::unique_ptr<DexContainer> temp_container;
1995 if (dex_container == nullptr) {
1996 dex_container = &temp_container;
1997 }
1998 // If we didn't set the offsets eagerly, we definitely need to compute them here.
1999 if (!OutputDexFile(dex_file, do_layout || !eagerly_assign_offsets, dex_container, error_msg)) {
2000 return false;
2001 }
2002
2003 // Clear header before verifying to reduce peak RAM usage.
2004 const size_t file_size = header_->FileSize();
2005 header.reset();
2006
2007 // Verify the output dex file's structure, only enabled by default for debug builds.
2008 if (options_.verify_output_ && has_output_container) {
2009 std::string location = "memory mapped file for " + std::string(file_name);
2010 // Dex file verifier cannot handle compact dex.
2011 bool verify = options_.compact_dex_level_ == CompactDexLevel::kCompactDexLevelNone;
2012 const ArtDexFileLoader dex_file_loader;
2013 DexContainer::Section* const main_section = (*dex_container)->GetMainSection();
2014 DexContainer::Section* const data_section = (*dex_container)->GetDataSection();
2015 DCHECK_EQ(file_size, main_section->Size())
2016 << main_section->Size() << " " << data_section->Size();
2017 std::unique_ptr<const DexFile> output_dex_file(
2018 dex_file_loader.OpenWithDataSection(
2019 main_section->Begin(),
2020 main_section->Size(),
2021 data_section->Begin(),
2022 data_section->Size(),
2023 location,
2024 /* location_checksum= */ 0,
2025 /*oat_dex_file=*/ nullptr,
2026 verify,
2027 /*verify_checksum=*/ false,
2028 error_msg));
2029 CHECK(output_dex_file != nullptr) << "Failed to re-open output file:" << *error_msg;
2030
2031 // Do IR-level comparison between input and output. This check ignores potential differences
2032 // due to layout, so offsets are not checked. Instead, it checks the data contents of each
2033 // item.
2034 //
2035 // Regenerate output IR to catch any bugs that might happen during writing.
2036 std::unique_ptr<dex_ir::Header> output_header(
2037 dex_ir::DexIrBuilder(*output_dex_file,
2038 /*eagerly_assign_offsets=*/ true,
2039 GetOptions()));
2040 std::unique_ptr<dex_ir::Header> orig_header(
2041 dex_ir::DexIrBuilder(*dex_file,
2042 /*eagerly_assign_offsets=*/ true,
2043 GetOptions()));
2044 CHECK(VerifyOutputDexFile(output_header.get(), orig_header.get(), error_msg)) << *error_msg;
2045 }
2046 }
2047 return true;
2048 }
2049
2050 /*
2051 * Processes a single file (either direct .dex or indirect .zip/.jar/.apk).
2052 */
ProcessFile(const char * file_name)2053 int DexLayout::ProcessFile(const char* file_name) {
2054 if (options_.verbose_) {
2055 fprintf(out_file_, "Processing '%s'...\n", file_name);
2056 }
2057
2058 // If the file is not a .dex file, the function tries .zip/.jar/.apk files,
2059 // all of which are Zip archives with "classes.dex" inside.
2060 const bool verify_checksum = !options_.ignore_bad_checksum_;
2061 std::string error_msg;
2062 const ArtDexFileLoader dex_file_loader;
2063 std::vector<std::unique_ptr<const DexFile>> dex_files;
2064 if (!dex_file_loader.Open(
2065 file_name, file_name, /* verify= */ true, verify_checksum, &error_msg, &dex_files)) {
2066 // Display returned error message to user. Note that this error behavior
2067 // differs from the error messages shown by the original Dalvik dexdump.
2068 LOG(ERROR) << error_msg;
2069 return -1;
2070 }
2071
2072 // Success. Either report checksum verification or process
2073 // all dex files found in given file.
2074 if (options_.checksum_only_) {
2075 fprintf(out_file_, "Checksum verified\n");
2076 } else {
2077 for (size_t i = 0; i < dex_files.size(); i++) {
2078 // Pass in a null container to avoid output by default.
2079 if (!ProcessDexFile(file_name,
2080 dex_files[i].get(),
2081 i,
2082 /*dex_container=*/ nullptr,
2083 &error_msg)) {
2084 LOG(WARNING) << "Failed to run dex file " << i << " in " << file_name << " : " << error_msg;
2085 }
2086 }
2087 }
2088 return 0;
2089 }
2090
2091 } // namespace art
2092