1 /*
2 * Copyright (C) 2016 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *
16 * Implementation file of the dexlayout utility.
17 *
18 * This is a tool to read dex files into an internal representation,
19 * reorganize the representation, and emit dex files with a better
20 * file layout.
21 */
22
23 #include "dexlayout.h"
24
25 #include <inttypes.h>
26 #include <stdio.h>
27 #include <sys/mman.h> // For the PROT_* and MAP_* constants.
28
29 #include <iostream>
30 #include <memory>
31 #include <sstream>
32 #include <vector>
33
34 #include "android-base/stringprintf.h"
35
36 #include "dex_ir_builder.h"
37 #include "dex_file-inl.h"
38 #include "dex_file_layout.h"
39 #include "dex_file_verifier.h"
40 #include "dex_instruction-inl.h"
41 #include "dex_verify.h"
42 #include "dex_visualize.h"
43 #include "dex_writer.h"
44 #include "jit/profile_compilation_info.h"
45 #include "mem_map.h"
46 #include "os.h"
47 #include "utils.h"
48
49 namespace art {
50
51 using android::base::StringPrintf;
52
// Alignment constant applied to dex code items.
static constexpr uint32_t kDexCodeItemAlignment = 4;

/*
 * Selects which table of flag names CreateAccessFlagStr() reads from.
 * kAccessForMAX is the number of selectable tables.
 */
enum AccessFor {
  kAccessForClass = 0,
  kAccessForMethod = 1,
  kAccessForField = 2,
  kAccessForMAX
};

// Number of flag names held by each CreateAccessFlagStr() table.
const int kNumFlags = 18;
62
/*
 * Reads a 16-bit value stored at "src" as 2 little-endian bytes
 * (src[0] is the low byte, src[1] the high byte).
 */
static inline uint16_t Get2LE(unsigned char const* src) {
  const uint16_t low_byte = src[0];
  const uint16_t high_byte = src[1];
  return static_cast<uint16_t>(low_byte | (high_byte << 8));
}
69
70 /*
71 * Converts a type descriptor to human-readable "dotted" form. For
72 * example, "Ljava/lang/String;" becomes "java.lang.String", and
73 * "[I" becomes "int[]". Also converts '$' to '.', which means this
74 * form can't be converted back to a descriptor.
75 */
DescriptorToDotWrapper(const char * descriptor)76 static std::string DescriptorToDotWrapper(const char* descriptor) {
77 std::string result = DescriptorToDot(descriptor);
78 size_t found = result.find('$');
79 while (found != std::string::npos) {
80 result[found] = '.';
81 found = result.find('$', found);
82 }
83 return result;
84 }
85
/*
 * Converts the class name portion of a type descriptor to human-readable
 * "dotted" form. For example, "Ljava/lang/String;" becomes "String" and
 * "Lcom/foo/Outer$Inner;" becomes "Outer.Inner".
 *
 * The name is taken to start just past the last '/' (or past the first
 * character when there is no '/'), and the final character (normally ';')
 * is dropped. A null or empty descriptor, or one with nothing after that
 * starting point, yields an empty string.
 */
static std::string DescriptorClassToDot(const char* str) {
  if (str == nullptr) {
    return std::string();
  }
  const std::string descriptor(str);

  // Start past the last '/', or past the leading 'L' when there is no package part.
  const size_t last_slash = descriptor.rfind('/');
  const size_t start = (last_slash == std::string::npos) ? 1 : last_slash + 1;
  if (start >= descriptor.size()) {
    // Nothing follows the prefix. Without this check an empty descriptor makes
    // substr() throw std::out_of_range, and the length computed below wraps around.
    return std::string();
  }

  // Copy class name over, trimming the trailing ';'.
  std::string result(descriptor.substr(start, descriptor.size() - 1 - start));

  // Replace '$' with '.'.
  for (char& ch : result) {
    if (ch == '$') {
      ch = '.';
    }
  }
  return result;
}
113
/*
 * Maps a boolean to the string literal "true" or "false".
 */
static const char* StrBool(bool val) {
  if (val) {
    return "true";
  }
  return "false";
}
120
/*
 * Maps a boolean to "true" or "false" wrapped in double quotes
 * (the quotes are part of the returned string).
 */
static const char* QuotedBool(bool val) {
  if (val) {
    return "\"true\"";
  }
  return "\"false\"";
}
127
128 /*
129 * Returns a quoted string representing the access flags.
130 */
QuotedVisibility(uint32_t access_flags)131 static const char* QuotedVisibility(uint32_t access_flags) {
132 if (access_flags & kAccPublic) {
133 return "\"public\"";
134 } else if (access_flags & kAccProtected) {
135 return "\"protected\"";
136 } else if (access_flags & kAccPrivate) {
137 return "\"private\"";
138 } else {
139 return "\"package\"";
140 }
141 }
142
/*
 * Counts the number of '1' bits in a word.
 */
static int CountOnes(uint32_t val) {
  int ones = 0;
  // Each iteration clears the lowest set bit, so the loop runs once per '1' bit.
  while (val != 0) {
    val &= val - 1;
    ++ones;
  }
  return ones;
}
151
152 /*
153 * Creates a new string with human-readable access flags.
154 *
155 * In the base language the access_flags fields are type uint16_t; in Dalvik they're uint32_t.
156 */
CreateAccessFlagStr(uint32_t flags,AccessFor for_what)157 static char* CreateAccessFlagStr(uint32_t flags, AccessFor for_what) {
158 static const char* kAccessStrings[kAccessForMAX][kNumFlags] = {
159 {
160 "PUBLIC", /* 0x00001 */
161 "PRIVATE", /* 0x00002 */
162 "PROTECTED", /* 0x00004 */
163 "STATIC", /* 0x00008 */
164 "FINAL", /* 0x00010 */
165 "?", /* 0x00020 */
166 "?", /* 0x00040 */
167 "?", /* 0x00080 */
168 "?", /* 0x00100 */
169 "INTERFACE", /* 0x00200 */
170 "ABSTRACT", /* 0x00400 */
171 "?", /* 0x00800 */
172 "SYNTHETIC", /* 0x01000 */
173 "ANNOTATION", /* 0x02000 */
174 "ENUM", /* 0x04000 */
175 "?", /* 0x08000 */
176 "VERIFIED", /* 0x10000 */
177 "OPTIMIZED", /* 0x20000 */
178 }, {
179 "PUBLIC", /* 0x00001 */
180 "PRIVATE", /* 0x00002 */
181 "PROTECTED", /* 0x00004 */
182 "STATIC", /* 0x00008 */
183 "FINAL", /* 0x00010 */
184 "SYNCHRONIZED", /* 0x00020 */
185 "BRIDGE", /* 0x00040 */
186 "VARARGS", /* 0x00080 */
187 "NATIVE", /* 0x00100 */
188 "?", /* 0x00200 */
189 "ABSTRACT", /* 0x00400 */
190 "STRICT", /* 0x00800 */
191 "SYNTHETIC", /* 0x01000 */
192 "?", /* 0x02000 */
193 "?", /* 0x04000 */
194 "MIRANDA", /* 0x08000 */
195 "CONSTRUCTOR", /* 0x10000 */
196 "DECLARED_SYNCHRONIZED", /* 0x20000 */
197 }, {
198 "PUBLIC", /* 0x00001 */
199 "PRIVATE", /* 0x00002 */
200 "PROTECTED", /* 0x00004 */
201 "STATIC", /* 0x00008 */
202 "FINAL", /* 0x00010 */
203 "?", /* 0x00020 */
204 "VOLATILE", /* 0x00040 */
205 "TRANSIENT", /* 0x00080 */
206 "?", /* 0x00100 */
207 "?", /* 0x00200 */
208 "?", /* 0x00400 */
209 "?", /* 0x00800 */
210 "SYNTHETIC", /* 0x01000 */
211 "?", /* 0x02000 */
212 "ENUM", /* 0x04000 */
213 "?", /* 0x08000 */
214 "?", /* 0x10000 */
215 "?", /* 0x20000 */
216 },
217 };
218
219 // Allocate enough storage to hold the expected number of strings,
220 // plus a space between each. We over-allocate, using the longest
221 // string above as the base metric.
222 const int kLongest = 21; // The strlen of longest string above.
223 const int count = CountOnes(flags);
224 char* str;
225 char* cp;
226 cp = str = reinterpret_cast<char*>(malloc(count * (kLongest + 1) + 1));
227
228 for (int i = 0; i < kNumFlags; i++) {
229 if (flags & 0x01) {
230 const char* accessStr = kAccessStrings[for_what][i];
231 const int len = strlen(accessStr);
232 if (cp != str) {
233 *cp++ = ' ';
234 }
235 memcpy(cp, accessStr, len);
236 cp += len;
237 }
238 flags >>= 1;
239 } // for
240
241 *cp = '\0';
242 return str;
243 }
244
GetSignatureForProtoId(const dex_ir::ProtoId * proto)245 static std::string GetSignatureForProtoId(const dex_ir::ProtoId* proto) {
246 if (proto == nullptr) {
247 return "<no signature>";
248 }
249
250 std::string result("(");
251 const dex_ir::TypeList* type_list = proto->Parameters();
252 if (type_list != nullptr) {
253 for (const dex_ir::TypeId* type_id : *type_list->GetTypeList()) {
254 result += type_id->GetStringId()->Data();
255 }
256 }
257 result += ")";
258 result += proto->ReturnType()->GetStringId()->Data();
259 return result;
260 }
261
/*
 * Copies "len" bytes from "data" to "out", replacing anything outside the
 * printable ASCII range:
 *   - '\0' and '\n' become the two-character sequences "\0" and "\n",
 *   - any other byte below 0x20 becomes '.',
 *   - any byte at or above 0x80 becomes '?'.
 *
 * The output buffer must be able to hold (2*len)+1 bytes. The result is
 * NUL-terminated.
 */
static void Asciify(char* out, const unsigned char* data, size_t len) {
  for (size_t i = 0; i < len; ++i) {
    const unsigned char byte = data[i];
    if (byte == '\0') {
      *out++ = '\\';
      *out++ = '0';
    } else if (byte == '\n') {
      *out++ = '\\';
      *out++ = 'n';
    } else if (byte < 0x20) {
      // Could do more here, but we don't need them yet.
      *out++ = '.';
    } else if (byte >= 0x80) {
      *out++ = '?';
    } else {
      *out++ = static_cast<char>(byte);
    }
  }
  *out = '\0';
}
295
/*
 * Writes "p" to "out_file" surrounded by double quotes, with backslash,
 * double quote, tab, newline and carriage return written as the two-character
 * escapes \\, \", \t, \n and \r. All other characters are written unchanged.
 */
static void DumpEscapedString(const char* p, FILE* out_file) {
  fputs("\"", out_file);
  for (const char* cur = p; *cur != '\0'; ++cur) {
    // Null means "no escape needed; emit the character itself".
    const char* escaped = nullptr;
    switch (*cur) {
      case '\\': escaped = "\\\\"; break;
      case '\"': escaped = "\\\""; break;
      case '\t': escaped = "\\t"; break;
      case '\n': escaped = "\\n"; break;
      case '\r': escaped = "\\r"; break;
      default: break;
    }  // switch
    if (escaped != nullptr) {
      fputs(escaped, out_file);
    } else {
      putc(*cur, out_file);
    }
  }  // for
  fputs("\"", out_file);
}
324
/*
 * Writes "p" to "out_file" as the text of an XML attribute value.
 *
 * Characters that are not safe inside a double-quoted attribute are replaced:
 * '&', '<', '>' and '"' by their predefined entities, and tab, newline and
 * carriage return by numeric character references. Everything else is written
 * unchanged. The surrounding quotes are not written here.
 */
static void DumpXmlAttribute(const char* p, FILE* out_file) {
  for (; *p; p++) {
    switch (*p) {
      case '&':
        fputs("&amp;", out_file);
        break;
      case '<':
        fputs("&lt;", out_file);
        break;
      case '>':
        fputs("&gt;", out_file);
        break;
      case '"':
        fputs("&quot;", out_file);
        break;
      case '\t':
        fputs("&#x9;", out_file);
        break;
      case '\n':
        fputs("&#xA;", out_file);
        break;
      case '\r':
        fputs("&#xD;", out_file);
        break;
      default:
        putc(*p, out_file);
    }  // switch
  }  // for
}
357
358 /*
359 * Helper for dumpInstruction(), which builds the string
360 * representation for the index in the given instruction.
361 * Returns a pointer to a buffer of sufficient size.
362 */
IndexString(dex_ir::Header * header,const Instruction * dec_insn,size_t buf_size)363 static std::unique_ptr<char[]> IndexString(dex_ir::Header* header,
364 const Instruction* dec_insn,
365 size_t buf_size) {
366 std::unique_ptr<char[]> buf(new char[buf_size]);
367 // Determine index and width of the string.
368 uint32_t index = 0;
369 uint32_t secondary_index = DexFile::kDexNoIndex;
370 uint32_t width = 4;
371 switch (Instruction::FormatOf(dec_insn->Opcode())) {
372 // SOME NOT SUPPORTED:
373 // case Instruction::k20bc:
374 case Instruction::k21c:
375 case Instruction::k35c:
376 // case Instruction::k35ms:
377 case Instruction::k3rc:
378 // case Instruction::k3rms:
379 // case Instruction::k35mi:
380 // case Instruction::k3rmi:
381 index = dec_insn->VRegB();
382 width = 4;
383 break;
384 case Instruction::k31c:
385 index = dec_insn->VRegB();
386 width = 8;
387 break;
388 case Instruction::k22c:
389 // case Instruction::k22cs:
390 index = dec_insn->VRegC();
391 width = 4;
392 break;
393 case Instruction::k45cc:
394 case Instruction::k4rcc:
395 index = dec_insn->VRegB();
396 secondary_index = dec_insn->VRegH();
397 width = 4;
398 break;
399 default:
400 break;
401 } // switch
402
403 // Determine index type.
404 size_t outSize = 0;
405 switch (Instruction::IndexTypeOf(dec_insn->Opcode())) {
406 case Instruction::kIndexUnknown:
407 // This function should never get called for this type, but do
408 // something sensible here, just to help with debugging.
409 outSize = snprintf(buf.get(), buf_size, "<unknown-index>");
410 break;
411 case Instruction::kIndexNone:
412 // This function should never get called for this type, but do
413 // something sensible here, just to help with debugging.
414 outSize = snprintf(buf.get(), buf_size, "<no-index>");
415 break;
416 case Instruction::kIndexTypeRef:
417 if (index < header->GetCollections().TypeIdsSize()) {
418 const char* tp = header->GetCollections().GetTypeId(index)->GetStringId()->Data();
419 outSize = snprintf(buf.get(), buf_size, "%s // type@%0*x", tp, width, index);
420 } else {
421 outSize = snprintf(buf.get(), buf_size, "<type?> // type@%0*x", width, index);
422 }
423 break;
424 case Instruction::kIndexStringRef:
425 if (index < header->GetCollections().StringIdsSize()) {
426 const char* st = header->GetCollections().GetStringId(index)->Data();
427 outSize = snprintf(buf.get(), buf_size, "\"%s\" // string@%0*x", st, width, index);
428 } else {
429 outSize = snprintf(buf.get(), buf_size, "<string?> // string@%0*x", width, index);
430 }
431 break;
432 case Instruction::kIndexMethodRef:
433 if (index < header->GetCollections().MethodIdsSize()) {
434 dex_ir::MethodId* method_id = header->GetCollections().GetMethodId(index);
435 const char* name = method_id->Name()->Data();
436 std::string type_descriptor = GetSignatureForProtoId(method_id->Proto());
437 const char* back_descriptor = method_id->Class()->GetStringId()->Data();
438 outSize = snprintf(buf.get(), buf_size, "%s.%s:%s // method@%0*x",
439 back_descriptor, name, type_descriptor.c_str(), width, index);
440 } else {
441 outSize = snprintf(buf.get(), buf_size, "<method?> // method@%0*x", width, index);
442 }
443 break;
444 case Instruction::kIndexFieldRef:
445 if (index < header->GetCollections().FieldIdsSize()) {
446 dex_ir::FieldId* field_id = header->GetCollections().GetFieldId(index);
447 const char* name = field_id->Name()->Data();
448 const char* type_descriptor = field_id->Type()->GetStringId()->Data();
449 const char* back_descriptor = field_id->Class()->GetStringId()->Data();
450 outSize = snprintf(buf.get(), buf_size, "%s.%s:%s // field@%0*x",
451 back_descriptor, name, type_descriptor, width, index);
452 } else {
453 outSize = snprintf(buf.get(), buf_size, "<field?> // field@%0*x", width, index);
454 }
455 break;
456 case Instruction::kIndexVtableOffset:
457 outSize = snprintf(buf.get(), buf_size, "[%0*x] // vtable #%0*x",
458 width, index, width, index);
459 break;
460 case Instruction::kIndexFieldOffset:
461 outSize = snprintf(buf.get(), buf_size, "[obj+%0*x]", width, index);
462 break;
463 case Instruction::kIndexMethodAndProtoRef: {
464 std::string method("<method?>");
465 std::string proto("<proto?>");
466 if (index < header->GetCollections().MethodIdsSize()) {
467 dex_ir::MethodId* method_id = header->GetCollections().GetMethodId(index);
468 const char* name = method_id->Name()->Data();
469 std::string type_descriptor = GetSignatureForProtoId(method_id->Proto());
470 const char* back_descriptor = method_id->Class()->GetStringId()->Data();
471 method = StringPrintf("%s.%s:%s", back_descriptor, name, type_descriptor.c_str());
472 }
473 if (secondary_index < header->GetCollections().ProtoIdsSize()) {
474 dex_ir::ProtoId* proto_id = header->GetCollections().GetProtoId(secondary_index);
475 proto = GetSignatureForProtoId(proto_id);
476 }
477 outSize = snprintf(buf.get(), buf_size, "%s, %s // method@%0*x, proto@%0*x",
478 method.c_str(), proto.c_str(), width, index, width, secondary_index);
479 }
480 break;
481 // SOME NOT SUPPORTED:
482 // case Instruction::kIndexVaries:
483 // case Instruction::kIndexInlineMethod:
484 default:
485 outSize = snprintf(buf.get(), buf_size, "<?>");
486 break;
487 } // switch
488
489 // Determine success of string construction.
490 if (outSize >= buf_size) {
491 // The buffer wasn't big enough; retry with computed size. Note: snprintf()
492 // doesn't count/ the '\0' as part of its returned size, so we add explicit
493 // space for it here.
494 return IndexString(header, dec_insn, outSize + 1);
495 }
496 return buf;
497 }
498
499 /*
500 * Dumps encoded annotation.
501 */
DumpEncodedAnnotation(dex_ir::EncodedAnnotation * annotation)502 void DexLayout::DumpEncodedAnnotation(dex_ir::EncodedAnnotation* annotation) {
503 fputs(annotation->GetType()->GetStringId()->Data(), out_file_);
504 // Display all name=value pairs.
505 for (auto& subannotation : *annotation->GetAnnotationElements()) {
506 fputc(' ', out_file_);
507 fputs(subannotation->GetName()->Data(), out_file_);
508 fputc('=', out_file_);
509 DumpEncodedValue(subannotation->GetValue());
510 }
511 }
512 /*
513 * Dumps encoded value.
514 */
DumpEncodedValue(const dex_ir::EncodedValue * data)515 void DexLayout::DumpEncodedValue(const dex_ir::EncodedValue* data) {
516 switch (data->Type()) {
517 case DexFile::kDexAnnotationByte:
518 fprintf(out_file_, "%" PRId8, data->GetByte());
519 break;
520 case DexFile::kDexAnnotationShort:
521 fprintf(out_file_, "%" PRId16, data->GetShort());
522 break;
523 case DexFile::kDexAnnotationChar:
524 fprintf(out_file_, "%" PRIu16, data->GetChar());
525 break;
526 case DexFile::kDexAnnotationInt:
527 fprintf(out_file_, "%" PRId32, data->GetInt());
528 break;
529 case DexFile::kDexAnnotationLong:
530 fprintf(out_file_, "%" PRId64, data->GetLong());
531 break;
532 case DexFile::kDexAnnotationFloat: {
533 fprintf(out_file_, "%g", data->GetFloat());
534 break;
535 }
536 case DexFile::kDexAnnotationDouble: {
537 fprintf(out_file_, "%g", data->GetDouble());
538 break;
539 }
540 case DexFile::kDexAnnotationString: {
541 dex_ir::StringId* string_id = data->GetStringId();
542 if (options_.output_format_ == kOutputPlain) {
543 DumpEscapedString(string_id->Data(), out_file_);
544 } else {
545 DumpXmlAttribute(string_id->Data(), out_file_);
546 }
547 break;
548 }
549 case DexFile::kDexAnnotationType: {
550 dex_ir::TypeId* type_id = data->GetTypeId();
551 fputs(type_id->GetStringId()->Data(), out_file_);
552 break;
553 }
554 case DexFile::kDexAnnotationField:
555 case DexFile::kDexAnnotationEnum: {
556 dex_ir::FieldId* field_id = data->GetFieldId();
557 fputs(field_id->Name()->Data(), out_file_);
558 break;
559 }
560 case DexFile::kDexAnnotationMethod: {
561 dex_ir::MethodId* method_id = data->GetMethodId();
562 fputs(method_id->Name()->Data(), out_file_);
563 break;
564 }
565 case DexFile::kDexAnnotationArray: {
566 fputc('{', out_file_);
567 // Display all elements.
568 for (auto& value : *data->GetEncodedArray()->GetEncodedValues()) {
569 fputc(' ', out_file_);
570 DumpEncodedValue(value.get());
571 }
572 fputs(" }", out_file_);
573 break;
574 }
575 case DexFile::kDexAnnotationAnnotation: {
576 DumpEncodedAnnotation(data->GetEncodedAnnotation());
577 break;
578 }
579 case DexFile::kDexAnnotationNull:
580 fputs("null", out_file_);
581 break;
582 case DexFile::kDexAnnotationBoolean:
583 fputs(StrBool(data->GetBoolean()), out_file_);
584 break;
585 default:
586 fputs("????", out_file_);
587 break;
588 } // switch
589 }
590
591 /*
592 * Dumps the file header.
593 */
DumpFileHeader()594 void DexLayout::DumpFileHeader() {
595 char sanitized[8 * 2 + 1];
596 dex_ir::Collections& collections = header_->GetCollections();
597 fprintf(out_file_, "DEX file header:\n");
598 Asciify(sanitized, header_->Magic(), 8);
599 fprintf(out_file_, "magic : '%s'\n", sanitized);
600 fprintf(out_file_, "checksum : %08x\n", header_->Checksum());
601 fprintf(out_file_, "signature : %02x%02x...%02x%02x\n",
602 header_->Signature()[0], header_->Signature()[1],
603 header_->Signature()[DexFile::kSha1DigestSize - 2],
604 header_->Signature()[DexFile::kSha1DigestSize - 1]);
605 fprintf(out_file_, "file_size : %d\n", header_->FileSize());
606 fprintf(out_file_, "header_size : %d\n", header_->HeaderSize());
607 fprintf(out_file_, "link_size : %d\n", header_->LinkSize());
608 fprintf(out_file_, "link_off : %d (0x%06x)\n",
609 header_->LinkOffset(), header_->LinkOffset());
610 fprintf(out_file_, "string_ids_size : %d\n", collections.StringIdsSize());
611 fprintf(out_file_, "string_ids_off : %d (0x%06x)\n",
612 collections.StringIdsOffset(), collections.StringIdsOffset());
613 fprintf(out_file_, "type_ids_size : %d\n", collections.TypeIdsSize());
614 fprintf(out_file_, "type_ids_off : %d (0x%06x)\n",
615 collections.TypeIdsOffset(), collections.TypeIdsOffset());
616 fprintf(out_file_, "proto_ids_size : %d\n", collections.ProtoIdsSize());
617 fprintf(out_file_, "proto_ids_off : %d (0x%06x)\n",
618 collections.ProtoIdsOffset(), collections.ProtoIdsOffset());
619 fprintf(out_file_, "field_ids_size : %d\n", collections.FieldIdsSize());
620 fprintf(out_file_, "field_ids_off : %d (0x%06x)\n",
621 collections.FieldIdsOffset(), collections.FieldIdsOffset());
622 fprintf(out_file_, "method_ids_size : %d\n", collections.MethodIdsSize());
623 fprintf(out_file_, "method_ids_off : %d (0x%06x)\n",
624 collections.MethodIdsOffset(), collections.MethodIdsOffset());
625 fprintf(out_file_, "class_defs_size : %d\n", collections.ClassDefsSize());
626 fprintf(out_file_, "class_defs_off : %d (0x%06x)\n",
627 collections.ClassDefsOffset(), collections.ClassDefsOffset());
628 fprintf(out_file_, "data_size : %d\n", header_->DataSize());
629 fprintf(out_file_, "data_off : %d (0x%06x)\n\n",
630 header_->DataOffset(), header_->DataOffset());
631 }
632
633 /*
634 * Dumps a class_def_item.
635 */
DumpClassDef(int idx)636 void DexLayout::DumpClassDef(int idx) {
637 // General class information.
638 dex_ir::ClassDef* class_def = header_->GetCollections().GetClassDef(idx);
639 fprintf(out_file_, "Class #%d header:\n", idx);
640 fprintf(out_file_, "class_idx : %d\n", class_def->ClassType()->GetIndex());
641 fprintf(out_file_, "access_flags : %d (0x%04x)\n",
642 class_def->GetAccessFlags(), class_def->GetAccessFlags());
643 uint32_t superclass_idx = class_def->Superclass() == nullptr ?
644 DexFile::kDexNoIndex16 : class_def->Superclass()->GetIndex();
645 fprintf(out_file_, "superclass_idx : %d\n", superclass_idx);
646 fprintf(out_file_, "interfaces_off : %d (0x%06x)\n",
647 class_def->InterfacesOffset(), class_def->InterfacesOffset());
648 uint32_t source_file_offset = 0xffffffffU;
649 if (class_def->SourceFile() != nullptr) {
650 source_file_offset = class_def->SourceFile()->GetIndex();
651 }
652 fprintf(out_file_, "source_file_idx : %d\n", source_file_offset);
653 uint32_t annotations_offset = 0;
654 if (class_def->Annotations() != nullptr) {
655 annotations_offset = class_def->Annotations()->GetOffset();
656 }
657 fprintf(out_file_, "annotations_off : %d (0x%06x)\n",
658 annotations_offset, annotations_offset);
659 if (class_def->GetClassData() == nullptr) {
660 fprintf(out_file_, "class_data_off : %d (0x%06x)\n", 0, 0);
661 } else {
662 fprintf(out_file_, "class_data_off : %d (0x%06x)\n",
663 class_def->GetClassData()->GetOffset(), class_def->GetClassData()->GetOffset());
664 }
665
666 // Fields and methods.
667 dex_ir::ClassData* class_data = class_def->GetClassData();
668 if (class_data != nullptr && class_data->StaticFields() != nullptr) {
669 fprintf(out_file_, "static_fields_size : %zu\n", class_data->StaticFields()->size());
670 } else {
671 fprintf(out_file_, "static_fields_size : 0\n");
672 }
673 if (class_data != nullptr && class_data->InstanceFields() != nullptr) {
674 fprintf(out_file_, "instance_fields_size: %zu\n", class_data->InstanceFields()->size());
675 } else {
676 fprintf(out_file_, "instance_fields_size: 0\n");
677 }
678 if (class_data != nullptr && class_data->DirectMethods() != nullptr) {
679 fprintf(out_file_, "direct_methods_size : %zu\n", class_data->DirectMethods()->size());
680 } else {
681 fprintf(out_file_, "direct_methods_size : 0\n");
682 }
683 if (class_data != nullptr && class_data->VirtualMethods() != nullptr) {
684 fprintf(out_file_, "virtual_methods_size: %zu\n", class_data->VirtualMethods()->size());
685 } else {
686 fprintf(out_file_, "virtual_methods_size: 0\n");
687 }
688 fprintf(out_file_, "\n");
689 }
690
691 /**
692 * Dumps an annotation set item.
693 */
DumpAnnotationSetItem(dex_ir::AnnotationSetItem * set_item)694 void DexLayout::DumpAnnotationSetItem(dex_ir::AnnotationSetItem* set_item) {
695 if (set_item == nullptr || set_item->GetItems()->size() == 0) {
696 fputs(" empty-annotation-set\n", out_file_);
697 return;
698 }
699 for (dex_ir::AnnotationItem* annotation : *set_item->GetItems()) {
700 if (annotation == nullptr) {
701 continue;
702 }
703 fputs(" ", out_file_);
704 switch (annotation->GetVisibility()) {
705 case DexFile::kDexVisibilityBuild: fputs("VISIBILITY_BUILD ", out_file_); break;
706 case DexFile::kDexVisibilityRuntime: fputs("VISIBILITY_RUNTIME ", out_file_); break;
707 case DexFile::kDexVisibilitySystem: fputs("VISIBILITY_SYSTEM ", out_file_); break;
708 default: fputs("VISIBILITY_UNKNOWN ", out_file_); break;
709 } // switch
710 DumpEncodedAnnotation(annotation->GetAnnotation());
711 fputc('\n', out_file_);
712 }
713 }
714
715 /*
716 * Dumps class annotations.
717 */
DumpClassAnnotations(int idx)718 void DexLayout::DumpClassAnnotations(int idx) {
719 dex_ir::ClassDef* class_def = header_->GetCollections().GetClassDef(idx);
720 dex_ir::AnnotationsDirectoryItem* annotations_directory = class_def->Annotations();
721 if (annotations_directory == nullptr) {
722 return; // none
723 }
724
725 fprintf(out_file_, "Class #%d annotations:\n", idx);
726
727 dex_ir::AnnotationSetItem* class_set_item = annotations_directory->GetClassAnnotation();
728 dex_ir::FieldAnnotationVector* fields = annotations_directory->GetFieldAnnotations();
729 dex_ir::MethodAnnotationVector* methods = annotations_directory->GetMethodAnnotations();
730 dex_ir::ParameterAnnotationVector* parameters = annotations_directory->GetParameterAnnotations();
731
732 // Annotations on the class itself.
733 if (class_set_item != nullptr) {
734 fprintf(out_file_, "Annotations on class\n");
735 DumpAnnotationSetItem(class_set_item);
736 }
737
738 // Annotations on fields.
739 if (fields != nullptr) {
740 for (auto& field : *fields) {
741 const dex_ir::FieldId* field_id = field->GetFieldId();
742 const uint32_t field_idx = field_id->GetIndex();
743 const char* field_name = field_id->Name()->Data();
744 fprintf(out_file_, "Annotations on field #%u '%s'\n", field_idx, field_name);
745 DumpAnnotationSetItem(field->GetAnnotationSetItem());
746 }
747 }
748
749 // Annotations on methods.
750 if (methods != nullptr) {
751 for (auto& method : *methods) {
752 const dex_ir::MethodId* method_id = method->GetMethodId();
753 const uint32_t method_idx = method_id->GetIndex();
754 const char* method_name = method_id->Name()->Data();
755 fprintf(out_file_, "Annotations on method #%u '%s'\n", method_idx, method_name);
756 DumpAnnotationSetItem(method->GetAnnotationSetItem());
757 }
758 }
759
760 // Annotations on method parameters.
761 if (parameters != nullptr) {
762 for (auto& parameter : *parameters) {
763 const dex_ir::MethodId* method_id = parameter->GetMethodId();
764 const uint32_t method_idx = method_id->GetIndex();
765 const char* method_name = method_id->Name()->Data();
766 fprintf(out_file_, "Annotations on method #%u '%s' parameters\n", method_idx, method_name);
767 uint32_t j = 0;
768 for (dex_ir::AnnotationSetItem* annotation : *parameter->GetAnnotations()->GetItems()) {
769 fprintf(out_file_, "#%u\n", j);
770 DumpAnnotationSetItem(annotation);
771 ++j;
772 }
773 }
774 }
775
776 fputc('\n', out_file_);
777 }
778
779 /*
780 * Dumps an interface that a class declares to implement.
781 */
DumpInterface(const dex_ir::TypeId * type_item,int i)782 void DexLayout::DumpInterface(const dex_ir::TypeId* type_item, int i) {
783 const char* interface_name = type_item->GetStringId()->Data();
784 if (options_.output_format_ == kOutputPlain) {
785 fprintf(out_file_, " #%d : '%s'\n", i, interface_name);
786 } else {
787 std::string dot(DescriptorToDotWrapper(interface_name));
788 fprintf(out_file_, "<implements name=\"%s\">\n</implements>\n", dot.c_str());
789 }
790 }
791
792 /*
793 * Dumps the catches table associated with the code.
794 */
DumpCatches(const dex_ir::CodeItem * code)795 void DexLayout::DumpCatches(const dex_ir::CodeItem* code) {
796 const uint16_t tries_size = code->TriesSize();
797
798 // No catch table.
799 if (tries_size == 0) {
800 fprintf(out_file_, " catches : (none)\n");
801 return;
802 }
803
804 // Dump all table entries.
805 fprintf(out_file_, " catches : %d\n", tries_size);
806 std::vector<std::unique_ptr<const dex_ir::TryItem>>* tries = code->Tries();
807 for (uint32_t i = 0; i < tries_size; i++) {
808 const dex_ir::TryItem* try_item = (*tries)[i].get();
809 const uint32_t start = try_item->StartAddr();
810 const uint32_t end = start + try_item->InsnCount();
811 fprintf(out_file_, " 0x%04x - 0x%04x\n", start, end);
812 for (auto& handler : *try_item->GetHandlers()->GetHandlers()) {
813 const dex_ir::TypeId* type_id = handler->GetTypeId();
814 const char* descriptor = (type_id == nullptr) ? "<any>" : type_id->GetStringId()->Data();
815 fprintf(out_file_, " %s -> 0x%04x\n", descriptor, handler->GetAddress());
816 } // for
817 } // for
818 }
819
820 /*
821 * Dumps all positions table entries associated with the code.
822 */
DumpPositionInfo(const dex_ir::CodeItem * code)823 void DexLayout::DumpPositionInfo(const dex_ir::CodeItem* code) {
824 dex_ir::DebugInfoItem* debug_info = code->DebugInfo();
825 if (debug_info == nullptr) {
826 return;
827 }
828 std::vector<std::unique_ptr<dex_ir::PositionInfo>>& positions = debug_info->GetPositionInfo();
829 for (size_t i = 0; i < positions.size(); ++i) {
830 fprintf(out_file_, " 0x%04x line=%d\n", positions[i]->address_, positions[i]->line_);
831 }
832 }
833
834 /*
835 * Dumps all locals table entries associated with the code.
836 */
DumpLocalInfo(const dex_ir::CodeItem * code)837 void DexLayout::DumpLocalInfo(const dex_ir::CodeItem* code) {
838 dex_ir::DebugInfoItem* debug_info = code->DebugInfo();
839 if (debug_info == nullptr) {
840 return;
841 }
842 std::vector<std::unique_ptr<dex_ir::LocalInfo>>& locals = debug_info->GetLocalInfo();
843 for (size_t i = 0; i < locals.size(); ++i) {
844 dex_ir::LocalInfo* entry = locals[i].get();
845 fprintf(out_file_, " 0x%04x - 0x%04x reg=%d %s %s %s\n",
846 entry->start_address_, entry->end_address_, entry->reg_,
847 entry->name_.c_str(), entry->descriptor_.c_str(), entry->signature_.c_str());
848 }
849 }
850
851 /*
852 * Dumps a single instruction.
853 */
DumpInstruction(const dex_ir::CodeItem * code,uint32_t code_offset,uint32_t insn_idx,uint32_t insn_width,const Instruction * dec_insn)854 void DexLayout::DumpInstruction(const dex_ir::CodeItem* code,
855 uint32_t code_offset,
856 uint32_t insn_idx,
857 uint32_t insn_width,
858 const Instruction* dec_insn) {
859 // Address of instruction (expressed as byte offset).
860 fprintf(out_file_, "%06x:", code_offset + 0x10 + insn_idx * 2);
861
862 // Dump (part of) raw bytes.
863 const uint16_t* insns = code->Insns();
864 for (uint32_t i = 0; i < 8; i++) {
865 if (i < insn_width) {
866 if (i == 7) {
867 fprintf(out_file_, " ... ");
868 } else {
869 // Print 16-bit value in little-endian order.
870 const uint8_t* bytePtr = (const uint8_t*) &insns[insn_idx + i];
871 fprintf(out_file_, " %02x%02x", bytePtr[0], bytePtr[1]);
872 }
873 } else {
874 fputs(" ", out_file_);
875 }
876 } // for
877
878 // Dump pseudo-instruction or opcode.
879 if (dec_insn->Opcode() == Instruction::NOP) {
880 const uint16_t instr = Get2LE((const uint8_t*) &insns[insn_idx]);
881 if (instr == Instruction::kPackedSwitchSignature) {
882 fprintf(out_file_, "|%04x: packed-switch-data (%d units)", insn_idx, insn_width);
883 } else if (instr == Instruction::kSparseSwitchSignature) {
884 fprintf(out_file_, "|%04x: sparse-switch-data (%d units)", insn_idx, insn_width);
885 } else if (instr == Instruction::kArrayDataSignature) {
886 fprintf(out_file_, "|%04x: array-data (%d units)", insn_idx, insn_width);
887 } else {
888 fprintf(out_file_, "|%04x: nop // spacer", insn_idx);
889 }
890 } else {
891 fprintf(out_file_, "|%04x: %s", insn_idx, dec_insn->Name());
892 }
893
894 // Set up additional argument.
895 std::unique_ptr<char[]> index_buf;
896 if (Instruction::IndexTypeOf(dec_insn->Opcode()) != Instruction::kIndexNone) {
897 index_buf = IndexString(header_, dec_insn, 200);
898 }
899
900 // Dump the instruction.
901 //
902 // NOTE: pDecInsn->DumpString(pDexFile) differs too much from original.
903 //
904 switch (Instruction::FormatOf(dec_insn->Opcode())) {
905 case Instruction::k10x: // op
906 break;
907 case Instruction::k12x: // op vA, vB
908 fprintf(out_file_, " v%d, v%d", dec_insn->VRegA(), dec_insn->VRegB());
909 break;
910 case Instruction::k11n: // op vA, #+B
911 fprintf(out_file_, " v%d, #int %d // #%x",
912 dec_insn->VRegA(), (int32_t) dec_insn->VRegB(), (uint8_t)dec_insn->VRegB());
913 break;
914 case Instruction::k11x: // op vAA
915 fprintf(out_file_, " v%d", dec_insn->VRegA());
916 break;
917 case Instruction::k10t: // op +AA
918 case Instruction::k20t: { // op +AAAA
919 const int32_t targ = (int32_t) dec_insn->VRegA();
920 fprintf(out_file_, " %04x // %c%04x",
921 insn_idx + targ,
922 (targ < 0) ? '-' : '+',
923 (targ < 0) ? -targ : targ);
924 break;
925 }
926 case Instruction::k22x: // op vAA, vBBBB
927 fprintf(out_file_, " v%d, v%d", dec_insn->VRegA(), dec_insn->VRegB());
928 break;
929 case Instruction::k21t: { // op vAA, +BBBB
930 const int32_t targ = (int32_t) dec_insn->VRegB();
931 fprintf(out_file_, " v%d, %04x // %c%04x", dec_insn->VRegA(),
932 insn_idx + targ,
933 (targ < 0) ? '-' : '+',
934 (targ < 0) ? -targ : targ);
935 break;
936 }
937 case Instruction::k21s: // op vAA, #+BBBB
938 fprintf(out_file_, " v%d, #int %d // #%x",
939 dec_insn->VRegA(), (int32_t) dec_insn->VRegB(), (uint16_t)dec_insn->VRegB());
940 break;
941 case Instruction::k21h: // op vAA, #+BBBB0000[00000000]
942 // The printed format varies a bit based on the actual opcode.
943 if (dec_insn->Opcode() == Instruction::CONST_HIGH16) {
944 const int32_t value = dec_insn->VRegB() << 16;
945 fprintf(out_file_, " v%d, #int %d // #%x",
946 dec_insn->VRegA(), value, (uint16_t) dec_insn->VRegB());
947 } else {
948 const int64_t value = ((int64_t) dec_insn->VRegB()) << 48;
949 fprintf(out_file_, " v%d, #long %" PRId64 " // #%x",
950 dec_insn->VRegA(), value, (uint16_t) dec_insn->VRegB());
951 }
952 break;
953 case Instruction::k21c: // op vAA, thing@BBBB
954 case Instruction::k31c: // op vAA, thing@BBBBBBBB
955 fprintf(out_file_, " v%d, %s", dec_insn->VRegA(), index_buf.get());
956 break;
957 case Instruction::k23x: // op vAA, vBB, vCC
958 fprintf(out_file_, " v%d, v%d, v%d",
959 dec_insn->VRegA(), dec_insn->VRegB(), dec_insn->VRegC());
960 break;
961 case Instruction::k22b: // op vAA, vBB, #+CC
962 fprintf(out_file_, " v%d, v%d, #int %d // #%02x",
963 dec_insn->VRegA(), dec_insn->VRegB(),
964 (int32_t) dec_insn->VRegC(), (uint8_t) dec_insn->VRegC());
965 break;
966 case Instruction::k22t: { // op vA, vB, +CCCC
967 const int32_t targ = (int32_t) dec_insn->VRegC();
968 fprintf(out_file_, " v%d, v%d, %04x // %c%04x",
969 dec_insn->VRegA(), dec_insn->VRegB(),
970 insn_idx + targ,
971 (targ < 0) ? '-' : '+',
972 (targ < 0) ? -targ : targ);
973 break;
974 }
975 case Instruction::k22s: // op vA, vB, #+CCCC
976 fprintf(out_file_, " v%d, v%d, #int %d // #%04x",
977 dec_insn->VRegA(), dec_insn->VRegB(),
978 (int32_t) dec_insn->VRegC(), (uint16_t) dec_insn->VRegC());
979 break;
980 case Instruction::k22c: // op vA, vB, thing@CCCC
981 // NOT SUPPORTED:
982 // case Instruction::k22cs: // [opt] op vA, vB, field offset CCCC
983 fprintf(out_file_, " v%d, v%d, %s",
984 dec_insn->VRegA(), dec_insn->VRegB(), index_buf.get());
985 break;
986 case Instruction::k30t:
987 fprintf(out_file_, " #%08x", dec_insn->VRegA());
988 break;
989 case Instruction::k31i: { // op vAA, #+BBBBBBBB
990 // This is often, but not always, a float.
991 union {
992 float f;
993 uint32_t i;
994 } conv;
995 conv.i = dec_insn->VRegB();
996 fprintf(out_file_, " v%d, #float %g // #%08x",
997 dec_insn->VRegA(), conv.f, dec_insn->VRegB());
998 break;
999 }
1000 case Instruction::k31t: // op vAA, offset +BBBBBBBB
1001 fprintf(out_file_, " v%d, %08x // +%08x",
1002 dec_insn->VRegA(), insn_idx + dec_insn->VRegB(), dec_insn->VRegB());
1003 break;
1004 case Instruction::k32x: // op vAAAA, vBBBB
1005 fprintf(out_file_, " v%d, v%d", dec_insn->VRegA(), dec_insn->VRegB());
1006 break;
1007 case Instruction::k35c: // op {vC, vD, vE, vF, vG}, thing@BBBB
1008 case Instruction::k45cc: { // op {vC, vD, vE, vF, vG}, meth@BBBB, proto@HHHH
1009 // NOT SUPPORTED:
1010 // case Instruction::k35ms: // [opt] invoke-virtual+super
1011 // case Instruction::k35mi: // [opt] inline invoke
1012 uint32_t arg[Instruction::kMaxVarArgRegs];
1013 dec_insn->GetVarArgs(arg);
1014 fputs(" {", out_file_);
1015 for (int i = 0, n = dec_insn->VRegA(); i < n; i++) {
1016 if (i == 0) {
1017 fprintf(out_file_, "v%d", arg[i]);
1018 } else {
1019 fprintf(out_file_, ", v%d", arg[i]);
1020 }
1021 } // for
1022 fprintf(out_file_, "}, %s", index_buf.get());
1023 break;
1024 }
1025 case Instruction::k3rc: // op {vCCCC .. v(CCCC+AA-1)}, thing@BBBB
1026 case Instruction::k4rcc: // op {vCCCC .. v(CCCC+AA-1)}, meth@BBBB, proto@HHHH
1027 // NOT SUPPORTED:
1028 // case Instruction::k3rms: // [opt] invoke-virtual+super/range
1029 // case Instruction::k3rmi: // [opt] execute-inline/range
1030 {
1031 // This doesn't match the "dx" output when some of the args are
1032 // 64-bit values -- dx only shows the first register.
1033 fputs(" {", out_file_);
1034 for (int i = 0, n = dec_insn->VRegA(); i < n; i++) {
1035 if (i == 0) {
1036 fprintf(out_file_, "v%d", dec_insn->VRegC() + i);
1037 } else {
1038 fprintf(out_file_, ", v%d", dec_insn->VRegC() + i);
1039 }
1040 } // for
1041 fprintf(out_file_, "}, %s", index_buf.get());
1042 }
1043 break;
1044 case Instruction::k51l: { // op vAA, #+BBBBBBBBBBBBBBBB
1045 // This is often, but not always, a double.
1046 union {
1047 double d;
1048 uint64_t j;
1049 } conv;
1050 conv.j = dec_insn->WideVRegB();
1051 fprintf(out_file_, " v%d, #double %g // #%016" PRIx64,
1052 dec_insn->VRegA(), conv.d, dec_insn->WideVRegB());
1053 break;
1054 }
1055 // NOT SUPPORTED:
1056 // case Instruction::k00x: // unknown op or breakpoint
1057 // break;
1058 default:
1059 fprintf(out_file_, " ???");
1060 break;
1061 } // switch
1062
1063 fputc('\n', out_file_);
1064 }
1065
1066 /*
1067 * Dumps a bytecode disassembly.
1068 */
DumpBytecodes(uint32_t idx,const dex_ir::CodeItem * code,uint32_t code_offset)1069 void DexLayout::DumpBytecodes(uint32_t idx, const dex_ir::CodeItem* code, uint32_t code_offset) {
1070 dex_ir::MethodId* method_id = header_->GetCollections().GetMethodId(idx);
1071 const char* name = method_id->Name()->Data();
1072 std::string type_descriptor = GetSignatureForProtoId(method_id->Proto());
1073 const char* back_descriptor = method_id->Class()->GetStringId()->Data();
1074
1075 // Generate header.
1076 std::string dot(DescriptorToDotWrapper(back_descriptor));
1077 fprintf(out_file_, "%06x: |[%06x] %s.%s:%s\n",
1078 code_offset, code_offset, dot.c_str(), name, type_descriptor.c_str());
1079
1080 // Iterate over all instructions.
1081 const uint16_t* insns = code->Insns();
1082 for (uint32_t insn_idx = 0; insn_idx < code->InsnsSize();) {
1083 const Instruction* instruction = Instruction::At(&insns[insn_idx]);
1084 const uint32_t insn_width = instruction->SizeInCodeUnits();
1085 if (insn_width == 0) {
1086 fprintf(stderr, "GLITCH: zero-width instruction at idx=0x%04x\n", insn_idx);
1087 break;
1088 }
1089 DumpInstruction(code, code_offset, insn_idx, insn_width, instruction);
1090 insn_idx += insn_width;
1091 } // for
1092 }
1093
1094 /*
1095 * Dumps code of a method.
1096 */
DumpCode(uint32_t idx,const dex_ir::CodeItem * code,uint32_t code_offset)1097 void DexLayout::DumpCode(uint32_t idx, const dex_ir::CodeItem* code, uint32_t code_offset) {
1098 fprintf(out_file_, " registers : %d\n", code->RegistersSize());
1099 fprintf(out_file_, " ins : %d\n", code->InsSize());
1100 fprintf(out_file_, " outs : %d\n", code->OutsSize());
1101 fprintf(out_file_, " insns size : %d 16-bit code units\n",
1102 code->InsnsSize());
1103
1104 // Bytecode disassembly, if requested.
1105 if (options_.disassemble_) {
1106 DumpBytecodes(idx, code, code_offset);
1107 }
1108
1109 // Try-catch blocks.
1110 DumpCatches(code);
1111
1112 // Positions and locals table in the debug info.
1113 fprintf(out_file_, " positions : \n");
1114 DumpPositionInfo(code);
1115 fprintf(out_file_, " locals : \n");
1116 DumpLocalInfo(code);
1117 }
1118
1119 /*
1120 * Dumps a method.
1121 */
DumpMethod(uint32_t idx,uint32_t flags,const dex_ir::CodeItem * code,int i)1122 void DexLayout::DumpMethod(uint32_t idx, uint32_t flags, const dex_ir::CodeItem* code, int i) {
1123 // Bail for anything private if export only requested.
1124 if (options_.exports_only_ && (flags & (kAccPublic | kAccProtected)) == 0) {
1125 return;
1126 }
1127
1128 dex_ir::MethodId* method_id = header_->GetCollections().GetMethodId(idx);
1129 const char* name = method_id->Name()->Data();
1130 char* type_descriptor = strdup(GetSignatureForProtoId(method_id->Proto()).c_str());
1131 const char* back_descriptor = method_id->Class()->GetStringId()->Data();
1132 char* access_str = CreateAccessFlagStr(flags, kAccessForMethod);
1133
1134 if (options_.output_format_ == kOutputPlain) {
1135 fprintf(out_file_, " #%d : (in %s)\n", i, back_descriptor);
1136 fprintf(out_file_, " name : '%s'\n", name);
1137 fprintf(out_file_, " type : '%s'\n", type_descriptor);
1138 fprintf(out_file_, " access : 0x%04x (%s)\n", flags, access_str);
1139 if (code == nullptr) {
1140 fprintf(out_file_, " code : (none)\n");
1141 } else {
1142 fprintf(out_file_, " code -\n");
1143 DumpCode(idx, code, code->GetOffset());
1144 }
1145 if (options_.disassemble_) {
1146 fputc('\n', out_file_);
1147 }
1148 } else if (options_.output_format_ == kOutputXml) {
1149 const bool constructor = (name[0] == '<');
1150
1151 // Method name and prototype.
1152 if (constructor) {
1153 std::string dot(DescriptorClassToDot(back_descriptor));
1154 fprintf(out_file_, "<constructor name=\"%s\"\n", dot.c_str());
1155 dot = DescriptorToDotWrapper(back_descriptor);
1156 fprintf(out_file_, " type=\"%s\"\n", dot.c_str());
1157 } else {
1158 fprintf(out_file_, "<method name=\"%s\"\n", name);
1159 const char* return_type = strrchr(type_descriptor, ')');
1160 if (return_type == nullptr) {
1161 fprintf(stderr, "bad method type descriptor '%s'\n", type_descriptor);
1162 goto bail;
1163 }
1164 std::string dot(DescriptorToDotWrapper(return_type + 1));
1165 fprintf(out_file_, " return=\"%s\"\n", dot.c_str());
1166 fprintf(out_file_, " abstract=%s\n", QuotedBool((flags & kAccAbstract) != 0));
1167 fprintf(out_file_, " native=%s\n", QuotedBool((flags & kAccNative) != 0));
1168 fprintf(out_file_, " synchronized=%s\n", QuotedBool(
1169 (flags & (kAccSynchronized | kAccDeclaredSynchronized)) != 0));
1170 }
1171
1172 // Additional method flags.
1173 fprintf(out_file_, " static=%s\n", QuotedBool((flags & kAccStatic) != 0));
1174 fprintf(out_file_, " final=%s\n", QuotedBool((flags & kAccFinal) != 0));
1175 // The "deprecated=" not knowable w/o parsing annotations.
1176 fprintf(out_file_, " visibility=%s\n>\n", QuotedVisibility(flags));
1177
1178 // Parameters.
1179 if (type_descriptor[0] != '(') {
1180 fprintf(stderr, "ERROR: bad descriptor '%s'\n", type_descriptor);
1181 goto bail;
1182 }
1183 char* tmp_buf = reinterpret_cast<char*>(malloc(strlen(type_descriptor) + 1));
1184 const char* base = type_descriptor + 1;
1185 int arg_num = 0;
1186 while (*base != ')') {
1187 char* cp = tmp_buf;
1188 while (*base == '[') {
1189 *cp++ = *base++;
1190 }
1191 if (*base == 'L') {
1192 // Copy through ';'.
1193 do {
1194 *cp = *base++;
1195 } while (*cp++ != ';');
1196 } else {
1197 // Primitive char, copy it.
1198 if (strchr("ZBCSIFJD", *base) == nullptr) {
1199 fprintf(stderr, "ERROR: bad method signature '%s'\n", base);
1200 break; // while
1201 }
1202 *cp++ = *base++;
1203 }
1204 // Null terminate and display.
1205 *cp++ = '\0';
1206 std::string dot(DescriptorToDotWrapper(tmp_buf));
1207 fprintf(out_file_, "<parameter name=\"arg%d\" type=\"%s\">\n"
1208 "</parameter>\n", arg_num++, dot.c_str());
1209 } // while
1210 free(tmp_buf);
1211 if (constructor) {
1212 fprintf(out_file_, "</constructor>\n");
1213 } else {
1214 fprintf(out_file_, "</method>\n");
1215 }
1216 }
1217
1218 bail:
1219 free(type_descriptor);
1220 free(access_str);
1221 }
1222
1223 /*
1224 * Dumps a static (class) field.
1225 */
DumpSField(uint32_t idx,uint32_t flags,int i,dex_ir::EncodedValue * init)1226 void DexLayout::DumpSField(uint32_t idx, uint32_t flags, int i, dex_ir::EncodedValue* init) {
1227 // Bail for anything private if export only requested.
1228 if (options_.exports_only_ && (flags & (kAccPublic | kAccProtected)) == 0) {
1229 return;
1230 }
1231
1232 dex_ir::FieldId* field_id = header_->GetCollections().GetFieldId(idx);
1233 const char* name = field_id->Name()->Data();
1234 const char* type_descriptor = field_id->Type()->GetStringId()->Data();
1235 const char* back_descriptor = field_id->Class()->GetStringId()->Data();
1236 char* access_str = CreateAccessFlagStr(flags, kAccessForField);
1237
1238 if (options_.output_format_ == kOutputPlain) {
1239 fprintf(out_file_, " #%d : (in %s)\n", i, back_descriptor);
1240 fprintf(out_file_, " name : '%s'\n", name);
1241 fprintf(out_file_, " type : '%s'\n", type_descriptor);
1242 fprintf(out_file_, " access : 0x%04x (%s)\n", flags, access_str);
1243 if (init != nullptr) {
1244 fputs(" value : ", out_file_);
1245 DumpEncodedValue(init);
1246 fputs("\n", out_file_);
1247 }
1248 } else if (options_.output_format_ == kOutputXml) {
1249 fprintf(out_file_, "<field name=\"%s\"\n", name);
1250 std::string dot(DescriptorToDotWrapper(type_descriptor));
1251 fprintf(out_file_, " type=\"%s\"\n", dot.c_str());
1252 fprintf(out_file_, " transient=%s\n", QuotedBool((flags & kAccTransient) != 0));
1253 fprintf(out_file_, " volatile=%s\n", QuotedBool((flags & kAccVolatile) != 0));
1254 // The "value=" is not knowable w/o parsing annotations.
1255 fprintf(out_file_, " static=%s\n", QuotedBool((flags & kAccStatic) != 0));
1256 fprintf(out_file_, " final=%s\n", QuotedBool((flags & kAccFinal) != 0));
1257 // The "deprecated=" is not knowable w/o parsing annotations.
1258 fprintf(out_file_, " visibility=%s\n", QuotedVisibility(flags));
1259 if (init != nullptr) {
1260 fputs(" value=\"", out_file_);
1261 DumpEncodedValue(init);
1262 fputs("\"\n", out_file_);
1263 }
1264 fputs(">\n</field>\n", out_file_);
1265 }
1266
1267 free(access_str);
1268 }
1269
1270 /*
1271 * Dumps an instance field.
1272 */
DumpIField(uint32_t idx,uint32_t flags,int i)1273 void DexLayout::DumpIField(uint32_t idx, uint32_t flags, int i) {
1274 DumpSField(idx, flags, i, nullptr);
1275 }
1276
1277 /*
1278 * Dumps the class.
1279 *
1280 * Note "idx" is a DexClassDef index, not a DexTypeId index.
1281 *
1282 * If "*last_package" is nullptr or does not match the current class' package,
1283 * the value will be replaced with a newly-allocated string.
1284 */
DumpClass(int idx,char ** last_package)1285 void DexLayout::DumpClass(int idx, char** last_package) {
1286 dex_ir::ClassDef* class_def = header_->GetCollections().GetClassDef(idx);
1287 // Omitting non-public class.
1288 if (options_.exports_only_ && (class_def->GetAccessFlags() & kAccPublic) == 0) {
1289 return;
1290 }
1291
1292 if (options_.show_section_headers_) {
1293 DumpClassDef(idx);
1294 }
1295
1296 if (options_.show_annotations_) {
1297 DumpClassAnnotations(idx);
1298 }
1299
1300 // For the XML output, show the package name. Ideally we'd gather
1301 // up the classes, sort them, and dump them alphabetically so the
1302 // package name wouldn't jump around, but that's not a great plan
1303 // for something that needs to run on the device.
1304 const char* class_descriptor =
1305 header_->GetCollections().GetClassDef(idx)->ClassType()->GetStringId()->Data();
1306 if (!(class_descriptor[0] == 'L' &&
1307 class_descriptor[strlen(class_descriptor)-1] == ';')) {
1308 // Arrays and primitives should not be defined explicitly. Keep going?
1309 fprintf(stderr, "Malformed class name '%s'\n", class_descriptor);
1310 } else if (options_.output_format_ == kOutputXml) {
1311 char* mangle = strdup(class_descriptor + 1);
1312 mangle[strlen(mangle)-1] = '\0';
1313
1314 // Reduce to just the package name.
1315 char* last_slash = strrchr(mangle, '/');
1316 if (last_slash != nullptr) {
1317 *last_slash = '\0';
1318 } else {
1319 *mangle = '\0';
1320 }
1321
1322 for (char* cp = mangle; *cp != '\0'; cp++) {
1323 if (*cp == '/') {
1324 *cp = '.';
1325 }
1326 } // for
1327
1328 if (*last_package == nullptr || strcmp(mangle, *last_package) != 0) {
1329 // Start of a new package.
1330 if (*last_package != nullptr) {
1331 fprintf(out_file_, "</package>\n");
1332 }
1333 fprintf(out_file_, "<package name=\"%s\"\n>\n", mangle);
1334 free(*last_package);
1335 *last_package = mangle;
1336 } else {
1337 free(mangle);
1338 }
1339 }
1340
1341 // General class information.
1342 char* access_str = CreateAccessFlagStr(class_def->GetAccessFlags(), kAccessForClass);
1343 const char* superclass_descriptor = nullptr;
1344 if (class_def->Superclass() != nullptr) {
1345 superclass_descriptor = class_def->Superclass()->GetStringId()->Data();
1346 }
1347 if (options_.output_format_ == kOutputPlain) {
1348 fprintf(out_file_, "Class #%d -\n", idx);
1349 fprintf(out_file_, " Class descriptor : '%s'\n", class_descriptor);
1350 fprintf(out_file_, " Access flags : 0x%04x (%s)\n",
1351 class_def->GetAccessFlags(), access_str);
1352 if (superclass_descriptor != nullptr) {
1353 fprintf(out_file_, " Superclass : '%s'\n", superclass_descriptor);
1354 }
1355 fprintf(out_file_, " Interfaces -\n");
1356 } else {
1357 std::string dot(DescriptorClassToDot(class_descriptor));
1358 fprintf(out_file_, "<class name=\"%s\"\n", dot.c_str());
1359 if (superclass_descriptor != nullptr) {
1360 dot = DescriptorToDotWrapper(superclass_descriptor);
1361 fprintf(out_file_, " extends=\"%s\"\n", dot.c_str());
1362 }
1363 fprintf(out_file_, " interface=%s\n",
1364 QuotedBool((class_def->GetAccessFlags() & kAccInterface) != 0));
1365 fprintf(out_file_, " abstract=%s\n",
1366 QuotedBool((class_def->GetAccessFlags() & kAccAbstract) != 0));
1367 fprintf(out_file_, " static=%s\n", QuotedBool((class_def->GetAccessFlags() & kAccStatic) != 0));
1368 fprintf(out_file_, " final=%s\n", QuotedBool((class_def->GetAccessFlags() & kAccFinal) != 0));
1369 // The "deprecated=" not knowable w/o parsing annotations.
1370 fprintf(out_file_, " visibility=%s\n", QuotedVisibility(class_def->GetAccessFlags()));
1371 fprintf(out_file_, ">\n");
1372 }
1373
1374 // Interfaces.
1375 const dex_ir::TypeList* interfaces = class_def->Interfaces();
1376 if (interfaces != nullptr) {
1377 const dex_ir::TypeIdVector* interfaces_vector = interfaces->GetTypeList();
1378 for (uint32_t i = 0; i < interfaces_vector->size(); i++) {
1379 DumpInterface((*interfaces_vector)[i], i);
1380 } // for
1381 }
1382
1383 // Fields and methods.
1384 dex_ir::ClassData* class_data = class_def->GetClassData();
1385 // Prepare data for static fields.
1386 dex_ir::EncodedArrayItem* static_values = class_def->StaticValues();
1387 dex_ir::EncodedValueVector* encoded_values =
1388 static_values == nullptr ? nullptr : static_values->GetEncodedValues();
1389 const uint32_t encoded_values_size = (encoded_values == nullptr) ? 0 : encoded_values->size();
1390
1391 // Static fields.
1392 if (options_.output_format_ == kOutputPlain) {
1393 fprintf(out_file_, " Static fields -\n");
1394 }
1395 if (class_data != nullptr) {
1396 dex_ir::FieldItemVector* static_fields = class_data->StaticFields();
1397 if (static_fields != nullptr) {
1398 for (uint32_t i = 0; i < static_fields->size(); i++) {
1399 DumpSField((*static_fields)[i]->GetFieldId()->GetIndex(),
1400 (*static_fields)[i]->GetAccessFlags(),
1401 i,
1402 i < encoded_values_size ? (*encoded_values)[i].get() : nullptr);
1403 } // for
1404 }
1405 }
1406
1407 // Instance fields.
1408 if (options_.output_format_ == kOutputPlain) {
1409 fprintf(out_file_, " Instance fields -\n");
1410 }
1411 if (class_data != nullptr) {
1412 dex_ir::FieldItemVector* instance_fields = class_data->InstanceFields();
1413 if (instance_fields != nullptr) {
1414 for (uint32_t i = 0; i < instance_fields->size(); i++) {
1415 DumpIField((*instance_fields)[i]->GetFieldId()->GetIndex(),
1416 (*instance_fields)[i]->GetAccessFlags(),
1417 i);
1418 } // for
1419 }
1420 }
1421
1422 // Direct methods.
1423 if (options_.output_format_ == kOutputPlain) {
1424 fprintf(out_file_, " Direct methods -\n");
1425 }
1426 if (class_data != nullptr) {
1427 dex_ir::MethodItemVector* direct_methods = class_data->DirectMethods();
1428 if (direct_methods != nullptr) {
1429 for (uint32_t i = 0; i < direct_methods->size(); i++) {
1430 DumpMethod((*direct_methods)[i]->GetMethodId()->GetIndex(),
1431 (*direct_methods)[i]->GetAccessFlags(),
1432 (*direct_methods)[i]->GetCodeItem(),
1433 i);
1434 } // for
1435 }
1436 }
1437
1438 // Virtual methods.
1439 if (options_.output_format_ == kOutputPlain) {
1440 fprintf(out_file_, " Virtual methods -\n");
1441 }
1442 if (class_data != nullptr) {
1443 dex_ir::MethodItemVector* virtual_methods = class_data->VirtualMethods();
1444 if (virtual_methods != nullptr) {
1445 for (uint32_t i = 0; i < virtual_methods->size(); i++) {
1446 DumpMethod((*virtual_methods)[i]->GetMethodId()->GetIndex(),
1447 (*virtual_methods)[i]->GetAccessFlags(),
1448 (*virtual_methods)[i]->GetCodeItem(),
1449 i);
1450 } // for
1451 }
1452 }
1453
1454 // End of class.
1455 if (options_.output_format_ == kOutputPlain) {
1456 const char* file_name = "unknown";
1457 if (class_def->SourceFile() != nullptr) {
1458 file_name = class_def->SourceFile()->Data();
1459 }
1460 const dex_ir::StringId* source_file = class_def->SourceFile();
1461 fprintf(out_file_, " source_file_idx : %d (%s)\n\n",
1462 source_file == nullptr ? 0xffffffffU : source_file->GetIndex(), file_name);
1463 } else if (options_.output_format_ == kOutputXml) {
1464 fprintf(out_file_, "</class>\n");
1465 }
1466
1467 free(access_str);
1468 }
1469
DumpDexFile()1470 void DexLayout::DumpDexFile() {
1471 // Headers.
1472 if (options_.show_file_headers_) {
1473 DumpFileHeader();
1474 }
1475
1476 // Open XML context.
1477 if (options_.output_format_ == kOutputXml) {
1478 fprintf(out_file_, "<api>\n");
1479 }
1480
1481 // Iterate over all classes.
1482 char* package = nullptr;
1483 const uint32_t class_defs_size = header_->GetCollections().ClassDefsSize();
1484 for (uint32_t i = 0; i < class_defs_size; i++) {
1485 DumpClass(i, &package);
1486 } // for
1487
1488 // Free the last package allocated.
1489 if (package != nullptr) {
1490 fprintf(out_file_, "</package>\n");
1491 free(package);
1492 }
1493
1494 // Close XML context.
1495 if (options_.output_format_ == kOutputXml) {
1496 fprintf(out_file_, "</api>\n");
1497 }
1498 }
1499
LayoutClassDefsAndClassData(const DexFile * dex_file)1500 std::vector<dex_ir::ClassData*> DexLayout::LayoutClassDefsAndClassData(const DexFile* dex_file) {
1501 std::vector<dex_ir::ClassDef*> new_class_def_order;
1502 for (std::unique_ptr<dex_ir::ClassDef>& class_def : header_->GetCollections().ClassDefs()) {
1503 dex::TypeIndex type_idx(class_def->ClassType()->GetIndex());
1504 if (info_->ContainsClass(*dex_file, type_idx)) {
1505 new_class_def_order.push_back(class_def.get());
1506 }
1507 }
1508 for (std::unique_ptr<dex_ir::ClassDef>& class_def : header_->GetCollections().ClassDefs()) {
1509 dex::TypeIndex type_idx(class_def->ClassType()->GetIndex());
1510 if (!info_->ContainsClass(*dex_file, type_idx)) {
1511 new_class_def_order.push_back(class_def.get());
1512 }
1513 }
1514 uint32_t class_defs_offset = header_->GetCollections().ClassDefsOffset();
1515 uint32_t class_data_offset = header_->GetCollections().ClassDatasOffset();
1516 std::unordered_set<dex_ir::ClassData*> visited_class_data;
1517 std::vector<dex_ir::ClassData*> new_class_data_order;
1518 for (uint32_t i = 0; i < new_class_def_order.size(); ++i) {
1519 dex_ir::ClassDef* class_def = new_class_def_order[i];
1520 class_def->SetIndex(i);
1521 class_def->SetOffset(class_defs_offset);
1522 class_defs_offset += dex_ir::ClassDef::ItemSize();
1523 dex_ir::ClassData* class_data = class_def->GetClassData();
1524 if (class_data != nullptr && visited_class_data.find(class_data) == visited_class_data.end()) {
1525 class_data->SetOffset(class_data_offset);
1526 class_data_offset += class_data->GetSize();
1527 visited_class_data.insert(class_data);
1528 new_class_data_order.push_back(class_data);
1529 }
1530 }
1531 return new_class_data_order;
1532 }
1533
LayoutStringData(const DexFile * dex_file)1534 void DexLayout::LayoutStringData(const DexFile* dex_file) {
1535 const size_t num_strings = header_->GetCollections().StringIds().size();
1536 std::vector<bool> is_shorty(num_strings, false);
1537 std::vector<bool> from_hot_method(num_strings, false);
1538 for (std::unique_ptr<dex_ir::ClassDef>& class_def : header_->GetCollections().ClassDefs()) {
1539 // A name of a profile class is probably going to get looked up by ClassTable::Lookup, mark it
1540 // as hot. Add its super class and interfaces as well, which can be used during initialization.
1541 const bool is_profile_class =
1542 info_->ContainsClass(*dex_file, dex::TypeIndex(class_def->ClassType()->GetIndex()));
1543 if (is_profile_class) {
1544 from_hot_method[class_def->ClassType()->GetStringId()->GetIndex()] = true;
1545 const dex_ir::TypeId* superclass = class_def->Superclass();
1546 if (superclass != nullptr) {
1547 from_hot_method[superclass->GetStringId()->GetIndex()] = true;
1548 }
1549 const dex_ir::TypeList* interfaces = class_def->Interfaces();
1550 if (interfaces != nullptr) {
1551 for (const dex_ir::TypeId* interface_type : *interfaces->GetTypeList()) {
1552 from_hot_method[interface_type->GetStringId()->GetIndex()] = true;
1553 }
1554 }
1555 }
1556 dex_ir::ClassData* data = class_def->GetClassData();
1557 if (data == nullptr) {
1558 continue;
1559 }
1560 for (size_t i = 0; i < 2; ++i) {
1561 for (auto& method : *(i == 0 ? data->DirectMethods() : data->VirtualMethods())) {
1562 const dex_ir::MethodId* method_id = method->GetMethodId();
1563 dex_ir::CodeItem* code_item = method->GetCodeItem();
1564 if (code_item == nullptr) {
1565 continue;
1566 }
1567 const bool is_clinit = is_profile_class &&
1568 (method->GetAccessFlags() & kAccConstructor) != 0 &&
1569 (method->GetAccessFlags() & kAccStatic) != 0;
1570 const bool method_executed = is_clinit ||
1571 info_->GetMethodHotness(MethodReference(dex_file, method_id->GetIndex())).IsInProfile();
1572 if (!method_executed) {
1573 continue;
1574 }
1575 is_shorty[method_id->Proto()->Shorty()->GetIndex()] = true;
1576 dex_ir::CodeFixups* fixups = code_item->GetCodeFixups();
1577 if (fixups == nullptr) {
1578 continue;
1579 }
1580 // Add const-strings.
1581 for (dex_ir::StringId* id : *fixups->StringIds()) {
1582 from_hot_method[id->GetIndex()] = true;
1583 }
1584 // Add field classes, names, and types.
1585 for (dex_ir::FieldId* id : *fixups->FieldIds()) {
1586 // TODO: Only visit field ids from static getters and setters.
1587 from_hot_method[id->Class()->GetStringId()->GetIndex()] = true;
1588 from_hot_method[id->Name()->GetIndex()] = true;
1589 from_hot_method[id->Type()->GetStringId()->GetIndex()] = true;
1590 }
1591 // For clinits, add referenced method classes, names, and protos.
1592 if (is_clinit) {
1593 for (dex_ir::MethodId* id : *fixups->MethodIds()) {
1594 from_hot_method[id->Class()->GetStringId()->GetIndex()] = true;
1595 from_hot_method[id->Name()->GetIndex()] = true;
1596 is_shorty[id->Proto()->Shorty()->GetIndex()] = true;
1597 }
1598 }
1599 }
1600 }
1601 }
1602 // Sort string data by specified order.
1603 std::vector<dex_ir::StringId*> string_ids;
1604 size_t min_offset = std::numeric_limits<size_t>::max();
1605 size_t max_offset = 0;
1606 size_t hot_bytes = 0;
1607 for (auto& string_id : header_->GetCollections().StringIds()) {
1608 string_ids.push_back(string_id.get());
1609 const size_t cur_offset = string_id->DataItem()->GetOffset();
1610 CHECK_NE(cur_offset, 0u);
1611 min_offset = std::min(min_offset, cur_offset);
1612 dex_ir::StringData* data = string_id->DataItem();
1613 const size_t element_size = data->GetSize() + 1; // Add one extra for null.
1614 size_t end_offset = cur_offset + element_size;
1615 if (is_shorty[string_id->GetIndex()] || from_hot_method[string_id->GetIndex()]) {
1616 hot_bytes += element_size;
1617 }
1618 max_offset = std::max(max_offset, end_offset);
1619 }
1620 VLOG(compiler) << "Hot string data bytes " << hot_bytes << "/" << max_offset - min_offset;
1621 std::sort(string_ids.begin(),
1622 string_ids.end(),
1623 [&is_shorty, &from_hot_method](const dex_ir::StringId* a,
1624 const dex_ir::StringId* b) {
1625 const bool a_is_hot = from_hot_method[a->GetIndex()];
1626 const bool b_is_hot = from_hot_method[b->GetIndex()];
1627 if (a_is_hot != b_is_hot) {
1628 return a_is_hot < b_is_hot;
1629 }
1630 // After hot methods are partitioned, subpartition shorties.
1631 const bool a_is_shorty = is_shorty[a->GetIndex()];
1632 const bool b_is_shorty = is_shorty[b->GetIndex()];
1633 if (a_is_shorty != b_is_shorty) {
1634 return a_is_shorty < b_is_shorty;
1635 }
1636 // Preserve order.
1637 return a->DataItem()->GetOffset() < b->DataItem()->GetOffset();
1638 });
1639 // Now we know what order we want the string data, reorder the offsets.
1640 size_t offset = min_offset;
1641 for (dex_ir::StringId* string_id : string_ids) {
1642 dex_ir::StringData* data = string_id->DataItem();
1643 data->SetOffset(offset);
1644 offset += data->GetSize() + 1; // Add one extra for null.
1645 }
1646 if (offset > max_offset) {
1647 const uint32_t diff = offset - max_offset;
1648 // If we expanded the string data section, we need to update the offsets or else we will
1649 // corrupt the next section when writing out.
1650 FixupSections(header_->GetCollections().StringDatasOffset(), diff);
1651 // Update file size.
1652 header_->SetFileSize(header_->FileSize() + diff);
1653 }
1654 }
1655
1656 // Orders code items according to specified class data ordering.
1657 // NOTE: If the section following the code items is byte aligned, the last code item is left in
1658 // place to preserve alignment. Layout needs an overhaul to handle movement of other sections.
LayoutCodeItems(const DexFile * dex_file,std::vector<dex_ir::ClassData * > new_class_data_order)1659 int32_t DexLayout::LayoutCodeItems(const DexFile* dex_file,
1660 std::vector<dex_ir::ClassData*> new_class_data_order) {
1661 // Do not move code items if class data section precedes code item section.
1662 // ULEB encoding is variable length, causing problems determining the offset of the code items.
1663 // TODO: We should swap the order of these sections in the future to avoid this issue.
1664 uint32_t class_data_offset = header_->GetCollections().ClassDatasOffset();
1665 uint32_t code_item_offset = header_->GetCollections().CodeItemsOffset();
1666 if (class_data_offset < code_item_offset) {
1667 return 0;
1668 }
1669
1670 // Find the last code item so we can leave it in place if the next section is not 4 byte aligned.
1671 dex_ir::CodeItem* last_code_item = nullptr;
1672 std::unordered_set<dex_ir::CodeItem*> visited_code_items;
1673 bool is_code_item_aligned = IsNextSectionCodeItemAligned(code_item_offset);
1674 if (!is_code_item_aligned) {
1675 for (auto& code_item_pair : header_->GetCollections().CodeItems()) {
1676 std::unique_ptr<dex_ir::CodeItem>& code_item = code_item_pair.second;
1677 if (last_code_item == nullptr
1678 || last_code_item->GetOffset() < code_item->GetOffset()) {
1679 last_code_item = code_item.get();
1680 }
1681 }
1682 }
1683
1684 static constexpr InvokeType invoke_types[] = {
1685 kDirect,
1686 kVirtual
1687 };
1688
1689 const size_t num_layout_types = static_cast<size_t>(LayoutType::kLayoutTypeCount);
1690 std::unordered_set<dex_ir::CodeItem*> code_items[num_layout_types];
1691 for (InvokeType invoke_type : invoke_types) {
1692 for (std::unique_ptr<dex_ir::ClassDef>& class_def : header_->GetCollections().ClassDefs()) {
1693 const bool is_profile_class =
1694 info_->ContainsClass(*dex_file, dex::TypeIndex(class_def->ClassType()->GetIndex()));
1695
1696 // Skip classes that are not defined in this dex file.
1697 dex_ir::ClassData* class_data = class_def->GetClassData();
1698 if (class_data == nullptr) {
1699 continue;
1700 }
1701 for (auto& method : *(invoke_type == InvokeType::kDirect
1702 ? class_data->DirectMethods()
1703 : class_data->VirtualMethods())) {
1704 const dex_ir::MethodId *method_id = method->GetMethodId();
1705 dex_ir::CodeItem *code_item = method->GetCodeItem();
1706 if (code_item == last_code_item || code_item == nullptr) {
1707 continue;
1708 }
1709 // Separate executed methods (clinits and profiled methods) from unexecuted methods.
1710 const bool is_clinit = (method->GetAccessFlags() & kAccConstructor) != 0 &&
1711 (method->GetAccessFlags() & kAccStatic) != 0;
1712 const bool is_startup_clinit = is_profile_class && is_clinit;
1713 using Hotness = ProfileCompilationInfo::MethodHotness;
1714 Hotness hotness = info_->GetMethodHotness(MethodReference(dex_file, method_id->GetIndex()));
1715 LayoutType state = LayoutType::kLayoutTypeUnused;
1716 if (hotness.IsHot()) {
1717 // Hot code is compiled, maybe one day it won't be accessed. So lay it out together for
1718 // now.
1719 state = LayoutType::kLayoutTypeHot;
1720 } else if (is_startup_clinit || hotness.GetFlags() == Hotness::kFlagStartup) {
1721 // Startup clinit or a method that only has the startup flag.
1722 state = LayoutType::kLayoutTypeStartupOnly;
1723 } else if (is_clinit) {
1724 state = LayoutType::kLayoutTypeUsedOnce;
1725 } else if (hotness.IsInProfile()) {
1726 state = LayoutType::kLayoutTypeSometimesUsed;
1727 }
1728 code_items[static_cast<size_t>(state)].insert(code_item);
1729 }
1730 }
1731 }
1732
1733 // Total_diff includes diffs generated by clinits, executed, and non-executed methods.
1734 int32_t total_diff = 0;
1735 // The relative placement has no effect on correctness; it is used to ensure
1736 // the layout is deterministic
1737 for (size_t index = 0; index < num_layout_types; ++index) {
1738 const std::unordered_set<dex_ir::CodeItem*>& code_items_set = code_items[index];
1739 // diff is reset for each class of code items.
1740 int32_t diff = 0;
1741 const uint32_t start_offset = code_item_offset;
1742 for (dex_ir::ClassData* data : new_class_data_order) {
1743 data->SetOffset(data->GetOffset() + diff);
1744 for (InvokeType invoke_type : invoke_types) {
1745 for (auto &method : *(invoke_type == InvokeType::kDirect
1746 ? data->DirectMethods()
1747 : data->VirtualMethods())) {
1748 dex_ir::CodeItem* code_item = method->GetCodeItem();
1749 if (code_item != nullptr &&
1750 code_items_set.find(code_item) != code_items_set.end()) {
1751 diff += UnsignedLeb128Size(code_item_offset)
1752 - UnsignedLeb128Size(code_item->GetOffset());
1753 code_item->SetOffset(code_item_offset);
1754 code_item_offset +=
1755 RoundUp(code_item->GetSize(), kDexCodeItemAlignment);
1756 }
1757 }
1758 }
1759 }
1760 DexLayoutSection& code_section = dex_sections_.sections_[static_cast<size_t>(
1761 DexLayoutSections::SectionType::kSectionTypeCode)];
1762 code_section.parts_[index].offset_ = start_offset;
1763 code_section.parts_[index].size_ = code_item_offset - start_offset;
1764 for (size_t i = 0; i < num_layout_types; ++i) {
1765 VLOG(dex) << "Code item layout bucket " << i << " count=" << code_items[i].size()
1766 << " bytes=" << code_section.parts_[i].size_;
1767 }
1768 total_diff += diff;
1769 }
1770 // Adjust diff to be 4-byte aligned.
1771 return RoundUp(total_diff, kDexCodeItemAlignment);
1772 }
1773
IsNextSectionCodeItemAligned(uint32_t offset)1774 bool DexLayout::IsNextSectionCodeItemAligned(uint32_t offset) {
1775 dex_ir::Collections& collections = header_->GetCollections();
1776 std::set<uint32_t> section_offsets;
1777 section_offsets.insert(collections.MapListOffset());
1778 section_offsets.insert(collections.TypeListsOffset());
1779 section_offsets.insert(collections.AnnotationSetRefListsOffset());
1780 section_offsets.insert(collections.AnnotationSetItemsOffset());
1781 section_offsets.insert(collections.ClassDatasOffset());
1782 section_offsets.insert(collections.CodeItemsOffset());
1783 section_offsets.insert(collections.StringDatasOffset());
1784 section_offsets.insert(collections.DebugInfoItemsOffset());
1785 section_offsets.insert(collections.AnnotationItemsOffset());
1786 section_offsets.insert(collections.EncodedArrayItemsOffset());
1787 section_offsets.insert(collections.AnnotationsDirectoryItemsOffset());
1788
1789 auto found = section_offsets.find(offset);
1790 if (found != section_offsets.end()) {
1791 found++;
1792 if (found != section_offsets.end()) {
1793 return *found % kDexCodeItemAlignment == 0;
1794 }
1795 }
1796 return false;
1797 }
1798
1799 // Adjust offsets of every item in the specified section by diff bytes.
FixupSection(std::map<uint32_t,std::unique_ptr<T>> & map,uint32_t diff)1800 template<class T> void DexLayout::FixupSection(std::map<uint32_t, std::unique_ptr<T>>& map,
1801 uint32_t diff) {
1802 for (auto& pair : map) {
1803 std::unique_ptr<T>& item = pair.second;
1804 item->SetOffset(item->GetOffset() + diff);
1805 }
1806 }
1807
1808 // Adjust offsets of all sections with an address after the specified offset by diff bytes.
FixupSections(uint32_t offset,uint32_t diff)1809 void DexLayout::FixupSections(uint32_t offset, uint32_t diff) {
1810 dex_ir::Collections& collections = header_->GetCollections();
1811 uint32_t map_list_offset = collections.MapListOffset();
1812 if (map_list_offset > offset) {
1813 collections.SetMapListOffset(map_list_offset + diff);
1814 }
1815
1816 uint32_t type_lists_offset = collections.TypeListsOffset();
1817 if (type_lists_offset > offset) {
1818 collections.SetTypeListsOffset(type_lists_offset + diff);
1819 FixupSection(collections.TypeLists(), diff);
1820 }
1821
1822 uint32_t annotation_set_ref_lists_offset = collections.AnnotationSetRefListsOffset();
1823 if (annotation_set_ref_lists_offset > offset) {
1824 collections.SetAnnotationSetRefListsOffset(annotation_set_ref_lists_offset + diff);
1825 FixupSection(collections.AnnotationSetRefLists(), diff);
1826 }
1827
1828 uint32_t annotation_set_items_offset = collections.AnnotationSetItemsOffset();
1829 if (annotation_set_items_offset > offset) {
1830 collections.SetAnnotationSetItemsOffset(annotation_set_items_offset + diff);
1831 FixupSection(collections.AnnotationSetItems(), diff);
1832 }
1833
1834 uint32_t class_datas_offset = collections.ClassDatasOffset();
1835 if (class_datas_offset > offset) {
1836 collections.SetClassDatasOffset(class_datas_offset + diff);
1837 FixupSection(collections.ClassDatas(), diff);
1838 }
1839
1840 uint32_t code_items_offset = collections.CodeItemsOffset();
1841 if (code_items_offset > offset) {
1842 collections.SetCodeItemsOffset(code_items_offset + diff);
1843 FixupSection(collections.CodeItems(), diff);
1844 }
1845
1846 uint32_t string_datas_offset = collections.StringDatasOffset();
1847 if (string_datas_offset > offset) {
1848 collections.SetStringDatasOffset(string_datas_offset + diff);
1849 FixupSection(collections.StringDatas(), diff);
1850 }
1851
1852 uint32_t debug_info_items_offset = collections.DebugInfoItemsOffset();
1853 if (debug_info_items_offset > offset) {
1854 collections.SetDebugInfoItemsOffset(debug_info_items_offset + diff);
1855 FixupSection(collections.DebugInfoItems(), diff);
1856 }
1857
1858 uint32_t annotation_items_offset = collections.AnnotationItemsOffset();
1859 if (annotation_items_offset > offset) {
1860 collections.SetAnnotationItemsOffset(annotation_items_offset + diff);
1861 FixupSection(collections.AnnotationItems(), diff);
1862 }
1863
1864 uint32_t encoded_array_items_offset = collections.EncodedArrayItemsOffset();
1865 if (encoded_array_items_offset > offset) {
1866 collections.SetEncodedArrayItemsOffset(encoded_array_items_offset + diff);
1867 FixupSection(collections.EncodedArrayItems(), diff);
1868 }
1869
1870 uint32_t annotations_directory_items_offset = collections.AnnotationsDirectoryItemsOffset();
1871 if (annotations_directory_items_offset > offset) {
1872 collections.SetAnnotationsDirectoryItemsOffset(annotations_directory_items_offset + diff);
1873 FixupSection(collections.AnnotationsDirectoryItems(), diff);
1874 }
1875 }
1876
LayoutOutputFile(const DexFile * dex_file)1877 void DexLayout::LayoutOutputFile(const DexFile* dex_file) {
1878 LayoutStringData(dex_file);
1879 std::vector<dex_ir::ClassData*> new_class_data_order = LayoutClassDefsAndClassData(dex_file);
1880 int32_t diff = LayoutCodeItems(dex_file, new_class_data_order);
1881 // Move sections after ClassData by diff bytes.
1882 FixupSections(header_->GetCollections().ClassDatasOffset(), diff);
1883 // Update file size.
1884 header_->SetFileSize(header_->FileSize() + diff);
1885 }
1886
OutputDexFile(const DexFile * dex_file)1887 void DexLayout::OutputDexFile(const DexFile* dex_file) {
1888 const std::string& dex_file_location = dex_file->GetLocation();
1889 std::string error_msg;
1890 std::unique_ptr<File> new_file;
1891 if (!options_.output_to_memmap_) {
1892 std::string output_location(options_.output_dex_directory_);
1893 size_t last_slash = dex_file_location.rfind('/');
1894 std::string dex_file_directory = dex_file_location.substr(0, last_slash + 1);
1895 if (output_location == dex_file_directory) {
1896 output_location = dex_file_location + ".new";
1897 } else if (last_slash != std::string::npos) {
1898 output_location += dex_file_location.substr(last_slash);
1899 } else {
1900 output_location += "/" + dex_file_location + ".new";
1901 }
1902 new_file.reset(OS::CreateEmptyFile(output_location.c_str()));
1903 if (new_file == nullptr) {
1904 LOG(ERROR) << "Could not create dex writer output file: " << output_location;
1905 return;
1906 }
1907 if (ftruncate(new_file->Fd(), header_->FileSize()) != 0) {
1908 LOG(ERROR) << "Could not grow dex writer output file: " << output_location;;
1909 new_file->Erase();
1910 return;
1911 }
1912 mem_map_.reset(MemMap::MapFile(header_->FileSize(), PROT_READ | PROT_WRITE, MAP_SHARED,
1913 new_file->Fd(), 0, /*low_4gb*/ false, output_location.c_str(), &error_msg));
1914 } else {
1915 mem_map_.reset(MemMap::MapAnonymous("layout dex", nullptr, header_->FileSize(),
1916 PROT_READ | PROT_WRITE, /* low_4gb */ false, /* reuse */ false, &error_msg));
1917 }
1918 if (mem_map_ == nullptr) {
1919 LOG(ERROR) << "Could not create mem map for dex writer output: " << error_msg;
1920 if (new_file != nullptr) {
1921 new_file->Erase();
1922 }
1923 return;
1924 }
1925 DexWriter::Output(header_, mem_map_.get());
1926 if (new_file != nullptr) {
1927 UNUSED(new_file->FlushCloseOrErase());
1928 }
1929 // Verify the output dex file's structure for debug builds.
1930 if (kIsDebugBuild) {
1931 std::string location = "memory mapped file for " + dex_file_location;
1932 std::unique_ptr<const DexFile> output_dex_file(DexFile::Open(mem_map_->Begin(),
1933 mem_map_->Size(),
1934 location,
1935 header_->Checksum(),
1936 /*oat_dex_file*/ nullptr,
1937 /*verify*/ true,
1938 /*verify_checksum*/ false,
1939 &error_msg));
1940 DCHECK(output_dex_file != nullptr) << "Failed to re-open output file:" << error_msg;
1941 }
1942 // Do IR-level comparison between input and output. This check ignores potential differences
1943 // due to layout, so offsets are not checked. Instead, it checks the data contents of each item.
1944 if (kIsDebugBuild || options_.verify_output_) {
1945 std::unique_ptr<dex_ir::Header> orig_header(dex_ir::DexIrBuilder(*dex_file));
1946 CHECK(VerifyOutputDexFile(orig_header.get(), header_, &error_msg)) << error_msg;
1947 }
1948 }
1949
1950 /*
1951 * Dumps the requested sections of the file.
1952 */
ProcessDexFile(const char * file_name,const DexFile * dex_file,size_t dex_file_index)1953 void DexLayout::ProcessDexFile(const char* file_name,
1954 const DexFile* dex_file,
1955 size_t dex_file_index) {
1956 std::unique_ptr<dex_ir::Header> header(dex_ir::DexIrBuilder(*dex_file));
1957 SetHeader(header.get());
1958
1959 if (options_.verbose_) {
1960 fprintf(out_file_, "Opened '%s', DEX version '%.3s'\n",
1961 file_name, dex_file->GetHeader().magic_ + 4);
1962 }
1963
1964 if (options_.visualize_pattern_) {
1965 VisualizeDexLayout(header_, dex_file, dex_file_index, info_);
1966 return;
1967 }
1968
1969 if (options_.show_section_statistics_) {
1970 ShowDexSectionStatistics(header_, dex_file_index);
1971 return;
1972 }
1973
1974 // Dump dex file.
1975 if (options_.dump_) {
1976 DumpDexFile();
1977 }
1978
1979 // Output dex file as file or memmap.
1980 if (options_.output_dex_directory_ != nullptr || options_.output_to_memmap_) {
1981 if (info_ != nullptr) {
1982 LayoutOutputFile(dex_file);
1983 }
1984 OutputDexFile(dex_file);
1985 }
1986 }
1987
1988 /*
1989 * Processes a single file (either direct .dex or indirect .zip/.jar/.apk).
1990 */
ProcessFile(const char * file_name)1991 int DexLayout::ProcessFile(const char* file_name) {
1992 if (options_.verbose_) {
1993 fprintf(out_file_, "Processing '%s'...\n", file_name);
1994 }
1995
1996 // If the file is not a .dex file, the function tries .zip/.jar/.apk files,
1997 // all of which are Zip archives with "classes.dex" inside.
1998 const bool verify_checksum = !options_.ignore_bad_checksum_;
1999 std::string error_msg;
2000 std::vector<std::unique_ptr<const DexFile>> dex_files;
2001 if (!DexFile::Open(file_name, file_name, verify_checksum, &error_msg, &dex_files)) {
2002 // Display returned error message to user. Note that this error behavior
2003 // differs from the error messages shown by the original Dalvik dexdump.
2004 fputs(error_msg.c_str(), stderr);
2005 fputc('\n', stderr);
2006 return -1;
2007 }
2008
2009 // Success. Either report checksum verification or process
2010 // all dex files found in given file.
2011 if (options_.checksum_only_) {
2012 fprintf(out_file_, "Checksum verified\n");
2013 } else {
2014 for (size_t i = 0; i < dex_files.size(); i++) {
2015 ProcessDexFile(file_name, dex_files[i].get(), i);
2016 }
2017 }
2018 return 0;
2019 }
2020
2021 } // namespace art
2022