• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2016 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  *
16  * Implementation file of the dexlayout utility.
17  *
18  * This is a tool to read dex files into an internal representation,
19  * reorganize the representation, and emit dex files with a better
20  * file layout.
21  */
22 
23 #include "dexlayout.h"
24 
25 #include <inttypes.h>
26 #include <stdio.h>
27 #include <sys/mman.h>  // For the PROT_* and MAP_* constants.
28 
29 #include <iostream>
30 #include <memory>
31 #include <sstream>
32 #include <vector>
33 
34 #include "android-base/stringprintf.h"
35 
36 #include "dex_ir_builder.h"
37 #include "dex_file-inl.h"
38 #include "dex_file_layout.h"
39 #include "dex_file_verifier.h"
40 #include "dex_instruction-inl.h"
41 #include "dex_verify.h"
42 #include "dex_visualize.h"
43 #include "dex_writer.h"
44 #include "jit/profile_compilation_info.h"
45 #include "mem_map.h"
46 #include "os.h"
47 #include "utils.h"
48 
49 namespace art {
50 
51 using android::base::StringPrintf;
52 
// Alignment constant for dex code items.
// NOTE(review): presumably a byte alignment; no use of it is visible in this
// part of the file -- confirm against the code that lays out code items.
static constexpr uint32_t kDexCodeItemAlignment = 4;

/*
 * Selects which row of the access-flag name table CreateAccessFlagStr()
 * consults.
 */
enum AccessFor {
  kAccessForClass = 0,
  kAccessForMethod = 1,
  kAccessForField = 2,
  kAccessForMAX  // Number of selectors above; sizes the name table.
};

// Number of flag bits (table columns) CreateAccessFlagStr() can name.
const int kNumFlags = 18;
62 
/*
 * Reads a 16-bit value stored least-significant byte first.
 * src must point at (at least) two readable bytes.
 */
static inline uint16_t Get2LE(unsigned char const* src) {
  const unsigned int low_byte = src[0];
  const unsigned int high_byte = src[1];
  return static_cast<uint16_t>((high_byte << 8) | low_byte);
}
69 
70 /*
71  * Converts a type descriptor to human-readable "dotted" form.  For
72  * example, "Ljava/lang/String;" becomes "java.lang.String", and
73  * "[I" becomes "int[]".  Also converts '$' to '.', which means this
74  * form can't be converted back to a descriptor.
75  */
DescriptorToDotWrapper(const char * descriptor)76 static std::string DescriptorToDotWrapper(const char* descriptor) {
77   std::string result = DescriptorToDot(descriptor);
78   size_t found = result.find('$');
79   while (found != std::string::npos) {
80     result[found] = '.';
81     found = result.find('$', found);
82   }
83   return result;
84 }
85 
/*
 * Converts the class name portion of a type descriptor to human-readable
 * "dotted" form. For example, "Ljava/lang/String;" becomes "String".
 *
 * The text after the last '/' (or after the first character when there is
 * no '/') is kept, its final character (the trailing ';') is dropped, and
 * every '$' is replaced with '.'. Input too short to contain a class name,
 * including the empty string, yields an empty result.
 */
static std::string DescriptorClassToDot(const char* str) {
  const std::string descriptor(str);

  // Start past the last '/', or past the leading 'L' if there is no package.
  const size_t last_slash = descriptor.rfind('/');
  const size_t begin = (last_slash == std::string::npos) ? 1 : last_slash + 1;

  // Guard degenerate input: substr() with a start position beyond the end of
  // the string (the empty-descriptor case) would throw std::out_of_range.
  if (begin >= descriptor.size()) {
    return std::string();
  }

  // Copy class name over, trimming the trailing ';'.
  std::string result(descriptor.substr(begin, descriptor.size() - 1 - begin));

  // Replace '$' with '.'.
  for (char& ch : result) {
    if (ch == '$') {
      ch = '.';
    }
  }
  return result;
}
113 
/*
 * Maps a boolean to the literal text "true" or "false".
 */
static const char* StrBool(bool val) {
  if (val) {
    return "true";
  }
  return "false";
}
120 
/*
 * Maps a boolean to its text form wrapped in double quotes:
 * "\"true\"" or "\"false\"".
 */
static const char* QuotedBool(bool val) {
  if (val) {
    return "\"true\"";
  }
  return "\"false\"";
}
127 
128 /*
129  * Returns a quoted string representing the access flags.
130  */
QuotedVisibility(uint32_t access_flags)131 static const char* QuotedVisibility(uint32_t access_flags) {
132   if (access_flags & kAccPublic) {
133     return "\"public\"";
134   } else if (access_flags & kAccProtected) {
135     return "\"protected\"";
136   } else if (access_flags & kAccPrivate) {
137     return "\"private\"";
138   } else {
139     return "\"package\"";
140   }
141 }
142 
/*
 * Returns how many bits are set in a 32-bit word.
 *
 * Works by summing bits in parallel: first within 2-bit groups, then 4-bit
 * groups, then bytes, and finally adding the four byte totals together.
 */
static int CountOnes(uint32_t val) {
  const uint32_t pair_sums = val - ((val >> 1) & 0x55555555u);
  const uint32_t nibble_sums = (pair_sums & 0x33333333u) + ((pair_sums >> 2) & 0x33333333u);
  const uint32_t byte_sums = (nibble_sums + (nibble_sums >> 4)) & 0x0F0F0F0Fu;
  // The multiply accumulates all four byte totals into the top byte.
  return static_cast<int>((byte_sums * 0x01010101u) >> 24);
}
151 
152 /*
153  * Creates a new string with human-readable access flags.
154  *
155  * In the base language the access_flags fields are type uint16_t; in Dalvik they're uint32_t.
156  */
CreateAccessFlagStr(uint32_t flags,AccessFor for_what)157 static char* CreateAccessFlagStr(uint32_t flags, AccessFor for_what) {
158   static const char* kAccessStrings[kAccessForMAX][kNumFlags] = {
159     {
160       "PUBLIC",                /* 0x00001 */
161       "PRIVATE",               /* 0x00002 */
162       "PROTECTED",             /* 0x00004 */
163       "STATIC",                /* 0x00008 */
164       "FINAL",                 /* 0x00010 */
165       "?",                     /* 0x00020 */
166       "?",                     /* 0x00040 */
167       "?",                     /* 0x00080 */
168       "?",                     /* 0x00100 */
169       "INTERFACE",             /* 0x00200 */
170       "ABSTRACT",              /* 0x00400 */
171       "?",                     /* 0x00800 */
172       "SYNTHETIC",             /* 0x01000 */
173       "ANNOTATION",            /* 0x02000 */
174       "ENUM",                  /* 0x04000 */
175       "?",                     /* 0x08000 */
176       "VERIFIED",              /* 0x10000 */
177       "OPTIMIZED",             /* 0x20000 */
178     }, {
179       "PUBLIC",                /* 0x00001 */
180       "PRIVATE",               /* 0x00002 */
181       "PROTECTED",             /* 0x00004 */
182       "STATIC",                /* 0x00008 */
183       "FINAL",                 /* 0x00010 */
184       "SYNCHRONIZED",          /* 0x00020 */
185       "BRIDGE",                /* 0x00040 */
186       "VARARGS",               /* 0x00080 */
187       "NATIVE",                /* 0x00100 */
188       "?",                     /* 0x00200 */
189       "ABSTRACT",              /* 0x00400 */
190       "STRICT",                /* 0x00800 */
191       "SYNTHETIC",             /* 0x01000 */
192       "?",                     /* 0x02000 */
193       "?",                     /* 0x04000 */
194       "MIRANDA",               /* 0x08000 */
195       "CONSTRUCTOR",           /* 0x10000 */
196       "DECLARED_SYNCHRONIZED", /* 0x20000 */
197     }, {
198       "PUBLIC",                /* 0x00001 */
199       "PRIVATE",               /* 0x00002 */
200       "PROTECTED",             /* 0x00004 */
201       "STATIC",                /* 0x00008 */
202       "FINAL",                 /* 0x00010 */
203       "?",                     /* 0x00020 */
204       "VOLATILE",              /* 0x00040 */
205       "TRANSIENT",             /* 0x00080 */
206       "?",                     /* 0x00100 */
207       "?",                     /* 0x00200 */
208       "?",                     /* 0x00400 */
209       "?",                     /* 0x00800 */
210       "SYNTHETIC",             /* 0x01000 */
211       "?",                     /* 0x02000 */
212       "ENUM",                  /* 0x04000 */
213       "?",                     /* 0x08000 */
214       "?",                     /* 0x10000 */
215       "?",                     /* 0x20000 */
216     },
217   };
218 
219   // Allocate enough storage to hold the expected number of strings,
220   // plus a space between each.  We over-allocate, using the longest
221   // string above as the base metric.
222   const int kLongest = 21;  // The strlen of longest string above.
223   const int count = CountOnes(flags);
224   char* str;
225   char* cp;
226   cp = str = reinterpret_cast<char*>(malloc(count * (kLongest + 1) + 1));
227 
228   for (int i = 0; i < kNumFlags; i++) {
229     if (flags & 0x01) {
230       const char* accessStr = kAccessStrings[for_what][i];
231       const int len = strlen(accessStr);
232       if (cp != str) {
233         *cp++ = ' ';
234       }
235       memcpy(cp, accessStr, len);
236       cp += len;
237     }
238     flags >>= 1;
239   }  // for
240 
241   *cp = '\0';
242   return str;
243 }
244 
GetSignatureForProtoId(const dex_ir::ProtoId * proto)245 static std::string GetSignatureForProtoId(const dex_ir::ProtoId* proto) {
246   if (proto == nullptr) {
247     return "<no signature>";
248   }
249 
250   std::string result("(");
251   const dex_ir::TypeList* type_list = proto->Parameters();
252   if (type_list != nullptr) {
253     for (const dex_ir::TypeId* type_id : *type_list->GetTypeList()) {
254       result += type_id->GetStringId()->Data();
255     }
256   }
257   result += ")";
258   result += proto->ReturnType()->GetStringId()->Data();
259   return result;
260 }
261 
/*
 * Copies "len" bytes from "data" to "out" in a printable form:
 *   - NUL and newline become the two-character escapes "\0" and "\n",
 *   - any other byte below 0x20 becomes '.',
 *   - bytes 0x80 and above become '?',
 *   - everything else is copied unchanged.
 *
 * The output buffer must be able to hold (2*len)+1 bytes.  The result is
 * NUL-terminated.
 */
static void Asciify(char* out, const unsigned char* data, size_t len) {
  for (const unsigned char* const end = data + len; data != end; ++data) {
    const unsigned char ch = *data;
    if (ch == '\0') {
      *out++ = '\\';
      *out++ = '0';
    } else if (ch == '\n') {
      *out++ = '\\';
      *out++ = 'n';
    } else if (ch < 0x20) {
      // Could do more here, but we don't need them yet.
      *out++ = '.';
    } else if (ch >= 0x80) {
      *out++ = '?';
    } else {
      *out++ = ch;
    }
  }
  *out = '\0';
}
295 
/*
 * Writes the NUL-terminated string "p" to "out_file" surrounded by double
 * quotes. Backslash, double quote, tab, newline and carriage return are
 * written as two-character backslash escapes; all other characters are
 * written unchanged.
 */
static void DumpEscapedString(const char* p, FILE* out_file) {
  fputs("\"", out_file);
  for (const char* cursor = p; *cursor != '\0'; ++cursor) {
    // Escape sequence for the current character, or null if it needs none.
    const char* escape = nullptr;
    switch (*cursor) {
      case '\\': escape = "\\\\"; break;
      case '\"': escape = "\\\""; break;
      case '\t': escape = "\\t";  break;
      case '\n': escape = "\\n";  break;
      case '\r': escape = "\\r";  break;
      default:                    break;
    }  // switch
    if (escape != nullptr) {
      fputs(escape, out_file);
    } else {
      putc(*cursor, out_file);
    }
  }  // for
  fputs("\"", out_file);
}
324 
/*
 * Writes the NUL-terminated string "p" to "out_file" in a form usable as an
 * XML attribute value: '&', '<', '>' and '"' become named entities, and
 * tab, newline and carriage return become numeric character references.
 * All other characters are written unchanged. No surrounding quotes are
 * emitted.
 */
static void DumpXmlAttribute(const char* p, FILE* out_file) {
  for (const char* cursor = p; *cursor != '\0'; ++cursor) {
    // Replacement text for the current character, or null if it needs none.
    const char* entity = nullptr;
    switch (*cursor) {
      case '&':  entity = "&amp;";  break;
      case '<':  entity = "&lt;";   break;
      case '>':  entity = "&gt;";   break;
      case '"':  entity = "&quot;"; break;
      case '\t': entity = "&#x9;";  break;
      case '\n': entity = "&#xA;";  break;
      case '\r': entity = "&#xD;";  break;
      default:                      break;
    }  // switch
    if (entity != nullptr) {
      fputs(entity, out_file);
    } else {
      putc(*cursor, out_file);
    }
  }  // for
}
357 
358 /*
359  * Helper for dumpInstruction(), which builds the string
360  * representation for the index in the given instruction.
361  * Returns a pointer to a buffer of sufficient size.
362  */
IndexString(dex_ir::Header * header,const Instruction * dec_insn,size_t buf_size)363 static std::unique_ptr<char[]> IndexString(dex_ir::Header* header,
364                                            const Instruction* dec_insn,
365                                            size_t buf_size) {
366   std::unique_ptr<char[]> buf(new char[buf_size]);
367   // Determine index and width of the string.
368   uint32_t index = 0;
369   uint32_t secondary_index = DexFile::kDexNoIndex;
370   uint32_t width = 4;
371   switch (Instruction::FormatOf(dec_insn->Opcode())) {
372     // SOME NOT SUPPORTED:
373     // case Instruction::k20bc:
374     case Instruction::k21c:
375     case Instruction::k35c:
376     // case Instruction::k35ms:
377     case Instruction::k3rc:
378     // case Instruction::k3rms:
379     // case Instruction::k35mi:
380     // case Instruction::k3rmi:
381       index = dec_insn->VRegB();
382       width = 4;
383       break;
384     case Instruction::k31c:
385       index = dec_insn->VRegB();
386       width = 8;
387       break;
388     case Instruction::k22c:
389     // case Instruction::k22cs:
390       index = dec_insn->VRegC();
391       width = 4;
392       break;
393     case Instruction::k45cc:
394     case Instruction::k4rcc:
395       index = dec_insn->VRegB();
396       secondary_index = dec_insn->VRegH();
397       width = 4;
398       break;
399     default:
400       break;
401   }  // switch
402 
403   // Determine index type.
404   size_t outSize = 0;
405   switch (Instruction::IndexTypeOf(dec_insn->Opcode())) {
406     case Instruction::kIndexUnknown:
407       // This function should never get called for this type, but do
408       // something sensible here, just to help with debugging.
409       outSize = snprintf(buf.get(), buf_size, "<unknown-index>");
410       break;
411     case Instruction::kIndexNone:
412       // This function should never get called for this type, but do
413       // something sensible here, just to help with debugging.
414       outSize = snprintf(buf.get(), buf_size, "<no-index>");
415       break;
416     case Instruction::kIndexTypeRef:
417       if (index < header->GetCollections().TypeIdsSize()) {
418         const char* tp = header->GetCollections().GetTypeId(index)->GetStringId()->Data();
419         outSize = snprintf(buf.get(), buf_size, "%s // type@%0*x", tp, width, index);
420       } else {
421         outSize = snprintf(buf.get(), buf_size, "<type?> // type@%0*x", width, index);
422       }
423       break;
424     case Instruction::kIndexStringRef:
425       if (index < header->GetCollections().StringIdsSize()) {
426         const char* st = header->GetCollections().GetStringId(index)->Data();
427         outSize = snprintf(buf.get(), buf_size, "\"%s\" // string@%0*x", st, width, index);
428       } else {
429         outSize = snprintf(buf.get(), buf_size, "<string?> // string@%0*x", width, index);
430       }
431       break;
432     case Instruction::kIndexMethodRef:
433       if (index < header->GetCollections().MethodIdsSize()) {
434         dex_ir::MethodId* method_id = header->GetCollections().GetMethodId(index);
435         const char* name = method_id->Name()->Data();
436         std::string type_descriptor = GetSignatureForProtoId(method_id->Proto());
437         const char* back_descriptor = method_id->Class()->GetStringId()->Data();
438         outSize = snprintf(buf.get(), buf_size, "%s.%s:%s // method@%0*x",
439                            back_descriptor, name, type_descriptor.c_str(), width, index);
440       } else {
441         outSize = snprintf(buf.get(), buf_size, "<method?> // method@%0*x", width, index);
442       }
443       break;
444     case Instruction::kIndexFieldRef:
445       if (index < header->GetCollections().FieldIdsSize()) {
446         dex_ir::FieldId* field_id = header->GetCollections().GetFieldId(index);
447         const char* name = field_id->Name()->Data();
448         const char* type_descriptor = field_id->Type()->GetStringId()->Data();
449         const char* back_descriptor = field_id->Class()->GetStringId()->Data();
450         outSize = snprintf(buf.get(), buf_size, "%s.%s:%s // field@%0*x",
451                            back_descriptor, name, type_descriptor, width, index);
452       } else {
453         outSize = snprintf(buf.get(), buf_size, "<field?> // field@%0*x", width, index);
454       }
455       break;
456     case Instruction::kIndexVtableOffset:
457       outSize = snprintf(buf.get(), buf_size, "[%0*x] // vtable #%0*x",
458                          width, index, width, index);
459       break;
460     case Instruction::kIndexFieldOffset:
461       outSize = snprintf(buf.get(), buf_size, "[obj+%0*x]", width, index);
462       break;
463     case Instruction::kIndexMethodAndProtoRef: {
464       std::string method("<method?>");
465       std::string proto("<proto?>");
466       if (index < header->GetCollections().MethodIdsSize()) {
467         dex_ir::MethodId* method_id = header->GetCollections().GetMethodId(index);
468         const char* name = method_id->Name()->Data();
469         std::string type_descriptor = GetSignatureForProtoId(method_id->Proto());
470         const char* back_descriptor = method_id->Class()->GetStringId()->Data();
471         method = StringPrintf("%s.%s:%s", back_descriptor, name, type_descriptor.c_str());
472       }
473       if (secondary_index < header->GetCollections().ProtoIdsSize()) {
474         dex_ir::ProtoId* proto_id = header->GetCollections().GetProtoId(secondary_index);
475         proto = GetSignatureForProtoId(proto_id);
476       }
477       outSize = snprintf(buf.get(), buf_size, "%s, %s // method@%0*x, proto@%0*x",
478                          method.c_str(), proto.c_str(), width, index, width, secondary_index);
479     }
480     break;
481     // SOME NOT SUPPORTED:
482     // case Instruction::kIndexVaries:
483     // case Instruction::kIndexInlineMethod:
484     default:
485       outSize = snprintf(buf.get(), buf_size, "<?>");
486       break;
487   }  // switch
488 
489   // Determine success of string construction.
490   if (outSize >= buf_size) {
491     // The buffer wasn't big enough; retry with computed size. Note: snprintf()
492     // doesn't count/ the '\0' as part of its returned size, so we add explicit
493     // space for it here.
494     return IndexString(header, dec_insn, outSize + 1);
495   }
496   return buf;
497 }
498 
499 /*
500  * Dumps encoded annotation.
501  */
DumpEncodedAnnotation(dex_ir::EncodedAnnotation * annotation)502 void DexLayout::DumpEncodedAnnotation(dex_ir::EncodedAnnotation* annotation) {
503   fputs(annotation->GetType()->GetStringId()->Data(), out_file_);
504   // Display all name=value pairs.
505   for (auto& subannotation : *annotation->GetAnnotationElements()) {
506     fputc(' ', out_file_);
507     fputs(subannotation->GetName()->Data(), out_file_);
508     fputc('=', out_file_);
509     DumpEncodedValue(subannotation->GetValue());
510   }
511 }
512 /*
513  * Dumps encoded value.
514  */
DumpEncodedValue(const dex_ir::EncodedValue * data)515 void DexLayout::DumpEncodedValue(const dex_ir::EncodedValue* data) {
516   switch (data->Type()) {
517     case DexFile::kDexAnnotationByte:
518       fprintf(out_file_, "%" PRId8, data->GetByte());
519       break;
520     case DexFile::kDexAnnotationShort:
521       fprintf(out_file_, "%" PRId16, data->GetShort());
522       break;
523     case DexFile::kDexAnnotationChar:
524       fprintf(out_file_, "%" PRIu16, data->GetChar());
525       break;
526     case DexFile::kDexAnnotationInt:
527       fprintf(out_file_, "%" PRId32, data->GetInt());
528       break;
529     case DexFile::kDexAnnotationLong:
530       fprintf(out_file_, "%" PRId64, data->GetLong());
531       break;
532     case DexFile::kDexAnnotationFloat: {
533       fprintf(out_file_, "%g", data->GetFloat());
534       break;
535     }
536     case DexFile::kDexAnnotationDouble: {
537       fprintf(out_file_, "%g", data->GetDouble());
538       break;
539     }
540     case DexFile::kDexAnnotationString: {
541       dex_ir::StringId* string_id = data->GetStringId();
542       if (options_.output_format_ == kOutputPlain) {
543         DumpEscapedString(string_id->Data(), out_file_);
544       } else {
545         DumpXmlAttribute(string_id->Data(), out_file_);
546       }
547       break;
548     }
549     case DexFile::kDexAnnotationType: {
550       dex_ir::TypeId* type_id = data->GetTypeId();
551       fputs(type_id->GetStringId()->Data(), out_file_);
552       break;
553     }
554     case DexFile::kDexAnnotationField:
555     case DexFile::kDexAnnotationEnum: {
556       dex_ir::FieldId* field_id = data->GetFieldId();
557       fputs(field_id->Name()->Data(), out_file_);
558       break;
559     }
560     case DexFile::kDexAnnotationMethod: {
561       dex_ir::MethodId* method_id = data->GetMethodId();
562       fputs(method_id->Name()->Data(), out_file_);
563       break;
564     }
565     case DexFile::kDexAnnotationArray: {
566       fputc('{', out_file_);
567       // Display all elements.
568       for (auto& value : *data->GetEncodedArray()->GetEncodedValues()) {
569         fputc(' ', out_file_);
570         DumpEncodedValue(value.get());
571       }
572       fputs(" }", out_file_);
573       break;
574     }
575     case DexFile::kDexAnnotationAnnotation: {
576       DumpEncodedAnnotation(data->GetEncodedAnnotation());
577       break;
578     }
579     case DexFile::kDexAnnotationNull:
580       fputs("null", out_file_);
581       break;
582     case DexFile::kDexAnnotationBoolean:
583       fputs(StrBool(data->GetBoolean()), out_file_);
584       break;
585     default:
586       fputs("????", out_file_);
587       break;
588   }  // switch
589 }
590 
591 /*
592  * Dumps the file header.
593  */
DumpFileHeader()594 void DexLayout::DumpFileHeader() {
595   char sanitized[8 * 2 + 1];
596   dex_ir::Collections& collections = header_->GetCollections();
597   fprintf(out_file_, "DEX file header:\n");
598   Asciify(sanitized, header_->Magic(), 8);
599   fprintf(out_file_, "magic               : '%s'\n", sanitized);
600   fprintf(out_file_, "checksum            : %08x\n", header_->Checksum());
601   fprintf(out_file_, "signature           : %02x%02x...%02x%02x\n",
602           header_->Signature()[0], header_->Signature()[1],
603           header_->Signature()[DexFile::kSha1DigestSize - 2],
604           header_->Signature()[DexFile::kSha1DigestSize - 1]);
605   fprintf(out_file_, "file_size           : %d\n", header_->FileSize());
606   fprintf(out_file_, "header_size         : %d\n", header_->HeaderSize());
607   fprintf(out_file_, "link_size           : %d\n", header_->LinkSize());
608   fprintf(out_file_, "link_off            : %d (0x%06x)\n",
609           header_->LinkOffset(), header_->LinkOffset());
610   fprintf(out_file_, "string_ids_size     : %d\n", collections.StringIdsSize());
611   fprintf(out_file_, "string_ids_off      : %d (0x%06x)\n",
612           collections.StringIdsOffset(), collections.StringIdsOffset());
613   fprintf(out_file_, "type_ids_size       : %d\n", collections.TypeIdsSize());
614   fprintf(out_file_, "type_ids_off        : %d (0x%06x)\n",
615           collections.TypeIdsOffset(), collections.TypeIdsOffset());
616   fprintf(out_file_, "proto_ids_size      : %d\n", collections.ProtoIdsSize());
617   fprintf(out_file_, "proto_ids_off       : %d (0x%06x)\n",
618           collections.ProtoIdsOffset(), collections.ProtoIdsOffset());
619   fprintf(out_file_, "field_ids_size      : %d\n", collections.FieldIdsSize());
620   fprintf(out_file_, "field_ids_off       : %d (0x%06x)\n",
621           collections.FieldIdsOffset(), collections.FieldIdsOffset());
622   fprintf(out_file_, "method_ids_size     : %d\n", collections.MethodIdsSize());
623   fprintf(out_file_, "method_ids_off      : %d (0x%06x)\n",
624           collections.MethodIdsOffset(), collections.MethodIdsOffset());
625   fprintf(out_file_, "class_defs_size     : %d\n", collections.ClassDefsSize());
626   fprintf(out_file_, "class_defs_off      : %d (0x%06x)\n",
627           collections.ClassDefsOffset(), collections.ClassDefsOffset());
628   fprintf(out_file_, "data_size           : %d\n", header_->DataSize());
629   fprintf(out_file_, "data_off            : %d (0x%06x)\n\n",
630           header_->DataOffset(), header_->DataOffset());
631 }
632 
633 /*
634  * Dumps a class_def_item.
635  */
DumpClassDef(int idx)636 void DexLayout::DumpClassDef(int idx) {
637   // General class information.
638   dex_ir::ClassDef* class_def = header_->GetCollections().GetClassDef(idx);
639   fprintf(out_file_, "Class #%d header:\n", idx);
640   fprintf(out_file_, "class_idx           : %d\n", class_def->ClassType()->GetIndex());
641   fprintf(out_file_, "access_flags        : %d (0x%04x)\n",
642           class_def->GetAccessFlags(), class_def->GetAccessFlags());
643   uint32_t superclass_idx =  class_def->Superclass() == nullptr ?
644       DexFile::kDexNoIndex16 : class_def->Superclass()->GetIndex();
645   fprintf(out_file_, "superclass_idx      : %d\n", superclass_idx);
646   fprintf(out_file_, "interfaces_off      : %d (0x%06x)\n",
647           class_def->InterfacesOffset(), class_def->InterfacesOffset());
648   uint32_t source_file_offset = 0xffffffffU;
649   if (class_def->SourceFile() != nullptr) {
650     source_file_offset = class_def->SourceFile()->GetIndex();
651   }
652   fprintf(out_file_, "source_file_idx     : %d\n", source_file_offset);
653   uint32_t annotations_offset = 0;
654   if (class_def->Annotations() != nullptr) {
655     annotations_offset = class_def->Annotations()->GetOffset();
656   }
657   fprintf(out_file_, "annotations_off     : %d (0x%06x)\n",
658           annotations_offset, annotations_offset);
659   if (class_def->GetClassData() == nullptr) {
660     fprintf(out_file_, "class_data_off      : %d (0x%06x)\n", 0, 0);
661   } else {
662     fprintf(out_file_, "class_data_off      : %d (0x%06x)\n",
663             class_def->GetClassData()->GetOffset(), class_def->GetClassData()->GetOffset());
664   }
665 
666   // Fields and methods.
667   dex_ir::ClassData* class_data = class_def->GetClassData();
668   if (class_data != nullptr && class_data->StaticFields() != nullptr) {
669     fprintf(out_file_, "static_fields_size  : %zu\n", class_data->StaticFields()->size());
670   } else {
671     fprintf(out_file_, "static_fields_size  : 0\n");
672   }
673   if (class_data != nullptr && class_data->InstanceFields() != nullptr) {
674     fprintf(out_file_, "instance_fields_size: %zu\n", class_data->InstanceFields()->size());
675   } else {
676     fprintf(out_file_, "instance_fields_size: 0\n");
677   }
678   if (class_data != nullptr && class_data->DirectMethods() != nullptr) {
679     fprintf(out_file_, "direct_methods_size : %zu\n", class_data->DirectMethods()->size());
680   } else {
681     fprintf(out_file_, "direct_methods_size : 0\n");
682   }
683   if (class_data != nullptr && class_data->VirtualMethods() != nullptr) {
684     fprintf(out_file_, "virtual_methods_size: %zu\n", class_data->VirtualMethods()->size());
685   } else {
686     fprintf(out_file_, "virtual_methods_size: 0\n");
687   }
688   fprintf(out_file_, "\n");
689 }
690 
691 /**
692  * Dumps an annotation set item.
693  */
DumpAnnotationSetItem(dex_ir::AnnotationSetItem * set_item)694 void DexLayout::DumpAnnotationSetItem(dex_ir::AnnotationSetItem* set_item) {
695   if (set_item == nullptr || set_item->GetItems()->size() == 0) {
696     fputs("  empty-annotation-set\n", out_file_);
697     return;
698   }
699   for (dex_ir::AnnotationItem* annotation : *set_item->GetItems()) {
700     if (annotation == nullptr) {
701       continue;
702     }
703     fputs("  ", out_file_);
704     switch (annotation->GetVisibility()) {
705       case DexFile::kDexVisibilityBuild:   fputs("VISIBILITY_BUILD ",   out_file_); break;
706       case DexFile::kDexVisibilityRuntime: fputs("VISIBILITY_RUNTIME ", out_file_); break;
707       case DexFile::kDexVisibilitySystem:  fputs("VISIBILITY_SYSTEM ",  out_file_); break;
708       default:                             fputs("VISIBILITY_UNKNOWN ", out_file_); break;
709     }  // switch
710     DumpEncodedAnnotation(annotation->GetAnnotation());
711     fputc('\n', out_file_);
712   }
713 }
714 
715 /*
716  * Dumps class annotations.
717  */
DumpClassAnnotations(int idx)718 void DexLayout::DumpClassAnnotations(int idx) {
719   dex_ir::ClassDef* class_def = header_->GetCollections().GetClassDef(idx);
720   dex_ir::AnnotationsDirectoryItem* annotations_directory = class_def->Annotations();
721   if (annotations_directory == nullptr) {
722     return;  // none
723   }
724 
725   fprintf(out_file_, "Class #%d annotations:\n", idx);
726 
727   dex_ir::AnnotationSetItem* class_set_item = annotations_directory->GetClassAnnotation();
728   dex_ir::FieldAnnotationVector* fields = annotations_directory->GetFieldAnnotations();
729   dex_ir::MethodAnnotationVector* methods = annotations_directory->GetMethodAnnotations();
730   dex_ir::ParameterAnnotationVector* parameters = annotations_directory->GetParameterAnnotations();
731 
732   // Annotations on the class itself.
733   if (class_set_item != nullptr) {
734     fprintf(out_file_, "Annotations on class\n");
735     DumpAnnotationSetItem(class_set_item);
736   }
737 
738   // Annotations on fields.
739   if (fields != nullptr) {
740     for (auto& field : *fields) {
741       const dex_ir::FieldId* field_id = field->GetFieldId();
742       const uint32_t field_idx = field_id->GetIndex();
743       const char* field_name = field_id->Name()->Data();
744       fprintf(out_file_, "Annotations on field #%u '%s'\n", field_idx, field_name);
745       DumpAnnotationSetItem(field->GetAnnotationSetItem());
746     }
747   }
748 
749   // Annotations on methods.
750   if (methods != nullptr) {
751     for (auto& method : *methods) {
752       const dex_ir::MethodId* method_id = method->GetMethodId();
753       const uint32_t method_idx = method_id->GetIndex();
754       const char* method_name = method_id->Name()->Data();
755       fprintf(out_file_, "Annotations on method #%u '%s'\n", method_idx, method_name);
756       DumpAnnotationSetItem(method->GetAnnotationSetItem());
757     }
758   }
759 
760   // Annotations on method parameters.
761   if (parameters != nullptr) {
762     for (auto& parameter : *parameters) {
763       const dex_ir::MethodId* method_id = parameter->GetMethodId();
764       const uint32_t method_idx = method_id->GetIndex();
765       const char* method_name = method_id->Name()->Data();
766       fprintf(out_file_, "Annotations on method #%u '%s' parameters\n", method_idx, method_name);
767       uint32_t j = 0;
768       for (dex_ir::AnnotationSetItem* annotation : *parameter->GetAnnotations()->GetItems()) {
769         fprintf(out_file_, "#%u\n", j);
770         DumpAnnotationSetItem(annotation);
771         ++j;
772       }
773     }
774   }
775 
776   fputc('\n', out_file_);
777 }
778 
779 /*
780  * Dumps an interface that a class declares to implement.
781  */
DumpInterface(const dex_ir::TypeId * type_item,int i)782 void DexLayout::DumpInterface(const dex_ir::TypeId* type_item, int i) {
783   const char* interface_name = type_item->GetStringId()->Data();
784   if (options_.output_format_ == kOutputPlain) {
785     fprintf(out_file_, "    #%d              : '%s'\n", i, interface_name);
786   } else {
787     std::string dot(DescriptorToDotWrapper(interface_name));
788     fprintf(out_file_, "<implements name=\"%s\">\n</implements>\n", dot.c_str());
789   }
790 }
791 
792 /*
793  * Dumps the catches table associated with the code.
794  */
DumpCatches(const dex_ir::CodeItem * code)795 void DexLayout::DumpCatches(const dex_ir::CodeItem* code) {
796   const uint16_t tries_size = code->TriesSize();
797 
798   // No catch table.
799   if (tries_size == 0) {
800     fprintf(out_file_, "      catches       : (none)\n");
801     return;
802   }
803 
804   // Dump all table entries.
805   fprintf(out_file_, "      catches       : %d\n", tries_size);
806   std::vector<std::unique_ptr<const dex_ir::TryItem>>* tries = code->Tries();
807   for (uint32_t i = 0; i < tries_size; i++) {
808     const dex_ir::TryItem* try_item = (*tries)[i].get();
809     const uint32_t start = try_item->StartAddr();
810     const uint32_t end = start + try_item->InsnCount();
811     fprintf(out_file_, "        0x%04x - 0x%04x\n", start, end);
812     for (auto& handler : *try_item->GetHandlers()->GetHandlers()) {
813       const dex_ir::TypeId* type_id = handler->GetTypeId();
814       const char* descriptor = (type_id == nullptr) ? "<any>" : type_id->GetStringId()->Data();
815       fprintf(out_file_, "          %s -> 0x%04x\n", descriptor, handler->GetAddress());
816     }  // for
817   }  // for
818 }
819 
820 /*
821  * Dumps all positions table entries associated with the code.
822  */
DumpPositionInfo(const dex_ir::CodeItem * code)823 void DexLayout::DumpPositionInfo(const dex_ir::CodeItem* code) {
824   dex_ir::DebugInfoItem* debug_info = code->DebugInfo();
825   if (debug_info == nullptr) {
826     return;
827   }
828   std::vector<std::unique_ptr<dex_ir::PositionInfo>>& positions = debug_info->GetPositionInfo();
829   for (size_t i = 0; i < positions.size(); ++i) {
830     fprintf(out_file_, "        0x%04x line=%d\n", positions[i]->address_, positions[i]->line_);
831   }
832 }
833 
834 /*
835  * Dumps all locals table entries associated with the code.
836  */
DumpLocalInfo(const dex_ir::CodeItem * code)837 void DexLayout::DumpLocalInfo(const dex_ir::CodeItem* code) {
838   dex_ir::DebugInfoItem* debug_info = code->DebugInfo();
839   if (debug_info == nullptr) {
840     return;
841   }
842   std::vector<std::unique_ptr<dex_ir::LocalInfo>>& locals = debug_info->GetLocalInfo();
843   for (size_t i = 0; i < locals.size(); ++i) {
844     dex_ir::LocalInfo* entry = locals[i].get();
845     fprintf(out_file_, "        0x%04x - 0x%04x reg=%d %s %s %s\n",
846             entry->start_address_, entry->end_address_, entry->reg_,
847             entry->name_.c_str(), entry->descriptor_.c_str(), entry->signature_.c_str());
848   }
849 }
850 
851 /*
852  * Dumps a single instruction.
853  */
DumpInstruction(const dex_ir::CodeItem * code,uint32_t code_offset,uint32_t insn_idx,uint32_t insn_width,const Instruction * dec_insn)854 void DexLayout::DumpInstruction(const dex_ir::CodeItem* code,
855                                 uint32_t code_offset,
856                                 uint32_t insn_idx,
857                                 uint32_t insn_width,
858                                 const Instruction* dec_insn) {
859   // Address of instruction (expressed as byte offset).
860   fprintf(out_file_, "%06x:", code_offset + 0x10 + insn_idx * 2);
861 
862   // Dump (part of) raw bytes.
863   const uint16_t* insns = code->Insns();
864   for (uint32_t i = 0; i < 8; i++) {
865     if (i < insn_width) {
866       if (i == 7) {
867         fprintf(out_file_, " ... ");
868       } else {
869         // Print 16-bit value in little-endian order.
870         const uint8_t* bytePtr = (const uint8_t*) &insns[insn_idx + i];
871         fprintf(out_file_, " %02x%02x", bytePtr[0], bytePtr[1]);
872       }
873     } else {
874       fputs("     ", out_file_);
875     }
876   }  // for
877 
878   // Dump pseudo-instruction or opcode.
879   if (dec_insn->Opcode() == Instruction::NOP) {
880     const uint16_t instr = Get2LE((const uint8_t*) &insns[insn_idx]);
881     if (instr == Instruction::kPackedSwitchSignature) {
882       fprintf(out_file_, "|%04x: packed-switch-data (%d units)", insn_idx, insn_width);
883     } else if (instr == Instruction::kSparseSwitchSignature) {
884       fprintf(out_file_, "|%04x: sparse-switch-data (%d units)", insn_idx, insn_width);
885     } else if (instr == Instruction::kArrayDataSignature) {
886       fprintf(out_file_, "|%04x: array-data (%d units)", insn_idx, insn_width);
887     } else {
888       fprintf(out_file_, "|%04x: nop // spacer", insn_idx);
889     }
890   } else {
891     fprintf(out_file_, "|%04x: %s", insn_idx, dec_insn->Name());
892   }
893 
894   // Set up additional argument.
895   std::unique_ptr<char[]> index_buf;
896   if (Instruction::IndexTypeOf(dec_insn->Opcode()) != Instruction::kIndexNone) {
897     index_buf = IndexString(header_, dec_insn, 200);
898   }
899 
900   // Dump the instruction.
901   //
902   // NOTE: pDecInsn->DumpString(pDexFile) differs too much from original.
903   //
904   switch (Instruction::FormatOf(dec_insn->Opcode())) {
905     case Instruction::k10x:        // op
906       break;
907     case Instruction::k12x:        // op vA, vB
908       fprintf(out_file_, " v%d, v%d", dec_insn->VRegA(), dec_insn->VRegB());
909       break;
910     case Instruction::k11n:        // op vA, #+B
911       fprintf(out_file_, " v%d, #int %d // #%x",
912               dec_insn->VRegA(), (int32_t) dec_insn->VRegB(), (uint8_t)dec_insn->VRegB());
913       break;
914     case Instruction::k11x:        // op vAA
915       fprintf(out_file_, " v%d", dec_insn->VRegA());
916       break;
917     case Instruction::k10t:        // op +AA
918     case Instruction::k20t: {      // op +AAAA
919       const int32_t targ = (int32_t) dec_insn->VRegA();
920       fprintf(out_file_, " %04x // %c%04x",
921               insn_idx + targ,
922               (targ < 0) ? '-' : '+',
923               (targ < 0) ? -targ : targ);
924       break;
925     }
926     case Instruction::k22x:        // op vAA, vBBBB
927       fprintf(out_file_, " v%d, v%d", dec_insn->VRegA(), dec_insn->VRegB());
928       break;
929     case Instruction::k21t: {     // op vAA, +BBBB
930       const int32_t targ = (int32_t) dec_insn->VRegB();
931       fprintf(out_file_, " v%d, %04x // %c%04x", dec_insn->VRegA(),
932               insn_idx + targ,
933               (targ < 0) ? '-' : '+',
934               (targ < 0) ? -targ : targ);
935       break;
936     }
937     case Instruction::k21s:        // op vAA, #+BBBB
938       fprintf(out_file_, " v%d, #int %d // #%x",
939               dec_insn->VRegA(), (int32_t) dec_insn->VRegB(), (uint16_t)dec_insn->VRegB());
940       break;
941     case Instruction::k21h:        // op vAA, #+BBBB0000[00000000]
942       // The printed format varies a bit based on the actual opcode.
943       if (dec_insn->Opcode() == Instruction::CONST_HIGH16) {
944         const int32_t value = dec_insn->VRegB() << 16;
945         fprintf(out_file_, " v%d, #int %d // #%x",
946                 dec_insn->VRegA(), value, (uint16_t) dec_insn->VRegB());
947       } else {
948         const int64_t value = ((int64_t) dec_insn->VRegB()) << 48;
949         fprintf(out_file_, " v%d, #long %" PRId64 " // #%x",
950                 dec_insn->VRegA(), value, (uint16_t) dec_insn->VRegB());
951       }
952       break;
953     case Instruction::k21c:        // op vAA, thing@BBBB
954     case Instruction::k31c:        // op vAA, thing@BBBBBBBB
955       fprintf(out_file_, " v%d, %s", dec_insn->VRegA(), index_buf.get());
956       break;
957     case Instruction::k23x:        // op vAA, vBB, vCC
958       fprintf(out_file_, " v%d, v%d, v%d",
959               dec_insn->VRegA(), dec_insn->VRegB(), dec_insn->VRegC());
960       break;
961     case Instruction::k22b:        // op vAA, vBB, #+CC
962       fprintf(out_file_, " v%d, v%d, #int %d // #%02x",
963               dec_insn->VRegA(), dec_insn->VRegB(),
964               (int32_t) dec_insn->VRegC(), (uint8_t) dec_insn->VRegC());
965       break;
966     case Instruction::k22t: {      // op vA, vB, +CCCC
967       const int32_t targ = (int32_t) dec_insn->VRegC();
968       fprintf(out_file_, " v%d, v%d, %04x // %c%04x",
969               dec_insn->VRegA(), dec_insn->VRegB(),
970               insn_idx + targ,
971               (targ < 0) ? '-' : '+',
972               (targ < 0) ? -targ : targ);
973       break;
974     }
975     case Instruction::k22s:        // op vA, vB, #+CCCC
976       fprintf(out_file_, " v%d, v%d, #int %d // #%04x",
977               dec_insn->VRegA(), dec_insn->VRegB(),
978               (int32_t) dec_insn->VRegC(), (uint16_t) dec_insn->VRegC());
979       break;
980     case Instruction::k22c:        // op vA, vB, thing@CCCC
981     // NOT SUPPORTED:
982     // case Instruction::k22cs:    // [opt] op vA, vB, field offset CCCC
983       fprintf(out_file_, " v%d, v%d, %s",
984               dec_insn->VRegA(), dec_insn->VRegB(), index_buf.get());
985       break;
986     case Instruction::k30t:
987       fprintf(out_file_, " #%08x", dec_insn->VRegA());
988       break;
989     case Instruction::k31i: {     // op vAA, #+BBBBBBBB
990       // This is often, but not always, a float.
991       union {
992         float f;
993         uint32_t i;
994       } conv;
995       conv.i = dec_insn->VRegB();
996       fprintf(out_file_, " v%d, #float %g // #%08x",
997               dec_insn->VRegA(), conv.f, dec_insn->VRegB());
998       break;
999     }
1000     case Instruction::k31t:       // op vAA, offset +BBBBBBBB
1001       fprintf(out_file_, " v%d, %08x // +%08x",
1002               dec_insn->VRegA(), insn_idx + dec_insn->VRegB(), dec_insn->VRegB());
1003       break;
1004     case Instruction::k32x:        // op vAAAA, vBBBB
1005       fprintf(out_file_, " v%d, v%d", dec_insn->VRegA(), dec_insn->VRegB());
1006       break;
1007     case Instruction::k35c:           // op {vC, vD, vE, vF, vG}, thing@BBBB
1008     case Instruction::k45cc: {        // op {vC, vD, vE, vF, vG}, meth@BBBB, proto@HHHH
1009     // NOT SUPPORTED:
1010     // case Instruction::k35ms:       // [opt] invoke-virtual+super
1011     // case Instruction::k35mi:       // [opt] inline invoke
1012       uint32_t arg[Instruction::kMaxVarArgRegs];
1013       dec_insn->GetVarArgs(arg);
1014       fputs(" {", out_file_);
1015       for (int i = 0, n = dec_insn->VRegA(); i < n; i++) {
1016         if (i == 0) {
1017           fprintf(out_file_, "v%d", arg[i]);
1018         } else {
1019           fprintf(out_file_, ", v%d", arg[i]);
1020         }
1021       }  // for
1022       fprintf(out_file_, "}, %s", index_buf.get());
1023       break;
1024     }
1025     case Instruction::k3rc:           // op {vCCCC .. v(CCCC+AA-1)}, thing@BBBB
1026     case Instruction::k4rcc:          // op {vCCCC .. v(CCCC+AA-1)}, meth@BBBB, proto@HHHH
1027     // NOT SUPPORTED:
1028     // case Instruction::k3rms:       // [opt] invoke-virtual+super/range
1029     // case Instruction::k3rmi:       // [opt] execute-inline/range
1030       {
1031         // This doesn't match the "dx" output when some of the args are
1032         // 64-bit values -- dx only shows the first register.
1033         fputs(" {", out_file_);
1034         for (int i = 0, n = dec_insn->VRegA(); i < n; i++) {
1035           if (i == 0) {
1036             fprintf(out_file_, "v%d", dec_insn->VRegC() + i);
1037           } else {
1038             fprintf(out_file_, ", v%d", dec_insn->VRegC() + i);
1039           }
1040         }  // for
1041         fprintf(out_file_, "}, %s", index_buf.get());
1042       }
1043       break;
1044     case Instruction::k51l: {      // op vAA, #+BBBBBBBBBBBBBBBB
1045       // This is often, but not always, a double.
1046       union {
1047         double d;
1048         uint64_t j;
1049       } conv;
1050       conv.j = dec_insn->WideVRegB();
1051       fprintf(out_file_, " v%d, #double %g // #%016" PRIx64,
1052               dec_insn->VRegA(), conv.d, dec_insn->WideVRegB());
1053       break;
1054     }
1055     // NOT SUPPORTED:
1056     // case Instruction::k00x:        // unknown op or breakpoint
1057     //    break;
1058     default:
1059       fprintf(out_file_, " ???");
1060       break;
1061   }  // switch
1062 
1063   fputc('\n', out_file_);
1064 }
1065 
1066 /*
1067  * Dumps a bytecode disassembly.
1068  */
DumpBytecodes(uint32_t idx,const dex_ir::CodeItem * code,uint32_t code_offset)1069 void DexLayout::DumpBytecodes(uint32_t idx, const dex_ir::CodeItem* code, uint32_t code_offset) {
1070   dex_ir::MethodId* method_id = header_->GetCollections().GetMethodId(idx);
1071   const char* name = method_id->Name()->Data();
1072   std::string type_descriptor = GetSignatureForProtoId(method_id->Proto());
1073   const char* back_descriptor = method_id->Class()->GetStringId()->Data();
1074 
1075   // Generate header.
1076   std::string dot(DescriptorToDotWrapper(back_descriptor));
1077   fprintf(out_file_, "%06x:                                        |[%06x] %s.%s:%s\n",
1078           code_offset, code_offset, dot.c_str(), name, type_descriptor.c_str());
1079 
1080   // Iterate over all instructions.
1081   const uint16_t* insns = code->Insns();
1082   for (uint32_t insn_idx = 0; insn_idx < code->InsnsSize();) {
1083     const Instruction* instruction = Instruction::At(&insns[insn_idx]);
1084     const uint32_t insn_width = instruction->SizeInCodeUnits();
1085     if (insn_width == 0) {
1086       fprintf(stderr, "GLITCH: zero-width instruction at idx=0x%04x\n", insn_idx);
1087       break;
1088     }
1089     DumpInstruction(code, code_offset, insn_idx, insn_width, instruction);
1090     insn_idx += insn_width;
1091   }  // for
1092 }
1093 
1094 /*
1095  * Dumps code of a method.
1096  */
DumpCode(uint32_t idx,const dex_ir::CodeItem * code,uint32_t code_offset)1097 void DexLayout::DumpCode(uint32_t idx, const dex_ir::CodeItem* code, uint32_t code_offset) {
1098   fprintf(out_file_, "      registers     : %d\n", code->RegistersSize());
1099   fprintf(out_file_, "      ins           : %d\n", code->InsSize());
1100   fprintf(out_file_, "      outs          : %d\n", code->OutsSize());
1101   fprintf(out_file_, "      insns size    : %d 16-bit code units\n",
1102           code->InsnsSize());
1103 
1104   // Bytecode disassembly, if requested.
1105   if (options_.disassemble_) {
1106     DumpBytecodes(idx, code, code_offset);
1107   }
1108 
1109   // Try-catch blocks.
1110   DumpCatches(code);
1111 
1112   // Positions and locals table in the debug info.
1113   fprintf(out_file_, "      positions     : \n");
1114   DumpPositionInfo(code);
1115   fprintf(out_file_, "      locals        : \n");
1116   DumpLocalInfo(code);
1117 }
1118 
1119 /*
1120  * Dumps a method.
1121  */
DumpMethod(uint32_t idx,uint32_t flags,const dex_ir::CodeItem * code,int i)1122 void DexLayout::DumpMethod(uint32_t idx, uint32_t flags, const dex_ir::CodeItem* code, int i) {
1123   // Bail for anything private if export only requested.
1124   if (options_.exports_only_ && (flags & (kAccPublic | kAccProtected)) == 0) {
1125     return;
1126   }
1127 
1128   dex_ir::MethodId* method_id = header_->GetCollections().GetMethodId(idx);
1129   const char* name = method_id->Name()->Data();
1130   char* type_descriptor = strdup(GetSignatureForProtoId(method_id->Proto()).c_str());
1131   const char* back_descriptor = method_id->Class()->GetStringId()->Data();
1132   char* access_str = CreateAccessFlagStr(flags, kAccessForMethod);
1133 
1134   if (options_.output_format_ == kOutputPlain) {
1135     fprintf(out_file_, "    #%d              : (in %s)\n", i, back_descriptor);
1136     fprintf(out_file_, "      name          : '%s'\n", name);
1137     fprintf(out_file_, "      type          : '%s'\n", type_descriptor);
1138     fprintf(out_file_, "      access        : 0x%04x (%s)\n", flags, access_str);
1139     if (code == nullptr) {
1140       fprintf(out_file_, "      code          : (none)\n");
1141     } else {
1142       fprintf(out_file_, "      code          -\n");
1143       DumpCode(idx, code, code->GetOffset());
1144     }
1145     if (options_.disassemble_) {
1146       fputc('\n', out_file_);
1147     }
1148   } else if (options_.output_format_ == kOutputXml) {
1149     const bool constructor = (name[0] == '<');
1150 
1151     // Method name and prototype.
1152     if (constructor) {
1153       std::string dot(DescriptorClassToDot(back_descriptor));
1154       fprintf(out_file_, "<constructor name=\"%s\"\n", dot.c_str());
1155       dot = DescriptorToDotWrapper(back_descriptor);
1156       fprintf(out_file_, " type=\"%s\"\n", dot.c_str());
1157     } else {
1158       fprintf(out_file_, "<method name=\"%s\"\n", name);
1159       const char* return_type = strrchr(type_descriptor, ')');
1160       if (return_type == nullptr) {
1161         fprintf(stderr, "bad method type descriptor '%s'\n", type_descriptor);
1162         goto bail;
1163       }
1164       std::string dot(DescriptorToDotWrapper(return_type + 1));
1165       fprintf(out_file_, " return=\"%s\"\n", dot.c_str());
1166       fprintf(out_file_, " abstract=%s\n", QuotedBool((flags & kAccAbstract) != 0));
1167       fprintf(out_file_, " native=%s\n", QuotedBool((flags & kAccNative) != 0));
1168       fprintf(out_file_, " synchronized=%s\n", QuotedBool(
1169           (flags & (kAccSynchronized | kAccDeclaredSynchronized)) != 0));
1170     }
1171 
1172     // Additional method flags.
1173     fprintf(out_file_, " static=%s\n", QuotedBool((flags & kAccStatic) != 0));
1174     fprintf(out_file_, " final=%s\n", QuotedBool((flags & kAccFinal) != 0));
1175     // The "deprecated=" not knowable w/o parsing annotations.
1176     fprintf(out_file_, " visibility=%s\n>\n", QuotedVisibility(flags));
1177 
1178     // Parameters.
1179     if (type_descriptor[0] != '(') {
1180       fprintf(stderr, "ERROR: bad descriptor '%s'\n", type_descriptor);
1181       goto bail;
1182     }
1183     char* tmp_buf = reinterpret_cast<char*>(malloc(strlen(type_descriptor) + 1));
1184     const char* base = type_descriptor + 1;
1185     int arg_num = 0;
1186     while (*base != ')') {
1187       char* cp = tmp_buf;
1188       while (*base == '[') {
1189         *cp++ = *base++;
1190       }
1191       if (*base == 'L') {
1192         // Copy through ';'.
1193         do {
1194           *cp = *base++;
1195         } while (*cp++ != ';');
1196       } else {
1197         // Primitive char, copy it.
1198         if (strchr("ZBCSIFJD", *base) == nullptr) {
1199           fprintf(stderr, "ERROR: bad method signature '%s'\n", base);
1200           break;  // while
1201         }
1202         *cp++ = *base++;
1203       }
1204       // Null terminate and display.
1205       *cp++ = '\0';
1206       std::string dot(DescriptorToDotWrapper(tmp_buf));
1207       fprintf(out_file_, "<parameter name=\"arg%d\" type=\"%s\">\n"
1208                         "</parameter>\n", arg_num++, dot.c_str());
1209     }  // while
1210     free(tmp_buf);
1211     if (constructor) {
1212       fprintf(out_file_, "</constructor>\n");
1213     } else {
1214       fprintf(out_file_, "</method>\n");
1215     }
1216   }
1217 
1218  bail:
1219   free(type_descriptor);
1220   free(access_str);
1221 }
1222 
1223 /*
1224  * Dumps a static (class) field.
1225  */
DumpSField(uint32_t idx,uint32_t flags,int i,dex_ir::EncodedValue * init)1226 void DexLayout::DumpSField(uint32_t idx, uint32_t flags, int i, dex_ir::EncodedValue* init) {
1227   // Bail for anything private if export only requested.
1228   if (options_.exports_only_ && (flags & (kAccPublic | kAccProtected)) == 0) {
1229     return;
1230   }
1231 
1232   dex_ir::FieldId* field_id = header_->GetCollections().GetFieldId(idx);
1233   const char* name = field_id->Name()->Data();
1234   const char* type_descriptor = field_id->Type()->GetStringId()->Data();
1235   const char* back_descriptor = field_id->Class()->GetStringId()->Data();
1236   char* access_str = CreateAccessFlagStr(flags, kAccessForField);
1237 
1238   if (options_.output_format_ == kOutputPlain) {
1239     fprintf(out_file_, "    #%d              : (in %s)\n", i, back_descriptor);
1240     fprintf(out_file_, "      name          : '%s'\n", name);
1241     fprintf(out_file_, "      type          : '%s'\n", type_descriptor);
1242     fprintf(out_file_, "      access        : 0x%04x (%s)\n", flags, access_str);
1243     if (init != nullptr) {
1244       fputs("      value         : ", out_file_);
1245       DumpEncodedValue(init);
1246       fputs("\n", out_file_);
1247     }
1248   } else if (options_.output_format_ == kOutputXml) {
1249     fprintf(out_file_, "<field name=\"%s\"\n", name);
1250     std::string dot(DescriptorToDotWrapper(type_descriptor));
1251     fprintf(out_file_, " type=\"%s\"\n", dot.c_str());
1252     fprintf(out_file_, " transient=%s\n", QuotedBool((flags & kAccTransient) != 0));
1253     fprintf(out_file_, " volatile=%s\n", QuotedBool((flags & kAccVolatile) != 0));
1254     // The "value=" is not knowable w/o parsing annotations.
1255     fprintf(out_file_, " static=%s\n", QuotedBool((flags & kAccStatic) != 0));
1256     fprintf(out_file_, " final=%s\n", QuotedBool((flags & kAccFinal) != 0));
1257     // The "deprecated=" is not knowable w/o parsing annotations.
1258     fprintf(out_file_, " visibility=%s\n", QuotedVisibility(flags));
1259     if (init != nullptr) {
1260       fputs(" value=\"", out_file_);
1261       DumpEncodedValue(init);
1262       fputs("\"\n", out_file_);
1263     }
1264     fputs(">\n</field>\n", out_file_);
1265   }
1266 
1267   free(access_str);
1268 }
1269 
1270 /*
1271  * Dumps an instance field.
1272  */
DumpIField(uint32_t idx,uint32_t flags,int i)1273 void DexLayout::DumpIField(uint32_t idx, uint32_t flags, int i) {
1274   DumpSField(idx, flags, i, nullptr);
1275 }
1276 
1277 /*
1278  * Dumps the class.
1279  *
1280  * Note "idx" is a DexClassDef index, not a DexTypeId index.
1281  *
1282  * If "*last_package" is nullptr or does not match the current class' package,
1283  * the value will be replaced with a newly-allocated string.
1284  */
DumpClass(int idx,char ** last_package)1285 void DexLayout::DumpClass(int idx, char** last_package) {
1286   dex_ir::ClassDef* class_def = header_->GetCollections().GetClassDef(idx);
1287   // Omitting non-public class.
1288   if (options_.exports_only_ && (class_def->GetAccessFlags() & kAccPublic) == 0) {
1289     return;
1290   }
1291 
1292   if (options_.show_section_headers_) {
1293     DumpClassDef(idx);
1294   }
1295 
1296   if (options_.show_annotations_) {
1297     DumpClassAnnotations(idx);
1298   }
1299 
1300   // For the XML output, show the package name.  Ideally we'd gather
1301   // up the classes, sort them, and dump them alphabetically so the
1302   // package name wouldn't jump around, but that's not a great plan
1303   // for something that needs to run on the device.
1304   const char* class_descriptor =
1305       header_->GetCollections().GetClassDef(idx)->ClassType()->GetStringId()->Data();
1306   if (!(class_descriptor[0] == 'L' &&
1307         class_descriptor[strlen(class_descriptor)-1] == ';')) {
1308     // Arrays and primitives should not be defined explicitly. Keep going?
1309     fprintf(stderr, "Malformed class name '%s'\n", class_descriptor);
1310   } else if (options_.output_format_ == kOutputXml) {
1311     char* mangle = strdup(class_descriptor + 1);
1312     mangle[strlen(mangle)-1] = '\0';
1313 
1314     // Reduce to just the package name.
1315     char* last_slash = strrchr(mangle, '/');
1316     if (last_slash != nullptr) {
1317       *last_slash = '\0';
1318     } else {
1319       *mangle = '\0';
1320     }
1321 
1322     for (char* cp = mangle; *cp != '\0'; cp++) {
1323       if (*cp == '/') {
1324         *cp = '.';
1325       }
1326     }  // for
1327 
1328     if (*last_package == nullptr || strcmp(mangle, *last_package) != 0) {
1329       // Start of a new package.
1330       if (*last_package != nullptr) {
1331         fprintf(out_file_, "</package>\n");
1332       }
1333       fprintf(out_file_, "<package name=\"%s\"\n>\n", mangle);
1334       free(*last_package);
1335       *last_package = mangle;
1336     } else {
1337       free(mangle);
1338     }
1339   }
1340 
1341   // General class information.
1342   char* access_str = CreateAccessFlagStr(class_def->GetAccessFlags(), kAccessForClass);
1343   const char* superclass_descriptor = nullptr;
1344   if (class_def->Superclass() != nullptr) {
1345     superclass_descriptor = class_def->Superclass()->GetStringId()->Data();
1346   }
1347   if (options_.output_format_ == kOutputPlain) {
1348     fprintf(out_file_, "Class #%d            -\n", idx);
1349     fprintf(out_file_, "  Class descriptor  : '%s'\n", class_descriptor);
1350     fprintf(out_file_, "  Access flags      : 0x%04x (%s)\n",
1351             class_def->GetAccessFlags(), access_str);
1352     if (superclass_descriptor != nullptr) {
1353       fprintf(out_file_, "  Superclass        : '%s'\n", superclass_descriptor);
1354     }
1355     fprintf(out_file_, "  Interfaces        -\n");
1356   } else {
1357     std::string dot(DescriptorClassToDot(class_descriptor));
1358     fprintf(out_file_, "<class name=\"%s\"\n", dot.c_str());
1359     if (superclass_descriptor != nullptr) {
1360       dot = DescriptorToDotWrapper(superclass_descriptor);
1361       fprintf(out_file_, " extends=\"%s\"\n", dot.c_str());
1362     }
1363     fprintf(out_file_, " interface=%s\n",
1364             QuotedBool((class_def->GetAccessFlags() & kAccInterface) != 0));
1365     fprintf(out_file_, " abstract=%s\n",
1366             QuotedBool((class_def->GetAccessFlags() & kAccAbstract) != 0));
1367     fprintf(out_file_, " static=%s\n", QuotedBool((class_def->GetAccessFlags() & kAccStatic) != 0));
1368     fprintf(out_file_, " final=%s\n", QuotedBool((class_def->GetAccessFlags() & kAccFinal) != 0));
1369     // The "deprecated=" not knowable w/o parsing annotations.
1370     fprintf(out_file_, " visibility=%s\n", QuotedVisibility(class_def->GetAccessFlags()));
1371     fprintf(out_file_, ">\n");
1372   }
1373 
1374   // Interfaces.
1375   const dex_ir::TypeList* interfaces = class_def->Interfaces();
1376   if (interfaces != nullptr) {
1377     const dex_ir::TypeIdVector* interfaces_vector = interfaces->GetTypeList();
1378     for (uint32_t i = 0; i < interfaces_vector->size(); i++) {
1379       DumpInterface((*interfaces_vector)[i], i);
1380     }  // for
1381   }
1382 
1383   // Fields and methods.
1384   dex_ir::ClassData* class_data = class_def->GetClassData();
1385   // Prepare data for static fields.
1386   dex_ir::EncodedArrayItem* static_values = class_def->StaticValues();
1387   dex_ir::EncodedValueVector* encoded_values =
1388       static_values == nullptr ? nullptr : static_values->GetEncodedValues();
1389   const uint32_t encoded_values_size = (encoded_values == nullptr) ? 0 : encoded_values->size();
1390 
1391   // Static fields.
1392   if (options_.output_format_ == kOutputPlain) {
1393     fprintf(out_file_, "  Static fields     -\n");
1394   }
1395   if (class_data != nullptr) {
1396     dex_ir::FieldItemVector* static_fields = class_data->StaticFields();
1397     if (static_fields != nullptr) {
1398       for (uint32_t i = 0; i < static_fields->size(); i++) {
1399         DumpSField((*static_fields)[i]->GetFieldId()->GetIndex(),
1400                    (*static_fields)[i]->GetAccessFlags(),
1401                    i,
1402                    i < encoded_values_size ? (*encoded_values)[i].get() : nullptr);
1403       }  // for
1404     }
1405   }
1406 
1407   // Instance fields.
1408   if (options_.output_format_ == kOutputPlain) {
1409     fprintf(out_file_, "  Instance fields   -\n");
1410   }
1411   if (class_data != nullptr) {
1412     dex_ir::FieldItemVector* instance_fields = class_data->InstanceFields();
1413     if (instance_fields != nullptr) {
1414       for (uint32_t i = 0; i < instance_fields->size(); i++) {
1415         DumpIField((*instance_fields)[i]->GetFieldId()->GetIndex(),
1416                    (*instance_fields)[i]->GetAccessFlags(),
1417                    i);
1418       }  // for
1419     }
1420   }
1421 
1422   // Direct methods.
1423   if (options_.output_format_ == kOutputPlain) {
1424     fprintf(out_file_, "  Direct methods    -\n");
1425   }
1426   if (class_data != nullptr) {
1427     dex_ir::MethodItemVector* direct_methods = class_data->DirectMethods();
1428     if (direct_methods != nullptr) {
1429       for (uint32_t i = 0; i < direct_methods->size(); i++) {
1430         DumpMethod((*direct_methods)[i]->GetMethodId()->GetIndex(),
1431                    (*direct_methods)[i]->GetAccessFlags(),
1432                    (*direct_methods)[i]->GetCodeItem(),
1433                  i);
1434       }  // for
1435     }
1436   }
1437 
1438   // Virtual methods.
1439   if (options_.output_format_ == kOutputPlain) {
1440     fprintf(out_file_, "  Virtual methods   -\n");
1441   }
1442   if (class_data != nullptr) {
1443     dex_ir::MethodItemVector* virtual_methods = class_data->VirtualMethods();
1444     if (virtual_methods != nullptr) {
1445       for (uint32_t i = 0; i < virtual_methods->size(); i++) {
1446         DumpMethod((*virtual_methods)[i]->GetMethodId()->GetIndex(),
1447                    (*virtual_methods)[i]->GetAccessFlags(),
1448                    (*virtual_methods)[i]->GetCodeItem(),
1449                    i);
1450       }  // for
1451     }
1452   }
1453 
1454   // End of class.
1455   if (options_.output_format_ == kOutputPlain) {
1456     const char* file_name = "unknown";
1457     if (class_def->SourceFile() != nullptr) {
1458       file_name = class_def->SourceFile()->Data();
1459     }
1460     const dex_ir::StringId* source_file = class_def->SourceFile();
1461     fprintf(out_file_, "  source_file_idx   : %d (%s)\n\n",
1462             source_file == nullptr ? 0xffffffffU : source_file->GetIndex(), file_name);
1463   } else if (options_.output_format_ == kOutputXml) {
1464     fprintf(out_file_, "</class>\n");
1465   }
1466 
1467   free(access_str);
1468 }
1469 
DumpDexFile()1470 void DexLayout::DumpDexFile() {
1471   // Headers.
1472   if (options_.show_file_headers_) {
1473     DumpFileHeader();
1474   }
1475 
1476   // Open XML context.
1477   if (options_.output_format_ == kOutputXml) {
1478     fprintf(out_file_, "<api>\n");
1479   }
1480 
1481   // Iterate over all classes.
1482   char* package = nullptr;
1483   const uint32_t class_defs_size = header_->GetCollections().ClassDefsSize();
1484   for (uint32_t i = 0; i < class_defs_size; i++) {
1485     DumpClass(i, &package);
1486   }  // for
1487 
1488   // Free the last package allocated.
1489   if (package != nullptr) {
1490     fprintf(out_file_, "</package>\n");
1491     free(package);
1492   }
1493 
1494   // Close XML context.
1495   if (options_.output_format_ == kOutputXml) {
1496     fprintf(out_file_, "</api>\n");
1497   }
1498 }
1499 
LayoutClassDefsAndClassData(const DexFile * dex_file)1500 std::vector<dex_ir::ClassData*> DexLayout::LayoutClassDefsAndClassData(const DexFile* dex_file) {
1501   std::vector<dex_ir::ClassDef*> new_class_def_order;
1502   for (std::unique_ptr<dex_ir::ClassDef>& class_def : header_->GetCollections().ClassDefs()) {
1503     dex::TypeIndex type_idx(class_def->ClassType()->GetIndex());
1504     if (info_->ContainsClass(*dex_file, type_idx)) {
1505       new_class_def_order.push_back(class_def.get());
1506     }
1507   }
1508   for (std::unique_ptr<dex_ir::ClassDef>& class_def : header_->GetCollections().ClassDefs()) {
1509     dex::TypeIndex type_idx(class_def->ClassType()->GetIndex());
1510     if (!info_->ContainsClass(*dex_file, type_idx)) {
1511       new_class_def_order.push_back(class_def.get());
1512     }
1513   }
1514   uint32_t class_defs_offset = header_->GetCollections().ClassDefsOffset();
1515   uint32_t class_data_offset = header_->GetCollections().ClassDatasOffset();
1516   std::unordered_set<dex_ir::ClassData*> visited_class_data;
1517   std::vector<dex_ir::ClassData*> new_class_data_order;
1518   for (uint32_t i = 0; i < new_class_def_order.size(); ++i) {
1519     dex_ir::ClassDef* class_def = new_class_def_order[i];
1520     class_def->SetIndex(i);
1521     class_def->SetOffset(class_defs_offset);
1522     class_defs_offset += dex_ir::ClassDef::ItemSize();
1523     dex_ir::ClassData* class_data = class_def->GetClassData();
1524     if (class_data != nullptr && visited_class_data.find(class_data) == visited_class_data.end()) {
1525       class_data->SetOffset(class_data_offset);
1526       class_data_offset += class_data->GetSize();
1527       visited_class_data.insert(class_data);
1528       new_class_data_order.push_back(class_data);
1529     }
1530   }
1531   return new_class_data_order;
1532 }
1533 
LayoutStringData(const DexFile * dex_file)1534 void DexLayout::LayoutStringData(const DexFile* dex_file) {
1535   const size_t num_strings = header_->GetCollections().StringIds().size();
1536   std::vector<bool> is_shorty(num_strings, false);
1537   std::vector<bool> from_hot_method(num_strings, false);
1538   for (std::unique_ptr<dex_ir::ClassDef>& class_def : header_->GetCollections().ClassDefs()) {
1539     // A name of a profile class is probably going to get looked up by ClassTable::Lookup, mark it
1540     // as hot. Add its super class and interfaces as well, which can be used during initialization.
1541     const bool is_profile_class =
1542         info_->ContainsClass(*dex_file, dex::TypeIndex(class_def->ClassType()->GetIndex()));
1543     if (is_profile_class) {
1544       from_hot_method[class_def->ClassType()->GetStringId()->GetIndex()] = true;
1545       const dex_ir::TypeId* superclass = class_def->Superclass();
1546       if (superclass != nullptr) {
1547         from_hot_method[superclass->GetStringId()->GetIndex()] = true;
1548       }
1549       const dex_ir::TypeList* interfaces = class_def->Interfaces();
1550       if (interfaces != nullptr) {
1551         for (const dex_ir::TypeId* interface_type : *interfaces->GetTypeList()) {
1552           from_hot_method[interface_type->GetStringId()->GetIndex()] = true;
1553         }
1554       }
1555     }
1556     dex_ir::ClassData* data = class_def->GetClassData();
1557     if (data == nullptr) {
1558       continue;
1559     }
1560     for (size_t i = 0; i < 2; ++i) {
1561       for (auto& method : *(i == 0 ? data->DirectMethods() : data->VirtualMethods())) {
1562         const dex_ir::MethodId* method_id = method->GetMethodId();
1563         dex_ir::CodeItem* code_item = method->GetCodeItem();
1564         if (code_item == nullptr) {
1565           continue;
1566         }
1567         const bool is_clinit = is_profile_class &&
1568             (method->GetAccessFlags() & kAccConstructor) != 0 &&
1569             (method->GetAccessFlags() & kAccStatic) != 0;
1570         const bool method_executed = is_clinit ||
1571             info_->GetMethodHotness(MethodReference(dex_file, method_id->GetIndex())).IsInProfile();
1572         if (!method_executed) {
1573           continue;
1574         }
1575         is_shorty[method_id->Proto()->Shorty()->GetIndex()] = true;
1576         dex_ir::CodeFixups* fixups = code_item->GetCodeFixups();
1577         if (fixups == nullptr) {
1578           continue;
1579         }
1580         // Add const-strings.
1581         for (dex_ir::StringId* id : *fixups->StringIds()) {
1582           from_hot_method[id->GetIndex()] = true;
1583         }
1584         // Add field classes, names, and types.
1585         for (dex_ir::FieldId* id : *fixups->FieldIds()) {
1586           // TODO: Only visit field ids from static getters and setters.
1587           from_hot_method[id->Class()->GetStringId()->GetIndex()] = true;
1588           from_hot_method[id->Name()->GetIndex()] = true;
1589           from_hot_method[id->Type()->GetStringId()->GetIndex()] = true;
1590         }
1591         // For clinits, add referenced method classes, names, and protos.
1592         if (is_clinit) {
1593           for (dex_ir::MethodId* id : *fixups->MethodIds()) {
1594             from_hot_method[id->Class()->GetStringId()->GetIndex()] = true;
1595             from_hot_method[id->Name()->GetIndex()] = true;
1596             is_shorty[id->Proto()->Shorty()->GetIndex()] = true;
1597           }
1598         }
1599       }
1600     }
1601   }
1602   // Sort string data by specified order.
1603   std::vector<dex_ir::StringId*> string_ids;
1604   size_t min_offset = std::numeric_limits<size_t>::max();
1605   size_t max_offset = 0;
1606   size_t hot_bytes = 0;
1607   for (auto& string_id : header_->GetCollections().StringIds()) {
1608     string_ids.push_back(string_id.get());
1609     const size_t cur_offset = string_id->DataItem()->GetOffset();
1610     CHECK_NE(cur_offset, 0u);
1611     min_offset = std::min(min_offset, cur_offset);
1612     dex_ir::StringData* data = string_id->DataItem();
1613     const size_t element_size = data->GetSize() + 1;  // Add one extra for null.
1614     size_t end_offset = cur_offset + element_size;
1615     if (is_shorty[string_id->GetIndex()] || from_hot_method[string_id->GetIndex()]) {
1616       hot_bytes += element_size;
1617     }
1618     max_offset = std::max(max_offset, end_offset);
1619   }
1620   VLOG(compiler) << "Hot string data bytes " << hot_bytes << "/" << max_offset - min_offset;
1621   std::sort(string_ids.begin(),
1622             string_ids.end(),
1623             [&is_shorty, &from_hot_method](const dex_ir::StringId* a,
1624                                            const dex_ir::StringId* b) {
1625     const bool a_is_hot = from_hot_method[a->GetIndex()];
1626     const bool b_is_hot = from_hot_method[b->GetIndex()];
1627     if (a_is_hot != b_is_hot) {
1628       return a_is_hot < b_is_hot;
1629     }
1630     // After hot methods are partitioned, subpartition shorties.
1631     const bool a_is_shorty = is_shorty[a->GetIndex()];
1632     const bool b_is_shorty = is_shorty[b->GetIndex()];
1633     if (a_is_shorty != b_is_shorty) {
1634       return a_is_shorty < b_is_shorty;
1635     }
1636     // Preserve order.
1637     return a->DataItem()->GetOffset() < b->DataItem()->GetOffset();
1638   });
1639   // Now we know what order we want the string data, reorder the offsets.
1640   size_t offset = min_offset;
1641   for (dex_ir::StringId* string_id : string_ids) {
1642     dex_ir::StringData* data = string_id->DataItem();
1643     data->SetOffset(offset);
1644     offset += data->GetSize() + 1;  // Add one extra for null.
1645   }
1646   if (offset > max_offset) {
1647     const uint32_t diff = offset - max_offset;
1648     // If we expanded the string data section, we need to update the offsets or else we will
1649     // corrupt the next section when writing out.
1650     FixupSections(header_->GetCollections().StringDatasOffset(), diff);
1651     // Update file size.
1652     header_->SetFileSize(header_->FileSize() + diff);
1653   }
1654 }
1655 
1656 // Orders code items according to specified class data ordering.
1657 // NOTE: If the section following the code items is byte aligned, the last code item is left in
1658 // place to preserve alignment. Layout needs an overhaul to handle movement of other sections.
LayoutCodeItems(const DexFile * dex_file,std::vector<dex_ir::ClassData * > new_class_data_order)1659 int32_t DexLayout::LayoutCodeItems(const DexFile* dex_file,
1660                                    std::vector<dex_ir::ClassData*> new_class_data_order) {
1661   // Do not move code items if class data section precedes code item section.
1662   // ULEB encoding is variable length, causing problems determining the offset of the code items.
1663   // TODO: We should swap the order of these sections in the future to avoid this issue.
1664   uint32_t class_data_offset = header_->GetCollections().ClassDatasOffset();
1665   uint32_t code_item_offset = header_->GetCollections().CodeItemsOffset();
1666   if (class_data_offset < code_item_offset) {
1667     return 0;
1668   }
1669 
1670   // Find the last code item so we can leave it in place if the next section is not 4 byte aligned.
1671   dex_ir::CodeItem* last_code_item = nullptr;
1672   std::unordered_set<dex_ir::CodeItem*> visited_code_items;
1673   bool is_code_item_aligned = IsNextSectionCodeItemAligned(code_item_offset);
1674   if (!is_code_item_aligned) {
1675     for (auto& code_item_pair : header_->GetCollections().CodeItems()) {
1676       std::unique_ptr<dex_ir::CodeItem>& code_item = code_item_pair.second;
1677       if (last_code_item == nullptr
1678           || last_code_item->GetOffset() < code_item->GetOffset()) {
1679         last_code_item = code_item.get();
1680       }
1681     }
1682   }
1683 
1684   static constexpr InvokeType invoke_types[] = {
1685     kDirect,
1686     kVirtual
1687   };
1688 
1689   const size_t num_layout_types = static_cast<size_t>(LayoutType::kLayoutTypeCount);
1690   std::unordered_set<dex_ir::CodeItem*> code_items[num_layout_types];
1691   for (InvokeType invoke_type : invoke_types) {
1692     for (std::unique_ptr<dex_ir::ClassDef>& class_def : header_->GetCollections().ClassDefs()) {
1693       const bool is_profile_class =
1694           info_->ContainsClass(*dex_file, dex::TypeIndex(class_def->ClassType()->GetIndex()));
1695 
1696       // Skip classes that are not defined in this dex file.
1697       dex_ir::ClassData* class_data = class_def->GetClassData();
1698       if (class_data == nullptr) {
1699         continue;
1700       }
1701       for (auto& method : *(invoke_type == InvokeType::kDirect
1702                                 ? class_data->DirectMethods()
1703                                 : class_data->VirtualMethods())) {
1704         const dex_ir::MethodId *method_id = method->GetMethodId();
1705         dex_ir::CodeItem *code_item = method->GetCodeItem();
1706         if (code_item == last_code_item || code_item == nullptr) {
1707           continue;
1708         }
1709         // Separate executed methods (clinits and profiled methods) from unexecuted methods.
1710         const bool is_clinit = (method->GetAccessFlags() & kAccConstructor) != 0 &&
1711             (method->GetAccessFlags() & kAccStatic) != 0;
1712         const bool is_startup_clinit = is_profile_class && is_clinit;
1713         using Hotness = ProfileCompilationInfo::MethodHotness;
1714         Hotness hotness = info_->GetMethodHotness(MethodReference(dex_file, method_id->GetIndex()));
1715         LayoutType state = LayoutType::kLayoutTypeUnused;
1716         if (hotness.IsHot()) {
1717           // Hot code is compiled, maybe one day it won't be accessed. So lay it out together for
1718           // now.
1719           state = LayoutType::kLayoutTypeHot;
1720         } else if (is_startup_clinit || hotness.GetFlags() == Hotness::kFlagStartup) {
1721           // Startup clinit or a method that only has the startup flag.
1722           state = LayoutType::kLayoutTypeStartupOnly;
1723         } else if (is_clinit) {
1724           state = LayoutType::kLayoutTypeUsedOnce;
1725         } else if (hotness.IsInProfile()) {
1726           state = LayoutType::kLayoutTypeSometimesUsed;
1727         }
1728         code_items[static_cast<size_t>(state)].insert(code_item);
1729       }
1730     }
1731   }
1732 
1733   // Total_diff includes diffs generated by clinits, executed, and non-executed methods.
1734   int32_t total_diff = 0;
1735   // The relative placement has no effect on correctness; it is used to ensure
1736   // the layout is deterministic
1737   for (size_t index = 0; index < num_layout_types; ++index) {
1738     const std::unordered_set<dex_ir::CodeItem*>& code_items_set = code_items[index];
1739     // diff is reset for each class of code items.
1740     int32_t diff = 0;
1741     const uint32_t start_offset = code_item_offset;
1742     for (dex_ir::ClassData* data : new_class_data_order) {
1743       data->SetOffset(data->GetOffset() + diff);
1744       for (InvokeType invoke_type : invoke_types) {
1745         for (auto &method : *(invoke_type == InvokeType::kDirect
1746                                   ? data->DirectMethods()
1747                                   : data->VirtualMethods())) {
1748           dex_ir::CodeItem* code_item = method->GetCodeItem();
1749           if (code_item != nullptr &&
1750               code_items_set.find(code_item) != code_items_set.end()) {
1751             diff += UnsignedLeb128Size(code_item_offset)
1752                 - UnsignedLeb128Size(code_item->GetOffset());
1753             code_item->SetOffset(code_item_offset);
1754             code_item_offset +=
1755                 RoundUp(code_item->GetSize(), kDexCodeItemAlignment);
1756           }
1757         }
1758       }
1759     }
1760     DexLayoutSection& code_section = dex_sections_.sections_[static_cast<size_t>(
1761         DexLayoutSections::SectionType::kSectionTypeCode)];
1762     code_section.parts_[index].offset_ = start_offset;
1763     code_section.parts_[index].size_ = code_item_offset - start_offset;
1764     for (size_t i = 0; i < num_layout_types; ++i) {
1765       VLOG(dex) << "Code item layout bucket " << i << " count=" << code_items[i].size()
1766                 << " bytes=" << code_section.parts_[i].size_;
1767     }
1768     total_diff += diff;
1769   }
1770   // Adjust diff to be 4-byte aligned.
1771   return RoundUp(total_diff, kDexCodeItemAlignment);
1772 }
1773 
IsNextSectionCodeItemAligned(uint32_t offset)1774 bool DexLayout::IsNextSectionCodeItemAligned(uint32_t offset) {
1775   dex_ir::Collections& collections = header_->GetCollections();
1776   std::set<uint32_t> section_offsets;
1777   section_offsets.insert(collections.MapListOffset());
1778   section_offsets.insert(collections.TypeListsOffset());
1779   section_offsets.insert(collections.AnnotationSetRefListsOffset());
1780   section_offsets.insert(collections.AnnotationSetItemsOffset());
1781   section_offsets.insert(collections.ClassDatasOffset());
1782   section_offsets.insert(collections.CodeItemsOffset());
1783   section_offsets.insert(collections.StringDatasOffset());
1784   section_offsets.insert(collections.DebugInfoItemsOffset());
1785   section_offsets.insert(collections.AnnotationItemsOffset());
1786   section_offsets.insert(collections.EncodedArrayItemsOffset());
1787   section_offsets.insert(collections.AnnotationsDirectoryItemsOffset());
1788 
1789   auto found = section_offsets.find(offset);
1790   if (found != section_offsets.end()) {
1791     found++;
1792     if (found != section_offsets.end()) {
1793       return *found % kDexCodeItemAlignment == 0;
1794     }
1795   }
1796   return false;
1797 }
1798 
1799 // Adjust offsets of every item in the specified section by diff bytes.
FixupSection(std::map<uint32_t,std::unique_ptr<T>> & map,uint32_t diff)1800 template<class T> void DexLayout::FixupSection(std::map<uint32_t, std::unique_ptr<T>>& map,
1801                                                uint32_t diff) {
1802   for (auto& pair : map) {
1803     std::unique_ptr<T>& item = pair.second;
1804     item->SetOffset(item->GetOffset() + diff);
1805   }
1806 }
1807 
1808 // Adjust offsets of all sections with an address after the specified offset by diff bytes.
FixupSections(uint32_t offset,uint32_t diff)1809 void DexLayout::FixupSections(uint32_t offset, uint32_t diff) {
1810   dex_ir::Collections& collections = header_->GetCollections();
1811   uint32_t map_list_offset = collections.MapListOffset();
1812   if (map_list_offset > offset) {
1813     collections.SetMapListOffset(map_list_offset + diff);
1814   }
1815 
1816   uint32_t type_lists_offset = collections.TypeListsOffset();
1817   if (type_lists_offset > offset) {
1818     collections.SetTypeListsOffset(type_lists_offset + diff);
1819     FixupSection(collections.TypeLists(), diff);
1820   }
1821 
1822   uint32_t annotation_set_ref_lists_offset = collections.AnnotationSetRefListsOffset();
1823   if (annotation_set_ref_lists_offset > offset) {
1824     collections.SetAnnotationSetRefListsOffset(annotation_set_ref_lists_offset + diff);
1825     FixupSection(collections.AnnotationSetRefLists(), diff);
1826   }
1827 
1828   uint32_t annotation_set_items_offset = collections.AnnotationSetItemsOffset();
1829   if (annotation_set_items_offset > offset) {
1830     collections.SetAnnotationSetItemsOffset(annotation_set_items_offset + diff);
1831     FixupSection(collections.AnnotationSetItems(), diff);
1832   }
1833 
1834   uint32_t class_datas_offset = collections.ClassDatasOffset();
1835   if (class_datas_offset > offset) {
1836     collections.SetClassDatasOffset(class_datas_offset + diff);
1837     FixupSection(collections.ClassDatas(), diff);
1838   }
1839 
1840   uint32_t code_items_offset = collections.CodeItemsOffset();
1841   if (code_items_offset > offset) {
1842     collections.SetCodeItemsOffset(code_items_offset + diff);
1843     FixupSection(collections.CodeItems(), diff);
1844   }
1845 
1846   uint32_t string_datas_offset = collections.StringDatasOffset();
1847   if (string_datas_offset > offset) {
1848     collections.SetStringDatasOffset(string_datas_offset + diff);
1849     FixupSection(collections.StringDatas(), diff);
1850   }
1851 
1852   uint32_t debug_info_items_offset = collections.DebugInfoItemsOffset();
1853   if (debug_info_items_offset > offset) {
1854     collections.SetDebugInfoItemsOffset(debug_info_items_offset + diff);
1855     FixupSection(collections.DebugInfoItems(), diff);
1856   }
1857 
1858   uint32_t annotation_items_offset = collections.AnnotationItemsOffset();
1859   if (annotation_items_offset > offset) {
1860     collections.SetAnnotationItemsOffset(annotation_items_offset + diff);
1861     FixupSection(collections.AnnotationItems(), diff);
1862   }
1863 
1864   uint32_t encoded_array_items_offset = collections.EncodedArrayItemsOffset();
1865   if (encoded_array_items_offset > offset) {
1866     collections.SetEncodedArrayItemsOffset(encoded_array_items_offset + diff);
1867     FixupSection(collections.EncodedArrayItems(), diff);
1868   }
1869 
1870   uint32_t annotations_directory_items_offset = collections.AnnotationsDirectoryItemsOffset();
1871   if (annotations_directory_items_offset > offset) {
1872     collections.SetAnnotationsDirectoryItemsOffset(annotations_directory_items_offset + diff);
1873     FixupSection(collections.AnnotationsDirectoryItems(), diff);
1874   }
1875 }
1876 
LayoutOutputFile(const DexFile * dex_file)1877 void DexLayout::LayoutOutputFile(const DexFile* dex_file) {
1878   LayoutStringData(dex_file);
1879   std::vector<dex_ir::ClassData*> new_class_data_order = LayoutClassDefsAndClassData(dex_file);
1880   int32_t diff = LayoutCodeItems(dex_file, new_class_data_order);
1881   // Move sections after ClassData by diff bytes.
1882   FixupSections(header_->GetCollections().ClassDatasOffset(), diff);
1883   // Update file size.
1884   header_->SetFileSize(header_->FileSize() + diff);
1885 }
1886 
OutputDexFile(const DexFile * dex_file)1887 void DexLayout::OutputDexFile(const DexFile* dex_file) {
1888   const std::string& dex_file_location = dex_file->GetLocation();
1889   std::string error_msg;
1890   std::unique_ptr<File> new_file;
1891   if (!options_.output_to_memmap_) {
1892     std::string output_location(options_.output_dex_directory_);
1893     size_t last_slash = dex_file_location.rfind('/');
1894     std::string dex_file_directory = dex_file_location.substr(0, last_slash + 1);
1895     if (output_location == dex_file_directory) {
1896       output_location = dex_file_location + ".new";
1897     } else if (last_slash != std::string::npos) {
1898       output_location += dex_file_location.substr(last_slash);
1899     } else {
1900       output_location += "/" + dex_file_location + ".new";
1901     }
1902     new_file.reset(OS::CreateEmptyFile(output_location.c_str()));
1903     if (new_file == nullptr) {
1904       LOG(ERROR) << "Could not create dex writer output file: " << output_location;
1905       return;
1906     }
1907     if (ftruncate(new_file->Fd(), header_->FileSize()) != 0) {
1908       LOG(ERROR) << "Could not grow dex writer output file: " << output_location;;
1909       new_file->Erase();
1910       return;
1911     }
1912     mem_map_.reset(MemMap::MapFile(header_->FileSize(), PROT_READ | PROT_WRITE, MAP_SHARED,
1913         new_file->Fd(), 0, /*low_4gb*/ false, output_location.c_str(), &error_msg));
1914   } else {
1915     mem_map_.reset(MemMap::MapAnonymous("layout dex", nullptr, header_->FileSize(),
1916         PROT_READ | PROT_WRITE, /* low_4gb */ false, /* reuse */ false, &error_msg));
1917   }
1918   if (mem_map_ == nullptr) {
1919     LOG(ERROR) << "Could not create mem map for dex writer output: " << error_msg;
1920     if (new_file != nullptr) {
1921       new_file->Erase();
1922     }
1923     return;
1924   }
1925   DexWriter::Output(header_, mem_map_.get());
1926   if (new_file != nullptr) {
1927     UNUSED(new_file->FlushCloseOrErase());
1928   }
1929   // Verify the output dex file's structure for debug builds.
1930   if (kIsDebugBuild) {
1931     std::string location = "memory mapped file for " + dex_file_location;
1932     std::unique_ptr<const DexFile> output_dex_file(DexFile::Open(mem_map_->Begin(),
1933                                                                  mem_map_->Size(),
1934                                                                  location,
1935                                                                  header_->Checksum(),
1936                                                                  /*oat_dex_file*/ nullptr,
1937                                                                  /*verify*/ true,
1938                                                                  /*verify_checksum*/ false,
1939                                                                  &error_msg));
1940     DCHECK(output_dex_file != nullptr) << "Failed to re-open output file:" << error_msg;
1941   }
1942   // Do IR-level comparison between input and output. This check ignores potential differences
1943   // due to layout, so offsets are not checked. Instead, it checks the data contents of each item.
1944   if (kIsDebugBuild || options_.verify_output_) {
1945     std::unique_ptr<dex_ir::Header> orig_header(dex_ir::DexIrBuilder(*dex_file));
1946     CHECK(VerifyOutputDexFile(orig_header.get(), header_, &error_msg)) << error_msg;
1947   }
1948 }
1949 
1950 /*
1951  * Dumps the requested sections of the file.
1952  */
ProcessDexFile(const char * file_name,const DexFile * dex_file,size_t dex_file_index)1953 void DexLayout::ProcessDexFile(const char* file_name,
1954                                const DexFile* dex_file,
1955                                size_t dex_file_index) {
1956   std::unique_ptr<dex_ir::Header> header(dex_ir::DexIrBuilder(*dex_file));
1957   SetHeader(header.get());
1958 
1959   if (options_.verbose_) {
1960     fprintf(out_file_, "Opened '%s', DEX version '%.3s'\n",
1961             file_name, dex_file->GetHeader().magic_ + 4);
1962   }
1963 
1964   if (options_.visualize_pattern_) {
1965     VisualizeDexLayout(header_, dex_file, dex_file_index, info_);
1966     return;
1967   }
1968 
1969   if (options_.show_section_statistics_) {
1970     ShowDexSectionStatistics(header_, dex_file_index);
1971     return;
1972   }
1973 
1974   // Dump dex file.
1975   if (options_.dump_) {
1976     DumpDexFile();
1977   }
1978 
1979   // Output dex file as file or memmap.
1980   if (options_.output_dex_directory_ != nullptr || options_.output_to_memmap_) {
1981     if (info_ != nullptr) {
1982       LayoutOutputFile(dex_file);
1983     }
1984     OutputDexFile(dex_file);
1985   }
1986 }
1987 
1988 /*
1989  * Processes a single file (either direct .dex or indirect .zip/.jar/.apk).
1990  */
ProcessFile(const char * file_name)1991 int DexLayout::ProcessFile(const char* file_name) {
1992   if (options_.verbose_) {
1993     fprintf(out_file_, "Processing '%s'...\n", file_name);
1994   }
1995 
1996   // If the file is not a .dex file, the function tries .zip/.jar/.apk files,
1997   // all of which are Zip archives with "classes.dex" inside.
1998   const bool verify_checksum = !options_.ignore_bad_checksum_;
1999   std::string error_msg;
2000   std::vector<std::unique_ptr<const DexFile>> dex_files;
2001   if (!DexFile::Open(file_name, file_name, verify_checksum, &error_msg, &dex_files)) {
2002     // Display returned error message to user. Note that this error behavior
2003     // differs from the error messages shown by the original Dalvik dexdump.
2004     fputs(error_msg.c_str(), stderr);
2005     fputc('\n', stderr);
2006     return -1;
2007   }
2008 
2009   // Success. Either report checksum verification or process
2010   // all dex files found in given file.
2011   if (options_.checksum_only_) {
2012     fprintf(out_file_, "Checksum verified\n");
2013   } else {
2014     for (size_t i = 0; i < dex_files.size(); i++) {
2015       ProcessDexFile(file_name, dex_files[i].get(), i);
2016     }
2017   }
2018   return 0;
2019 }
2020 
2021 }  // namespace art
2022