• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2016 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  *
16  * Implementation file of the dexlayout utility.
17  *
18  * This is a tool to read dex files into an internal representation,
19  * reorganize the representation, and emit dex files with a better
20  * file layout.
21  */
22 
23 #include "dexlayout.h"
24 
25 #include <inttypes.h>
26 #include <stdio.h>
27 
28 #include <iostream>
29 #include <memory>
30 #include <sstream>
31 #include <unordered_set>
32 #include <vector>
33 
34 #include "android-base/stringprintf.h"
35 
36 #include "base/logging.h"  // For VLOG_IS_ON.
37 #include "base/hiddenapi_flags.h"
38 #include "base/mem_map.h"
39 #include "base/mman.h"  // For the PROT_* and MAP_* constants.
40 #include "base/os.h"
41 #include "base/utils.h"
42 #include "dex/art_dex_file_loader.h"
43 #include "dex/descriptors_names.h"
44 #include "dex/dex_file-inl.h"
45 #include "dex/dex_file_layout.h"
46 #include "dex/dex_file_loader.h"
47 #include "dex/dex_file_types.h"
48 #include "dex/dex_file_verifier.h"
49 #include "dex/dex_instruction-inl.h"
50 #include "dex_ir_builder.h"
51 #include "dex_verify.h"
52 #include "dex_visualize.h"
53 #include "dex_writer.h"
54 #include "profile/profile_compilation_info.h"
55 
56 namespace art {
57 
58 using android::base::StringPrintf;
59 
/*
 * Selects which table of access-flag names CreateAccessFlagStr() uses:
 * the class, method, or field table. kAccessForMAX is the number of
 * selectors and sizes the first dimension of that name table.
 */
enum AccessFor {
  kAccessForClass = 0, kAccessForMethod = 1, kAccessForField = 2, kAccessForMAX
};
// Number of access-flag bits, counted from bit 0, that have a name in each table.
const int kNumFlags = 18;
67 
/*
 * Reads the 16-bit value stored least-significant byte first at "src".
 */
static inline uint16_t Get2LE(unsigned char const* src) {
  const unsigned int low_byte = src[0];
  const unsigned int high_byte = src[1];
  return static_cast<uint16_t>((high_byte << 8) | low_byte);
}
74 
/*
 * Extracts the simple class name from a class type descriptor: everything
 * after the last '/' (or after the leading 'L' when there is no package),
 * with the final character (normally ';') removed.
 * For example, "Ljava/lang/String;" becomes "String" and "LFoo;" becomes "Foo".
 *
 * A null or empty descriptor, or one with nothing after the prefix, yields
 * an empty string.
 */
static std::string DescriptorClassToName(const char* str) {
  if (str == nullptr) {
    return std::string();
  }
  const std::string descriptor(str);

  // Index of the first character of the class name: one past the last '/',
  // or one past the leading 'L' when the descriptor has no package part.
  const size_t last_slash = descriptor.rfind('/');
  const size_t name_begin = (last_slash == std::string::npos) ? 1u : last_slash + 1;

  // Guard before subtracting: for an empty descriptor name_begin is already
  // past the end, and substr() would throw std::out_of_range.
  if (name_begin >= descriptor.size()) {
    return std::string();
  }

  // Drop the trailing ';'.
  return descriptor.substr(name_begin, descriptor.size() - 1 - name_begin);
}
95 
/*
 * Maps a boolean to the literal text "true" or "false".
 */
static const char* StrBool(bool val) {
  if (val) {
    return "true";
  }
  return "false";
}
102 
/*
 * Maps a boolean to the text "true" or "false" wrapped in double quotes.
 */
static const char* QuotedBool(bool val) {
  if (val) {
    return "\"true\"";
  }
  return "\"false\"";
}
109 
110 /*
111  * Returns a quoted string representing the access flags.
112  */
QuotedVisibility(uint32_t access_flags)113 static const char* QuotedVisibility(uint32_t access_flags) {
114   if (access_flags & kAccPublic) {
115     return "\"public\"";
116   } else if (access_flags & kAccProtected) {
117     return "\"protected\"";
118   } else if (access_flags & kAccPrivate) {
119     return "\"private\"";
120   } else {
121     return "\"package\"";
122   }
123 }
124 
/*
 * Returns how many bits are set in a 32-bit word.
 *
 * Works by summing adjacent bit fields in parallel: first pairs of bits,
 * then nibbles, then bytes; the final multiply adds the four byte counts
 * into the top byte.
 */
static int CountOnes(uint32_t val) {
  const uint32_t pair_sums = val - ((val >> 1) & 0x55555555u);
  const uint32_t nibble_sums = (pair_sums & 0x33333333u) + ((pair_sums >> 2) & 0x33333333u);
  const uint32_t byte_sums = (nibble_sums + (nibble_sums >> 4)) & 0x0F0F0F0Fu;
  return static_cast<int>((byte_sums * 0x01010101u) >> 24);
}
133 
134 /*
135  * Creates a new string with human-readable access flags.
136  *
137  * In the base language the access_flags fields are type uint16_t; in Dalvik they're uint32_t.
138  */
CreateAccessFlagStr(uint32_t flags,AccessFor for_what)139 static char* CreateAccessFlagStr(uint32_t flags, AccessFor for_what) {
140   static const char* kAccessStrings[kAccessForMAX][kNumFlags] = {
141     {
142       "PUBLIC",                /* 0x00001 */
143       "PRIVATE",               /* 0x00002 */
144       "PROTECTED",             /* 0x00004 */
145       "STATIC",                /* 0x00008 */
146       "FINAL",                 /* 0x00010 */
147       "?",                     /* 0x00020 */
148       "?",                     /* 0x00040 */
149       "?",                     /* 0x00080 */
150       "?",                     /* 0x00100 */
151       "INTERFACE",             /* 0x00200 */
152       "ABSTRACT",              /* 0x00400 */
153       "?",                     /* 0x00800 */
154       "SYNTHETIC",             /* 0x01000 */
155       "ANNOTATION",            /* 0x02000 */
156       "ENUM",                  /* 0x04000 */
157       "?",                     /* 0x08000 */
158       "VERIFIED",              /* 0x10000 */
159       "OPTIMIZED",             /* 0x20000 */
160     }, {
161       "PUBLIC",                /* 0x00001 */
162       "PRIVATE",               /* 0x00002 */
163       "PROTECTED",             /* 0x00004 */
164       "STATIC",                /* 0x00008 */
165       "FINAL",                 /* 0x00010 */
166       "SYNCHRONIZED",          /* 0x00020 */
167       "BRIDGE",                /* 0x00040 */
168       "VARARGS",               /* 0x00080 */
169       "NATIVE",                /* 0x00100 */
170       "?",                     /* 0x00200 */
171       "ABSTRACT",              /* 0x00400 */
172       "STRICT",                /* 0x00800 */
173       "SYNTHETIC",             /* 0x01000 */
174       "?",                     /* 0x02000 */
175       "?",                     /* 0x04000 */
176       "MIRANDA",               /* 0x08000 */
177       "CONSTRUCTOR",           /* 0x10000 */
178       "DECLARED_SYNCHRONIZED", /* 0x20000 */
179     }, {
180       "PUBLIC",                /* 0x00001 */
181       "PRIVATE",               /* 0x00002 */
182       "PROTECTED",             /* 0x00004 */
183       "STATIC",                /* 0x00008 */
184       "FINAL",                 /* 0x00010 */
185       "?",                     /* 0x00020 */
186       "VOLATILE",              /* 0x00040 */
187       "TRANSIENT",             /* 0x00080 */
188       "?",                     /* 0x00100 */
189       "?",                     /* 0x00200 */
190       "?",                     /* 0x00400 */
191       "?",                     /* 0x00800 */
192       "SYNTHETIC",             /* 0x01000 */
193       "?",                     /* 0x02000 */
194       "ENUM",                  /* 0x04000 */
195       "?",                     /* 0x08000 */
196       "?",                     /* 0x10000 */
197       "?",                     /* 0x20000 */
198     },
199   };
200 
201   // Allocate enough storage to hold the expected number of strings,
202   // plus a space between each.  We over-allocate, using the longest
203   // string above as the base metric.
204   const int kLongest = 21;  // The strlen of longest string above.
205   const int count = CountOnes(flags);
206   char* str;
207   char* cp;
208   cp = str = reinterpret_cast<char*>(malloc(count * (kLongest + 1) + 1));
209 
210   for (int i = 0; i < kNumFlags; i++) {
211     if (flags & 0x01) {
212       const char* accessStr = kAccessStrings[for_what][i];
213       const int len = strlen(accessStr);
214       if (cp != str) {
215         *cp++ = ' ';
216       }
217       memcpy(cp, accessStr, len);
218       cp += len;
219     }
220     flags >>= 1;
221   }  // for
222 
223   *cp = '\0';
224   return str;
225 }
226 
GetHiddenapiFlagStr(uint32_t hiddenapi_flags)227 static std::string GetHiddenapiFlagStr(uint32_t hiddenapi_flags) {
228   std::stringstream ss;
229   hiddenapi::ApiList(hiddenapi_flags).Dump(ss);
230   std::string api_list = ss.str();
231   std::transform(api_list.begin(), api_list.end(), api_list.begin(), ::toupper);
232   return api_list;
233 }
234 
GetSignatureForProtoId(const dex_ir::ProtoId * proto)235 static std::string GetSignatureForProtoId(const dex_ir::ProtoId* proto) {
236   if (proto == nullptr) {
237     return "<no signature>";
238   }
239 
240   std::string result("(");
241   const dex_ir::TypeList* type_list = proto->Parameters();
242   if (type_list != nullptr) {
243     for (const dex_ir::TypeId* type_id : *type_list->GetTypeList()) {
244       result += type_id->GetStringId()->Data();
245     }
246   }
247   result += ")";
248   result += proto->ReturnType()->GetStringId()->Data();
249   return result;
250 }
251 
/*
 * Writes a printable rendering of the "len" bytes at "data" into "out" and
 * NUL-terminates it:
 *   - a NUL byte becomes the two characters backslash, '0';
 *   - a newline becomes the two characters backslash, 'n';
 *   - any other byte below 0x20 becomes '.';
 *   - bytes at or above 0x80 become '?';
 *   - everything else is copied unchanged.
 *
 * Each input byte expands to at most two output characters, so "out" must
 * be able to hold (2*len)+1 bytes.
 */
static void Asciify(char* out, const unsigned char* data, size_t len) {
  const unsigned char* const end = data + len;
  for (const unsigned char* cursor = data; cursor != end; ++cursor) {
    const unsigned char ch = *cursor;
    if (ch >= 0x80) {
      *out++ = '?';
    } else if (ch >= 0x20) {
      *out++ = ch;
    } else if (ch == '\0') {
      *out++ = '\\';
      *out++ = '0';
    } else if (ch == '\n') {
      *out++ = '\\';
      *out++ = 'n';
    } else {
      // Could do more here, but the remaining control characters are not needed yet.
      *out++ = '.';
    }
  }
  *out = '\0';
}
/* clang-format off */
/*
 * For each byte value, the number of characters escapeString() emits for it:
 *   1 - the byte itself, unchanged;
 *   2 - a backslash followed by one character (\b \t \n \f \r \" \\);
 *   4 - a backslash followed by three octal digits.
 */
constexpr char kEscapedLength[256] = {
    4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 4, 2, 2, 4, 4,  // \b, \t, \n, \f, \r
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // '"'
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // '0'..'9'
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 'A'..'O'
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1,  // 'P'..'Z', '\'
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 'a'..'o'
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4,  // 'p'..'z', DEL
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // Unicode range, keep
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
};
/* clang-format on */

/*
 * Returns true when at least one byte of "s" would be changed by
 * escapeString(), i.e. its kEscapedLength entry is not 1.
 */
static bool needsEscape(std::string_view s) {
  for (unsigned char c : s) {
    if (kEscapedLength[c] != 1) {
      return true;
    }
  }
  return false;
}

/*
 * Returns a copy of "s" in which control characters, DEL, double quotes and
 * backslashes are replaced by escape sequences, as classified by
 * kEscapedLength. Bytes at or above 0x80 are kept as they are.
 */
std::string escapeString(std::string_view s) {
  std::string escaped;
  escaped.reserve(s.size());
  for (unsigned char c : s) {
    switch (kEscapedLength[c]) {
      case 1:
        escaped.push_back(static_cast<char>(c));
        break;
      case 2:
        switch (c) {
          case '\b':
            escaped += "\\b";
            break;
          case '\f':
            escaped += "\\f";
            break;
          case '\n':
            escaped += "\\n";
            break;
          case '\r':
            escaped += "\\r";
            break;
          case '\t':
            escaped += "\\t";
            break;
          case '\"':
            escaped += "\\\"";
            break;
          case '\\':
            escaped += "\\\\";
            break;
        }
        break;
      case 4:
        // Each digit is converted to char explicitly: '0' + n has type int, and
        // emitting the int would produce its decimal value (e.g. "48") rather
        // than the digit character.
        escaped.push_back('\\');
        escaped.push_back(static_cast<char>('0' + (c / 64)));
        escaped.push_back(static_cast<char>('0' + ((c % 64) / 8)));
        escaped.push_back(static_cast<char>('0' + (c % 8)));
        break;
    }
  }
  return escaped;
}
357 
358 /*
359  * Dumps a string value with some escape characters.
360  */
DumpEscapedString(std::string_view s,FILE * out_file)361 static void DumpEscapedString(std::string_view s, FILE* out_file) {
362   fputs("\"", out_file);
363   if (needsEscape(s)) {
364     std::string e = escapeString(s);
365     fputs(e.c_str(), out_file);
366   } else {
367     for (char c : s) {
368       fputc(c, out_file);
369     }
370   }
371   fputs("\"", out_file);
372 }
373 
/*
 * Writes the NUL-terminated string "p" to "out_file" in a form usable as an
 * XML attribute value: '&', '<', '>' and '"' are replaced by named entities,
 * tab, newline and carriage return by numeric character references, and all
 * other characters are written as they are.
 */
static void DumpXmlAttribute(const char* p, FILE* out_file) {
  while (*p != '\0') {
    const char ch = *p++;
    const char* entity = nullptr;
    switch (ch) {
      case '&':  entity = "&amp;";  break;
      case '<':  entity = "&lt;";   break;
      case '>':  entity = "&gt;";   break;
      case '"':  entity = "&quot;"; break;
      case '\t': entity = "&#x9;";  break;
      case '\n': entity = "&#xA;";  break;
      case '\r': entity = "&#xD;";  break;
      default:   break;
    }
    if (entity == nullptr) {
      putc(ch, out_file);
    } else {
      fputs(entity, out_file);
    }
  }
}
406 
/*
 * Helper for instruction dumping: builds the text for the index operand of
 * the given instruction (the referenced type, string, method, field, ... plus
 * a trailing "// kind@index" note).
 *
 * "buf_size" is the size of the buffer to try first. If the formatted text
 * does not fit, the function calls itself again with the exact size that
 * snprintf() reported, so the returned buffer always holds the full,
 * NUL-terminated text.
 */
static std::unique_ptr<char[]> IndexString(dex_ir::Header* header,
                                           const Instruction* dec_insn,
                                           size_t buf_size) {
  std::unique_ptr<char[]> buf(new char[buf_size]);
  // Determine index and width of the string. "width" is the minimum number of
  // hex digits used when the index is printed (the '*' in the "%0*x" formats).
  uint32_t index = 0;
  uint32_t secondary_index = dex::kDexNoIndex;
  uint32_t width = 4;
  switch (Instruction::FormatOf(dec_insn->Opcode())) {
    // SOME NOT SUPPORTED:
    // case Instruction::k20bc:
    case Instruction::k21c:
    case Instruction::k35c:
    // case Instruction::k35ms:
    case Instruction::k3rc:
    // case Instruction::k3rms:
    // case Instruction::k35mi:
    // case Instruction::k3rmi:
      index = dec_insn->VRegB();
      width = 4;
      break;
    case Instruction::k31c:
      index = dec_insn->VRegB();
      width = 8;
      break;
    case Instruction::k22c:
    // case Instruction::k22cs:
      index = dec_insn->VRegC();
      width = 4;
      break;
    case Instruction::k45cc:
    case Instruction::k4rcc:
      // These formats carry two indices: a method (B) and a prototype (H).
      index = dec_insn->VRegB();
      secondary_index = dec_insn->VRegH();
      width = 4;
      break;
    default:
      // Any other format keeps index 0 and width 4.
      break;
  }  // switch

  // Determine index type and format the text. Every branch records the length
  // snprintf() reports so the fit check after the switch can detect truncation.
  size_t outSize = 0;
  switch (Instruction::IndexTypeOf(dec_insn->Opcode())) {
    case Instruction::kIndexUnknown:
      // This function should never get called for this type, but do
      // something sensible here, just to help with debugging.
      outSize = snprintf(buf.get(), buf_size, "<unknown-index>");
      break;
    case Instruction::kIndexNone:
      // This function should never get called for this type, but do
      // something sensible here, just to help with debugging.
      outSize = snprintf(buf.get(), buf_size, "<no-index>");
      break;
    case Instruction::kIndexTypeRef:
      // An index outside the type id table is shown as "<type?>".
      if (index < header->TypeIds().Size()) {
        const char* tp = header->TypeIds()[index]->GetStringId()->Data();
        outSize = snprintf(buf.get(), buf_size, "%s // type@%0*x", tp, width, index);
      } else {
        outSize = snprintf(buf.get(), buf_size, "<type?> // type@%0*x", width, index);
      }
      break;
    case Instruction::kIndexStringRef:
      // An index outside the string id table is shown as "<string?>".
      if (index < header->StringIds().Size()) {
        const char* st = header->StringIds()[index]->Data();
        // Strings are shown in double quotes, escaped only when necessary.
        if (needsEscape(std::string_view(st))) {
          std::string escaped = escapeString(st);
          outSize =
              snprintf(buf.get(), buf_size, "\"%s\" // string@%0*x", escaped.c_str(), width, index);
        } else {
          outSize = snprintf(buf.get(), buf_size, "\"%s\" // string@%0*x", st, width, index);
        }
      } else {
        outSize = snprintf(buf.get(), buf_size, "<string?> // string@%0*x", width, index);
      }
      break;
    case Instruction::kIndexMethodRef:
      // Shown as "<class descriptor>.<name>:<signature>".
      if (index < header->MethodIds().Size()) {
        dex_ir::MethodId* method_id = header->MethodIds()[index];
        const char* name = method_id->Name()->Data();
        std::string type_descriptor = GetSignatureForProtoId(method_id->Proto());
        const char* back_descriptor = method_id->Class()->GetStringId()->Data();
        outSize = snprintf(buf.get(), buf_size, "%s.%s:%s // method@%0*x",
                           back_descriptor, name, type_descriptor.c_str(), width, index);
      } else {
        outSize = snprintf(buf.get(), buf_size, "<method?> // method@%0*x", width, index);
      }
      break;
    case Instruction::kIndexFieldRef:
      // Shown as "<class descriptor>.<name>:<field type descriptor>".
      if (index < header->FieldIds().Size()) {
        dex_ir::FieldId* field_id = header->FieldIds()[index];
        const char* name = field_id->Name()->Data();
        const char* type_descriptor = field_id->Type()->GetStringId()->Data();
        const char* back_descriptor = field_id->Class()->GetStringId()->Data();
        outSize = snprintf(buf.get(), buf_size, "%s.%s:%s // field@%0*x",
                           back_descriptor, name, type_descriptor, width, index);
      } else {
        outSize = snprintf(buf.get(), buf_size, "<field?> // field@%0*x", width, index);
      }
      break;
    case Instruction::kIndexVtableOffset:
      outSize = snprintf(buf.get(), buf_size, "[%0*x] // vtable #%0*x",
                         width, index, width, index);
      break;
    case Instruction::kIndexFieldOffset:
      outSize = snprintf(buf.get(), buf_size, "[obj+%0*x]", width, index);
      break;
    case Instruction::kIndexMethodAndProtoRef: {
      // Each half keeps its "<...?>" placeholder when its index is outside
      // the corresponding id table.
      std::string method("<method?>");
      std::string proto("<proto?>");
      if (index < header->MethodIds().Size()) {
        dex_ir::MethodId* method_id = header->MethodIds()[index];
        const char* name = method_id->Name()->Data();
        std::string type_descriptor = GetSignatureForProtoId(method_id->Proto());
        const char* back_descriptor = method_id->Class()->GetStringId()->Data();
        method = StringPrintf("%s.%s:%s", back_descriptor, name, type_descriptor.c_str());
      }
      if (secondary_index < header->ProtoIds().Size()) {
        dex_ir::ProtoId* proto_id = header->ProtoIds()[secondary_index];
        proto = GetSignatureForProtoId(proto_id);
      }
      outSize = snprintf(buf.get(), buf_size, "%s, %s // method@%0*x, proto@%0*x",
                         method.c_str(), proto.c_str(), width, index, width, secondary_index);
    }
    break;
    // SOME NOT SUPPORTED:
    // case Instruction::kIndexVaries:
    // case Instruction::kIndexInlineMethod:
    default:
      outSize = snprintf(buf.get(), buf_size, "<?>");
      break;
  }  // switch

  // Determine success of string construction.
  if (outSize >= buf_size) {
    // The buffer wasn't big enough; retry with computed size. Note: snprintf()
    // doesn't count the '\0' as part of its returned size, so we add explicit
    // space for it here.
    return IndexString(header, dec_insn, outSize + 1);
  }
  return buf;
}
553 
554 /*
555  * Dumps encoded annotation.
556  */
DumpEncodedAnnotation(dex_ir::EncodedAnnotation * annotation)557 void DexLayout::DumpEncodedAnnotation(dex_ir::EncodedAnnotation* annotation) {
558   fputs(annotation->GetType()->GetStringId()->Data(), out_file_);
559   // Display all name=value pairs.
560   for (auto& subannotation : *annotation->GetAnnotationElements()) {
561     fputc(' ', out_file_);
562     fputs(subannotation->GetName()->Data(), out_file_);
563     fputc('=', out_file_);
564     DumpEncodedValue(subannotation->GetValue());
565   }
566 }
567 /*
568  * Dumps encoded value.
569  */
DumpEncodedValue(const dex_ir::EncodedValue * data)570 void DexLayout::DumpEncodedValue(const dex_ir::EncodedValue* data) {
571   switch (data->Type()) {
572     case DexFile::kDexAnnotationByte:
573       fprintf(out_file_, "%" PRId8, data->GetByte());
574       break;
575     case DexFile::kDexAnnotationShort:
576       fprintf(out_file_, "%" PRId16, data->GetShort());
577       break;
578     case DexFile::kDexAnnotationChar:
579       fprintf(out_file_, "%" PRIu16, data->GetChar());
580       break;
581     case DexFile::kDexAnnotationInt:
582       fprintf(out_file_, "%" PRId32, data->GetInt());
583       break;
584     case DexFile::kDexAnnotationLong:
585       fprintf(out_file_, "%" PRId64, data->GetLong());
586       break;
587     case DexFile::kDexAnnotationFloat: {
588       fprintf(out_file_, "%g", data->GetFloat());
589       break;
590     }
591     case DexFile::kDexAnnotationDouble: {
592       fprintf(out_file_, "%g", data->GetDouble());
593       break;
594     }
595     case DexFile::kDexAnnotationString: {
596       dex_ir::StringId* string_id = data->GetStringId();
597       if (options_.output_format_ == kOutputPlain) {
598         DumpEscapedString(string_id->Data(), out_file_);
599       } else {
600         DumpXmlAttribute(string_id->Data(), out_file_);
601       }
602       break;
603     }
604     case DexFile::kDexAnnotationType: {
605       dex_ir::TypeId* type_id = data->GetTypeId();
606       fputs(type_id->GetStringId()->Data(), out_file_);
607       break;
608     }
609     case DexFile::kDexAnnotationField:
610     case DexFile::kDexAnnotationEnum: {
611       dex_ir::FieldId* field_id = data->GetFieldId();
612       fputs(field_id->Name()->Data(), out_file_);
613       break;
614     }
615     case DexFile::kDexAnnotationMethod: {
616       dex_ir::MethodId* method_id = data->GetMethodId();
617       fputs(method_id->Name()->Data(), out_file_);
618       break;
619     }
620     case DexFile::kDexAnnotationArray: {
621       fputc('{', out_file_);
622       // Display all elements.
623       for (auto& value : *data->GetEncodedArray()->GetEncodedValues()) {
624         fputc(' ', out_file_);
625         DumpEncodedValue(value.get());
626       }
627       fputs(" }", out_file_);
628       break;
629     }
630     case DexFile::kDexAnnotationAnnotation: {
631       DumpEncodedAnnotation(data->GetEncodedAnnotation());
632       break;
633     }
634     case DexFile::kDexAnnotationNull:
635       fputs("null", out_file_);
636       break;
637     case DexFile::kDexAnnotationBoolean:
638       fputs(StrBool(data->GetBoolean()), out_file_);
639       break;
640     default:
641       fputs("????", out_file_);
642       break;
643   }  // switch
644 }
645 
646 /*
647  * Dumps the file header.
648  */
DumpFileHeader()649 void DexLayout::DumpFileHeader() {
650   char sanitized[8 * 2 + 1];
651   fprintf(out_file_, "DEX file header:\n");
652   Asciify(sanitized, header_->Magic(), 8);
653   fprintf(out_file_, "magic               : '%s'\n", sanitized);
654   fprintf(out_file_, "checksum            : %08x\n", header_->Checksum());
655   fprintf(out_file_, "signature           : %02x%02x...%02x%02x\n",
656           header_->Signature()[0], header_->Signature()[1],
657           header_->Signature()[DexFile::kSha1DigestSize - 2],
658           header_->Signature()[DexFile::kSha1DigestSize - 1]);
659   fprintf(out_file_, "file_size           : %d\n", header_->FileSize());
660   fprintf(out_file_, "header_size         : %d\n", header_->HeaderSize());
661   fprintf(out_file_, "link_size           : %d\n", header_->LinkSize());
662   fprintf(out_file_, "link_off            : %d (0x%06x)\n",
663           header_->LinkOffset(), header_->LinkOffset());
664   fprintf(out_file_, "string_ids_size     : %d\n", header_->StringIds().Size());
665   fprintf(out_file_, "string_ids_off      : %d (0x%06x)\n",
666           header_->StringIds().GetOffset(), header_->StringIds().GetOffset());
667   fprintf(out_file_, "type_ids_size       : %d\n", header_->TypeIds().Size());
668   fprintf(out_file_, "type_ids_off        : %d (0x%06x)\n",
669           header_->TypeIds().GetOffset(), header_->TypeIds().GetOffset());
670   fprintf(out_file_, "proto_ids_size      : %d\n", header_->ProtoIds().Size());
671   fprintf(out_file_, "proto_ids_off       : %d (0x%06x)\n",
672           header_->ProtoIds().GetOffset(), header_->ProtoIds().GetOffset());
673   fprintf(out_file_, "field_ids_size      : %d\n", header_->FieldIds().Size());
674   fprintf(out_file_, "field_ids_off       : %d (0x%06x)\n",
675           header_->FieldIds().GetOffset(), header_->FieldIds().GetOffset());
676   fprintf(out_file_, "method_ids_size     : %d\n", header_->MethodIds().Size());
677   fprintf(out_file_, "method_ids_off      : %d (0x%06x)\n",
678           header_->MethodIds().GetOffset(), header_->MethodIds().GetOffset());
679   fprintf(out_file_, "class_defs_size     : %d\n", header_->ClassDefs().Size());
680   fprintf(out_file_, "class_defs_off      : %d (0x%06x)\n",
681           header_->ClassDefs().GetOffset(), header_->ClassDefs().GetOffset());
682   fprintf(out_file_, "data_size           : %d\n", header_->DataSize());
683   fprintf(out_file_, "data_off            : %d (0x%06x)\n\n",
684           header_->DataOffset(), header_->DataOffset());
685 }
686 
687 /*
688  * Dumps a class_def_item.
689  */
DumpClassDef(int idx)690 void DexLayout::DumpClassDef(int idx) {
691   // General class information.
692   dex_ir::ClassDef* class_def = header_->ClassDefs()[idx];
693   fprintf(out_file_, "Class #%d header:\n", idx);
694   fprintf(out_file_, "class_idx           : %d\n", class_def->ClassType()->GetIndex());
695   fprintf(out_file_, "access_flags        : %d (0x%04x)\n",
696           class_def->GetAccessFlags(), class_def->GetAccessFlags());
697   uint32_t superclass_idx =  class_def->Superclass() == nullptr ?
698       DexFile::kDexNoIndex16 : class_def->Superclass()->GetIndex();
699   fprintf(out_file_, "superclass_idx      : %d\n", superclass_idx);
700   fprintf(out_file_, "interfaces_off      : %d (0x%06x)\n",
701           class_def->InterfacesOffset(), class_def->InterfacesOffset());
702   uint32_t source_file_offset = 0xffffffffU;
703   if (class_def->SourceFile() != nullptr) {
704     source_file_offset = class_def->SourceFile()->GetIndex();
705   }
706   fprintf(out_file_, "source_file_idx     : %d\n", source_file_offset);
707   uint32_t annotations_offset = 0;
708   if (class_def->Annotations() != nullptr) {
709     annotations_offset = class_def->Annotations()->GetOffset();
710   }
711   fprintf(out_file_, "annotations_off     : %d (0x%06x)\n",
712           annotations_offset, annotations_offset);
713   if (class_def->GetClassData() == nullptr) {
714     fprintf(out_file_, "class_data_off      : %d (0x%06x)\n", 0, 0);
715   } else {
716     fprintf(out_file_, "class_data_off      : %d (0x%06x)\n",
717             class_def->GetClassData()->GetOffset(), class_def->GetClassData()->GetOffset());
718   }
719 
720   // Fields and methods.
721   dex_ir::ClassData* class_data = class_def->GetClassData();
722   if (class_data != nullptr && class_data->StaticFields() != nullptr) {
723     fprintf(out_file_, "static_fields_size  : %zu\n", class_data->StaticFields()->size());
724   } else {
725     fprintf(out_file_, "static_fields_size  : 0\n");
726   }
727   if (class_data != nullptr && class_data->InstanceFields() != nullptr) {
728     fprintf(out_file_, "instance_fields_size: %zu\n", class_data->InstanceFields()->size());
729   } else {
730     fprintf(out_file_, "instance_fields_size: 0\n");
731   }
732   if (class_data != nullptr && class_data->DirectMethods() != nullptr) {
733     fprintf(out_file_, "direct_methods_size : %zu\n", class_data->DirectMethods()->size());
734   } else {
735     fprintf(out_file_, "direct_methods_size : 0\n");
736   }
737   if (class_data != nullptr && class_data->VirtualMethods() != nullptr) {
738     fprintf(out_file_, "virtual_methods_size: %zu\n", class_data->VirtualMethods()->size());
739   } else {
740     fprintf(out_file_, "virtual_methods_size: 0\n");
741   }
742   fprintf(out_file_, "\n");
743 }
744 
745 /**
746  * Dumps an annotation set item.
747  */
DumpAnnotationSetItem(dex_ir::AnnotationSetItem * set_item)748 void DexLayout::DumpAnnotationSetItem(dex_ir::AnnotationSetItem* set_item) {
749   if (set_item == nullptr || set_item->GetItems()->size() == 0) {
750     fputs("  empty-annotation-set\n", out_file_);
751     return;
752   }
753   for (dex_ir::AnnotationItem* annotation : *set_item->GetItems()) {
754     if (annotation == nullptr) {
755       continue;
756     }
757     fputs("  ", out_file_);
758     switch (annotation->GetVisibility()) {
759       case DexFile::kDexVisibilityBuild:   fputs("VISIBILITY_BUILD ",   out_file_); break;
760       case DexFile::kDexVisibilityRuntime: fputs("VISIBILITY_RUNTIME ", out_file_); break;
761       case DexFile::kDexVisibilitySystem:  fputs("VISIBILITY_SYSTEM ",  out_file_); break;
762       default:                             fputs("VISIBILITY_UNKNOWN ", out_file_); break;
763     }  // switch
764     DumpEncodedAnnotation(annotation->GetAnnotation());
765     fputc('\n', out_file_);
766   }
767 }
768 
769 /*
770  * Dumps class annotations.
771  */
DumpClassAnnotations(int idx)772 void DexLayout::DumpClassAnnotations(int idx) {
773   dex_ir::ClassDef* class_def = header_->ClassDefs()[idx];
774   dex_ir::AnnotationsDirectoryItem* annotations_directory = class_def->Annotations();
775   if (annotations_directory == nullptr) {
776     return;  // none
777   }
778 
779   fprintf(out_file_, "Class #%d annotations:\n", idx);
780 
781   dex_ir::AnnotationSetItem* class_set_item = annotations_directory->GetClassAnnotation();
782   dex_ir::FieldAnnotationVector* fields = annotations_directory->GetFieldAnnotations();
783   dex_ir::MethodAnnotationVector* methods = annotations_directory->GetMethodAnnotations();
784   dex_ir::ParameterAnnotationVector* parameters = annotations_directory->GetParameterAnnotations();
785 
786   // Annotations on the class itself.
787   if (class_set_item != nullptr) {
788     fprintf(out_file_, "Annotations on class\n");
789     DumpAnnotationSetItem(class_set_item);
790   }
791 
792   // Annotations on fields.
793   if (fields != nullptr) {
794     for (auto& field : *fields) {
795       const dex_ir::FieldId* field_id = field->GetFieldId();
796       const uint32_t field_idx = field_id->GetIndex();
797       const char* field_name = field_id->Name()->Data();
798       fprintf(out_file_, "Annotations on field #%u '%s'\n", field_idx, field_name);
799       DumpAnnotationSetItem(field->GetAnnotationSetItem());
800     }
801   }
802 
803   // Annotations on methods.
804   if (methods != nullptr) {
805     for (auto& method : *methods) {
806       const dex_ir::MethodId* method_id = method->GetMethodId();
807       const uint32_t method_idx = method_id->GetIndex();
808       const char* method_name = method_id->Name()->Data();
809       fprintf(out_file_, "Annotations on method #%u '%s'\n", method_idx, method_name);
810       DumpAnnotationSetItem(method->GetAnnotationSetItem());
811     }
812   }
813 
814   // Annotations on method parameters.
815   if (parameters != nullptr) {
816     for (auto& parameter : *parameters) {
817       const dex_ir::MethodId* method_id = parameter->GetMethodId();
818       const uint32_t method_idx = method_id->GetIndex();
819       const char* method_name = method_id->Name()->Data();
820       fprintf(out_file_, "Annotations on method #%u '%s' parameters\n", method_idx, method_name);
821       uint32_t j = 0;
822       for (dex_ir::AnnotationSetItem* annotation : *parameter->GetAnnotations()->GetItems()) {
823         fprintf(out_file_, "#%u\n", j);
824         DumpAnnotationSetItem(annotation);
825         ++j;
826       }
827     }
828   }
829 
830   fputc('\n', out_file_);
831 }
832 
833 /*
834  * Dumps an interface that a class declares to implement.
835  */
DumpInterface(const dex_ir::TypeId * type_item,int i)836 void DexLayout::DumpInterface(const dex_ir::TypeId* type_item, int i) {
837   const char* interface_name = type_item->GetStringId()->Data();
838   if (options_.output_format_ == kOutputPlain) {
839     fprintf(out_file_, "    #%d              : '%s'\n", i, interface_name);
840   } else {
841     std::string dot(DescriptorToDot(interface_name));
842     fprintf(out_file_, "<implements name=\"%s\">\n</implements>\n", dot.c_str());
843   }
844 }
845 
846 /*
847  * Dumps the catches table associated with the code.
848  */
DumpCatches(const dex_ir::CodeItem * code)849 void DexLayout::DumpCatches(const dex_ir::CodeItem* code) {
850   const uint16_t tries_size = code->TriesSize();
851 
852   // No catch table.
853   if (tries_size == 0) {
854     fprintf(out_file_, "      catches       : (none)\n");
855     return;
856   }
857 
858   // Dump all table entries.
859   fprintf(out_file_, "      catches       : %d\n", tries_size);
860   std::vector<std::unique_ptr<const dex_ir::TryItem>>* tries = code->Tries();
861   for (uint32_t i = 0; i < tries_size; i++) {
862     const dex_ir::TryItem* try_item = (*tries)[i].get();
863     const uint32_t start = try_item->StartAddr();
864     const uint32_t end = start + try_item->InsnCount();
865     fprintf(out_file_, "        0x%04x - 0x%04x\n", start, end);
866     for (auto& handler : *try_item->GetHandlers()->GetHandlers()) {
867       const dex_ir::TypeId* type_id = handler->GetTypeId();
868       const char* descriptor = (type_id == nullptr) ? "<any>" : type_id->GetStringId()->Data();
869       fprintf(out_file_, "          %s -> 0x%04x\n", descriptor, handler->GetAddress());
870     }  // for
871   }  // for
872 }
873 
874 /*
875  * Dumps a single instruction.
876  */
DumpInstruction(const dex_ir::CodeItem * code,uint32_t code_offset,uint32_t insn_idx,uint32_t insn_width,const Instruction * dec_insn)877 void DexLayout::DumpInstruction(const dex_ir::CodeItem* code,
878                                 uint32_t code_offset,
879                                 uint32_t insn_idx,
880                                 uint32_t insn_width,
881                                 const Instruction* dec_insn) {
882   // Address of instruction (expressed as byte offset).
883   fprintf(out_file_, "%06x:", code_offset + 0x10 + insn_idx * 2);
884 
885   // Dump (part of) raw bytes.
886   const uint16_t* insns = code->Insns();
887   for (uint32_t i = 0; i < 8; i++) {
888     if (i < insn_width) {
889       if (i == 7) {
890         fprintf(out_file_, " ... ");
891       } else {
892         // Print 16-bit value in little-endian order.
893         const uint8_t* bytePtr = (const uint8_t*) &insns[insn_idx + i];
894         fprintf(out_file_, " %02x%02x", bytePtr[0], bytePtr[1]);
895       }
896     } else {
897       fputs("     ", out_file_);
898     }
899   }  // for
900 
901   // Dump pseudo-instruction or opcode.
902   if (dec_insn->Opcode() == Instruction::NOP) {
903     const uint16_t instr = Get2LE((const uint8_t*) &insns[insn_idx]);
904     if (instr == Instruction::kPackedSwitchSignature) {
905       fprintf(out_file_, "|%04x: packed-switch-data (%d units)", insn_idx, insn_width);
906     } else if (instr == Instruction::kSparseSwitchSignature) {
907       fprintf(out_file_, "|%04x: sparse-switch-data (%d units)", insn_idx, insn_width);
908     } else if (instr == Instruction::kArrayDataSignature) {
909       fprintf(out_file_, "|%04x: array-data (%d units)", insn_idx, insn_width);
910     } else {
911       fprintf(out_file_, "|%04x: nop // spacer", insn_idx);
912     }
913   } else {
914     fprintf(out_file_, "|%04x: %s", insn_idx, dec_insn->Name());
915   }
916 
917   // Set up additional argument.
918   std::unique_ptr<char[]> index_buf;
919   if (Instruction::IndexTypeOf(dec_insn->Opcode()) != Instruction::kIndexNone) {
920     index_buf = IndexString(header_, dec_insn, 200);
921   }
922 
923   // Dump the instruction.
924   //
925   // NOTE: pDecInsn->DumpString(pDexFile) differs too much from original.
926   //
927   switch (Instruction::FormatOf(dec_insn->Opcode())) {
928     case Instruction::k10x:        // op
929       break;
930     case Instruction::k12x:        // op vA, vB
931       fprintf(out_file_, " v%d, v%d", dec_insn->VRegA(), dec_insn->VRegB());
932       break;
933     case Instruction::k11n:        // op vA, #+B
934       fprintf(out_file_, " v%d, #int %d // #%x",
935               dec_insn->VRegA(), (int32_t) dec_insn->VRegB(), (uint8_t)dec_insn->VRegB());
936       break;
937     case Instruction::k11x:        // op vAA
938       fprintf(out_file_, " v%d", dec_insn->VRegA());
939       break;
940     case Instruction::k10t:        // op +AA
941     case Instruction::k20t: {      // op +AAAA
942       const int32_t targ = (int32_t) dec_insn->VRegA();
943       fprintf(out_file_, " %04x // %c%04x",
944               insn_idx + targ,
945               (targ < 0) ? '-' : '+',
946               (targ < 0) ? -targ : targ);
947       break;
948     }
949     case Instruction::k22x:        // op vAA, vBBBB
950       fprintf(out_file_, " v%d, v%d", dec_insn->VRegA(), dec_insn->VRegB());
951       break;
952     case Instruction::k21t: {     // op vAA, +BBBB
953       const int32_t targ = (int32_t) dec_insn->VRegB();
954       fprintf(out_file_, " v%d, %04x // %c%04x", dec_insn->VRegA(),
955               insn_idx + targ,
956               (targ < 0) ? '-' : '+',
957               (targ < 0) ? -targ : targ);
958       break;
959     }
960     case Instruction::k21s:        // op vAA, #+BBBB
961       fprintf(out_file_, " v%d, #int %d // #%x",
962               dec_insn->VRegA(), (int32_t) dec_insn->VRegB(), (uint16_t)dec_insn->VRegB());
963       break;
964     case Instruction::k21h:        // op vAA, #+BBBB0000[00000000]
965       // The printed format varies a bit based on the actual opcode.
966       if (dec_insn->Opcode() == Instruction::CONST_HIGH16) {
967         const int32_t value = dec_insn->VRegB() << 16;
968         fprintf(out_file_, " v%d, #int %d // #%x",
969                 dec_insn->VRegA(), value, (uint16_t) dec_insn->VRegB());
970       } else {
971         const int64_t value = ((int64_t) dec_insn->VRegB()) << 48;
972         fprintf(out_file_, " v%d, #long %" PRId64 " // #%x",
973                 dec_insn->VRegA(), value, (uint16_t) dec_insn->VRegB());
974       }
975       break;
976     case Instruction::k21c:        // op vAA, thing@BBBB
977     case Instruction::k31c:        // op vAA, thing@BBBBBBBB
978       fprintf(out_file_, " v%d, %s", dec_insn->VRegA(), index_buf.get());
979       break;
980     case Instruction::k23x:        // op vAA, vBB, vCC
981       fprintf(out_file_, " v%d, v%d, v%d",
982               dec_insn->VRegA(), dec_insn->VRegB(), dec_insn->VRegC());
983       break;
984     case Instruction::k22b:        // op vAA, vBB, #+CC
985       fprintf(out_file_, " v%d, v%d, #int %d // #%02x",
986               dec_insn->VRegA(), dec_insn->VRegB(),
987               (int32_t) dec_insn->VRegC(), (uint8_t) dec_insn->VRegC());
988       break;
989     case Instruction::k22t: {      // op vA, vB, +CCCC
990       const int32_t targ = (int32_t) dec_insn->VRegC();
991       fprintf(out_file_, " v%d, v%d, %04x // %c%04x",
992               dec_insn->VRegA(), dec_insn->VRegB(),
993               insn_idx + targ,
994               (targ < 0) ? '-' : '+',
995               (targ < 0) ? -targ : targ);
996       break;
997     }
998     case Instruction::k22s:        // op vA, vB, #+CCCC
999       fprintf(out_file_, " v%d, v%d, #int %d // #%04x",
1000               dec_insn->VRegA(), dec_insn->VRegB(),
1001               (int32_t) dec_insn->VRegC(), (uint16_t) dec_insn->VRegC());
1002       break;
1003     case Instruction::k22c:        // op vA, vB, thing@CCCC
1004     // NOT SUPPORTED:
1005     // case Instruction::k22cs:    // [opt] op vA, vB, field offset CCCC
1006       fprintf(out_file_, " v%d, v%d, %s",
1007               dec_insn->VRegA(), dec_insn->VRegB(), index_buf.get());
1008       break;
1009     case Instruction::k30t:
1010       fprintf(out_file_, " #%08x", dec_insn->VRegA());
1011       break;
1012     case Instruction::k31i: {     // op vAA, #+BBBBBBBB
1013       // This is often, but not always, a float.
1014       union {
1015         float f;
1016         uint32_t i;
1017       } conv;
1018       conv.i = dec_insn->VRegB();
1019       fprintf(out_file_, " v%d, #float %g // #%08x",
1020               dec_insn->VRegA(), conv.f, dec_insn->VRegB());
1021       break;
1022     }
1023     case Instruction::k31t:       // op vAA, offset +BBBBBBBB
1024       fprintf(out_file_, " v%d, %08x // +%08x",
1025               dec_insn->VRegA(), insn_idx + dec_insn->VRegB(), dec_insn->VRegB());
1026       break;
1027     case Instruction::k32x:        // op vAAAA, vBBBB
1028       fprintf(out_file_, " v%d, v%d", dec_insn->VRegA(), dec_insn->VRegB());
1029       break;
1030     case Instruction::k35c:           // op {vC, vD, vE, vF, vG}, thing@BBBB
1031     case Instruction::k45cc: {        // op {vC, vD, vE, vF, vG}, meth@BBBB, proto@HHHH
1032     // NOT SUPPORTED:
1033     // case Instruction::k35ms:       // [opt] invoke-virtual+super
1034     // case Instruction::k35mi:       // [opt] inline invoke
1035       uint32_t arg[Instruction::kMaxVarArgRegs];
1036       dec_insn->GetVarArgs(arg);
1037       fputs(" {", out_file_);
1038       for (int i = 0, n = dec_insn->VRegA(); i < n; i++) {
1039         if (i == 0) {
1040           fprintf(out_file_, "v%d", arg[i]);
1041         } else {
1042           fprintf(out_file_, ", v%d", arg[i]);
1043         }
1044       }  // for
1045       fprintf(out_file_, "}, %s", index_buf.get());
1046       break;
1047     }
1048     case Instruction::k3rc:           // op {vCCCC .. v(CCCC+AA-1)}, thing@BBBB
1049     case Instruction::k4rcc:          // op {vCCCC .. v(CCCC+AA-1)}, meth@BBBB, proto@HHHH
1050     // NOT SUPPORTED:
1051     // case Instruction::k3rms:       // [opt] invoke-virtual+super/range
1052     // case Instruction::k3rmi:       // [opt] execute-inline/range
1053       {
1054         // This doesn't match the "dx" output when some of the args are
1055         // 64-bit values -- dx only shows the first register.
1056         fputs(" {", out_file_);
1057         for (int i = 0, n = dec_insn->VRegA(); i < n; i++) {
1058           if (i == 0) {
1059             fprintf(out_file_, "v%d", dec_insn->VRegC() + i);
1060           } else {
1061             fprintf(out_file_, ", v%d", dec_insn->VRegC() + i);
1062           }
1063         }  // for
1064         fprintf(out_file_, "}, %s", index_buf.get());
1065       }
1066       break;
1067     case Instruction::k51l: {      // op vAA, #+BBBBBBBBBBBBBBBB
1068       // This is often, but not always, a double.
1069       union {
1070         double d;
1071         uint64_t j;
1072       } conv;
1073       conv.j = dec_insn->WideVRegB();
1074       fprintf(out_file_, " v%d, #double %g // #%016" PRIx64,
1075               dec_insn->VRegA(), conv.d, dec_insn->WideVRegB());
1076       break;
1077     }
1078     // NOT SUPPORTED:
1079     // case Instruction::k00x:        // unknown op or breakpoint
1080     //    break;
1081     default:
1082       fprintf(out_file_, " ???");
1083       break;
1084   }  // switch
1085 
1086   fputc('\n', out_file_);
1087 }
1088 
1089 /*
1090  * Dumps a bytecode disassembly.
1091  */
DumpBytecodes(uint32_t idx,const dex_ir::CodeItem * code,uint32_t code_offset)1092 void DexLayout::DumpBytecodes(uint32_t idx, const dex_ir::CodeItem* code, uint32_t code_offset) {
1093   dex_ir::MethodId* method_id = header_->MethodIds()[idx];
1094   const char* name = method_id->Name()->Data();
1095   std::string type_descriptor = GetSignatureForProtoId(method_id->Proto());
1096   const char* back_descriptor = method_id->Class()->GetStringId()->Data();
1097 
1098   // Generate header.
1099   std::string dot(DescriptorToDot(back_descriptor));
1100   fprintf(out_file_, "%06x:                                        |[%06x] %s.%s:%s\n",
1101           code_offset, code_offset, dot.c_str(), name, type_descriptor.c_str());
1102 
1103   // Iterate over all instructions.
1104   for (const DexInstructionPcPair& inst : code->Instructions()) {
1105     const uint32_t insn_width = inst->SizeInCodeUnits();
1106     if (insn_width == 0) {
1107       LOG(WARNING) << "GLITCH: zero-width instruction at idx=0x" << std::hex << inst.DexPc();
1108       break;
1109     }
1110     DumpInstruction(code, code_offset, inst.DexPc(), insn_width, &inst.Inst());
1111   }  // for
1112 }
1113 
1114 /*
1115  * Lookup functions.
1116  */
StringDataByIdx(uint32_t idx,dex_ir::Header * header)1117 static const char* StringDataByIdx(uint32_t idx, dex_ir::Header* header) {
1118   dex_ir::StringId* string_id = header->GetStringIdOrNullPtr(idx);
1119   if (string_id == nullptr) {
1120     return nullptr;
1121   }
1122   return string_id->Data();
1123 }
1124 
StringDataByTypeIdx(uint16_t idx,dex_ir::Header * header)1125 static const char* StringDataByTypeIdx(uint16_t idx, dex_ir::Header* header) {
1126   dex_ir::TypeId* type_id = header->GetTypeIdOrNullPtr(idx);
1127   if (type_id == nullptr) {
1128     return nullptr;
1129   }
1130   dex_ir::StringId* string_id = type_id->GetStringId();
1131   if (string_id == nullptr) {
1132     return nullptr;
1133   }
1134   return string_id->Data();
1135 }
1136 
1137 
1138 /*
1139  * Dumps code of a method.
1140  */
DumpCode(uint32_t idx,const dex_ir::CodeItem * code,uint32_t code_offset,const char * declaring_class_descriptor,const char * method_name,bool is_static,const dex_ir::ProtoId * proto)1141 void DexLayout::DumpCode(uint32_t idx,
1142                          const dex_ir::CodeItem* code,
1143                          uint32_t code_offset,
1144                          const char* declaring_class_descriptor,
1145                          const char* method_name,
1146                          bool is_static,
1147                          const dex_ir::ProtoId* proto) {
1148   fprintf(out_file_, "      registers     : %d\n", code->RegistersSize());
1149   fprintf(out_file_, "      ins           : %d\n", code->InsSize());
1150   fprintf(out_file_, "      outs          : %d\n", code->OutsSize());
1151   fprintf(out_file_, "      insns size    : %d 16-bit code units\n",
1152           code->InsnsSize());
1153 
1154   // Bytecode disassembly, if requested.
1155   if (options_.disassemble_) {
1156     DumpBytecodes(idx, code, code_offset);
1157   }
1158 
1159   // Try-catch blocks.
1160   DumpCatches(code);
1161 
1162   // Positions and locals table in the debug info.
1163   dex_ir::DebugInfoItem* debug_info = code->DebugInfo();
1164   fprintf(out_file_, "      positions     :\n");
1165   if (debug_info != nullptr) {
1166     DexFile::DecodeDebugPositionInfo(debug_info->GetDebugInfo(),
1167                                      [this](uint32_t idx) {
1168                                        return StringDataByIdx(idx, this->header_);
1169                                      },
1170                                      [&](const DexFile::PositionInfo& entry) {
1171                                        fprintf(out_file_,
1172                                                "        0x%04x line=%d\n",
1173                                                entry.address_,
1174                                                entry.line_);
1175                                         return false;
1176                                      });
1177   }
1178   fprintf(out_file_, "      locals        :\n");
1179   if (debug_info != nullptr) {
1180     std::vector<const char*> arg_descriptors;
1181     const dex_ir::TypeList* parameters = proto->Parameters();
1182     if (parameters != nullptr) {
1183       const dex_ir::TypeIdVector* parameter_type_vector = parameters->GetTypeList();
1184       if (parameter_type_vector != nullptr) {
1185         for (const dex_ir::TypeId* type_id : *parameter_type_vector) {
1186           arg_descriptors.push_back(type_id->GetStringId()->Data());
1187         }
1188       }
1189     }
1190     DexFile::DecodeDebugLocalInfo(debug_info->GetDebugInfo(),
1191                                   "DexLayout in-memory",
1192                                   declaring_class_descriptor,
1193                                   arg_descriptors,
1194                                   method_name,
1195                                   is_static,
1196                                   code->RegistersSize(),
1197                                   code->InsSize(),
1198                                   code->InsnsSize(),
1199                                   [this](uint32_t idx) {
1200                                     return StringDataByIdx(idx, this->header_);
1201                                   },
1202                                   [this](uint32_t idx) {
1203                                     return
1204                                         StringDataByTypeIdx(dchecked_integral_cast<uint16_t>(idx),
1205                                                             this->header_);
1206                                   },
1207                                   [&](const DexFile::LocalInfo& entry) {
1208                                     fprintf(out_file_,
1209                                             "        0x%04x - 0x%04x reg=%d %s %s",
1210                                             entry.start_address_,
1211                                             entry.end_address_,
1212                                             entry.reg_,
1213                                             entry.name_,
1214                                             entry.descriptor_);
1215                                     if (entry.signature_) {
1216                                       fputc(' ', out_file_);
1217                                       fputs(entry.signature_, out_file_);
1218                                     }
1219                                     fputc('\n', out_file_);
1220                                   });
1221   }
1222 }
1223 
1224 /*
1225  * Dumps a method.
1226  */
DumpMethod(uint32_t idx,uint32_t flags,uint32_t hiddenapi_flags,const dex_ir::CodeItem * code,int i)1227 void DexLayout::DumpMethod(uint32_t idx,
1228                            uint32_t flags,
1229                            uint32_t hiddenapi_flags,
1230                            const dex_ir::CodeItem* code,
1231                            int i) {
1232   // Bail for anything private if export only requested.
1233   if (options_.exports_only_ && (flags & (kAccPublic | kAccProtected)) == 0) {
1234     return;
1235   }
1236 
1237   dex_ir::MethodId* method_id = header_->MethodIds()[idx];
1238   const char* name = method_id->Name()->Data();
1239   char* type_descriptor = strdup(GetSignatureForProtoId(method_id->Proto()).c_str());
1240   const char* back_descriptor = method_id->Class()->GetStringId()->Data();
1241   char* access_str = CreateAccessFlagStr(flags, kAccessForMethod);
1242 
1243   if (options_.output_format_ == kOutputPlain) {
1244     fprintf(out_file_, "    #%d              : (in %s)\n", i, back_descriptor);
1245     fprintf(out_file_, "      name          : '%s'\n", name);
1246     fprintf(out_file_, "      type          : '%s'\n", type_descriptor);
1247     fprintf(out_file_, "      access        : 0x%04x (%s)\n", flags, access_str);
1248     if (options_.show_section_headers_) {
1249       fprintf(out_file_, "      method_idx    : %d\n", method_id->GetIndex());
1250     }
1251     if (hiddenapi_flags != 0u) {
1252       fprintf(out_file_,
1253               "      hiddenapi     : 0x%04x (%s)\n",
1254               hiddenapi_flags,
1255               GetHiddenapiFlagStr(hiddenapi_flags).c_str());
1256     }
1257     if (code == nullptr) {
1258       fprintf(out_file_, "      code          : (none)\n");
1259     } else {
1260       fprintf(out_file_, "      code          -\n");
1261       DumpCode(idx,
1262                code,
1263                code->GetOffset(),
1264                back_descriptor,
1265                name,
1266                (flags & kAccStatic) != 0,
1267                method_id->Proto());
1268     }
1269     if (options_.disassemble_) {
1270       fputc('\n', out_file_);
1271     }
1272   } else if (options_.output_format_ == kOutputXml) {
1273     const bool constructor = (name[0] == '<');
1274 
1275     // Method name and prototype.
1276     if (constructor) {
1277       std::string dot(DescriptorClassToName(back_descriptor));
1278       fprintf(out_file_, "<constructor name=\"%s\"\n", dot.c_str());
1279       dot = DescriptorToDot(back_descriptor);
1280       fprintf(out_file_, " type=\"%s\"\n", dot.c_str());
1281     } else {
1282       fprintf(out_file_, "<method name=\"%s\"\n", name);
1283       const char* return_type = strrchr(type_descriptor, ')');
1284       if (return_type == nullptr) {
1285         LOG(ERROR) << "bad method type descriptor '" << type_descriptor << "'";
1286         goto bail;
1287       }
1288       std::string dot(DescriptorToDot(return_type + 1));
1289       fprintf(out_file_, " return=\"%s\"\n", dot.c_str());
1290       fprintf(out_file_, " abstract=%s\n", QuotedBool((flags & kAccAbstract) != 0));
1291       fprintf(out_file_, " native=%s\n", QuotedBool((flags & kAccNative) != 0));
1292       fprintf(out_file_, " synchronized=%s\n", QuotedBool(
1293           (flags & (kAccSynchronized | kAccDeclaredSynchronized)) != 0));
1294     }
1295 
1296     // Additional method flags.
1297     fprintf(out_file_, " static=%s\n", QuotedBool((flags & kAccStatic) != 0));
1298     fprintf(out_file_, " final=%s\n", QuotedBool((flags & kAccFinal) != 0));
1299     // The "deprecated=" not knowable w/o parsing annotations.
1300     fprintf(out_file_, " visibility=%s\n>\n", QuotedVisibility(flags));
1301 
1302     // Parameters.
1303     if (type_descriptor[0] != '(') {
1304       LOG(ERROR) << "ERROR: bad descriptor '" << type_descriptor << "'";
1305       goto bail;
1306     }
1307     char* tmp_buf = reinterpret_cast<char*>(malloc(strlen(type_descriptor) + 1));
1308     const char* base = type_descriptor + 1;
1309     int arg_num = 0;
1310     while (*base != ')') {
1311       char* cp = tmp_buf;
1312       while (*base == '[') {
1313         *cp++ = *base++;
1314       }
1315       if (*base == 'L') {
1316         // Copy through ';'.
1317         do {
1318           *cp = *base++;
1319         } while (*cp++ != ';');
1320       } else {
1321         // Primitive char, copy it.
1322         if (strchr("ZBCSIFJD", *base) == nullptr) {
1323           LOG(ERROR) << "ERROR: bad method signature '" << base << "'";
1324           break;  // while
1325         }
1326         *cp++ = *base++;
1327       }
1328       // Null terminate and display.
1329       *cp++ = '\0';
1330       std::string dot(DescriptorToDot(tmp_buf));
1331       fprintf(out_file_, "<parameter name=\"arg%d\" type=\"%s\">\n"
1332                         "</parameter>\n", arg_num++, dot.c_str());
1333     }  // while
1334     free(tmp_buf);
1335     if (constructor) {
1336       fprintf(out_file_, "</constructor>\n");
1337     } else {
1338       fprintf(out_file_, "</method>\n");
1339     }
1340   }
1341 
1342 bail:
1343   free(type_descriptor);
1344   free(access_str);
1345 }
1346 
1347 /*
1348  * Dumps a static (class) field.
1349  */
DumpSField(uint32_t idx,uint32_t flags,uint32_t hiddenapi_flags,int i,dex_ir::EncodedValue * init)1350 void DexLayout::DumpSField(uint32_t idx,
1351                            uint32_t flags,
1352                            uint32_t hiddenapi_flags,
1353                            int i,
1354                            dex_ir::EncodedValue* init) {
1355   // Bail for anything private if export only requested.
1356   if (options_.exports_only_ && (flags & (kAccPublic | kAccProtected)) == 0) {
1357     return;
1358   }
1359 
1360   dex_ir::FieldId* field_id = header_->FieldIds()[idx];
1361   const char* name = field_id->Name()->Data();
1362   const char* type_descriptor = field_id->Type()->GetStringId()->Data();
1363   const char* back_descriptor = field_id->Class()->GetStringId()->Data();
1364   char* access_str = CreateAccessFlagStr(flags, kAccessForField);
1365 
1366   if (options_.output_format_ == kOutputPlain) {
1367     fprintf(out_file_, "    #%d              : (in %s)\n", i, back_descriptor);
1368     fprintf(out_file_, "      name          : '%s'\n", name);
1369     fprintf(out_file_, "      type          : '%s'\n", type_descriptor);
1370     fprintf(out_file_, "      access        : 0x%04x (%s)\n", flags, access_str);
1371     if (hiddenapi_flags != 0u) {
1372       fprintf(out_file_,
1373               "      hiddenapi     : 0x%04x (%s)\n",
1374               hiddenapi_flags,
1375               GetHiddenapiFlagStr(hiddenapi_flags).c_str());
1376     }
1377     if (init != nullptr) {
1378       fputs("      value         : ", out_file_);
1379       DumpEncodedValue(init);
1380       fputs("\n", out_file_);
1381     }
1382   } else if (options_.output_format_ == kOutputXml) {
1383     fprintf(out_file_, "<field name=\"%s\"\n", name);
1384     std::string dot(DescriptorToDot(type_descriptor));
1385     fprintf(out_file_, " type=\"%s\"\n", dot.c_str());
1386     fprintf(out_file_, " transient=%s\n", QuotedBool((flags & kAccTransient) != 0));
1387     fprintf(out_file_, " volatile=%s\n", QuotedBool((flags & kAccVolatile) != 0));
1388     // The "value=" is not knowable w/o parsing annotations.
1389     fprintf(out_file_, " static=%s\n", QuotedBool((flags & kAccStatic) != 0));
1390     fprintf(out_file_, " final=%s\n", QuotedBool((flags & kAccFinal) != 0));
1391     // The "deprecated=" is not knowable w/o parsing annotations.
1392     fprintf(out_file_, " visibility=%s\n", QuotedVisibility(flags));
1393     if (init != nullptr) {
1394       fputs(" value=\"", out_file_);
1395       DumpEncodedValue(init);
1396       fputs("\"\n", out_file_);
1397     }
1398     fputs(">\n</field>\n", out_file_);
1399   }
1400 
1401   free(access_str);
1402 }
1403 
1404 /*
1405  * Dumps an instance field.
1406  */
DumpIField(uint32_t idx,uint32_t flags,uint32_t hiddenapi_flags,int i)1407 void DexLayout::DumpIField(uint32_t idx,
1408                            uint32_t flags,
1409                            uint32_t hiddenapi_flags,
1410                            int i) {
1411   DumpSField(idx, flags, hiddenapi_flags, i, nullptr);
1412 }
1413 
1414 /*
1415  * Dumps the class.
1416  *
1417  * Note "idx" is a DexClassDef index, not a DexTypeId index.
1418  *
1419  * If "*last_package" is nullptr or does not match the current class' package,
1420  * the value will be replaced with a newly-allocated string.
1421  */
DumpClass(int idx,char ** last_package)1422 void DexLayout::DumpClass(int idx, char** last_package) {
1423   dex_ir::ClassDef* class_def = header_->ClassDefs()[idx];
1424   // Omitting non-public class.
1425   if (options_.exports_only_ && (class_def->GetAccessFlags() & kAccPublic) == 0) {
1426     return;
1427   }
1428 
1429   if (options_.show_section_headers_) {
1430     DumpClassDef(idx);
1431   }
1432 
1433   if (options_.show_annotations_) {
1434     DumpClassAnnotations(idx);
1435   }
1436 
1437   // For the XML output, show the package name.  Ideally we'd gather
1438   // up the classes, sort them, and dump them alphabetically so the
1439   // package name wouldn't jump around, but that's not a great plan
1440   // for something that needs to run on the device.
1441   const char* class_descriptor = header_->ClassDefs()[idx]->ClassType()->GetStringId()->Data();
1442   if (!(class_descriptor[0] == 'L' &&
1443         class_descriptor[strlen(class_descriptor)-1] == ';')) {
1444     // Arrays and primitives should not be defined explicitly. Keep going?
1445     LOG(ERROR) << "Malformed class name '" << class_descriptor << "'";
1446   } else if (options_.output_format_ == kOutputXml) {
1447     char* mangle = strdup(class_descriptor + 1);
1448     mangle[strlen(mangle)-1] = '\0';
1449 
1450     // Reduce to just the package name.
1451     char* last_slash = strrchr(mangle, '/');
1452     if (last_slash != nullptr) {
1453       *last_slash = '\0';
1454     } else {
1455       *mangle = '\0';
1456     }
1457 
1458     for (char* cp = mangle; *cp != '\0'; cp++) {
1459       if (*cp == '/') {
1460         *cp = '.';
1461       }
1462     }  // for
1463 
1464     if (*last_package == nullptr || strcmp(mangle, *last_package) != 0) {
1465       // Start of a new package.
1466       if (*last_package != nullptr) {
1467         fprintf(out_file_, "</package>\n");
1468       }
1469       fprintf(out_file_, "<package name=\"%s\"\n>\n", mangle);
1470       free(*last_package);
1471       *last_package = mangle;
1472     } else {
1473       free(mangle);
1474     }
1475   }
1476 
1477   // General class information.
1478   char* access_str = CreateAccessFlagStr(class_def->GetAccessFlags(), kAccessForClass);
1479   const char* superclass_descriptor = nullptr;
1480   if (class_def->Superclass() != nullptr) {
1481     superclass_descriptor = class_def->Superclass()->GetStringId()->Data();
1482   }
1483   if (options_.output_format_ == kOutputPlain) {
1484     fprintf(out_file_, "Class #%d            -\n", idx);
1485     fprintf(out_file_, "  Class descriptor  : '%s'\n", class_descriptor);
1486     fprintf(out_file_, "  Access flags      : 0x%04x (%s)\n",
1487             class_def->GetAccessFlags(), access_str);
1488     if (superclass_descriptor != nullptr) {
1489       fprintf(out_file_, "  Superclass        : '%s'\n", superclass_descriptor);
1490     }
1491     fprintf(out_file_, "  Interfaces        -\n");
1492   } else {
1493     std::string dot(DescriptorClassToName(class_descriptor));
1494     fprintf(out_file_, "<class name=\"%s\"\n", dot.c_str());
1495     if (superclass_descriptor != nullptr) {
1496       dot = DescriptorToDot(superclass_descriptor);
1497       fprintf(out_file_, " extends=\"%s\"\n", dot.c_str());
1498     }
1499     fprintf(out_file_, " interface=%s\n",
1500             QuotedBool((class_def->GetAccessFlags() & kAccInterface) != 0));
1501     fprintf(out_file_, " abstract=%s\n",
1502             QuotedBool((class_def->GetAccessFlags() & kAccAbstract) != 0));
1503     fprintf(out_file_, " static=%s\n", QuotedBool((class_def->GetAccessFlags() & kAccStatic) != 0));
1504     fprintf(out_file_, " final=%s\n", QuotedBool((class_def->GetAccessFlags() & kAccFinal) != 0));
1505     // The "deprecated=" not knowable w/o parsing annotations.
1506     fprintf(out_file_, " visibility=%s\n", QuotedVisibility(class_def->GetAccessFlags()));
1507     fprintf(out_file_, ">\n");
1508   }
1509 
1510   // Interfaces.
1511   const dex_ir::TypeList* interfaces = class_def->Interfaces();
1512   if (interfaces != nullptr) {
1513     const dex_ir::TypeIdVector* interfaces_vector = interfaces->GetTypeList();
1514     for (uint32_t i = 0; i < interfaces_vector->size(); i++) {
1515       DumpInterface((*interfaces_vector)[i], i);
1516     }  // for
1517   }
1518 
1519   // Fields and methods.
1520   dex_ir::ClassData* class_data = class_def->GetClassData();
1521   // Prepare data for static fields.
1522   dex_ir::EncodedArrayItem* static_values = class_def->StaticValues();
1523   dex_ir::EncodedValueVector* encoded_values =
1524       static_values == nullptr ? nullptr : static_values->GetEncodedValues();
1525   const uint32_t encoded_values_size = (encoded_values == nullptr) ? 0 : encoded_values->size();
1526 
1527   // Static fields.
1528   if (options_.output_format_ == kOutputPlain) {
1529     fprintf(out_file_, "  Static fields     -\n");
1530   }
1531   if (class_data != nullptr) {
1532     dex_ir::FieldItemVector* static_fields = class_data->StaticFields();
1533     if (static_fields != nullptr) {
1534       for (uint32_t i = 0; i < static_fields->size(); i++) {
1535         DumpSField((*static_fields)[i].GetFieldId()->GetIndex(),
1536                    (*static_fields)[i].GetAccessFlags(),
1537                    dex_ir::HiddenapiClassData::GetFlags(header_, class_def, &(*static_fields)[i]),
1538                    i,
1539                    i < encoded_values_size ? (*encoded_values)[i].get() : nullptr);
1540       }  // for
1541     }
1542   }
1543 
1544   // Instance fields.
1545   if (options_.output_format_ == kOutputPlain) {
1546     fprintf(out_file_, "  Instance fields   -\n");
1547   }
1548   if (class_data != nullptr) {
1549     dex_ir::FieldItemVector* instance_fields = class_data->InstanceFields();
1550     if (instance_fields != nullptr) {
1551       for (uint32_t i = 0; i < instance_fields->size(); i++) {
1552         DumpIField((*instance_fields)[i].GetFieldId()->GetIndex(),
1553                    (*instance_fields)[i].GetAccessFlags(),
1554                    dex_ir::HiddenapiClassData::GetFlags(header_, class_def, &(*instance_fields)[i]),
1555                    i);
1556       }  // for
1557     }
1558   }
1559 
1560   // Direct methods.
1561   if (options_.output_format_ == kOutputPlain) {
1562     fprintf(out_file_, "  Direct methods    -\n");
1563   }
1564   if (class_data != nullptr) {
1565     dex_ir::MethodItemVector* direct_methods = class_data->DirectMethods();
1566     if (direct_methods != nullptr) {
1567       for (uint32_t i = 0; i < direct_methods->size(); i++) {
1568         DumpMethod((*direct_methods)[i].GetMethodId()->GetIndex(),
1569                    (*direct_methods)[i].GetAccessFlags(),
1570                    dex_ir::HiddenapiClassData::GetFlags(header_, class_def, &(*direct_methods)[i]),
1571                    (*direct_methods)[i].GetCodeItem(),
1572                    i);
1573       }  // for
1574     }
1575   }
1576 
1577   // Virtual methods.
1578   if (options_.output_format_ == kOutputPlain) {
1579     fprintf(out_file_, "  Virtual methods   -\n");
1580   }
1581   if (class_data != nullptr) {
1582     dex_ir::MethodItemVector* virtual_methods = class_data->VirtualMethods();
1583     if (virtual_methods != nullptr) {
1584       for (uint32_t i = 0; i < virtual_methods->size(); i++) {
1585         DumpMethod((*virtual_methods)[i].GetMethodId()->GetIndex(),
1586                    (*virtual_methods)[i].GetAccessFlags(),
1587                    dex_ir::HiddenapiClassData::GetFlags(header_, class_def, &(*virtual_methods)[i]),
1588                    (*virtual_methods)[i].GetCodeItem(),
1589                    i);
1590       }  // for
1591     }
1592   }
1593 
1594   // End of class.
1595   if (options_.output_format_ == kOutputPlain) {
1596     const char* file_name = "unknown";
1597     if (class_def->SourceFile() != nullptr) {
1598       file_name = class_def->SourceFile()->Data();
1599     }
1600     const dex_ir::StringId* source_file = class_def->SourceFile();
1601     fprintf(out_file_, "  source_file_idx   : %d (%s)\n\n",
1602             source_file == nullptr ? 0xffffffffU : source_file->GetIndex(), file_name);
1603   } else if (options_.output_format_ == kOutputXml) {
1604     fprintf(out_file_, "</class>\n");
1605   }
1606 
1607   free(access_str);
1608 }
1609 
DumpDexFile()1610 void DexLayout::DumpDexFile() {
1611   // Headers.
1612   if (options_.show_file_headers_) {
1613     DumpFileHeader();
1614   }
1615 
1616   // Open XML context.
1617   if (options_.output_format_ == kOutputXml) {
1618     fprintf(out_file_, "<api>\n");
1619   }
1620 
1621   // Iterate over all classes.
1622   char* package = nullptr;
1623   const uint32_t class_defs_size = header_->ClassDefs().Size();
1624   for (uint32_t i = 0; i < class_defs_size; i++) {
1625     DumpClass(i, &package);
1626   }  // for
1627 
1628   // Free the last package allocated.
1629   if (package != nullptr) {
1630     fprintf(out_file_, "</package>\n");
1631     free(package);
1632   }
1633 
1634   // Close XML context.
1635   if (options_.output_format_ == kOutputXml) {
1636     fprintf(out_file_, "</api>\n");
1637   }
1638 }
1639 
LayoutClassDefsAndClassData(const DexFile * dex_file)1640 void DexLayout::LayoutClassDefsAndClassData(const DexFile* dex_file) {
1641   std::vector<dex_ir::ClassDef*> new_class_def_order;
1642   for (auto& class_def : header_->ClassDefs()) {
1643     dex::TypeIndex type_idx(class_def->ClassType()->GetIndex());
1644     if (info_->ContainsClass(*dex_file, type_idx)) {
1645       new_class_def_order.push_back(class_def.get());
1646     }
1647   }
1648   for (auto& class_def : header_->ClassDefs()) {
1649     dex::TypeIndex type_idx(class_def->ClassType()->GetIndex());
1650     if (!info_->ContainsClass(*dex_file, type_idx)) {
1651       new_class_def_order.push_back(class_def.get());
1652     }
1653   }
1654   std::unordered_set<dex_ir::ClassData*> visited_class_data;
1655   size_t class_data_index = 0;
1656   auto& class_datas = header_->ClassDatas();
1657   for (dex_ir::ClassDef* class_def : new_class_def_order) {
1658     dex_ir::ClassData* class_data = class_def->GetClassData();
1659     if (class_data != nullptr && visited_class_data.find(class_data) == visited_class_data.end()) {
1660       visited_class_data.insert(class_data);
1661       // Overwrite the existing vector with the new ordering, note that the sets of objects are
1662       // equivalent, but the order changes. This is why this is not a memory leak.
1663       // TODO: Consider cleaning this up with a shared_ptr.
1664       class_datas[class_data_index].release();  // NOLINT b/117926937
1665       class_datas[class_data_index].reset(class_data);
1666       ++class_data_index;
1667     }
1668   }
1669   CHECK_EQ(class_data_index, class_datas.Size());
1670 
1671   if (DexLayout::kChangeClassDefOrder) {
1672     // This currently produces dex files that violate the spec since the super class class_def is
1673     // supposed to occur before any subclasses.
1674     dex_ir::CollectionVector<dex_ir::ClassDef>& class_defs = header_->ClassDefs();
1675     CHECK_EQ(new_class_def_order.size(), class_defs.Size());
1676     for (size_t i = 0; i < class_defs.Size(); ++i) {
1677       // Overwrite the existing vector with the new ordering, note that the sets of objects are
1678       // equivalent, but the order changes. This is why this is not a memory leak.
1679       // TODO: Consider cleaning this up with a shared_ptr.
1680       class_defs[i].release();  // NOLINT b/117926937
1681       class_defs[i].reset(new_class_def_order[i]);
1682     }
1683   }
1684 }
1685 
LayoutStringData(const DexFile * dex_file)1686 void DexLayout::LayoutStringData(const DexFile* dex_file) {
1687   const size_t num_strings = header_->StringIds().Size();
1688   std::vector<bool> is_shorty(num_strings, false);
1689   std::vector<bool> from_hot_method(num_strings, false);
1690   for (auto& class_def : header_->ClassDefs()) {
1691     // A name of a profile class is probably going to get looked up by ClassTable::Lookup, mark it
1692     // as hot. Add its super class and interfaces as well, which can be used during initialization.
1693     const bool is_profile_class =
1694         info_->ContainsClass(*dex_file, dex::TypeIndex(class_def->ClassType()->GetIndex()));
1695     if (is_profile_class) {
1696       from_hot_method[class_def->ClassType()->GetStringId()->GetIndex()] = true;
1697       const dex_ir::TypeId* superclass = class_def->Superclass();
1698       if (superclass != nullptr) {
1699         from_hot_method[superclass->GetStringId()->GetIndex()] = true;
1700       }
1701       const dex_ir::TypeList* interfaces = class_def->Interfaces();
1702       if (interfaces != nullptr) {
1703         for (const dex_ir::TypeId* interface_type : *interfaces->GetTypeList()) {
1704           from_hot_method[interface_type->GetStringId()->GetIndex()] = true;
1705         }
1706       }
1707     }
1708     dex_ir::ClassData* data = class_def->GetClassData();
1709     if (data == nullptr) {
1710       continue;
1711     }
1712     for (size_t i = 0; i < 2; ++i) {
1713       for (auto& method : *(i == 0 ? data->DirectMethods() : data->VirtualMethods())) {
1714         const dex_ir::MethodId* method_id = method.GetMethodId();
1715         dex_ir::CodeItem* code_item = method.GetCodeItem();
1716         if (code_item == nullptr) {
1717           continue;
1718         }
1719         const bool is_clinit = is_profile_class &&
1720             (method.GetAccessFlags() & kAccConstructor) != 0 &&
1721             (method.GetAccessFlags() & kAccStatic) != 0;
1722         const bool method_executed = is_clinit ||
1723             info_->GetMethodHotness(MethodReference(dex_file, method_id->GetIndex())).IsInProfile();
1724         if (!method_executed) {
1725           continue;
1726         }
1727         is_shorty[method_id->Proto()->Shorty()->GetIndex()] = true;
1728         dex_ir::CodeFixups* fixups = code_item->GetCodeFixups();
1729         if (fixups == nullptr) {
1730           continue;
1731         }
1732         // Add const-strings.
1733         for (dex_ir::StringId* id : fixups->StringIds()) {
1734           from_hot_method[id->GetIndex()] = true;
1735         }
1736         // Add field classes, names, and types.
1737         for (dex_ir::FieldId* id : fixups->FieldIds()) {
1738           // TODO: Only visit field ids from static getters and setters.
1739           from_hot_method[id->Class()->GetStringId()->GetIndex()] = true;
1740           from_hot_method[id->Name()->GetIndex()] = true;
1741           from_hot_method[id->Type()->GetStringId()->GetIndex()] = true;
1742         }
1743         // For clinits, add referenced method classes, names, and protos.
1744         if (is_clinit) {
1745           for (dex_ir::MethodId* id : fixups->MethodIds()) {
1746             from_hot_method[id->Class()->GetStringId()->GetIndex()] = true;
1747             from_hot_method[id->Name()->GetIndex()] = true;
1748             is_shorty[id->Proto()->Shorty()->GetIndex()] = true;
1749           }
1750         }
1751       }
1752     }
1753   }
1754   // Sort string data by specified order.
1755   std::vector<dex_ir::StringId*> string_ids;
1756   for (auto& string_id : header_->StringIds()) {
1757     string_ids.push_back(string_id.get());
1758   }
1759   std::sort(string_ids.begin(),
1760             string_ids.end(),
1761             [&is_shorty, &from_hot_method](const dex_ir::StringId* a,
1762                                            const dex_ir::StringId* b) {
1763     const bool a_is_hot = from_hot_method[a->GetIndex()];
1764     const bool b_is_hot = from_hot_method[b->GetIndex()];
1765     if (a_is_hot != b_is_hot) {
1766       return a_is_hot < b_is_hot;
1767     }
1768     // After hot methods are partitioned, subpartition shorties.
1769     const bool a_is_shorty = is_shorty[a->GetIndex()];
1770     const bool b_is_shorty = is_shorty[b->GetIndex()];
1771     if (a_is_shorty != b_is_shorty) {
1772       return a_is_shorty < b_is_shorty;
1773     }
1774     // Order by index by default.
1775     return a->GetIndex() < b->GetIndex();
1776   });
1777   auto& string_datas = header_->StringDatas();
1778   // Now we know what order we want the string data, reorder them.
1779   size_t data_index = 0;
1780   for (dex_ir::StringId* string_id : string_ids) {
1781     string_datas[data_index].release();  // NOLINT b/117926937
1782     string_datas[data_index].reset(string_id->DataItem());
1783     ++data_index;
1784   }
1785   if (kIsDebugBuild) {
1786     std::unordered_set<dex_ir::StringData*> visited;
1787     for (const std::unique_ptr<dex_ir::StringData>& data : string_datas) {
1788       visited.insert(data.get());
1789     }
1790     for (auto& string_id : header_->StringIds()) {
1791       CHECK(visited.find(string_id->DataItem()) != visited.end());
1792     }
1793   }
1794   CHECK_EQ(data_index, string_datas.Size());
1795 }
1796 
1797 // Orders code items according to specified class data ordering.
LayoutCodeItems(const DexFile * dex_file)1798 void DexLayout::LayoutCodeItems(const DexFile* dex_file) {
1799   static constexpr InvokeType invoke_types[] = {
1800     kDirect,
1801     kVirtual
1802   };
1803 
1804   std::unordered_map<dex_ir::CodeItem*, LayoutType>& code_item_layout =
1805       layout_hotness_info_.code_item_layout_;
1806 
1807   // Assign hotness flags to all code items.
1808   for (InvokeType invoke_type : invoke_types) {
1809     for (auto& class_def : header_->ClassDefs()) {
1810       const bool is_profile_class =
1811           info_->ContainsClass(*dex_file, dex::TypeIndex(class_def->ClassType()->GetIndex()));
1812 
1813       // Skip classes that are not defined in this dex file.
1814       dex_ir::ClassData* class_data = class_def->GetClassData();
1815       if (class_data == nullptr) {
1816         continue;
1817       }
1818       for (auto& method : *(invoke_type == InvokeType::kDirect
1819                                 ? class_data->DirectMethods()
1820                                 : class_data->VirtualMethods())) {
1821         const dex_ir::MethodId *method_id = method.GetMethodId();
1822         dex_ir::CodeItem *code_item = method.GetCodeItem();
1823         if (code_item == nullptr) {
1824           continue;
1825         }
1826         // Separate executed methods (clinits and profiled methods) from unexecuted methods.
1827         const bool is_clinit = (method.GetAccessFlags() & kAccConstructor) != 0 &&
1828             (method.GetAccessFlags() & kAccStatic) != 0;
1829         const bool is_startup_clinit = is_profile_class && is_clinit;
1830         using Hotness = ProfileCompilationInfo::MethodHotness;
1831         Hotness hotness = info_->GetMethodHotness(MethodReference(dex_file, method_id->GetIndex()));
1832         LayoutType state = LayoutType::kLayoutTypeUnused;
1833         if (hotness.IsHot()) {
1834           // Hot code is compiled, maybe one day it won't be accessed. So lay it out together for
1835           // now.
1836           state = LayoutType::kLayoutTypeHot;
1837         } else if (is_startup_clinit || hotness.GetFlags() == Hotness::kFlagStartup) {
1838           // Startup clinit or a method that only has the startup flag.
1839           state = LayoutType::kLayoutTypeStartupOnly;
1840         } else if (is_clinit) {
1841           state = LayoutType::kLayoutTypeUsedOnce;
1842         } else if (hotness.IsInProfile()) {
1843           state = LayoutType::kLayoutTypeSometimesUsed;
1844         }
1845         auto it = code_item_layout.emplace(code_item, state);
1846         if (!it.second) {
1847           LayoutType& layout_type = it.first->second;
1848           // Already exists, merge the hotness.
1849           layout_type = MergeLayoutType(layout_type, state);
1850         }
1851       }
1852     }
1853   }
1854 
1855   const auto& code_items = header_->CodeItems();
1856   if (VLOG_IS_ON(dex)) {
1857     size_t layout_count[static_cast<size_t>(LayoutType::kLayoutTypeCount)] = {};
1858     for (const std::unique_ptr<dex_ir::CodeItem>& code_item : code_items) {
1859       auto it = code_item_layout.find(code_item.get());
1860       DCHECK(it != code_item_layout.end());
1861       ++layout_count[static_cast<size_t>(it->second)];
1862     }
1863     for (size_t i = 0; i < static_cast<size_t>(LayoutType::kLayoutTypeCount); ++i) {
1864       LOG(INFO) << "Code items in category " << i << " count=" << layout_count[i];
1865     }
1866   }
1867 
1868   // Sort the code items vector by new layout. The writing process will take care of calculating
1869   // all the offsets. Stable sort to preserve any existing locality that might be there.
1870   std::stable_sort(code_items.begin(),
1871                    code_items.end(),
1872                    [&](const std::unique_ptr<dex_ir::CodeItem>& a,
1873                        const std::unique_ptr<dex_ir::CodeItem>& b) {
1874     auto it_a = code_item_layout.find(a.get());
1875     auto it_b = code_item_layout.find(b.get());
1876     DCHECK(it_a != code_item_layout.end());
1877     DCHECK(it_b != code_item_layout.end());
1878     const LayoutType layout_type_a = it_a->second;
1879     const LayoutType layout_type_b = it_b->second;
1880     return layout_type_a < layout_type_b;
1881   });
1882 }
1883 
// Runs the layout passes on the current IR, in this order: string data, class defs together
// with class data, and finally code items. |dex_file| is forwarded unchanged to every pass.
void DexLayout::LayoutOutputFile(const DexFile* dex_file) {
  LayoutStringData(dex_file);
  LayoutClassDefsAndClassData(dex_file);
  LayoutCodeItems(dex_file);
}
1889 
OutputDexFile(const DexFile * input_dex_file,bool compute_offsets,std::unique_ptr<DexContainer> * dex_container,std::string * error_msg)1890 bool DexLayout::OutputDexFile(const DexFile* input_dex_file,
1891                               bool compute_offsets,
1892                               std::unique_ptr<DexContainer>* dex_container,
1893                               std::string* error_msg) {
1894   const std::string& dex_file_location = input_dex_file->GetLocation();
1895   std::unique_ptr<File> new_file;
1896   // If options_.output_dex_directory_ is non null, we are outputting to a file.
1897   if (options_.output_dex_directory_ != nullptr) {
1898     std::string output_location(options_.output_dex_directory_);
1899     const size_t last_slash = dex_file_location.rfind('/');
1900     std::string dex_file_directory = dex_file_location.substr(0, last_slash + 1);
1901     if (output_location == dex_file_directory) {
1902       output_location = dex_file_location + ".new";
1903     } else {
1904       if (!output_location.empty() && output_location.back() != '/') {
1905         output_location += "/";
1906       }
1907       const size_t separator = dex_file_location.rfind('!');
1908       if (separator != std::string::npos) {
1909         output_location += dex_file_location.substr(separator + 1);
1910       } else {
1911         output_location += "classes.dex";
1912       }
1913     }
1914     new_file.reset(OS::CreateEmptyFile(output_location.c_str()));
1915     if (new_file == nullptr) {
1916       LOG(ERROR) << "Could not create dex writer output file: " << output_location;
1917       return false;
1918     }
1919   }
1920   if (!DexWriter::Output(this, dex_container, compute_offsets, error_msg)) {
1921     return false;
1922   }
1923   if (new_file != nullptr) {
1924     DexContainer* const container = dex_container->get();
1925     DexContainer::Section* const main_section = container->GetMainSection();
1926     if (!new_file->WriteFully(main_section->Begin(), main_section->Size())) {
1927       LOG(ERROR) << "Failed to write main section for dex file " << dex_file_location;
1928       new_file->Erase();
1929       return false;
1930     }
1931     DexContainer::Section* const data_section = container->GetDataSection();
1932     if (!new_file->WriteFully(data_section->Begin(), data_section->Size())) {
1933       LOG(ERROR) << "Failed to write data section for dex file " << dex_file_location;
1934       new_file->Erase();
1935       return false;
1936     }
1937     UNUSED(new_file->FlushCloseOrErase());
1938   }
1939   return true;
1940 }
1941 
1942 /*
1943  * Dumps the requested sections of the file.
1944  */
ProcessDexFile(const char * file_name,const DexFile * dex_file,size_t dex_file_index,std::unique_ptr<DexContainer> * dex_container,std::string * error_msg)1945 bool DexLayout::ProcessDexFile(const char* file_name,
1946                                const DexFile* dex_file,
1947                                size_t dex_file_index,
1948                                std::unique_ptr<DexContainer>* dex_container,
1949                                std::string* error_msg) {
1950   const bool has_output_container = dex_container != nullptr;
1951   const bool output = options_.output_dex_directory_ != nullptr || has_output_container;
1952 
1953   // Try to avoid eagerly assigning offsets to find bugs since Offset will abort if the offset
1954   // is unassigned.
1955   bool eagerly_assign_offsets = false;
1956   if (options_.visualize_pattern_ || options_.show_section_statistics_ || options_.dump_) {
1957     // These options required the offsets for dumping purposes.
1958     eagerly_assign_offsets = true;
1959   }
1960   std::unique_ptr<dex_ir::Header> header(dex_ir::DexIrBuilder(*dex_file,
1961                                                                eagerly_assign_offsets,
1962                                                                GetOptions()));
1963   SetHeader(header.get());
1964 
1965   if (options_.verbose_) {
1966     fprintf(out_file_, "Opened '%s', DEX version '%.3s'\n",
1967             file_name, dex_file->GetHeader().magic_ + 4);
1968   }
1969 
1970   if (options_.visualize_pattern_) {
1971     VisualizeDexLayout(header_, dex_file, dex_file_index, info_);
1972     return true;
1973   }
1974 
1975   if (options_.show_section_statistics_) {
1976     ShowDexSectionStatistics(header_, dex_file_index);
1977     return true;
1978   }
1979 
1980   // Dump dex file.
1981   if (options_.dump_) {
1982     DumpDexFile();
1983   }
1984 
1985   // In case we are outputting to a file, keep it open so we can verify.
1986   if (output) {
1987     // Layout information about what strings and code items are hot. Used by the writing process
1988     // to generate the sections that are stored in the oat file.
1989     bool do_layout = info_ != nullptr && !info_->IsEmpty();
1990     if (do_layout) {
1991       LayoutOutputFile(dex_file);
1992     }
1993     // The output needs a dex container, use a temporary one.
1994     std::unique_ptr<DexContainer> temp_container;
1995     if (dex_container == nullptr) {
1996       dex_container = &temp_container;
1997     }
1998     // If we didn't set the offsets eagerly, we definitely need to compute them here.
1999     if (!OutputDexFile(dex_file, do_layout || !eagerly_assign_offsets, dex_container, error_msg)) {
2000       return false;
2001     }
2002 
2003     // Clear header before verifying to reduce peak RAM usage.
2004     const size_t file_size = header_->FileSize();
2005     header.reset();
2006 
2007     // Verify the output dex file's structure, only enabled by default for debug builds.
2008     if (options_.verify_output_ && has_output_container) {
2009       std::string location = "memory mapped file for " + std::string(file_name);
2010       // Dex file verifier cannot handle compact dex.
2011       bool verify = options_.compact_dex_level_ == CompactDexLevel::kCompactDexLevelNone;
2012       const ArtDexFileLoader dex_file_loader;
2013       DexContainer::Section* const main_section = (*dex_container)->GetMainSection();
2014       DexContainer::Section* const data_section = (*dex_container)->GetDataSection();
2015       DCHECK_EQ(file_size, main_section->Size())
2016           << main_section->Size() << " " << data_section->Size();
2017       std::unique_ptr<const DexFile> output_dex_file(
2018           dex_file_loader.OpenWithDataSection(
2019               main_section->Begin(),
2020               main_section->Size(),
2021               data_section->Begin(),
2022               data_section->Size(),
2023               location,
2024               /* location_checksum= */ 0,
2025               /*oat_dex_file=*/ nullptr,
2026               verify,
2027               /*verify_checksum=*/ false,
2028               error_msg));
2029       CHECK(output_dex_file != nullptr) << "Failed to re-open output file:" << *error_msg;
2030 
2031       // Do IR-level comparison between input and output. This check ignores potential differences
2032       // due to layout, so offsets are not checked. Instead, it checks the data contents of each
2033       // item.
2034       //
2035       // Regenerate output IR to catch any bugs that might happen during writing.
2036       std::unique_ptr<dex_ir::Header> output_header(
2037           dex_ir::DexIrBuilder(*output_dex_file,
2038                                /*eagerly_assign_offsets=*/ true,
2039                                GetOptions()));
2040       std::unique_ptr<dex_ir::Header> orig_header(
2041           dex_ir::DexIrBuilder(*dex_file,
2042                                /*eagerly_assign_offsets=*/ true,
2043                                GetOptions()));
2044       CHECK(VerifyOutputDexFile(output_header.get(), orig_header.get(), error_msg)) << *error_msg;
2045     }
2046   }
2047   return true;
2048 }
2049 
2050 /*
2051  * Processes a single file (either direct .dex or indirect .zip/.jar/.apk).
2052  */
ProcessFile(const char * file_name)2053 int DexLayout::ProcessFile(const char* file_name) {
2054   if (options_.verbose_) {
2055     fprintf(out_file_, "Processing '%s'...\n", file_name);
2056   }
2057 
2058   // If the file is not a .dex file, the function tries .zip/.jar/.apk files,
2059   // all of which are Zip archives with "classes.dex" inside.
2060   const bool verify_checksum = !options_.ignore_bad_checksum_;
2061   std::string error_msg;
2062   const ArtDexFileLoader dex_file_loader;
2063   std::vector<std::unique_ptr<const DexFile>> dex_files;
2064   if (!dex_file_loader.Open(
2065         file_name, file_name, /* verify= */ true, verify_checksum, &error_msg, &dex_files)) {
2066     // Display returned error message to user. Note that this error behavior
2067     // differs from the error messages shown by the original Dalvik dexdump.
2068     LOG(ERROR) << error_msg;
2069     return -1;
2070   }
2071 
2072   // Success. Either report checksum verification or process
2073   // all dex files found in given file.
2074   if (options_.checksum_only_) {
2075     fprintf(out_file_, "Checksum verified\n");
2076   } else {
2077     for (size_t i = 0; i < dex_files.size(); i++) {
2078       // Pass in a null container to avoid output by default.
2079       if (!ProcessDexFile(file_name,
2080                           dex_files[i].get(),
2081                           i,
2082                           /*dex_container=*/ nullptr,
2083                           &error_msg)) {
2084         LOG(WARNING) << "Failed to run dex file " << i << " in " << file_name << " : " << error_msg;
2085       }
2086     }
2087   }
2088   return 0;
2089 }
2090 
2091 }  // namespace art
2092