1 /*
2 * Copyright (C) 2016 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *
16 * Implementation file of the dexlayout utility.
17 *
18 * This is a tool to read dex files into an internal representation,
19 * reorganize the representation, and emit dex files with a better
20 * file layout.
21 */
22
#include "dexlayout.h"

#include <inttypes.h>
#include <stdio.h>

#include <algorithm>
#include <cctype>
#include <iostream>
#include <memory>
#include <sstream>
#include <vector>

#include "android-base/stringprintf.h"

#include "base/logging.h"  // For VLOG_IS_ON.
#include "base/hiddenapi_flags.h"
#include "base/mem_map.h"
#include "base/mman.h"  // For the PROT_* and MAP_* constants.
#include "base/os.h"
#include "base/utils.h"
#include "dex/art_dex_file_loader.h"
#include "dex/descriptors_names.h"
#include "dex/dex_file-inl.h"
#include "dex/dex_file_layout.h"
#include "dex/dex_file_loader.h"
#include "dex/dex_file_types.h"
#include "dex/dex_file_verifier.h"
#include "dex/dex_instruction-inl.h"
#include "dex_ir_builder.h"
#include "dex_verify.h"
#include "dex_visualize.h"
#include "dex_writer.h"
#include "profile/profile_compilation_info.h"
54
55 namespace art {
56
57 using android::base::StringPrintf;
58
/*
 * Selects which table of flag names CreateAccessFlagStr() consults:
 * the names for class, method, or field access flags. kAccessForMAX is
 * the number of tables, not a valid selector.
 */
enum AccessFor {
  kAccessForClass = 0,
  kAccessForMethod = 1,
  kAccessForField = 2,
  kAccessForMAX
};

// Number of low-order flag bits that have an entry in each name table.
const int kNumFlags = 18;
66
/*
 * Reads two bytes starting at "src" and combines them as a 16-bit
 * little-endian value (src[0] is the low byte, src[1] the high byte).
 */
static inline uint16_t Get2LE(unsigned char const* src) {
  const unsigned int low_byte = src[0];
  const unsigned int high_byte = src[1];
  return static_cast<uint16_t>(low_byte | (high_byte << 8));
}
73
/*
 * Extracts the simple class name from a class type descriptor: everything
 * after the last '/' (or after the leading 'L' when there is no '/'), with
 * the final character (the trailing ';') dropped. For example,
 * "Ljava/lang/String;" becomes "String" and "LFoo;" becomes "Foo".
 *
 * Descriptors too short to contain a name (including the empty string)
 * yield an empty result rather than an out-of-range substr() call.
 */
static std::string DescriptorClassToName(const char* str) {
  const std::string descriptor(str);

  // The name starts just past the last '/', or past the 'L' if there is none.
  const size_t last_slash = descriptor.rfind('/');
  const size_t name_start = (last_slash == std::string::npos) ? 1 : last_slash + 1;

  // Nothing follows the start position: avoid underflowing the length below.
  if (descriptor.size() <= name_start) {
    return std::string();
  }

  // Copy the class name over, trimming the trailing ';'.
  const size_t name_length = descriptor.size() - 1 - name_start;
  return descriptor.substr(name_start, name_length);
}
94
/*
 * Maps a boolean to the literal text "true" or "false".
 */
static const char* StrBool(bool val) {
  if (val) {
    return "true";
  }
  return "false";
}
101
/*
 * Maps a boolean to the text "true" or "false" wrapped in double quotes.
 */
static const char* QuotedBool(bool val) {
  if (val) {
    return "\"true\"";
  }
  return "\"false\"";
}
108
109 /*
110 * Returns a quoted string representing the access flags.
111 */
QuotedVisibility(uint32_t access_flags)112 static const char* QuotedVisibility(uint32_t access_flags) {
113 if (access_flags & kAccPublic) {
114 return "\"public\"";
115 } else if (access_flags & kAccProtected) {
116 return "\"protected\"";
117 } else if (access_flags & kAccPrivate) {
118 return "\"private\"";
119 } else {
120 return "\"package\"";
121 }
122 }
123
/*
 * Returns the number of bits set to '1' in a 32-bit word, using a
 * branch-free parallel summation.
 */
static int CountOnes(uint32_t val) {
  // Each 2-bit field now holds the count of ones in that pair of bits.
  const uint32_t pair_counts = val - ((val >> 1) & 0x55555555u);
  // Each 4-bit field holds the count of ones in that nibble.
  const uint32_t nibble_counts =
      (pair_counts & 0x33333333u) + ((pair_counts >> 2) & 0x33333333u);
  // Each byte holds the count of ones in that byte.
  const uint32_t byte_counts = (nibble_counts + (nibble_counts >> 4)) & 0x0F0F0F0Fu;
  // The multiply accumulates all four byte counts into the top byte.
  return static_cast<int>((byte_counts * 0x01010101u) >> 24);
}
132
133 /*
134 * Creates a new string with human-readable access flags.
135 *
136 * In the base language the access_flags fields are type uint16_t; in Dalvik they're uint32_t.
137 */
CreateAccessFlagStr(uint32_t flags,AccessFor for_what)138 static char* CreateAccessFlagStr(uint32_t flags, AccessFor for_what) {
139 static const char* kAccessStrings[kAccessForMAX][kNumFlags] = {
140 {
141 "PUBLIC", /* 0x00001 */
142 "PRIVATE", /* 0x00002 */
143 "PROTECTED", /* 0x00004 */
144 "STATIC", /* 0x00008 */
145 "FINAL", /* 0x00010 */
146 "?", /* 0x00020 */
147 "?", /* 0x00040 */
148 "?", /* 0x00080 */
149 "?", /* 0x00100 */
150 "INTERFACE", /* 0x00200 */
151 "ABSTRACT", /* 0x00400 */
152 "?", /* 0x00800 */
153 "SYNTHETIC", /* 0x01000 */
154 "ANNOTATION", /* 0x02000 */
155 "ENUM", /* 0x04000 */
156 "?", /* 0x08000 */
157 "VERIFIED", /* 0x10000 */
158 "OPTIMIZED", /* 0x20000 */
159 }, {
160 "PUBLIC", /* 0x00001 */
161 "PRIVATE", /* 0x00002 */
162 "PROTECTED", /* 0x00004 */
163 "STATIC", /* 0x00008 */
164 "FINAL", /* 0x00010 */
165 "SYNCHRONIZED", /* 0x00020 */
166 "BRIDGE", /* 0x00040 */
167 "VARARGS", /* 0x00080 */
168 "NATIVE", /* 0x00100 */
169 "?", /* 0x00200 */
170 "ABSTRACT", /* 0x00400 */
171 "STRICT", /* 0x00800 */
172 "SYNTHETIC", /* 0x01000 */
173 "?", /* 0x02000 */
174 "?", /* 0x04000 */
175 "MIRANDA", /* 0x08000 */
176 "CONSTRUCTOR", /* 0x10000 */
177 "DECLARED_SYNCHRONIZED", /* 0x20000 */
178 }, {
179 "PUBLIC", /* 0x00001 */
180 "PRIVATE", /* 0x00002 */
181 "PROTECTED", /* 0x00004 */
182 "STATIC", /* 0x00008 */
183 "FINAL", /* 0x00010 */
184 "?", /* 0x00020 */
185 "VOLATILE", /* 0x00040 */
186 "TRANSIENT", /* 0x00080 */
187 "?", /* 0x00100 */
188 "?", /* 0x00200 */
189 "?", /* 0x00400 */
190 "?", /* 0x00800 */
191 "SYNTHETIC", /* 0x01000 */
192 "?", /* 0x02000 */
193 "ENUM", /* 0x04000 */
194 "?", /* 0x08000 */
195 "?", /* 0x10000 */
196 "?", /* 0x20000 */
197 },
198 };
199
200 // Allocate enough storage to hold the expected number of strings,
201 // plus a space between each. We over-allocate, using the longest
202 // string above as the base metric.
203 const int kLongest = 21; // The strlen of longest string above.
204 const int count = CountOnes(flags);
205 char* str;
206 char* cp;
207 cp = str = reinterpret_cast<char*>(malloc(count * (kLongest + 1) + 1));
208
209 for (int i = 0; i < kNumFlags; i++) {
210 if (flags & 0x01) {
211 const char* accessStr = kAccessStrings[for_what][i];
212 const int len = strlen(accessStr);
213 if (cp != str) {
214 *cp++ = ' ';
215 }
216 memcpy(cp, accessStr, len);
217 cp += len;
218 }
219 flags >>= 1;
220 } // for
221
222 *cp = '\0';
223 return str;
224 }
225
GetHiddenapiFlagStr(uint32_t hiddenapi_flags)226 static std::string GetHiddenapiFlagStr(uint32_t hiddenapi_flags) {
227 std::stringstream ss;
228 hiddenapi::ApiList(hiddenapi_flags).Dump(ss);
229 std::string api_list = ss.str();
230 std::transform(api_list.begin(), api_list.end(), api_list.begin(), ::toupper);
231 return api_list;
232 }
233
GetSignatureForProtoId(const dex_ir::ProtoId * proto)234 static std::string GetSignatureForProtoId(const dex_ir::ProtoId* proto) {
235 if (proto == nullptr) {
236 return "<no signature>";
237 }
238
239 std::string result("(");
240 const dex_ir::TypeList* type_list = proto->Parameters();
241 if (type_list != nullptr) {
242 for (const dex_ir::TypeId* type_id : *type_list->GetTypeList()) {
243 result += type_id->GetStringId()->Data();
244 }
245 }
246 result += ")";
247 result += proto->ReturnType()->GetStringId()->Data();
248 return result;
249 }
250
/*
 * Copies "len" bytes from "data" to "out", replacing bytes that are not
 * printable ASCII:
 *   - NUL becomes the two characters '\' '0',
 *   - newline becomes the two characters '\' 'n',
 *   - any other byte below 0x20 becomes '.',
 *   - any byte at or above 0x80 becomes '?'.
 * All remaining bytes are copied unchanged.
 *
 * The output buffer must be able to hold (2*len)+1 bytes. The result is
 * NUL-terminated.
 */
static void Asciify(char* out, const unsigned char* data, size_t len) {
  const unsigned char* const end = data + len;
  for (const unsigned char* cursor = data; cursor != end; ++cursor) {
    const unsigned char byte = *cursor;
    if (byte == '\0') {
      *out++ = '\\';
      *out++ = '0';
    } else if (byte == '\n') {
      *out++ = '\\';
      *out++ = 'n';
    } else if (byte < 0x20) {
      // Could do more here, but we don't need them yet.
      *out++ = '.';
    } else if (byte >= 0x80) {
      *out++ = '?';
    } else {
      *out++ = byte;
    }
  }
  *out = '\0';
}
284
/*
 * Writes the NUL-terminated string "p" to "out_file" surrounded by double
 * quotes. Backslash, double quote, tab, newline and carriage return are
 * written as the two-character escapes \\ \" \t \n \r; every other
 * character is written unchanged.
 */
static void DumpEscapedString(const char* p, FILE* out_file) {
  fputs("\"", out_file);
  while (*p != '\0') {
    const char current = *p++;
    // Escape sequence for this character, or null when it is emitted as-is.
    const char* escape = nullptr;
    switch (current) {
      case '\\': escape = "\\\\"; break;
      case '\"': escape = "\\\""; break;
      case '\t': escape = "\\t";  break;
      case '\n': escape = "\\n";  break;
      case '\r': escape = "\\r";  break;
      default:                    break;
    }
    if (escape != nullptr) {
      fputs(escape, out_file);
    } else {
      putc(current, out_file);
    }
  }
  fputs("\"", out_file);
}
313
/*
 * Writes the NUL-terminated string "p" to "out_file" in a form that is safe
 * inside an XML attribute value. The markup characters & < > " are written
 * as the predefined entities &amp; &lt; &gt; &quot;, and tab, newline and
 * carriage return are written as the character references &#x9; &#xA; &#xD;
 * so they are not affected by attribute-value whitespace normalization.
 * Every other character is written unchanged. No surrounding quotes are
 * added; the caller supplies them.
 */
static void DumpXmlAttribute(const char* p, FILE* out_file) {
  for (; *p; p++) {
    switch (*p) {
      case '&':
        fputs("&amp;", out_file);
        break;
      case '<':
        fputs("&lt;", out_file);
        break;
      case '>':
        fputs("&gt;", out_file);
        break;
      case '"':
        fputs("&quot;", out_file);
        break;
      case '\t':
        fputs("&#x9;", out_file);
        break;
      case '\n':
        fputs("&#xA;", out_file);
        break;
      case '\r':
        fputs("&#xD;", out_file);
        break;
      default:
        putc(*p, out_file);
    }  // switch
  }  // for
}
346
347 /*
348 * Helper for dumpInstruction(), which builds the string
349 * representation for the index in the given instruction.
350 * Returns a pointer to a buffer of sufficient size.
351 */
IndexString(dex_ir::Header * header,const Instruction * dec_insn,size_t buf_size)352 static std::unique_ptr<char[]> IndexString(dex_ir::Header* header,
353 const Instruction* dec_insn,
354 size_t buf_size) {
355 std::unique_ptr<char[]> buf(new char[buf_size]);
356 // Determine index and width of the string.
357 uint32_t index = 0;
358 uint32_t secondary_index = dex::kDexNoIndex;
359 uint32_t width = 4;
360 switch (Instruction::FormatOf(dec_insn->Opcode())) {
361 // SOME NOT SUPPORTED:
362 // case Instruction::k20bc:
363 case Instruction::k21c:
364 case Instruction::k35c:
365 // case Instruction::k35ms:
366 case Instruction::k3rc:
367 // case Instruction::k3rms:
368 // case Instruction::k35mi:
369 // case Instruction::k3rmi:
370 index = dec_insn->VRegB();
371 width = 4;
372 break;
373 case Instruction::k31c:
374 index = dec_insn->VRegB();
375 width = 8;
376 break;
377 case Instruction::k22c:
378 // case Instruction::k22cs:
379 index = dec_insn->VRegC();
380 width = 4;
381 break;
382 case Instruction::k45cc:
383 case Instruction::k4rcc:
384 index = dec_insn->VRegB();
385 secondary_index = dec_insn->VRegH();
386 width = 4;
387 break;
388 default:
389 break;
390 } // switch
391
392 // Determine index type.
393 size_t outSize = 0;
394 switch (Instruction::IndexTypeOf(dec_insn->Opcode())) {
395 case Instruction::kIndexUnknown:
396 // This function should never get called for this type, but do
397 // something sensible here, just to help with debugging.
398 outSize = snprintf(buf.get(), buf_size, "<unknown-index>");
399 break;
400 case Instruction::kIndexNone:
401 // This function should never get called for this type, but do
402 // something sensible here, just to help with debugging.
403 outSize = snprintf(buf.get(), buf_size, "<no-index>");
404 break;
405 case Instruction::kIndexTypeRef:
406 if (index < header->TypeIds().Size()) {
407 const char* tp = header->TypeIds()[index]->GetStringId()->Data();
408 outSize = snprintf(buf.get(), buf_size, "%s // type@%0*x", tp, width, index);
409 } else {
410 outSize = snprintf(buf.get(), buf_size, "<type?> // type@%0*x", width, index);
411 }
412 break;
413 case Instruction::kIndexStringRef:
414 if (index < header->StringIds().Size()) {
415 const char* st = header->StringIds()[index]->Data();
416 outSize = snprintf(buf.get(), buf_size, "\"%s\" // string@%0*x", st, width, index);
417 } else {
418 outSize = snprintf(buf.get(), buf_size, "<string?> // string@%0*x", width, index);
419 }
420 break;
421 case Instruction::kIndexMethodRef:
422 if (index < header->MethodIds().Size()) {
423 dex_ir::MethodId* method_id = header->MethodIds()[index];
424 const char* name = method_id->Name()->Data();
425 std::string type_descriptor = GetSignatureForProtoId(method_id->Proto());
426 const char* back_descriptor = method_id->Class()->GetStringId()->Data();
427 outSize = snprintf(buf.get(), buf_size, "%s.%s:%s // method@%0*x",
428 back_descriptor, name, type_descriptor.c_str(), width, index);
429 } else {
430 outSize = snprintf(buf.get(), buf_size, "<method?> // method@%0*x", width, index);
431 }
432 break;
433 case Instruction::kIndexFieldRef:
434 if (index < header->FieldIds().Size()) {
435 dex_ir::FieldId* field_id = header->FieldIds()[index];
436 const char* name = field_id->Name()->Data();
437 const char* type_descriptor = field_id->Type()->GetStringId()->Data();
438 const char* back_descriptor = field_id->Class()->GetStringId()->Data();
439 outSize = snprintf(buf.get(), buf_size, "%s.%s:%s // field@%0*x",
440 back_descriptor, name, type_descriptor, width, index);
441 } else {
442 outSize = snprintf(buf.get(), buf_size, "<field?> // field@%0*x", width, index);
443 }
444 break;
445 case Instruction::kIndexVtableOffset:
446 outSize = snprintf(buf.get(), buf_size, "[%0*x] // vtable #%0*x",
447 width, index, width, index);
448 break;
449 case Instruction::kIndexFieldOffset:
450 outSize = snprintf(buf.get(), buf_size, "[obj+%0*x]", width, index);
451 break;
452 case Instruction::kIndexMethodAndProtoRef: {
453 std::string method("<method?>");
454 std::string proto("<proto?>");
455 if (index < header->MethodIds().Size()) {
456 dex_ir::MethodId* method_id = header->MethodIds()[index];
457 const char* name = method_id->Name()->Data();
458 std::string type_descriptor = GetSignatureForProtoId(method_id->Proto());
459 const char* back_descriptor = method_id->Class()->GetStringId()->Data();
460 method = StringPrintf("%s.%s:%s", back_descriptor, name, type_descriptor.c_str());
461 }
462 if (secondary_index < header->ProtoIds().Size()) {
463 dex_ir::ProtoId* proto_id = header->ProtoIds()[secondary_index];
464 proto = GetSignatureForProtoId(proto_id);
465 }
466 outSize = snprintf(buf.get(), buf_size, "%s, %s // method@%0*x, proto@%0*x",
467 method.c_str(), proto.c_str(), width, index, width, secondary_index);
468 }
469 break;
470 // SOME NOT SUPPORTED:
471 // case Instruction::kIndexVaries:
472 // case Instruction::kIndexInlineMethod:
473 default:
474 outSize = snprintf(buf.get(), buf_size, "<?>");
475 break;
476 } // switch
477
478 // Determine success of string construction.
479 if (outSize >= buf_size) {
480 // The buffer wasn't big enough; retry with computed size. Note: snprintf()
481 // doesn't count/ the '\0' as part of its returned size, so we add explicit
482 // space for it here.
483 return IndexString(header, dec_insn, outSize + 1);
484 }
485 return buf;
486 }
487
488 /*
489 * Dumps encoded annotation.
490 */
DumpEncodedAnnotation(dex_ir::EncodedAnnotation * annotation)491 void DexLayout::DumpEncodedAnnotation(dex_ir::EncodedAnnotation* annotation) {
492 fputs(annotation->GetType()->GetStringId()->Data(), out_file_);
493 // Display all name=value pairs.
494 for (auto& subannotation : *annotation->GetAnnotationElements()) {
495 fputc(' ', out_file_);
496 fputs(subannotation->GetName()->Data(), out_file_);
497 fputc('=', out_file_);
498 DumpEncodedValue(subannotation->GetValue());
499 }
500 }
501 /*
502 * Dumps encoded value.
503 */
DumpEncodedValue(const dex_ir::EncodedValue * data)504 void DexLayout::DumpEncodedValue(const dex_ir::EncodedValue* data) {
505 switch (data->Type()) {
506 case DexFile::kDexAnnotationByte:
507 fprintf(out_file_, "%" PRId8, data->GetByte());
508 break;
509 case DexFile::kDexAnnotationShort:
510 fprintf(out_file_, "%" PRId16, data->GetShort());
511 break;
512 case DexFile::kDexAnnotationChar:
513 fprintf(out_file_, "%" PRIu16, data->GetChar());
514 break;
515 case DexFile::kDexAnnotationInt:
516 fprintf(out_file_, "%" PRId32, data->GetInt());
517 break;
518 case DexFile::kDexAnnotationLong:
519 fprintf(out_file_, "%" PRId64, data->GetLong());
520 break;
521 case DexFile::kDexAnnotationFloat: {
522 fprintf(out_file_, "%g", data->GetFloat());
523 break;
524 }
525 case DexFile::kDexAnnotationDouble: {
526 fprintf(out_file_, "%g", data->GetDouble());
527 break;
528 }
529 case DexFile::kDexAnnotationString: {
530 dex_ir::StringId* string_id = data->GetStringId();
531 if (options_.output_format_ == kOutputPlain) {
532 DumpEscapedString(string_id->Data(), out_file_);
533 } else {
534 DumpXmlAttribute(string_id->Data(), out_file_);
535 }
536 break;
537 }
538 case DexFile::kDexAnnotationType: {
539 dex_ir::TypeId* type_id = data->GetTypeId();
540 fputs(type_id->GetStringId()->Data(), out_file_);
541 break;
542 }
543 case DexFile::kDexAnnotationField:
544 case DexFile::kDexAnnotationEnum: {
545 dex_ir::FieldId* field_id = data->GetFieldId();
546 fputs(field_id->Name()->Data(), out_file_);
547 break;
548 }
549 case DexFile::kDexAnnotationMethod: {
550 dex_ir::MethodId* method_id = data->GetMethodId();
551 fputs(method_id->Name()->Data(), out_file_);
552 break;
553 }
554 case DexFile::kDexAnnotationArray: {
555 fputc('{', out_file_);
556 // Display all elements.
557 for (auto& value : *data->GetEncodedArray()->GetEncodedValues()) {
558 fputc(' ', out_file_);
559 DumpEncodedValue(value.get());
560 }
561 fputs(" }", out_file_);
562 break;
563 }
564 case DexFile::kDexAnnotationAnnotation: {
565 DumpEncodedAnnotation(data->GetEncodedAnnotation());
566 break;
567 }
568 case DexFile::kDexAnnotationNull:
569 fputs("null", out_file_);
570 break;
571 case DexFile::kDexAnnotationBoolean:
572 fputs(StrBool(data->GetBoolean()), out_file_);
573 break;
574 default:
575 fputs("????", out_file_);
576 break;
577 } // switch
578 }
579
580 /*
581 * Dumps the file header.
582 */
DumpFileHeader()583 void DexLayout::DumpFileHeader() {
584 char sanitized[8 * 2 + 1];
585 fprintf(out_file_, "DEX file header:\n");
586 Asciify(sanitized, header_->Magic(), 8);
587 fprintf(out_file_, "magic : '%s'\n", sanitized);
588 fprintf(out_file_, "checksum : %08x\n", header_->Checksum());
589 fprintf(out_file_, "signature : %02x%02x...%02x%02x\n",
590 header_->Signature()[0], header_->Signature()[1],
591 header_->Signature()[DexFile::kSha1DigestSize - 2],
592 header_->Signature()[DexFile::kSha1DigestSize - 1]);
593 fprintf(out_file_, "file_size : %d\n", header_->FileSize());
594 fprintf(out_file_, "header_size : %d\n", header_->HeaderSize());
595 fprintf(out_file_, "link_size : %d\n", header_->LinkSize());
596 fprintf(out_file_, "link_off : %d (0x%06x)\n",
597 header_->LinkOffset(), header_->LinkOffset());
598 fprintf(out_file_, "string_ids_size : %d\n", header_->StringIds().Size());
599 fprintf(out_file_, "string_ids_off : %d (0x%06x)\n",
600 header_->StringIds().GetOffset(), header_->StringIds().GetOffset());
601 fprintf(out_file_, "type_ids_size : %d\n", header_->TypeIds().Size());
602 fprintf(out_file_, "type_ids_off : %d (0x%06x)\n",
603 header_->TypeIds().GetOffset(), header_->TypeIds().GetOffset());
604 fprintf(out_file_, "proto_ids_size : %d\n", header_->ProtoIds().Size());
605 fprintf(out_file_, "proto_ids_off : %d (0x%06x)\n",
606 header_->ProtoIds().GetOffset(), header_->ProtoIds().GetOffset());
607 fprintf(out_file_, "field_ids_size : %d\n", header_->FieldIds().Size());
608 fprintf(out_file_, "field_ids_off : %d (0x%06x)\n",
609 header_->FieldIds().GetOffset(), header_->FieldIds().GetOffset());
610 fprintf(out_file_, "method_ids_size : %d\n", header_->MethodIds().Size());
611 fprintf(out_file_, "method_ids_off : %d (0x%06x)\n",
612 header_->MethodIds().GetOffset(), header_->MethodIds().GetOffset());
613 fprintf(out_file_, "class_defs_size : %d\n", header_->ClassDefs().Size());
614 fprintf(out_file_, "class_defs_off : %d (0x%06x)\n",
615 header_->ClassDefs().GetOffset(), header_->ClassDefs().GetOffset());
616 fprintf(out_file_, "data_size : %d\n", header_->DataSize());
617 fprintf(out_file_, "data_off : %d (0x%06x)\n\n",
618 header_->DataOffset(), header_->DataOffset());
619 }
620
621 /*
622 * Dumps a class_def_item.
623 */
DumpClassDef(int idx)624 void DexLayout::DumpClassDef(int idx) {
625 // General class information.
626 dex_ir::ClassDef* class_def = header_->ClassDefs()[idx];
627 fprintf(out_file_, "Class #%d header:\n", idx);
628 fprintf(out_file_, "class_idx : %d\n", class_def->ClassType()->GetIndex());
629 fprintf(out_file_, "access_flags : %d (0x%04x)\n",
630 class_def->GetAccessFlags(), class_def->GetAccessFlags());
631 uint32_t superclass_idx = class_def->Superclass() == nullptr ?
632 DexFile::kDexNoIndex16 : class_def->Superclass()->GetIndex();
633 fprintf(out_file_, "superclass_idx : %d\n", superclass_idx);
634 fprintf(out_file_, "interfaces_off : %d (0x%06x)\n",
635 class_def->InterfacesOffset(), class_def->InterfacesOffset());
636 uint32_t source_file_offset = 0xffffffffU;
637 if (class_def->SourceFile() != nullptr) {
638 source_file_offset = class_def->SourceFile()->GetIndex();
639 }
640 fprintf(out_file_, "source_file_idx : %d\n", source_file_offset);
641 uint32_t annotations_offset = 0;
642 if (class_def->Annotations() != nullptr) {
643 annotations_offset = class_def->Annotations()->GetOffset();
644 }
645 fprintf(out_file_, "annotations_off : %d (0x%06x)\n",
646 annotations_offset, annotations_offset);
647 if (class_def->GetClassData() == nullptr) {
648 fprintf(out_file_, "class_data_off : %d (0x%06x)\n", 0, 0);
649 } else {
650 fprintf(out_file_, "class_data_off : %d (0x%06x)\n",
651 class_def->GetClassData()->GetOffset(), class_def->GetClassData()->GetOffset());
652 }
653
654 // Fields and methods.
655 dex_ir::ClassData* class_data = class_def->GetClassData();
656 if (class_data != nullptr && class_data->StaticFields() != nullptr) {
657 fprintf(out_file_, "static_fields_size : %zu\n", class_data->StaticFields()->size());
658 } else {
659 fprintf(out_file_, "static_fields_size : 0\n");
660 }
661 if (class_data != nullptr && class_data->InstanceFields() != nullptr) {
662 fprintf(out_file_, "instance_fields_size: %zu\n", class_data->InstanceFields()->size());
663 } else {
664 fprintf(out_file_, "instance_fields_size: 0\n");
665 }
666 if (class_data != nullptr && class_data->DirectMethods() != nullptr) {
667 fprintf(out_file_, "direct_methods_size : %zu\n", class_data->DirectMethods()->size());
668 } else {
669 fprintf(out_file_, "direct_methods_size : 0\n");
670 }
671 if (class_data != nullptr && class_data->VirtualMethods() != nullptr) {
672 fprintf(out_file_, "virtual_methods_size: %zu\n", class_data->VirtualMethods()->size());
673 } else {
674 fprintf(out_file_, "virtual_methods_size: 0\n");
675 }
676 fprintf(out_file_, "\n");
677 }
678
679 /**
680 * Dumps an annotation set item.
681 */
DumpAnnotationSetItem(dex_ir::AnnotationSetItem * set_item)682 void DexLayout::DumpAnnotationSetItem(dex_ir::AnnotationSetItem* set_item) {
683 if (set_item == nullptr || set_item->GetItems()->size() == 0) {
684 fputs(" empty-annotation-set\n", out_file_);
685 return;
686 }
687 for (dex_ir::AnnotationItem* annotation : *set_item->GetItems()) {
688 if (annotation == nullptr) {
689 continue;
690 }
691 fputs(" ", out_file_);
692 switch (annotation->GetVisibility()) {
693 case DexFile::kDexVisibilityBuild: fputs("VISIBILITY_BUILD ", out_file_); break;
694 case DexFile::kDexVisibilityRuntime: fputs("VISIBILITY_RUNTIME ", out_file_); break;
695 case DexFile::kDexVisibilitySystem: fputs("VISIBILITY_SYSTEM ", out_file_); break;
696 default: fputs("VISIBILITY_UNKNOWN ", out_file_); break;
697 } // switch
698 DumpEncodedAnnotation(annotation->GetAnnotation());
699 fputc('\n', out_file_);
700 }
701 }
702
703 /*
704 * Dumps class annotations.
705 */
DumpClassAnnotations(int idx)706 void DexLayout::DumpClassAnnotations(int idx) {
707 dex_ir::ClassDef* class_def = header_->ClassDefs()[idx];
708 dex_ir::AnnotationsDirectoryItem* annotations_directory = class_def->Annotations();
709 if (annotations_directory == nullptr) {
710 return; // none
711 }
712
713 fprintf(out_file_, "Class #%d annotations:\n", idx);
714
715 dex_ir::AnnotationSetItem* class_set_item = annotations_directory->GetClassAnnotation();
716 dex_ir::FieldAnnotationVector* fields = annotations_directory->GetFieldAnnotations();
717 dex_ir::MethodAnnotationVector* methods = annotations_directory->GetMethodAnnotations();
718 dex_ir::ParameterAnnotationVector* parameters = annotations_directory->GetParameterAnnotations();
719
720 // Annotations on the class itself.
721 if (class_set_item != nullptr) {
722 fprintf(out_file_, "Annotations on class\n");
723 DumpAnnotationSetItem(class_set_item);
724 }
725
726 // Annotations on fields.
727 if (fields != nullptr) {
728 for (auto& field : *fields) {
729 const dex_ir::FieldId* field_id = field->GetFieldId();
730 const uint32_t field_idx = field_id->GetIndex();
731 const char* field_name = field_id->Name()->Data();
732 fprintf(out_file_, "Annotations on field #%u '%s'\n", field_idx, field_name);
733 DumpAnnotationSetItem(field->GetAnnotationSetItem());
734 }
735 }
736
737 // Annotations on methods.
738 if (methods != nullptr) {
739 for (auto& method : *methods) {
740 const dex_ir::MethodId* method_id = method->GetMethodId();
741 const uint32_t method_idx = method_id->GetIndex();
742 const char* method_name = method_id->Name()->Data();
743 fprintf(out_file_, "Annotations on method #%u '%s'\n", method_idx, method_name);
744 DumpAnnotationSetItem(method->GetAnnotationSetItem());
745 }
746 }
747
748 // Annotations on method parameters.
749 if (parameters != nullptr) {
750 for (auto& parameter : *parameters) {
751 const dex_ir::MethodId* method_id = parameter->GetMethodId();
752 const uint32_t method_idx = method_id->GetIndex();
753 const char* method_name = method_id->Name()->Data();
754 fprintf(out_file_, "Annotations on method #%u '%s' parameters\n", method_idx, method_name);
755 uint32_t j = 0;
756 for (dex_ir::AnnotationSetItem* annotation : *parameter->GetAnnotations()->GetItems()) {
757 fprintf(out_file_, "#%u\n", j);
758 DumpAnnotationSetItem(annotation);
759 ++j;
760 }
761 }
762 }
763
764 fputc('\n', out_file_);
765 }
766
767 /*
768 * Dumps an interface that a class declares to implement.
769 */
DumpInterface(const dex_ir::TypeId * type_item,int i)770 void DexLayout::DumpInterface(const dex_ir::TypeId* type_item, int i) {
771 const char* interface_name = type_item->GetStringId()->Data();
772 if (options_.output_format_ == kOutputPlain) {
773 fprintf(out_file_, " #%d : '%s'\n", i, interface_name);
774 } else {
775 std::string dot(DescriptorToDot(interface_name));
776 fprintf(out_file_, "<implements name=\"%s\">\n</implements>\n", dot.c_str());
777 }
778 }
779
780 /*
781 * Dumps the catches table associated with the code.
782 */
DumpCatches(const dex_ir::CodeItem * code)783 void DexLayout::DumpCatches(const dex_ir::CodeItem* code) {
784 const uint16_t tries_size = code->TriesSize();
785
786 // No catch table.
787 if (tries_size == 0) {
788 fprintf(out_file_, " catches : (none)\n");
789 return;
790 }
791
792 // Dump all table entries.
793 fprintf(out_file_, " catches : %d\n", tries_size);
794 std::vector<std::unique_ptr<const dex_ir::TryItem>>* tries = code->Tries();
795 for (uint32_t i = 0; i < tries_size; i++) {
796 const dex_ir::TryItem* try_item = (*tries)[i].get();
797 const uint32_t start = try_item->StartAddr();
798 const uint32_t end = start + try_item->InsnCount();
799 fprintf(out_file_, " 0x%04x - 0x%04x\n", start, end);
800 for (auto& handler : *try_item->GetHandlers()->GetHandlers()) {
801 const dex_ir::TypeId* type_id = handler->GetTypeId();
802 const char* descriptor = (type_id == nullptr) ? "<any>" : type_id->GetStringId()->Data();
803 fprintf(out_file_, " %s -> 0x%04x\n", descriptor, handler->GetAddress());
804 } // for
805 } // for
806 }
807
808 /*
809 * Dumps a single instruction.
810 */
DumpInstruction(const dex_ir::CodeItem * code,uint32_t code_offset,uint32_t insn_idx,uint32_t insn_width,const Instruction * dec_insn)811 void DexLayout::DumpInstruction(const dex_ir::CodeItem* code,
812 uint32_t code_offset,
813 uint32_t insn_idx,
814 uint32_t insn_width,
815 const Instruction* dec_insn) {
816 // Address of instruction (expressed as byte offset).
817 fprintf(out_file_, "%06x:", code_offset + 0x10 + insn_idx * 2);
818
819 // Dump (part of) raw bytes.
820 const uint16_t* insns = code->Insns();
821 for (uint32_t i = 0; i < 8; i++) {
822 if (i < insn_width) {
823 if (i == 7) {
824 fprintf(out_file_, " ... ");
825 } else {
826 // Print 16-bit value in little-endian order.
827 const uint8_t* bytePtr = (const uint8_t*) &insns[insn_idx + i];
828 fprintf(out_file_, " %02x%02x", bytePtr[0], bytePtr[1]);
829 }
830 } else {
831 fputs(" ", out_file_);
832 }
833 } // for
834
835 // Dump pseudo-instruction or opcode.
836 if (dec_insn->Opcode() == Instruction::NOP) {
837 const uint16_t instr = Get2LE((const uint8_t*) &insns[insn_idx]);
838 if (instr == Instruction::kPackedSwitchSignature) {
839 fprintf(out_file_, "|%04x: packed-switch-data (%d units)", insn_idx, insn_width);
840 } else if (instr == Instruction::kSparseSwitchSignature) {
841 fprintf(out_file_, "|%04x: sparse-switch-data (%d units)", insn_idx, insn_width);
842 } else if (instr == Instruction::kArrayDataSignature) {
843 fprintf(out_file_, "|%04x: array-data (%d units)", insn_idx, insn_width);
844 } else {
845 fprintf(out_file_, "|%04x: nop // spacer", insn_idx);
846 }
847 } else {
848 fprintf(out_file_, "|%04x: %s", insn_idx, dec_insn->Name());
849 }
850
851 // Set up additional argument.
852 std::unique_ptr<char[]> index_buf;
853 if (Instruction::IndexTypeOf(dec_insn->Opcode()) != Instruction::kIndexNone) {
854 index_buf = IndexString(header_, dec_insn, 200);
855 }
856
857 // Dump the instruction.
858 //
859 // NOTE: pDecInsn->DumpString(pDexFile) differs too much from original.
860 //
861 switch (Instruction::FormatOf(dec_insn->Opcode())) {
862 case Instruction::k10x: // op
863 break;
864 case Instruction::k12x: // op vA, vB
865 fprintf(out_file_, " v%d, v%d", dec_insn->VRegA(), dec_insn->VRegB());
866 break;
867 case Instruction::k11n: // op vA, #+B
868 fprintf(out_file_, " v%d, #int %d // #%x",
869 dec_insn->VRegA(), (int32_t) dec_insn->VRegB(), (uint8_t)dec_insn->VRegB());
870 break;
871 case Instruction::k11x: // op vAA
872 fprintf(out_file_, " v%d", dec_insn->VRegA());
873 break;
874 case Instruction::k10t: // op +AA
875 case Instruction::k20t: { // op +AAAA
876 const int32_t targ = (int32_t) dec_insn->VRegA();
877 fprintf(out_file_, " %04x // %c%04x",
878 insn_idx + targ,
879 (targ < 0) ? '-' : '+',
880 (targ < 0) ? -targ : targ);
881 break;
882 }
883 case Instruction::k22x: // op vAA, vBBBB
884 fprintf(out_file_, " v%d, v%d", dec_insn->VRegA(), dec_insn->VRegB());
885 break;
886 case Instruction::k21t: { // op vAA, +BBBB
887 const int32_t targ = (int32_t) dec_insn->VRegB();
888 fprintf(out_file_, " v%d, %04x // %c%04x", dec_insn->VRegA(),
889 insn_idx + targ,
890 (targ < 0) ? '-' : '+',
891 (targ < 0) ? -targ : targ);
892 break;
893 }
894 case Instruction::k21s: // op vAA, #+BBBB
895 fprintf(out_file_, " v%d, #int %d // #%x",
896 dec_insn->VRegA(), (int32_t) dec_insn->VRegB(), (uint16_t)dec_insn->VRegB());
897 break;
898 case Instruction::k21h: // op vAA, #+BBBB0000[00000000]
899 // The printed format varies a bit based on the actual opcode.
900 if (dec_insn->Opcode() == Instruction::CONST_HIGH16) {
901 const int32_t value = dec_insn->VRegB() << 16;
902 fprintf(out_file_, " v%d, #int %d // #%x",
903 dec_insn->VRegA(), value, (uint16_t) dec_insn->VRegB());
904 } else {
905 const int64_t value = ((int64_t) dec_insn->VRegB()) << 48;
906 fprintf(out_file_, " v%d, #long %" PRId64 " // #%x",
907 dec_insn->VRegA(), value, (uint16_t) dec_insn->VRegB());
908 }
909 break;
910 case Instruction::k21c: // op vAA, thing@BBBB
911 case Instruction::k31c: // op vAA, thing@BBBBBBBB
912 fprintf(out_file_, " v%d, %s", dec_insn->VRegA(), index_buf.get());
913 break;
914 case Instruction::k23x: // op vAA, vBB, vCC
915 fprintf(out_file_, " v%d, v%d, v%d",
916 dec_insn->VRegA(), dec_insn->VRegB(), dec_insn->VRegC());
917 break;
918 case Instruction::k22b: // op vAA, vBB, #+CC
919 fprintf(out_file_, " v%d, v%d, #int %d // #%02x",
920 dec_insn->VRegA(), dec_insn->VRegB(),
921 (int32_t) dec_insn->VRegC(), (uint8_t) dec_insn->VRegC());
922 break;
923 case Instruction::k22t: { // op vA, vB, +CCCC
924 const int32_t targ = (int32_t) dec_insn->VRegC();
925 fprintf(out_file_, " v%d, v%d, %04x // %c%04x",
926 dec_insn->VRegA(), dec_insn->VRegB(),
927 insn_idx + targ,
928 (targ < 0) ? '-' : '+',
929 (targ < 0) ? -targ : targ);
930 break;
931 }
932 case Instruction::k22s: // op vA, vB, #+CCCC
933 fprintf(out_file_, " v%d, v%d, #int %d // #%04x",
934 dec_insn->VRegA(), dec_insn->VRegB(),
935 (int32_t) dec_insn->VRegC(), (uint16_t) dec_insn->VRegC());
936 break;
937 case Instruction::k22c: // op vA, vB, thing@CCCC
938 // NOT SUPPORTED:
939 // case Instruction::k22cs: // [opt] op vA, vB, field offset CCCC
940 fprintf(out_file_, " v%d, v%d, %s",
941 dec_insn->VRegA(), dec_insn->VRegB(), index_buf.get());
942 break;
943 case Instruction::k30t:
944 fprintf(out_file_, " #%08x", dec_insn->VRegA());
945 break;
946 case Instruction::k31i: { // op vAA, #+BBBBBBBB
947 // This is often, but not always, a float.
948 union {
949 float f;
950 uint32_t i;
951 } conv;
952 conv.i = dec_insn->VRegB();
953 fprintf(out_file_, " v%d, #float %g // #%08x",
954 dec_insn->VRegA(), conv.f, dec_insn->VRegB());
955 break;
956 }
957 case Instruction::k31t: // op vAA, offset +BBBBBBBB
958 fprintf(out_file_, " v%d, %08x // +%08x",
959 dec_insn->VRegA(), insn_idx + dec_insn->VRegB(), dec_insn->VRegB());
960 break;
961 case Instruction::k32x: // op vAAAA, vBBBB
962 fprintf(out_file_, " v%d, v%d", dec_insn->VRegA(), dec_insn->VRegB());
963 break;
964 case Instruction::k35c: // op {vC, vD, vE, vF, vG}, thing@BBBB
965 case Instruction::k45cc: { // op {vC, vD, vE, vF, vG}, meth@BBBB, proto@HHHH
966 // NOT SUPPORTED:
967 // case Instruction::k35ms: // [opt] invoke-virtual+super
968 // case Instruction::k35mi: // [opt] inline invoke
969 uint32_t arg[Instruction::kMaxVarArgRegs];
970 dec_insn->GetVarArgs(arg);
971 fputs(" {", out_file_);
972 for (int i = 0, n = dec_insn->VRegA(); i < n; i++) {
973 if (i == 0) {
974 fprintf(out_file_, "v%d", arg[i]);
975 } else {
976 fprintf(out_file_, ", v%d", arg[i]);
977 }
978 } // for
979 fprintf(out_file_, "}, %s", index_buf.get());
980 break;
981 }
982 case Instruction::k3rc: // op {vCCCC .. v(CCCC+AA-1)}, thing@BBBB
983 case Instruction::k4rcc: // op {vCCCC .. v(CCCC+AA-1)}, meth@BBBB, proto@HHHH
984 // NOT SUPPORTED:
985 // case Instruction::k3rms: // [opt] invoke-virtual+super/range
986 // case Instruction::k3rmi: // [opt] execute-inline/range
987 {
988 // This doesn't match the "dx" output when some of the args are
989 // 64-bit values -- dx only shows the first register.
990 fputs(" {", out_file_);
991 for (int i = 0, n = dec_insn->VRegA(); i < n; i++) {
992 if (i == 0) {
993 fprintf(out_file_, "v%d", dec_insn->VRegC() + i);
994 } else {
995 fprintf(out_file_, ", v%d", dec_insn->VRegC() + i);
996 }
997 } // for
998 fprintf(out_file_, "}, %s", index_buf.get());
999 }
1000 break;
1001 case Instruction::k51l: { // op vAA, #+BBBBBBBBBBBBBBBB
1002 // This is often, but not always, a double.
1003 union {
1004 double d;
1005 uint64_t j;
1006 } conv;
1007 conv.j = dec_insn->WideVRegB();
1008 fprintf(out_file_, " v%d, #double %g // #%016" PRIx64,
1009 dec_insn->VRegA(), conv.d, dec_insn->WideVRegB());
1010 break;
1011 }
1012 // NOT SUPPORTED:
1013 // case Instruction::k00x: // unknown op or breakpoint
1014 // break;
1015 default:
1016 fprintf(out_file_, " ???");
1017 break;
1018 } // switch
1019
1020 fputc('\n', out_file_);
1021 }
1022
1023 /*
1024 * Dumps a bytecode disassembly.
1025 */
DumpBytecodes(uint32_t idx,const dex_ir::CodeItem * code,uint32_t code_offset)1026 void DexLayout::DumpBytecodes(uint32_t idx, const dex_ir::CodeItem* code, uint32_t code_offset) {
1027 dex_ir::MethodId* method_id = header_->MethodIds()[idx];
1028 const char* name = method_id->Name()->Data();
1029 std::string type_descriptor = GetSignatureForProtoId(method_id->Proto());
1030 const char* back_descriptor = method_id->Class()->GetStringId()->Data();
1031
1032 // Generate header.
1033 std::string dot(DescriptorToDot(back_descriptor));
1034 fprintf(out_file_, "%06x: |[%06x] %s.%s:%s\n",
1035 code_offset, code_offset, dot.c_str(), name, type_descriptor.c_str());
1036
1037 // Iterate over all instructions.
1038 for (const DexInstructionPcPair& inst : code->Instructions()) {
1039 const uint32_t insn_width = inst->SizeInCodeUnits();
1040 if (insn_width == 0) {
1041 LOG(WARNING) << "GLITCH: zero-width instruction at idx=0x" << std::hex << inst.DexPc();
1042 break;
1043 }
1044 DumpInstruction(code, code_offset, inst.DexPc(), insn_width, &inst.Inst());
1045 } // for
1046 }
1047
1048 /*
1049 * Lookup functions.
1050 */
StringDataByIdx(uint32_t idx,dex_ir::Header * header)1051 static const char* StringDataByIdx(uint32_t idx, dex_ir::Header* header) {
1052 dex_ir::StringId* string_id = header->GetStringIdOrNullPtr(idx);
1053 if (string_id == nullptr) {
1054 return nullptr;
1055 }
1056 return string_id->Data();
1057 }
1058
StringDataByTypeIdx(uint16_t idx,dex_ir::Header * header)1059 static const char* StringDataByTypeIdx(uint16_t idx, dex_ir::Header* header) {
1060 dex_ir::TypeId* type_id = header->GetTypeIdOrNullPtr(idx);
1061 if (type_id == nullptr) {
1062 return nullptr;
1063 }
1064 dex_ir::StringId* string_id = type_id->GetStringId();
1065 if (string_id == nullptr) {
1066 return nullptr;
1067 }
1068 return string_id->Data();
1069 }
1070
1071
1072 /*
1073 * Dumps code of a method.
1074 */
DumpCode(uint32_t idx,const dex_ir::CodeItem * code,uint32_t code_offset,const char * declaring_class_descriptor,const char * method_name,bool is_static,const dex_ir::ProtoId * proto)1075 void DexLayout::DumpCode(uint32_t idx,
1076 const dex_ir::CodeItem* code,
1077 uint32_t code_offset,
1078 const char* declaring_class_descriptor,
1079 const char* method_name,
1080 bool is_static,
1081 const dex_ir::ProtoId* proto) {
1082 fprintf(out_file_, " registers : %d\n", code->RegistersSize());
1083 fprintf(out_file_, " ins : %d\n", code->InsSize());
1084 fprintf(out_file_, " outs : %d\n", code->OutsSize());
1085 fprintf(out_file_, " insns size : %d 16-bit code units\n",
1086 code->InsnsSize());
1087
1088 // Bytecode disassembly, if requested.
1089 if (options_.disassemble_) {
1090 DumpBytecodes(idx, code, code_offset);
1091 }
1092
1093 // Try-catch blocks.
1094 DumpCatches(code);
1095
1096 // Positions and locals table in the debug info.
1097 dex_ir::DebugInfoItem* debug_info = code->DebugInfo();
1098 fprintf(out_file_, " positions : \n");
1099 if (debug_info != nullptr) {
1100 DexFile::DecodeDebugPositionInfo(debug_info->GetDebugInfo(),
1101 [this](uint32_t idx) {
1102 return StringDataByIdx(idx, this->header_);
1103 },
1104 [&](const DexFile::PositionInfo& entry) {
1105 fprintf(out_file_,
1106 " 0x%04x line=%d\n",
1107 entry.address_,
1108 entry.line_);
1109 return false;
1110 });
1111 }
1112 fprintf(out_file_, " locals : \n");
1113 if (debug_info != nullptr) {
1114 std::vector<const char*> arg_descriptors;
1115 const dex_ir::TypeList* parameters = proto->Parameters();
1116 if (parameters != nullptr) {
1117 const dex_ir::TypeIdVector* parameter_type_vector = parameters->GetTypeList();
1118 if (parameter_type_vector != nullptr) {
1119 for (const dex_ir::TypeId* type_id : *parameter_type_vector) {
1120 arg_descriptors.push_back(type_id->GetStringId()->Data());
1121 }
1122 }
1123 }
1124 DexFile::DecodeDebugLocalInfo(debug_info->GetDebugInfo(),
1125 "DexLayout in-memory",
1126 declaring_class_descriptor,
1127 arg_descriptors,
1128 method_name,
1129 is_static,
1130 code->RegistersSize(),
1131 code->InsSize(),
1132 code->InsnsSize(),
1133 [this](uint32_t idx) {
1134 return StringDataByIdx(idx, this->header_);
1135 },
1136 [this](uint32_t idx) {
1137 return
1138 StringDataByTypeIdx(dchecked_integral_cast<uint16_t>(idx),
1139 this->header_);
1140 },
1141 [&](const DexFile::LocalInfo& entry) {
1142 const char* signature =
1143 entry.signature_ != nullptr ? entry.signature_ : "";
1144 fprintf(out_file_,
1145 " 0x%04x - 0x%04x reg=%d %s %s %s\n",
1146 entry.start_address_,
1147 entry.end_address_,
1148 entry.reg_,
1149 entry.name_,
1150 entry.descriptor_,
1151 signature);
1152 });
1153 }
1154 }
1155
1156 /*
1157 * Dumps a method.
1158 */
DumpMethod(uint32_t idx,uint32_t flags,uint32_t hiddenapi_flags,const dex_ir::CodeItem * code,int i)1159 void DexLayout::DumpMethod(uint32_t idx,
1160 uint32_t flags,
1161 uint32_t hiddenapi_flags,
1162 const dex_ir::CodeItem* code,
1163 int i) {
1164 // Bail for anything private if export only requested.
1165 if (options_.exports_only_ && (flags & (kAccPublic | kAccProtected)) == 0) {
1166 return;
1167 }
1168
1169 dex_ir::MethodId* method_id = header_->MethodIds()[idx];
1170 const char* name = method_id->Name()->Data();
1171 char* type_descriptor = strdup(GetSignatureForProtoId(method_id->Proto()).c_str());
1172 const char* back_descriptor = method_id->Class()->GetStringId()->Data();
1173 char* access_str = CreateAccessFlagStr(flags, kAccessForMethod);
1174
1175 if (options_.output_format_ == kOutputPlain) {
1176 fprintf(out_file_, " #%d : (in %s)\n", i, back_descriptor);
1177 fprintf(out_file_, " name : '%s'\n", name);
1178 fprintf(out_file_, " type : '%s'\n", type_descriptor);
1179 fprintf(out_file_, " access : 0x%04x (%s)\n", flags, access_str);
1180 if (hiddenapi_flags != 0u) {
1181 fprintf(out_file_,
1182 " hiddenapi : 0x%04x (%s)\n",
1183 hiddenapi_flags,
1184 GetHiddenapiFlagStr(hiddenapi_flags).c_str());
1185 }
1186 if (code == nullptr) {
1187 fprintf(out_file_, " code : (none)\n");
1188 } else {
1189 fprintf(out_file_, " code -\n");
1190 DumpCode(idx,
1191 code,
1192 code->GetOffset(),
1193 back_descriptor,
1194 name,
1195 (flags & kAccStatic) != 0,
1196 method_id->Proto());
1197 }
1198 if (options_.disassemble_) {
1199 fputc('\n', out_file_);
1200 }
1201 } else if (options_.output_format_ == kOutputXml) {
1202 const bool constructor = (name[0] == '<');
1203
1204 // Method name and prototype.
1205 if (constructor) {
1206 std::string dot(DescriptorClassToName(back_descriptor));
1207 fprintf(out_file_, "<constructor name=\"%s\"\n", dot.c_str());
1208 dot = DescriptorToDot(back_descriptor);
1209 fprintf(out_file_, " type=\"%s\"\n", dot.c_str());
1210 } else {
1211 fprintf(out_file_, "<method name=\"%s\"\n", name);
1212 const char* return_type = strrchr(type_descriptor, ')');
1213 if (return_type == nullptr) {
1214 LOG(ERROR) << "bad method type descriptor '" << type_descriptor << "'";
1215 goto bail;
1216 }
1217 std::string dot(DescriptorToDot(return_type + 1));
1218 fprintf(out_file_, " return=\"%s\"\n", dot.c_str());
1219 fprintf(out_file_, " abstract=%s\n", QuotedBool((flags & kAccAbstract) != 0));
1220 fprintf(out_file_, " native=%s\n", QuotedBool((flags & kAccNative) != 0));
1221 fprintf(out_file_, " synchronized=%s\n", QuotedBool(
1222 (flags & (kAccSynchronized | kAccDeclaredSynchronized)) != 0));
1223 }
1224
1225 // Additional method flags.
1226 fprintf(out_file_, " static=%s\n", QuotedBool((flags & kAccStatic) != 0));
1227 fprintf(out_file_, " final=%s\n", QuotedBool((flags & kAccFinal) != 0));
1228 // The "deprecated=" not knowable w/o parsing annotations.
1229 fprintf(out_file_, " visibility=%s\n>\n", QuotedVisibility(flags));
1230
1231 // Parameters.
1232 if (type_descriptor[0] != '(') {
1233 LOG(ERROR) << "ERROR: bad descriptor '" << type_descriptor << "'";
1234 goto bail;
1235 }
1236 char* tmp_buf = reinterpret_cast<char*>(malloc(strlen(type_descriptor) + 1));
1237 const char* base = type_descriptor + 1;
1238 int arg_num = 0;
1239 while (*base != ')') {
1240 char* cp = tmp_buf;
1241 while (*base == '[') {
1242 *cp++ = *base++;
1243 }
1244 if (*base == 'L') {
1245 // Copy through ';'.
1246 do {
1247 *cp = *base++;
1248 } while (*cp++ != ';');
1249 } else {
1250 // Primitive char, copy it.
1251 if (strchr("ZBCSIFJD", *base) == nullptr) {
1252 LOG(ERROR) << "ERROR: bad method signature '" << base << "'";
1253 break; // while
1254 }
1255 *cp++ = *base++;
1256 }
1257 // Null terminate and display.
1258 *cp++ = '\0';
1259 std::string dot(DescriptorToDot(tmp_buf));
1260 fprintf(out_file_, "<parameter name=\"arg%d\" type=\"%s\">\n"
1261 "</parameter>\n", arg_num++, dot.c_str());
1262 } // while
1263 free(tmp_buf);
1264 if (constructor) {
1265 fprintf(out_file_, "</constructor>\n");
1266 } else {
1267 fprintf(out_file_, "</method>\n");
1268 }
1269 }
1270
1271 bail:
1272 free(type_descriptor);
1273 free(access_str);
1274 }
1275
1276 /*
1277 * Dumps a static (class) field.
1278 */
DumpSField(uint32_t idx,uint32_t flags,uint32_t hiddenapi_flags,int i,dex_ir::EncodedValue * init)1279 void DexLayout::DumpSField(uint32_t idx,
1280 uint32_t flags,
1281 uint32_t hiddenapi_flags,
1282 int i,
1283 dex_ir::EncodedValue* init) {
1284 // Bail for anything private if export only requested.
1285 if (options_.exports_only_ && (flags & (kAccPublic | kAccProtected)) == 0) {
1286 return;
1287 }
1288
1289 dex_ir::FieldId* field_id = header_->FieldIds()[idx];
1290 const char* name = field_id->Name()->Data();
1291 const char* type_descriptor = field_id->Type()->GetStringId()->Data();
1292 const char* back_descriptor = field_id->Class()->GetStringId()->Data();
1293 char* access_str = CreateAccessFlagStr(flags, kAccessForField);
1294
1295 if (options_.output_format_ == kOutputPlain) {
1296 fprintf(out_file_, " #%d : (in %s)\n", i, back_descriptor);
1297 fprintf(out_file_, " name : '%s'\n", name);
1298 fprintf(out_file_, " type : '%s'\n", type_descriptor);
1299 fprintf(out_file_, " access : 0x%04x (%s)\n", flags, access_str);
1300 if (hiddenapi_flags != 0u) {
1301 fprintf(out_file_,
1302 " hiddenapi : 0x%04x (%s)\n",
1303 hiddenapi_flags,
1304 GetHiddenapiFlagStr(hiddenapi_flags).c_str());
1305 }
1306 if (init != nullptr) {
1307 fputs(" value : ", out_file_);
1308 DumpEncodedValue(init);
1309 fputs("\n", out_file_);
1310 }
1311 } else if (options_.output_format_ == kOutputXml) {
1312 fprintf(out_file_, "<field name=\"%s\"\n", name);
1313 std::string dot(DescriptorToDot(type_descriptor));
1314 fprintf(out_file_, " type=\"%s\"\n", dot.c_str());
1315 fprintf(out_file_, " transient=%s\n", QuotedBool((flags & kAccTransient) != 0));
1316 fprintf(out_file_, " volatile=%s\n", QuotedBool((flags & kAccVolatile) != 0));
1317 // The "value=" is not knowable w/o parsing annotations.
1318 fprintf(out_file_, " static=%s\n", QuotedBool((flags & kAccStatic) != 0));
1319 fprintf(out_file_, " final=%s\n", QuotedBool((flags & kAccFinal) != 0));
1320 // The "deprecated=" is not knowable w/o parsing annotations.
1321 fprintf(out_file_, " visibility=%s\n", QuotedVisibility(flags));
1322 if (init != nullptr) {
1323 fputs(" value=\"", out_file_);
1324 DumpEncodedValue(init);
1325 fputs("\"\n", out_file_);
1326 }
1327 fputs(">\n</field>\n", out_file_);
1328 }
1329
1330 free(access_str);
1331 }
1332
1333 /*
1334 * Dumps an instance field.
1335 */
DumpIField(uint32_t idx,uint32_t flags,uint32_t hiddenapi_flags,int i)1336 void DexLayout::DumpIField(uint32_t idx,
1337 uint32_t flags,
1338 uint32_t hiddenapi_flags,
1339 int i) {
1340 DumpSField(idx, flags, hiddenapi_flags, i, nullptr);
1341 }
1342
1343 /*
1344 * Dumps the class.
1345 *
1346 * Note "idx" is a DexClassDef index, not a DexTypeId index.
1347 *
1348 * If "*last_package" is nullptr or does not match the current class' package,
1349 * the value will be replaced with a newly-allocated string.
1350 */
DumpClass(int idx,char ** last_package)1351 void DexLayout::DumpClass(int idx, char** last_package) {
1352 dex_ir::ClassDef* class_def = header_->ClassDefs()[idx];
1353 // Omitting non-public class.
1354 if (options_.exports_only_ && (class_def->GetAccessFlags() & kAccPublic) == 0) {
1355 return;
1356 }
1357
1358 if (options_.show_section_headers_) {
1359 DumpClassDef(idx);
1360 }
1361
1362 if (options_.show_annotations_) {
1363 DumpClassAnnotations(idx);
1364 }
1365
1366 // For the XML output, show the package name. Ideally we'd gather
1367 // up the classes, sort them, and dump them alphabetically so the
1368 // package name wouldn't jump around, but that's not a great plan
1369 // for something that needs to run on the device.
1370 const char* class_descriptor = header_->ClassDefs()[idx]->ClassType()->GetStringId()->Data();
1371 if (!(class_descriptor[0] == 'L' &&
1372 class_descriptor[strlen(class_descriptor)-1] == ';')) {
1373 // Arrays and primitives should not be defined explicitly. Keep going?
1374 LOG(ERROR) << "Malformed class name '" << class_descriptor << "'";
1375 } else if (options_.output_format_ == kOutputXml) {
1376 char* mangle = strdup(class_descriptor + 1);
1377 mangle[strlen(mangle)-1] = '\0';
1378
1379 // Reduce to just the package name.
1380 char* last_slash = strrchr(mangle, '/');
1381 if (last_slash != nullptr) {
1382 *last_slash = '\0';
1383 } else {
1384 *mangle = '\0';
1385 }
1386
1387 for (char* cp = mangle; *cp != '\0'; cp++) {
1388 if (*cp == '/') {
1389 *cp = '.';
1390 }
1391 } // for
1392
1393 if (*last_package == nullptr || strcmp(mangle, *last_package) != 0) {
1394 // Start of a new package.
1395 if (*last_package != nullptr) {
1396 fprintf(out_file_, "</package>\n");
1397 }
1398 fprintf(out_file_, "<package name=\"%s\"\n>\n", mangle);
1399 free(*last_package);
1400 *last_package = mangle;
1401 } else {
1402 free(mangle);
1403 }
1404 }
1405
1406 // General class information.
1407 char* access_str = CreateAccessFlagStr(class_def->GetAccessFlags(), kAccessForClass);
1408 const char* superclass_descriptor = nullptr;
1409 if (class_def->Superclass() != nullptr) {
1410 superclass_descriptor = class_def->Superclass()->GetStringId()->Data();
1411 }
1412 if (options_.output_format_ == kOutputPlain) {
1413 fprintf(out_file_, "Class #%d -\n", idx);
1414 fprintf(out_file_, " Class descriptor : '%s'\n", class_descriptor);
1415 fprintf(out_file_, " Access flags : 0x%04x (%s)\n",
1416 class_def->GetAccessFlags(), access_str);
1417 if (superclass_descriptor != nullptr) {
1418 fprintf(out_file_, " Superclass : '%s'\n", superclass_descriptor);
1419 }
1420 fprintf(out_file_, " Interfaces -\n");
1421 } else {
1422 std::string dot(DescriptorClassToName(class_descriptor));
1423 fprintf(out_file_, "<class name=\"%s\"\n", dot.c_str());
1424 if (superclass_descriptor != nullptr) {
1425 dot = DescriptorToDot(superclass_descriptor);
1426 fprintf(out_file_, " extends=\"%s\"\n", dot.c_str());
1427 }
1428 fprintf(out_file_, " interface=%s\n",
1429 QuotedBool((class_def->GetAccessFlags() & kAccInterface) != 0));
1430 fprintf(out_file_, " abstract=%s\n",
1431 QuotedBool((class_def->GetAccessFlags() & kAccAbstract) != 0));
1432 fprintf(out_file_, " static=%s\n", QuotedBool((class_def->GetAccessFlags() & kAccStatic) != 0));
1433 fprintf(out_file_, " final=%s\n", QuotedBool((class_def->GetAccessFlags() & kAccFinal) != 0));
1434 // The "deprecated=" not knowable w/o parsing annotations.
1435 fprintf(out_file_, " visibility=%s\n", QuotedVisibility(class_def->GetAccessFlags()));
1436 fprintf(out_file_, ">\n");
1437 }
1438
1439 // Interfaces.
1440 const dex_ir::TypeList* interfaces = class_def->Interfaces();
1441 if (interfaces != nullptr) {
1442 const dex_ir::TypeIdVector* interfaces_vector = interfaces->GetTypeList();
1443 for (uint32_t i = 0; i < interfaces_vector->size(); i++) {
1444 DumpInterface((*interfaces_vector)[i], i);
1445 } // for
1446 }
1447
1448 // Fields and methods.
1449 dex_ir::ClassData* class_data = class_def->GetClassData();
1450 // Prepare data for static fields.
1451 dex_ir::EncodedArrayItem* static_values = class_def->StaticValues();
1452 dex_ir::EncodedValueVector* encoded_values =
1453 static_values == nullptr ? nullptr : static_values->GetEncodedValues();
1454 const uint32_t encoded_values_size = (encoded_values == nullptr) ? 0 : encoded_values->size();
1455
1456 // Static fields.
1457 if (options_.output_format_ == kOutputPlain) {
1458 fprintf(out_file_, " Static fields -\n");
1459 }
1460 if (class_data != nullptr) {
1461 dex_ir::FieldItemVector* static_fields = class_data->StaticFields();
1462 if (static_fields != nullptr) {
1463 for (uint32_t i = 0; i < static_fields->size(); i++) {
1464 DumpSField((*static_fields)[i].GetFieldId()->GetIndex(),
1465 (*static_fields)[i].GetAccessFlags(),
1466 dex_ir::HiddenapiClassData::GetFlags(header_, class_def, &(*static_fields)[i]),
1467 i,
1468 i < encoded_values_size ? (*encoded_values)[i].get() : nullptr);
1469 } // for
1470 }
1471 }
1472
1473 // Instance fields.
1474 if (options_.output_format_ == kOutputPlain) {
1475 fprintf(out_file_, " Instance fields -\n");
1476 }
1477 if (class_data != nullptr) {
1478 dex_ir::FieldItemVector* instance_fields = class_data->InstanceFields();
1479 if (instance_fields != nullptr) {
1480 for (uint32_t i = 0; i < instance_fields->size(); i++) {
1481 DumpIField((*instance_fields)[i].GetFieldId()->GetIndex(),
1482 (*instance_fields)[i].GetAccessFlags(),
1483 dex_ir::HiddenapiClassData::GetFlags(header_, class_def, &(*instance_fields)[i]),
1484 i);
1485 } // for
1486 }
1487 }
1488
1489 // Direct methods.
1490 if (options_.output_format_ == kOutputPlain) {
1491 fprintf(out_file_, " Direct methods -\n");
1492 }
1493 if (class_data != nullptr) {
1494 dex_ir::MethodItemVector* direct_methods = class_data->DirectMethods();
1495 if (direct_methods != nullptr) {
1496 for (uint32_t i = 0; i < direct_methods->size(); i++) {
1497 DumpMethod((*direct_methods)[i].GetMethodId()->GetIndex(),
1498 (*direct_methods)[i].GetAccessFlags(),
1499 dex_ir::HiddenapiClassData::GetFlags(header_, class_def, &(*direct_methods)[i]),
1500 (*direct_methods)[i].GetCodeItem(),
1501 i);
1502 } // for
1503 }
1504 }
1505
1506 // Virtual methods.
1507 if (options_.output_format_ == kOutputPlain) {
1508 fprintf(out_file_, " Virtual methods -\n");
1509 }
1510 if (class_data != nullptr) {
1511 dex_ir::MethodItemVector* virtual_methods = class_data->VirtualMethods();
1512 if (virtual_methods != nullptr) {
1513 for (uint32_t i = 0; i < virtual_methods->size(); i++) {
1514 DumpMethod((*virtual_methods)[i].GetMethodId()->GetIndex(),
1515 (*virtual_methods)[i].GetAccessFlags(),
1516 dex_ir::HiddenapiClassData::GetFlags(header_, class_def, &(*virtual_methods)[i]),
1517 (*virtual_methods)[i].GetCodeItem(),
1518 i);
1519 } // for
1520 }
1521 }
1522
1523 // End of class.
1524 if (options_.output_format_ == kOutputPlain) {
1525 const char* file_name = "unknown";
1526 if (class_def->SourceFile() != nullptr) {
1527 file_name = class_def->SourceFile()->Data();
1528 }
1529 const dex_ir::StringId* source_file = class_def->SourceFile();
1530 fprintf(out_file_, " source_file_idx : %d (%s)\n\n",
1531 source_file == nullptr ? 0xffffffffU : source_file->GetIndex(), file_name);
1532 } else if (options_.output_format_ == kOutputXml) {
1533 fprintf(out_file_, "</class>\n");
1534 }
1535
1536 free(access_str);
1537 }
1538
DumpDexFile()1539 void DexLayout::DumpDexFile() {
1540 // Headers.
1541 if (options_.show_file_headers_) {
1542 DumpFileHeader();
1543 }
1544
1545 // Open XML context.
1546 if (options_.output_format_ == kOutputXml) {
1547 fprintf(out_file_, "<api>\n");
1548 }
1549
1550 // Iterate over all classes.
1551 char* package = nullptr;
1552 const uint32_t class_defs_size = header_->ClassDefs().Size();
1553 for (uint32_t i = 0; i < class_defs_size; i++) {
1554 DumpClass(i, &package);
1555 } // for
1556
1557 // Free the last package allocated.
1558 if (package != nullptr) {
1559 fprintf(out_file_, "</package>\n");
1560 free(package);
1561 }
1562
1563 // Close XML context.
1564 if (options_.output_format_ == kOutputXml) {
1565 fprintf(out_file_, "</api>\n");
1566 }
1567 }
1568
LayoutClassDefsAndClassData(const DexFile * dex_file)1569 void DexLayout::LayoutClassDefsAndClassData(const DexFile* dex_file) {
1570 std::vector<dex_ir::ClassDef*> new_class_def_order;
1571 for (auto& class_def : header_->ClassDefs()) {
1572 dex::TypeIndex type_idx(class_def->ClassType()->GetIndex());
1573 if (info_->ContainsClass(*dex_file, type_idx)) {
1574 new_class_def_order.push_back(class_def.get());
1575 }
1576 }
1577 for (auto& class_def : header_->ClassDefs()) {
1578 dex::TypeIndex type_idx(class_def->ClassType()->GetIndex());
1579 if (!info_->ContainsClass(*dex_file, type_idx)) {
1580 new_class_def_order.push_back(class_def.get());
1581 }
1582 }
1583 std::unordered_set<dex_ir::ClassData*> visited_class_data;
1584 size_t class_data_index = 0;
1585 auto& class_datas = header_->ClassDatas();
1586 for (dex_ir::ClassDef* class_def : new_class_def_order) {
1587 dex_ir::ClassData* class_data = class_def->GetClassData();
1588 if (class_data != nullptr && visited_class_data.find(class_data) == visited_class_data.end()) {
1589 visited_class_data.insert(class_data);
1590 // Overwrite the existing vector with the new ordering, note that the sets of objects are
1591 // equivalent, but the order changes. This is why this is not a memory leak.
1592 // TODO: Consider cleaning this up with a shared_ptr.
1593 class_datas[class_data_index].release(); // NOLINT b/117926937
1594 class_datas[class_data_index].reset(class_data);
1595 ++class_data_index;
1596 }
1597 }
1598 CHECK_EQ(class_data_index, class_datas.Size());
1599
1600 if (DexLayout::kChangeClassDefOrder) {
1601 // This currently produces dex files that violate the spec since the super class class_def is
1602 // supposed to occur before any subclasses.
1603 dex_ir::CollectionVector<dex_ir::ClassDef>& class_defs = header_->ClassDefs();
1604 CHECK_EQ(new_class_def_order.size(), class_defs.Size());
1605 for (size_t i = 0; i < class_defs.Size(); ++i) {
1606 // Overwrite the existing vector with the new ordering, note that the sets of objects are
1607 // equivalent, but the order changes. This is why this is not a memory leak.
1608 // TODO: Consider cleaning this up with a shared_ptr.
1609 class_defs[i].release(); // NOLINT b/117926937
1610 class_defs[i].reset(new_class_def_order[i]);
1611 }
1612 }
1613 }
1614
LayoutStringData(const DexFile * dex_file)1615 void DexLayout::LayoutStringData(const DexFile* dex_file) {
1616 const size_t num_strings = header_->StringIds().Size();
1617 std::vector<bool> is_shorty(num_strings, false);
1618 std::vector<bool> from_hot_method(num_strings, false);
1619 for (auto& class_def : header_->ClassDefs()) {
1620 // A name of a profile class is probably going to get looked up by ClassTable::Lookup, mark it
1621 // as hot. Add its super class and interfaces as well, which can be used during initialization.
1622 const bool is_profile_class =
1623 info_->ContainsClass(*dex_file, dex::TypeIndex(class_def->ClassType()->GetIndex()));
1624 if (is_profile_class) {
1625 from_hot_method[class_def->ClassType()->GetStringId()->GetIndex()] = true;
1626 const dex_ir::TypeId* superclass = class_def->Superclass();
1627 if (superclass != nullptr) {
1628 from_hot_method[superclass->GetStringId()->GetIndex()] = true;
1629 }
1630 const dex_ir::TypeList* interfaces = class_def->Interfaces();
1631 if (interfaces != nullptr) {
1632 for (const dex_ir::TypeId* interface_type : *interfaces->GetTypeList()) {
1633 from_hot_method[interface_type->GetStringId()->GetIndex()] = true;
1634 }
1635 }
1636 }
1637 dex_ir::ClassData* data = class_def->GetClassData();
1638 if (data == nullptr) {
1639 continue;
1640 }
1641 for (size_t i = 0; i < 2; ++i) {
1642 for (auto& method : *(i == 0 ? data->DirectMethods() : data->VirtualMethods())) {
1643 const dex_ir::MethodId* method_id = method.GetMethodId();
1644 dex_ir::CodeItem* code_item = method.GetCodeItem();
1645 if (code_item == nullptr) {
1646 continue;
1647 }
1648 const bool is_clinit = is_profile_class &&
1649 (method.GetAccessFlags() & kAccConstructor) != 0 &&
1650 (method.GetAccessFlags() & kAccStatic) != 0;
1651 const bool method_executed = is_clinit ||
1652 info_->GetMethodHotness(MethodReference(dex_file, method_id->GetIndex())).IsInProfile();
1653 if (!method_executed) {
1654 continue;
1655 }
1656 is_shorty[method_id->Proto()->Shorty()->GetIndex()] = true;
1657 dex_ir::CodeFixups* fixups = code_item->GetCodeFixups();
1658 if (fixups == nullptr) {
1659 continue;
1660 }
1661 // Add const-strings.
1662 for (dex_ir::StringId* id : fixups->StringIds()) {
1663 from_hot_method[id->GetIndex()] = true;
1664 }
1665 // Add field classes, names, and types.
1666 for (dex_ir::FieldId* id : fixups->FieldIds()) {
1667 // TODO: Only visit field ids from static getters and setters.
1668 from_hot_method[id->Class()->GetStringId()->GetIndex()] = true;
1669 from_hot_method[id->Name()->GetIndex()] = true;
1670 from_hot_method[id->Type()->GetStringId()->GetIndex()] = true;
1671 }
1672 // For clinits, add referenced method classes, names, and protos.
1673 if (is_clinit) {
1674 for (dex_ir::MethodId* id : fixups->MethodIds()) {
1675 from_hot_method[id->Class()->GetStringId()->GetIndex()] = true;
1676 from_hot_method[id->Name()->GetIndex()] = true;
1677 is_shorty[id->Proto()->Shorty()->GetIndex()] = true;
1678 }
1679 }
1680 }
1681 }
1682 }
1683 // Sort string data by specified order.
1684 std::vector<dex_ir::StringId*> string_ids;
1685 for (auto& string_id : header_->StringIds()) {
1686 string_ids.push_back(string_id.get());
1687 }
1688 std::sort(string_ids.begin(),
1689 string_ids.end(),
1690 [&is_shorty, &from_hot_method](const dex_ir::StringId* a,
1691 const dex_ir::StringId* b) {
1692 const bool a_is_hot = from_hot_method[a->GetIndex()];
1693 const bool b_is_hot = from_hot_method[b->GetIndex()];
1694 if (a_is_hot != b_is_hot) {
1695 return a_is_hot < b_is_hot;
1696 }
1697 // After hot methods are partitioned, subpartition shorties.
1698 const bool a_is_shorty = is_shorty[a->GetIndex()];
1699 const bool b_is_shorty = is_shorty[b->GetIndex()];
1700 if (a_is_shorty != b_is_shorty) {
1701 return a_is_shorty < b_is_shorty;
1702 }
1703 // Order by index by default.
1704 return a->GetIndex() < b->GetIndex();
1705 });
1706 auto& string_datas = header_->StringDatas();
1707 // Now we know what order we want the string data, reorder them.
1708 size_t data_index = 0;
1709 for (dex_ir::StringId* string_id : string_ids) {
1710 string_datas[data_index].release(); // NOLINT b/117926937
1711 string_datas[data_index].reset(string_id->DataItem());
1712 ++data_index;
1713 }
1714 if (kIsDebugBuild) {
1715 std::unordered_set<dex_ir::StringData*> visited;
1716 for (const std::unique_ptr<dex_ir::StringData>& data : string_datas) {
1717 visited.insert(data.get());
1718 }
1719 for (auto& string_id : header_->StringIds()) {
1720 CHECK(visited.find(string_id->DataItem()) != visited.end());
1721 }
1722 }
1723 CHECK_EQ(data_index, string_datas.Size());
1724 }
1725
1726 // Orders code items according to specified class data ordering.
LayoutCodeItems(const DexFile * dex_file)1727 void DexLayout::LayoutCodeItems(const DexFile* dex_file) {
1728 static constexpr InvokeType invoke_types[] = {
1729 kDirect,
1730 kVirtual
1731 };
1732
1733 std::unordered_map<dex_ir::CodeItem*, LayoutType>& code_item_layout =
1734 layout_hotness_info_.code_item_layout_;
1735
1736 // Assign hotness flags to all code items.
1737 for (InvokeType invoke_type : invoke_types) {
1738 for (auto& class_def : header_->ClassDefs()) {
1739 const bool is_profile_class =
1740 info_->ContainsClass(*dex_file, dex::TypeIndex(class_def->ClassType()->GetIndex()));
1741
1742 // Skip classes that are not defined in this dex file.
1743 dex_ir::ClassData* class_data = class_def->GetClassData();
1744 if (class_data == nullptr) {
1745 continue;
1746 }
1747 for (auto& method : *(invoke_type == InvokeType::kDirect
1748 ? class_data->DirectMethods()
1749 : class_data->VirtualMethods())) {
1750 const dex_ir::MethodId *method_id = method.GetMethodId();
1751 dex_ir::CodeItem *code_item = method.GetCodeItem();
1752 if (code_item == nullptr) {
1753 continue;
1754 }
1755 // Separate executed methods (clinits and profiled methods) from unexecuted methods.
1756 const bool is_clinit = (method.GetAccessFlags() & kAccConstructor) != 0 &&
1757 (method.GetAccessFlags() & kAccStatic) != 0;
1758 const bool is_startup_clinit = is_profile_class && is_clinit;
1759 using Hotness = ProfileCompilationInfo::MethodHotness;
1760 Hotness hotness = info_->GetMethodHotness(MethodReference(dex_file, method_id->GetIndex()));
1761 LayoutType state = LayoutType::kLayoutTypeUnused;
1762 if (hotness.IsHot()) {
1763 // Hot code is compiled, maybe one day it won't be accessed. So lay it out together for
1764 // now.
1765 state = LayoutType::kLayoutTypeHot;
1766 } else if (is_startup_clinit || hotness.GetFlags() == Hotness::kFlagStartup) {
1767 // Startup clinit or a method that only has the startup flag.
1768 state = LayoutType::kLayoutTypeStartupOnly;
1769 } else if (is_clinit) {
1770 state = LayoutType::kLayoutTypeUsedOnce;
1771 } else if (hotness.IsInProfile()) {
1772 state = LayoutType::kLayoutTypeSometimesUsed;
1773 }
1774 auto it = code_item_layout.emplace(code_item, state);
1775 if (!it.second) {
1776 LayoutType& layout_type = it.first->second;
1777 // Already exists, merge the hotness.
1778 layout_type = MergeLayoutType(layout_type, state);
1779 }
1780 }
1781 }
1782 }
1783
1784 const auto& code_items = header_->CodeItems();
1785 if (VLOG_IS_ON(dex)) {
1786 size_t layout_count[static_cast<size_t>(LayoutType::kLayoutTypeCount)] = {};
1787 for (const std::unique_ptr<dex_ir::CodeItem>& code_item : code_items) {
1788 auto it = code_item_layout.find(code_item.get());
1789 DCHECK(it != code_item_layout.end());
1790 ++layout_count[static_cast<size_t>(it->second)];
1791 }
1792 for (size_t i = 0; i < static_cast<size_t>(LayoutType::kLayoutTypeCount); ++i) {
1793 LOG(INFO) << "Code items in category " << i << " count=" << layout_count[i];
1794 }
1795 }
1796
1797 // Sort the code items vector by new layout. The writing process will take care of calculating
1798 // all the offsets. Stable sort to preserve any existing locality that might be there.
1799 std::stable_sort(code_items.begin(),
1800 code_items.end(),
1801 [&](const std::unique_ptr<dex_ir::CodeItem>& a,
1802 const std::unique_ptr<dex_ir::CodeItem>& b) {
1803 auto it_a = code_item_layout.find(a.get());
1804 auto it_b = code_item_layout.find(b.get());
1805 DCHECK(it_a != code_item_layout.end());
1806 DCHECK(it_b != code_item_layout.end());
1807 const LayoutType layout_type_a = it_a->second;
1808 const LayoutType layout_type_b = it_b->second;
1809 return layout_type_a < layout_type_b;
1810 });
1811 }
1812
LayoutOutputFile(const DexFile * dex_file)1813 void DexLayout::LayoutOutputFile(const DexFile* dex_file) {
1814 LayoutStringData(dex_file);
1815 LayoutClassDefsAndClassData(dex_file);
1816 LayoutCodeItems(dex_file);
1817 }
1818
OutputDexFile(const DexFile * input_dex_file,bool compute_offsets,std::unique_ptr<DexContainer> * dex_container,std::string * error_msg)1819 bool DexLayout::OutputDexFile(const DexFile* input_dex_file,
1820 bool compute_offsets,
1821 std::unique_ptr<DexContainer>* dex_container,
1822 std::string* error_msg) {
1823 const std::string& dex_file_location = input_dex_file->GetLocation();
1824 std::unique_ptr<File> new_file;
1825 // If options_.output_dex_directory_ is non null, we are outputting to a file.
1826 if (options_.output_dex_directory_ != nullptr) {
1827 std::string output_location(options_.output_dex_directory_);
1828 const size_t last_slash = dex_file_location.rfind('/');
1829 std::string dex_file_directory = dex_file_location.substr(0, last_slash + 1);
1830 if (output_location == dex_file_directory) {
1831 output_location = dex_file_location + ".new";
1832 } else {
1833 if (!output_location.empty() && output_location.back() != '/') {
1834 output_location += "/";
1835 }
1836 const size_t separator = dex_file_location.rfind('!');
1837 if (separator != std::string::npos) {
1838 output_location += dex_file_location.substr(separator + 1);
1839 } else {
1840 output_location += "classes.dex";
1841 }
1842 }
1843 new_file.reset(OS::CreateEmptyFile(output_location.c_str()));
1844 if (new_file == nullptr) {
1845 LOG(ERROR) << "Could not create dex writer output file: " << output_location;
1846 return false;
1847 }
1848 }
1849 if (!DexWriter::Output(this, dex_container, compute_offsets, error_msg)) {
1850 return false;
1851 }
1852 if (new_file != nullptr) {
1853 DexContainer* const container = dex_container->get();
1854 DexContainer::Section* const main_section = container->GetMainSection();
1855 if (!new_file->WriteFully(main_section->Begin(), main_section->Size())) {
1856 LOG(ERROR) << "Failed to write main section for dex file " << dex_file_location;
1857 new_file->Erase();
1858 return false;
1859 }
1860 DexContainer::Section* const data_section = container->GetDataSection();
1861 if (!new_file->WriteFully(data_section->Begin(), data_section->Size())) {
1862 LOG(ERROR) << "Failed to write data section for dex file " << dex_file_location;
1863 new_file->Erase();
1864 return false;
1865 }
1866 UNUSED(new_file->FlushCloseOrErase());
1867 }
1868 return true;
1869 }
1870
1871 /*
1872 * Dumps the requested sections of the file.
1873 */
ProcessDexFile(const char * file_name,const DexFile * dex_file,size_t dex_file_index,std::unique_ptr<DexContainer> * dex_container,std::string * error_msg)1874 bool DexLayout::ProcessDexFile(const char* file_name,
1875 const DexFile* dex_file,
1876 size_t dex_file_index,
1877 std::unique_ptr<DexContainer>* dex_container,
1878 std::string* error_msg) {
1879 const bool has_output_container = dex_container != nullptr;
1880 const bool output = options_.output_dex_directory_ != nullptr || has_output_container;
1881
1882 // Try to avoid eagerly assigning offsets to find bugs since Offset will abort if the offset
1883 // is unassigned.
1884 bool eagerly_assign_offsets = false;
1885 if (options_.visualize_pattern_ || options_.show_section_statistics_ || options_.dump_) {
1886 // These options required the offsets for dumping purposes.
1887 eagerly_assign_offsets = true;
1888 }
1889 std::unique_ptr<dex_ir::Header> header(dex_ir::DexIrBuilder(*dex_file,
1890 eagerly_assign_offsets,
1891 GetOptions()));
1892 SetHeader(header.get());
1893
1894 if (options_.verbose_) {
1895 fprintf(out_file_, "Opened '%s', DEX version '%.3s'\n",
1896 file_name, dex_file->GetHeader().magic_ + 4);
1897 }
1898
1899 if (options_.visualize_pattern_) {
1900 VisualizeDexLayout(header_, dex_file, dex_file_index, info_);
1901 return true;
1902 }
1903
1904 if (options_.show_section_statistics_) {
1905 ShowDexSectionStatistics(header_, dex_file_index);
1906 return true;
1907 }
1908
1909 // Dump dex file.
1910 if (options_.dump_) {
1911 DumpDexFile();
1912 }
1913
1914 // In case we are outputting to a file, keep it open so we can verify.
1915 if (output) {
1916 // Layout information about what strings and code items are hot. Used by the writing process
1917 // to generate the sections that are stored in the oat file.
1918 bool do_layout = info_ != nullptr;
1919 if (do_layout) {
1920 LayoutOutputFile(dex_file);
1921 }
1922 // The output needs a dex container, use a temporary one.
1923 std::unique_ptr<DexContainer> temp_container;
1924 if (dex_container == nullptr) {
1925 dex_container = &temp_container;
1926 }
1927 // If we didn't set the offsets eagerly, we definitely need to compute them here.
1928 if (!OutputDexFile(dex_file, do_layout || !eagerly_assign_offsets, dex_container, error_msg)) {
1929 return false;
1930 }
1931
1932 // Clear header before verifying to reduce peak RAM usage.
1933 const size_t file_size = header_->FileSize();
1934 header.reset();
1935
1936 // Verify the output dex file's structure, only enabled by default for debug builds.
1937 if (options_.verify_output_ && has_output_container) {
1938 std::string location = "memory mapped file for " + std::string(file_name);
1939 // Dex file verifier cannot handle compact dex.
1940 bool verify = options_.compact_dex_level_ == CompactDexLevel::kCompactDexLevelNone;
1941 const ArtDexFileLoader dex_file_loader;
1942 DexContainer::Section* const main_section = (*dex_container)->GetMainSection();
1943 DexContainer::Section* const data_section = (*dex_container)->GetDataSection();
1944 DCHECK_EQ(file_size, main_section->Size())
1945 << main_section->Size() << " " << data_section->Size();
1946 std::unique_ptr<const DexFile> output_dex_file(
1947 dex_file_loader.OpenWithDataSection(
1948 main_section->Begin(),
1949 main_section->Size(),
1950 data_section->Begin(),
1951 data_section->Size(),
1952 location,
1953 /* location_checksum= */ 0,
1954 /*oat_dex_file=*/ nullptr,
1955 verify,
1956 /*verify_checksum=*/ false,
1957 error_msg));
1958 CHECK(output_dex_file != nullptr) << "Failed to re-open output file:" << *error_msg;
1959
1960 // Do IR-level comparison between input and output. This check ignores potential differences
1961 // due to layout, so offsets are not checked. Instead, it checks the data contents of each
1962 // item.
1963 //
1964 // Regenerate output IR to catch any bugs that might happen during writing.
1965 std::unique_ptr<dex_ir::Header> output_header(
1966 dex_ir::DexIrBuilder(*output_dex_file,
1967 /*eagerly_assign_offsets=*/ true,
1968 GetOptions()));
1969 std::unique_ptr<dex_ir::Header> orig_header(
1970 dex_ir::DexIrBuilder(*dex_file,
1971 /*eagerly_assign_offsets=*/ true,
1972 GetOptions()));
1973 CHECK(VerifyOutputDexFile(output_header.get(), orig_header.get(), error_msg)) << *error_msg;
1974 }
1975 }
1976 return true;
1977 }
1978
1979 /*
1980 * Processes a single file (either direct .dex or indirect .zip/.jar/.apk).
1981 */
ProcessFile(const char * file_name)1982 int DexLayout::ProcessFile(const char* file_name) {
1983 if (options_.verbose_) {
1984 fprintf(out_file_, "Processing '%s'...\n", file_name);
1985 }
1986
1987 // If the file is not a .dex file, the function tries .zip/.jar/.apk files,
1988 // all of which are Zip archives with "classes.dex" inside.
1989 const bool verify_checksum = !options_.ignore_bad_checksum_;
1990 std::string error_msg;
1991 const ArtDexFileLoader dex_file_loader;
1992 std::vector<std::unique_ptr<const DexFile>> dex_files;
1993 if (!dex_file_loader.Open(
1994 file_name, file_name, /* verify= */ true, verify_checksum, &error_msg, &dex_files)) {
1995 // Display returned error message to user. Note that this error behavior
1996 // differs from the error messages shown by the original Dalvik dexdump.
1997 LOG(ERROR) << error_msg;
1998 return -1;
1999 }
2000
2001 // Success. Either report checksum verification or process
2002 // all dex files found in given file.
2003 if (options_.checksum_only_) {
2004 fprintf(out_file_, "Checksum verified\n");
2005 } else {
2006 for (size_t i = 0; i < dex_files.size(); i++) {
2007 // Pass in a null container to avoid output by default.
2008 if (!ProcessDexFile(file_name,
2009 dex_files[i].get(),
2010 i,
2011 /*dex_container=*/ nullptr,
2012 &error_msg)) {
2013 LOG(WARNING) << "Failed to run dex file " << i << " in " << file_name << " : " << error_msg;
2014 }
2015 }
2016 }
2017 return 0;
2018 }
2019
2020 } // namespace art
2021