1 /*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 /*
17 * Access the contents of a .dex file.
18 */
19
20 #include "DexFile.h"
21 #include "DexProto.h"
22 #include "Leb128.h"
23 #include "sha1.h"
24 #include "ZipArchive.h"
25
26 #include <zlib.h>
27
28 #include <stdlib.h>
29 #include <stddef.h>
30 #include <string.h>
31 #include <fcntl.h>
32 #include <errno.h>
33
/*
 * Verifying checksums is good, but it slows things down and causes us to
 * touch every page.  In the "optimized" world, it doesn't work at all,
 * because we rewrite the contents.
 *
 * kVerifySignature gates the SHA-1 digest comparison in dexFileParse().
 *
 * NOTE(review): kVerifyChecksum is not referenced by any code visible in
 * this file; dexFileParse() decides whether to run the Adler-32 check from
 * its "flags" argument instead.  Confirm whether the constant is still
 * needed.
 */
static const bool kVerifyChecksum = false;
static const bool kVerifySignature = false;
41
42
/*
 * Order two '\0'-terminated modified UTF-8 strings by the values that
 * dexGetUtf16FromUtf8() decodes from them, one unit at a time.  Different
 * encodings of the same value therefore compare equal; only a literal '\0'
 * byte ends a string.
 *
 * Returns 0 when the strings are equal, a negative value when s1 sorts
 * first (including when s1 is a proper prefix of s2), and a positive value
 * otherwise -- the same convention as strcmp().
 */
int dexUtf8Cmp(const char* s1, const char* s2) {
    /* Walk both strings in lock step for as long as both have data. */
    while ((*s1 != '\0') && (*s2 != '\0')) {
        int left = dexGetUtf16FromUtf8(&s1);
        int right = dexGetUtf16FromUtf8(&s2);

        if (left != right) {
            return left - right;
        }
    }

    /* At least one string is exhausted; the shorter one sorts first. */
    if (*s1 == '\0') {
        return (*s2 == '\0') ? 0 : -1;
    }
    return 1;
}
68
/*
 * For dexIsValidMemberNameUtf8(), a bit vector indicating valid low ascii.
 *
 * Each word describes 32 consecutive character codes (the range is given in
 * the per-word comment).  Going by the constants, the least significant bit
 * of a word stands for the lowest code of its range: e.g. bit 4 of the
 * second word is '$' (0x24) and bit 31 of the third word is '_' (0x5f).
 * The lookup itself lives outside this file -- presumably
 * word = c >> 5, bit = c & 0x1f; confirm against the header.
 */
u4 DEX_MEMBER_VALID_LOW_ASCII[4] = {
    0x00000000, // 00..1f low control characters; nothing valid
    0x03ff2010, // 20..3f digits and symbols; valid: '0'..'9', '$', '-'
    0x87fffffe, // 40..5f uppercase etc.; valid: 'A'..'Z', '_'
    0x07fffffe  // 60..7f lowercase etc.; valid: 'a'..'z'
};
76
77 /* Helper for dexIsValidMemberNameUtf8(); do not call directly. */
dexIsValidMemberNameUtf8_0(const char ** pUtf8Ptr)78 bool dexIsValidMemberNameUtf8_0(const char** pUtf8Ptr) {
79 /*
80 * It's a multibyte encoded character. Decode it and analyze. We
81 * accept anything that isn't (a) an improperly encoded low value,
82 * (b) an improper surrogate pair, (c) an encoded '\0', (d) a high
83 * control character, or (e) a high space, layout, or special
84 * character (U+00a0, U+2000..U+200f, U+2028..U+202f,
85 * U+fff0..U+ffff).
86 */
87
88 u2 utf16 = dexGetUtf16FromUtf8(pUtf8Ptr);
89
90 // Perform follow-up tests based on the high 8 bits.
91 switch (utf16 >> 8) {
92 case 0x00: {
93 // It's only valid if it's above the ISO-8859-1 high space (0xa0).
94 return (utf16 > 0x00a0);
95 }
96 case 0xd8:
97 case 0xd9:
98 case 0xda:
99 case 0xdb: {
100 /*
101 * It's a leading surrogate. Check to see that a trailing
102 * surrogate follows.
103 */
104 utf16 = dexGetUtf16FromUtf8(pUtf8Ptr);
105 return (utf16 >= 0xdc00) && (utf16 <= 0xdfff);
106 }
107 case 0xdc:
108 case 0xdd:
109 case 0xde:
110 case 0xdf: {
111 // It's a trailing surrogate, which is not valid at this point.
112 return false;
113 }
114 case 0x20:
115 case 0xff: {
116 // It's in the range that has spaces, controls, and specials.
117 switch (utf16 & 0xfff8) {
118 case 0x2000:
119 case 0x2008:
120 case 0x2028:
121 case 0xfff0:
122 case 0xfff8: {
123 return false;
124 }
125 }
126 break;
127 }
128 }
129
130 return true;
131 }
132
/*
 * Return whether the given string is a valid field or method name.
 *
 * The empty string is never valid.  A name may start with '<', in which
 * case it must also end with '>' (and '>' may appear nowhere else); every
 * other character has to pass dexIsValidMemberNameUtf8().
 */
bool dexIsValidMemberName(const char* s) {
    if (*s == '\0') {
        return false;
    }

    /* '<' is allowed only as the very first character. */
    const bool bracketed = (*s == '<');
    if (bracketed) {
        s++;
    }

    while (*s != '\0') {
        if (*s == '>') {
            /* Must close a '<' and be the final character. */
            return bracketed && (s[1] == '\0');
        }
        if (!dexIsValidMemberNameUtf8(&s)) {
            return false;
        }
    }

    /* Ran off the end: fine unless we are still waiting for the '>'. */
    return !bracketed;
}
167
/*
 * Return whether the given string is a valid type descriptor.
 *
 * Accepted forms: up to 255 leading '[' followed by either a single
 * primitive letter (B, C, D, F, I, J, S, Z), or "V" (only without any '['),
 * or a class name of the form "Lcomponent/component/...;" in which every
 * component is non-empty and consists of characters accepted by
 * dexIsValidMemberNameUtf8().  Nothing may follow the descriptor.
 */
bool dexIsValidTypeDescriptor(const char* s) {
    int dimensions = 0;

    for (; *s == '['; s++) {
        dimensions++;
    }

    /* Arrays may have no more than 255 dimensions. */
    if (dimensions > 255) {
        return false;
    }

    const char kind = *s++;

    if (kind == 'V') {
        /* You can't have an array of void. */
        return (dimensions == 0) && (*s == '\0');
    }

    if (kind != 'L') {
        switch (kind) {
            case 'B':
            case 'C':
            case 'D':
            case 'F':
            case 'I':
            case 'J':
            case 'S':
            case 'Z':
                /* Single-character primitive descriptor. */
                return (*s == '\0');
            default:
                /* Oddball descriptor character (or end of string). */
                return false;
        }
    }

    /*
     * We just consumed the 'L' that introduces a class name.  Track
     * whether the current path component is still empty, i.e. we are at
     * the very start or immediately after a '/'.
     */
    bool componentEmpty = true;
    for (;;) {
        const char c = *s;

        if (c == '\0') {
            /* Premature end. */
            return false;
        }

        if (c == ';') {
            /*
             * This must be the end of the string, and the last component
             * must not be empty (which also rules out "L;").
             */
            return (s[1] == '\0') && !componentEmpty;
        }

        if (c == '/') {
            if (componentEmpty) {
                /* Slash at start or two slashes in a row. */
                return false;
            }
            componentEmpty = true;
            s++;
            continue;
        }

        if (!dexIsValidMemberNameUtf8(&s)) {
            return false;
        }
        componentEmpty = false;
    }
}
245
/*
 * Return whether the given string is a valid reference descriptor, i.e.
 * dexIsValidTypeDescriptor() accepts it and it names a class ('L...') or
 * an array ('[...') rather than a primitive type.
 */
bool dexIsReferenceDescriptor(const char* s) {
    return dexIsValidTypeDescriptor(s)
        && ((s[0] == 'L') || (s[0] == '['));
}
256
/*
 * Return whether the given string is a valid class descriptor, i.e.
 * dexIsValidTypeDescriptor() accepts it and it starts with 'L' (so it is
 * neither an array nor a primitive type).
 */
bool dexIsClassDescriptor(const char* s) {
    return dexIsValidTypeDescriptor(s) && (s[0] == 'L');
}
267
/*
 * Return whether the given string is a valid field type descriptor, i.e.
 * dexIsValidTypeDescriptor() accepts it and it is anything but "void"
 * (does not start with 'V').
 */
bool dexIsFieldDescriptor(const char* s) {
    return dexIsValidTypeDescriptor(s) && (s[0] != 'V');
}
278
279 /* Return the UTF-8 encoded string with the specified string_id index,
280 * also filling in the UTF-16 size (number of 16-bit code points).*/
dexStringAndSizeById(const DexFile * pDexFile,u4 idx,u4 * utf16Size)281 const char* dexStringAndSizeById(const DexFile* pDexFile, u4 idx,
282 u4* utf16Size) {
283 const DexStringId* pStringId = dexGetStringId(pDexFile, idx);
284 const u1* ptr = pDexFile->baseAddr + pStringId->stringDataOff;
285
286 *utf16Size = readUnsignedLeb128(&ptr);
287 return (const char*) ptr;
288 }
289
/*
 * Format an SHA-1 digest for printing.  tmpBuf must be able to hold at
 * least kSHA1DigestOutputLen bytes.
 *
 * NOTE(review): this is only a declaration; no definition or caller of
 * dvmSHA1DigestToStr() is visible in this file (the local formatter is
 * dexSHA1DigestToStr()).  Confirm whether the prototype is still needed.
 */
const char* dvmSHA1DigestToStr(const unsigned char digest[], char* tmpBuf);
295
296 /*
297 * Compute a SHA-1 digest on a range of bytes.
298 */
dexComputeSHA1Digest(const unsigned char * data,size_t length,unsigned char digest[])299 static void dexComputeSHA1Digest(const unsigned char* data, size_t length,
300 unsigned char digest[])
301 {
302 SHA1_CTX context;
303 SHA1Init(&context);
304 SHA1Update(&context, data, length);
305 SHA1Final(digest, &context);
306 }
307
308 /*
309 * Format the SHA-1 digest into the buffer, which must be able to hold at
310 * least kSHA1DigestOutputLen bytes. Returns a pointer to the buffer,
311 */
dexSHA1DigestToStr(const unsigned char digest[],char * tmpBuf)312 static const char* dexSHA1DigestToStr(const unsigned char digest[],char* tmpBuf)
313 {
314 static const char hexDigit[] = "0123456789abcdef";
315 char* cp;
316 int i;
317
318 cp = tmpBuf;
319 for (i = 0; i < kSHA1DigestLen; i++) {
320 *cp++ = hexDigit[digest[i] >> 4];
321 *cp++ = hexDigit[digest[i] & 0x0f];
322 }
323 *cp++ = '\0';
324
325 assert(cp == tmpBuf + kSHA1DigestOutputLen);
326
327 return tmpBuf;
328 }
329
330 /*
331 * Compute a hash code on a UTF-8 string, for use with internal hash tables.
332 *
333 * This may or may not be compatible with UTF-8 hash functions used inside
334 * the Dalvik VM.
335 *
336 * The basic "multiply by 31 and add" approach does better on class names
337 * than most other things tried (e.g. adler32).
338 */
classDescriptorHash(const char * str)339 static u4 classDescriptorHash(const char* str)
340 {
341 u4 hash = 1;
342
343 while (*str != '\0')
344 hash = hash * 31 + *str++;
345
346 return hash;
347 }
348
349 /*
350 * Add an entry to the class lookup table. We hash the string and probe
351 * until we find an open slot.
352 */
classLookupAdd(DexFile * pDexFile,DexClassLookup * pLookup,int stringOff,int classDefOff,int * pNumProbes)353 static void classLookupAdd(DexFile* pDexFile, DexClassLookup* pLookup,
354 int stringOff, int classDefOff, int* pNumProbes)
355 {
356 const char* classDescriptor =
357 (const char*) (pDexFile->baseAddr + stringOff);
358 const DexClassDef* pClassDef =
359 (const DexClassDef*) (pDexFile->baseAddr + classDefOff);
360 u4 hash = classDescriptorHash(classDescriptor);
361 int mask = pLookup->numEntries-1;
362 int idx = hash & mask;
363
364 /*
365 * Find the first empty slot. We oversized the table, so this is
366 * guaranteed to finish.
367 */
368 int probes = 0;
369 while (pLookup->table[idx].classDescriptorOffset != 0) {
370 idx = (idx + 1) & mask;
371 probes++;
372 }
373 //if (probes > 1)
374 // LOGW("classLookupAdd: probes=%d\n", probes);
375
376 pLookup->table[idx].classDescriptorHash = hash;
377 pLookup->table[idx].classDescriptorOffset = stringOff;
378 pLookup->table[idx].classDefOffset = classDefOff;
379 *pNumProbes = probes;
380 }
381
382 /*
383 * Round up to the next highest power of 2.
384 *
385 * Found on http://graphics.stanford.edu/~seander/bithacks.html.
386 */
dexRoundUpPower2(u4 val)387 u4 dexRoundUpPower2(u4 val)
388 {
389 val--;
390 val |= val >> 1;
391 val |= val >> 2;
392 val |= val >> 4;
393 val |= val >> 8;
394 val |= val >> 16;
395 val++;
396
397 return val;
398 }
399
400 /*
401 * Create the class lookup hash table.
402 *
403 * Returns newly-allocated storage.
404 */
dexCreateClassLookup(DexFile * pDexFile)405 DexClassLookup* dexCreateClassLookup(DexFile* pDexFile)
406 {
407 DexClassLookup* pLookup;
408 int allocSize;
409 int i, numEntries;
410 int numProbes, totalProbes, maxProbes;
411
412 numProbes = totalProbes = maxProbes = 0;
413
414 assert(pDexFile != NULL);
415
416 /*
417 * Using a factor of 3 results in far less probing than a factor of 2,
418 * but almost doubles the flash storage requirements for the bootstrap
419 * DEX files. The overall impact on class loading performance seems
420 * to be minor. We could probably get some performance improvement by
421 * using a secondary hash.
422 */
423 numEntries = dexRoundUpPower2(pDexFile->pHeader->classDefsSize * 2);
424 allocSize = offsetof(DexClassLookup, table)
425 + numEntries * sizeof(pLookup->table[0]);
426
427 pLookup = (DexClassLookup*) calloc(1, allocSize);
428 if (pLookup == NULL)
429 return NULL;
430 pLookup->size = allocSize;
431 pLookup->numEntries = numEntries;
432
433 for (i = 0; i < (int)pDexFile->pHeader->classDefsSize; i++) {
434 const DexClassDef* pClassDef;
435 const char* pString;
436
437 pClassDef = dexGetClassDef(pDexFile, i);
438 pString = dexStringByTypeIdx(pDexFile, pClassDef->classIdx);
439
440 classLookupAdd(pDexFile, pLookup,
441 (u1*)pString - pDexFile->baseAddr,
442 (u1*)pClassDef - pDexFile->baseAddr, &numProbes);
443
444 if (numProbes > maxProbes)
445 maxProbes = numProbes;
446 totalProbes += numProbes;
447 }
448
449 LOGV("Class lookup: classes=%d slots=%d (%d%% occ) alloc=%d"
450 " total=%d max=%d\n",
451 pDexFile->pHeader->classDefsSize, numEntries,
452 (100 * pDexFile->pHeader->classDefsSize) / numEntries,
453 allocSize, totalProbes, maxProbes);
454
455 return pLookup;
456 }
457
458
459 /*
460 * Set up the basic raw data pointers of a DexFile. This function isn't
461 * meant for general use.
462 */
dexFileSetupBasicPointers(DexFile * pDexFile,const u1 * data)463 void dexFileSetupBasicPointers(DexFile* pDexFile, const u1* data) {
464 DexHeader *pHeader = (DexHeader*) data;
465
466 pDexFile->baseAddr = data;
467 pDexFile->pHeader = pHeader;
468 pDexFile->pStringIds = (const DexStringId*) (data + pHeader->stringIdsOff);
469 pDexFile->pTypeIds = (const DexTypeId*) (data + pHeader->typeIdsOff);
470 pDexFile->pFieldIds = (const DexFieldId*) (data + pHeader->fieldIdsOff);
471 pDexFile->pMethodIds = (const DexMethodId*) (data + pHeader->methodIdsOff);
472 pDexFile->pProtoIds = (const DexProtoId*) (data + pHeader->protoIdsOff);
473 pDexFile->pClassDefs = (const DexClassDef*) (data + pHeader->classDefsOff);
474 pDexFile->pLinkData = (const DexLink*) (data + pHeader->linkOff);
475 }
476
477
478 /*
479 * Parse out an index map entry, advancing "*pData" and reducing "*pSize".
480 */
parseIndexMapEntry(const u1 ** pData,u4 * pSize,bool expanding,u4 * pFullCount,u4 * pReducedCount,const u2 ** pMap)481 static bool parseIndexMapEntry(const u1** pData, u4* pSize, bool expanding,
482 u4* pFullCount, u4* pReducedCount, const u2** pMap)
483 {
484 const u4* wordPtr = (const u4*) *pData;
485 u4 size = *pSize;
486 u4 mapCount;
487
488 if (expanding) {
489 if (size < 4)
490 return false;
491 mapCount = *pReducedCount = *wordPtr++;
492 *pFullCount = (u4) -1;
493 size -= sizeof(u4);
494 } else {
495 if (size < 8)
496 return false;
497 mapCount = *pFullCount = *wordPtr++;
498 *pReducedCount = *wordPtr++;
499 size -= sizeof(u4) * 2;
500 }
501
502 u4 mapSize = mapCount * sizeof(u2);
503
504 if (size < mapSize)
505 return false;
506 *pMap = (const u2*) wordPtr;
507 size -= mapSize;
508
509 /* advance the pointer */
510 const u1* ptr = (const u1*) wordPtr;
511 ptr += (mapSize + 3) & ~0x3;
512
513 /* update pass-by-reference values */
514 *pData = (const u1*) ptr;
515 *pSize = size;
516
517 return true;
518 }
519
520 /*
521 * Set up some pointers into the mapped data.
522 *
523 * See analysis/ReduceConstants.c for the data layout description.
524 */
parseIndexMap(DexFile * pDexFile,const u1 * data,u4 size,bool expanding)525 static bool parseIndexMap(DexFile* pDexFile, const u1* data, u4 size,
526 bool expanding)
527 {
528 if (!parseIndexMapEntry(&data, &size, expanding,
529 &pDexFile->indexMap.classFullCount,
530 &pDexFile->indexMap.classReducedCount,
531 &pDexFile->indexMap.classMap))
532 {
533 return false;
534 }
535
536 if (!parseIndexMapEntry(&data, &size, expanding,
537 &pDexFile->indexMap.methodFullCount,
538 &pDexFile->indexMap.methodReducedCount,
539 &pDexFile->indexMap.methodMap))
540 {
541 return false;
542 }
543
544 if (!parseIndexMapEntry(&data, &size, expanding,
545 &pDexFile->indexMap.fieldFullCount,
546 &pDexFile->indexMap.fieldReducedCount,
547 &pDexFile->indexMap.fieldMap))
548 {
549 return false;
550 }
551
552 if (!parseIndexMapEntry(&data, &size, expanding,
553 &pDexFile->indexMap.stringFullCount,
554 &pDexFile->indexMap.stringReducedCount,
555 &pDexFile->indexMap.stringMap))
556 {
557 return false;
558 }
559
560 if (expanding) {
561 /*
562 * The map includes the "reduced" counts; pull the original counts
563 * out of the DexFile so that code has a consistent source.
564 */
565 assert(pDexFile->indexMap.classFullCount == (u4) -1);
566 assert(pDexFile->indexMap.methodFullCount == (u4) -1);
567 assert(pDexFile->indexMap.fieldFullCount == (u4) -1);
568 assert(pDexFile->indexMap.stringFullCount == (u4) -1);
569
570 #if 0 // TODO: not available yet -- do later or just skip this
571 pDexFile->indexMap.classFullCount =
572 pDexFile->pHeader->typeIdsSize;
573 pDexFile->indexMap.methodFullCount =
574 pDexFile->pHeader->methodIdsSize;
575 pDexFile->indexMap.fieldFullCount =
576 pDexFile->pHeader->fieldIdsSize;
577 pDexFile->indexMap.stringFullCount =
578 pDexFile->pHeader->stringIdsSize;
579 #endif
580 }
581
582 LOGI("Class : %u %u %u\n",
583 pDexFile->indexMap.classFullCount,
584 pDexFile->indexMap.classReducedCount,
585 pDexFile->indexMap.classMap[0]);
586 LOGI("Method: %u %u %u\n",
587 pDexFile->indexMap.methodFullCount,
588 pDexFile->indexMap.methodReducedCount,
589 pDexFile->indexMap.methodMap[0]);
590 LOGI("Field : %u %u %u\n",
591 pDexFile->indexMap.fieldFullCount,
592 pDexFile->indexMap.fieldReducedCount,
593 pDexFile->indexMap.fieldMap[0]);
594 LOGI("String: %u %u %u\n",
595 pDexFile->indexMap.stringFullCount,
596 pDexFile->indexMap.stringReducedCount,
597 pDexFile->indexMap.stringMap[0]);
598
599 return true;
600 }
601
602 /*
603 * Parse some auxillary data tables.
604 *
605 * v1.0 wrote a zero in the first 32 bits, followed by the DexClassLookup
606 * table. Subsequent versions switched to the "chunk" format.
607 */
parseAuxData(const u1 * data,DexFile * pDexFile)608 static bool parseAuxData(const u1* data, DexFile* pDexFile)
609 {
610 const u4* pAux = (const u4*) (data + pDexFile->pOptHeader->auxOffset);
611 u4 indexMapType = 0;
612
613 /* v1.0 format? */
614 if (*pAux == 0) {
615 LOGV("+++ found OLD dex format\n");
616 pDexFile->pClassLookup = (const DexClassLookup*) (pAux+1);
617 return true;
618 }
619 LOGV("+++ found NEW dex format\n");
620
621 /* process chunks until we see the end marker */
622 while (*pAux != kDexChunkEnd) {
623 u4 size = *(pAux+1);
624 u1* data = (u1*) (pAux + 2);
625
626 switch (*pAux) {
627 case kDexChunkClassLookup:
628 pDexFile->pClassLookup = (const DexClassLookup*) data;
629 break;
630 case kDexChunkReducingIndexMap:
631 LOGI("+++ found reducing index map, size=%u\n", size);
632 if (!parseIndexMap(pDexFile, data, size, false)) {
633 LOGE("Failed parsing reducing index map\n");
634 return false;
635 }
636 indexMapType = *pAux;
637 break;
638 case kDexChunkExpandingIndexMap:
639 LOGI("+++ found expanding index map, size=%u\n", size);
640 if (!parseIndexMap(pDexFile, data, size, true)) {
641 LOGE("Failed parsing expanding index map\n");
642 return false;
643 }
644 indexMapType = *pAux;
645 break;
646 default:
647 LOGI("Unknown chunk 0x%08x (%c%c%c%c), size=%d in aux data area\n",
648 *pAux,
649 (char) ((*pAux) >> 24), (char) ((*pAux) >> 16),
650 (char) ((*pAux) >> 8), (char) (*pAux),
651 size);
652 break;
653 }
654
655 /*
656 * Advance pointer, padding to 64-bit boundary. The extra "+8" is
657 * for the type/size header.
658 */
659 size = (size + 8 + 7) & ~7;
660 pAux += size / sizeof(u4);
661 }
662
663 #if 0 // TODO: propagate expected map type from the VM through the API
664 /*
665 * If we're configured to expect an index map, and we don't find one,
666 * reject this DEX so we'll regenerate it. Also, if we found an
667 * "expanding" map but we're not configured to use it, we have to fail
668 * because the constants aren't usable without translation.
669 */
670 if (indexMapType != expectedIndexMapType) {
671 LOGW("Incompatible index map configuration: found 0x%04x, need %d\n",
672 indexMapType, DVM_REDUCE_CONSTANTS);
673 return false;
674 }
675 #endif
676
677 return true;
678 }
679
680 /*
681 * Parse an optimized or unoptimized .dex file sitting in memory. This is
682 * called after the byte-ordering and structure alignment has been fixed up.
683 *
684 * On success, return a newly-allocated DexFile.
685 */
dexFileParse(const u1 * data,size_t length,int flags)686 DexFile* dexFileParse(const u1* data, size_t length, int flags)
687 {
688 DexFile* pDexFile = NULL;
689 const DexHeader* pHeader;
690 const u1* magic;
691 int result = -1;
692
693 if (length < sizeof(DexHeader)) {
694 LOGE("too short to be a valid .dex\n");
695 goto bail; /* bad file format */
696 }
697
698 pDexFile = (DexFile*) malloc(sizeof(DexFile));
699 if (pDexFile == NULL)
700 goto bail; /* alloc failure */
701 memset(pDexFile, 0, sizeof(DexFile));
702
703 /*
704 * Peel off the optimized header.
705 */
706 if (memcmp(data, DEX_OPT_MAGIC, 4) == 0) {
707 magic = data;
708 if (memcmp(magic+4, DEX_OPT_MAGIC_VERS, 4) != 0) {
709 LOGE("bad opt version (0x%02x %02x %02x %02x)\n",
710 magic[4], magic[5], magic[6], magic[7]);
711 goto bail;
712 }
713
714 pDexFile->pOptHeader = (const DexOptHeader*) data;
715 LOGV("Good opt header, DEX offset is %d, flags=0x%02x\n",
716 pDexFile->pOptHeader->dexOffset, pDexFile->pOptHeader->flags);
717
718 /* locate some auxillary data tables */
719 if (!parseAuxData(data, pDexFile))
720 goto bail;
721
722 /* ignore the opt header and appended data from here on out */
723 data += pDexFile->pOptHeader->dexOffset;
724 length -= pDexFile->pOptHeader->dexOffset;
725 if (pDexFile->pOptHeader->dexLength > length) {
726 LOGE("File truncated? stored len=%d, rem len=%d\n",
727 pDexFile->pOptHeader->dexLength, (int) length);
728 goto bail;
729 }
730 length = pDexFile->pOptHeader->dexLength;
731 }
732
733 dexFileSetupBasicPointers(pDexFile, data);
734 pHeader = pDexFile->pHeader;
735
736 magic = pHeader->magic;
737 if (memcmp(magic, DEX_MAGIC, 4) != 0) {
738 /* not expected */
739 LOGE("bad magic number (0x%02x %02x %02x %02x)\n",
740 magic[0], magic[1], magic[2], magic[3]);
741 goto bail;
742 }
743 if (memcmp(magic+4, DEX_MAGIC_VERS, 4) != 0) {
744 LOGE("bad dex version (0x%02x %02x %02x %02x)\n",
745 magic[4], magic[5], magic[6], magic[7]);
746 goto bail;
747 }
748
749 /*
750 * Verify the checksum. This is reasonably quick, but does require
751 * touching every byte in the DEX file. The checksum changes after
752 * byte-swapping and DEX optimization.
753 */
754 if (flags & kDexParseVerifyChecksum) {
755 u4 adler = dexComputeChecksum(pHeader);
756 if (adler != pHeader->checksum) {
757 LOGE("ERROR: bad checksum (%08x vs %08x)\n",
758 adler, pHeader->checksum);
759 if (!(flags & kDexParseContinueOnError))
760 goto bail;
761 } else {
762 LOGV("+++ adler32 checksum (%08x) verified\n", adler);
763 }
764 }
765
766 /*
767 * Verify the SHA-1 digest. (Normally we don't want to do this --
768 * the digest is used to uniquely identify a DEX file, and can't be
769 * computed post-optimization.)
770 *
771 * The digest will be invalid after byte swapping and DEX optimization.
772 */
773 if (kVerifySignature) {
774 unsigned char sha1Digest[kSHA1DigestLen];
775 const int nonSum = sizeof(pHeader->magic) + sizeof(pHeader->checksum) +
776 kSHA1DigestLen;
777
778 dexComputeSHA1Digest(data + nonSum, length - nonSum, sha1Digest);
779 if (memcmp(sha1Digest, pHeader->signature, kSHA1DigestLen) != 0) {
780 char tmpBuf1[kSHA1DigestOutputLen];
781 char tmpBuf2[kSHA1DigestOutputLen];
782 LOGE("ERROR: bad SHA1 digest (%s vs %s)\n",
783 dexSHA1DigestToStr(sha1Digest, tmpBuf1),
784 dexSHA1DigestToStr(pHeader->signature, tmpBuf2));
785 if (!(flags & kDexParseContinueOnError))
786 goto bail;
787 } else {
788 LOGV("+++ sha1 digest verified\n");
789 }
790 }
791
792 if (pHeader->fileSize != length) {
793 LOGE("ERROR: stored file size (%d) != expected (%d)\n",
794 (int) pHeader->fileSize, (int) length);
795 if (!(flags & kDexParseContinueOnError))
796 goto bail;
797 }
798
799 if (pHeader->classDefsSize == 0) {
800 LOGE("ERROR: DEX file has no classes in it, failing\n");
801 goto bail;
802 }
803
804 /*
805 * Success!
806 */
807 result = 0;
808
809 bail:
810 if (result != 0 && pDexFile != NULL) {
811 dexFileFree(pDexFile);
812 pDexFile = NULL;
813 }
814 return pDexFile;
815 }
816
817 /*
818 * Free up the DexFile and any associated data structures.
819 *
820 * Note we may be called with a partially-initialized DexFile.
821 */
dexFileFree(DexFile * pDexFile)822 void dexFileFree(DexFile* pDexFile)
823 {
824 if (pDexFile == NULL)
825 return;
826
827 free(pDexFile);
828 }
829
830 /*
831 * Look up a class definition entry by descriptor.
832 *
833 * "descriptor" should look like "Landroid/debug/Stuff;".
834 */
dexFindClass(const DexFile * pDexFile,const char * descriptor)835 const DexClassDef* dexFindClass(const DexFile* pDexFile,
836 const char* descriptor)
837 {
838 const DexClassLookup* pLookup = pDexFile->pClassLookup;
839 u4 hash;
840 int idx, mask;
841
842 hash = classDescriptorHash(descriptor);
843 mask = pLookup->numEntries - 1;
844 idx = hash & mask;
845
846 /*
847 * Search until we find a matching entry or an empty slot.
848 */
849 while (true) {
850 int offset;
851
852 offset = pLookup->table[idx].classDescriptorOffset;
853 if (offset == 0)
854 return NULL;
855
856 if (pLookup->table[idx].classDescriptorHash == hash) {
857 const char* str;
858
859 str = (const char*) (pDexFile->baseAddr + offset);
860 if (strcmp(str, descriptor) == 0) {
861 return (const DexClassDef*)
862 (pDexFile->baseAddr + pLookup->table[idx].classDefOffset);
863 }
864 }
865
866 idx = (idx + 1) & mask;
867 }
868 }
869
870
871 /*
872 * Compute the DEX file checksum for a memory-mapped DEX file.
873 */
dexComputeChecksum(const DexHeader * pHeader)874 u4 dexComputeChecksum(const DexHeader* pHeader)
875 {
876 const u1* start = (const u1*) pHeader;
877
878 uLong adler = adler32(0L, Z_NULL, 0);
879 const int nonSum = sizeof(pHeader->magic) + sizeof(pHeader->checksum);
880
881 return (u4) adler32(adler, start + nonSum, pHeader->fileSize - nonSum);
882 }
883
884
885 /*
886 * ===========================================================================
887 * Debug info
888 * ===========================================================================
889 */
890
/*
 * Decode the arguments in a method signature, which looks something
 * like "(ID[Ljava/lang/String;)V".
 *
 * Returns the type signature letter for the next argument ('L' for an
 * object reference, '[' for an array of anything), or ')' if there are
 * no more args.  Advances "*pSig" to point to the character after the
 * type that was consumed; a single leading '(' is skipped first.
 *
 * If the string is already exhausted, '\0' is returned and "*pSig" is
 * left untouched.  A malformed signature that ends in the middle of a
 * type (e.g. "Lfoo" without the ';', or a trailing "[") leaves "*pSig"
 * pointing at the terminating '\0' instead of scanning past the end of
 * the string.
 */
static char decodeSignature(const char** pSig)
{
    const char* cursor = *pSig;
    char kind;

    if (*cursor == '(')
        cursor++;

    kind = *cursor;
    if (kind == '\0')
        return kind;    /* don't advance further */

    /* array; advance past the dimension markers to the element type */
    while (*cursor == '[')
        cursor++;

    /* object ref (possibly as array element): the name runs up to ';' */
    if (*cursor == 'L') {
        while (*cursor != ';' && *cursor != '\0')
            cursor++;
    }

    /* step over the last character of the type, but never over the NUL */
    if (*cursor != '\0')
        cursor++;

    *pSig = cursor;
    return kind;
}
930
/*
 * Returns the length of a type string, given the start of the type
 * string: the number of characters decodeSignature() consumes for one
 * type.  Used for the case where the debug info format references types
 * that are inside a method type signature.
 *
 * Assumes any leading '(' has already been gobbled.
 */
static int typeLength (const char *type) {
    const char *cursor = type;

    /* Only the cursor movement matters here, not the type letter. */
    (void) decodeSignature(&cursor);

    return cursor - type;
}
942
943 /*
944 * Reads a string index as encoded for the debug info format,
945 * returning a string pointer or NULL as appropriate.
946 */
readStringIdx(const DexFile * pDexFile,const u1 ** pStream)947 static const char* readStringIdx(const DexFile* pDexFile,
948 const u1** pStream) {
949 u4 stringIdx = readUnsignedLeb128(pStream);
950
951 // Remember, encoded string indicies have 1 added to them.
952 if (stringIdx == 0) {
953 return NULL;
954 } else {
955 return dexStringById(pDexFile, stringIdx - 1);
956 }
957 }
958
959 /*
960 * Reads a type index as encoded for the debug info format, returning
961 * a string pointer for its descriptor or NULL as appropriate.
962 */
readTypeIdx(const DexFile * pDexFile,const u1 ** pStream)963 static const char* readTypeIdx(const DexFile* pDexFile,
964 const u1** pStream) {
965 u4 typeIdx = readUnsignedLeb128(pStream);
966
967 // Remember, encoded type indicies have 1 added to them.
968 if (typeIdx == 0) {
969 return NULL;
970 } else {
971 return dexStringByTypeIdx(pDexFile, typeIdx - 1);
972 }
973 }
974
975 /* access_flag value indicating that a method is static */
976 #define ACC_STATIC 0x0008
977
978 typedef struct LocalInfo {
979 const char *name;
980 const char *descriptor;
981 const char *signature;
982 u2 startAddress;
983 bool live;
984 } LocalInfo;
985
/*
 * Report the local variable held in register "reg" through "localCb", if
 * a callback was supplied and the variable is currently live.  The
 * reported range runs from the recorded start address to "endAddress".
 * A missing signature is passed to the callback as the empty string.
 */
static void emitLocalCbIfLive (void *cnxt, int reg, u4 endAddress,
    LocalInfo *localInReg, DexDebugNewLocalCb localCb)
{
    const LocalInfo *local;

    if (localCb == NULL)
        return;

    local = &localInReg[reg];
    if (!local->live)
        return;

    localCb(cnxt, reg, local->startAddress, endAddress,
            local->name,
            local->descriptor,
            (local->signature != NULL) ? local->signature : "");
}
997
998 // TODO optimize localCb == NULL case
dexDecodeDebugInfo(const DexFile * pDexFile,const DexCode * pCode,const char * classDescriptor,u4 protoIdx,u4 accessFlags,DexDebugNewPositionCb posCb,DexDebugNewLocalCb localCb,void * cnxt)999 void dexDecodeDebugInfo(
1000 const DexFile* pDexFile,
1001 const DexCode* pCode,
1002 const char* classDescriptor,
1003 u4 protoIdx,
1004 u4 accessFlags,
1005 DexDebugNewPositionCb posCb, DexDebugNewLocalCb localCb,
1006 void* cnxt)
1007 {
1008 const u1 *stream = dexGetDebugInfoStream(pDexFile, pCode);
1009 u4 line;
1010 u4 parametersSize;
1011 u4 address = 0;
1012 LocalInfo localInReg[pCode->registersSize];
1013 u4 insnsSize = pCode->insnsSize;
1014 DexProto proto = { pDexFile, protoIdx };
1015
1016 memset(localInReg, 0, sizeof(LocalInfo) * pCode->registersSize);
1017
1018 if (stream == NULL) {
1019 goto end;
1020 }
1021
1022 line = readUnsignedLeb128(&stream);
1023 parametersSize = readUnsignedLeb128(&stream);
1024
1025 u2 argReg = pCode->registersSize - pCode->insSize;
1026
1027 if ((accessFlags & ACC_STATIC) == 0) {
1028 /*
1029 * The code is an instance method, which means that there is
1030 * an initial this parameter. Also, the proto list should
1031 * contain exactly one fewer argument word than the insSize
1032 * indicates.
1033 */
1034 assert(pCode->insSize == (dexProtoComputeArgsSize(&proto) + 1));
1035 localInReg[argReg].name = "this";
1036 localInReg[argReg].descriptor = classDescriptor;
1037 localInReg[argReg].startAddress = 0;
1038 localInReg[argReg].live = true;
1039 argReg++;
1040 } else {
1041 assert(pCode->insSize == dexProtoComputeArgsSize(&proto));
1042 }
1043
1044 DexParameterIterator iterator;
1045 dexParameterIteratorInit(&iterator, &proto);
1046
1047 while (parametersSize-- != 0) {
1048 const char* descriptor = dexParameterIteratorNextDescriptor(&iterator);
1049 const char *name;
1050 int reg;
1051
1052 if ((argReg >= pCode->registersSize) || (descriptor == NULL)) {
1053 goto invalid_stream;
1054 }
1055
1056 name = readStringIdx(pDexFile, &stream);
1057 reg = argReg;
1058
1059 switch (descriptor[0]) {
1060 case 'D':
1061 case 'J':
1062 argReg += 2;
1063 break;
1064 default:
1065 argReg += 1;
1066 break;
1067 }
1068
1069 if (name != NULL) {
1070 localInReg[reg].name = name;
1071 localInReg[reg].descriptor = descriptor;
1072 localInReg[reg].signature = NULL;
1073 localInReg[reg].startAddress = address;
1074 localInReg[reg].live = true;
1075 }
1076 }
1077
1078 for (;;) {
1079 u1 opcode = *stream++;
1080 u2 reg;
1081
1082 switch (opcode) {
1083 case DBG_END_SEQUENCE:
1084 goto end;
1085
1086 case DBG_ADVANCE_PC:
1087 address += readUnsignedLeb128(&stream);
1088 break;
1089
1090 case DBG_ADVANCE_LINE:
1091 line += readSignedLeb128(&stream);
1092 break;
1093
1094 case DBG_START_LOCAL:
1095 case DBG_START_LOCAL_EXTENDED:
1096 reg = readUnsignedLeb128(&stream);
1097 if (reg > pCode->registersSize) goto invalid_stream;
1098
1099 // Emit what was previously there, if anything
1100 emitLocalCbIfLive (cnxt, reg, address,
1101 localInReg, localCb);
1102
1103 localInReg[reg].name = readStringIdx(pDexFile, &stream);
1104 localInReg[reg].descriptor = readTypeIdx(pDexFile, &stream);
1105 if (opcode == DBG_START_LOCAL_EXTENDED) {
1106 localInReg[reg].signature
1107 = readStringIdx(pDexFile, &stream);
1108 } else {
1109 localInReg[reg].signature = NULL;
1110 }
1111 localInReg[reg].startAddress = address;
1112 localInReg[reg].live = true;
1113 break;
1114
1115 case DBG_END_LOCAL:
1116 reg = readUnsignedLeb128(&stream);
1117 if (reg > pCode->registersSize) goto invalid_stream;
1118
1119 emitLocalCbIfLive (cnxt, reg, address, localInReg, localCb);
1120 localInReg[reg].live = false;
1121 break;
1122
1123 case DBG_RESTART_LOCAL:
1124 reg = readUnsignedLeb128(&stream);
1125 if (reg > pCode->registersSize) goto invalid_stream;
1126
1127 if (localInReg[reg].name == NULL
1128 || localInReg[reg].descriptor == NULL) {
1129 goto invalid_stream;
1130 }
1131
1132 /*
1133 * If the register is live, the "restart" is superfluous,
1134 * and we don't want to mess with the existing start address.
1135 */
1136 if (!localInReg[reg].live) {
1137 localInReg[reg].startAddress = address;
1138 localInReg[reg].live = true;
1139 }
1140 break;
1141
1142 case DBG_SET_PROLOGUE_END:
1143 case DBG_SET_EPILOGUE_BEGIN:
1144 case DBG_SET_FILE:
1145 break;
1146
1147 default: {
1148 int adjopcode = opcode - DBG_FIRST_SPECIAL;
1149
1150 address += adjopcode / DBG_LINE_RANGE;
1151 line += DBG_LINE_BASE + (adjopcode % DBG_LINE_RANGE);
1152
1153 if (posCb != NULL) {
1154 int done;
1155 done = posCb(cnxt, address, line);
1156
1157 if (done) {
1158 // early exit
1159 goto end;
1160 }
1161 }
1162 break;
1163 }
1164 }
1165 }
1166
1167 end:
1168 {
1169 int reg;
1170 for (reg = 0; reg < pCode->registersSize; reg++) {
1171 emitLocalCbIfLive (cnxt, reg, insnsSize, localInReg, localCb);
1172 }
1173 }
1174 return;
1175
1176 invalid_stream:
1177 IF_LOGE() {
1178 char* methodDescriptor = dexProtoCopyMethodDescriptor(&proto);
1179 LOGE("Invalid debug info stream. class %s; proto %s",
1180 classDescriptor, methodDescriptor);
1181 free(methodDescriptor);
1182 }
1183 }
1184