• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2008 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 /*
18  * Access the contents of a .dex file.
19  */
20 
21 #include "DexFile.h"
22 #include "DexProto.h"
23 #include "DexCatch.h"
24 #include "Leb128.h"
25 #include "sha1.h"
26 #include "ZipArchive.h"
27 
28 #include <zlib.h>
29 
30 #include <stdlib.h>
31 #include <stddef.h>
32 #include <string.h>
33 #include <fcntl.h>
34 #include <errno.h>
35 
36 /*
37  * Verifying checksums is good, but it slows things down and causes us to
38  * touch every page.  In the "optimized" world, it doesn't work at all,
39  * because we rewrite the contents.
40  */
41 static const bool kVerifyChecksum = false;
42 static const bool kVerifySignature = false;
43 
44 
/*
 * Order two '\0'-terminated modified UTF-8 strings by the values that
 * dexGetUtf16FromUtf8() decodes from them, so that different byte
 * encodings of the same value compare as equal.  Only a literal '\0'
 * byte ends a string.
 *
 * Returns a negative, zero, or positive value, as strcmp() does.
 */
int dexUtf8Cmp(const char* s1, const char* s2) {
    while (*s1 != '\0' && *s2 != '\0') {
        /* Both decoders advance their pointer past the consumed bytes. */
        int left = dexGetUtf16FromUtf8(&s1);
        int right = dexGetUtf16FromUtf8(&s2);

        if (left != right) {
            return left - right;
        }
    }

    /* At least one string is exhausted; the shorter one sorts first. */
    if (*s1 != '\0') {
        return 1;
    }
    return (*s2 != '\0') ? -1 : 0;
}
70 
71 /* for dexIsValidMemberNameUtf8(), a bit vector indicating valid low ascii */
72 u4 DEX_MEMBER_VALID_LOW_ASCII[4] = {
73     0x00000000, // 00..1f low control characters; nothing valid
74     0x03ff2010, // 20..3f digits and symbols; valid: '0'..'9', '$', '-'
75     0x87fffffe, // 40..5f uppercase etc.; valid: 'A'..'Z', '_'
76     0x07fffffe  // 60..7f lowercase etc.; valid: 'a'..'z'
77 };
78 
79 /* Helper for dexIsValidMemberNameUtf8(); do not call directly. */
dexIsValidMemberNameUtf8_0(const char ** pUtf8Ptr)80 bool dexIsValidMemberNameUtf8_0(const char** pUtf8Ptr) {
81     /*
82      * It's a multibyte encoded character. Decode it and analyze. We
83      * accept anything that isn't (a) an improperly encoded low value,
84      * (b) an improper surrogate pair, (c) an encoded '\0', (d) a high
85      * control character, or (e) a high space, layout, or special
86      * character (U+00a0, U+2000..U+200f, U+2028..U+202f,
87      * U+fff0..U+ffff).
88      */
89 
90     u2 utf16 = dexGetUtf16FromUtf8(pUtf8Ptr);
91 
92     // Perform follow-up tests based on the high 8 bits.
93     switch (utf16 >> 8) {
94         case 0x00: {
95             // It's only valid if it's above the ISO-8859-1 high space (0xa0).
96             return (utf16 > 0x00a0);
97         }
98         case 0xd8:
99         case 0xd9:
100         case 0xda:
101         case 0xdb: {
102             /*
103              * It's a leading surrogate. Check to see that a trailing
104              * surrogate follows.
105              */
106             utf16 = dexGetUtf16FromUtf8(pUtf8Ptr);
107             return (utf16 >= 0xdc00) && (utf16 <= 0xdfff);
108         }
109         case 0xdc:
110         case 0xdd:
111         case 0xde:
112         case 0xdf: {
113             // It's a trailing surrogate, which is not valid at this point.
114             return false;
115         }
116         case 0x20:
117         case 0xff: {
118             // It's in the range that has spaces, controls, and specials.
119             switch (utf16 & 0xfff8) {
120                 case 0x2000:
121                 case 0x2008:
122                 case 0x2028:
123                 case 0xfff0:
124                 case 0xfff8: {
125                     return false;
126                 }
127             }
128             break;
129         }
130     }
131 
132     return true;
133 }
134 
/*
 * Return whether the given string is a valid field or method name.
 *
 * A valid name is either a non-empty run of valid member-name characters,
 * or such a run enclosed in '<' and '>' (e.g. "<init>").  The empty
 * string, "<>", a name with an unmatched '<' or '>', and a name with
 * anything after the closing '>' are all rejected.
 */
bool dexIsValidMemberName(const char* s) {
    bool angleName = false;

    if (*s == '\0') {
        // The empty string is not a valid name.
        return false;
    }

    if (*s == '<') {
        /*
         * '<' is allowed only at the start of a name, and if present,
         * means that the name must end with '>'.
         */
        angleName = true;
        s++;

        if (*s == '>') {
            // "<>" has no name between the brackets, so it is not valid.
            return false;
        }
    }

    for (;;) {
        if (*s == '\0') {
            // End of string: fine unless we are still waiting for '>'.
            return !angleName;
        }
        if (*s == '>') {
            // '>' must close an angle name and be the last character.
            return angleName && s[1] == '\0';
        }
        if (!dexIsValidMemberNameUtf8(&s)) {
            return false;
        }
    }
}
169 
/*
 * Return whether the given string is a valid type descriptor.
 *
 * Accepted forms are zero to 255 leading '[' characters followed by
 * either a single primitive type letter (B, C, D, F, I, J, S, Z), or
 * "L<name>;" where <name> is one or more non-empty components separated
 * by '/'.  "V" is accepted only with no array dimensions.
 */
bool dexIsValidTypeDescriptor(const char* s) {
    int arrayCount;

    for (arrayCount = 0; *s == '['; s++) {
        arrayCount++;
    }

    if (arrayCount > 255) {
        // Arrays may have no more than 255 dimensions.
        return false;
    }

    const char kind = *s++;

    if (kind == 'V') {
        // You can't have an array of void.
        return (arrayCount == 0) && (*s == '\0');
    }

    if (kind != 'L') {
        /*
         * Either a single-character primitive descriptor or an oddball
         * character.  The explicit '\0' test matters: strchr() would
         * otherwise match the literal's own terminator.
         */
        bool isPrimitive =
            (kind != '\0') && (strchr("BCDFIJSZ", kind) != NULL);
        return isPrimitive && (*s == '\0');
    }

    // We just consumed the 'L' that introduces a class name.

    bool componentEmpty = true; // at the start, or just after a slash
    for (;;) {
        const char c = *s;

        if (c == '\0') {
            // Premature end.
            return false;
        }

        if (c == ';') {
            /*
             * This must be the end of the string, and the name must not
             * end with an empty component (which also rules out "L;").
             */
            return (s[1] == '\0') && !componentEmpty;
        }

        if (c == '/') {
            if (componentEmpty) {
                // Slash at start or two slashes in a row.
                return false;
            }
            componentEmpty = true;
            s++;
            continue;
        }

        if (!dexIsValidMemberNameUtf8(&s)) {
            return false;
        }
        componentEmpty = false;
    }
}
247 
/*
 * Return whether the given string is a valid reference descriptor: it
 * must pass dexIsValidTypeDescriptor() and describe a class ('L') or an
 * array ('['), not a primitive type.
 */
bool dexIsReferenceDescriptor(const char* s) {
    return dexIsValidTypeDescriptor(s) && (s[0] == 'L' || s[0] == '[');
}
258 
/*
 * Return whether the given string is a valid class descriptor: it must
 * pass dexIsValidTypeDescriptor() and start with 'L', which excludes
 * arrays and primitive types.
 */
bool dexIsClassDescriptor(const char* s) {
    return dexIsValidTypeDescriptor(s) && (s[0] == 'L');
}
269 
/*
 * Return whether the given string is a valid field type descriptor: it
 * must pass dexIsValidTypeDescriptor() and be anything but "void".
 */
bool dexIsFieldDescriptor(const char* s) {
    return dexIsValidTypeDescriptor(s) && (s[0] != 'V');
}
280 
281 /* Return the UTF-8 encoded string with the specified string_id index,
282  * also filling in the UTF-16 size (number of 16-bit code points).*/
dexStringAndSizeById(const DexFile * pDexFile,u4 idx,u4 * utf16Size)283 const char* dexStringAndSizeById(const DexFile* pDexFile, u4 idx,
284         u4* utf16Size) {
285     const DexStringId* pStringId = dexGetStringId(pDexFile, idx);
286     const u1* ptr = pDexFile->baseAddr + pStringId->stringDataOff;
287 
288     *utf16Size = readUnsignedLeb128(&ptr);
289     return (const char*) ptr;
290 }
291 
292 /*
293  * Format an SHA-1 digest for printing.  tmpBuf must be able to hold at
294  * least kSHA1DigestOutputLen bytes.
295  */
296 const char* dvmSHA1DigestToStr(const unsigned char digest[], char* tmpBuf);
297 
298 /*
299  * Compute a SHA-1 digest on a range of bytes.
300  */
dexComputeSHA1Digest(const unsigned char * data,size_t length,unsigned char digest[])301 static void dexComputeSHA1Digest(const unsigned char* data, size_t length,
302     unsigned char digest[])
303 {
304     SHA1_CTX context;
305     SHA1Init(&context);
306     SHA1Update(&context, data, length);
307     SHA1Final(digest, &context);
308 }
309 
310 /*
311  * Format the SHA-1 digest into the buffer, which must be able to hold at
312  * least kSHA1DigestOutputLen bytes.  Returns a pointer to the buffer,
313  */
dexSHA1DigestToStr(const unsigned char digest[],char * tmpBuf)314 static const char* dexSHA1DigestToStr(const unsigned char digest[],char* tmpBuf)
315 {
316     static const char hexDigit[] = "0123456789abcdef";
317     char* cp;
318     int i;
319 
320     cp = tmpBuf;
321     for (i = 0; i < kSHA1DigestLen; i++) {
322         *cp++ = hexDigit[digest[i] >> 4];
323         *cp++ = hexDigit[digest[i] & 0x0f];
324     }
325     *cp++ = '\0';
326 
327     assert(cp == tmpBuf + kSHA1DigestOutputLen);
328 
329     return tmpBuf;
330 }
331 
332 /*
333  * Compute a hash code on a UTF-8 string, for use with internal hash tables.
334  *
335  * This may or may not be compatible with UTF-8 hash functions used inside
336  * the Dalvik VM.
337  *
338  * The basic "multiply by 31 and add" approach does better on class names
339  * than most other things tried (e.g. adler32).
340  */
classDescriptorHash(const char * str)341 static u4 classDescriptorHash(const char* str)
342 {
343     u4 hash = 1;
344 
345     while (*str != '\0')
346         hash = hash * 31 + *str++;
347 
348     return hash;
349 }
350 
351 /*
352  * Add an entry to the class lookup table.  We hash the string and probe
353  * until we find an open slot.
354  */
classLookupAdd(DexFile * pDexFile,DexClassLookup * pLookup,int stringOff,int classDefOff,int * pNumProbes)355 static void classLookupAdd(DexFile* pDexFile, DexClassLookup* pLookup,
356     int stringOff, int classDefOff, int* pNumProbes)
357 {
358     const char* classDescriptor =
359         (const char*) (pDexFile->baseAddr + stringOff);
360     const DexClassDef* pClassDef =
361         (const DexClassDef*) (pDexFile->baseAddr + classDefOff);
362     u4 hash = classDescriptorHash(classDescriptor);
363     int mask = pLookup->numEntries-1;
364     int idx = hash & mask;
365 
366     /*
367      * Find the first empty slot.  We oversized the table, so this is
368      * guaranteed to finish.
369      */
370     int probes = 0;
371     while (pLookup->table[idx].classDescriptorOffset != 0) {
372         idx = (idx + 1) & mask;
373         probes++;
374     }
375     //if (probes > 1)
376     //    LOGW("classLookupAdd: probes=%d\n", probes);
377 
378     pLookup->table[idx].classDescriptorHash = hash;
379     pLookup->table[idx].classDescriptorOffset = stringOff;
380     pLookup->table[idx].classDefOffset = classDefOff;
381     *pNumProbes = probes;
382 }
383 
384 /*
385  * Round up to the next highest power of 2.
386  *
387  * Found on http://graphics.stanford.edu/~seander/bithacks.html.
388  */
dexRoundUpPower2(u4 val)389 u4 dexRoundUpPower2(u4 val)
390 {
391     val--;
392     val |= val >> 1;
393     val |= val >> 2;
394     val |= val >> 4;
395     val |= val >> 8;
396     val |= val >> 16;
397     val++;
398 
399     return val;
400 }
401 
402 /*
403  * Create the class lookup hash table.
404  *
405  * Returns newly-allocated storage.
406  */
dexCreateClassLookup(DexFile * pDexFile)407 DexClassLookup* dexCreateClassLookup(DexFile* pDexFile)
408 {
409     DexClassLookup* pLookup;
410     int allocSize;
411     int i, numEntries;
412     int numProbes, totalProbes, maxProbes;
413 
414     numProbes = totalProbes = maxProbes = 0;
415 
416     assert(pDexFile != NULL);
417 
418     /*
419      * Using a factor of 3 results in far less probing than a factor of 2,
420      * but almost doubles the flash storage requirements for the bootstrap
421      * DEX files.  The overall impact on class loading performance seems
422      * to be minor.  We could probably get some performance improvement by
423      * using a secondary hash.
424      */
425     numEntries = dexRoundUpPower2(pDexFile->pHeader->classDefsSize * 2);
426     allocSize = offsetof(DexClassLookup, table)
427                     + numEntries * sizeof(pLookup->table[0]);
428 
429     pLookup = (DexClassLookup*) calloc(1, allocSize);
430     if (pLookup == NULL)
431         return NULL;
432     pLookup->size = allocSize;
433     pLookup->numEntries = numEntries;
434 
435     for (i = 0; i < (int)pDexFile->pHeader->classDefsSize; i++) {
436         const DexClassDef* pClassDef;
437         const char* pString;
438 
439         pClassDef = dexGetClassDef(pDexFile, i);
440         pString = dexStringByTypeIdx(pDexFile, pClassDef->classIdx);
441 
442         classLookupAdd(pDexFile, pLookup,
443             (u1*)pString - pDexFile->baseAddr,
444             (u1*)pClassDef - pDexFile->baseAddr, &numProbes);
445 
446         if (numProbes > maxProbes)
447             maxProbes = numProbes;
448         totalProbes += numProbes;
449     }
450 
451     LOGV("Class lookup: classes=%d slots=%d (%d%% occ) alloc=%d"
452          " total=%d max=%d\n",
453         pDexFile->pHeader->classDefsSize, numEntries,
454         (100 * pDexFile->pHeader->classDefsSize) / numEntries,
455         allocSize, totalProbes, maxProbes);
456 
457     return pLookup;
458 }
459 
460 
461 /*
462  * Set up the basic raw data pointers of a DexFile. This function isn't
463  * meant for general use.
464  */
dexFileSetupBasicPointers(DexFile * pDexFile,const u1 * data)465 void dexFileSetupBasicPointers(DexFile* pDexFile, const u1* data) {
466     DexHeader *pHeader = (DexHeader*) data;
467 
468     pDexFile->baseAddr = data;
469     pDexFile->pHeader = pHeader;
470     pDexFile->pStringIds = (const DexStringId*) (data + pHeader->stringIdsOff);
471     pDexFile->pTypeIds = (const DexTypeId*) (data + pHeader->typeIdsOff);
472     pDexFile->pFieldIds = (const DexFieldId*) (data + pHeader->fieldIdsOff);
473     pDexFile->pMethodIds = (const DexMethodId*) (data + pHeader->methodIdsOff);
474     pDexFile->pProtoIds = (const DexProtoId*) (data + pHeader->protoIdsOff);
475     pDexFile->pClassDefs = (const DexClassDef*) (data + pHeader->classDefsOff);
476     pDexFile->pLinkData = (const DexLink*) (data + pHeader->linkOff);
477 }
478 
479 
480 /*
481  * Parse out an index map entry, advancing "*pData" and reducing "*pSize".
482  */
parseIndexMapEntry(const u1 ** pData,u4 * pSize,bool expanding,u4 * pFullCount,u4 * pReducedCount,const u2 ** pMap)483 static bool parseIndexMapEntry(const u1** pData, u4* pSize, bool expanding,
484     u4* pFullCount, u4* pReducedCount, const u2** pMap)
485 {
486     const u4* wordPtr = (const u4*) *pData;
487     u4 size = *pSize;
488     u4 mapCount;
489 
490     if (expanding) {
491         if (size < 4)
492             return false;
493         mapCount = *pReducedCount = *wordPtr++;
494         *pFullCount = (u4) -1;
495         size -= sizeof(u4);
496     } else {
497         if (size < 8)
498             return false;
499         mapCount = *pFullCount = *wordPtr++;
500         *pReducedCount = *wordPtr++;
501         size -= sizeof(u4) * 2;
502     }
503 
504     u4 mapSize = mapCount * sizeof(u2);
505 
506     if (size < mapSize)
507         return false;
508     *pMap = (const u2*) wordPtr;
509     size -= mapSize;
510 
511     /* advance the pointer */
512     const u1* ptr = (const u1*) wordPtr;
513     ptr += (mapSize + 3) & ~0x3;
514 
515     /* update pass-by-reference values */
516     *pData = (const u1*) ptr;
517     *pSize = size;
518 
519     return true;
520 }
521 
522 /*
523  * Set up some pointers into the mapped data.
524  *
525  * See analysis/ReduceConstants.c for the data layout description.
526  */
parseIndexMap(DexFile * pDexFile,const u1 * data,u4 size,bool expanding)527 static bool parseIndexMap(DexFile* pDexFile, const u1* data, u4 size,
528     bool expanding)
529 {
530     if (!parseIndexMapEntry(&data, &size, expanding,
531             &pDexFile->indexMap.classFullCount,
532             &pDexFile->indexMap.classReducedCount,
533             &pDexFile->indexMap.classMap))
534     {
535         return false;
536     }
537 
538     if (!parseIndexMapEntry(&data, &size, expanding,
539             &pDexFile->indexMap.methodFullCount,
540             &pDexFile->indexMap.methodReducedCount,
541             &pDexFile->indexMap.methodMap))
542     {
543         return false;
544     }
545 
546     if (!parseIndexMapEntry(&data, &size, expanding,
547             &pDexFile->indexMap.fieldFullCount,
548             &pDexFile->indexMap.fieldReducedCount,
549             &pDexFile->indexMap.fieldMap))
550     {
551         return false;
552     }
553 
554     if (!parseIndexMapEntry(&data, &size, expanding,
555             &pDexFile->indexMap.stringFullCount,
556             &pDexFile->indexMap.stringReducedCount,
557             &pDexFile->indexMap.stringMap))
558     {
559         return false;
560     }
561 
562     if (expanding) {
563         /*
564          * The map includes the "reduced" counts; pull the original counts
565          * out of the DexFile so that code has a consistent source.
566          */
567         assert(pDexFile->indexMap.classFullCount == (u4) -1);
568         assert(pDexFile->indexMap.methodFullCount == (u4) -1);
569         assert(pDexFile->indexMap.fieldFullCount == (u4) -1);
570         assert(pDexFile->indexMap.stringFullCount == (u4) -1);
571 
572 #if 0   // TODO: not available yet -- do later or just skip this
573         pDexFile->indexMap.classFullCount =
574             pDexFile->pHeader->typeIdsSize;
575         pDexFile->indexMap.methodFullCount =
576             pDexFile->pHeader->methodIdsSize;
577         pDexFile->indexMap.fieldFullCount =
578             pDexFile->pHeader->fieldIdsSize;
579         pDexFile->indexMap.stringFullCount =
580             pDexFile->pHeader->stringIdsSize;
581 #endif
582     }
583 
584     LOGI("Class : %u %u %u\n",
585         pDexFile->indexMap.classFullCount,
586         pDexFile->indexMap.classReducedCount,
587         pDexFile->indexMap.classMap[0]);
588     LOGI("Method: %u %u %u\n",
589         pDexFile->indexMap.methodFullCount,
590         pDexFile->indexMap.methodReducedCount,
591         pDexFile->indexMap.methodMap[0]);
592     LOGI("Field : %u %u %u\n",
593         pDexFile->indexMap.fieldFullCount,
594         pDexFile->indexMap.fieldReducedCount,
595         pDexFile->indexMap.fieldMap[0]);
596     LOGI("String: %u %u %u\n",
597         pDexFile->indexMap.stringFullCount,
598         pDexFile->indexMap.stringReducedCount,
599         pDexFile->indexMap.stringMap[0]);
600 
601     return true;
602 }
603 
604 /*
605  * Parse some auxillary data tables.
606  *
607  * v1.0 wrote a zero in the first 32 bits, followed by the DexClassLookup
608  * table.  Subsequent versions switched to the "chunk" format.
609  */
parseAuxData(const u1 * data,DexFile * pDexFile)610 static bool parseAuxData(const u1* data, DexFile* pDexFile)
611 {
612     const u4* pAux = (const u4*) (data + pDexFile->pOptHeader->auxOffset);
613     u4 indexMapType = 0;
614 
615     /* v1.0 format? */
616     if (*pAux == 0) {
617         LOGV("+++ found OLD dex format\n");
618         pDexFile->pClassLookup = (const DexClassLookup*) (pAux+1);
619         return true;
620     }
621     LOGV("+++ found NEW dex format\n");
622 
623     /* process chunks until we see the end marker */
624     while (*pAux != kDexChunkEnd) {
625         u4 size = *(pAux+1);
626         u1* data = (u1*) (pAux + 2);
627 
628         switch (*pAux) {
629         case kDexChunkClassLookup:
630             pDexFile->pClassLookup = (const DexClassLookup*) data;
631             break;
632         case kDexChunkReducingIndexMap:
633             LOGI("+++ found reducing index map, size=%u\n", size);
634             if (!parseIndexMap(pDexFile, data, size, false)) {
635                 LOGE("Failed parsing reducing index map\n");
636                 return false;
637             }
638             indexMapType = *pAux;
639             break;
640         case kDexChunkExpandingIndexMap:
641             LOGI("+++ found expanding index map, size=%u\n", size);
642             if (!parseIndexMap(pDexFile, data, size, true)) {
643                 LOGE("Failed parsing expanding index map\n");
644                 return false;
645             }
646             indexMapType = *pAux;
647             break;
648         case kDexChunkRegisterMaps:
649             LOGV("+++ found register maps, size=%u\n", size);
650             pDexFile->pRegisterMapPool = data;
651             break;
652         default:
653             LOGI("Unknown chunk 0x%08x (%c%c%c%c), size=%d in aux data area\n",
654                 *pAux,
655                 (char) ((*pAux) >> 24), (char) ((*pAux) >> 16),
656                 (char) ((*pAux) >> 8),  (char)  (*pAux),
657                 size);
658             break;
659         }
660 
661         /*
662          * Advance pointer, padding to 64-bit boundary.  The extra "+8" is
663          * for the type/size header.
664          */
665         size = (size + 8 + 7) & ~7;
666         pAux += size / sizeof(u4);
667     }
668 
669 #if 0   // TODO: propagate expected map type from the VM through the API
670     /*
671      * If we're configured to expect an index map, and we don't find one,
672      * reject this DEX so we'll regenerate it.  Also, if we found an
673      * "expanding" map but we're not configured to use it, we have to fail
674      * because the constants aren't usable without translation.
675      */
676     if (indexMapType != expectedIndexMapType) {
677         LOGW("Incompatible index map configuration: found 0x%04x, need %d\n",
678             indexMapType, DVM_REDUCE_CONSTANTS);
679         return false;
680     }
681 #endif
682 
683     return true;
684 }
685 
686 /*
687  * Parse an optimized or unoptimized .dex file sitting in memory.  This is
688  * called after the byte-ordering and structure alignment has been fixed up.
689  *
690  * On success, return a newly-allocated DexFile.
691  */
dexFileParse(const u1 * data,size_t length,int flags)692 DexFile* dexFileParse(const u1* data, size_t length, int flags)
693 {
694     DexFile* pDexFile = NULL;
695     const DexHeader* pHeader;
696     const u1* magic;
697     int result = -1;
698 
699     if (length < sizeof(DexHeader)) {
700         LOGE("too short to be a valid .dex\n");
701         goto bail;      /* bad file format */
702     }
703 
704     pDexFile = (DexFile*) malloc(sizeof(DexFile));
705     if (pDexFile == NULL)
706         goto bail;      /* alloc failure */
707     memset(pDexFile, 0, sizeof(DexFile));
708 
709     /*
710      * Peel off the optimized header.
711      */
712     if (memcmp(data, DEX_OPT_MAGIC, 4) == 0) {
713         magic = data;
714         if (memcmp(magic+4, DEX_OPT_MAGIC_VERS, 4) != 0) {
715             LOGE("bad opt version (0x%02x %02x %02x %02x)\n",
716                  magic[4], magic[5], magic[6], magic[7]);
717             goto bail;
718         }
719 
720         pDexFile->pOptHeader = (const DexOptHeader*) data;
721         LOGV("Good opt header, DEX offset is %d, flags=0x%02x\n",
722             pDexFile->pOptHeader->dexOffset, pDexFile->pOptHeader->flags);
723 
724         /* locate some auxillary data tables */
725         if (!parseAuxData(data, pDexFile))
726             goto bail;
727 
728         /* ignore the opt header and appended data from here on out */
729         data += pDexFile->pOptHeader->dexOffset;
730         length -= pDexFile->pOptHeader->dexOffset;
731         if (pDexFile->pOptHeader->dexLength > length) {
732             LOGE("File truncated? stored len=%d, rem len=%d\n",
733                 pDexFile->pOptHeader->dexLength, (int) length);
734             goto bail;
735         }
736         length = pDexFile->pOptHeader->dexLength;
737     }
738 
739     dexFileSetupBasicPointers(pDexFile, data);
740     pHeader = pDexFile->pHeader;
741 
742     magic = pHeader->magic;
743     if (memcmp(magic, DEX_MAGIC, 4) != 0) {
744         /* not expected */
745         LOGE("bad magic number (0x%02x %02x %02x %02x)\n",
746              magic[0], magic[1], magic[2], magic[3]);
747         goto bail;
748     }
749     if (memcmp(magic+4, DEX_MAGIC_VERS, 4) != 0) {
750         LOGE("bad dex version (0x%02x %02x %02x %02x)\n",
751              magic[4], magic[5], magic[6], magic[7]);
752         goto bail;
753     }
754 
755     /*
756      * Verify the checksum.  This is reasonably quick, but does require
757      * touching every byte in the DEX file.  The checksum changes after
758      * byte-swapping and DEX optimization.
759      */
760     if (flags & kDexParseVerifyChecksum) {
761         u4 adler = dexComputeChecksum(pHeader);
762         if (adler != pHeader->checksum) {
763             LOGE("ERROR: bad checksum (%08x vs %08x)\n",
764                 adler, pHeader->checksum);
765             if (!(flags & kDexParseContinueOnError))
766                 goto bail;
767         } else {
768             LOGV("+++ adler32 checksum (%08x) verified\n", adler);
769         }
770     }
771 
772     /*
773      * Verify the SHA-1 digest.  (Normally we don't want to do this --
774      * the digest is used to uniquely identify a DEX file, and can't be
775      * computed post-optimization.)
776      *
777      * The digest will be invalid after byte swapping and DEX optimization.
778      */
779     if (kVerifySignature) {
780         unsigned char sha1Digest[kSHA1DigestLen];
781         const int nonSum = sizeof(pHeader->magic) + sizeof(pHeader->checksum) +
782                             kSHA1DigestLen;
783 
784         dexComputeSHA1Digest(data + nonSum, length - nonSum, sha1Digest);
785         if (memcmp(sha1Digest, pHeader->signature, kSHA1DigestLen) != 0) {
786             char tmpBuf1[kSHA1DigestOutputLen];
787             char tmpBuf2[kSHA1DigestOutputLen];
788             LOGE("ERROR: bad SHA1 digest (%s vs %s)\n",
789                 dexSHA1DigestToStr(sha1Digest, tmpBuf1),
790                 dexSHA1DigestToStr(pHeader->signature, tmpBuf2));
791             if (!(flags & kDexParseContinueOnError))
792                 goto bail;
793         } else {
794             LOGV("+++ sha1 digest verified\n");
795         }
796     }
797 
798     if (pHeader->fileSize != length) {
799         LOGE("ERROR: stored file size (%d) != expected (%d)\n",
800             (int) pHeader->fileSize, (int) length);
801         if (!(flags & kDexParseContinueOnError))
802             goto bail;
803     }
804 
805     if (pHeader->classDefsSize == 0) {
806         LOGE("ERROR: DEX file has no classes in it, failing\n");
807         goto bail;
808     }
809 
810     /*
811      * Success!
812      */
813     result = 0;
814 
815 bail:
816     if (result != 0 && pDexFile != NULL) {
817         dexFileFree(pDexFile);
818         pDexFile = NULL;
819     }
820     return pDexFile;
821 }
822 
823 /*
824  * Free up the DexFile and any associated data structures.
825  *
826  * Note we may be called with a partially-initialized DexFile.
827  */
dexFileFree(DexFile * pDexFile)828 void dexFileFree(DexFile* pDexFile)
829 {
830     if (pDexFile == NULL)
831         return;
832 
833     free(pDexFile);
834 }
835 
836 /*
837  * Look up a class definition entry by descriptor.
838  *
839  * "descriptor" should look like "Landroid/debug/Stuff;".
840  */
dexFindClass(const DexFile * pDexFile,const char * descriptor)841 const DexClassDef* dexFindClass(const DexFile* pDexFile,
842     const char* descriptor)
843 {
844     const DexClassLookup* pLookup = pDexFile->pClassLookup;
845     u4 hash;
846     int idx, mask;
847 
848     hash = classDescriptorHash(descriptor);
849     mask = pLookup->numEntries - 1;
850     idx = hash & mask;
851 
852     /*
853      * Search until we find a matching entry or an empty slot.
854      */
855     while (true) {
856         int offset;
857 
858         offset = pLookup->table[idx].classDescriptorOffset;
859         if (offset == 0)
860             return NULL;
861 
862         if (pLookup->table[idx].classDescriptorHash == hash) {
863             const char* str;
864 
865             str = (const char*) (pDexFile->baseAddr + offset);
866             if (strcmp(str, descriptor) == 0) {
867                 return (const DexClassDef*)
868                     (pDexFile->baseAddr + pLookup->table[idx].classDefOffset);
869             }
870         }
871 
872         idx = (idx + 1) & mask;
873     }
874 }
875 
876 
877 /*
878  * Compute the DEX file checksum for a memory-mapped DEX file.
879  */
dexComputeChecksum(const DexHeader * pHeader)880 u4 dexComputeChecksum(const DexHeader* pHeader)
881 {
882     const u1* start = (const u1*) pHeader;
883 
884     uLong adler = adler32(0L, Z_NULL, 0);
885     const int nonSum = sizeof(pHeader->magic) + sizeof(pHeader->checksum);
886 
887     return (u4) adler32(adler, start + nonSum, pHeader->fileSize - nonSum);
888 }
889 
890 
891 /*
892  * Compute the size, in bytes, of a DexCode.
893  */
dexGetDexCodeSize(const DexCode * pCode)894 size_t dexGetDexCodeSize(const DexCode* pCode)
895 {
896     /*
897      * The catch handler data is the last entry.  It has a variable number
898      * of variable-size pieces, so we need to create an iterator.
899      */
900     u4 handlersSize;
901     u4 offset;
902     u4 ui;
903 
904     if (pCode->triesSize != 0) {
905         handlersSize = dexGetHandlersSize(pCode);
906         offset = dexGetFirstHandlerOffset(pCode);
907     } else {
908         handlersSize = 0;
909         offset = 0;
910     }
911 
912     for (ui = 0; ui < handlersSize; ui++) {
913         DexCatchIterator iterator;
914         dexCatchIteratorInit(&iterator, pCode, offset);
915         offset = dexCatchIteratorGetEndOffset(&iterator, pCode);
916     }
917 
918     const u1* handlerData = dexGetCatchHandlerData(pCode);
919 
920     //LOGD("+++ pCode=%p handlerData=%p last offset=%d\n",
921     //    pCode, handlerData, offset);
922 
923     /* return the size of the catch handler + everything before it */
924     return (handlerData - (u1*) pCode) + offset;
925 }
926 
927 
928 /*
929  * ===========================================================================
930  *      Debug info
931  * ===========================================================================
932  */
933 
/*
 * Decode the arguments in a method signature, which looks something
 * like "(ID[Ljava/lang/String;)V".
 *
 * A leading '(' is skipped if present.  Returns the type signature
 * letter for the next argument ('L' for any object reference, '[' for
 * any array), or ')' if there are no more args.  Advances "*pSig" to
 * point to the character after the complete type that was consumed.
 *
 * If the string is already exhausted, returns '\0' and leaves "*pSig"
 * untouched.  If the string ends in the middle of a type (an object
 * descriptor with no ';', or '[' with no element type), the scan stops
 * at the terminator and "*pSig" is left pointing at it, never past it.
 */
static char decodeSignature(const char** pSig)
{
    const char* sig = *pSig;
    char kind;

    if (*sig == '(')
        sig++;

    kind = *sig;
    if (kind == '\0')
        return kind;        /* don't advance further */

    if (kind == '[') {
        /* array; skip every dimension marker to reach the element type */
        while (*sig == '[')
            sig++;
    }

    if (*sig == 'L') {
        /* object ref (possibly an array element); find the closing ';' */
        while (*sig != ';' && *sig != '\0')
            sig++;
    }

    /*
     * "sig" now rests on the last character of the type, or on the
     * terminator if the signature was truncated.  Only step over a real
     * character so the caller can never be handed a pointer beyond the
     * end of the string.
     */
    if (*sig != '\0')
        sig++;

    *pSig = sig;
    return kind;
}
973 
/*
 * Returns the length of a type string, given a pointer to its start.
 * Used for the case where the debug info format references types that
 * sit inside a method type signature.
 *
 * Assumes any leading '(' has already been gobbled.
 */
static int typeLength (const char *type) {
    const char *cursor = type;

    /* only the cursor movement matters here; the type letter is ignored */
    (void) decodeSignature(&cursor);

    return cursor - type;
}
985 
986 /*
987  * Reads a string index as encoded for the debug info format,
988  * returning a string pointer or NULL as appropriate.
989  */
readStringIdx(const DexFile * pDexFile,const u1 ** pStream)990 static const char* readStringIdx(const DexFile* pDexFile,
991         const u1** pStream) {
992     u4 stringIdx = readUnsignedLeb128(pStream);
993 
994     // Remember, encoded string indicies have 1 added to them.
995     if (stringIdx == 0) {
996         return NULL;
997     } else {
998         return dexStringById(pDexFile, stringIdx - 1);
999     }
1000 }
1001 
1002 /*
1003  * Reads a type index as encoded for the debug info format, returning
1004  * a string pointer for its descriptor or NULL as appropriate.
1005  */
readTypeIdx(const DexFile * pDexFile,const u1 ** pStream)1006 static const char* readTypeIdx(const DexFile* pDexFile,
1007         const u1** pStream) {
1008     u4 typeIdx = readUnsignedLeb128(pStream);
1009 
1010     // Remember, encoded type indicies have 1 added to them.
1011     if (typeIdx == 0) {
1012         return NULL;
1013     } else {
1014         return dexStringByTypeIdx(pDexFile, typeIdx - 1);
1015     }
1016 }
1017 
/*
 * access_flag value indicating that a method is static.  When this bit
 * is clear in the accessFlags passed to dexDecodeDebugInfo(), the method
 * is treated as an instance method with an implicit "this" argument.
 */
#define ACC_STATIC              0x0008
1020 
1021 typedef struct LocalInfo {
1022     const char *name;
1023     const char *descriptor;
1024     const char *signature;
1025     u2 startAddress;
1026     bool live;
1027 } LocalInfo;
1028 
/*
 * Reports the local held in register "reg" through "localCb", covering
 * the range from its recorded start address to "endAddress".
 *
 * Does nothing when "localCb" is NULL or the register holds no live
 * local.  A missing signature is reported as the empty string rather
 * than NULL.
 */
static void emitLocalCbIfLive (void *cnxt, int reg, u4 endAddress,
        LocalInfo *localInReg, DexDebugNewLocalCb localCb)
{
    const LocalInfo *local;
    const char *signature;

    if (localCb == NULL) {
        return;
    }

    local = &localInReg[reg];
    if (!local->live) {
        return;
    }

    signature = local->signature;
    if (signature == NULL) {
        signature = "";
    }

    localCb(cnxt, reg, local->startAddress, endAddress,
            local->name, local->descriptor, signature);
}
1040 
1041 // TODO optimize localCb == NULL case
dexDecodeDebugInfo(const DexFile * pDexFile,const DexCode * pCode,const char * classDescriptor,u4 protoIdx,u4 accessFlags,DexDebugNewPositionCb posCb,DexDebugNewLocalCb localCb,void * cnxt)1042 void dexDecodeDebugInfo(
1043             const DexFile* pDexFile,
1044             const DexCode* pCode,
1045             const char* classDescriptor,
1046             u4 protoIdx,
1047             u4 accessFlags,
1048             DexDebugNewPositionCb posCb, DexDebugNewLocalCb localCb,
1049             void* cnxt)
1050 {
1051     const u1 *stream = dexGetDebugInfoStream(pDexFile, pCode);
1052     u4 line;
1053     u4 parametersSize;
1054     u4 address = 0;
1055     LocalInfo localInReg[pCode->registersSize];
1056     u4 insnsSize = pCode->insnsSize;
1057     DexProto proto = { pDexFile, protoIdx };
1058 
1059     memset(localInReg, 0, sizeof(LocalInfo) * pCode->registersSize);
1060 
1061     if (stream == NULL) {
1062         goto end;
1063     }
1064 
1065     line = readUnsignedLeb128(&stream);
1066     parametersSize = readUnsignedLeb128(&stream);
1067 
1068     u2 argReg = pCode->registersSize - pCode->insSize;
1069 
1070     if ((accessFlags & ACC_STATIC) == 0) {
1071         /*
1072          * The code is an instance method, which means that there is
1073          * an initial this parameter. Also, the proto list should
1074          * contain exactly one fewer argument word than the insSize
1075          * indicates.
1076          */
1077         assert(pCode->insSize == (dexProtoComputeArgsSize(&proto) + 1));
1078         localInReg[argReg].name = "this";
1079         localInReg[argReg].descriptor = classDescriptor;
1080         localInReg[argReg].startAddress = 0;
1081         localInReg[argReg].live = true;
1082         argReg++;
1083     } else {
1084         assert(pCode->insSize == dexProtoComputeArgsSize(&proto));
1085     }
1086 
1087     DexParameterIterator iterator;
1088     dexParameterIteratorInit(&iterator, &proto);
1089 
1090     while (parametersSize-- != 0) {
1091         const char* descriptor = dexParameterIteratorNextDescriptor(&iterator);
1092         const char *name;
1093         int reg;
1094 
1095         if ((argReg >= pCode->registersSize) || (descriptor == NULL)) {
1096             goto invalid_stream;
1097         }
1098 
1099         name = readStringIdx(pDexFile, &stream);
1100         reg = argReg;
1101 
1102         switch (descriptor[0]) {
1103             case 'D':
1104             case 'J':
1105                 argReg += 2;
1106                 break;
1107             default:
1108                 argReg += 1;
1109                 break;
1110         }
1111 
1112         if (name != NULL) {
1113             localInReg[reg].name = name;
1114             localInReg[reg].descriptor = descriptor;
1115             localInReg[reg].signature = NULL;
1116             localInReg[reg].startAddress = address;
1117             localInReg[reg].live = true;
1118         }
1119     }
1120 
1121     for (;;)  {
1122         u1 opcode = *stream++;
1123         u2 reg;
1124 
1125         switch (opcode) {
1126             case DBG_END_SEQUENCE:
1127                 goto end;
1128 
1129             case DBG_ADVANCE_PC:
1130                 address += readUnsignedLeb128(&stream);
1131                 break;
1132 
1133             case DBG_ADVANCE_LINE:
1134                 line += readSignedLeb128(&stream);
1135                 break;
1136 
1137             case DBG_START_LOCAL:
1138             case DBG_START_LOCAL_EXTENDED:
1139                 reg = readUnsignedLeb128(&stream);
1140                 if (reg > pCode->registersSize) goto invalid_stream;
1141 
1142                 // Emit what was previously there, if anything
1143                 emitLocalCbIfLive (cnxt, reg, address,
1144                     localInReg, localCb);
1145 
1146                 localInReg[reg].name = readStringIdx(pDexFile, &stream);
1147                 localInReg[reg].descriptor = readTypeIdx(pDexFile, &stream);
1148                 if (opcode == DBG_START_LOCAL_EXTENDED) {
1149                     localInReg[reg].signature
1150                         = readStringIdx(pDexFile, &stream);
1151                 } else {
1152                     localInReg[reg].signature = NULL;
1153                 }
1154                 localInReg[reg].startAddress = address;
1155                 localInReg[reg].live = true;
1156                 break;
1157 
1158             case DBG_END_LOCAL:
1159                 reg = readUnsignedLeb128(&stream);
1160                 if (reg > pCode->registersSize) goto invalid_stream;
1161 
1162                 emitLocalCbIfLive (cnxt, reg, address, localInReg, localCb);
1163                 localInReg[reg].live = false;
1164                 break;
1165 
1166             case DBG_RESTART_LOCAL:
1167                 reg = readUnsignedLeb128(&stream);
1168                 if (reg > pCode->registersSize) goto invalid_stream;
1169 
1170                 if (localInReg[reg].name == NULL
1171                         || localInReg[reg].descriptor == NULL) {
1172                     goto invalid_stream;
1173                 }
1174 
1175                 /*
1176                  * If the register is live, the "restart" is superfluous,
1177                  * and we don't want to mess with the existing start address.
1178                  */
1179                 if (!localInReg[reg].live) {
1180                     localInReg[reg].startAddress = address;
1181                     localInReg[reg].live = true;
1182                 }
1183                 break;
1184 
1185             case DBG_SET_PROLOGUE_END:
1186             case DBG_SET_EPILOGUE_BEGIN:
1187             case DBG_SET_FILE:
1188                 break;
1189 
1190             default: {
1191                 int adjopcode = opcode - DBG_FIRST_SPECIAL;
1192 
1193                 address += adjopcode / DBG_LINE_RANGE;
1194                 line += DBG_LINE_BASE + (adjopcode % DBG_LINE_RANGE);
1195 
1196                 if (posCb != NULL) {
1197                     int done;
1198                     done = posCb(cnxt, address, line);
1199 
1200                     if (done) {
1201                         // early exit
1202                         goto end;
1203                     }
1204                 }
1205                 break;
1206             }
1207         }
1208     }
1209 
1210 end:
1211     {
1212         int reg;
1213         for (reg = 0; reg < pCode->registersSize; reg++) {
1214             emitLocalCbIfLive (cnxt, reg, insnsSize, localInReg, localCb);
1215         }
1216     }
1217     return;
1218 
1219 invalid_stream:
1220     IF_LOGE() {
1221         char* methodDescriptor = dexProtoCopyMethodDescriptor(&proto);
1222         LOGE("Invalid debug info stream. class %s; proto %s",
1223                 classDescriptor, methodDescriptor);
1224         free(methodDescriptor);
1225     }
1226 }
1227 
1228