• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2008 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 /*
18  * Access the contents of a .dex file.
19  */
20 
21 #include "DexFile.h"
22 #include "DexOptData.h"
23 #include "DexProto.h"
24 #include "DexCatch.h"
25 #include "Leb128.h"
26 #include "sha1.h"
27 #include "ZipArchive.h"
28 
29 #include <zlib.h>
30 
31 #include <stdlib.h>
32 #include <stddef.h>
33 #include <string.h>
34 #include <fcntl.h>
35 #include <errno.h>
36 
37 
/*
 * Compile-time switches for integrity verification.
 *
 * Both default to off: verifying slows things down and forces every page
 * of the file to be touched, and in the "optimized" world it cannot work
 * at all because the file contents are rewritten.
 */
static const bool kVerifyChecksum  = false;
static const bool kVerifySignature = false;
45 
46 
/*
 * Compare two '\0'-terminated modified UTF-8 strings by the values that
 * dexGetUtf16FromUtf8() decodes from them, one unit at a time. Different
 * encodings of the same value therefore compare equal, except that only a
 * real '\0' byte terminates a string.
 *
 * Returns 0 if the strings are equal, a negative value if s1 sorts first
 * (including when s1 is a proper prefix of s2), and a positive value
 * otherwise -- the same convention as strcmp().
 */
int dexUtf8Cmp(const char* s1, const char* s2) {
    /* Walk both strings in lock step while neither has ended. */
    while (*s1 != '\0' && *s2 != '\0') {
        int unit1 = dexGetUtf16FromUtf8(&s1);
        int unit2 = dexGetUtf16FromUtf8(&s2);

        if (unit1 != unit2) {
            return unit1 - unit2;
        }
    }

    /* At least one string is exhausted; the shorter one sorts first. */
    if (*s1 == '\0') {
        return (*s2 == '\0') ? 0 : -1;
    }
    return 1;
}
72 
73 /* for dexIsValidMemberNameUtf8(), a bit vector indicating valid low ascii */
74 u4 DEX_MEMBER_VALID_LOW_ASCII[4] = {
75     0x00000000, // 00..1f low control characters; nothing valid
76     0x03ff2010, // 20..3f digits and symbols; valid: '0'..'9', '$', '-'
77     0x87fffffe, // 40..5f uppercase etc.; valid: 'A'..'Z', '_'
78     0x07fffffe  // 60..7f lowercase etc.; valid: 'a'..'z'
79 };
80 
81 /* Helper for dexIsValidMemberNameUtf8(); do not call directly. */
dexIsValidMemberNameUtf8_0(const char ** pUtf8Ptr)82 bool dexIsValidMemberNameUtf8_0(const char** pUtf8Ptr) {
83     /*
84      * It's a multibyte encoded character. Decode it and analyze. We
85      * accept anything that isn't (a) an improperly encoded low value,
86      * (b) an improper surrogate pair, (c) an encoded '\0', (d) a high
87      * control character, or (e) a high space, layout, or special
88      * character (U+00a0, U+2000..U+200f, U+2028..U+202f,
89      * U+fff0..U+ffff).
90      */
91 
92     u2 utf16 = dexGetUtf16FromUtf8(pUtf8Ptr);
93 
94     // Perform follow-up tests based on the high 8 bits.
95     switch (utf16 >> 8) {
96         case 0x00: {
97             // It's only valid if it's above the ISO-8859-1 high space (0xa0).
98             return (utf16 > 0x00a0);
99         }
100         case 0xd8:
101         case 0xd9:
102         case 0xda:
103         case 0xdb: {
104             /*
105              * It's a leading surrogate. Check to see that a trailing
106              * surrogate follows.
107              */
108             utf16 = dexGetUtf16FromUtf8(pUtf8Ptr);
109             return (utf16 >= 0xdc00) && (utf16 <= 0xdfff);
110         }
111         case 0xdc:
112         case 0xdd:
113         case 0xde:
114         case 0xdf: {
115             // It's a trailing surrogate, which is not valid at this point.
116             return false;
117         }
118         case 0x20:
119         case 0xff: {
120             // It's in the range that has spaces, controls, and specials.
121             switch (utf16 & 0xfff8) {
122                 case 0x2000:
123                 case 0x2008:
124                 case 0x2028:
125                 case 0xfff0:
126                 case 0xfff8: {
127                     return false;
128                 }
129             }
130             break;
131         }
132     }
133 
134     return true;
135 }
136 
/*
 * Return whether the given string is a valid field or method name.
 *
 * The empty string is invalid. A name may start with '<', in which case
 * it must also end with '>' (and contain no other '>'); a '>' anywhere
 * else makes the name invalid. Every other character must pass
 * dexIsValidMemberNameUtf8().
 */
bool dexIsValidMemberName(const char* s) {
    if (*s == '\0') {
        return false;
    }

    /* '<' is only allowed as the very first character. */
    const bool bracketed = (*s == '<');
    if (bracketed) {
        s++;
    }

    while (*s != '\0') {
        if (*s == '>') {
            /* Must close a '<' name and be the final character. */
            return bracketed && s[1] == '\0';
        }
        if (!dexIsValidMemberNameUtf8(&s)) {
            return false;
        }
    }

    /* Hit the end without seeing '>': fine only for a plain name. */
    return !bracketed;
}
171 
/*
 * Return whether the given string is a valid type descriptor.
 *
 * Accepted forms: up to 255 leading '[' (array dimensions) followed by
 * either a single primitive letter (B C D F I J S Z), "V" (only with no
 * array dimensions), or "L<name>;" where <name> is one or more non-empty
 * components separated by '/', each made of characters accepted by
 * dexIsValidMemberNameUtf8(). Nothing may follow the descriptor.
 */
bool dexIsValidTypeDescriptor(const char* s) {
    int dimensions = 0;

    for (; *s == '['; s++) {
        dimensions++;
    }

    /* Arrays may have no more than 255 dimensions. */
    if (dimensions > 255) {
        return false;
    }

    const char kind = *s++;
    switch (kind) {
        case 'B':
        case 'C':
        case 'D':
        case 'F':
        case 'I':
        case 'J':
        case 'S':
        case 'Z':
            /* Single-character primitive descriptor. */
            return *s == '\0';
        case 'V':
            /* Void cannot be an array element type. */
            return (dimensions == 0) && (*s == '\0');
        case 'L':
            /* Class name follows; validated below. */
            break;
        default:
            /* Unknown descriptor character (or empty descriptor). */
            return false;
    }

    /* True at the start of the name and right after each '/'. */
    bool componentEmpty = true;

    while (*s != '\0') {
        if (*s == ';') {
            /*
             * Must be the last character, and the final component must
             * not be empty (this also rejects the degenerate "L;").
             */
            return (s[1] == '\0') && !componentEmpty;
        }

        if (*s == '/') {
            if (componentEmpty) {
                /* Leading slash or two slashes in a row. */
                return false;
            }
            componentEmpty = true;
            s++;
            continue;
        }

        if (!dexIsValidMemberNameUtf8(&s)) {
            return false;
        }
        componentEmpty = false;
    }

    /* Ran off the end without finding ';'. */
    return false;
}
249 
/*
 * Return whether the given string is a valid reference descriptor, i.e.
 * dexIsValidTypeDescriptor() accepts it and it describes a class ('L')
 * or an array ('[') rather than a primitive type.
 */
bool dexIsReferenceDescriptor(const char* s) {
    return dexIsValidTypeDescriptor(s) && ((s[0] == 'L') || (s[0] == '['));
}
260 
/*
 * Return whether the given string is a valid class descriptor, i.e.
 * dexIsValidTypeDescriptor() accepts it and it starts with 'L' (so it is
 * neither an array nor a primitive type).
 */
bool dexIsClassDescriptor(const char* s) {
    return dexIsValidTypeDescriptor(s) && (s[0] == 'L');
}
271 
/*
 * Return whether the given string is a valid field type descriptor, i.e.
 * dexIsValidTypeDescriptor() accepts it and it is not "void" (does not
 * start with 'V').
 */
bool dexIsFieldDescriptor(const char* s) {
    return dexIsValidTypeDescriptor(s) && (s[0] != 'V');
}
282 
283 /* Return the UTF-8 encoded string with the specified string_id index,
284  * also filling in the UTF-16 size (number of 16-bit code points).*/
dexStringAndSizeById(const DexFile * pDexFile,u4 idx,u4 * utf16Size)285 const char* dexStringAndSizeById(const DexFile* pDexFile, u4 idx,
286         u4* utf16Size) {
287     const DexStringId* pStringId = dexGetStringId(pDexFile, idx);
288     const u1* ptr = pDexFile->baseAddr + pStringId->stringDataOff;
289 
290     *utf16Size = readUnsignedLeb128(&ptr);
291     return (const char*) ptr;
292 }
293 
/*
 * Declaration only: format an SHA-1 digest for printing into tmpBuf,
 * which must be able to hold at least kSHA1DigestOutputLen bytes.
 */
const char* dvmSHA1DigestToStr(const unsigned char digest[], char* tmpBuf);
299 
300 /*
301  * Compute a SHA-1 digest on a range of bytes.
302  */
dexComputeSHA1Digest(const unsigned char * data,size_t length,unsigned char digest[])303 static void dexComputeSHA1Digest(const unsigned char* data, size_t length,
304     unsigned char digest[])
305 {
306     SHA1_CTX context;
307     SHA1Init(&context);
308     SHA1Update(&context, data, length);
309     SHA1Final(digest, &context);
310 }
311 
312 /*
313  * Format the SHA-1 digest into the buffer, which must be able to hold at
314  * least kSHA1DigestOutputLen bytes.  Returns a pointer to the buffer,
315  */
dexSHA1DigestToStr(const unsigned char digest[],char * tmpBuf)316 static const char* dexSHA1DigestToStr(const unsigned char digest[],char* tmpBuf)
317 {
318     static const char hexDigit[] = "0123456789abcdef";
319     char* cp;
320     int i;
321 
322     cp = tmpBuf;
323     for (i = 0; i < kSHA1DigestLen; i++) {
324         *cp++ = hexDigit[digest[i] >> 4];
325         *cp++ = hexDigit[digest[i] & 0x0f];
326     }
327     *cp++ = '\0';
328 
329     assert(cp == tmpBuf + kSHA1DigestOutputLen);
330 
331     return tmpBuf;
332 }
333 
334 /*
335  * Compute a hash code on a UTF-8 string, for use with internal hash tables.
336  *
337  * This may or may not be compatible with UTF-8 hash functions used inside
338  * the Dalvik VM.
339  *
340  * The basic "multiply by 31 and add" approach does better on class names
341  * than most other things tried (e.g. adler32).
342  */
classDescriptorHash(const char * str)343 static u4 classDescriptorHash(const char* str)
344 {
345     u4 hash = 1;
346 
347     while (*str != '\0')
348         hash = hash * 31 + *str++;
349 
350     return hash;
351 }
352 
353 /*
354  * Add an entry to the class lookup table.  We hash the string and probe
355  * until we find an open slot.
356  */
classLookupAdd(DexFile * pDexFile,DexClassLookup * pLookup,int stringOff,int classDefOff,int * pNumProbes)357 static void classLookupAdd(DexFile* pDexFile, DexClassLookup* pLookup,
358     int stringOff, int classDefOff, int* pNumProbes)
359 {
360     const char* classDescriptor =
361         (const char*) (pDexFile->baseAddr + stringOff);
362     const DexClassDef* pClassDef =
363         (const DexClassDef*) (pDexFile->baseAddr + classDefOff);
364     u4 hash = classDescriptorHash(classDescriptor);
365     int mask = pLookup->numEntries-1;
366     int idx = hash & mask;
367 
368     /*
369      * Find the first empty slot.  We oversized the table, so this is
370      * guaranteed to finish.
371      */
372     int probes = 0;
373     while (pLookup->table[idx].classDescriptorOffset != 0) {
374         idx = (idx + 1) & mask;
375         probes++;
376     }
377     //if (probes > 1)
378     //    LOGW("classLookupAdd: probes=%d\n", probes);
379 
380     pLookup->table[idx].classDescriptorHash = hash;
381     pLookup->table[idx].classDescriptorOffset = stringOff;
382     pLookup->table[idx].classDefOffset = classDefOff;
383     *pNumProbes = probes;
384 }
385 
386 /*
387  * Round up to the next highest power of 2.
388  *
389  * Found on http://graphics.stanford.edu/~seander/bithacks.html.
390  */
dexRoundUpPower2(u4 val)391 u4 dexRoundUpPower2(u4 val)
392 {
393     val--;
394     val |= val >> 1;
395     val |= val >> 2;
396     val |= val >> 4;
397     val |= val >> 8;
398     val |= val >> 16;
399     val++;
400 
401     return val;
402 }
403 
404 /*
405  * Create the class lookup hash table.
406  *
407  * Returns newly-allocated storage.
408  */
dexCreateClassLookup(DexFile * pDexFile)409 DexClassLookup* dexCreateClassLookup(DexFile* pDexFile)
410 {
411     DexClassLookup* pLookup;
412     int allocSize;
413     int i, numEntries;
414     int numProbes, totalProbes, maxProbes;
415 
416     numProbes = totalProbes = maxProbes = 0;
417 
418     assert(pDexFile != NULL);
419 
420     /*
421      * Using a factor of 3 results in far less probing than a factor of 2,
422      * but almost doubles the flash storage requirements for the bootstrap
423      * DEX files.  The overall impact on class loading performance seems
424      * to be minor.  We could probably get some performance improvement by
425      * using a secondary hash.
426      */
427     numEntries = dexRoundUpPower2(pDexFile->pHeader->classDefsSize * 2);
428     allocSize = offsetof(DexClassLookup, table)
429                     + numEntries * sizeof(pLookup->table[0]);
430 
431     pLookup = (DexClassLookup*) calloc(1, allocSize);
432     if (pLookup == NULL)
433         return NULL;
434     pLookup->size = allocSize;
435     pLookup->numEntries = numEntries;
436 
437     for (i = 0; i < (int)pDexFile->pHeader->classDefsSize; i++) {
438         const DexClassDef* pClassDef;
439         const char* pString;
440 
441         pClassDef = dexGetClassDef(pDexFile, i);
442         pString = dexStringByTypeIdx(pDexFile, pClassDef->classIdx);
443 
444         classLookupAdd(pDexFile, pLookup,
445             (u1*)pString - pDexFile->baseAddr,
446             (u1*)pClassDef - pDexFile->baseAddr, &numProbes);
447 
448         if (numProbes > maxProbes)
449             maxProbes = numProbes;
450         totalProbes += numProbes;
451     }
452 
453     LOGV("Class lookup: classes=%d slots=%d (%d%% occ) alloc=%d"
454          " total=%d max=%d\n",
455         pDexFile->pHeader->classDefsSize, numEntries,
456         (100 * pDexFile->pHeader->classDefsSize) / numEntries,
457         allocSize, totalProbes, maxProbes);
458 
459     return pLookup;
460 }
461 
462 
463 /*
464  * Set up the basic raw data pointers of a DexFile. This function isn't
465  * meant for general use.
466  */
dexFileSetupBasicPointers(DexFile * pDexFile,const u1 * data)467 void dexFileSetupBasicPointers(DexFile* pDexFile, const u1* data) {
468     DexHeader *pHeader = (DexHeader*) data;
469 
470     pDexFile->baseAddr = data;
471     pDexFile->pHeader = pHeader;
472     pDexFile->pStringIds = (const DexStringId*) (data + pHeader->stringIdsOff);
473     pDexFile->pTypeIds = (const DexTypeId*) (data + pHeader->typeIdsOff);
474     pDexFile->pFieldIds = (const DexFieldId*) (data + pHeader->fieldIdsOff);
475     pDexFile->pMethodIds = (const DexMethodId*) (data + pHeader->methodIdsOff);
476     pDexFile->pProtoIds = (const DexProtoId*) (data + pHeader->protoIdsOff);
477     pDexFile->pClassDefs = (const DexClassDef*) (data + pHeader->classDefsOff);
478     pDexFile->pLinkData = (const DexLink*) (data + pHeader->linkOff);
479 }
480 
481 /*
482  * Parse an optimized or unoptimized .dex file sitting in memory.  This is
483  * called after the byte-ordering and structure alignment has been fixed up.
484  *
485  * On success, return a newly-allocated DexFile.
486  */
dexFileParse(const u1 * data,size_t length,int flags)487 DexFile* dexFileParse(const u1* data, size_t length, int flags)
488 {
489     DexFile* pDexFile = NULL;
490     const DexHeader* pHeader;
491     const u1* magic;
492     int result = -1;
493 
494     if (length < sizeof(DexHeader)) {
495         LOGE("too short to be a valid .dex\n");
496         goto bail;      /* bad file format */
497     }
498 
499     pDexFile = (DexFile*) malloc(sizeof(DexFile));
500     if (pDexFile == NULL)
501         goto bail;      /* alloc failure */
502     memset(pDexFile, 0, sizeof(DexFile));
503 
504     /*
505      * Peel off the optimized header.
506      */
507     if (memcmp(data, DEX_OPT_MAGIC, 4) == 0) {
508         magic = data;
509         if (memcmp(magic+4, DEX_OPT_MAGIC_VERS, 4) != 0) {
510             LOGE("bad opt version (0x%02x %02x %02x %02x)\n",
511                  magic[4], magic[5], magic[6], magic[7]);
512             goto bail;
513         }
514 
515         pDexFile->pOptHeader = (const DexOptHeader*) data;
516         LOGV("Good opt header, DEX offset is %d, flags=0x%02x\n",
517             pDexFile->pOptHeader->dexOffset, pDexFile->pOptHeader->flags);
518 
519         /* parse the optimized dex file tables */
520         if (!dexParseOptData(data, length, pDexFile))
521             goto bail;
522 
523         /* ignore the opt header and appended data from here on out */
524         data += pDexFile->pOptHeader->dexOffset;
525         length -= pDexFile->pOptHeader->dexOffset;
526         if (pDexFile->pOptHeader->dexLength > length) {
527             LOGE("File truncated? stored len=%d, rem len=%d\n",
528                 pDexFile->pOptHeader->dexLength, (int) length);
529             goto bail;
530         }
531         length = pDexFile->pOptHeader->dexLength;
532     }
533 
534     dexFileSetupBasicPointers(pDexFile, data);
535     pHeader = pDexFile->pHeader;
536 
537     magic = pHeader->magic;
538     if (memcmp(magic, DEX_MAGIC, 4) != 0) {
539         /* not expected */
540         LOGE("bad magic number (0x%02x %02x %02x %02x)\n",
541              magic[0], magic[1], magic[2], magic[3]);
542         goto bail;
543     }
544     if (memcmp(magic+4, DEX_MAGIC_VERS, 4) != 0) {
545         LOGE("bad dex version (0x%02x %02x %02x %02x)\n",
546              magic[4], magic[5], magic[6], magic[7]);
547         goto bail;
548     }
549 
550     /*
551      * Verify the checksum(s).  This is reasonably quick, but does require
552      * touching every byte in the DEX file.  The base checksum changes after
553      * byte-swapping and DEX optimization.
554      */
555     if (flags & kDexParseVerifyChecksum) {
556         u4 adler = dexComputeChecksum(pHeader);
557         if (adler != pHeader->checksum) {
558             LOGE("ERROR: bad checksum (%08x vs %08x)\n",
559                 adler, pHeader->checksum);
560             if (!(flags & kDexParseContinueOnError))
561                 goto bail;
562         } else {
563             LOGV("+++ adler32 checksum (%08x) verified\n", adler);
564         }
565 
566         const DexOptHeader* pOptHeader = pDexFile->pOptHeader;
567         if (pOptHeader != NULL) {
568             adler = dexComputeOptChecksum(pOptHeader);
569             if (adler != pOptHeader->checksum) {
570                 LOGE("ERROR: bad opt checksum (%08x vs %08x)\n",
571                     adler, pOptHeader->checksum);
572                 if (!(flags & kDexParseContinueOnError))
573                     goto bail;
574             } else {
575                 LOGV("+++ adler32 opt checksum (%08x) verified\n", adler);
576             }
577         }
578     }
579 
580     /*
581      * Verify the SHA-1 digest.  (Normally we don't want to do this --
582      * the digest is used to uniquely identify the original DEX file, and
583      * can't be computed for verification after the DEX is byte-swapped
584      * and optimized.)
585      */
586     if (kVerifySignature) {
587         unsigned char sha1Digest[kSHA1DigestLen];
588         const int nonSum = sizeof(pHeader->magic) + sizeof(pHeader->checksum) +
589                             kSHA1DigestLen;
590 
591         dexComputeSHA1Digest(data + nonSum, length - nonSum, sha1Digest);
592         if (memcmp(sha1Digest, pHeader->signature, kSHA1DigestLen) != 0) {
593             char tmpBuf1[kSHA1DigestOutputLen];
594             char tmpBuf2[kSHA1DigestOutputLen];
595             LOGE("ERROR: bad SHA1 digest (%s vs %s)\n",
596                 dexSHA1DigestToStr(sha1Digest, tmpBuf1),
597                 dexSHA1DigestToStr(pHeader->signature, tmpBuf2));
598             if (!(flags & kDexParseContinueOnError))
599                 goto bail;
600         } else {
601             LOGV("+++ sha1 digest verified\n");
602         }
603     }
604 
605     if (pHeader->fileSize != length) {
606         LOGE("ERROR: stored file size (%d) != expected (%d)\n",
607             (int) pHeader->fileSize, (int) length);
608         if (!(flags & kDexParseContinueOnError))
609             goto bail;
610     }
611 
612     if (pHeader->classDefsSize == 0) {
613         LOGE("ERROR: DEX file has no classes in it, failing\n");
614         goto bail;
615     }
616 
617     /*
618      * Success!
619      */
620     result = 0;
621 
622 bail:
623     if (result != 0 && pDexFile != NULL) {
624         dexFileFree(pDexFile);
625         pDexFile = NULL;
626     }
627     return pDexFile;
628 }
629 
630 /*
631  * Free up the DexFile and any associated data structures.
632  *
633  * Note we may be called with a partially-initialized DexFile.
634  */
dexFileFree(DexFile * pDexFile)635 void dexFileFree(DexFile* pDexFile)
636 {
637     if (pDexFile == NULL)
638         return;
639 
640     free(pDexFile);
641 }
642 
643 /*
644  * Look up a class definition entry by descriptor.
645  *
646  * "descriptor" should look like "Landroid/debug/Stuff;".
647  */
dexFindClass(const DexFile * pDexFile,const char * descriptor)648 const DexClassDef* dexFindClass(const DexFile* pDexFile,
649     const char* descriptor)
650 {
651     const DexClassLookup* pLookup = pDexFile->pClassLookup;
652     u4 hash;
653     int idx, mask;
654 
655     hash = classDescriptorHash(descriptor);
656     mask = pLookup->numEntries - 1;
657     idx = hash & mask;
658 
659     /*
660      * Search until we find a matching entry or an empty slot.
661      */
662     while (true) {
663         int offset;
664 
665         offset = pLookup->table[idx].classDescriptorOffset;
666         if (offset == 0)
667             return NULL;
668 
669         if (pLookup->table[idx].classDescriptorHash == hash) {
670             const char* str;
671 
672             str = (const char*) (pDexFile->baseAddr + offset);
673             if (strcmp(str, descriptor) == 0) {
674                 return (const DexClassDef*)
675                     (pDexFile->baseAddr + pLookup->table[idx].classDefOffset);
676             }
677         }
678 
679         idx = (idx + 1) & mask;
680     }
681 }
682 
683 
684 /*
685  * Compute the DEX file checksum for a memory-mapped DEX file.
686  */
dexComputeChecksum(const DexHeader * pHeader)687 u4 dexComputeChecksum(const DexHeader* pHeader)
688 {
689     const u1* start = (const u1*) pHeader;
690 
691     uLong adler = adler32(0L, Z_NULL, 0);
692     const int nonSum = sizeof(pHeader->magic) + sizeof(pHeader->checksum);
693 
694     return (u4) adler32(adler, start + nonSum, pHeader->fileSize - nonSum);
695 }
696 
697 /*
698  * Compute the size, in bytes, of a DexCode.
699  */
dexGetDexCodeSize(const DexCode * pCode)700 size_t dexGetDexCodeSize(const DexCode* pCode)
701 {
702     /*
703      * The catch handler data is the last entry.  It has a variable number
704      * of variable-size pieces, so we need to create an iterator.
705      */
706     u4 handlersSize;
707     u4 offset;
708     u4 ui;
709 
710     if (pCode->triesSize != 0) {
711         handlersSize = dexGetHandlersSize(pCode);
712         offset = dexGetFirstHandlerOffset(pCode);
713     } else {
714         handlersSize = 0;
715         offset = 0;
716     }
717 
718     for (ui = 0; ui < handlersSize; ui++) {
719         DexCatchIterator iterator;
720         dexCatchIteratorInit(&iterator, pCode, offset);
721         offset = dexCatchIteratorGetEndOffset(&iterator, pCode);
722     }
723 
724     const u1* handlerData = dexGetCatchHandlerData(pCode);
725 
726     //LOGD("+++ pCode=%p handlerData=%p last offset=%d\n",
727     //    pCode, handlerData, offset);
728 
729     /* return the size of the catch handler + everything before it */
730     return (handlerData - (u1*) pCode) + offset;
731 }
732 
733 
734 /*
735  * ===========================================================================
736  *      Debug info
737  * ===========================================================================
738  */
739 
/*
 * Decode the arguments in a method signature, which looks something
 * like "(ID[Ljava/lang/String;)V".
 *
 * A leading '(' at *pSig is skipped.  Returns the type signature letter
 * for the next argument: 'L' for an object reference, '[' for an array
 * of any depth/element type, the character itself for anything else
 * (including ')' when there are no more args), or '\0' at end of string.
 *
 * Advances "*pSig" to point to the character after the argument that was
 * consumed.  When '\0' is returned, *pSig is left untouched.  If the
 * signature is truncated in the middle of an argument (e.g. "Lfoo" with
 * no ';', or a bare "["), *pSig is left pointing at the terminating '\0'
 * rather than beyond it, so the next call returns '\0'.
 */
static char decodeSignature(const char** pSig)
{
    const char* sig = *pSig;
    char kind;

    if (*sig == '(')
        sig++;

    kind = *sig;
    if (kind == '\0')
        return kind;        /* don't advance further */

    if (kind == '[') {
        /* array; skip all the dimension markers */
        do {
            sig++;
        } while (*sig == '[');
    }

    if (*sig == 'L' && (kind == 'L' || kind == '[')) {
        /* object ref (or array of them); stop at ';' or end of string */
        while (*sig != ';' && *sig != '\0')
            sig++;
    }

    /*
     * "sig" is now on the last character of the argument, or on the
     * terminator if the signature was cut short.  Never step past '\0'.
     */
    *pSig = (*sig == '\0') ? sig : sig + 1;
    return kind;
}
779 
/*
 * Returns the length of a type string, given the start of the type
 * string.  Used for the case where the debug info format references
 * types that are inside a method type signature.
 *
 * The length is however far decodeSignature() advances from "type".
 * Assumes any leading '(' has already been gobbled.
 */
static int typeLength (const char *type) {
    const char *cursor = type;

    (void) decodeSignature(&cursor);
    return cursor - type;
}
791 
792 /*
793  * Reads a string index as encoded for the debug info format,
794  * returning a string pointer or NULL as appropriate.
795  */
readStringIdx(const DexFile * pDexFile,const u1 ** pStream)796 static const char* readStringIdx(const DexFile* pDexFile,
797         const u1** pStream) {
798     u4 stringIdx = readUnsignedLeb128(pStream);
799 
800     // Remember, encoded string indicies have 1 added to them.
801     if (stringIdx == 0) {
802         return NULL;
803     } else {
804         return dexStringById(pDexFile, stringIdx - 1);
805     }
806 }
807 
808 /*
809  * Reads a type index as encoded for the debug info format, returning
810  * a string pointer for its descriptor or NULL as appropriate.
811  */
readTypeIdx(const DexFile * pDexFile,const u1 ** pStream)812 static const char* readTypeIdx(const DexFile* pDexFile,
813         const u1** pStream) {
814     u4 typeIdx = readUnsignedLeb128(pStream);
815 
816     // Remember, encoded type indicies have 1 added to them.
817     if (typeIdx == 0) {
818         return NULL;
819     } else {
820         return dexStringByTypeIdx(pDexFile, typeIdx - 1);
821     }
822 }
823 
824 /* access_flag value indicating that a method is static */
825 #define ACC_STATIC              0x0008
826 
827 typedef struct LocalInfo {
828     const char *name;
829     const char *descriptor;
830     const char *signature;
831     u2 startAddress;
832     bool live;
833 } LocalInfo;
834 
/*
 * Report the local variable held in register "reg" to localCb, covering
 * the range from its recorded start address to endAddress.
 *
 * Does nothing when localCb is NULL or the register holds no live local.
 * A NULL signature is passed to the callback as the empty string.
 */
static void emitLocalCbIfLive (void *cnxt, int reg, u4 endAddress,
        LocalInfo *localInReg, DexDebugNewLocalCb localCb)
{
    if (localCb == NULL) {
        return;
    }

    const LocalInfo *local = &localInReg[reg];
    if (!local->live) {
        return;
    }

    const char *signature = (local->signature != NULL) ? local->signature : "";

    localCb(cnxt, reg, local->startAddress, endAddress,
            local->name, local->descriptor, signature);
}
846 
847 // TODO optimize localCb == NULL case
dexDecodeDebugInfo(const DexFile * pDexFile,const DexCode * pCode,const char * classDescriptor,u4 protoIdx,u4 accessFlags,DexDebugNewPositionCb posCb,DexDebugNewLocalCb localCb,void * cnxt)848 void dexDecodeDebugInfo(
849             const DexFile* pDexFile,
850             const DexCode* pCode,
851             const char* classDescriptor,
852             u4 protoIdx,
853             u4 accessFlags,
854             DexDebugNewPositionCb posCb, DexDebugNewLocalCb localCb,
855             void* cnxt)
856 {
857     const u1 *stream = dexGetDebugInfoStream(pDexFile, pCode);
858     u4 line;
859     u4 parametersSize;
860     u4 address = 0;
861     LocalInfo localInReg[pCode->registersSize];
862     u4 insnsSize = pCode->insnsSize;
863     DexProto proto = { pDexFile, protoIdx };
864 
865     memset(localInReg, 0, sizeof(LocalInfo) * pCode->registersSize);
866 
867     if (stream == NULL) {
868         goto end;
869     }
870 
871     line = readUnsignedLeb128(&stream);
872     parametersSize = readUnsignedLeb128(&stream);
873 
874     u2 argReg = pCode->registersSize - pCode->insSize;
875 
876     if ((accessFlags & ACC_STATIC) == 0) {
877         /*
878          * The code is an instance method, which means that there is
879          * an initial this parameter. Also, the proto list should
880          * contain exactly one fewer argument word than the insSize
881          * indicates.
882          */
883         assert(pCode->insSize == (dexProtoComputeArgsSize(&proto) + 1));
884         localInReg[argReg].name = "this";
885         localInReg[argReg].descriptor = classDescriptor;
886         localInReg[argReg].startAddress = 0;
887         localInReg[argReg].live = true;
888         argReg++;
889     } else {
890         assert(pCode->insSize == dexProtoComputeArgsSize(&proto));
891     }
892 
893     DexParameterIterator iterator;
894     dexParameterIteratorInit(&iterator, &proto);
895 
896     while (parametersSize-- != 0) {
897         const char* descriptor = dexParameterIteratorNextDescriptor(&iterator);
898         const char *name;
899         int reg;
900 
901         if ((argReg >= pCode->registersSize) || (descriptor == NULL)) {
902             goto invalid_stream;
903         }
904 
905         name = readStringIdx(pDexFile, &stream);
906         reg = argReg;
907 
908         switch (descriptor[0]) {
909             case 'D':
910             case 'J':
911                 argReg += 2;
912                 break;
913             default:
914                 argReg += 1;
915                 break;
916         }
917 
918         if (name != NULL) {
919             localInReg[reg].name = name;
920             localInReg[reg].descriptor = descriptor;
921             localInReg[reg].signature = NULL;
922             localInReg[reg].startAddress = address;
923             localInReg[reg].live = true;
924         }
925     }
926 
927     for (;;)  {
928         u1 opcode = *stream++;
929         u2 reg;
930 
931         switch (opcode) {
932             case DBG_END_SEQUENCE:
933                 goto end;
934 
935             case DBG_ADVANCE_PC:
936                 address += readUnsignedLeb128(&stream);
937                 break;
938 
939             case DBG_ADVANCE_LINE:
940                 line += readSignedLeb128(&stream);
941                 break;
942 
943             case DBG_START_LOCAL:
944             case DBG_START_LOCAL_EXTENDED:
945                 reg = readUnsignedLeb128(&stream);
946                 if (reg > pCode->registersSize) goto invalid_stream;
947 
948                 // Emit what was previously there, if anything
949                 emitLocalCbIfLive (cnxt, reg, address,
950                     localInReg, localCb);
951 
952                 localInReg[reg].name = readStringIdx(pDexFile, &stream);
953                 localInReg[reg].descriptor = readTypeIdx(pDexFile, &stream);
954                 if (opcode == DBG_START_LOCAL_EXTENDED) {
955                     localInReg[reg].signature
956                         = readStringIdx(pDexFile, &stream);
957                 } else {
958                     localInReg[reg].signature = NULL;
959                 }
960                 localInReg[reg].startAddress = address;
961                 localInReg[reg].live = true;
962                 break;
963 
964             case DBG_END_LOCAL:
965                 reg = readUnsignedLeb128(&stream);
966                 if (reg > pCode->registersSize) goto invalid_stream;
967 
968                 emitLocalCbIfLive (cnxt, reg, address, localInReg, localCb);
969                 localInReg[reg].live = false;
970                 break;
971 
972             case DBG_RESTART_LOCAL:
973                 reg = readUnsignedLeb128(&stream);
974                 if (reg > pCode->registersSize) goto invalid_stream;
975 
976                 if (localInReg[reg].name == NULL
977                         || localInReg[reg].descriptor == NULL) {
978                     goto invalid_stream;
979                 }
980 
981                 /*
982                  * If the register is live, the "restart" is superfluous,
983                  * and we don't want to mess with the existing start address.
984                  */
985                 if (!localInReg[reg].live) {
986                     localInReg[reg].startAddress = address;
987                     localInReg[reg].live = true;
988                 }
989                 break;
990 
991             case DBG_SET_PROLOGUE_END:
992             case DBG_SET_EPILOGUE_BEGIN:
993             case DBG_SET_FILE:
994                 break;
995 
996             default: {
997                 int adjopcode = opcode - DBG_FIRST_SPECIAL;
998 
999                 address += adjopcode / DBG_LINE_RANGE;
1000                 line += DBG_LINE_BASE + (adjopcode % DBG_LINE_RANGE);
1001 
1002                 if (posCb != NULL) {
1003                     int done;
1004                     done = posCb(cnxt, address, line);
1005 
1006                     if (done) {
1007                         // early exit
1008                         goto end;
1009                     }
1010                 }
1011                 break;
1012             }
1013         }
1014     }
1015 
1016 end:
1017     {
1018         int reg;
1019         for (reg = 0; reg < pCode->registersSize; reg++) {
1020             emitLocalCbIfLive (cnxt, reg, insnsSize, localInReg, localCb);
1021         }
1022     }
1023     return;
1024 
1025 invalid_stream:
1026     IF_LOGE() {
1027         char* methodDescriptor = dexProtoCopyMethodDescriptor(&proto);
1028         LOGE("Invalid debug info stream. class %s; proto %s",
1029                 classDescriptor, methodDescriptor);
1030         free(methodDescriptor);
1031     }
1032 }
1033