1 /*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 /*
18 * Access the contents of a .dex file.
19 */
20
21 #include "DexFile.h"
22 #include "DexOptData.h"
23 #include "DexProto.h"
24 #include "DexCatch.h"
25 #include "Leb128.h"
26 #include "sha1.h"
27 #include "ZipArchive.h"
28
29 #include <zlib.h>
30
31 #include <stdlib.h>
32 #include <stddef.h>
33 #include <string.h>
34 #include <fcntl.h>
35 #include <errno.h>
36
37
38 /*
39 * Verifying checksums is good, but it slows things down and causes us to
40 * touch every page. In the "optimized" world, it doesn't work at all,
41 * because we rewrite the contents.
42 */
/* Not referenced in the visible part of this file: dexFileParse() decides
 * whether to verify the Adler-32 checksum from its "flags" argument
 * (kDexParseVerifyChecksum) instead. */
static const bool kVerifyChecksum = false;
/* Compile-time switch for the SHA-1 signature comparison in dexFileParse();
 * while false, that comparison is never executed. */
static const bool kVerifySignature = false;
45
46
/*
 * Order two '\0'-terminated modified UTF-8 strings by the values that
 * dexGetUtf16FromUtf8() decodes from them, rather than by raw bytes, so
 * different encodings of the same code point compare equal. Only a real
 * '\0' byte ends a string.
 *
 * Returns 0 when the strings are equal, a negative value when s1 sorts
 * first and a positive value when s2 sorts first (same contract as
 * strcmp()).
 */
int dexUtf8Cmp(const char* s1, const char* s2) {
    /* Walk both strings in lock step while neither has ended. */
    while (*s1 != '\0' && *s2 != '\0') {
        int left = dexGetUtf16FromUtf8(&s1);
        int right = dexGetUtf16FromUtf8(&s2);

        if (left != right) {
            return left - right;
        }
    }

    /* At least one string is exhausted; the shorter one sorts first. */
    if (*s1 != '\0') {
        return 1;
    }
    if (*s2 != '\0') {
        return -1;
    }
    return 0;
}
72
/*
 * For dexIsValidMemberNameUtf8(): a 128-bit vector, stored as four 32-bit
 * words, marking which 7-bit ASCII characters are valid in a member name.
 * Word w covers characters (w * 32)..(w * 32 + 31); within a word, bit n
 * (bit 0 being the least significant) stands for character (w * 32 + n),
 * and a set bit means "valid".
 * NOTE(review): the lookup itself lives outside this file (presumably in
 * DexFile.h) -- confirm the indexing there matches this layout.
 */
u4 DEX_MEMBER_VALID_LOW_ASCII[4] = {
    0x00000000, // 00..1f low control characters; nothing valid
    0x03ff2010, // 20..3f digits and symbols; valid: '0'..'9', '$', '-'
    0x87fffffe, // 40..5f uppercase etc.; valid: 'A'..'Z', '_'
    0x07fffffe  // 60..7f lowercase etc.; valid: 'a'..'z'
};
80
81 /* Helper for dexIsValidMemberNameUtf8(); do not call directly. */
dexIsValidMemberNameUtf8_0(const char ** pUtf8Ptr)82 bool dexIsValidMemberNameUtf8_0(const char** pUtf8Ptr) {
83 /*
84 * It's a multibyte encoded character. Decode it and analyze. We
85 * accept anything that isn't (a) an improperly encoded low value,
86 * (b) an improper surrogate pair, (c) an encoded '\0', (d) a high
87 * control character, or (e) a high space, layout, or special
88 * character (U+00a0, U+2000..U+200f, U+2028..U+202f,
89 * U+fff0..U+ffff).
90 */
91
92 u2 utf16 = dexGetUtf16FromUtf8(pUtf8Ptr);
93
94 // Perform follow-up tests based on the high 8 bits.
95 switch (utf16 >> 8) {
96 case 0x00: {
97 // It's only valid if it's above the ISO-8859-1 high space (0xa0).
98 return (utf16 > 0x00a0);
99 }
100 case 0xd8:
101 case 0xd9:
102 case 0xda:
103 case 0xdb: {
104 /*
105 * It's a leading surrogate. Check to see that a trailing
106 * surrogate follows.
107 */
108 utf16 = dexGetUtf16FromUtf8(pUtf8Ptr);
109 return (utf16 >= 0xdc00) && (utf16 <= 0xdfff);
110 }
111 case 0xdc:
112 case 0xdd:
113 case 0xde:
114 case 0xdf: {
115 // It's a trailing surrogate, which is not valid at this point.
116 return false;
117 }
118 case 0x20:
119 case 0xff: {
120 // It's in the range that has spaces, controls, and specials.
121 switch (utf16 & 0xfff8) {
122 case 0x2000:
123 case 0x2008:
124 case 0x2028:
125 case 0xfff0:
126 case 0xfff8: {
127 return false;
128 }
129 }
130 break;
131 }
132 }
133
134 return true;
135 }
136
/*
 * Return whether the given string is a valid field or method name.
 *
 * The empty string is rejected. A name may start with '<', in which case
 * it must also end with '>' (and '>' may appear nowhere else); every other
 * character has to pass dexIsValidMemberNameUtf8().
 */
bool dexIsValidMemberName(const char* s) {
    if (*s == '\0') {
        // The empty string is not a valid name.
        return false;
    }

    // '<' is only allowed as the very first character.
    bool bracketed = (*s == '<');
    if (bracketed) {
        s++;
    }

    while (*s != '\0') {
        if (*s == '>') {
            // '>' must close a '<' name and be the final character.
            return bracketed && s[1] == '\0';
        }
        if (!dexIsValidMemberNameUtf8(&s)) {
            return false;
        }
    }

    // Reached the end without '>': fine only for a non-bracketed name.
    return !bracketed;
}
171
/*
 * Return whether the given string is a valid type descriptor.
 *
 * Accepted forms: zero to 255 leading '[' followed by either a single
 * primitive letter (B, C, D, F, I, J, S, Z), or "L<name>;" where <name>
 * is one or more non-empty components separated by '/', each made of
 * characters accepted by dexIsValidMemberNameUtf8(). "V" is accepted only
 * without any array dimensions.
 */
bool dexIsValidTypeDescriptor(const char* s) {
    int dimensions = 0;

    for (; *s == '['; s++) {
        dimensions++;
    }

    // Arrays may have no more than 255 dimensions.
    if (dimensions > 255) {
        return false;
    }

    const char first = *s++;
    switch (first) {
        case 'L':
            // Class name; validated below.
            break;
        case 'V':
            // You can't have an array of void.
            return (dimensions == 0) && (*s == '\0');
        case 'B': case 'C': case 'D': case 'F':
        case 'I': case 'J': case 'S': case 'Z':
            // Primitive types are exactly one character long.
            return (*s == '\0');
        default:
            // Oddball descriptor character (including end of string).
            return false;
    }

    // True at the start of the name and right after each '/'.
    bool componentEmpty = true;

    while (*s != '\0') {
        if (*s == ';') {
            // Must be the last character and must not follow an empty
            // component (this also rejects the degenerate "L;").
            return !componentEmpty && (s[1] == '\0');
        }

        if (*s == '/') {
            if (componentEmpty) {
                // Slash at start or two slashes in a row.
                return false;
            }
            componentEmpty = true;
            s++;
        } else {
            if (!dexIsValidMemberNameUtf8(&s)) {
                return false;
            }
            componentEmpty = false;
        }
    }

    // Ran out of input before the closing ';'.
    return false;
}
249
/*
 * Return whether the given string is a valid reference descriptor, i.e.
 * dexIsValidTypeDescriptor() accepts it and it denotes a class ('L') or
 * an array ('[') rather than a primitive type.
 */
bool dexIsReferenceDescriptor(const char* s) {
    return dexIsValidTypeDescriptor(s) && (s[0] == 'L' || s[0] == '[');
}
260
/*
 * Return whether the given string is a valid class descriptor, i.e.
 * dexIsValidTypeDescriptor() accepts it and it denotes a class ('L'),
 * not an array or a primitive type.
 */
bool dexIsClassDescriptor(const char* s) {
    return dexIsValidTypeDescriptor(s) && (s[0] == 'L');
}
271
/*
 * Return whether the given string is a valid field type descriptor, i.e.
 * dexIsValidTypeDescriptor() accepts it and it is not "void" ('V').
 */
bool dexIsFieldDescriptor(const char* s) {
    return dexIsValidTypeDescriptor(s) && (s[0] != 'V');
}
282
283 /* Return the UTF-8 encoded string with the specified string_id index,
284 * also filling in the UTF-16 size (number of 16-bit code points).*/
dexStringAndSizeById(const DexFile * pDexFile,u4 idx,u4 * utf16Size)285 const char* dexStringAndSizeById(const DexFile* pDexFile, u4 idx,
286 u4* utf16Size) {
287 const DexStringId* pStringId = dexGetStringId(pDexFile, idx);
288 const u1* ptr = pDexFile->baseAddr + pStringId->stringDataOff;
289
290 *utf16Size = readUnsignedLeb128(&ptr);
291 return (const char*) ptr;
292 }
293
/*
 * Format an SHA-1 digest for printing. tmpBuf must be able to hold at
 * least kSHA1DigestOutputLen bytes.
 *
 * This is a declaration only: no definition or caller appears in the
 * visible part of this file, which uses the static dexSHA1DigestToStr()
 * below instead.
 * NOTE(review): possibly a leftover prototype -- confirm before removing.
 */
const char* dvmSHA1DigestToStr(const unsigned char digest[], char* tmpBuf);
299
300 /*
301 * Compute a SHA-1 digest on a range of bytes.
302 */
dexComputeSHA1Digest(const unsigned char * data,size_t length,unsigned char digest[])303 static void dexComputeSHA1Digest(const unsigned char* data, size_t length,
304 unsigned char digest[])
305 {
306 SHA1_CTX context;
307 SHA1Init(&context);
308 SHA1Update(&context, data, length);
309 SHA1Final(digest, &context);
310 }
311
312 /*
313 * Format the SHA-1 digest into the buffer, which must be able to hold at
314 * least kSHA1DigestOutputLen bytes. Returns a pointer to the buffer,
315 */
dexSHA1DigestToStr(const unsigned char digest[],char * tmpBuf)316 static const char* dexSHA1DigestToStr(const unsigned char digest[],char* tmpBuf)
317 {
318 static const char hexDigit[] = "0123456789abcdef";
319 char* cp;
320 int i;
321
322 cp = tmpBuf;
323 for (i = 0; i < kSHA1DigestLen; i++) {
324 *cp++ = hexDigit[digest[i] >> 4];
325 *cp++ = hexDigit[digest[i] & 0x0f];
326 }
327 *cp++ = '\0';
328
329 assert(cp == tmpBuf + kSHA1DigestOutputLen);
330
331 return tmpBuf;
332 }
333
334 /*
335 * Compute a hash code on a UTF-8 string, for use with internal hash tables.
336 *
337 * This may or may not be compatible with UTF-8 hash functions used inside
338 * the Dalvik VM.
339 *
340 * The basic "multiply by 31 and add" approach does better on class names
341 * than most other things tried (e.g. adler32).
342 */
classDescriptorHash(const char * str)343 static u4 classDescriptorHash(const char* str)
344 {
345 u4 hash = 1;
346
347 while (*str != '\0')
348 hash = hash * 31 + *str++;
349
350 return hash;
351 }
352
353 /*
354 * Add an entry to the class lookup table. We hash the string and probe
355 * until we find an open slot.
356 */
classLookupAdd(DexFile * pDexFile,DexClassLookup * pLookup,int stringOff,int classDefOff,int * pNumProbes)357 static void classLookupAdd(DexFile* pDexFile, DexClassLookup* pLookup,
358 int stringOff, int classDefOff, int* pNumProbes)
359 {
360 const char* classDescriptor =
361 (const char*) (pDexFile->baseAddr + stringOff);
362 const DexClassDef* pClassDef =
363 (const DexClassDef*) (pDexFile->baseAddr + classDefOff);
364 u4 hash = classDescriptorHash(classDescriptor);
365 int mask = pLookup->numEntries-1;
366 int idx = hash & mask;
367
368 /*
369 * Find the first empty slot. We oversized the table, so this is
370 * guaranteed to finish.
371 */
372 int probes = 0;
373 while (pLookup->table[idx].classDescriptorOffset != 0) {
374 idx = (idx + 1) & mask;
375 probes++;
376 }
377 //if (probes > 1)
378 // LOGW("classLookupAdd: probes=%d\n", probes);
379
380 pLookup->table[idx].classDescriptorHash = hash;
381 pLookup->table[idx].classDescriptorOffset = stringOff;
382 pLookup->table[idx].classDefOffset = classDefOff;
383 *pNumProbes = probes;
384 }
385
386 /*
387 * Round up to the next highest power of 2.
388 *
389 * Found on http://graphics.stanford.edu/~seander/bithacks.html.
390 */
dexRoundUpPower2(u4 val)391 u4 dexRoundUpPower2(u4 val)
392 {
393 val--;
394 val |= val >> 1;
395 val |= val >> 2;
396 val |= val >> 4;
397 val |= val >> 8;
398 val |= val >> 16;
399 val++;
400
401 return val;
402 }
403
404 /*
405 * Create the class lookup hash table.
406 *
407 * Returns newly-allocated storage.
408 */
dexCreateClassLookup(DexFile * pDexFile)409 DexClassLookup* dexCreateClassLookup(DexFile* pDexFile)
410 {
411 DexClassLookup* pLookup;
412 int allocSize;
413 int i, numEntries;
414 int numProbes, totalProbes, maxProbes;
415
416 numProbes = totalProbes = maxProbes = 0;
417
418 assert(pDexFile != NULL);
419
420 /*
421 * Using a factor of 3 results in far less probing than a factor of 2,
422 * but almost doubles the flash storage requirements for the bootstrap
423 * DEX files. The overall impact on class loading performance seems
424 * to be minor. We could probably get some performance improvement by
425 * using a secondary hash.
426 */
427 numEntries = dexRoundUpPower2(pDexFile->pHeader->classDefsSize * 2);
428 allocSize = offsetof(DexClassLookup, table)
429 + numEntries * sizeof(pLookup->table[0]);
430
431 pLookup = (DexClassLookup*) calloc(1, allocSize);
432 if (pLookup == NULL)
433 return NULL;
434 pLookup->size = allocSize;
435 pLookup->numEntries = numEntries;
436
437 for (i = 0; i < (int)pDexFile->pHeader->classDefsSize; i++) {
438 const DexClassDef* pClassDef;
439 const char* pString;
440
441 pClassDef = dexGetClassDef(pDexFile, i);
442 pString = dexStringByTypeIdx(pDexFile, pClassDef->classIdx);
443
444 classLookupAdd(pDexFile, pLookup,
445 (u1*)pString - pDexFile->baseAddr,
446 (u1*)pClassDef - pDexFile->baseAddr, &numProbes);
447
448 if (numProbes > maxProbes)
449 maxProbes = numProbes;
450 totalProbes += numProbes;
451 }
452
453 LOGV("Class lookup: classes=%d slots=%d (%d%% occ) alloc=%d"
454 " total=%d max=%d\n",
455 pDexFile->pHeader->classDefsSize, numEntries,
456 (100 * pDexFile->pHeader->classDefsSize) / numEntries,
457 allocSize, totalProbes, maxProbes);
458
459 return pLookup;
460 }
461
462
463 /*
464 * Set up the basic raw data pointers of a DexFile. This function isn't
465 * meant for general use.
466 */
dexFileSetupBasicPointers(DexFile * pDexFile,const u1 * data)467 void dexFileSetupBasicPointers(DexFile* pDexFile, const u1* data) {
468 DexHeader *pHeader = (DexHeader*) data;
469
470 pDexFile->baseAddr = data;
471 pDexFile->pHeader = pHeader;
472 pDexFile->pStringIds = (const DexStringId*) (data + pHeader->stringIdsOff);
473 pDexFile->pTypeIds = (const DexTypeId*) (data + pHeader->typeIdsOff);
474 pDexFile->pFieldIds = (const DexFieldId*) (data + pHeader->fieldIdsOff);
475 pDexFile->pMethodIds = (const DexMethodId*) (data + pHeader->methodIdsOff);
476 pDexFile->pProtoIds = (const DexProtoId*) (data + pHeader->protoIdsOff);
477 pDexFile->pClassDefs = (const DexClassDef*) (data + pHeader->classDefsOff);
478 pDexFile->pLinkData = (const DexLink*) (data + pHeader->linkOff);
479 }
480
481 /*
482 * Parse an optimized or unoptimized .dex file sitting in memory. This is
483 * called after the byte-ordering and structure alignment has been fixed up.
484 *
485 * On success, return a newly-allocated DexFile.
486 */
dexFileParse(const u1 * data,size_t length,int flags)487 DexFile* dexFileParse(const u1* data, size_t length, int flags)
488 {
489 DexFile* pDexFile = NULL;
490 const DexHeader* pHeader;
491 const u1* magic;
492 int result = -1;
493
494 if (length < sizeof(DexHeader)) {
495 LOGE("too short to be a valid .dex\n");
496 goto bail; /* bad file format */
497 }
498
499 pDexFile = (DexFile*) malloc(sizeof(DexFile));
500 if (pDexFile == NULL)
501 goto bail; /* alloc failure */
502 memset(pDexFile, 0, sizeof(DexFile));
503
504 /*
505 * Peel off the optimized header.
506 */
507 if (memcmp(data, DEX_OPT_MAGIC, 4) == 0) {
508 magic = data;
509 if (memcmp(magic+4, DEX_OPT_MAGIC_VERS, 4) != 0) {
510 LOGE("bad opt version (0x%02x %02x %02x %02x)\n",
511 magic[4], magic[5], magic[6], magic[7]);
512 goto bail;
513 }
514
515 pDexFile->pOptHeader = (const DexOptHeader*) data;
516 LOGV("Good opt header, DEX offset is %d, flags=0x%02x\n",
517 pDexFile->pOptHeader->dexOffset, pDexFile->pOptHeader->flags);
518
519 /* parse the optimized dex file tables */
520 if (!dexParseOptData(data, length, pDexFile))
521 goto bail;
522
523 /* ignore the opt header and appended data from here on out */
524 data += pDexFile->pOptHeader->dexOffset;
525 length -= pDexFile->pOptHeader->dexOffset;
526 if (pDexFile->pOptHeader->dexLength > length) {
527 LOGE("File truncated? stored len=%d, rem len=%d\n",
528 pDexFile->pOptHeader->dexLength, (int) length);
529 goto bail;
530 }
531 length = pDexFile->pOptHeader->dexLength;
532 }
533
534 dexFileSetupBasicPointers(pDexFile, data);
535 pHeader = pDexFile->pHeader;
536
537 magic = pHeader->magic;
538 if (memcmp(magic, DEX_MAGIC, 4) != 0) {
539 /* not expected */
540 LOGE("bad magic number (0x%02x %02x %02x %02x)\n",
541 magic[0], magic[1], magic[2], magic[3]);
542 goto bail;
543 }
544 if (memcmp(magic+4, DEX_MAGIC_VERS, 4) != 0) {
545 LOGE("bad dex version (0x%02x %02x %02x %02x)\n",
546 magic[4], magic[5], magic[6], magic[7]);
547 goto bail;
548 }
549
550 /*
551 * Verify the checksum(s). This is reasonably quick, but does require
552 * touching every byte in the DEX file. The base checksum changes after
553 * byte-swapping and DEX optimization.
554 */
555 if (flags & kDexParseVerifyChecksum) {
556 u4 adler = dexComputeChecksum(pHeader);
557 if (adler != pHeader->checksum) {
558 LOGE("ERROR: bad checksum (%08x vs %08x)\n",
559 adler, pHeader->checksum);
560 if (!(flags & kDexParseContinueOnError))
561 goto bail;
562 } else {
563 LOGV("+++ adler32 checksum (%08x) verified\n", adler);
564 }
565
566 const DexOptHeader* pOptHeader = pDexFile->pOptHeader;
567 if (pOptHeader != NULL) {
568 adler = dexComputeOptChecksum(pOptHeader);
569 if (adler != pOptHeader->checksum) {
570 LOGE("ERROR: bad opt checksum (%08x vs %08x)\n",
571 adler, pOptHeader->checksum);
572 if (!(flags & kDexParseContinueOnError))
573 goto bail;
574 } else {
575 LOGV("+++ adler32 opt checksum (%08x) verified\n", adler);
576 }
577 }
578 }
579
580 /*
581 * Verify the SHA-1 digest. (Normally we don't want to do this --
582 * the digest is used to uniquely identify the original DEX file, and
583 * can't be computed for verification after the DEX is byte-swapped
584 * and optimized.)
585 */
586 if (kVerifySignature) {
587 unsigned char sha1Digest[kSHA1DigestLen];
588 const int nonSum = sizeof(pHeader->magic) + sizeof(pHeader->checksum) +
589 kSHA1DigestLen;
590
591 dexComputeSHA1Digest(data + nonSum, length - nonSum, sha1Digest);
592 if (memcmp(sha1Digest, pHeader->signature, kSHA1DigestLen) != 0) {
593 char tmpBuf1[kSHA1DigestOutputLen];
594 char tmpBuf2[kSHA1DigestOutputLen];
595 LOGE("ERROR: bad SHA1 digest (%s vs %s)\n",
596 dexSHA1DigestToStr(sha1Digest, tmpBuf1),
597 dexSHA1DigestToStr(pHeader->signature, tmpBuf2));
598 if (!(flags & kDexParseContinueOnError))
599 goto bail;
600 } else {
601 LOGV("+++ sha1 digest verified\n");
602 }
603 }
604
605 if (pHeader->fileSize != length) {
606 LOGE("ERROR: stored file size (%d) != expected (%d)\n",
607 (int) pHeader->fileSize, (int) length);
608 if (!(flags & kDexParseContinueOnError))
609 goto bail;
610 }
611
612 if (pHeader->classDefsSize == 0) {
613 LOGE("ERROR: DEX file has no classes in it, failing\n");
614 goto bail;
615 }
616
617 /*
618 * Success!
619 */
620 result = 0;
621
622 bail:
623 if (result != 0 && pDexFile != NULL) {
624 dexFileFree(pDexFile);
625 pDexFile = NULL;
626 }
627 return pDexFile;
628 }
629
630 /*
631 * Free up the DexFile and any associated data structures.
632 *
633 * Note we may be called with a partially-initialized DexFile.
634 */
dexFileFree(DexFile * pDexFile)635 void dexFileFree(DexFile* pDexFile)
636 {
637 if (pDexFile == NULL)
638 return;
639
640 free(pDexFile);
641 }
642
643 /*
644 * Look up a class definition entry by descriptor.
645 *
646 * "descriptor" should look like "Landroid/debug/Stuff;".
647 */
dexFindClass(const DexFile * pDexFile,const char * descriptor)648 const DexClassDef* dexFindClass(const DexFile* pDexFile,
649 const char* descriptor)
650 {
651 const DexClassLookup* pLookup = pDexFile->pClassLookup;
652 u4 hash;
653 int idx, mask;
654
655 hash = classDescriptorHash(descriptor);
656 mask = pLookup->numEntries - 1;
657 idx = hash & mask;
658
659 /*
660 * Search until we find a matching entry or an empty slot.
661 */
662 while (true) {
663 int offset;
664
665 offset = pLookup->table[idx].classDescriptorOffset;
666 if (offset == 0)
667 return NULL;
668
669 if (pLookup->table[idx].classDescriptorHash == hash) {
670 const char* str;
671
672 str = (const char*) (pDexFile->baseAddr + offset);
673 if (strcmp(str, descriptor) == 0) {
674 return (const DexClassDef*)
675 (pDexFile->baseAddr + pLookup->table[idx].classDefOffset);
676 }
677 }
678
679 idx = (idx + 1) & mask;
680 }
681 }
682
683
684 /*
685 * Compute the DEX file checksum for a memory-mapped DEX file.
686 */
dexComputeChecksum(const DexHeader * pHeader)687 u4 dexComputeChecksum(const DexHeader* pHeader)
688 {
689 const u1* start = (const u1*) pHeader;
690
691 uLong adler = adler32(0L, Z_NULL, 0);
692 const int nonSum = sizeof(pHeader->magic) + sizeof(pHeader->checksum);
693
694 return (u4) adler32(adler, start + nonSum, pHeader->fileSize - nonSum);
695 }
696
697 /*
698 * Compute the size, in bytes, of a DexCode.
699 */
dexGetDexCodeSize(const DexCode * pCode)700 size_t dexGetDexCodeSize(const DexCode* pCode)
701 {
702 /*
703 * The catch handler data is the last entry. It has a variable number
704 * of variable-size pieces, so we need to create an iterator.
705 */
706 u4 handlersSize;
707 u4 offset;
708 u4 ui;
709
710 if (pCode->triesSize != 0) {
711 handlersSize = dexGetHandlersSize(pCode);
712 offset = dexGetFirstHandlerOffset(pCode);
713 } else {
714 handlersSize = 0;
715 offset = 0;
716 }
717
718 for (ui = 0; ui < handlersSize; ui++) {
719 DexCatchIterator iterator;
720 dexCatchIteratorInit(&iterator, pCode, offset);
721 offset = dexCatchIteratorGetEndOffset(&iterator, pCode);
722 }
723
724 const u1* handlerData = dexGetCatchHandlerData(pCode);
725
726 //LOGD("+++ pCode=%p handlerData=%p last offset=%d\n",
727 // pCode, handlerData, offset);
728
729 /* return the size of the catch handler + everything before it */
730 return (handlerData - (u1*) pCode) + offset;
731 }
732
733
734 /*
735 * ===========================================================================
736 * Debug info
737 * ===========================================================================
738 */
739
/*
 * Decode the arguments in a method signature, which looks something
 * like "(ID[Ljava/lang/String;)V".
 *
 * A leading '(' at *pSig is skipped.  Returns the type signature letter
 * for the next argument ('L' for any object reference, '[' for any array,
 * otherwise the character itself, e.g. 'I' or ')'), and advances "*pSig"
 * to point to the character after the complete type.
 *
 * At the end of the string, '\0' is returned and "*pSig" is left
 * unchanged.  A malformed signature (an object type without its closing
 * ';', or '[' with no element type) never moves "*pSig" past the
 * terminating '\0'.
 */
static char decodeSignature(const char** pSig)
{
    const char* sig = *pSig;

    if (*sig == '(')
        sig++;

    const char kind = *sig;

    if (kind == '\0')
        return kind;                /* don't advance further */

    if (kind == '[') {
        /* array; advance past all the dimension markers */
        do {
            sig++;
        } while (*sig == '[');
    }

    if (*sig == 'L') {
        /* object ref (or array of them); find the closing ';' but stop
         * at the terminator if the signature is truncated */
        while (*sig != ';' && *sig != '\0')
            sig++;
    }

    /* step over the last character of the type, unless we ran out */
    if (*sig != '\0')
        sig++;

    *pSig = sig;
    return kind;
}
779
/*
 * Returns the length, in bytes, of the single type descriptor that starts
 * at "type" (as delimited by decodeSignature()).  Used for the case where
 * the debug info format references types that are inside a method type
 * signature.
 *
 * Assumes any leading '(' has already been gobbled.
 */
static int typeLength (const char *type) {
    const char *cursor = type;

    /* only the cursor movement matters; the type letter is not needed */
    (void) decodeSignature(&cursor);
    return cursor - type;
}
791
792 /*
793 * Reads a string index as encoded for the debug info format,
794 * returning a string pointer or NULL as appropriate.
795 */
readStringIdx(const DexFile * pDexFile,const u1 ** pStream)796 static const char* readStringIdx(const DexFile* pDexFile,
797 const u1** pStream) {
798 u4 stringIdx = readUnsignedLeb128(pStream);
799
800 // Remember, encoded string indicies have 1 added to them.
801 if (stringIdx == 0) {
802 return NULL;
803 } else {
804 return dexStringById(pDexFile, stringIdx - 1);
805 }
806 }
807
808 /*
809 * Reads a type index as encoded for the debug info format, returning
810 * a string pointer for its descriptor or NULL as appropriate.
811 */
readTypeIdx(const DexFile * pDexFile,const u1 ** pStream)812 static const char* readTypeIdx(const DexFile* pDexFile,
813 const u1** pStream) {
814 u4 typeIdx = readUnsignedLeb128(pStream);
815
816 // Remember, encoded type indicies have 1 added to them.
817 if (typeIdx == 0) {
818 return NULL;
819 } else {
820 return dexStringByTypeIdx(pDexFile, typeIdx - 1);
821 }
822 }
823
/* access_flag value indicating that a method is static */
#define ACC_STATIC      0x0008

/*
 * State of the local variable currently associated with one register
 * while dexDecodeDebugInfo() walks a debug info stream.  That function
 * keeps an array of these, indexed by register number.
 */
typedef struct LocalInfo {
    const char *name;           /* variable name; NULL if never set */
    const char *descriptor;     /* type descriptor; NULL if never set */
    const char *signature;      /* extra signature string from
                                 * DBG_START_LOCAL_EXTENDED; NULL otherwise */
    u2 startAddress;            /* address at which the variable was
                                 * started or restarted */
    bool live;                  /* true while the variable is in scope */
} LocalInfo;
834
/*
 * Report the local variable held in register "reg" through localCb, but
 * only if a callback was supplied and the variable is currently live.
 * The reported range runs from the variable's recorded start address to
 * "endAddress"; a missing signature is passed as the empty string.
 */
static void emitLocalCbIfLive (void *cnxt, int reg, u4 endAddress,
    LocalInfo *localInReg, DexDebugNewLocalCb localCb)
{
    if (localCb == NULL) {
        return;
    }

    const LocalInfo *local = &localInReg[reg];
    if (!local->live) {
        return;
    }

    const char *signature = (local->signature == NULL) ? "" : local->signature;

    localCb(cnxt, reg, local->startAddress, endAddress,
            local->name, local->descriptor, signature);
}
846
847 // TODO optimize localCb == NULL case
dexDecodeDebugInfo(const DexFile * pDexFile,const DexCode * pCode,const char * classDescriptor,u4 protoIdx,u4 accessFlags,DexDebugNewPositionCb posCb,DexDebugNewLocalCb localCb,void * cnxt)848 void dexDecodeDebugInfo(
849 const DexFile* pDexFile,
850 const DexCode* pCode,
851 const char* classDescriptor,
852 u4 protoIdx,
853 u4 accessFlags,
854 DexDebugNewPositionCb posCb, DexDebugNewLocalCb localCb,
855 void* cnxt)
856 {
857 const u1 *stream = dexGetDebugInfoStream(pDexFile, pCode);
858 u4 line;
859 u4 parametersSize;
860 u4 address = 0;
861 LocalInfo localInReg[pCode->registersSize];
862 u4 insnsSize = pCode->insnsSize;
863 DexProto proto = { pDexFile, protoIdx };
864
865 memset(localInReg, 0, sizeof(LocalInfo) * pCode->registersSize);
866
867 if (stream == NULL) {
868 goto end;
869 }
870
871 line = readUnsignedLeb128(&stream);
872 parametersSize = readUnsignedLeb128(&stream);
873
874 u2 argReg = pCode->registersSize - pCode->insSize;
875
876 if ((accessFlags & ACC_STATIC) == 0) {
877 /*
878 * The code is an instance method, which means that there is
879 * an initial this parameter. Also, the proto list should
880 * contain exactly one fewer argument word than the insSize
881 * indicates.
882 */
883 assert(pCode->insSize == (dexProtoComputeArgsSize(&proto) + 1));
884 localInReg[argReg].name = "this";
885 localInReg[argReg].descriptor = classDescriptor;
886 localInReg[argReg].startAddress = 0;
887 localInReg[argReg].live = true;
888 argReg++;
889 } else {
890 assert(pCode->insSize == dexProtoComputeArgsSize(&proto));
891 }
892
893 DexParameterIterator iterator;
894 dexParameterIteratorInit(&iterator, &proto);
895
896 while (parametersSize-- != 0) {
897 const char* descriptor = dexParameterIteratorNextDescriptor(&iterator);
898 const char *name;
899 int reg;
900
901 if ((argReg >= pCode->registersSize) || (descriptor == NULL)) {
902 goto invalid_stream;
903 }
904
905 name = readStringIdx(pDexFile, &stream);
906 reg = argReg;
907
908 switch (descriptor[0]) {
909 case 'D':
910 case 'J':
911 argReg += 2;
912 break;
913 default:
914 argReg += 1;
915 break;
916 }
917
918 if (name != NULL) {
919 localInReg[reg].name = name;
920 localInReg[reg].descriptor = descriptor;
921 localInReg[reg].signature = NULL;
922 localInReg[reg].startAddress = address;
923 localInReg[reg].live = true;
924 }
925 }
926
927 for (;;) {
928 u1 opcode = *stream++;
929 u2 reg;
930
931 switch (opcode) {
932 case DBG_END_SEQUENCE:
933 goto end;
934
935 case DBG_ADVANCE_PC:
936 address += readUnsignedLeb128(&stream);
937 break;
938
939 case DBG_ADVANCE_LINE:
940 line += readSignedLeb128(&stream);
941 break;
942
943 case DBG_START_LOCAL:
944 case DBG_START_LOCAL_EXTENDED:
945 reg = readUnsignedLeb128(&stream);
946 if (reg > pCode->registersSize) goto invalid_stream;
947
948 // Emit what was previously there, if anything
949 emitLocalCbIfLive (cnxt, reg, address,
950 localInReg, localCb);
951
952 localInReg[reg].name = readStringIdx(pDexFile, &stream);
953 localInReg[reg].descriptor = readTypeIdx(pDexFile, &stream);
954 if (opcode == DBG_START_LOCAL_EXTENDED) {
955 localInReg[reg].signature
956 = readStringIdx(pDexFile, &stream);
957 } else {
958 localInReg[reg].signature = NULL;
959 }
960 localInReg[reg].startAddress = address;
961 localInReg[reg].live = true;
962 break;
963
964 case DBG_END_LOCAL:
965 reg = readUnsignedLeb128(&stream);
966 if (reg > pCode->registersSize) goto invalid_stream;
967
968 emitLocalCbIfLive (cnxt, reg, address, localInReg, localCb);
969 localInReg[reg].live = false;
970 break;
971
972 case DBG_RESTART_LOCAL:
973 reg = readUnsignedLeb128(&stream);
974 if (reg > pCode->registersSize) goto invalid_stream;
975
976 if (localInReg[reg].name == NULL
977 || localInReg[reg].descriptor == NULL) {
978 goto invalid_stream;
979 }
980
981 /*
982 * If the register is live, the "restart" is superfluous,
983 * and we don't want to mess with the existing start address.
984 */
985 if (!localInReg[reg].live) {
986 localInReg[reg].startAddress = address;
987 localInReg[reg].live = true;
988 }
989 break;
990
991 case DBG_SET_PROLOGUE_END:
992 case DBG_SET_EPILOGUE_BEGIN:
993 case DBG_SET_FILE:
994 break;
995
996 default: {
997 int adjopcode = opcode - DBG_FIRST_SPECIAL;
998
999 address += adjopcode / DBG_LINE_RANGE;
1000 line += DBG_LINE_BASE + (adjopcode % DBG_LINE_RANGE);
1001
1002 if (posCb != NULL) {
1003 int done;
1004 done = posCb(cnxt, address, line);
1005
1006 if (done) {
1007 // early exit
1008 goto end;
1009 }
1010 }
1011 break;
1012 }
1013 }
1014 }
1015
1016 end:
1017 {
1018 int reg;
1019 for (reg = 0; reg < pCode->registersSize; reg++) {
1020 emitLocalCbIfLive (cnxt, reg, insnsSize, localInReg, localCb);
1021 }
1022 }
1023 return;
1024
1025 invalid_stream:
1026 IF_LOGE() {
1027 char* methodDescriptor = dexProtoCopyMethodDescriptor(&proto);
1028 LOGE("Invalid debug info stream. class %s; proto %s",
1029 classDescriptor, methodDescriptor);
1030 free(methodDescriptor);
1031 }
1032 }
1033