1 /*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 /*
18 * Access the contents of a .dex file.
19 */
20
21 #include "DexFile.h"
22 #include "DexProto.h"
23 #include "DexCatch.h"
24 #include "Leb128.h"
25 #include "sha1.h"
26 #include "ZipArchive.h"
27
28 #include <zlib.h>
29
30 #include <stdlib.h>
31 #include <stddef.h>
32 #include <string.h>
33 #include <fcntl.h>
34 #include <errno.h>
35
/*
 * Compile-time switches for the expensive integrity checks.
 *
 * Verifying is nice in principle, but it is slow, it forces every page of
 * the file to be touched, and it cannot work on "optimized" files because
 * their contents are rewritten after the values were computed.  Both
 * switches are therefore off.
 */
static const bool kVerifyChecksum = false;
static const bool kVerifySignature = false;
43
44
/*
 * Compare two '\0'-terminated modified UTF-8 strings by the values that
 * dexGetUtf16FromUtf8() decodes from them, so that different encodings of
 * the same value compare as equal.  Only a real '\0' byte terminates a
 * string.
 *
 * Returns zero when the strings match, a negative value when s1 orders
 * first (including when s1 is a proper prefix of s2), and a positive value
 * otherwise -- the same convention as strcmp().
 */
int dexUtf8Cmp(const char* s1, const char* s2) {
    while ((*s1 != '\0') && (*s2 != '\0')) {
        int left = dexGetUtf16FromUtf8(&s1);
        int right = dexGetUtf16FromUtf8(&s2);

        if (left != right) {
            return left - right;
        }
    }

    /* At least one side is exhausted; the one that ended first sorts first. */
    if (*s1 != '\0') {
        return 1;
    }
    return (*s2 == '\0') ? 0 : -1;
}
70
71 /* for dexIsValidMemberNameUtf8(), a bit vector indicating valid low ascii */
72 u4 DEX_MEMBER_VALID_LOW_ASCII[4] = {
73 0x00000000, // 00..1f low control characters; nothing valid
74 0x03ff2010, // 20..3f digits and symbols; valid: '0'..'9', '$', '-'
75 0x87fffffe, // 40..5f uppercase etc.; valid: 'A'..'Z', '_'
76 0x07fffffe // 60..7f lowercase etc.; valid: 'a'..'z'
77 };
78
79 /* Helper for dexIsValidMemberNameUtf8(); do not call directly. */
dexIsValidMemberNameUtf8_0(const char ** pUtf8Ptr)80 bool dexIsValidMemberNameUtf8_0(const char** pUtf8Ptr) {
81 /*
82 * It's a multibyte encoded character. Decode it and analyze. We
83 * accept anything that isn't (a) an improperly encoded low value,
84 * (b) an improper surrogate pair, (c) an encoded '\0', (d) a high
85 * control character, or (e) a high space, layout, or special
86 * character (U+00a0, U+2000..U+200f, U+2028..U+202f,
87 * U+fff0..U+ffff).
88 */
89
90 u2 utf16 = dexGetUtf16FromUtf8(pUtf8Ptr);
91
92 // Perform follow-up tests based on the high 8 bits.
93 switch (utf16 >> 8) {
94 case 0x00: {
95 // It's only valid if it's above the ISO-8859-1 high space (0xa0).
96 return (utf16 > 0x00a0);
97 }
98 case 0xd8:
99 case 0xd9:
100 case 0xda:
101 case 0xdb: {
102 /*
103 * It's a leading surrogate. Check to see that a trailing
104 * surrogate follows.
105 */
106 utf16 = dexGetUtf16FromUtf8(pUtf8Ptr);
107 return (utf16 >= 0xdc00) && (utf16 <= 0xdfff);
108 }
109 case 0xdc:
110 case 0xdd:
111 case 0xde:
112 case 0xdf: {
113 // It's a trailing surrogate, which is not valid at this point.
114 return false;
115 }
116 case 0x20:
117 case 0xff: {
118 // It's in the range that has spaces, controls, and specials.
119 switch (utf16 & 0xfff8) {
120 case 0x2000:
121 case 0x2008:
122 case 0x2028:
123 case 0xfff0:
124 case 0xfff8: {
125 return false;
126 }
127 }
128 break;
129 }
130 }
131
132 return true;
133 }
134
/*
 * Return whether the given string is a valid field or method name.
 *
 * The empty string is rejected.  A name may start with '<', in which case
 * it must end with '>' as its final character; '>' is not allowed anywhere
 * in a name that did not start with '<'.  Every other character has to
 * pass dexIsValidMemberNameUtf8().
 */
bool dexIsValidMemberName(const char* s) {
    bool bracketed;

    if (*s == '\0') {
        return false;
    }

    /* '<' is only legal as the very first character. */
    bracketed = (*s == '<');
    if (bracketed) {
        s++;
    }

    while (*s != '\0') {
        if (*s == '>') {
            /* Must close a '<' and must be the last character. */
            return bracketed && (s[1] == '\0');
        }
        if (!dexIsValidMemberNameUtf8(&s)) {
            return false;
        }
    }

    /* Reached the end without seeing '>': fine unless one was required. */
    return !bracketed;
}
169
/*
 * Return whether the given string is a valid type descriptor.
 *
 * Accepted forms: up to 255 leading '[' characters followed by either a
 * single primitive letter (B, C, D, F, I, J, S, Z), or "L<name>;" where
 * <name> is one or more non-empty components separated by '/', each made
 * of characters accepted by dexIsValidMemberNameUtf8().  "V" is accepted
 * only without any array prefix.
 */
bool dexIsValidTypeDescriptor(const char* s) {
    int dimensions = 0;
    bool componentEmpty = true;  /* at the start, or right after a '/' */
    char kind;

    for (; *s == '['; s++) {
        dimensions++;
    }

    if (dimensions > 255) {
        /* Arrays may have no more than 255 dimensions. */
        return false;
    }

    kind = *(s++);

    if (kind == 'V') {
        /* There is no such thing as an array of void. */
        return (dimensions == 0) && (*s == '\0');
    }

    if (kind != 'L') {
        bool primitive =
            (kind == 'B') || (kind == 'C') || (kind == 'D') ||
            (kind == 'F') || (kind == 'I') || (kind == 'J') ||
            (kind == 'S') || (kind == 'Z');

        /* Primitive descriptors are exactly one character long. */
        return primitive && (*s == '\0');
    }

    /* The 'L' introducing a class name has been consumed; walk the name. */
    for (;;) {
        const char c = *s;

        if (c == '\0') {
            /* Ran out of input before the closing ';'. */
            return false;
        }

        if (c == ';') {
            /*
             * Must be the final character, and the component before it
             * must not be empty (which also rules out "L;").
             */
            return (s[1] == '\0') && !componentEmpty;
        }

        if (c == '/') {
            if (componentEmpty) {
                /* Leading slash, or two slashes in a row. */
                return false;
            }
            componentEmpty = true;
            s++;
            continue;
        }

        if (!dexIsValidMemberNameUtf8(&s)) {
            return false;
        }
        componentEmpty = false;
    }
}
247
/*
 * Return whether the given string is a valid reference descriptor, i.e.
 * dexIsValidTypeDescriptor() accepts it and it names a class or an array
 * rather than a primitive type.
 */
bool dexIsReferenceDescriptor(const char* s) {
    return dexIsValidTypeDescriptor(s) && ((s[0] == 'L') || (s[0] == '['));
}
258
/*
 * Return whether the given string is a valid class descriptor, i.e.
 * dexIsValidTypeDescriptor() accepts it and it names a class rather than
 * an array or a primitive type.
 */
bool dexIsClassDescriptor(const char* s) {
    return dexIsValidTypeDescriptor(s) && (s[0] == 'L');
}
269
/*
 * Return whether the given string is a valid field type descriptor, i.e.
 * dexIsValidTypeDescriptor() accepts it and it is anything but "void".
 */
bool dexIsFieldDescriptor(const char* s) {
    return dexIsValidTypeDescriptor(s) && (s[0] != 'V');
}
280
281 /* Return the UTF-8 encoded string with the specified string_id index,
282 * also filling in the UTF-16 size (number of 16-bit code points).*/
dexStringAndSizeById(const DexFile * pDexFile,u4 idx,u4 * utf16Size)283 const char* dexStringAndSizeById(const DexFile* pDexFile, u4 idx,
284 u4* utf16Size) {
285 const DexStringId* pStringId = dexGetStringId(pDexFile, idx);
286 const u1* ptr = pDexFile->baseAddr + pStringId->stringDataOff;
287
288 *utf16Size = readUnsignedLeb128(&ptr);
289 return (const char*) ptr;
290 }
291
292 /*
293 * Format an SHA-1 digest for printing. tmpBuf must be able to hold at
294 * least kSHA1DigestOutputLen bytes.
295 */
296 const char* dvmSHA1DigestToStr(const unsigned char digest[], char* tmpBuf);
297
298 /*
299 * Compute a SHA-1 digest on a range of bytes.
300 */
dexComputeSHA1Digest(const unsigned char * data,size_t length,unsigned char digest[])301 static void dexComputeSHA1Digest(const unsigned char* data, size_t length,
302 unsigned char digest[])
303 {
304 SHA1_CTX context;
305 SHA1Init(&context);
306 SHA1Update(&context, data, length);
307 SHA1Final(digest, &context);
308 }
309
310 /*
311 * Format the SHA-1 digest into the buffer, which must be able to hold at
312 * least kSHA1DigestOutputLen bytes. Returns a pointer to the buffer,
313 */
dexSHA1DigestToStr(const unsigned char digest[],char * tmpBuf)314 static const char* dexSHA1DigestToStr(const unsigned char digest[],char* tmpBuf)
315 {
316 static const char hexDigit[] = "0123456789abcdef";
317 char* cp;
318 int i;
319
320 cp = tmpBuf;
321 for (i = 0; i < kSHA1DigestLen; i++) {
322 *cp++ = hexDigit[digest[i] >> 4];
323 *cp++ = hexDigit[digest[i] & 0x0f];
324 }
325 *cp++ = '\0';
326
327 assert(cp == tmpBuf + kSHA1DigestOutputLen);
328
329 return tmpBuf;
330 }
331
332 /*
333 * Compute a hash code on a UTF-8 string, for use with internal hash tables.
334 *
335 * This may or may not be compatible with UTF-8 hash functions used inside
336 * the Dalvik VM.
337 *
338 * The basic "multiply by 31 and add" approach does better on class names
339 * than most other things tried (e.g. adler32).
340 */
classDescriptorHash(const char * str)341 static u4 classDescriptorHash(const char* str)
342 {
343 u4 hash = 1;
344
345 while (*str != '\0')
346 hash = hash * 31 + *str++;
347
348 return hash;
349 }
350
351 /*
352 * Add an entry to the class lookup table. We hash the string and probe
353 * until we find an open slot.
354 */
classLookupAdd(DexFile * pDexFile,DexClassLookup * pLookup,int stringOff,int classDefOff,int * pNumProbes)355 static void classLookupAdd(DexFile* pDexFile, DexClassLookup* pLookup,
356 int stringOff, int classDefOff, int* pNumProbes)
357 {
358 const char* classDescriptor =
359 (const char*) (pDexFile->baseAddr + stringOff);
360 const DexClassDef* pClassDef =
361 (const DexClassDef*) (pDexFile->baseAddr + classDefOff);
362 u4 hash = classDescriptorHash(classDescriptor);
363 int mask = pLookup->numEntries-1;
364 int idx = hash & mask;
365
366 /*
367 * Find the first empty slot. We oversized the table, so this is
368 * guaranteed to finish.
369 */
370 int probes = 0;
371 while (pLookup->table[idx].classDescriptorOffset != 0) {
372 idx = (idx + 1) & mask;
373 probes++;
374 }
375 //if (probes > 1)
376 // LOGW("classLookupAdd: probes=%d\n", probes);
377
378 pLookup->table[idx].classDescriptorHash = hash;
379 pLookup->table[idx].classDescriptorOffset = stringOff;
380 pLookup->table[idx].classDefOffset = classDefOff;
381 *pNumProbes = probes;
382 }
383
384 /*
385 * Round up to the next highest power of 2.
386 *
387 * Found on http://graphics.stanford.edu/~seander/bithacks.html.
388 */
dexRoundUpPower2(u4 val)389 u4 dexRoundUpPower2(u4 val)
390 {
391 val--;
392 val |= val >> 1;
393 val |= val >> 2;
394 val |= val >> 4;
395 val |= val >> 8;
396 val |= val >> 16;
397 val++;
398
399 return val;
400 }
401
402 /*
403 * Create the class lookup hash table.
404 *
405 * Returns newly-allocated storage.
406 */
dexCreateClassLookup(DexFile * pDexFile)407 DexClassLookup* dexCreateClassLookup(DexFile* pDexFile)
408 {
409 DexClassLookup* pLookup;
410 int allocSize;
411 int i, numEntries;
412 int numProbes, totalProbes, maxProbes;
413
414 numProbes = totalProbes = maxProbes = 0;
415
416 assert(pDexFile != NULL);
417
418 /*
419 * Using a factor of 3 results in far less probing than a factor of 2,
420 * but almost doubles the flash storage requirements for the bootstrap
421 * DEX files. The overall impact on class loading performance seems
422 * to be minor. We could probably get some performance improvement by
423 * using a secondary hash.
424 */
425 numEntries = dexRoundUpPower2(pDexFile->pHeader->classDefsSize * 2);
426 allocSize = offsetof(DexClassLookup, table)
427 + numEntries * sizeof(pLookup->table[0]);
428
429 pLookup = (DexClassLookup*) calloc(1, allocSize);
430 if (pLookup == NULL)
431 return NULL;
432 pLookup->size = allocSize;
433 pLookup->numEntries = numEntries;
434
435 for (i = 0; i < (int)pDexFile->pHeader->classDefsSize; i++) {
436 const DexClassDef* pClassDef;
437 const char* pString;
438
439 pClassDef = dexGetClassDef(pDexFile, i);
440 pString = dexStringByTypeIdx(pDexFile, pClassDef->classIdx);
441
442 classLookupAdd(pDexFile, pLookup,
443 (u1*)pString - pDexFile->baseAddr,
444 (u1*)pClassDef - pDexFile->baseAddr, &numProbes);
445
446 if (numProbes > maxProbes)
447 maxProbes = numProbes;
448 totalProbes += numProbes;
449 }
450
451 LOGV("Class lookup: classes=%d slots=%d (%d%% occ) alloc=%d"
452 " total=%d max=%d\n",
453 pDexFile->pHeader->classDefsSize, numEntries,
454 (100 * pDexFile->pHeader->classDefsSize) / numEntries,
455 allocSize, totalProbes, maxProbes);
456
457 return pLookup;
458 }
459
460
461 /*
462 * Set up the basic raw data pointers of a DexFile. This function isn't
463 * meant for general use.
464 */
dexFileSetupBasicPointers(DexFile * pDexFile,const u1 * data)465 void dexFileSetupBasicPointers(DexFile* pDexFile, const u1* data) {
466 DexHeader *pHeader = (DexHeader*) data;
467
468 pDexFile->baseAddr = data;
469 pDexFile->pHeader = pHeader;
470 pDexFile->pStringIds = (const DexStringId*) (data + pHeader->stringIdsOff);
471 pDexFile->pTypeIds = (const DexTypeId*) (data + pHeader->typeIdsOff);
472 pDexFile->pFieldIds = (const DexFieldId*) (data + pHeader->fieldIdsOff);
473 pDexFile->pMethodIds = (const DexMethodId*) (data + pHeader->methodIdsOff);
474 pDexFile->pProtoIds = (const DexProtoId*) (data + pHeader->protoIdsOff);
475 pDexFile->pClassDefs = (const DexClassDef*) (data + pHeader->classDefsOff);
476 pDexFile->pLinkData = (const DexLink*) (data + pHeader->linkOff);
477 }
478
479
480 /*
481 * Parse out an index map entry, advancing "*pData" and reducing "*pSize".
482 */
parseIndexMapEntry(const u1 ** pData,u4 * pSize,bool expanding,u4 * pFullCount,u4 * pReducedCount,const u2 ** pMap)483 static bool parseIndexMapEntry(const u1** pData, u4* pSize, bool expanding,
484 u4* pFullCount, u4* pReducedCount, const u2** pMap)
485 {
486 const u4* wordPtr = (const u4*) *pData;
487 u4 size = *pSize;
488 u4 mapCount;
489
490 if (expanding) {
491 if (size < 4)
492 return false;
493 mapCount = *pReducedCount = *wordPtr++;
494 *pFullCount = (u4) -1;
495 size -= sizeof(u4);
496 } else {
497 if (size < 8)
498 return false;
499 mapCount = *pFullCount = *wordPtr++;
500 *pReducedCount = *wordPtr++;
501 size -= sizeof(u4) * 2;
502 }
503
504 u4 mapSize = mapCount * sizeof(u2);
505
506 if (size < mapSize)
507 return false;
508 *pMap = (const u2*) wordPtr;
509 size -= mapSize;
510
511 /* advance the pointer */
512 const u1* ptr = (const u1*) wordPtr;
513 ptr += (mapSize + 3) & ~0x3;
514
515 /* update pass-by-reference values */
516 *pData = (const u1*) ptr;
517 *pSize = size;
518
519 return true;
520 }
521
522 /*
523 * Set up some pointers into the mapped data.
524 *
525 * See analysis/ReduceConstants.c for the data layout description.
526 */
parseIndexMap(DexFile * pDexFile,const u1 * data,u4 size,bool expanding)527 static bool parseIndexMap(DexFile* pDexFile, const u1* data, u4 size,
528 bool expanding)
529 {
530 if (!parseIndexMapEntry(&data, &size, expanding,
531 &pDexFile->indexMap.classFullCount,
532 &pDexFile->indexMap.classReducedCount,
533 &pDexFile->indexMap.classMap))
534 {
535 return false;
536 }
537
538 if (!parseIndexMapEntry(&data, &size, expanding,
539 &pDexFile->indexMap.methodFullCount,
540 &pDexFile->indexMap.methodReducedCount,
541 &pDexFile->indexMap.methodMap))
542 {
543 return false;
544 }
545
546 if (!parseIndexMapEntry(&data, &size, expanding,
547 &pDexFile->indexMap.fieldFullCount,
548 &pDexFile->indexMap.fieldReducedCount,
549 &pDexFile->indexMap.fieldMap))
550 {
551 return false;
552 }
553
554 if (!parseIndexMapEntry(&data, &size, expanding,
555 &pDexFile->indexMap.stringFullCount,
556 &pDexFile->indexMap.stringReducedCount,
557 &pDexFile->indexMap.stringMap))
558 {
559 return false;
560 }
561
562 if (expanding) {
563 /*
564 * The map includes the "reduced" counts; pull the original counts
565 * out of the DexFile so that code has a consistent source.
566 */
567 assert(pDexFile->indexMap.classFullCount == (u4) -1);
568 assert(pDexFile->indexMap.methodFullCount == (u4) -1);
569 assert(pDexFile->indexMap.fieldFullCount == (u4) -1);
570 assert(pDexFile->indexMap.stringFullCount == (u4) -1);
571
572 #if 0 // TODO: not available yet -- do later or just skip this
573 pDexFile->indexMap.classFullCount =
574 pDexFile->pHeader->typeIdsSize;
575 pDexFile->indexMap.methodFullCount =
576 pDexFile->pHeader->methodIdsSize;
577 pDexFile->indexMap.fieldFullCount =
578 pDexFile->pHeader->fieldIdsSize;
579 pDexFile->indexMap.stringFullCount =
580 pDexFile->pHeader->stringIdsSize;
581 #endif
582 }
583
584 LOGI("Class : %u %u %u\n",
585 pDexFile->indexMap.classFullCount,
586 pDexFile->indexMap.classReducedCount,
587 pDexFile->indexMap.classMap[0]);
588 LOGI("Method: %u %u %u\n",
589 pDexFile->indexMap.methodFullCount,
590 pDexFile->indexMap.methodReducedCount,
591 pDexFile->indexMap.methodMap[0]);
592 LOGI("Field : %u %u %u\n",
593 pDexFile->indexMap.fieldFullCount,
594 pDexFile->indexMap.fieldReducedCount,
595 pDexFile->indexMap.fieldMap[0]);
596 LOGI("String: %u %u %u\n",
597 pDexFile->indexMap.stringFullCount,
598 pDexFile->indexMap.stringReducedCount,
599 pDexFile->indexMap.stringMap[0]);
600
601 return true;
602 }
603
604 /*
605 * Parse some auxillary data tables.
606 *
607 * v1.0 wrote a zero in the first 32 bits, followed by the DexClassLookup
608 * table. Subsequent versions switched to the "chunk" format.
609 */
parseAuxData(const u1 * data,DexFile * pDexFile)610 static bool parseAuxData(const u1* data, DexFile* pDexFile)
611 {
612 const u4* pAux = (const u4*) (data + pDexFile->pOptHeader->auxOffset);
613 u4 indexMapType = 0;
614
615 /* v1.0 format? */
616 if (*pAux == 0) {
617 LOGV("+++ found OLD dex format\n");
618 pDexFile->pClassLookup = (const DexClassLookup*) (pAux+1);
619 return true;
620 }
621 LOGV("+++ found NEW dex format\n");
622
623 /* process chunks until we see the end marker */
624 while (*pAux != kDexChunkEnd) {
625 u4 size = *(pAux+1);
626 u1* data = (u1*) (pAux + 2);
627
628 switch (*pAux) {
629 case kDexChunkClassLookup:
630 pDexFile->pClassLookup = (const DexClassLookup*) data;
631 break;
632 case kDexChunkReducingIndexMap:
633 LOGI("+++ found reducing index map, size=%u\n", size);
634 if (!parseIndexMap(pDexFile, data, size, false)) {
635 LOGE("Failed parsing reducing index map\n");
636 return false;
637 }
638 indexMapType = *pAux;
639 break;
640 case kDexChunkExpandingIndexMap:
641 LOGI("+++ found expanding index map, size=%u\n", size);
642 if (!parseIndexMap(pDexFile, data, size, true)) {
643 LOGE("Failed parsing expanding index map\n");
644 return false;
645 }
646 indexMapType = *pAux;
647 break;
648 case kDexChunkRegisterMaps:
649 LOGV("+++ found register maps, size=%u\n", size);
650 pDexFile->pRegisterMapPool = data;
651 break;
652 default:
653 LOGI("Unknown chunk 0x%08x (%c%c%c%c), size=%d in aux data area\n",
654 *pAux,
655 (char) ((*pAux) >> 24), (char) ((*pAux) >> 16),
656 (char) ((*pAux) >> 8), (char) (*pAux),
657 size);
658 break;
659 }
660
661 /*
662 * Advance pointer, padding to 64-bit boundary. The extra "+8" is
663 * for the type/size header.
664 */
665 size = (size + 8 + 7) & ~7;
666 pAux += size / sizeof(u4);
667 }
668
669 #if 0 // TODO: propagate expected map type from the VM through the API
670 /*
671 * If we're configured to expect an index map, and we don't find one,
672 * reject this DEX so we'll regenerate it. Also, if we found an
673 * "expanding" map but we're not configured to use it, we have to fail
674 * because the constants aren't usable without translation.
675 */
676 if (indexMapType != expectedIndexMapType) {
677 LOGW("Incompatible index map configuration: found 0x%04x, need %d\n",
678 indexMapType, DVM_REDUCE_CONSTANTS);
679 return false;
680 }
681 #endif
682
683 return true;
684 }
685
686 /*
687 * Parse an optimized or unoptimized .dex file sitting in memory. This is
688 * called after the byte-ordering and structure alignment has been fixed up.
689 *
690 * On success, return a newly-allocated DexFile.
691 */
dexFileParse(const u1 * data,size_t length,int flags)692 DexFile* dexFileParse(const u1* data, size_t length, int flags)
693 {
694 DexFile* pDexFile = NULL;
695 const DexHeader* pHeader;
696 const u1* magic;
697 int result = -1;
698
699 if (length < sizeof(DexHeader)) {
700 LOGE("too short to be a valid .dex\n");
701 goto bail; /* bad file format */
702 }
703
704 pDexFile = (DexFile*) malloc(sizeof(DexFile));
705 if (pDexFile == NULL)
706 goto bail; /* alloc failure */
707 memset(pDexFile, 0, sizeof(DexFile));
708
709 /*
710 * Peel off the optimized header.
711 */
712 if (memcmp(data, DEX_OPT_MAGIC, 4) == 0) {
713 magic = data;
714 if (memcmp(magic+4, DEX_OPT_MAGIC_VERS, 4) != 0) {
715 LOGE("bad opt version (0x%02x %02x %02x %02x)\n",
716 magic[4], magic[5], magic[6], magic[7]);
717 goto bail;
718 }
719
720 pDexFile->pOptHeader = (const DexOptHeader*) data;
721 LOGV("Good opt header, DEX offset is %d, flags=0x%02x\n",
722 pDexFile->pOptHeader->dexOffset, pDexFile->pOptHeader->flags);
723
724 /* locate some auxillary data tables */
725 if (!parseAuxData(data, pDexFile))
726 goto bail;
727
728 /* ignore the opt header and appended data from here on out */
729 data += pDexFile->pOptHeader->dexOffset;
730 length -= pDexFile->pOptHeader->dexOffset;
731 if (pDexFile->pOptHeader->dexLength > length) {
732 LOGE("File truncated? stored len=%d, rem len=%d\n",
733 pDexFile->pOptHeader->dexLength, (int) length);
734 goto bail;
735 }
736 length = pDexFile->pOptHeader->dexLength;
737 }
738
739 dexFileSetupBasicPointers(pDexFile, data);
740 pHeader = pDexFile->pHeader;
741
742 magic = pHeader->magic;
743 if (memcmp(magic, DEX_MAGIC, 4) != 0) {
744 /* not expected */
745 LOGE("bad magic number (0x%02x %02x %02x %02x)\n",
746 magic[0], magic[1], magic[2], magic[3]);
747 goto bail;
748 }
749 if (memcmp(magic+4, DEX_MAGIC_VERS, 4) != 0) {
750 LOGE("bad dex version (0x%02x %02x %02x %02x)\n",
751 magic[4], magic[5], magic[6], magic[7]);
752 goto bail;
753 }
754
755 /*
756 * Verify the checksum. This is reasonably quick, but does require
757 * touching every byte in the DEX file. The checksum changes after
758 * byte-swapping and DEX optimization.
759 */
760 if (flags & kDexParseVerifyChecksum) {
761 u4 adler = dexComputeChecksum(pHeader);
762 if (adler != pHeader->checksum) {
763 LOGE("ERROR: bad checksum (%08x vs %08x)\n",
764 adler, pHeader->checksum);
765 if (!(flags & kDexParseContinueOnError))
766 goto bail;
767 } else {
768 LOGV("+++ adler32 checksum (%08x) verified\n", adler);
769 }
770 }
771
772 /*
773 * Verify the SHA-1 digest. (Normally we don't want to do this --
774 * the digest is used to uniquely identify a DEX file, and can't be
775 * computed post-optimization.)
776 *
777 * The digest will be invalid after byte swapping and DEX optimization.
778 */
779 if (kVerifySignature) {
780 unsigned char sha1Digest[kSHA1DigestLen];
781 const int nonSum = sizeof(pHeader->magic) + sizeof(pHeader->checksum) +
782 kSHA1DigestLen;
783
784 dexComputeSHA1Digest(data + nonSum, length - nonSum, sha1Digest);
785 if (memcmp(sha1Digest, pHeader->signature, kSHA1DigestLen) != 0) {
786 char tmpBuf1[kSHA1DigestOutputLen];
787 char tmpBuf2[kSHA1DigestOutputLen];
788 LOGE("ERROR: bad SHA1 digest (%s vs %s)\n",
789 dexSHA1DigestToStr(sha1Digest, tmpBuf1),
790 dexSHA1DigestToStr(pHeader->signature, tmpBuf2));
791 if (!(flags & kDexParseContinueOnError))
792 goto bail;
793 } else {
794 LOGV("+++ sha1 digest verified\n");
795 }
796 }
797
798 if (pHeader->fileSize != length) {
799 LOGE("ERROR: stored file size (%d) != expected (%d)\n",
800 (int) pHeader->fileSize, (int) length);
801 if (!(flags & kDexParseContinueOnError))
802 goto bail;
803 }
804
805 if (pHeader->classDefsSize == 0) {
806 LOGE("ERROR: DEX file has no classes in it, failing\n");
807 goto bail;
808 }
809
810 /*
811 * Success!
812 */
813 result = 0;
814
815 bail:
816 if (result != 0 && pDexFile != NULL) {
817 dexFileFree(pDexFile);
818 pDexFile = NULL;
819 }
820 return pDexFile;
821 }
822
823 /*
824 * Free up the DexFile and any associated data structures.
825 *
826 * Note we may be called with a partially-initialized DexFile.
827 */
dexFileFree(DexFile * pDexFile)828 void dexFileFree(DexFile* pDexFile)
829 {
830 if (pDexFile == NULL)
831 return;
832
833 free(pDexFile);
834 }
835
836 /*
837 * Look up a class definition entry by descriptor.
838 *
839 * "descriptor" should look like "Landroid/debug/Stuff;".
840 */
dexFindClass(const DexFile * pDexFile,const char * descriptor)841 const DexClassDef* dexFindClass(const DexFile* pDexFile,
842 const char* descriptor)
843 {
844 const DexClassLookup* pLookup = pDexFile->pClassLookup;
845 u4 hash;
846 int idx, mask;
847
848 hash = classDescriptorHash(descriptor);
849 mask = pLookup->numEntries - 1;
850 idx = hash & mask;
851
852 /*
853 * Search until we find a matching entry or an empty slot.
854 */
855 while (true) {
856 int offset;
857
858 offset = pLookup->table[idx].classDescriptorOffset;
859 if (offset == 0)
860 return NULL;
861
862 if (pLookup->table[idx].classDescriptorHash == hash) {
863 const char* str;
864
865 str = (const char*) (pDexFile->baseAddr + offset);
866 if (strcmp(str, descriptor) == 0) {
867 return (const DexClassDef*)
868 (pDexFile->baseAddr + pLookup->table[idx].classDefOffset);
869 }
870 }
871
872 idx = (idx + 1) & mask;
873 }
874 }
875
876
877 /*
878 * Compute the DEX file checksum for a memory-mapped DEX file.
879 */
dexComputeChecksum(const DexHeader * pHeader)880 u4 dexComputeChecksum(const DexHeader* pHeader)
881 {
882 const u1* start = (const u1*) pHeader;
883
884 uLong adler = adler32(0L, Z_NULL, 0);
885 const int nonSum = sizeof(pHeader->magic) + sizeof(pHeader->checksum);
886
887 return (u4) adler32(adler, start + nonSum, pHeader->fileSize - nonSum);
888 }
889
890
891 /*
892 * Compute the size, in bytes, of a DexCode.
893 */
dexGetDexCodeSize(const DexCode * pCode)894 size_t dexGetDexCodeSize(const DexCode* pCode)
895 {
896 /*
897 * The catch handler data is the last entry. It has a variable number
898 * of variable-size pieces, so we need to create an iterator.
899 */
900 u4 handlersSize;
901 u4 offset;
902 u4 ui;
903
904 if (pCode->triesSize != 0) {
905 handlersSize = dexGetHandlersSize(pCode);
906 offset = dexGetFirstHandlerOffset(pCode);
907 } else {
908 handlersSize = 0;
909 offset = 0;
910 }
911
912 for (ui = 0; ui < handlersSize; ui++) {
913 DexCatchIterator iterator;
914 dexCatchIteratorInit(&iterator, pCode, offset);
915 offset = dexCatchIteratorGetEndOffset(&iterator, pCode);
916 }
917
918 const u1* handlerData = dexGetCatchHandlerData(pCode);
919
920 //LOGD("+++ pCode=%p handlerData=%p last offset=%d\n",
921 // pCode, handlerData, offset);
922
923 /* return the size of the catch handler + everything before it */
924 return (handlerData - (u1*) pCode) + offset;
925 }
926
927
928 /*
929 * ===========================================================================
930 * Debug info
931 * ===========================================================================
932 */
933
/*
 * Decode the arguments in a method signature, which looks something
 * like "(ID[Ljava/lang/String;)V".
 *
 * A leading '(' is skipped.  Returns the type signature letter for the
 * next argument ('L' for an object reference, '[' for an array of any
 * depth, otherwise the character itself, e.g. 'I' or ')'), and advances
 * "*pSig" to point to the character after the decoded item.
 *
 * At the end of the string '\0' is returned and "*pSig" is not advanced.
 * If a class name or array type is cut short (no terminating ';', or
 * nothing after the '[' run), scanning stops at the terminator and
 * "*pSig" is left pointing at it rather than beyond it.
 */
static char decodeSignature(const char** pSig)
{
    const char* sig = *pSig;
    char kind;

    if (*sig == '(')
        sig++;

    kind = *sig;
    if (kind == '\0')
        return kind;            /* don't advance further */

    if (kind == '[') {
        /* array; advance past all dimensions to the element type */
        while (*++sig == '[')
            ;
    }

    if (*sig == 'L') {
        /* object ref (or array of them); find the closing ';' */
        while (*sig != ';' && *sig != '\0')
            sig++;
    }

    /* step over the last character of the item, but never over the NUL */
    *pSig = (*sig == '\0') ? sig : sig + 1;
    return kind;
}
973
/*
 * Returns the length of a type string, given the start of the type
 * string.  Used for the case where the debug info format references
 * types that are inside a method type signature.
 *
 * Any leading '(' is expected to have been consumed by the caller.
 */
static int typeLength (const char *type) {
    const char *cursor = type;

    /* only the amount consumed matters, not the type letter */
    (void) decodeSignature(&cursor);
    return cursor - type;
}
985
986 /*
987 * Reads a string index as encoded for the debug info format,
988 * returning a string pointer or NULL as appropriate.
989 */
readStringIdx(const DexFile * pDexFile,const u1 ** pStream)990 static const char* readStringIdx(const DexFile* pDexFile,
991 const u1** pStream) {
992 u4 stringIdx = readUnsignedLeb128(pStream);
993
994 // Remember, encoded string indicies have 1 added to them.
995 if (stringIdx == 0) {
996 return NULL;
997 } else {
998 return dexStringById(pDexFile, stringIdx - 1);
999 }
1000 }
1001
1002 /*
1003 * Reads a type index as encoded for the debug info format, returning
1004 * a string pointer for its descriptor or NULL as appropriate.
1005 */
readTypeIdx(const DexFile * pDexFile,const u1 ** pStream)1006 static const char* readTypeIdx(const DexFile* pDexFile,
1007 const u1** pStream) {
1008 u4 typeIdx = readUnsignedLeb128(pStream);
1009
1010 // Remember, encoded type indicies have 1 added to them.
1011 if (typeIdx == 0) {
1012 return NULL;
1013 } else {
1014 return dexStringByTypeIdx(pDexFile, typeIdx - 1);
1015 }
1016 }
1017
1018 /* access_flag value indicating that a method is static */
1019 #define ACC_STATIC 0x0008
1020
1021 typedef struct LocalInfo {
1022 const char *name;
1023 const char *descriptor;
1024 const char *signature;
1025 u2 startAddress;
1026 bool live;
1027 } LocalInfo;
1028
emitLocalCbIfLive(void * cnxt,int reg,u4 endAddress,LocalInfo * localInReg,DexDebugNewLocalCb localCb)1029 static void emitLocalCbIfLive (void *cnxt, int reg, u4 endAddress,
1030 LocalInfo *localInReg, DexDebugNewLocalCb localCb)
1031 {
1032 if (localCb != NULL && localInReg[reg].live) {
1033 localCb(cnxt, reg, localInReg[reg].startAddress, endAddress,
1034 localInReg[reg].name,
1035 localInReg[reg].descriptor,
1036 localInReg[reg].signature == NULL
1037 ? "" : localInReg[reg].signature );
1038 }
1039 }
1040
1041 // TODO optimize localCb == NULL case
dexDecodeDebugInfo(const DexFile * pDexFile,const DexCode * pCode,const char * classDescriptor,u4 protoIdx,u4 accessFlags,DexDebugNewPositionCb posCb,DexDebugNewLocalCb localCb,void * cnxt)1042 void dexDecodeDebugInfo(
1043 const DexFile* pDexFile,
1044 const DexCode* pCode,
1045 const char* classDescriptor,
1046 u4 protoIdx,
1047 u4 accessFlags,
1048 DexDebugNewPositionCb posCb, DexDebugNewLocalCb localCb,
1049 void* cnxt)
1050 {
1051 const u1 *stream = dexGetDebugInfoStream(pDexFile, pCode);
1052 u4 line;
1053 u4 parametersSize;
1054 u4 address = 0;
1055 LocalInfo localInReg[pCode->registersSize];
1056 u4 insnsSize = pCode->insnsSize;
1057 DexProto proto = { pDexFile, protoIdx };
1058
1059 memset(localInReg, 0, sizeof(LocalInfo) * pCode->registersSize);
1060
1061 if (stream == NULL) {
1062 goto end;
1063 }
1064
1065 line = readUnsignedLeb128(&stream);
1066 parametersSize = readUnsignedLeb128(&stream);
1067
1068 u2 argReg = pCode->registersSize - pCode->insSize;
1069
1070 if ((accessFlags & ACC_STATIC) == 0) {
1071 /*
1072 * The code is an instance method, which means that there is
1073 * an initial this parameter. Also, the proto list should
1074 * contain exactly one fewer argument word than the insSize
1075 * indicates.
1076 */
1077 assert(pCode->insSize == (dexProtoComputeArgsSize(&proto) + 1));
1078 localInReg[argReg].name = "this";
1079 localInReg[argReg].descriptor = classDescriptor;
1080 localInReg[argReg].startAddress = 0;
1081 localInReg[argReg].live = true;
1082 argReg++;
1083 } else {
1084 assert(pCode->insSize == dexProtoComputeArgsSize(&proto));
1085 }
1086
1087 DexParameterIterator iterator;
1088 dexParameterIteratorInit(&iterator, &proto);
1089
1090 while (parametersSize-- != 0) {
1091 const char* descriptor = dexParameterIteratorNextDescriptor(&iterator);
1092 const char *name;
1093 int reg;
1094
1095 if ((argReg >= pCode->registersSize) || (descriptor == NULL)) {
1096 goto invalid_stream;
1097 }
1098
1099 name = readStringIdx(pDexFile, &stream);
1100 reg = argReg;
1101
1102 switch (descriptor[0]) {
1103 case 'D':
1104 case 'J':
1105 argReg += 2;
1106 break;
1107 default:
1108 argReg += 1;
1109 break;
1110 }
1111
1112 if (name != NULL) {
1113 localInReg[reg].name = name;
1114 localInReg[reg].descriptor = descriptor;
1115 localInReg[reg].signature = NULL;
1116 localInReg[reg].startAddress = address;
1117 localInReg[reg].live = true;
1118 }
1119 }
1120
1121 for (;;) {
1122 u1 opcode = *stream++;
1123 u2 reg;
1124
1125 switch (opcode) {
1126 case DBG_END_SEQUENCE:
1127 goto end;
1128
1129 case DBG_ADVANCE_PC:
1130 address += readUnsignedLeb128(&stream);
1131 break;
1132
1133 case DBG_ADVANCE_LINE:
1134 line += readSignedLeb128(&stream);
1135 break;
1136
1137 case DBG_START_LOCAL:
1138 case DBG_START_LOCAL_EXTENDED:
1139 reg = readUnsignedLeb128(&stream);
1140 if (reg > pCode->registersSize) goto invalid_stream;
1141
1142 // Emit what was previously there, if anything
1143 emitLocalCbIfLive (cnxt, reg, address,
1144 localInReg, localCb);
1145
1146 localInReg[reg].name = readStringIdx(pDexFile, &stream);
1147 localInReg[reg].descriptor = readTypeIdx(pDexFile, &stream);
1148 if (opcode == DBG_START_LOCAL_EXTENDED) {
1149 localInReg[reg].signature
1150 = readStringIdx(pDexFile, &stream);
1151 } else {
1152 localInReg[reg].signature = NULL;
1153 }
1154 localInReg[reg].startAddress = address;
1155 localInReg[reg].live = true;
1156 break;
1157
1158 case DBG_END_LOCAL:
1159 reg = readUnsignedLeb128(&stream);
1160 if (reg > pCode->registersSize) goto invalid_stream;
1161
1162 emitLocalCbIfLive (cnxt, reg, address, localInReg, localCb);
1163 localInReg[reg].live = false;
1164 break;
1165
1166 case DBG_RESTART_LOCAL:
1167 reg = readUnsignedLeb128(&stream);
1168 if (reg > pCode->registersSize) goto invalid_stream;
1169
1170 if (localInReg[reg].name == NULL
1171 || localInReg[reg].descriptor == NULL) {
1172 goto invalid_stream;
1173 }
1174
1175 /*
1176 * If the register is live, the "restart" is superfluous,
1177 * and we don't want to mess with the existing start address.
1178 */
1179 if (!localInReg[reg].live) {
1180 localInReg[reg].startAddress = address;
1181 localInReg[reg].live = true;
1182 }
1183 break;
1184
1185 case DBG_SET_PROLOGUE_END:
1186 case DBG_SET_EPILOGUE_BEGIN:
1187 case DBG_SET_FILE:
1188 break;
1189
1190 default: {
1191 int adjopcode = opcode - DBG_FIRST_SPECIAL;
1192
1193 address += adjopcode / DBG_LINE_RANGE;
1194 line += DBG_LINE_BASE + (adjopcode % DBG_LINE_RANGE);
1195
1196 if (posCb != NULL) {
1197 int done;
1198 done = posCb(cnxt, address, line);
1199
1200 if (done) {
1201 // early exit
1202 goto end;
1203 }
1204 }
1205 break;
1206 }
1207 }
1208 }
1209
1210 end:
1211 {
1212 int reg;
1213 for (reg = 0; reg < pCode->registersSize; reg++) {
1214 emitLocalCbIfLive (cnxt, reg, insnsSize, localInReg, localCb);
1215 }
1216 }
1217 return;
1218
1219 invalid_stream:
1220 IF_LOGE() {
1221 char* methodDescriptor = dexProtoCopyMethodDescriptor(&proto);
1222 LOGE("Invalid debug info stream. class %s; proto %s",
1223 classDescriptor, methodDescriptor);
1224 free(methodDescriptor);
1225 }
1226 }
1227
1228