1 /*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 /*
18 * Access the contents of a .dex file.
19 */
20
21 #include "DexFile.h"
22 #include "DexOptData.h"
23 #include "DexProto.h"
24 #include "DexCatch.h"
25 #include "Leb128.h"
26 #include "sha1.h"
27 #include "ZipArchive.h"
28
29 #include <zlib.h>
30
31 #include <stdlib.h>
32 #include <stddef.h>
33 #include <string.h>
34 #include <fcntl.h>
35 #include <errno.h>
36
37
38 /*
39 * Verifying checksums is good, but it slows things down and causes us to
40 * touch every page. In the "optimized" world, it doesn't work at all,
41 * because we rewrite the contents.
42 */
43 static const bool kVerifySignature = false;
44
45 /* (documented in header) */
dexGetPrimitiveTypeDescriptorChar(PrimitiveType type)46 char dexGetPrimitiveTypeDescriptorChar(PrimitiveType type) {
47 const char* string = dexGetPrimitiveTypeDescriptor(type);
48
49 return (string == NULL) ? '\0' : string[0];
50 }
51
52 /* (documented in header) */
dexGetPrimitiveTypeDescriptor(PrimitiveType type)53 const char* dexGetPrimitiveTypeDescriptor(PrimitiveType type) {
54 switch (type) {
55 case PRIM_VOID: return "V";
56 case PRIM_BOOLEAN: return "Z";
57 case PRIM_BYTE: return "B";
58 case PRIM_SHORT: return "S";
59 case PRIM_CHAR: return "C";
60 case PRIM_INT: return "I";
61 case PRIM_LONG: return "J";
62 case PRIM_FLOAT: return "F";
63 case PRIM_DOUBLE: return "D";
64 default: return NULL;
65 }
66
67 return NULL;
68 }
69
70 /* (documented in header) */
dexGetBoxedTypeDescriptor(PrimitiveType type)71 const char* dexGetBoxedTypeDescriptor(PrimitiveType type) {
72 switch (type) {
73 case PRIM_VOID: return NULL;
74 case PRIM_BOOLEAN: return "Ljava/lang/Boolean;";
75 case PRIM_BYTE: return "Ljava/lang/Byte;";
76 case PRIM_SHORT: return "Ljava/lang/Short;";
77 case PRIM_CHAR: return "Ljava/lang/Character;";
78 case PRIM_INT: return "Ljava/lang/Integer;";
79 case PRIM_LONG: return "Ljava/lang/Long;";
80 case PRIM_FLOAT: return "Ljava/lang/Float;";
81 case PRIM_DOUBLE: return "Ljava/lang/Double;";
82 default: return NULL;
83 }
84 }
85
86 /* (documented in header) */
dexGetPrimitiveTypeFromDescriptorChar(char descriptorChar)87 PrimitiveType dexGetPrimitiveTypeFromDescriptorChar(char descriptorChar) {
88 switch (descriptorChar) {
89 case 'V': return PRIM_VOID;
90 case 'Z': return PRIM_BOOLEAN;
91 case 'B': return PRIM_BYTE;
92 case 'S': return PRIM_SHORT;
93 case 'C': return PRIM_CHAR;
94 case 'I': return PRIM_INT;
95 case 'J': return PRIM_LONG;
96 case 'F': return PRIM_FLOAT;
97 case 'D': return PRIM_DOUBLE;
98 default: return PRIM_NOT;
99 }
100 }
101
102 /* Return the UTF-8 encoded string with the specified string_id index,
103 * also filling in the UTF-16 size (number of 16-bit code points).*/
dexStringAndSizeById(const DexFile * pDexFile,u4 idx,u4 * utf16Size)104 const char* dexStringAndSizeById(const DexFile* pDexFile, u4 idx,
105 u4* utf16Size) {
106 const DexStringId* pStringId = dexGetStringId(pDexFile, idx);
107 const u1* ptr = pDexFile->baseAddr + pStringId->stringDataOff;
108
109 *utf16Size = readUnsignedLeb128(&ptr);
110 return (const char*) ptr;
111 }
112
113 /*
114 * Format an SHA-1 digest for printing. tmpBuf must be able to hold at
115 * least kSHA1DigestOutputLen bytes.
116 */
117 const char* dvmSHA1DigestToStr(const unsigned char digest[], char* tmpBuf);
118
119 /*
120 * Compute a SHA-1 digest on a range of bytes.
121 */
dexComputeSHA1Digest(const unsigned char * data,size_t length,unsigned char digest[])122 static void dexComputeSHA1Digest(const unsigned char* data, size_t length,
123 unsigned char digest[])
124 {
125 SHA1_CTX context;
126 SHA1Init(&context);
127 SHA1Update(&context, data, length);
128 SHA1Final(digest, &context);
129 }
130
131 /*
132 * Format the SHA-1 digest into the buffer, which must be able to hold at
133 * least kSHA1DigestOutputLen bytes. Returns a pointer to the buffer,
134 */
dexSHA1DigestToStr(const unsigned char digest[],char * tmpBuf)135 static const char* dexSHA1DigestToStr(const unsigned char digest[],char* tmpBuf)
136 {
137 static const char hexDigit[] = "0123456789abcdef";
138 char* cp;
139 int i;
140
141 cp = tmpBuf;
142 for (i = 0; i < kSHA1DigestLen; i++) {
143 *cp++ = hexDigit[digest[i] >> 4];
144 *cp++ = hexDigit[digest[i] & 0x0f];
145 }
146 *cp++ = '\0';
147
148 assert(cp == tmpBuf + kSHA1DigestOutputLen);
149
150 return tmpBuf;
151 }
152
153 /*
154 * Compute a hash code on a UTF-8 string, for use with internal hash tables.
155 *
156 * This may or may not be compatible with UTF-8 hash functions used inside
157 * the Dalvik VM.
158 *
159 * The basic "multiply by 31 and add" approach does better on class names
160 * than most other things tried (e.g. adler32).
161 */
classDescriptorHash(const char * str)162 static u4 classDescriptorHash(const char* str)
163 {
164 u4 hash = 1;
165
166 while (*str != '\0')
167 hash = hash * 31 + *str++;
168
169 return hash;
170 }
171
172 /*
173 * Add an entry to the class lookup table. We hash the string and probe
174 * until we find an open slot.
175 */
classLookupAdd(DexFile * pDexFile,DexClassLookup * pLookup,int stringOff,int classDefOff,int * pNumProbes)176 static void classLookupAdd(DexFile* pDexFile, DexClassLookup* pLookup,
177 int stringOff, int classDefOff, int* pNumProbes)
178 {
179 const char* classDescriptor =
180 (const char*) (pDexFile->baseAddr + stringOff);
181 u4 hash = classDescriptorHash(classDescriptor);
182 int mask = pLookup->numEntries-1;
183 int idx = hash & mask;
184
185 /*
186 * Find the first empty slot. We oversized the table, so this is
187 * guaranteed to finish.
188 */
189 int probes = 0;
190 while (pLookup->table[idx].classDescriptorOffset != 0) {
191 idx = (idx + 1) & mask;
192 probes++;
193 }
194 //if (probes > 1)
195 // ALOGW("classLookupAdd: probes=%d", probes);
196
197 pLookup->table[idx].classDescriptorHash = hash;
198 pLookup->table[idx].classDescriptorOffset = stringOff;
199 pLookup->table[idx].classDefOffset = classDefOff;
200 *pNumProbes = probes;
201 }
202
203 /*
204 * Create the class lookup hash table.
205 *
206 * Returns newly-allocated storage.
207 */
dexCreateClassLookup(DexFile * pDexFile)208 DexClassLookup* dexCreateClassLookup(DexFile* pDexFile)
209 {
210 DexClassLookup* pLookup;
211 int allocSize;
212 int i, numEntries;
213 int numProbes, totalProbes, maxProbes;
214
215 numProbes = totalProbes = maxProbes = 0;
216
217 assert(pDexFile != NULL);
218
219 /*
220 * Using a factor of 3 results in far less probing than a factor of 2,
221 * but almost doubles the flash storage requirements for the bootstrap
222 * DEX files. The overall impact on class loading performance seems
223 * to be minor. We could probably get some performance improvement by
224 * using a secondary hash.
225 */
226 numEntries = dexRoundUpPower2(pDexFile->pHeader->classDefsSize * 2);
227 allocSize = offsetof(DexClassLookup, table)
228 + numEntries * sizeof(pLookup->table[0]);
229
230 pLookup = (DexClassLookup*) calloc(1, allocSize);
231 if (pLookup == NULL)
232 return NULL;
233 pLookup->size = allocSize;
234 pLookup->numEntries = numEntries;
235
236 for (i = 0; i < (int)pDexFile->pHeader->classDefsSize; i++) {
237 const DexClassDef* pClassDef;
238 const char* pString;
239
240 pClassDef = dexGetClassDef(pDexFile, i);
241 pString = dexStringByTypeIdx(pDexFile, pClassDef->classIdx);
242
243 classLookupAdd(pDexFile, pLookup,
244 (u1*)pString - pDexFile->baseAddr,
245 (u1*)pClassDef - pDexFile->baseAddr, &numProbes);
246
247 if (numProbes > maxProbes)
248 maxProbes = numProbes;
249 totalProbes += numProbes;
250 }
251
252 ALOGV("Class lookup: classes=%d slots=%d (%d%% occ) alloc=%d"
253 " total=%d max=%d",
254 pDexFile->pHeader->classDefsSize, numEntries,
255 (100 * pDexFile->pHeader->classDefsSize) / numEntries,
256 allocSize, totalProbes, maxProbes);
257
258 return pLookup;
259 }
260
261
262 /*
263 * Set up the basic raw data pointers of a DexFile. This function isn't
264 * meant for general use.
265 */
dexFileSetupBasicPointers(DexFile * pDexFile,const u1 * data)266 void dexFileSetupBasicPointers(DexFile* pDexFile, const u1* data) {
267 DexHeader *pHeader = (DexHeader*) data;
268
269 pDexFile->baseAddr = data;
270 pDexFile->pHeader = pHeader;
271 pDexFile->pStringIds = (const DexStringId*) (data + pHeader->stringIdsOff);
272 pDexFile->pTypeIds = (const DexTypeId*) (data + pHeader->typeIdsOff);
273 pDexFile->pFieldIds = (const DexFieldId*) (data + pHeader->fieldIdsOff);
274 pDexFile->pMethodIds = (const DexMethodId*) (data + pHeader->methodIdsOff);
275 pDexFile->pProtoIds = (const DexProtoId*) (data + pHeader->protoIdsOff);
276 pDexFile->pClassDefs = (const DexClassDef*) (data + pHeader->classDefsOff);
277 pDexFile->pLinkData = (const DexLink*) (data + pHeader->linkOff);
278 }
279
280 /*
281 * Parse an optimized or unoptimized .dex file sitting in memory. This is
282 * called after the byte-ordering and structure alignment has been fixed up.
283 *
284 * On success, return a newly-allocated DexFile.
285 */
dexFileParse(const u1 * data,size_t length,int flags)286 DexFile* dexFileParse(const u1* data, size_t length, int flags)
287 {
288 DexFile* pDexFile = NULL;
289 const DexHeader* pHeader;
290 const u1* magic;
291 int result = -1;
292
293 if (length < sizeof(DexHeader)) {
294 ALOGE("too short to be a valid .dex");
295 goto bail; /* bad file format */
296 }
297
298 pDexFile = (DexFile*) malloc(sizeof(DexFile));
299 if (pDexFile == NULL)
300 goto bail; /* alloc failure */
301 memset(pDexFile, 0, sizeof(DexFile));
302
303 /*
304 * Peel off the optimized header.
305 */
306 if (memcmp(data, DEX_OPT_MAGIC, 4) == 0) {
307 magic = data;
308 if (memcmp(magic+4, DEX_OPT_MAGIC_VERS, 4) != 0) {
309 ALOGE("bad opt version (0x%02x %02x %02x %02x)",
310 magic[4], magic[5], magic[6], magic[7]);
311 goto bail;
312 }
313
314 pDexFile->pOptHeader = (const DexOptHeader*) data;
315 ALOGV("Good opt header, DEX offset is %d, flags=0x%02x",
316 pDexFile->pOptHeader->dexOffset, pDexFile->pOptHeader->flags);
317
318 /* parse the optimized dex file tables */
319 if (!dexParseOptData(data, length, pDexFile))
320 goto bail;
321
322 /* ignore the opt header and appended data from here on out */
323 data += pDexFile->pOptHeader->dexOffset;
324 length -= pDexFile->pOptHeader->dexOffset;
325 if (pDexFile->pOptHeader->dexLength > length) {
326 ALOGE("File truncated? stored len=%d, rem len=%d",
327 pDexFile->pOptHeader->dexLength, (int) length);
328 goto bail;
329 }
330 length = pDexFile->pOptHeader->dexLength;
331 }
332
333 dexFileSetupBasicPointers(pDexFile, data);
334 pHeader = pDexFile->pHeader;
335
336 if (!dexHasValidMagic(pHeader)) {
337 goto bail;
338 }
339
340 /*
341 * Verify the checksum(s). This is reasonably quick, but does require
342 * touching every byte in the DEX file. The base checksum changes after
343 * byte-swapping and DEX optimization.
344 */
345 if (flags & kDexParseVerifyChecksum) {
346 u4 adler = dexComputeChecksum(pHeader);
347 if (adler != pHeader->checksum) {
348 ALOGE("ERROR: bad checksum (%08x vs %08x)",
349 adler, pHeader->checksum);
350 if (!(flags & kDexParseContinueOnError))
351 goto bail;
352 } else {
353 ALOGV("+++ adler32 checksum (%08x) verified", adler);
354 }
355
356 const DexOptHeader* pOptHeader = pDexFile->pOptHeader;
357 if (pOptHeader != NULL) {
358 adler = dexComputeOptChecksum(pOptHeader);
359 if (adler != pOptHeader->checksum) {
360 ALOGE("ERROR: bad opt checksum (%08x vs %08x)",
361 adler, pOptHeader->checksum);
362 if (!(flags & kDexParseContinueOnError))
363 goto bail;
364 } else {
365 ALOGV("+++ adler32 opt checksum (%08x) verified", adler);
366 }
367 }
368 }
369
370 /*
371 * Verify the SHA-1 digest. (Normally we don't want to do this --
372 * the digest is used to uniquely identify the original DEX file, and
373 * can't be computed for verification after the DEX is byte-swapped
374 * and optimized.)
375 */
376 if (kVerifySignature) {
377 unsigned char sha1Digest[kSHA1DigestLen];
378 const int nonSum = sizeof(pHeader->magic) + sizeof(pHeader->checksum) +
379 kSHA1DigestLen;
380
381 dexComputeSHA1Digest(data + nonSum, length - nonSum, sha1Digest);
382 if (memcmp(sha1Digest, pHeader->signature, kSHA1DigestLen) != 0) {
383 char tmpBuf1[kSHA1DigestOutputLen];
384 char tmpBuf2[kSHA1DigestOutputLen];
385 ALOGE("ERROR: bad SHA1 digest (%s vs %s)",
386 dexSHA1DigestToStr(sha1Digest, tmpBuf1),
387 dexSHA1DigestToStr(pHeader->signature, tmpBuf2));
388 if (!(flags & kDexParseContinueOnError))
389 goto bail;
390 } else {
391 ALOGV("+++ sha1 digest verified");
392 }
393 }
394
395 if (pHeader->fileSize != length) {
396 ALOGE("ERROR: stored file size (%d) != expected (%d)",
397 (int) pHeader->fileSize, (int) length);
398 if (!(flags & kDexParseContinueOnError))
399 goto bail;
400 }
401
402 if (pHeader->classDefsSize == 0) {
403 ALOGE("ERROR: DEX file has no classes in it, failing");
404 goto bail;
405 }
406
407 /*
408 * Success!
409 */
410 result = 0;
411
412 bail:
413 if (result != 0 && pDexFile != NULL) {
414 dexFileFree(pDexFile);
415 pDexFile = NULL;
416 }
417 return pDexFile;
418 }
419
420 /*
421 * Free up the DexFile and any associated data structures.
422 *
423 * Note we may be called with a partially-initialized DexFile.
424 */
dexFileFree(DexFile * pDexFile)425 void dexFileFree(DexFile* pDexFile)
426 {
427 if (pDexFile == NULL)
428 return;
429
430 free(pDexFile);
431 }
432
433 /*
434 * Look up a class definition entry by descriptor.
435 *
436 * "descriptor" should look like "Landroid/debug/Stuff;".
437 */
dexFindClass(const DexFile * pDexFile,const char * descriptor)438 const DexClassDef* dexFindClass(const DexFile* pDexFile,
439 const char* descriptor)
440 {
441 const DexClassLookup* pLookup = pDexFile->pClassLookup;
442 u4 hash;
443 int idx, mask;
444
445 hash = classDescriptorHash(descriptor);
446 mask = pLookup->numEntries - 1;
447 idx = hash & mask;
448
449 /*
450 * Search until we find a matching entry or an empty slot.
451 */
452 while (true) {
453 int offset;
454
455 offset = pLookup->table[idx].classDescriptorOffset;
456 if (offset == 0)
457 return NULL;
458
459 if (pLookup->table[idx].classDescriptorHash == hash) {
460 const char* str;
461
462 str = (const char*) (pDexFile->baseAddr + offset);
463 if (strcmp(str, descriptor) == 0) {
464 return (const DexClassDef*)
465 (pDexFile->baseAddr + pLookup->table[idx].classDefOffset);
466 }
467 }
468
469 idx = (idx + 1) & mask;
470 }
471 }
472
473
474 /*
475 * Compute the DEX file checksum for a memory-mapped DEX file.
476 */
dexComputeChecksum(const DexHeader * pHeader)477 u4 dexComputeChecksum(const DexHeader* pHeader)
478 {
479 const u1* start = (const u1*) pHeader;
480
481 uLong adler = adler32(0L, Z_NULL, 0);
482 const int nonSum = sizeof(pHeader->magic) + sizeof(pHeader->checksum);
483
484 return (u4) adler32(adler, start + nonSum, pHeader->fileSize - nonSum);
485 }
486
487 /*
488 * Compute the size, in bytes, of a DexCode.
489 */
dexGetDexCodeSize(const DexCode * pCode)490 size_t dexGetDexCodeSize(const DexCode* pCode)
491 {
492 /*
493 * The catch handler data is the last entry. It has a variable number
494 * of variable-size pieces, so we need to create an iterator.
495 */
496 u4 handlersSize;
497 u4 offset;
498 u4 ui;
499
500 if (pCode->triesSize != 0) {
501 handlersSize = dexGetHandlersSize(pCode);
502 offset = dexGetFirstHandlerOffset(pCode);
503 } else {
504 handlersSize = 0;
505 offset = 0;
506 }
507
508 for (ui = 0; ui < handlersSize; ui++) {
509 DexCatchIterator iterator;
510 dexCatchIteratorInit(&iterator, pCode, offset);
511 offset = dexCatchIteratorGetEndOffset(&iterator, pCode);
512 }
513
514 const u1* handlerData = dexGetCatchHandlerData(pCode);
515
516 //ALOGD("+++ pCode=%p handlerData=%p last offset=%d",
517 // pCode, handlerData, offset);
518
519 /* return the size of the catch handler + everything before it */
520 return (handlerData - (u1*) pCode) + offset;
521 }
522
523 /*
524 * Round up to the next highest power of 2.
525 *
526 * Found on http://graphics.stanford.edu/~seander/bithacks.html.
527 */
dexRoundUpPower2(u4 val)528 u4 dexRoundUpPower2(u4 val)
529 {
530 val--;
531 val |= val >> 1;
532 val |= val >> 2;
533 val |= val >> 4;
534 val |= val >> 8;
535 val |= val >> 16;
536 val++;
537
538 return val;
539 }
540