1 /*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 /*
18 * Access the contents of a .dex file.
19 */
20
21 #include "DexFile.h"
22 #include "DexOptData.h"
23 #include "DexProto.h"
24 #include "DexCatch.h"
25 #include "Leb128.h"
26 #include "sha1.h"
27 #include "ZipArchive.h"
28
29 #include <zlib.h>
30
31 #include <stdlib.h>
32 #include <stddef.h>
33 #include <string.h>
34 #include <fcntl.h>
35 #include <errno.h>
36
37
38 /*
39 * Verifying checksums is good, but it slows things down and causes us to
40 * touch every page. In the "optimized" world, it doesn't work at all,
41 * because we rewrite the contents.
42 */
43 static const bool kVerifyChecksum = false;
44 static const bool kVerifySignature = false;
45
46 /* (documented in header) */
dexGetPrimitiveTypeDescriptorChar(PrimitiveType type)47 char dexGetPrimitiveTypeDescriptorChar(PrimitiveType type) {
48 const char* string = dexGetPrimitiveTypeDescriptor(type);
49
50 return (string == NULL) ? '\0' : string[0];
51 }
52
53 /* (documented in header) */
dexGetPrimitiveTypeDescriptor(PrimitiveType type)54 const char* dexGetPrimitiveTypeDescriptor(PrimitiveType type) {
55 switch (type) {
56 case PRIM_VOID: return "V";
57 case PRIM_BOOLEAN: return "Z";
58 case PRIM_BYTE: return "B";
59 case PRIM_SHORT: return "S";
60 case PRIM_CHAR: return "C";
61 case PRIM_INT: return "I";
62 case PRIM_LONG: return "J";
63 case PRIM_FLOAT: return "F";
64 case PRIM_DOUBLE: return "D";
65 default: return NULL;
66 }
67
68 return NULL;
69 }
70
71 /* (documented in header) */
dexGetBoxedTypeDescriptor(PrimitiveType type)72 const char* dexGetBoxedTypeDescriptor(PrimitiveType type) {
73 switch (type) {
74 case PRIM_VOID: return NULL;
75 case PRIM_BOOLEAN: return "Ljava/lang/Boolean;";
76 case PRIM_BYTE: return "Ljava/lang/Byte;";
77 case PRIM_SHORT: return "Ljava/lang/Short;";
78 case PRIM_CHAR: return "Ljava/lang/Character;";
79 case PRIM_INT: return "Ljava/lang/Integer;";
80 case PRIM_LONG: return "Ljava/lang/Long;";
81 case PRIM_FLOAT: return "Ljava/lang/Float;";
82 case PRIM_DOUBLE: return "Ljava/lang/Double;";
83 default: return NULL;
84 }
85 }
86
87 /* (documented in header) */
dexGetPrimitiveTypeFromDescriptorChar(char descriptorChar)88 PrimitiveType dexGetPrimitiveTypeFromDescriptorChar(char descriptorChar) {
89 switch (descriptorChar) {
90 case 'V': return PRIM_VOID;
91 case 'Z': return PRIM_BOOLEAN;
92 case 'B': return PRIM_BYTE;
93 case 'S': return PRIM_SHORT;
94 case 'C': return PRIM_CHAR;
95 case 'I': return PRIM_INT;
96 case 'J': return PRIM_LONG;
97 case 'F': return PRIM_FLOAT;
98 case 'D': return PRIM_DOUBLE;
99 default: return PRIM_NOT;
100 }
101 }
102
103 /* Return the UTF-8 encoded string with the specified string_id index,
104 * also filling in the UTF-16 size (number of 16-bit code points).*/
dexStringAndSizeById(const DexFile * pDexFile,u4 idx,u4 * utf16Size)105 const char* dexStringAndSizeById(const DexFile* pDexFile, u4 idx,
106 u4* utf16Size) {
107 const DexStringId* pStringId = dexGetStringId(pDexFile, idx);
108 const u1* ptr = pDexFile->baseAddr + pStringId->stringDataOff;
109
110 *utf16Size = readUnsignedLeb128(&ptr);
111 return (const char*) ptr;
112 }
113
114 /*
115 * Format an SHA-1 digest for printing. tmpBuf must be able to hold at
116 * least kSHA1DigestOutputLen bytes.
117 */
118 const char* dvmSHA1DigestToStr(const unsigned char digest[], char* tmpBuf);
119
120 /*
121 * Compute a SHA-1 digest on a range of bytes.
122 */
dexComputeSHA1Digest(const unsigned char * data,size_t length,unsigned char digest[])123 static void dexComputeSHA1Digest(const unsigned char* data, size_t length,
124 unsigned char digest[])
125 {
126 SHA1_CTX context;
127 SHA1Init(&context);
128 SHA1Update(&context, data, length);
129 SHA1Final(digest, &context);
130 }
131
132 /*
133 * Format the SHA-1 digest into the buffer, which must be able to hold at
134 * least kSHA1DigestOutputLen bytes. Returns a pointer to the buffer,
135 */
dexSHA1DigestToStr(const unsigned char digest[],char * tmpBuf)136 static const char* dexSHA1DigestToStr(const unsigned char digest[],char* tmpBuf)
137 {
138 static const char hexDigit[] = "0123456789abcdef";
139 char* cp;
140 int i;
141
142 cp = tmpBuf;
143 for (i = 0; i < kSHA1DigestLen; i++) {
144 *cp++ = hexDigit[digest[i] >> 4];
145 *cp++ = hexDigit[digest[i] & 0x0f];
146 }
147 *cp++ = '\0';
148
149 assert(cp == tmpBuf + kSHA1DigestOutputLen);
150
151 return tmpBuf;
152 }
153
154 /*
155 * Compute a hash code on a UTF-8 string, for use with internal hash tables.
156 *
157 * This may or may not be compatible with UTF-8 hash functions used inside
158 * the Dalvik VM.
159 *
160 * The basic "multiply by 31 and add" approach does better on class names
161 * than most other things tried (e.g. adler32).
162 */
classDescriptorHash(const char * str)163 static u4 classDescriptorHash(const char* str)
164 {
165 u4 hash = 1;
166
167 while (*str != '\0')
168 hash = hash * 31 + *str++;
169
170 return hash;
171 }
172
173 /*
174 * Add an entry to the class lookup table. We hash the string and probe
175 * until we find an open slot.
176 */
classLookupAdd(DexFile * pDexFile,DexClassLookup * pLookup,int stringOff,int classDefOff,int * pNumProbes)177 static void classLookupAdd(DexFile* pDexFile, DexClassLookup* pLookup,
178 int stringOff, int classDefOff, int* pNumProbes)
179 {
180 const char* classDescriptor =
181 (const char*) (pDexFile->baseAddr + stringOff);
182 const DexClassDef* pClassDef =
183 (const DexClassDef*) (pDexFile->baseAddr + classDefOff);
184 u4 hash = classDescriptorHash(classDescriptor);
185 int mask = pLookup->numEntries-1;
186 int idx = hash & mask;
187
188 /*
189 * Find the first empty slot. We oversized the table, so this is
190 * guaranteed to finish.
191 */
192 int probes = 0;
193 while (pLookup->table[idx].classDescriptorOffset != 0) {
194 idx = (idx + 1) & mask;
195 probes++;
196 }
197 //if (probes > 1)
198 // LOGW("classLookupAdd: probes=%d", probes);
199
200 pLookup->table[idx].classDescriptorHash = hash;
201 pLookup->table[idx].classDescriptorOffset = stringOff;
202 pLookup->table[idx].classDefOffset = classDefOff;
203 *pNumProbes = probes;
204 }
205
206 /*
207 * Create the class lookup hash table.
208 *
209 * Returns newly-allocated storage.
210 */
dexCreateClassLookup(DexFile * pDexFile)211 DexClassLookup* dexCreateClassLookup(DexFile* pDexFile)
212 {
213 DexClassLookup* pLookup;
214 int allocSize;
215 int i, numEntries;
216 int numProbes, totalProbes, maxProbes;
217
218 numProbes = totalProbes = maxProbes = 0;
219
220 assert(pDexFile != NULL);
221
222 /*
223 * Using a factor of 3 results in far less probing than a factor of 2,
224 * but almost doubles the flash storage requirements for the bootstrap
225 * DEX files. The overall impact on class loading performance seems
226 * to be minor. We could probably get some performance improvement by
227 * using a secondary hash.
228 */
229 numEntries = dexRoundUpPower2(pDexFile->pHeader->classDefsSize * 2);
230 allocSize = offsetof(DexClassLookup, table)
231 + numEntries * sizeof(pLookup->table[0]);
232
233 pLookup = (DexClassLookup*) calloc(1, allocSize);
234 if (pLookup == NULL)
235 return NULL;
236 pLookup->size = allocSize;
237 pLookup->numEntries = numEntries;
238
239 for (i = 0; i < (int)pDexFile->pHeader->classDefsSize; i++) {
240 const DexClassDef* pClassDef;
241 const char* pString;
242
243 pClassDef = dexGetClassDef(pDexFile, i);
244 pString = dexStringByTypeIdx(pDexFile, pClassDef->classIdx);
245
246 classLookupAdd(pDexFile, pLookup,
247 (u1*)pString - pDexFile->baseAddr,
248 (u1*)pClassDef - pDexFile->baseAddr, &numProbes);
249
250 if (numProbes > maxProbes)
251 maxProbes = numProbes;
252 totalProbes += numProbes;
253 }
254
255 LOGV("Class lookup: classes=%d slots=%d (%d%% occ) alloc=%d"
256 " total=%d max=%d",
257 pDexFile->pHeader->classDefsSize, numEntries,
258 (100 * pDexFile->pHeader->classDefsSize) / numEntries,
259 allocSize, totalProbes, maxProbes);
260
261 return pLookup;
262 }
263
264
265 /*
266 * Set up the basic raw data pointers of a DexFile. This function isn't
267 * meant for general use.
268 */
dexFileSetupBasicPointers(DexFile * pDexFile,const u1 * data)269 void dexFileSetupBasicPointers(DexFile* pDexFile, const u1* data) {
270 DexHeader *pHeader = (DexHeader*) data;
271
272 pDexFile->baseAddr = data;
273 pDexFile->pHeader = pHeader;
274 pDexFile->pStringIds = (const DexStringId*) (data + pHeader->stringIdsOff);
275 pDexFile->pTypeIds = (const DexTypeId*) (data + pHeader->typeIdsOff);
276 pDexFile->pFieldIds = (const DexFieldId*) (data + pHeader->fieldIdsOff);
277 pDexFile->pMethodIds = (const DexMethodId*) (data + pHeader->methodIdsOff);
278 pDexFile->pProtoIds = (const DexProtoId*) (data + pHeader->protoIdsOff);
279 pDexFile->pClassDefs = (const DexClassDef*) (data + pHeader->classDefsOff);
280 pDexFile->pLinkData = (const DexLink*) (data + pHeader->linkOff);
281 }
282
283 /*
284 * Parse an optimized or unoptimized .dex file sitting in memory. This is
285 * called after the byte-ordering and structure alignment has been fixed up.
286 *
287 * On success, return a newly-allocated DexFile.
288 */
dexFileParse(const u1 * data,size_t length,int flags)289 DexFile* dexFileParse(const u1* data, size_t length, int flags)
290 {
291 DexFile* pDexFile = NULL;
292 const DexHeader* pHeader;
293 const u1* magic;
294 int result = -1;
295
296 if (length < sizeof(DexHeader)) {
297 LOGE("too short to be a valid .dex");
298 goto bail; /* bad file format */
299 }
300
301 pDexFile = (DexFile*) malloc(sizeof(DexFile));
302 if (pDexFile == NULL)
303 goto bail; /* alloc failure */
304 memset(pDexFile, 0, sizeof(DexFile));
305
306 /*
307 * Peel off the optimized header.
308 */
309 if (memcmp(data, DEX_OPT_MAGIC, 4) == 0) {
310 magic = data;
311 if (memcmp(magic+4, DEX_OPT_MAGIC_VERS, 4) != 0) {
312 LOGE("bad opt version (0x%02x %02x %02x %02x)",
313 magic[4], magic[5], magic[6], magic[7]);
314 goto bail;
315 }
316
317 pDexFile->pOptHeader = (const DexOptHeader*) data;
318 LOGV("Good opt header, DEX offset is %d, flags=0x%02x",
319 pDexFile->pOptHeader->dexOffset, pDexFile->pOptHeader->flags);
320
321 /* parse the optimized dex file tables */
322 if (!dexParseOptData(data, length, pDexFile))
323 goto bail;
324
325 /* ignore the opt header and appended data from here on out */
326 data += pDexFile->pOptHeader->dexOffset;
327 length -= pDexFile->pOptHeader->dexOffset;
328 if (pDexFile->pOptHeader->dexLength > length) {
329 LOGE("File truncated? stored len=%d, rem len=%d",
330 pDexFile->pOptHeader->dexLength, (int) length);
331 goto bail;
332 }
333 length = pDexFile->pOptHeader->dexLength;
334 }
335
336 dexFileSetupBasicPointers(pDexFile, data);
337 pHeader = pDexFile->pHeader;
338
339 if (!dexHasValidMagic(pHeader)) {
340 goto bail;
341 }
342
343 /*
344 * Verify the checksum(s). This is reasonably quick, but does require
345 * touching every byte in the DEX file. The base checksum changes after
346 * byte-swapping and DEX optimization.
347 */
348 if (flags & kDexParseVerifyChecksum) {
349 u4 adler = dexComputeChecksum(pHeader);
350 if (adler != pHeader->checksum) {
351 LOGE("ERROR: bad checksum (%08x vs %08x)",
352 adler, pHeader->checksum);
353 if (!(flags & kDexParseContinueOnError))
354 goto bail;
355 } else {
356 LOGV("+++ adler32 checksum (%08x) verified", adler);
357 }
358
359 const DexOptHeader* pOptHeader = pDexFile->pOptHeader;
360 if (pOptHeader != NULL) {
361 adler = dexComputeOptChecksum(pOptHeader);
362 if (adler != pOptHeader->checksum) {
363 LOGE("ERROR: bad opt checksum (%08x vs %08x)",
364 adler, pOptHeader->checksum);
365 if (!(flags & kDexParseContinueOnError))
366 goto bail;
367 } else {
368 LOGV("+++ adler32 opt checksum (%08x) verified", adler);
369 }
370 }
371 }
372
373 /*
374 * Verify the SHA-1 digest. (Normally we don't want to do this --
375 * the digest is used to uniquely identify the original DEX file, and
376 * can't be computed for verification after the DEX is byte-swapped
377 * and optimized.)
378 */
379 if (kVerifySignature) {
380 unsigned char sha1Digest[kSHA1DigestLen];
381 const int nonSum = sizeof(pHeader->magic) + sizeof(pHeader->checksum) +
382 kSHA1DigestLen;
383
384 dexComputeSHA1Digest(data + nonSum, length - nonSum, sha1Digest);
385 if (memcmp(sha1Digest, pHeader->signature, kSHA1DigestLen) != 0) {
386 char tmpBuf1[kSHA1DigestOutputLen];
387 char tmpBuf2[kSHA1DigestOutputLen];
388 LOGE("ERROR: bad SHA1 digest (%s vs %s)",
389 dexSHA1DigestToStr(sha1Digest, tmpBuf1),
390 dexSHA1DigestToStr(pHeader->signature, tmpBuf2));
391 if (!(flags & kDexParseContinueOnError))
392 goto bail;
393 } else {
394 LOGV("+++ sha1 digest verified");
395 }
396 }
397
398 if (pHeader->fileSize != length) {
399 LOGE("ERROR: stored file size (%d) != expected (%d)",
400 (int) pHeader->fileSize, (int) length);
401 if (!(flags & kDexParseContinueOnError))
402 goto bail;
403 }
404
405 if (pHeader->classDefsSize == 0) {
406 LOGE("ERROR: DEX file has no classes in it, failing");
407 goto bail;
408 }
409
410 /*
411 * Success!
412 */
413 result = 0;
414
415 bail:
416 if (result != 0 && pDexFile != NULL) {
417 dexFileFree(pDexFile);
418 pDexFile = NULL;
419 }
420 return pDexFile;
421 }
422
423 /*
424 * Free up the DexFile and any associated data structures.
425 *
426 * Note we may be called with a partially-initialized DexFile.
427 */
dexFileFree(DexFile * pDexFile)428 void dexFileFree(DexFile* pDexFile)
429 {
430 if (pDexFile == NULL)
431 return;
432
433 free(pDexFile);
434 }
435
436 /*
437 * Look up a class definition entry by descriptor.
438 *
439 * "descriptor" should look like "Landroid/debug/Stuff;".
440 */
dexFindClass(const DexFile * pDexFile,const char * descriptor)441 const DexClassDef* dexFindClass(const DexFile* pDexFile,
442 const char* descriptor)
443 {
444 const DexClassLookup* pLookup = pDexFile->pClassLookup;
445 u4 hash;
446 int idx, mask;
447
448 hash = classDescriptorHash(descriptor);
449 mask = pLookup->numEntries - 1;
450 idx = hash & mask;
451
452 /*
453 * Search until we find a matching entry or an empty slot.
454 */
455 while (true) {
456 int offset;
457
458 offset = pLookup->table[idx].classDescriptorOffset;
459 if (offset == 0)
460 return NULL;
461
462 if (pLookup->table[idx].classDescriptorHash == hash) {
463 const char* str;
464
465 str = (const char*) (pDexFile->baseAddr + offset);
466 if (strcmp(str, descriptor) == 0) {
467 return (const DexClassDef*)
468 (pDexFile->baseAddr + pLookup->table[idx].classDefOffset);
469 }
470 }
471
472 idx = (idx + 1) & mask;
473 }
474 }
475
476
477 /*
478 * Compute the DEX file checksum for a memory-mapped DEX file.
479 */
dexComputeChecksum(const DexHeader * pHeader)480 u4 dexComputeChecksum(const DexHeader* pHeader)
481 {
482 const u1* start = (const u1*) pHeader;
483
484 uLong adler = adler32(0L, Z_NULL, 0);
485 const int nonSum = sizeof(pHeader->magic) + sizeof(pHeader->checksum);
486
487 return (u4) adler32(adler, start + nonSum, pHeader->fileSize - nonSum);
488 }
489
490 /*
491 * Compute the size, in bytes, of a DexCode.
492 */
dexGetDexCodeSize(const DexCode * pCode)493 size_t dexGetDexCodeSize(const DexCode* pCode)
494 {
495 /*
496 * The catch handler data is the last entry. It has a variable number
497 * of variable-size pieces, so we need to create an iterator.
498 */
499 u4 handlersSize;
500 u4 offset;
501 u4 ui;
502
503 if (pCode->triesSize != 0) {
504 handlersSize = dexGetHandlersSize(pCode);
505 offset = dexGetFirstHandlerOffset(pCode);
506 } else {
507 handlersSize = 0;
508 offset = 0;
509 }
510
511 for (ui = 0; ui < handlersSize; ui++) {
512 DexCatchIterator iterator;
513 dexCatchIteratorInit(&iterator, pCode, offset);
514 offset = dexCatchIteratorGetEndOffset(&iterator, pCode);
515 }
516
517 const u1* handlerData = dexGetCatchHandlerData(pCode);
518
519 //LOGD("+++ pCode=%p handlerData=%p last offset=%d",
520 // pCode, handlerData, offset);
521
522 /* return the size of the catch handler + everything before it */
523 return (handlerData - (u1*) pCode) + offset;
524 }
525
526 /*
527 * Round up to the next highest power of 2.
528 *
529 * Found on http://graphics.stanford.edu/~seander/bithacks.html.
530 */
dexRoundUpPower2(u4 val)531 u4 dexRoundUpPower2(u4 val)
532 {
533 val--;
534 val |= val >> 1;
535 val |= val >> 2;
536 val |= val >> 4;
537 val |= val >> 8;
538 val |= val >> 16;
539 val++;
540
541 return val;
542 }
543