1 /*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 /*
17 * Read-only access to Zip archives, with minimal heap allocation.
18 */
19 #include "ZipArchive.h"
20
21 #include <zlib.h>
22
23 #include <stdlib.h>
24 #include <string.h>
25 #include <fcntl.h>
26 #include <errno.h>
27
28
/*
 * Zip file constants.
 *
 * The "offset" values below are byte offsets measured from the start of
 * the record they belong to.
 */

/* end-of-central-directory (EOCD) record */
#define kEOCDSignature      0x06054b50
#define kEOCDLen            22
#define kEOCDNumEntries     8       /* offset to #of entries in file */
#define kEOCDFileOffset     16      /* offset to central directory */

#define kMaxCommentLen      65535   /* longest possible in ushort */
#define kMaxEOCDSearch      (kMaxCommentLen + kEOCDLen)

/* local file header (LFH) */
#define kLFHSignature       0x04034b50
#define kLFHLen             30      /* excluding variable-len fields */
#define kLFHNameLen         26      /* offset to filename length */
#define kLFHExtraLen        28      /* offset to extra length */

/* central directory entry (CDE) */
#define kCDESignature       0x02014b50
#define kCDELen             46      /* excluding variable-len fields */
#define kCDEMethod          10      /* offset to compression method */
#define kCDEModWhen         12      /* offset to modification timestamp */
#define kCDECRC             16      /* offset to entry CRC */
#define kCDECompLen         20      /* offset to compressed length */
#define kCDEUncompLen       24      /* offset to uncompressed length */
#define kCDENameLen         28      /* offset to filename length */
#define kCDEExtraLen        30      /* offset to extra length */
#define kCDECommentLen      32      /* offset to comment length */
#define kCDELocalOffset     42      /* offset to local hdr */

/*
 * A ZipEntry value of 0 means "invalid", so the hash table index cannot be
 * handed out directly; it is shifted by a fixed amount first.  Using a
 * large value helps ensure that people don't mix & match arguments, e.g.
 * with entry indices.
 */
#define kZipEntryAdj        10000
64
65 /*
66 * Convert a ZipEntry to a hash table index, verifying that it's in a
67 * valid range.
68 */
entryToIndex(const ZipArchive * pArchive,const ZipEntry entry)69 static int entryToIndex(const ZipArchive* pArchive, const ZipEntry entry)
70 {
71 long ent = ((long) entry) - kZipEntryAdj;
72 if (ent < 0 || ent >= pArchive->mHashTableSize ||
73 pArchive->mHashTable[ent].name == NULL)
74 {
75 LOGW("Invalid ZipEntry %p (%ld)\n", entry, ent);
76 return -1;
77 }
78 return ent;
79 }
80
/*
 * Hash "len" bytes starting at "str".  The data does not need to be
 * null-terminated.
 *
 * Each step multiplies the running value by 31 and adds the next byte
 * (as a plain "char"); arithmetic wraps as unsigned int.  A zero length
 * yields 0.
 */
static unsigned int computeHash(const char* str, int len)
{
    unsigned int acc = 0;
    const char* cur = str;

    for (; len != 0; --len) {
        acc = 31 * acc + *cur;
        ++cur;
    }

    return acc;
}
93
94 /*
95 * Add a new entry to the hash table.
96 */
addToHash(ZipArchive * pArchive,const char * str,int strLen,unsigned int hash)97 static void addToHash(ZipArchive* pArchive, const char* str, int strLen,
98 unsigned int hash)
99 {
100 const int hashTableSize = pArchive->mHashTableSize;
101 int ent = hash & (hashTableSize - 1);
102
103 /*
104 * We over-allocated the table, so we're guaranteed to find an empty slot.
105 */
106 while (pArchive->mHashTable[ent].name != NULL)
107 ent = (ent + 1) & (hashTableSize-1);
108
109 pArchive->mHashTable[ent].name = str;
110 pArchive->mHashTable[ent].nameLen = strLen;
111 }
112
113 /*
114 * Get 2 little-endian bytes.
115 */
get2LE(unsigned char const * pSrc)116 static u2 get2LE(unsigned char const* pSrc)
117 {
118 return pSrc[0] | (pSrc[1] << 8);
119 }
120
121 /*
122 * Get 4 little-endian bytes.
123 */
get4LE(unsigned char const * pSrc)124 static u4 get4LE(unsigned char const* pSrc)
125 {
126 u4 result;
127
128 result = pSrc[0];
129 result |= pSrc[1] << 8;
130 result |= pSrc[2] << 16;
131 result |= pSrc[3] << 24;
132
133 return result;
134 }
135
136 /*
137 * Parse the Zip archive, verifying its contents and initializing internal
138 * data structures.
139 */
parseZipArchive(ZipArchive * pArchive,const MemMapping * pMap)140 static bool parseZipArchive(ZipArchive* pArchive, const MemMapping* pMap)
141 {
142 #define CHECK_OFFSET(_off) { \
143 if ((unsigned int) (_off) >= maxOffset) { \
144 LOGE("ERROR: bad offset %u (max %d): %s\n", \
145 (unsigned int) (_off), maxOffset, #_off); \
146 goto bail; \
147 } \
148 }
149 bool result = false;
150 const unsigned char* basePtr = (const unsigned char*)pMap->addr;
151 const unsigned char* ptr;
152 size_t length = pMap->length;
153 unsigned int i, numEntries, cdOffset;
154 unsigned int val;
155
156 /*
157 * The first 4 bytes of the file will either be the local header
158 * signature for the first file (kLFHSignature) or, if the archive doesn't
159 * have any files in it, the end-of-central-directory signature
160 * (kEOCDSignature).
161 */
162 val = get4LE(basePtr);
163 if (val == kEOCDSignature) {
164 LOGI("Found Zip archive, but it looks empty\n");
165 goto bail;
166 } else if (val != kLFHSignature) {
167 LOGV("Not a Zip archive (found 0x%08x)\n", val);
168 goto bail;
169 }
170
171 /*
172 * Find the EOCD. We'll find it immediately unless they have a file
173 * comment.
174 */
175 ptr = basePtr + length - kEOCDLen;
176
177 while (ptr >= basePtr) {
178 if (*ptr == (kEOCDSignature & 0xff) && get4LE(ptr) == kEOCDSignature)
179 break;
180 ptr--;
181 }
182 if (ptr < basePtr) {
183 LOGI("Could not find end-of-central-directory in Zip\n");
184 goto bail;
185 }
186
187 /*
188 * There are two interesting items in the EOCD block: the number of
189 * entries in the file, and the file offset of the start of the
190 * central directory.
191 *
192 * (There's actually a count of the #of entries in this file, and for
193 * all files which comprise a spanned archive, but for our purposes
194 * we're only interested in the current file. Besides, we expect the
195 * two to be equivalent for our stuff.)
196 */
197 numEntries = get2LE(ptr + kEOCDNumEntries);
198 cdOffset = get4LE(ptr + kEOCDFileOffset);
199
200 /* valid offsets are [0,EOCD] */
201 unsigned int maxOffset;
202 maxOffset = (ptr - basePtr) +1;
203
204 LOGV("+++ numEntries=%d cdOffset=%d\n", numEntries, cdOffset);
205 if (numEntries == 0 || cdOffset >= length) {
206 LOGW("Invalid entries=%d offset=%d (len=%zd)\n",
207 numEntries, cdOffset, length);
208 goto bail;
209 }
210
211 /*
212 * Create hash table. We have a minimum 75% load factor, possibly as
213 * low as 50% after we round off to a power of 2. There must be at
214 * least one unused entry to avoid an infinite loop during creation.
215 */
216 pArchive->mNumEntries = numEntries;
217 pArchive->mHashTableSize = dexRoundUpPower2(1 + (numEntries * 4) / 3);
218 pArchive->mHashTable = (ZipHashEntry*)
219 calloc(pArchive->mHashTableSize, sizeof(ZipHashEntry));
220
221 /*
222 * Walk through the central directory, adding entries to the hash
223 * table.
224 */
225 ptr = basePtr + cdOffset;
226 for (i = 0; i < numEntries; i++) {
227 unsigned int fileNameLen, extraLen, commentLen, localHdrOffset;
228 const unsigned char* localHdr;
229 unsigned int hash;
230
231 if (get4LE(ptr) != kCDESignature) {
232 LOGW("Missed a central dir sig (at %d)\n", i);
233 goto bail;
234 }
235 if (ptr + kCDELen > basePtr + length) {
236 LOGW("Ran off the end (at %d)\n", i);
237 goto bail;
238 }
239
240 localHdrOffset = get4LE(ptr + kCDELocalOffset);
241 CHECK_OFFSET(localHdrOffset);
242 fileNameLen = get2LE(ptr + kCDENameLen);
243 extraLen = get2LE(ptr + kCDEExtraLen);
244 commentLen = get2LE(ptr + kCDECommentLen);
245
246 //LOGV("+++ %d: localHdr=%d fnl=%d el=%d cl=%d\n",
247 // i, localHdrOffset, fileNameLen, extraLen, commentLen);
248 //LOGV(" '%.*s'\n", fileNameLen, ptr + kCDELen);
249
250 /* add the CDE filename to the hash table */
251 hash = computeHash((const char*)ptr + kCDELen, fileNameLen);
252 addToHash(pArchive, (const char*)ptr + kCDELen, fileNameLen, hash);
253
254 localHdr = basePtr + localHdrOffset;
255 if (get4LE(localHdr) != kLFHSignature) {
256 LOGW("Bad offset to local header: %d (at %d)\n",
257 localHdrOffset, i);
258 goto bail;
259 }
260
261 ptr += kCDELen + fileNameLen + extraLen + commentLen;
262 CHECK_OFFSET(ptr - basePtr);
263 }
264
265 result = true;
266
267 bail:
268 return result;
269 #undef CHECK_OFFSET
270 }
271
272 /*
273 * Open the specified file read-only. We memory-map the entire thing and
274 * parse the contents.
275 *
276 * This will be called on non-Zip files, especially during VM startup, so
277 * we don't want to be too noisy about certain types of failure. (Do
278 * we want a "quiet" flag?)
279 *
280 * On success, we fill out the contents of "pArchive" and return 0.
281 */
dexZipOpenArchive(const char * fileName,ZipArchive * pArchive)282 int dexZipOpenArchive(const char* fileName, ZipArchive* pArchive)
283 {
284 int fd, err;
285
286 LOGV("Opening archive '%s' %p\n", fileName, pArchive);
287
288 memset(pArchive, 0, sizeof(ZipArchive));
289
290 fd = open(fileName, O_RDONLY, 0);
291 if (fd < 0) {
292 err = errno ? errno : -1;
293 LOGV("Unable to open '%s': %s\n", fileName, strerror(err));
294 return err;
295 }
296
297 return dexZipPrepArchive(fd, fileName, pArchive);
298 }
299
300 /*
301 * Prepare to access a ZipArchive in an open file descriptor.
302 */
dexZipPrepArchive(int fd,const char * debugFileName,ZipArchive * pArchive)303 int dexZipPrepArchive(int fd, const char* debugFileName, ZipArchive* pArchive)
304 {
305 MemMapping map;
306 int err;
307
308 map.addr = NULL;
309 memset(pArchive, 0, sizeof(*pArchive));
310
311 pArchive->mFd = fd;
312
313 if (sysMapFileInShmem(pArchive->mFd, &map) != 0) {
314 err = -1;
315 LOGW("Map of '%s' failed\n", debugFileName);
316 goto bail;
317 }
318
319 if (map.length < kEOCDLen) {
320 err = -1;
321 LOGV("File '%s' too small to be zip (%zd)\n", debugFileName,map.length);
322 goto bail;
323 }
324
325 if (!parseZipArchive(pArchive, &map)) {
326 err = -1;
327 LOGV("Parsing '%s' failed\n", debugFileName);
328 goto bail;
329 }
330
331 /* success */
332 err = 0;
333 sysCopyMap(&pArchive->mMap, &map);
334 map.addr = NULL;
335
336 bail:
337 if (err != 0)
338 dexZipCloseArchive(pArchive);
339 if (map.addr != NULL)
340 sysReleaseShmem(&map);
341 return err;
342 }
343
344
345 /*
346 * Close a ZipArchive, closing the file and freeing the contents.
347 *
348 * NOTE: the ZipArchive may not have been fully created.
349 */
dexZipCloseArchive(ZipArchive * pArchive)350 void dexZipCloseArchive(ZipArchive* pArchive)
351 {
352 LOGV("Closing archive %p\n", pArchive);
353
354 if (pArchive->mFd >= 0)
355 close(pArchive->mFd);
356
357 sysReleaseShmem(&pArchive->mMap);
358
359 free(pArchive->mHashTable);
360
361 pArchive->mFd = -1;
362 pArchive->mNumEntries = -1;
363 pArchive->mHashTableSize = -1;
364 pArchive->mHashTable = NULL;
365 }
366
367
368 /*
369 * Find a matching entry.
370 *
371 * Returns 0 if not found.
372 */
dexZipFindEntry(const ZipArchive * pArchive,const char * entryName)373 ZipEntry dexZipFindEntry(const ZipArchive* pArchive, const char* entryName)
374 {
375 int nameLen = strlen(entryName);
376 unsigned int hash = computeHash(entryName, nameLen);
377 const int hashTableSize = pArchive->mHashTableSize;
378 int ent = hash & (hashTableSize-1);
379
380 while (pArchive->mHashTable[ent].name != NULL) {
381 if (pArchive->mHashTable[ent].nameLen == nameLen &&
382 memcmp(pArchive->mHashTable[ent].name, entryName, nameLen) == 0)
383 {
384 /* match */
385 return (ZipEntry) (ent + kZipEntryAdj);
386 }
387
388 ent = (ent + 1) & (hashTableSize-1);
389 }
390
391 return NULL;
392 }
393
#if 0
/*
 * Find the Nth entry (currently compiled out).
 *
 * The hash table is sparse, so this scans it from the start and counts
 * occupied slots until "idx" of them have been skipped.  If we need to
 * speed this up we can either allocate a parallel lookup table or
 * (perhaps better) provide an iterator interface.
 *
 * Returns NULL if "idx" is negative or not below mNumEntries, or if the
 * scan runs out of slots.
 */
ZipEntry findEntryByIndex(ZipArchive* pArchive, int idx)
{
    int slot;
    int toSkip = idx;

    if (idx < 0 || idx >= pArchive->mNumEntries) {
        LOGW("Invalid index %d\n", idx);
        return NULL;
    }

    for (slot = 0; slot < pArchive->mHashTableSize; slot++) {
        if (pArchive->mHashTable[slot].name == NULL)
            continue;
        if (toSkip == 0)
            return (ZipEntry) (slot + kZipEntryAdj);
        toSkip--;
    }

    return NULL;
}
#endif
420
421 /*
422 * Get the useful fields from the zip entry.
423 *
424 * Returns "false" if the offsets to the fields or the contents of the fields
425 * appear to be bogus.
426 */
dexZipGetEntryInfo(const ZipArchive * pArchive,ZipEntry entry,int * pMethod,long * pUncompLen,long * pCompLen,off_t * pOffset,long * pModWhen,long * pCrc32)427 bool dexZipGetEntryInfo(const ZipArchive* pArchive, ZipEntry entry,
428 int* pMethod, long* pUncompLen, long* pCompLen, off_t* pOffset,
429 long* pModWhen, long* pCrc32)
430 {
431 int ent = entryToIndex(pArchive, entry);
432 if (ent < 0)
433 return false;
434
435 /*
436 * Recover the start of the central directory entry from the filename
437 * pointer.
438 */
439 const unsigned char* basePtr = (const unsigned char*)
440 pArchive->mMap.addr;
441 const unsigned char* ptr = (const unsigned char*)
442 pArchive->mHashTable[ent].name;
443 size_t zipLength =
444 pArchive->mMap.length;
445
446 ptr -= kCDELen;
447
448 int method = get2LE(ptr + kCDEMethod);
449 if (pMethod != NULL)
450 *pMethod = method;
451
452 if (pModWhen != NULL)
453 *pModWhen = get4LE(ptr + kCDEModWhen);
454 if (pCrc32 != NULL)
455 *pCrc32 = get4LE(ptr + kCDECRC);
456
457 /*
458 * We need to make sure that the lengths are not so large that somebody
459 * trying to map the compressed or uncompressed data runs off the end
460 * of the mapped region.
461 */
462 unsigned long localHdrOffset = get4LE(ptr + kCDELocalOffset);
463 if (localHdrOffset + kLFHLen >= zipLength) {
464 LOGE("ERROR: bad local hdr offset in zip\n");
465 return false;
466 }
467 const unsigned char* localHdr = basePtr + localHdrOffset;
468 off_t dataOffset = localHdrOffset + kLFHLen
469 + get2LE(localHdr + kLFHNameLen) + get2LE(localHdr + kLFHExtraLen);
470 if ((unsigned long) dataOffset >= zipLength) {
471 LOGE("ERROR: bad data offset in zip\n");
472 return false;
473 }
474
475 if (pCompLen != NULL) {
476 *pCompLen = get4LE(ptr + kCDECompLen);
477 if (*pCompLen < 0 || (size_t)(dataOffset + *pCompLen) >= zipLength) {
478 LOGE("ERROR: bad compressed length in zip\n");
479 return false;
480 }
481 }
482 if (pUncompLen != NULL) {
483 *pUncompLen = get4LE(ptr + kCDEUncompLen);
484 if (*pUncompLen < 0) {
485 LOGE("ERROR: negative uncompressed length in zip\n");
486 return false;
487 }
488 if (method == kCompressStored &&
489 (size_t)(dataOffset + *pUncompLen) >= zipLength)
490 {
491 LOGE("ERROR: bad uncompressed length in zip\n");
492 return false;
493 }
494 }
495
496 if (pOffset != NULL) {
497 *pOffset = dataOffset;
498 }
499 return true;
500 }
501
502 /*
503 * Uncompress "deflate" data from one buffer to an open file descriptor.
504 */
inflateToFile(int fd,const void * inBuf,long uncompLen,long compLen)505 static bool inflateToFile(int fd, const void* inBuf, long uncompLen,
506 long compLen)
507 {
508 bool result = false;
509 const int kWriteBufSize = 32768;
510 unsigned char writeBuf[kWriteBufSize];
511 z_stream zstream;
512 int zerr;
513
514 /*
515 * Initialize the zlib stream struct.
516 */
517 memset(&zstream, 0, sizeof(zstream));
518 zstream.zalloc = Z_NULL;
519 zstream.zfree = Z_NULL;
520 zstream.opaque = Z_NULL;
521 zstream.next_in = (Bytef*)inBuf;
522 zstream.avail_in = compLen;
523 zstream.next_out = (Bytef*) writeBuf;
524 zstream.avail_out = sizeof(writeBuf);
525 zstream.data_type = Z_UNKNOWN;
526
527 /*
528 * Use the undocumented "negative window bits" feature to tell zlib
529 * that there's no zlib header waiting for it.
530 */
531 zerr = inflateInit2(&zstream, -MAX_WBITS);
532 if (zerr != Z_OK) {
533 if (zerr == Z_VERSION_ERROR) {
534 LOGE("Installed zlib is not compatible with linked version (%s)\n",
535 ZLIB_VERSION);
536 } else {
537 LOGE("Call to inflateInit2 failed (zerr=%d)\n", zerr);
538 }
539 goto bail;
540 }
541
542 /*
543 * Loop while we have more to do.
544 */
545 do {
546 /*
547 * Expand data.
548 */
549 zerr = inflate(&zstream, Z_NO_FLUSH);
550 if (zerr != Z_OK && zerr != Z_STREAM_END) {
551 LOGW("zlib inflate: zerr=%d (nIn=%p aIn=%u nOut=%p aOut=%u)\n",
552 zerr, zstream.next_in, zstream.avail_in,
553 zstream.next_out, zstream.avail_out);
554 goto z_bail;
555 }
556
557 /* write when we're full or when we're done */
558 if (zstream.avail_out == 0 ||
559 (zerr == Z_STREAM_END && zstream.avail_out != sizeof(writeBuf)))
560 {
561 long writeSize = zstream.next_out - writeBuf;
562 int cc = write(fd, writeBuf, writeSize);
563 if (cc != (int) writeSize) {
564 if (cc < 0) {
565 LOGW("write failed in inflate: %s\n", strerror(errno));
566 } else {
567 LOGW("partial write in inflate (%d vs %ld)\n",
568 cc, writeSize);
569 }
570 goto z_bail;
571 }
572
573 zstream.next_out = writeBuf;
574 zstream.avail_out = sizeof(writeBuf);
575 }
576 } while (zerr == Z_OK);
577
578 assert(zerr == Z_STREAM_END); /* other errors should've been caught */
579
580 /* paranoia */
581 if ((long) zstream.total_out != uncompLen) {
582 LOGW("Size mismatch on inflated file (%ld vs %ld)\n",
583 zstream.total_out, uncompLen);
584 goto z_bail;
585 }
586
587 result = true;
588
589 z_bail:
590 inflateEnd(&zstream); /* free up any allocated structures */
591
592 bail:
593 return result;
594 }
595
596 /*
597 * Uncompress an entry, in its entirety, to an open file descriptor.
598 *
599 * TODO: this doesn't verify the data's CRC, but probably should (especially
600 * for uncompressed data).
601 */
dexZipExtractEntryToFile(const ZipArchive * pArchive,const ZipEntry entry,int fd)602 bool dexZipExtractEntryToFile(const ZipArchive* pArchive,
603 const ZipEntry entry, int fd)
604 {
605 bool result = false;
606 int ent = entryToIndex(pArchive, entry);
607 if (ent < 0)
608 return -1;
609
610 const unsigned char* basePtr = (const unsigned char*)pArchive->mMap.addr;
611 int method;
612 long uncompLen, compLen;
613 off_t offset;
614
615 if (!dexZipGetEntryInfo(pArchive, entry, &method, &uncompLen, &compLen,
616 &offset, NULL, NULL))
617 {
618 goto bail;
619 }
620
621 if (method == kCompressStored) {
622 ssize_t actual;
623
624 actual = write(fd, basePtr + offset, uncompLen);
625 if (actual < 0) {
626 LOGE("Write failed: %s\n", strerror(errno));
627 goto bail;
628 } else if (actual != uncompLen) {
629 LOGE("Partial write during uncompress (%d of %ld)\n",
630 (int) actual, uncompLen);
631 goto bail;
632 } else {
633 LOGI("+++ successful write\n");
634 }
635 } else {
636 if (!inflateToFile(fd, basePtr+offset, uncompLen, compLen))
637 goto bail;
638 }
639
640 result = true;
641
642 bail:
643 return result;
644 }
645
646