• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2006 The Android Open Source Project
3  *
4  * Simple Zip file support.
5  */
6 #include "safe_iop.h"
7 #include "zlib.h"
8 
9 #include <errno.h>
10 #include <fcntl.h>
11 #include <limits.h>
12 #include <stdint.h>     // for uintptr_t
13 #include <stdlib.h>
14 #include <sys/stat.h>   // for S_ISLNK()
15 #include <unistd.h>
16 
17 #define LOG_TAG "minzip"
18 #include "Zip.h"
19 #include "Bits.h"
20 #include "Log.h"
21 #include "DirUtil.h"
22 
23 #undef NDEBUG   // do this after including Log.h
24 #include <assert.h>
25 
26 #define SORT_ENTRIES 1
27 
28 /*
29  * Offset and length constants (java.util.zip naming convention).
30  */
/*
 * Signatures, fixed header sizes and field byte offsets for the three
 * record types this file parses: central directory entries (CEN*), the
 * end-of-central-directory record (END*) and local file headers (LOC*),
 * plus the data descriptor (EXT*).  Field notes below name the ZipEntry
 * member or local variable that the visible code fills from that offset;
 * constants without a note are not referenced in the visible code.
 */
31 enum {
32     CENSIG = 0x02014b50,      // PK12
33     CENHDR = 46,              // fixed-size part; the filename follows it
34 
35     CENVEM =  4,              // -> versionMadeBy
36     CENVER =  6,
37     CENFLG =  8,
38     CENHOW = 10,              // -> compression
39     CENTIM = 12,              // -> modTime
40     CENCRC = 16,              // -> crc32
41     CENSIZ = 20,              // -> compLen
42     CENLEN = 24,              // -> uncompLen
43     CENNAM = 28,              // filename length
44     CENEXT = 30,              // extra field length
45     CENCOM = 32,              // comment length
46     CENDSK = 34,
47     CENATT = 36,
48     CENATX = 38,              // -> externalFileAttributes
49     CENOFF = 42,              // offset of the entry's local header
50 
51     ENDSIG = 0x06054b50,     // PK56
52     ENDHDR = 22,             // smallest file size accepted as a Zip
53 
54     ENDSUB =  8,             // read as the number of entries
55     ENDTOT = 10,
56     ENDSIZ = 12,
57     ENDOFF = 16,             // read as the central directory offset
58     ENDCOM = 20,
59 
60     EXTSIG = 0x08074b50,     // PK78
61     EXTHDR = 16,
62 
63     EXTCRC =  4,
64     EXTSIZ =  8,
65     EXTLEN = 12,
66 
67     LOCSIG = 0x04034b50,      // PK34
68     LOCHDR = 30,              // fixed-size part of a local header
69 
70     LOCVER =  4,
71     LOCFLG =  6,
72     LOCHOW =  8,
73     LOCTIM = 10,
74     LOCCRC = 14,
75     LOCSIZ = 18,
76     LOCLEN = 22,
77     LOCNAM = 26,              // added to LOCHDR to locate entry data
78     LOCEXT = 28,              // added to LOCHDR to locate entry data
79 
80     STORED = 0,               // compression value: handled by processStoredEntry
81     DEFLATED = 8,             // compression value: handled by processDeflatedEntry
82 
83     CENVEM_UNIX = 3 << 8,   // the high byte of CENVEM
84 };
85 
86 
87 /*
88  * For debugging, dump the contents of a ZipEntry.
89  */
/* Compiled out; flip the "#if 0" to enable while debugging. */
90 #if 0
91 static void dumpEntry(const ZipEntry* pEntry)
92 {
    /* The name is printed with an explicit length (%.*s). */
93     LOGI(" %p '%.*s'\n", pEntry->fileName,pEntry->fileNameLen,pEntry->fileName);
94     LOGI("   off=%ld comp=%ld uncomp=%ld how=%d\n", pEntry->offset,
95         pEntry->compLen, pEntry->uncompLen, pEntry->compression);
96 }
97 #endif
98 
99 /*
100  * (This is a mzHashTableLookup callback.)
101  *
102  * Compare two ZipEntry structs, by name.
103  */
hashcmpZipEntry(const void * ventry1,const void * ventry2)104 static int hashcmpZipEntry(const void* ventry1, const void* ventry2)
105 {
106     const ZipEntry* entry1 = (const ZipEntry*) ventry1;
107     const ZipEntry* entry2 = (const ZipEntry*) ventry2;
108 
109     if (entry1->fileNameLen != entry2->fileNameLen)
110         return entry1->fileNameLen - entry2->fileNameLen;
111     return memcmp(entry1->fileName, entry2->fileName, entry1->fileNameLen);
112 }
113 
114 /*
115  * (This is a mzHashTableLookup callback.)
116  *
117  * find a ZipEntry struct by name.
118  */
hashcmpZipName(const void * ventry,const void * vname)119 static int hashcmpZipName(const void* ventry, const void* vname)
120 {
121     const ZipEntry* entry = (const ZipEntry*) ventry;
122     const char* name = (const char*) vname;
123     unsigned int nameLen = strlen(name);
124 
125     if (entry->fileNameLen != nameLen)
126         return entry->fileNameLen - nameLen;
127     return memcmp(entry->fileName, name, nameLen);
128 }
129 
/*
 * Hash the first "nameLen" bytes of "name" (multiply-by-31 hash).
 *
 * The accumulator starts at 2 rather than 0 so the result is not
 * expected to coincide with any other hash function.
 */
static unsigned int computeHash(const char* name, int nameLen)
{
    unsigned int hash = 2;

    for (; nameLen != 0; nameLen--) {
        hash = hash * 31 + *name;
        name++;
    }

    return hash;
}
145 
/*
 * Insert "pEntry" into "pHash", keyed by its filename.
 *
 * The lookup is performed with the add flag set.  If it hands back an
 * entry other than "pEntry", a warning about a duplicate name is logged
 * and processing continues.
 */
static void addEntryToHashTable(HashTable* pHash, ZipEntry* pEntry)
{
    const ZipEntry* existing = (const ZipEntry*)mzHashTableLookup(pHash,
                computeHash(pEntry->fileName, pEntry->fileNameLen),
                pEntry, hashcmpZipEntry, true);

    if (existing == pEntry) {
        return;
    }

    LOGW("WARNING: duplicate entry '%.*s' in Zip\n",
        existing->fileNameLen, existing->fileName);
    /* keep going */
}
159 
validFilename(const char * fileName,unsigned int fileNameLen)160 static int validFilename(const char *fileName, unsigned int fileNameLen)
161 {
162     // Forbid super long filenames.
163     if (fileNameLen >= PATH_MAX) {
164         LOGW("Filename too long (%d chatacters)\n", fileNameLen);
165         return 0;
166     }
167 
168     // Require all characters to be printable ASCII (no NUL, no UTF-8, etc).
169     unsigned int i;
170     for (i = 0; i < fileNameLen; ++i) {
171         if (fileName[i] < 32 || fileName[i] >= 127) {
172             LOGW("Filename contains invalid character '\%03o'\n", fileName[i]);
173             return 0;
174         }
175     }
176 
177     return 1;
178 }
179 
180 /*
181  * Parse the contents of a Zip archive.  After confirming that the file
182  * is in fact a Zip, we scan out the contents of the central directory and
183  * store it in a hash table.
184  *
185  * Returns "true" on success.
186  */
/*
 * pArchive: receives numEntries, pEntries and pHash.
 * pMap:     mapping of the whole file; the caller (mzOpenZipArchive)
 *           has already checked that it is at least ENDHDR bytes long.
 *
 * The filename pointers stored in the entries point into the mapping;
 * nothing is copied.  With SORT_ENTRIES set, pEntries is kept ordered
 * by filename as entries are inserted.
 *
 * On failure the hash table is freed here, but pEntries is not; the
 * caller's mzCloseZipArchive() call releases it.
 *
 * get2LE()/get4LE() are defined elsewhere; presumably they read 2- and
 * 4-byte little-endian values -- confirm in Bits.h.
 */
parseZipArchive(ZipArchive * pArchive,const MemMapping * pMap)187 static bool parseZipArchive(ZipArchive* pArchive, const MemMapping* pMap)
188 {
189     bool result = false;
190     const unsigned char* ptr;
191     unsigned int i, numEntries, cdOffset;
192     unsigned int val;
193 
194     /*
195      * The first 4 bytes of the file will either be the local header
196      * signature for the first file (LOCSIG) or, if the archive doesn't
197      * have any files in it, the end-of-central-directory signature (ENDSIG).
198      */
199     val = get4LE(pMap->addr);
200     if (val == ENDSIG) {
201         LOGI("Found Zip archive, but it looks empty\n");
202         goto bail;
203     } else if (val != LOCSIG) {
204         LOGV("Not a Zip archive (found 0x%08x)\n", val);
205         goto bail;
206     }
207 
208     /*
209      * Find the EOCD.  We'll find it immediately unless they have a file
210      * comment.
211      */
    /* Start at the last position where a full ENDHDR-byte record fits
     * and walk backward one byte at a time.
     */
212     ptr = pMap->addr + pMap->length - ENDHDR;
213 
214     while (ptr >= (const unsigned char*) pMap->addr) {
        /* Cheap first-byte test before the full 4-byte comparison. */
215         if (*ptr == (ENDSIG & 0xff) && get4LE(ptr) == ENDSIG)
216             break;
217         ptr--;
218     }
219     if (ptr < (const unsigned char*) pMap->addr) {
220         LOGI("Could not find end-of-central-directory in Zip\n");
221         goto bail;
222     }
223 
224     /*
225      * There are two interesting items in the EOCD block: the number of
226      * entries in the file, and the file offset of the start of the
227      * central directory.
228      */
229     numEntries = get2LE(ptr + ENDSUB);
230     cdOffset = get4LE(ptr + ENDOFF);
231 
232     LOGVV("numEntries=%d cdOffset=%d\n", numEntries, cdOffset);
233     if (numEntries == 0 || cdOffset >= pMap->length) {
234         LOGW("Invalid entries=%d offset=%d (len=%zd)\n",
235             numEntries, cdOffset, pMap->length);
236         goto bail;
237     }
238 
239     /*
240      * Create data structures to hold entries.
241      */
242     pArchive->numEntries = numEntries;
243     pArchive->pEntries = (ZipEntry*) calloc(numEntries, sizeof(ZipEntry));
244     pArchive->pHash = mzHashTableCreate(mzHashSize(numEntries), NULL);
245     if (pArchive->pEntries == NULL || pArchive->pHash == NULL)
246         goto bail;
247 
    /* Walk the central directory, one record per iteration. */
248     ptr = pMap->addr + cdOffset;
249     for (i = 0; i < numEntries; i++) {
250         ZipEntry* pEntry;
251         unsigned int fileNameLen, extraLen, commentLen, localHdrOffset;
252         const unsigned char* localHdr;
253         const char *fileName;
254 
        /* The fixed-size part of the record must lie inside the map. */
255         if (ptr + CENHDR > (const unsigned char*)pMap->addr + pMap->length) {
256             LOGW("Ran off the end (at %d)\n", i);
257             goto bail;
258         }
259         if (get4LE(ptr) != CENSIG) {
260             LOGW("Missed a central dir sig (at %d)\n", i);
261             goto bail;
262         }
263 
264         localHdrOffset = get4LE(ptr + CENOFF);
265         fileNameLen = get2LE(ptr + CENNAM);
266         extraLen = get2LE(ptr + CENEXT);
267         commentLen = get2LE(ptr + CENCOM);
        /* The name sits right after the fixed header, inside the map;
         * it is not NUL-terminated.
         */
268         fileName = (const char*)ptr + CENHDR;
269         if (fileName + fileNameLen > (const char*)pMap->addr + pMap->length) {
270             LOGW("Filename ran off the end (at %d)\n", i);
271             goto bail;
272         }
273         if (!validFilename(fileName, fileNameLen)) {
274             LOGW("Invalid filename (at %d)\n", i);
275             goto bail;
276         }
277 
278 #if SORT_ENTRIES
279         /* Figure out where this entry should go (binary search).
280          */
281         if (i > 0) {
282             int low, high;
283 
284             low = 0;
285             high = i - 1;
286             while (low <= high) {
287                 int mid;
288                 int diff;
289                 int diffLen;
290 
291                 mid = low + ((high - low) / 2); // avoid overflow
292 
                /* Compare over the shorter of the two names; a tie is
                 * then broken by length.
                 */
293                 if (pArchive->pEntries[mid].fileNameLen < fileNameLen) {
294                     diffLen = pArchive->pEntries[mid].fileNameLen;
295                 } else {
296                     diffLen = fileNameLen;
297                 }
298                 diff = strncmp(pArchive->pEntries[mid].fileName, fileName,
299                         diffLen);
300                 if (diff == 0) {
301                     diff = pArchive->pEntries[mid].fileNameLen - fileNameLen;
302                 }
303                 if (diff < 0) {
304                     low = mid + 1;
305                 } else if (diff > 0) {
306                     high = mid - 1;
307                 } else {
                    /* Same name already present: insert right after it. */
308                     high = mid;
309                     break;
310                 }
311             }
312 
313             unsigned int target = high + 1;
314             assert(target <= i);
315             if (target != i) {
316                 /* It belongs somewhere other than at the end of
317                  * the list.  Make some room at [target].
318                  */
319                 memmove(pArchive->pEntries + target + 1,
320                         pArchive->pEntries + target,
321                         (i - target) * sizeof(ZipEntry));
322             }
323             pEntry = &pArchive->pEntries[target];
324         } else {
325             pEntry = &pArchive->pEntries[0];
326         }
327 #else
328         pEntry = &pArchive->pEntries[i];
329 #endif
330 
331         //LOGI("%d: localHdr=%d fnl=%d el=%d cl=%d\n",
332         //    i, localHdrOffset, fileNameLen, extraLen, commentLen);
333 
334         pEntry->fileNameLen = fileNameLen;
335         pEntry->fileName = fileName;
336 
337         pEntry->compLen = get4LE(ptr + CENSIZ);
338         pEntry->uncompLen = get4LE(ptr + CENLEN);
339         pEntry->compression = get2LE(ptr + CENHOW);
340         pEntry->modTime = get4LE(ptr + CENTIM);
341         pEntry->crc32 = get4LE(ptr + CENCRC);
342 
343         /* These two are necessary for finding the mode of the file.
344          */
345         pEntry->versionMadeBy = get2LE(ptr + CENVEM);
        /* Only a zero high byte or CENVEM_UNIX is accepted. */
346         if ((pEntry->versionMadeBy & 0xff00) != 0 &&
347                 (pEntry->versionMadeBy & 0xff00) != CENVEM_UNIX)
348         {
349             LOGW("Incompatible \"version made by\": 0x%02x (at %d)\n",
350                     pEntry->versionMadeBy >> 8, i);
351             goto bail;
352         }
353         pEntry->externalFileAttributes = get4LE(ptr + CENATX);
354 
355         // Perform pMap->addr + localHdrOffset, ensuring that it won't
356         // overflow. This is needed because localHdrOffset is untrusted.
357         if (!safe_add((uintptr_t *)&localHdr, (uintptr_t)pMap->addr,
358             (uintptr_t)localHdrOffset)) {
359             LOGW("Integer overflow adding in parseZipArchive\n");
360             goto bail;
361         }
362         if ((uintptr_t)localHdr + LOCHDR >
363             (uintptr_t)pMap->addr + pMap->length) {
364             LOGW("Bad offset to local header: %d (at %d)\n", localHdrOffset, i);
365             goto bail;
366         }
367         if (get4LE(localHdr) != LOCSIG) {
368             LOGW("Missed a local header sig (at %d)\n", i);
369             goto bail;
370         }
        /* Entry data begins after the local header's fixed part plus
         * the two variable-length fields whose sizes it records.
         */
371         pEntry->offset = localHdrOffset + LOCHDR
372             + get2LE(localHdr + LOCNAM) + get2LE(localHdr + LOCEXT);
373         if (!safe_add(NULL, pEntry->offset, pEntry->compLen)) {
374             LOGW("Integer overflow adding in parseZipArchive\n");
375             goto bail;
376         }
377         if ((size_t)pEntry->offset + pEntry->compLen > pMap->length) {
378             LOGW("Data ran off the end (at %d)\n", i);
379             goto bail;
380         }
381 
382 #if !SORT_ENTRIES
383         /* Add to hash table; no need to lock here.
384          * Can't do this now if we're sorting, because entries
385          * will move around.
386          */
387         addEntryToHashTable(pArchive->pHash, pEntry);
388 #endif
389 
390         //dumpEntry(pEntry);
        /* Step to the next record; it is bounds-checked at the top of
         * the next iteration.
         */
391         ptr += CENHDR + fileNameLen + extraLen + commentLen;
392     }
393 
394 #if SORT_ENTRIES
395     /* If we're sorting, we have to wait until all entries
396      * are in their final places, otherwise the pointers will
397      * probably point to the wrong things.
398      */
399     for (i = 0; i < numEntries; i++) {
400         /* Add to hash table; no need to lock here.
401          */
402         addEntryToHashTable(pArchive->pHash, &pArchive->pEntries[i]);
403     }
404 #endif
405 
406     result = true;
407 
408 bail:
    /* Only the hash table is torn down here; see the header note. */
409     if (!result) {
410         mzHashTableFree(pArchive->pHash);
411         pArchive->pHash = NULL;
412     }
413     return result;
414 }
415 
416 /*
417  * Open a Zip archive and scan out the contents.
418  *
419  * The easiest way to do this is to mmap() the whole thing and do the
420  * traditional backward scan for central directory.  Since the EOCD is
421  * a relatively small bit at the end, we should end up only touching a
422  * small set of pages.
423  *
424  * This will be called on non-Zip files, especially during startup, so
425  * we don't want to be too noisy about failures.  (Do we want a "quiet"
426  * flag?)
427  *
428  * On success, we fill out the contents of "pArchive".
429  */
mzOpenZipArchive(const char * fileName,ZipArchive * pArchive)430 int mzOpenZipArchive(const char* fileName, ZipArchive* pArchive)
431 {
432     MemMapping map;
433     int err;
434 
435     LOGV("Opening archive '%s' %p\n", fileName, pArchive);
436 
437     map.addr = NULL;
438     memset(pArchive, 0, sizeof(*pArchive));
439 
440     pArchive->fd = open(fileName, O_RDONLY, 0);
441     if (pArchive->fd < 0) {
442         err = errno ? errno : -1;
443         LOGV("Unable to open '%s': %s\n", fileName, strerror(err));
444         goto bail;
445     }
446 
447     if (sysMapFileInShmem(pArchive->fd, &map) != 0) {
448         err = -1;
449         LOGW("Map of '%s' failed\n", fileName);
450         goto bail;
451     }
452 
453     if (map.length < ENDHDR) {
454         err = -1;
455         LOGV("File '%s' too small to be zip (%zd)\n", fileName, map.length);
456         goto bail;
457     }
458 
459     if (!parseZipArchive(pArchive, &map)) {
460         err = -1;
461         LOGV("Parsing '%s' failed\n", fileName);
462         goto bail;
463     }
464 
465     err = 0;
466     sysCopyMap(&pArchive->map, &map);
467     map.addr = NULL;
468 
469 bail:
470     if (err != 0)
471         mzCloseZipArchive(pArchive);
472     if (map.addr != NULL)
473         sysReleaseShmem(&map);
474     return err;
475 }
476 
477 /*
478  * Close a ZipArchive, closing the file and freeing the contents.
479  *
480  * NOTE: the ZipArchive may not have been fully created.
481  */
mzCloseZipArchive(ZipArchive * pArchive)482 void mzCloseZipArchive(ZipArchive* pArchive)
483 {
484     LOGV("Closing archive %p\n", pArchive);
485 
486     if (pArchive->fd >= 0)
487         close(pArchive->fd);
488     if (pArchive->map.addr != NULL)
489         sysReleaseShmem(&pArchive->map);
490 
491     free(pArchive->pEntries);
492 
493     mzHashTableFree(pArchive->pHash);
494 
495     pArchive->fd = -1;
496     pArchive->pHash = NULL;
497     pArchive->pEntries = NULL;
498 }
499 
500 /*
501  * Find a matching entry.
502  *
503  * Returns NULL if no matching entry found.
504  */
mzFindZipEntry(const ZipArchive * pArchive,const char * entryName)505 const ZipEntry* mzFindZipEntry(const ZipArchive* pArchive,
506         const char* entryName)
507 {
508     unsigned int itemHash = computeHash(entryName, strlen(entryName));
509 
510     return (const ZipEntry*)mzHashTableLookup(pArchive->pHash,
511                 itemHash, (char*) entryName, hashcmpZipName, false);
512 }
513 
514 /*
515  * Return true if the entry is a symbolic link.
516  */
mzIsZipEntrySymlink(const ZipEntry * pEntry)517 bool mzIsZipEntrySymlink(const ZipEntry* pEntry)
518 {
519     if ((pEntry->versionMadeBy & 0xff00) == CENVEM_UNIX) {
520         return S_ISLNK(pEntry->externalFileAttributes >> 16);
521     }
522     return false;
523 }
524 
525 /* Call processFunction on the uncompressed data of a STORED entry.
526  */
processStoredEntry(const ZipArchive * pArchive,const ZipEntry * pEntry,ProcessZipEntryContentsFunction processFunction,void * cookie)527 static bool processStoredEntry(const ZipArchive *pArchive,
528     const ZipEntry *pEntry, ProcessZipEntryContentsFunction processFunction,
529     void *cookie)
530 {
531     size_t bytesLeft = pEntry->compLen;
532     while (bytesLeft > 0) {
533         unsigned char buf[32 * 1024];
534         ssize_t n;
535         size_t count;
536         bool ret;
537 
538         count = bytesLeft;
539         if (count > sizeof(buf)) {
540             count = sizeof(buf);
541         }
542         n = read(pArchive->fd, buf, count);
543         if (n < 0 || (size_t)n != count) {
544             LOGE("Can't read %zu bytes from zip file: %ld\n", count, n);
545             return false;
546         }
547         ret = processFunction(buf, n, cookie);
548         if (!ret) {
549             return false;
550         }
551         bytesLeft -= count;
552     }
553     return true;
554 }
555 
/*
 * Call processFunction on the inflated data of a DEFLATED entry.
 *
 * Compressed bytes are read from pArchive->fd (starting at the current
 * file offset) into a 32KB buffer, inflated into a second 32KB buffer,
 * and that buffer is handed to processFunction whenever it fills up or
 * the stream ends with output pending.
 *
 * Returns true only if the stream inflated to its end and the number of
 * bytes produced equals pEntry->uncompLen.
 *
 * Success is tracked with a separate flag rather than a -1 sentinel in
 * the byte count, so an entry whose declared size happens to compare
 * equal to -1 cannot turn a failed inflate into success.  (uncompLen is
 * printed with %ld below, so it is presumably a signed long.)
 */
static bool processDeflatedEntry(const ZipArchive *pArchive,
    const ZipEntry *pEntry, ProcessZipEntryContentsFunction processFunction,
    void *cookie)
{
    bool inflated = false;
    long totalOut = 0;
    unsigned char readBuf[32 * 1024];
    unsigned char procBuf[32 * 1024];
    z_stream zstream;
    int zerr;
    long compRemaining;

    compRemaining = pEntry->compLen;

    /*
     * Initialize the zlib stream.
     */
    memset(&zstream, 0, sizeof(zstream));
    zstream.zalloc = Z_NULL;
    zstream.zfree = Z_NULL;
    zstream.opaque = Z_NULL;
    zstream.next_in = NULL;
    zstream.avail_in = 0;
    zstream.next_out = (Bytef*) procBuf;
    zstream.avail_out = sizeof(procBuf);
    zstream.data_type = Z_UNKNOWN;

    /*
     * Use the undocumented "negative window bits" feature to tell zlib
     * that there's no zlib header waiting for it.
     */
    zerr = inflateInit2(&zstream, -MAX_WBITS);
    if (zerr != Z_OK) {
        if (zerr == Z_VERSION_ERROR) {
            LOGE("Installed zlib is not compatible with linked version (%s)\n",
                ZLIB_VERSION);
        } else {
            LOGE("Call to inflateInit2 failed (zerr=%d)\n", zerr);
        }
        /* Nothing was set up, so there is nothing to inflateEnd(). */
        return false;
    }

    /*
     * Loop while we have data.
     */
    do {
        /* read as much as we can */
        if (zstream.avail_in == 0) {
            long getSize = (compRemaining > (long)sizeof(readBuf)) ?
                        (long)sizeof(readBuf) : compRemaining;
            LOGVV("+++ reading %ld bytes (%ld left)\n",
                getSize, compRemaining);

            int cc = read(pArchive->fd, readBuf, getSize);
            if (cc != (int) getSize) {
                LOGW("inflate read failed (%d vs %ld)\n", cc, getSize);
                goto z_bail;
            }

            compRemaining -= getSize;

            zstream.next_in = readBuf;
            zstream.avail_in = getSize;
        }

        /* uncompress the data */
        zerr = inflate(&zstream, Z_NO_FLUSH);
        if (zerr != Z_OK && zerr != Z_STREAM_END) {
            LOGD("zlib inflate call failed (zerr=%d)\n", zerr);
            goto z_bail;
        }

        /* write when we're full or when we're done */
        if (zstream.avail_out == 0 ||
            (zerr == Z_STREAM_END && zstream.avail_out != sizeof(procBuf)))
        {
            long procSize = zstream.next_out - procBuf;
            LOGVV("+++ processing %d bytes\n", (int) procSize);
            bool ret = processFunction(procBuf, procSize, cookie);
            if (!ret) {
                LOGW("Process function elected to fail (in inflate)\n");
                goto z_bail;
            }

            zstream.next_out = procBuf;
            zstream.avail_out = sizeof(procBuf);
        }
    } while (zerr == Z_OK);

    assert(zerr == Z_STREAM_END);       /* other errors should've been caught */

    // success!
    totalOut = zstream.total_out;
    inflated = true;

z_bail:
    inflateEnd(&zstream);        /* free up any allocated structures */

    if (!inflated) {
        /* error already shown */
        return false;
    }
    if (totalOut != pEntry->uncompLen) {
        LOGW("Size mismatch on inflated file (%ld vs %ld)\n",
            totalOut, pEntry->uncompLen);
        return false;
    }
    return true;
}
661 
662 /*
663  * Stream the uncompressed data through the supplied function,
664  * passing cookie to it each time it gets called.  processFunction
665  * may be called more than once.
666  *
667  * If processFunction returns false, the operation is abandoned and
668  * mzProcessZipEntryContents() immediately returns false.
669  *
670  * This is useful for calculating the hash of an entry's uncompressed contents.
671  */
mzProcessZipEntryContents(const ZipArchive * pArchive,const ZipEntry * pEntry,ProcessZipEntryContentsFunction processFunction,void * cookie)672 bool mzProcessZipEntryContents(const ZipArchive *pArchive,
673     const ZipEntry *pEntry, ProcessZipEntryContentsFunction processFunction,
674     void *cookie)
675 {
676     bool ret = false;
677     off_t oldOff;
678 
679     /* save current offset */
680     oldOff = lseek(pArchive->fd, 0, SEEK_CUR);
681 
682     /* Seek to the beginning of the entry's compressed data. */
683     lseek(pArchive->fd, pEntry->offset, SEEK_SET);
684 
685     switch (pEntry->compression) {
686     case STORED:
687         ret = processStoredEntry(pArchive, pEntry, processFunction, cookie);
688         break;
689     case DEFLATED:
690         ret = processDeflatedEntry(pArchive, pEntry, processFunction, cookie);
691         break;
692     default:
693         LOGE("Unsupported compression type %d for entry '%s'\n",
694                 pEntry->compression, pEntry->fileName);
695         break;
696     }
697 
698     /* restore file offset */
699     lseek(pArchive->fd, oldOff, SEEK_SET);
700     return ret;
701 }
702 
/*
 * ProcessZipEntryContentsFunction that folds each chunk into a running
 * CRC.  "crc" points at the unsigned long accumulator.  Always returns
 * true.
 */
static bool crcProcessFunction(const unsigned char *data, int dataLen,
        void *crc)
{
    unsigned long *runningCrc = (unsigned long *)crc;

    *runningCrc = crc32(*runningCrc, data, dataLen);
    return true;
}
709 
710 /*
711  * Check the CRC on this entry; return true if it is correct.
712  * May do other internal checks as well.
713  */
mzIsZipEntryIntact(const ZipArchive * pArchive,const ZipEntry * pEntry)714 bool mzIsZipEntryIntact(const ZipArchive *pArchive, const ZipEntry *pEntry)
715 {
716     unsigned long crc;
717     bool ret;
718 
719     crc = crc32(0L, Z_NULL, 0);
720     ret = mzProcessZipEntryContents(pArchive, pEntry, crcProcessFunction,
721             (void *)&crc);
722     if (!ret) {
723         LOGE("Can't calculate CRC for entry\n");
724         return false;
725     }
726     if (crc != (unsigned long)pEntry->crc32) {
727         LOGW("CRC for entry %.*s (0x%08lx) != expected (0x%08lx)\n",
728                 pEntry->fileNameLen, pEntry->fileName, crc, pEntry->crc32);
729         return false;
730     }
731     return true;
732 }
733 
/* Destination state for copyProcessFunction. */
typedef struct {
    char *buf;      /* next position to write to */
    int bufLen;     /* bytes still available at buf */
} CopyProcessArgs;

/*
 * ProcessZipEntryContentsFunction that appends each chunk to a
 * caller-supplied buffer described by a CopyProcessArgs ("cookie").
 *
 * Returns true after copying "dataLen" bytes and advancing the
 * destination.  Returns false, leaving the destination untouched, if
 * the chunk does not fit in the remaining space or "dataLen" is
 * negative (a negative length would otherwise pass the bound check and
 * turn into a huge size in memcpy()).
 */
static bool copyProcessFunction(const unsigned char *data, int dataLen,
        void *cookie)
{
    CopyProcessArgs *args = (CopyProcessArgs *)cookie;

    if (dataLen < 0 || dataLen > args->bufLen) {
        return false;
    }
    memcpy(args->buf, data, dataLen);
    args->buf += dataLen;
    args->bufLen -= dataLen;
    return true;
}
751 
752 /*
753  * Read an entry into a buffer allocated by the caller.
754  */
mzReadZipEntry(const ZipArchive * pArchive,const ZipEntry * pEntry,char * buf,int bufLen)755 bool mzReadZipEntry(const ZipArchive* pArchive, const ZipEntry* pEntry,
756         char *buf, int bufLen)
757 {
758     CopyProcessArgs args;
759     bool ret;
760 
761     args.buf = buf;
762     args.bufLen = bufLen;
763     ret = mzProcessZipEntryContents(pArchive, pEntry, copyProcessFunction,
764             (void *)&args);
765     if (!ret) {
766         LOGE("Can't extract entry to buffer.\n");
767         return false;
768     }
769     return true;
770 }
771 
/*
 * ProcessZipEntryContentsFunction that writes each chunk to a file
 * descriptor.  The descriptor is carried in "cookie" as an integer
 * stored in a pointer (see mzExtractZipEntryToFile).
 *
 * Keeps calling write() until all "dataLen" bytes are written.  A
 * write interrupted by a signal (EINTR) is retried silently.  Any other
 * failure, or a write that makes no progress, is logged and ends the
 * call with false.  A negative "dataLen" is rejected.
 */
static bool writeProcessFunction(const unsigned char *data, int dataLen,
                                 void *cookie)
{
    /* Convert via intptr_t: casting a pointer straight to int
     * truncates where pointers are wider than int.
     */
    int fd = (int)(intptr_t)cookie;
    ssize_t soFar = 0;

    if (dataLen < 0) {
        return false;
    }

    while (soFar < dataLen) {
        ssize_t n = write(fd, data+soFar, dataLen-soFar);
        if (n > 0) {
            soFar += n;
            continue;
        }
        if (n < 0 && errno == EINTR) {
            continue;
        }
        /* n == 0 leaves errno unset, so don't consult it; a stale
         * EINTR there would otherwise spin forever.
         */
        LOGE("Error writing %zd bytes from zip file from %p: %s\n",
             (ssize_t)(dataLen-soFar), (const void *)(data+soFar),
             n < 0 ? strerror(errno) : "no bytes written");
        return false;
    }
    return true;
}
797 
798 /*
799  * Uncompress "pEntry" in "pArchive" to "fd" at the current offset.
800  */
mzExtractZipEntryToFile(const ZipArchive * pArchive,const ZipEntry * pEntry,int fd)801 bool mzExtractZipEntryToFile(const ZipArchive *pArchive,
802     const ZipEntry *pEntry, int fd)
803 {
804     bool ret = mzProcessZipEntryContents(pArchive, pEntry, writeProcessFunction,
805                                          (void*)fd);
806     if (!ret) {
807         LOGE("Can't extract entry to file.\n");
808         return false;
809     }
810     return true;
811 }
812 
/* Destination state for bufferProcessFunction. */
typedef struct {
    unsigned char* buffer;  /* next position to write to */
    long len;               /* bytes still expected / available */
} BufferExtractCookie;

/*
 * ProcessZipEntryContentsFunction that appends each chunk to the memory
 * buffer described by a BufferExtractCookie ("cookie").
 *
 * Returns true after copying "dataLen" bytes and advancing the cookie.
 * Returns false without writing if the chunk is larger than the space
 * that remains (or "dataLen" is negative).  The size of the destination
 * comes from the entry's declared uncompressed length, which is read
 * from the archive and may be smaller than what the data actually
 * expands to; without this check such an entry would write past the
 * end of the caller's buffer.
 */
static bool bufferProcessFunction(const unsigned char *data, int dataLen,
    void *cookie) {
    BufferExtractCookie *bec = (BufferExtractCookie*)cookie;

    if (dataLen < 0 || dataLen > bec->len) {
        return false;
    }

    memmove(bec->buffer, data, dataLen);
    bec->buffer += dataLen;
    bec->len -= dataLen;

    return true;
}
828 
829 /*
830  * Uncompress "pEntry" in "pArchive" to buffer, which must be large
831  * enough to hold mzGetZipEntryUncomplen(pEntry) bytes.
832  */
mzExtractZipEntryToBuffer(const ZipArchive * pArchive,const ZipEntry * pEntry,unsigned char * buffer)833 bool mzExtractZipEntryToBuffer(const ZipArchive *pArchive,
834     const ZipEntry *pEntry, unsigned char *buffer)
835 {
836     BufferExtractCookie bec;
837     bec.buffer = buffer;
838     bec.len = mzGetZipEntryUncompLen(pEntry);
839 
840     bool ret = mzProcessZipEntryContents(pArchive, pEntry,
841         bufferProcessFunction, (void*)&bec);
842     if (!ret || bec.len != 0) {
843         LOGE("Can't extract entry to memory buffer.\n");
844         return false;
845     }
846     return true;
847 }
848 
849 
850 /* Helper state to make path translation easier and less malloc-happy.
851  */
852 typedef struct {
853     const char *targetDir;   /* directory that output paths start with */
854     const char *zipDir;      /* archive-side prefix (the canonicalized zipDir) */
855     char *buf;               /* scratch path buffer; NULL until first use, grown with realloc() */
856     int targetDirLen;        /* length of targetDir; bumped by one if a '/' is appended in buf */
857     int zipDirLen;           /* length of zipDir; this many leading bytes of an entry name are dropped */
858     int bufLen;              /* allocated size of buf */
859 } MzPathHelper;
860 
861 /* Given the values of targetDir and zipDir in the helper,
862  * return the target filename of the provided entry.
863  * The helper must be initialized first.
864  */
targetEntryPath(MzPathHelper * helper,ZipEntry * pEntry)865 static const char *targetEntryPath(MzPathHelper *helper, ZipEntry *pEntry)
866 {
867     int needLen;
868     bool firstTime = (helper->buf == NULL);
869 
870     /* target file <-- targetDir + / + entry[zipDirLen:]
871      */
872     needLen = helper->targetDirLen + 1 +
873             pEntry->fileNameLen - helper->zipDirLen + 1;
874     if (needLen > helper->bufLen) {
875         char *newBuf;
876 
877         needLen *= 2;
878         newBuf = (char *)realloc(helper->buf, needLen);
879         if (newBuf == NULL) {
880             return NULL;
881         }
882         helper->buf = newBuf;
883         helper->bufLen = needLen;
884     }
885 
886     /* Every path will start with the target path and a slash.
887      */
888     if (firstTime) {
889         char *p = helper->buf;
890         memcpy(p, helper->targetDir, helper->targetDirLen);
891         p += helper->targetDirLen;
892         if (p == helper->buf || p[-1] != '/') {
893             helper->targetDirLen += 1;
894             *p++ = '/';
895         }
896     }
897 
898     /* Replace the custom part of the path with the appropriate
899      * part of the entry's path.
900      */
901     char *epath = helper->buf + helper->targetDirLen;
902     memcpy(epath, pEntry->fileName + helper->zipDirLen,
903             pEntry->fileNameLen - helper->zipDirLen);
904     epath += pEntry->fileNameLen - helper->zipDirLen;
905     *epath = '\0';
906 
907     return helper->buf;
908 }
909 
910 /*
911  * Inflate all entries under zipDir to the directory specified by
912  * targetDir, which must exist and be a writable directory.
913  *
914  * The immediate children of zipDir will become the immediate
915  * children of targetDir; e.g., if the archive contains the entries
916  *
917  *     a/b/c/one
918  *     a/b/c/two
919  *     a/b/c/d/three
920  *
921  * and mzExtractRecursive(a, "a/b/c", "/tmp") is called, the resulting
922  * files will be
923  *
924  *     /tmp/one
925  *     /tmp/two
926  *     /tmp/d/three
927  *
928  * Returns true on success, false on failure.
929  */
mzExtractRecursive(const ZipArchive * pArchive,const char * zipDir,const char * targetDir,int flags,const struct utimbuf * timestamp,void (* callback)(const char * fn,void *),void * cookie,struct selabel_handle * sehnd)930 bool mzExtractRecursive(const ZipArchive *pArchive,
931                         const char *zipDir, const char *targetDir,
932                         int flags, const struct utimbuf *timestamp,
933                         void (*callback)(const char *fn, void *), void *cookie,
934                         struct selabel_handle *sehnd)
935 {
936     if (zipDir[0] == '/') {
937         LOGE("mzExtractRecursive(): zipDir must be a relative path.\n");
938         return false;
939     }
940     if (targetDir[0] != '/') {
941         LOGE("mzExtractRecursive(): targetDir must be an absolute path.\n");
942         return false;
943     }
944 
945     unsigned int zipDirLen;
946     char *zpath;
947 
948     zipDirLen = strlen(zipDir);
949     zpath = (char *)malloc(zipDirLen + 2);
950     if (zpath == NULL) {
951         LOGE("Can't allocate %d bytes for zip path\n", zipDirLen + 2);
952         return false;
953     }
954     /* If zipDir is empty, we'll extract the entire zip file.
955      * Otherwise, canonicalize the path.
956      */
957     if (zipDirLen > 0) {
958         /* Make sure there's (hopefully, exactly one) slash at the
959          * end of the path.  This way we don't need to worry about
960          * accidentally extracting "one/twothree" when a path like
961          * "one/two" is specified.
962          */
963         memcpy(zpath, zipDir, zipDirLen);
964         if (zpath[zipDirLen-1] != '/') {
965             zpath[zipDirLen++] = '/';
966         }
967     }
968     zpath[zipDirLen] = '\0';
969 
970     /* Set up the helper structure that we'll use to assemble paths.
971      */
972     MzPathHelper helper;
973     helper.targetDir = targetDir;
974     helper.targetDirLen = strlen(helper.targetDir);
975     helper.zipDir = zpath;
976     helper.zipDirLen = strlen(helper.zipDir);
977     helper.buf = NULL;
978     helper.bufLen = 0;
979 
980     /* Walk through the entries and extract anything whose path begins
981      * with zpath.
982 //TODO: since the entries are sorted, binary search for the first match
983 //      and stop after the first non-match.
984      */
985     unsigned int i;
986     bool seenMatch = false;
987     int ok = true;
988     int extractCount = 0;
989     for (i = 0; i < pArchive->numEntries; i++) {
990         ZipEntry *pEntry = pArchive->pEntries + i;
991         if (pEntry->fileNameLen < zipDirLen) {
992 //TODO: look out for a single empty directory entry that matches zpath, but
993 //      missing the trailing slash.  Most zip files seem to include
994 //      the trailing slash, but I think it's legal to leave it off.
995 //      e.g., zpath "a/b/", entry "a/b", with no children of the entry.
996             /* No chance of matching.
997              */
998 #if SORT_ENTRIES
999             if (seenMatch) {
1000                 /* Since the entries are sorted, we can give up
1001                  * on the first mismatch after the first match.
1002                  */
1003                 break;
1004             }
1005 #endif
1006             continue;
1007         }
1008         /* If zpath is empty, this strncmp() will match everything,
1009          * which is what we want.
1010          */
1011         if (strncmp(pEntry->fileName, zpath, zipDirLen) != 0) {
1012 #if SORT_ENTRIES
1013             if (seenMatch) {
1014                 /* Since the entries are sorted, we can give up
1015                  * on the first mismatch after the first match.
1016                  */
1017                 break;
1018             }
1019 #endif
1020             continue;
1021         }
1022         /* This entry begins with zipDir, so we'll extract it.
1023          */
1024         seenMatch = true;
1025 
1026         /* Find the target location of the entry.
1027          */
1028         const char *targetFile = targetEntryPath(&helper, pEntry);
1029         if (targetFile == NULL) {
1030             LOGE("Can't assemble target path for \"%.*s\"\n",
1031                     pEntry->fileNameLen, pEntry->fileName);
1032             ok = false;
1033             break;
1034         }
1035 
1036         /* With DRY_RUN set, invoke the callback but don't do anything else.
1037          */
1038         if (flags & MZ_EXTRACT_DRY_RUN) {
1039             if (callback != NULL) callback(targetFile, cookie);
1040             continue;
1041         }
1042 
1043         /* Create the file or directory.
1044          */
1045 #define UNZIP_DIRMODE 0755
1046 #define UNZIP_FILEMODE 0644
1047         if (pEntry->fileName[pEntry->fileNameLen-1] == '/') {
1048             if (!(flags & MZ_EXTRACT_FILES_ONLY)) {
1049                 int ret = dirCreateHierarchy(
1050                         targetFile, UNZIP_DIRMODE, timestamp, false, sehnd);
1051                 if (ret != 0) {
1052                     LOGE("Can't create containing directory for \"%s\": %s\n",
1053                             targetFile, strerror(errno));
1054                     ok = false;
1055                     break;
1056                 }
1057                 LOGD("Extracted dir \"%s\"\n", targetFile);
1058             }
1059         } else {
1060             /* This is not a directory.  First, make sure that
1061              * the containing directory exists.
1062              */
1063             int ret = dirCreateHierarchy(
1064                     targetFile, UNZIP_DIRMODE, timestamp, true, sehnd);
1065             if (ret != 0) {
1066                 LOGE("Can't create containing directory for \"%s\": %s\n",
1067                         targetFile, strerror(errno));
1068                 ok = false;
1069                 break;
1070             }
1071 
1072             /* With FILES_ONLY set, we need to ignore metadata entirely,
1073              * so treat symlinks as regular files.
1074              */
1075             if (!(flags & MZ_EXTRACT_FILES_ONLY) && mzIsZipEntrySymlink(pEntry)) {
1076                 /* The entry is a symbolic link.
1077                  * The relative target of the symlink is in the
1078                  * data section of this entry.
1079                  */
1080                 if (pEntry->uncompLen == 0) {
1081                     LOGE("Symlink entry \"%s\" has no target\n",
1082                             targetFile);
1083                     ok = false;
1084                     break;
1085                 }
1086                 char *linkTarget = malloc(pEntry->uncompLen + 1);
1087                 if (linkTarget == NULL) {
1088                     ok = false;
1089                     break;
1090                 }
1091                 ok = mzReadZipEntry(pArchive, pEntry, linkTarget,
1092                         pEntry->uncompLen);
1093                 if (!ok) {
1094                     LOGE("Can't read symlink target for \"%s\"\n",
1095                             targetFile);
1096                     free(linkTarget);
1097                     break;
1098                 }
1099                 linkTarget[pEntry->uncompLen] = '\0';
1100 
1101                 /* Make the link.
1102                  */
1103                 ret = symlink(linkTarget, targetFile);
1104                 if (ret != 0) {
1105                     LOGE("Can't symlink \"%s\" to \"%s\": %s\n",
1106                             targetFile, linkTarget, strerror(errno));
1107                     free(linkTarget);
1108                     ok = false;
1109                     break;
1110                 }
1111                 LOGD("Extracted symlink \"%s\" -> \"%s\"\n",
1112                         targetFile, linkTarget);
1113                 free(linkTarget);
1114             } else {
1115                 /* The entry is a regular file.
1116                  * Open the target for writing.
1117                  */
1118 
1119                 char *secontext = NULL;
1120 
1121                 if (sehnd) {
1122                     selabel_lookup(sehnd, &secontext, targetFile, UNZIP_FILEMODE);
1123                     setfscreatecon(secontext);
1124                 }
1125 
1126                 int fd = creat(targetFile, UNZIP_FILEMODE);
1127 
1128                 if (secontext) {
1129                     freecon(secontext);
1130                     setfscreatecon(NULL);
1131                 }
1132 
1133                 if (fd < 0) {
1134                     LOGE("Can't create target file \"%s\": %s\n",
1135                             targetFile, strerror(errno));
1136                     ok = false;
1137                     break;
1138                 }
1139 
1140                 bool ok = mzExtractZipEntryToFile(pArchive, pEntry, fd);
1141                 close(fd);
1142                 if (!ok) {
1143                     LOGE("Error extracting \"%s\"\n", targetFile);
1144                     ok = false;
1145                     break;
1146                 }
1147 
1148                 if (timestamp != NULL && utime(targetFile, timestamp)) {
1149                     LOGE("Error touching \"%s\"\n", targetFile);
1150                     ok = false;
1151                     break;
1152                 }
1153 
1154                 LOGV("Extracted file \"%s\"\n", targetFile);
1155                 ++extractCount;
1156             }
1157         }
1158 
1159         if (callback != NULL) callback(targetFile, cookie);
1160     }
1161 
1162     LOGD("Extracted %d file(s)\n", extractCount);
1163 
1164     free(helper.buf);
1165     free(zpath);
1166 
1167     return ok;
1168 }
1169