1 /*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 /*
18 * Read-only access to Zip archives, with minimal heap allocation.
19 */
20
21 #include <assert.h>
22 #include <errno.h>
23 #include <fcntl.h>
24 #include <inttypes.h>
25 #include <limits.h>
26 #include <log/log.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <unistd.h>
30 #include <utils/Compat.h>
31 #include <utils/FileMap.h>
32 #include <zlib.h>
33
34 #include <JNIHelp.h> // TEMP_FAILURE_RETRY may or may not be in unistd
35
36 #include "ziparchive/zip_archive.h"
37
// This is for Windows. If we don't open a file in binary mode, weird
// things will happen. Platforms that do not define O_BINARY get a value of
// 0 so that the flag can be OR-ed into open() flags unconditionally.
#ifndef O_BINARY
#define O_BINARY 0
#endif

// Declares (without defining) the default constructor, the copy constructor
// and the copy-assignment operator of |TypeName|. The record structs below
// expand this inside a "private:" section, so instances cannot be created
// or copied by code outside the struct.
#define DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName) \
  TypeName(); \
  TypeName(const TypeName&); \
  void operator=(const TypeName&)
48
// The "end of central directory" (EOCD) record. Every archive contains
// exactly one such record, and it sits at the very end of the archive. It
// holds archive-wide information such as the number of entries and the
// offset of the central directory.
//
// The struct is packed so that its layout matches the bytes stored in the
// archive.
struct EocdRecord {
  static const uint32_t kSignature = 0x06054b50;

  // Magic number that starts the record; expected to equal |kSignature|.
  uint32_t eocd_signature;
  // Number of the "disk" that holds this record. This implementation
  // assumes that an archive never spans more than a single disk.
  uint16_t disk_num;
  // Number of the disk on which the central directory begins. Same
  // single-disk assumption as |disk_num|.
  uint16_t cd_start_disk;
  // Count of central directory records stored on this disk. Under the
  // single-disk assumption this equals |num_records|.
  uint16_t num_records_on_disk;
  // Count of central directory records in the whole archive.
  uint16_t num_records;
  // Size of the central directory, in bytes.
  uint32_t cd_size;
  // Offset of the first byte of the central directory, measured from the
  // start of the file.
  uint32_t cd_start_offset;
  // Length, in bytes, of the archive comment that follows this record.
  uint16_t comment_length;

 private:
  // Declared but never defined: instances cannot be constructed, copied or
  // assigned.
  EocdRecord();
  EocdRecord(const EocdRecord&);
  void operator=(const EocdRecord&);
} __attribute__((packed));
88
// The fixed-length portion of one central directory record. Each record is
// followed in the archive by a variable-length "file_name", "extra_field"
// and comment, whose sizes are given by |file_name_length|,
// |extra_field_length| and |comment_length| respectively.
//
// The struct is packed so that its layout matches the bytes stored in the
// archive.
struct CentralDirectoryRecord {
  static const uint32_t kSignature = 0x02014b50;

  // Magic number that starts the record. Must be |kSignature|.
  uint32_t record_signature;
  // Version of the tool that wrote the entry. Ignored by this
  // implementation.
  uint16_t version_made_by;
  // Version of the tool needed to extract the entry. Ignored by this
  // implementation.
  uint16_t version_needed;
  // "General purpose bit flags" of the entry. The only flag currently
  // checked is the "data descriptor" flag.
  uint16_t gpb_flags;
  // Compression method of the entry: |kCompressStored| or
  // |kCompressDeflated|.
  uint16_t compression_method;
  // Modification time and date of the entry.
  uint16_t last_mod_time;
  uint16_t last_mod_date;
  // CRC-32 checksum of the entry.
  uint32_t crc32;
  // Size of the entry as stored in the archive, in bytes.
  uint32_t compressed_size;
  // Size of the entry once extracted, in bytes.
  uint32_t uncompressed_size;
  // Length of the entry's file name, in bytes. The name immediately
  // follows this record.
  uint16_t file_name_length;
  // Length of the extra field, in bytes. The extra field immediately
  // follows the file name.
  uint16_t extra_field_length;
  // Length of the entry comment, in bytes. The comment immediately
  // follows the extra field.
  uint16_t comment_length;
  // Disk on which the entry starts. Ignored by this implementation.
  uint16_t file_start_disk;
  // File attributes. Ignored by this implementation.
  uint16_t internal_file_attributes;
  // File attributes. Ignored by this implementation.
  uint32_t external_file_attributes;
  // Offset of the entry's local file header, measured from the start of
  // the archive.
  uint32_t local_file_header_offset;

 private:
  // Declared but never defined: instances cannot be constructed, copied or
  // assigned.
  CentralDirectoryRecord();
  CentralDirectoryRecord(const CentralDirectoryRecord&);
  void operator=(const CentralDirectoryRecord&);
} __attribute__((packed));
141
// The local file header that precedes the data of each entry. It repeats
// information that is also stored in the central directory; a mismatch
// between the two copies for the same entry is treated as an error.
//
// The struct is packed so that its layout matches the bytes stored in the
// archive.
struct LocalFileHeader {
  static const uint32_t kSignature = 0x04034b50;

  // Magic number that starts the header. Must be |kSignature|.
  uint32_t lfh_signature;
  // Version of the tool needed to extract the entry. Ignored by this
  // implementation.
  uint16_t version_needed;
  // "General purpose bit flags" of the entry. The only flag currently
  // checked is the "data descriptor" flag.
  uint16_t gpb_flags;
  // Compression method of the entry: |kCompressStored| or
  // |kCompressDeflated|.
  uint16_t compression_method;
  // Modification time and date of the entry.
  uint16_t last_mod_time;
  uint16_t last_mod_date;
  // CRC-32 checksum of the entry.
  uint32_t crc32;
  // Size of the entry as stored in the archive, in bytes.
  uint32_t compressed_size;
  // Size of the entry once extracted, in bytes.
  uint32_t uncompressed_size;
  // Length of the entry's file name, in bytes. The name immediately
  // follows this header.
  uint16_t file_name_length;
  // Length of the extra field, in bytes. The extra field immediately
  // follows the file name.
  uint16_t extra_field_length;

 private:
  // Declared but never defined: instances cannot be constructed, copied or
  // assigned.
  LocalFileHeader();
  LocalFileHeader(const LocalFileHeader&);
  void operator=(const LocalFileHeader&);
} __attribute__((packed));
178
// The data descriptor of an entry: its checksum and sizes. In the archive
// these three fields may or may not be preceded by the 4-byte
// |kOptSignature| marker, which is not part of this struct.
//
// The struct is packed so that its layout matches the bytes stored in the
// archive.
struct DataDescriptor {
  // The *optional* signature that may precede the descriptor.
  static const uint32_t kOptSignature = 0x08074b50;

  // CRC-32 checksum of the entry.
  uint32_t crc32;
  // Size of the entry as stored in the archive, in bytes.
  uint32_t compressed_size;
  // Size of the entry once extracted, in bytes.
  uint32_t uncompressed_size;

 private:
  // Declared but never defined: instances cannot be constructed, copied or
  // assigned.
  DataDescriptor();
  DataDescriptor(const DataDescriptor&);
  void operator=(const DataDescriptor&);
} __attribute__((packed));
192
193 #undef DISALLOW_IMPLICIT_CONSTRUCTORS
194
// Mask for the general purpose bit flag that signifies that the entry has a
// data descriptor (DD).
static const uint32_t kGPBDDFlagMask = 0x0008;
static const uint32_t kMaxErrorLen = 1024;

// The maximum size of a central directory or a file
// comment in bytes.
static const uint32_t kMaxCommentLen = 65535;

// The maximum number of bytes to scan backwards for the EOCD start: the
// largest possible comment plus the fixed-size EOCD record in front of it.
static const uint32_t kMaxEOCDSearch = kMaxCommentLen + sizeof(EocdRecord);
204
// Human-readable descriptions of the error codes declared below. The
// entries are ordered so that the message for error code |c| sits at index
// |-c|; index 0 is the fallback text. Keep this table and the codes in
// sync when adding a new error.
//
// Both the strings and the table itself are read-only.
static const char* const kErrorMessages[] = {
  "Unknown return code.",          //   0
  "Iteration ended",               //  -1  kIterationEnd
  "Zlib error",                    //  -2  kZlibError
  "Invalid file",                  //  -3  kInvalidFile
  "Invalid handle",                //  -4  kInvalidHandle
  "Duplicate entries in archive",  //  -5  kDuplicateEntry
  "Empty archive",                 //  -6  kEmptyArchive
  "Entry not found",               //  -7  kEntryNotFound
  "Invalid offset",                //  -8  kInvalidOffset
  "Inconsistent information",      //  -9  kInconsistentInformation
  "Invalid entry name",            // -10  kInvalidEntryName
  "I/O Error",                     // -11  kIoError
  "File mapping failed"            // -12  kMmapFailed
};

// Exclusive upper bound of the error code range.
static const int32_t kErrorMessageUpperBound = 0;

// Iteration over the archive has finished.
static const int32_t kIterationEnd = -1;

// We encountered a Zlib error when inflating a stream from this file.
// Usually indicates file corruption.
static const int32_t kZlibError = -2;

// The input file cannot be processed as a zip archive. Usually because
// it's too small, too large or does not have a valid signature.
static const int32_t kInvalidFile = -3;

// An invalid iteration / ziparchive handle was passed in as an input
// argument.
static const int32_t kInvalidHandle = -4;

// The zip archive contained two (or possibly more) entries with the same
// name.
static const int32_t kDuplicateEntry = -5;

// The zip archive contains no entries.
static const int32_t kEmptyArchive = -6;

// The specified entry was not found in the archive.
static const int32_t kEntryNotFound = -7;

// The zip archive contained an invalid local file header pointer.
static const int32_t kInvalidOffset = -8;

// The zip archive contained inconsistent entry information. This could
// be because the central directory & local file header did not agree, or
// if the actual uncompressed length or crc32 do not match their declared
// values.
static const int32_t kInconsistentInformation = -9;

// An invalid entry name was encountered.
static const int32_t kInvalidEntryName = -10;

// An I/O related system call (read, lseek, ftruncate, map) failed.
static const int32_t kIoError = -11;

// We were not able to mmap the central directory or entry contents.
static const int32_t kMmapFailed = -12;

// Exclusive lower bound of the error code range; one below the last code
// that has an entry in |kErrorMessages|.
static const int32_t kErrorMessageLowerBound = -13;

// Debug name; judging by its text it labels the mapping created when
// extracting an entry to a file.
static const char kTempMappingFileName[] = "zip: ExtractFileToFile";
268
269 /*
270 * A Read-only Zip archive.
271 *
272 * We want "open" and "find entry by name" to be fast operations, and
273 * we want to use as little memory as possible. We memory-map the zip
274 * central directory, and load a hash table with pointers to the filenames
275 * (which aren't null-terminated). The other fields are at a fixed offset
276 * from the filename, so we don't need to extract those (but we do need
277 * to byte-read and endian-swap them every time we want them).
278 *
279 * It's possible that somebody has handed us a massive (~1GB) zip archive,
280 * so we can't expect to mmap the entire file.
281 *
282 * To speed comparisons when doing a lookup by name, we could make the mapping
283 * "private" (copy-on-write) and null-terminate the filenames after verifying
284 * the record structure. However, this requires a private mapping of
285 * every page that the Central Directory touches. Easier to tuck a copy
286 * of the string length into the hash table entry.
287 */
288 struct ZipArchive {
289 /* open Zip archive */
290 const int fd;
291
292 /* mapped central directory area */
293 off64_t directory_offset;
294 android::FileMap* directory_map;
295
296 /* number of entries in the Zip archive */
297 uint16_t num_entries;
298
299 /*
300 * We know how many entries are in the Zip archive, so we can have a
301 * fixed-size hash table. We define a load factor of 0.75 and overallocat
302 * so the maximum number entries can never be higher than
303 * ((4 * UINT16_MAX) / 3 + 1) which can safely fit into a uint32_t.
304 */
305 uint32_t hash_table_size;
306 ZipEntryName* hash_table;
307
ZipArchiveZipArchive308 ZipArchive(const int fd) :
309 fd(fd),
310 directory_offset(0),
311 directory_map(NULL),
312 num_entries(0),
313 hash_table_size(0),
314 hash_table(NULL) {}
315
~ZipArchiveZipArchive316 ~ZipArchive() {
317 if (fd >= 0) {
318 close(fd);
319 }
320
321 if (directory_map != NULL) {
322 directory_map->release();
323 }
324 free(hash_table);
325 }
326 };
327
328 // Returns 0 on success and negative values on failure.
MapFileSegment(const int fd,const off64_t start,const size_t length,const bool read_only,const char * debug_file_name)329 static android::FileMap* MapFileSegment(const int fd, const off64_t start,
330 const size_t length, const bool read_only,
331 const char* debug_file_name) {
332 android::FileMap* file_map = new android::FileMap;
333 const bool success = file_map->create(debug_file_name, fd, start, length, read_only);
334 if (!success) {
335 file_map->release();
336 return NULL;
337 }
338
339 return file_map;
340 }
341
CopyFileToFile(int fd,uint8_t * begin,const uint32_t length,uint64_t * crc_out)342 static int32_t CopyFileToFile(int fd, uint8_t* begin, const uint32_t length, uint64_t *crc_out) {
343 static const uint32_t kBufSize = 32768;
344 uint8_t buf[kBufSize];
345
346 uint32_t count = 0;
347 uint64_t crc = 0;
348 while (count < length) {
349 uint32_t remaining = length - count;
350
351 // Safe conversion because kBufSize is narrow enough for a 32 bit signed
352 // value.
353 ssize_t get_size = (remaining > kBufSize) ? kBufSize : remaining;
354 ssize_t actual = TEMP_FAILURE_RETRY(read(fd, buf, get_size));
355
356 if (actual != get_size) {
357 ALOGW("CopyFileToFile: copy read failed (" ZD " vs " ZD ")", actual, get_size);
358 return kIoError;
359 }
360
361 memcpy(begin + count, buf, get_size);
362 crc = crc32(crc, buf, get_size);
363 count += get_size;
364 }
365
366 *crc_out = crc;
367
368 return 0;
369 }
370
/*
 * Returns the smallest power of 2 that is greater than or equal to |val|.
 *
 * The result wraps to 0 when |val| is 0 or is larger than 2^31, because
 * the answer is not representable in 32 bits.
 *
 * Technique from http://graphics.stanford.edu/~seander/bithacks.html.
 */
static uint32_t RoundUpPower2(uint32_t val) {
  // Copy the highest set bit of (val - 1) into every lower bit position by
  // OR-ing in shifts of 1, 2, 4, 8 and 16. Adding 1 afterwards carries
  // into the next power of 2.
  uint32_t smeared = val - 1;
  for (uint32_t shift = 1; shift < 32; shift <<= 1) {
    smeared |= smeared >> shift;
  }

  return smeared + 1;
}
387
// Hashes the first |len| bytes of |str|, which need not be
// null-terminated. The hash is the polynomial
//   str[0] * 31^(len-1) + ... + str[len-1]
// evaluated in wrapping 32-bit arithmetic; an empty name hashes to 0.
static uint32_t ComputeHash(const char* str, uint16_t len) {
  uint32_t hash = 0;

  for (const char* const end = str + len; str != end; ++str) {
    hash = hash * 31 + *str;
  }

  return hash;
}
397
398 /*
399 * Convert a ZipEntry to a hash table index, verifying that it's in a
400 * valid range.
401 */
EntryToIndex(const ZipEntryName * hash_table,const uint32_t hash_table_size,const char * name,uint16_t length)402 static int64_t EntryToIndex(const ZipEntryName* hash_table,
403 const uint32_t hash_table_size,
404 const char* name, uint16_t length) {
405 const uint32_t hash = ComputeHash(name, length);
406
407 // NOTE: (hash_table_size - 1) is guaranteed to be non-negative.
408 uint32_t ent = hash & (hash_table_size - 1);
409 while (hash_table[ent].name != NULL) {
410 if (hash_table[ent].name_length == length &&
411 memcmp(hash_table[ent].name, name, length) == 0) {
412 return ent;
413 }
414
415 ent = (ent + 1) & (hash_table_size - 1);
416 }
417
418 ALOGV("Zip: Unable to find entry %.*s", length, name);
419 return kEntryNotFound;
420 }
421
422 /*
423 * Add a new entry to the hash table.
424 */
AddToHash(ZipEntryName * hash_table,const uint64_t hash_table_size,const char * name,uint16_t length)425 static int32_t AddToHash(ZipEntryName *hash_table, const uint64_t hash_table_size,
426 const char* name, uint16_t length) {
427 const uint64_t hash = ComputeHash(name, length);
428 uint32_t ent = hash & (hash_table_size - 1);
429
430 /*
431 * We over-allocated the table, so we're guaranteed to find an empty slot.
432 * Further, we guarantee that the hashtable size is not 0.
433 */
434 while (hash_table[ent].name != NULL) {
435 if (hash_table[ent].name_length == length &&
436 memcmp(hash_table[ent].name, name, length) == 0) {
437 // We've found a duplicate entry. We don't accept it
438 ALOGW("Zip: Found duplicate entry %.*s", length, name);
439 return kDuplicateEntry;
440 }
441 ent = (ent + 1) & (hash_table_size - 1);
442 }
443
444 hash_table[ent].name = name;
445 hash_table[ent].name_length = length;
446 return 0;
447 }
448
MapCentralDirectory0(int fd,const char * debug_file_name,ZipArchive * archive,off64_t file_length,off64_t read_amount,uint8_t * scan_buffer)449 static int32_t MapCentralDirectory0(int fd, const char* debug_file_name,
450 ZipArchive* archive, off64_t file_length,
451 off64_t read_amount, uint8_t* scan_buffer) {
452 const off64_t search_start = file_length - read_amount;
453
454 if (lseek64(fd, search_start, SEEK_SET) != search_start) {
455 ALOGW("Zip: seek %" PRId64 " failed: %s", static_cast<int64_t>(search_start),
456 strerror(errno));
457 return kIoError;
458 }
459 ssize_t actual = TEMP_FAILURE_RETRY(
460 read(fd, scan_buffer, static_cast<size_t>(read_amount)));
461 if (actual != static_cast<ssize_t>(read_amount)) {
462 ALOGW("Zip: read %" PRId64 " failed: %s", static_cast<int64_t>(read_amount),
463 strerror(errno));
464 return kIoError;
465 }
466
467 /*
468 * Scan backward for the EOCD magic. In an archive without a trailing
469 * comment, we'll find it on the first try. (We may want to consider
470 * doing an initial minimal read; if we don't find it, retry with a
471 * second read as above.)
472 */
473 int i = read_amount - sizeof(EocdRecord);
474 for (; i >= 0; i--) {
475 if (scan_buffer[i] == 0x50 &&
476 ((*reinterpret_cast<uint32_t*>(&scan_buffer[i])) == EocdRecord::kSignature)) {
477 ALOGV("+++ Found EOCD at buf+%d", i);
478 break;
479 }
480 }
481 if (i < 0) {
482 ALOGD("Zip: EOCD not found, %s is not zip", debug_file_name);
483 return kInvalidFile;
484 }
485
486 const off64_t eocd_offset = search_start + i;
487 const EocdRecord* eocd = reinterpret_cast<const EocdRecord*>(scan_buffer + i);
488 /*
489 * Verify that there's no trailing space at the end of the central directory
490 * and its comment.
491 */
492 const off64_t calculated_length = eocd_offset + sizeof(EocdRecord)
493 + eocd->comment_length;
494 if (calculated_length != file_length) {
495 ALOGW("Zip: %" PRId64 " extraneous bytes at the end of the central directory",
496 static_cast<int64_t>(file_length - calculated_length));
497 return kInvalidFile;
498 }
499
500 /*
501 * Grab the CD offset and size, and the number of entries in the
502 * archive and verify that they look reasonable.
503 */
504 if (eocd->cd_start_offset + eocd->cd_size > eocd_offset) {
505 ALOGW("Zip: bad offsets (dir %" PRIu32 ", size %" PRIu32 ", eocd %" PRId64 ")",
506 eocd->cd_start_offset, eocd->cd_size, static_cast<int64_t>(eocd_offset));
507 return kInvalidOffset;
508 }
509 if (eocd->num_records == 0) {
510 ALOGW("Zip: empty archive?");
511 return kEmptyArchive;
512 }
513
514 ALOGV("+++ num_entries=%" PRIu32 "dir_size=%" PRIu32 " dir_offset=%" PRIu32,
515 eocd->num_records, eocd->cd_size, eocd->cd_start_offset);
516
517 /*
518 * It all looks good. Create a mapping for the CD, and set the fields
519 * in archive.
520 */
521 android::FileMap* map = MapFileSegment(fd,
522 static_cast<off64_t>(eocd->cd_start_offset),
523 static_cast<size_t>(eocd->cd_size),
524 true /* read only */, debug_file_name);
525 if (map == NULL) {
526 archive->directory_map = NULL;
527 return kMmapFailed;
528 }
529
530 archive->directory_map = map;
531 archive->num_entries = eocd->num_records;
532 archive->directory_offset = eocd->cd_start_offset;
533
534 return 0;
535 }
536
537 /*
538 * Find the zip Central Directory and memory-map it.
539 *
540 * On success, returns 0 after populating fields from the EOCD area:
541 * directory_offset
542 * directory_map
543 * num_entries
544 */
MapCentralDirectory(int fd,const char * debug_file_name,ZipArchive * archive)545 static int32_t MapCentralDirectory(int fd, const char* debug_file_name,
546 ZipArchive* archive) {
547
548 // Test file length. We use lseek64 to make sure the file
549 // is small enough to be a zip file (Its size must be less than
550 // 0xffffffff bytes).
551 off64_t file_length = lseek64(fd, 0, SEEK_END);
552 if (file_length == -1) {
553 ALOGV("Zip: lseek on fd %d failed", fd);
554 return kInvalidFile;
555 }
556
557 if (file_length > (off64_t) 0xffffffff) {
558 ALOGV("Zip: zip file too long %" PRId64, static_cast<int64_t>(file_length));
559 return kInvalidFile;
560 }
561
562 if (file_length < static_cast<off64_t>(sizeof(EocdRecord))) {
563 ALOGV("Zip: length %" PRId64 " is too small to be zip", static_cast<int64_t>(file_length));
564 return kInvalidFile;
565 }
566
567 /*
568 * Perform the traditional EOCD snipe hunt.
569 *
570 * We're searching for the End of Central Directory magic number,
571 * which appears at the start of the EOCD block. It's followed by
572 * 18 bytes of EOCD stuff and up to 64KB of archive comment. We
573 * need to read the last part of the file into a buffer, dig through
574 * it to find the magic number, parse some values out, and use those
575 * to determine the extent of the CD.
576 *
577 * We start by pulling in the last part of the file.
578 */
579 off64_t read_amount = kMaxEOCDSearch;
580 if (file_length < read_amount) {
581 read_amount = file_length;
582 }
583
584 uint8_t* scan_buffer = reinterpret_cast<uint8_t*>(malloc(read_amount));
585 int32_t result = MapCentralDirectory0(fd, debug_file_name, archive,
586 file_length, read_amount, scan_buffer);
587
588 free(scan_buffer);
589 return result;
590 }
591
592 /*
593 * Parses the Zip archive's Central Directory. Allocates and populates the
594 * hash table.
595 *
596 * Returns 0 on success.
597 */
ParseZipArchive(ZipArchive * archive)598 static int32_t ParseZipArchive(ZipArchive* archive) {
599 int32_t result = -1;
600 const uint8_t* const cd_ptr = (const uint8_t*) archive->directory_map->getDataPtr();
601 const size_t cd_length = archive->directory_map->getDataLength();
602 const uint16_t num_entries = archive->num_entries;
603
604 /*
605 * Create hash table. We have a minimum 75% load factor, possibly as
606 * low as 50% after we round off to a power of 2. There must be at
607 * least one unused entry to avoid an infinite loop during creation.
608 */
609 archive->hash_table_size = RoundUpPower2(1 + (num_entries * 4) / 3);
610 archive->hash_table = (ZipEntryName*) calloc(archive->hash_table_size,
611 sizeof(ZipEntryName));
612
613 /*
614 * Walk through the central directory, adding entries to the hash
615 * table and verifying values.
616 */
617 const uint8_t* const cd_end = cd_ptr + cd_length;
618 const uint8_t* ptr = cd_ptr;
619 for (uint16_t i = 0; i < num_entries; i++) {
620 const CentralDirectoryRecord* cdr =
621 reinterpret_cast<const CentralDirectoryRecord*>(ptr);
622 if (cdr->record_signature != CentralDirectoryRecord::kSignature) {
623 ALOGW("Zip: missed a central dir sig (at %" PRIu16 ")", i);
624 goto bail;
625 }
626
627 if (ptr + sizeof(CentralDirectoryRecord) > cd_end) {
628 ALOGW("Zip: ran off the end (at %" PRIu16 ")", i);
629 goto bail;
630 }
631
632 const off64_t local_header_offset = cdr->local_file_header_offset;
633 if (local_header_offset >= archive->directory_offset) {
634 ALOGW("Zip: bad LFH offset %" PRId64 " at entry %" PRIu16, (int64_t)local_header_offset, i);
635 goto bail;
636 }
637
638 const uint16_t file_name_length = cdr->file_name_length;
639 const uint16_t extra_length = cdr->extra_field_length;
640 const uint16_t comment_length = cdr->comment_length;
641 const char* file_name = reinterpret_cast<const char*>(ptr + sizeof(CentralDirectoryRecord));
642
643 /* check that file name doesn't contain \0 character */
644 if (memchr(file_name, 0, file_name_length) != NULL) {
645 ALOGW("Zip: entry name can't contain \\0 character");
646 goto bail;
647 }
648
649 /* add the CDE filename to the hash table */
650 const int add_result = AddToHash(archive->hash_table,
651 archive->hash_table_size, file_name, file_name_length);
652 if (add_result) {
653 ALOGW("Zip: Error adding entry to hash table %d", add_result);
654 result = add_result;
655 goto bail;
656 }
657
658 ptr += sizeof(CentralDirectoryRecord) + file_name_length + extra_length + comment_length;
659 if ((ptr - cd_ptr) > static_cast<int64_t>(cd_length)) {
660 ALOGW("Zip: bad CD advance (%tu vs %zu) at entry %" PRIu16,
661 ptr - cd_ptr, cd_length, i);
662 goto bail;
663 }
664 }
665 ALOGV("+++ zip good scan %" PRIu16 " entries", num_entries);
666
667 result = 0;
668
669 bail:
670 return result;
671 }
672
OpenArchiveInternal(ZipArchive * archive,const char * debug_file_name)673 static int32_t OpenArchiveInternal(ZipArchive* archive,
674 const char* debug_file_name) {
675 int32_t result = -1;
676 if ((result = MapCentralDirectory(archive->fd, debug_file_name, archive))) {
677 return result;
678 }
679
680 if ((result = ParseZipArchive(archive))) {
681 return result;
682 }
683
684 return 0;
685 }
686
OpenArchiveFd(int fd,const char * debug_file_name,ZipArchiveHandle * handle)687 int32_t OpenArchiveFd(int fd, const char* debug_file_name,
688 ZipArchiveHandle* handle) {
689 ZipArchive* archive = new ZipArchive(fd);
690 *handle = archive;
691 return OpenArchiveInternal(archive, debug_file_name);
692 }
693
OpenArchive(const char * fileName,ZipArchiveHandle * handle)694 int32_t OpenArchive(const char* fileName, ZipArchiveHandle* handle) {
695 const int fd = open(fileName, O_RDONLY | O_BINARY, 0);
696 ZipArchive* archive = new ZipArchive(fd);
697 *handle = archive;
698
699 if (fd < 0) {
700 ALOGW("Unable to open '%s': %s", fileName, strerror(errno));
701 return kIoError;
702 }
703 return OpenArchiveInternal(archive, fileName);
704 }
705
706 /*
707 * Close a ZipArchive, closing the file and freeing the contents.
708 */
CloseArchive(ZipArchiveHandle handle)709 void CloseArchive(ZipArchiveHandle handle) {
710 ZipArchive* archive = (ZipArchive*) handle;
711 ALOGV("Closing archive %p", archive);
712 delete archive;
713 }
714
UpdateEntryFromDataDescriptor(int fd,ZipEntry * entry)715 static int32_t UpdateEntryFromDataDescriptor(int fd,
716 ZipEntry *entry) {
717 uint8_t ddBuf[sizeof(DataDescriptor) + sizeof(DataDescriptor::kOptSignature)];
718 ssize_t actual = TEMP_FAILURE_RETRY(read(fd, ddBuf, sizeof(ddBuf)));
719 if (actual != sizeof(ddBuf)) {
720 return kIoError;
721 }
722
723 const uint32_t ddSignature = *(reinterpret_cast<const uint32_t*>(ddBuf));
724 const uint16_t offset = (ddSignature == DataDescriptor::kOptSignature) ? 4 : 0;
725 const DataDescriptor* descriptor = reinterpret_cast<const DataDescriptor*>(ddBuf + offset);
726
727 entry->crc32 = descriptor->crc32;
728 entry->compressed_length = descriptor->compressed_size;
729 entry->uncompressed_length = descriptor->uncompressed_size;
730
731 return 0;
732 }
733
734 // Attempts to read |len| bytes into |buf| at offset |off|.
735 //
736 // This method uses pread64 on platforms that support it and
737 // lseek64 + read on platforms that don't. This implies that
738 // callers should not rely on the |fd| offset being incremented
739 // as a side effect of this call.
ReadAtOffset(int fd,uint8_t * buf,size_t len,off64_t off)740 static inline ssize_t ReadAtOffset(int fd, uint8_t* buf, size_t len,
741 off64_t off) {
742 #ifdef HAVE_PREAD
743 return TEMP_FAILURE_RETRY(pread64(fd, buf, len, off));
744 #else
745 // The only supported platform that doesn't support pread at the moment
746 // is Windows. Only recent versions of windows support unix like forks,
747 // and even there the semantics are quite different.
748 if (lseek64(fd, off, SEEK_SET) != off) {
749 ALOGW("Zip: failed seek to offset %" PRId64, off);
750 return kIoError;
751 }
752
753 return TEMP_FAILURE_RETRY(read(fd, buf, len));
754 #endif // HAVE_PREAD
755 }
756
FindEntry(const ZipArchive * archive,const int ent,ZipEntry * data)757 static int32_t FindEntry(const ZipArchive* archive, const int ent,
758 ZipEntry* data) {
759 const uint16_t nameLen = archive->hash_table[ent].name_length;
760 const char* name = archive->hash_table[ent].name;
761
762 // Recover the start of the central directory entry from the filename
763 // pointer. The filename is the first entry past the fixed-size data,
764 // so we can just subtract back from that.
765 const uint8_t* ptr = reinterpret_cast<const uint8_t*>(name);
766 ptr -= sizeof(CentralDirectoryRecord);
767
768 // This is the base of our mmapped region, we have to sanity check that
769 // the name that's in the hash table is a pointer to a location within
770 // this mapped region.
771 const uint8_t* base_ptr = reinterpret_cast<const uint8_t*>(
772 archive->directory_map->getDataPtr());
773 if (ptr < base_ptr || ptr > base_ptr + archive->directory_map->getDataLength()) {
774 ALOGW("Zip: Invalid entry pointer");
775 return kInvalidOffset;
776 }
777
778 const CentralDirectoryRecord *cdr =
779 reinterpret_cast<const CentralDirectoryRecord*>(ptr);
780
781 // The offset of the start of the central directory in the zipfile.
782 // We keep this lying around so that we can sanity check all our lengths
783 // and our per-file structures.
784 const off64_t cd_offset = archive->directory_offset;
785
786 // Fill out the compression method, modification time, crc32
787 // and other interesting attributes from the central directory. These
788 // will later be compared against values from the local file header.
789 data->method = cdr->compression_method;
790 data->mod_time = cdr->last_mod_time;
791 data->crc32 = cdr->crc32;
792 data->compressed_length = cdr->compressed_size;
793 data->uncompressed_length = cdr->uncompressed_size;
794
795 // Figure out the local header offset from the central directory. The
796 // actual file data will begin after the local header and the name /
797 // extra comments.
798 const off64_t local_header_offset = cdr->local_file_header_offset;
799 if (local_header_offset + static_cast<off64_t>(sizeof(LocalFileHeader)) >= cd_offset) {
800 ALOGW("Zip: bad local hdr offset in zip");
801 return kInvalidOffset;
802 }
803
804 uint8_t lfh_buf[sizeof(LocalFileHeader)];
805 ssize_t actual = ReadAtOffset(archive->fd, lfh_buf, sizeof(lfh_buf),
806 local_header_offset);
807 if (actual != sizeof(lfh_buf)) {
808 ALOGW("Zip: failed reading lfh name from offset %" PRId64, (int64_t)local_header_offset);
809 return kIoError;
810 }
811
812 const LocalFileHeader *lfh = reinterpret_cast<const LocalFileHeader*>(lfh_buf);
813
814 if (lfh->lfh_signature != LocalFileHeader::kSignature) {
815 ALOGW("Zip: didn't find signature at start of lfh, offset=%" PRId64,
816 static_cast<int64_t>(local_header_offset));
817 return kInvalidOffset;
818 }
819
820 // Paranoia: Match the values specified in the local file header
821 // to those specified in the central directory.
822 if ((lfh->gpb_flags & kGPBDDFlagMask) == 0) {
823 data->has_data_descriptor = 0;
824 if (data->compressed_length != lfh->compressed_size
825 || data->uncompressed_length != lfh->uncompressed_size
826 || data->crc32 != lfh->crc32) {
827 ALOGW("Zip: size/crc32 mismatch. expected {%" PRIu32 ", %" PRIu32
828 ", %" PRIx32 "}, was {%" PRIu32 ", %" PRIu32 ", %" PRIx32 "}",
829 data->compressed_length, data->uncompressed_length, data->crc32,
830 lfh->compressed_size, lfh->uncompressed_size, lfh->crc32);
831 return kInconsistentInformation;
832 }
833 } else {
834 data->has_data_descriptor = 1;
835 }
836
837 // Check that the local file header name matches the declared
838 // name in the central directory.
839 if (lfh->file_name_length == nameLen) {
840 const off64_t name_offset = local_header_offset + sizeof(LocalFileHeader);
841 if (name_offset + lfh->file_name_length >= cd_offset) {
842 ALOGW("Zip: Invalid declared length");
843 return kInvalidOffset;
844 }
845
846 uint8_t* name_buf = (uint8_t*) malloc(nameLen);
847 ssize_t actual = ReadAtOffset(archive->fd, name_buf, nameLen,
848 name_offset);
849
850 if (actual != nameLen) {
851 ALOGW("Zip: failed reading lfh name from offset %" PRId64, (int64_t)name_offset);
852 free(name_buf);
853 return kIoError;
854 }
855
856 if (memcmp(name, name_buf, nameLen)) {
857 free(name_buf);
858 return kInconsistentInformation;
859 }
860
861 free(name_buf);
862 } else {
863 ALOGW("Zip: lfh name did not match central directory.");
864 return kInconsistentInformation;
865 }
866
867 const off64_t data_offset = local_header_offset + sizeof(LocalFileHeader)
868 + lfh->file_name_length + lfh->extra_field_length;
869 if (data_offset > cd_offset) {
870 ALOGW("Zip: bad data offset %" PRId64 " in zip", (int64_t)data_offset);
871 return kInvalidOffset;
872 }
873
874 if ((off64_t)(data_offset + data->compressed_length) > cd_offset) {
875 ALOGW("Zip: bad compressed length in zip (%" PRId64 " + %" PRIu32 " > %" PRId64 ")",
876 (int64_t)data_offset, data->compressed_length, (int64_t)cd_offset);
877 return kInvalidOffset;
878 }
879
880 if (data->method == kCompressStored &&
881 (off64_t)(data_offset + data->uncompressed_length) > cd_offset) {
882 ALOGW("Zip: bad uncompressed length in zip (%" PRId64 " + %" PRIu32 " > %" PRId64 ")",
883 (int64_t)data_offset, data->uncompressed_length, (int64_t)cd_offset);
884 return kInvalidOffset;
885 }
886
887 data->offset = data_offset;
888 return 0;
889 }
890
891 struct IterationHandle {
892 uint32_t position;
893 const char* prefix;
894 uint16_t prefix_len;
895 ZipArchive* archive;
896 };
897
StartIteration(ZipArchiveHandle handle,void ** cookie_ptr,const char * prefix)898 int32_t StartIteration(ZipArchiveHandle handle, void** cookie_ptr, const char* prefix) {
899 ZipArchive* archive = (ZipArchive *) handle;
900
901 if (archive == NULL || archive->hash_table == NULL) {
902 ALOGW("Zip: Invalid ZipArchiveHandle");
903 return kInvalidHandle;
904 }
905
906 IterationHandle* cookie = (IterationHandle*) malloc(sizeof(IterationHandle));
907 cookie->position = 0;
908 cookie->prefix = prefix;
909 cookie->archive = archive;
910 if (prefix != NULL) {
911 cookie->prefix_len = strlen(prefix);
912 }
913
914 *cookie_ptr = cookie ;
915 return 0;
916 }
917
FindEntry(const ZipArchiveHandle handle,const char * entryName,ZipEntry * data)918 int32_t FindEntry(const ZipArchiveHandle handle, const char* entryName,
919 ZipEntry* data) {
920 const ZipArchive* archive = (ZipArchive*) handle;
921 const int nameLen = strlen(entryName);
922 if (nameLen == 0 || nameLen > 65535) {
923 ALOGW("Zip: Invalid filename %s", entryName);
924 return kInvalidEntryName;
925 }
926
927 const int64_t ent = EntryToIndex(archive->hash_table,
928 archive->hash_table_size, entryName, nameLen);
929
930 if (ent < 0) {
931 ALOGV("Zip: Could not find entry %.*s", nameLen, entryName);
932 return ent;
933 }
934
935 return FindEntry(archive, ent, data);
936 }
937
Next(void * cookie,ZipEntry * data,ZipEntryName * name)938 int32_t Next(void* cookie, ZipEntry* data, ZipEntryName* name) {
939 IterationHandle* handle = (IterationHandle *) cookie;
940 if (handle == NULL) {
941 return kInvalidHandle;
942 }
943
944 ZipArchive* archive = handle->archive;
945 if (archive == NULL || archive->hash_table == NULL) {
946 ALOGW("Zip: Invalid ZipArchiveHandle");
947 return kInvalidHandle;
948 }
949
950 const uint32_t currentOffset = handle->position;
951 const uint32_t hash_table_length = archive->hash_table_size;
952 const ZipEntryName *hash_table = archive->hash_table;
953
954 for (uint32_t i = currentOffset; i < hash_table_length; ++i) {
955 if (hash_table[i].name != NULL &&
956 (handle->prefix == NULL ||
957 (memcmp(handle->prefix, hash_table[i].name, handle->prefix_len) == 0))) {
958 handle->position = (i + 1);
959 const int error = FindEntry(archive, i, data);
960 if (!error) {
961 name->name = hash_table[i].name;
962 name->name_length = hash_table[i].name_length;
963 }
964
965 return error;
966 }
967 }
968
969 handle->position = 0;
970 return kIterationEnd;
971 }
972
InflateToFile(int fd,const ZipEntry * entry,uint8_t * begin,uint32_t length,uint64_t * crc_out)973 static int32_t InflateToFile(int fd, const ZipEntry* entry,
974 uint8_t* begin, uint32_t length,
975 uint64_t* crc_out) {
976 int32_t result = -1;
977 const uint32_t kBufSize = 32768;
978 uint8_t read_buf[kBufSize];
979 uint8_t write_buf[kBufSize];
980 z_stream zstream;
981 int zerr;
982
983 /*
984 * Initialize the zlib stream struct.
985 */
986 memset(&zstream, 0, sizeof(zstream));
987 zstream.zalloc = Z_NULL;
988 zstream.zfree = Z_NULL;
989 zstream.opaque = Z_NULL;
990 zstream.next_in = NULL;
991 zstream.avail_in = 0;
992 zstream.next_out = (Bytef*) write_buf;
993 zstream.avail_out = kBufSize;
994 zstream.data_type = Z_UNKNOWN;
995
996 /*
997 * Use the undocumented "negative window bits" feature to tell zlib
998 * that there's no zlib header waiting for it.
999 */
1000 zerr = inflateInit2(&zstream, -MAX_WBITS);
1001 if (zerr != Z_OK) {
1002 if (zerr == Z_VERSION_ERROR) {
1003 ALOGE("Installed zlib is not compatible with linked version (%s)",
1004 ZLIB_VERSION);
1005 } else {
1006 ALOGW("Call to inflateInit2 failed (zerr=%d)", zerr);
1007 }
1008
1009 return kZlibError;
1010 }
1011
1012 const uint32_t uncompressed_length = entry->uncompressed_length;
1013
1014 uint32_t compressed_length = entry->compressed_length;
1015 uint32_t write_count = 0;
1016 do {
1017 /* read as much as we can */
1018 if (zstream.avail_in == 0) {
1019 const ZD_TYPE getSize = (compressed_length > kBufSize) ? kBufSize : compressed_length;
1020 const ZD_TYPE actual = TEMP_FAILURE_RETRY(read(fd, read_buf, getSize));
1021 if (actual != getSize) {
1022 ALOGW("Zip: inflate read failed (" ZD " vs " ZD ")", actual, getSize);
1023 result = kIoError;
1024 goto z_bail;
1025 }
1026
1027 compressed_length -= getSize;
1028
1029 zstream.next_in = read_buf;
1030 zstream.avail_in = getSize;
1031 }
1032
1033 /* uncompress the data */
1034 zerr = inflate(&zstream, Z_NO_FLUSH);
1035 if (zerr != Z_OK && zerr != Z_STREAM_END) {
1036 ALOGW("Zip: inflate zerr=%d (nIn=%p aIn=%u nOut=%p aOut=%u)",
1037 zerr, zstream.next_in, zstream.avail_in,
1038 zstream.next_out, zstream.avail_out);
1039 result = kZlibError;
1040 goto z_bail;
1041 }
1042
1043 /* write when we're full or when we're done */
1044 if (zstream.avail_out == 0 ||
1045 (zerr == Z_STREAM_END && zstream.avail_out != kBufSize)) {
1046 const size_t write_size = zstream.next_out - write_buf;
1047 // The file might have declared a bogus length.
1048 if (write_size + write_count > length) {
1049 goto z_bail;
1050 }
1051 memcpy(begin + write_count, write_buf, write_size);
1052 write_count += write_size;
1053
1054 zstream.next_out = write_buf;
1055 zstream.avail_out = kBufSize;
1056 }
1057 } while (zerr == Z_OK);
1058
1059 assert(zerr == Z_STREAM_END); /* other errors should've been caught */
1060
1061 // stream.adler holds the crc32 value for such streams.
1062 *crc_out = zstream.adler;
1063
1064 if (zstream.total_out != uncompressed_length || compressed_length != 0) {
1065 ALOGW("Zip: size mismatch on inflated file (%lu vs %" PRIu32 ")",
1066 zstream.total_out, uncompressed_length);
1067 result = kInconsistentInformation;
1068 goto z_bail;
1069 }
1070
1071 result = 0;
1072
1073 z_bail:
1074 inflateEnd(&zstream); /* free up any allocated structures */
1075
1076 return result;
1077 }
1078
ExtractToMemory(ZipArchiveHandle handle,ZipEntry * entry,uint8_t * begin,uint32_t size)1079 int32_t ExtractToMemory(ZipArchiveHandle handle,
1080 ZipEntry* entry, uint8_t* begin, uint32_t size) {
1081 ZipArchive* archive = (ZipArchive*) handle;
1082 const uint16_t method = entry->method;
1083 off64_t data_offset = entry->offset;
1084
1085 if (lseek64(archive->fd, data_offset, SEEK_SET) != data_offset) {
1086 ALOGW("Zip: lseek to data at %" PRId64 " failed", (int64_t)data_offset);
1087 return kIoError;
1088 }
1089
1090 // this should default to kUnknownCompressionMethod.
1091 int32_t return_value = -1;
1092 uint64_t crc = 0;
1093 if (method == kCompressStored) {
1094 return_value = CopyFileToFile(archive->fd, begin, size, &crc);
1095 } else if (method == kCompressDeflated) {
1096 return_value = InflateToFile(archive->fd, entry, begin, size, &crc);
1097 }
1098
1099 if (!return_value && entry->has_data_descriptor) {
1100 return_value = UpdateEntryFromDataDescriptor(archive->fd, entry);
1101 if (return_value) {
1102 return return_value;
1103 }
1104 }
1105
1106 // TODO: Fix this check by passing the right flags to inflate2 so that
1107 // it calculates the CRC for us.
1108 if (entry->crc32 != crc && false) {
1109 ALOGW("Zip: crc mismatch: expected %" PRIu32 ", was %" PRIu64, entry->crc32, crc);
1110 return kInconsistentInformation;
1111 }
1112
1113 return return_value;
1114 }
1115
ExtractEntryToFile(ZipArchiveHandle handle,ZipEntry * entry,int fd)1116 int32_t ExtractEntryToFile(ZipArchiveHandle handle,
1117 ZipEntry* entry, int fd) {
1118 const int32_t declared_length = entry->uncompressed_length;
1119
1120 const off64_t current_offset = lseek64(fd, 0, SEEK_CUR);
1121 if (current_offset == -1) {
1122 ALOGW("Zip: unable to seek to current location on fd %d: %s", fd,
1123 strerror(errno));
1124 return kIoError;
1125 }
1126
1127 int result = TEMP_FAILURE_RETRY(ftruncate(fd, declared_length + current_offset));
1128 if (result == -1) {
1129 ALOGW("Zip: unable to truncate file to %" PRId64 ": %s",
1130 (int64_t)(declared_length + current_offset), strerror(errno));
1131 return kIoError;
1132 }
1133
1134 // Don't attempt to map a region of length 0. We still need the
1135 // ftruncate() though, since the API guarantees that we will truncate
1136 // the file to the end of the uncompressed output.
1137 if (declared_length == 0) {
1138 return 0;
1139 }
1140
1141 android::FileMap* map = MapFileSegment(fd, current_offset, declared_length,
1142 false, kTempMappingFileName);
1143 if (map == NULL) {
1144 return kMmapFailed;
1145 }
1146
1147 const int32_t error = ExtractToMemory(handle, entry,
1148 reinterpret_cast<uint8_t*>(map->getDataPtr()),
1149 map->getDataLength());
1150 map->release();
1151 return error;
1152 }
1153
ErrorCodeString(int32_t error_code)1154 const char* ErrorCodeString(int32_t error_code) {
1155 if (error_code > kErrorMessageLowerBound && error_code < kErrorMessageUpperBound) {
1156 return kErrorMessages[error_code * -1];
1157 }
1158
1159 return kErrorMessages[0];
1160 }
1161
GetFileDescriptor(const ZipArchiveHandle handle)1162 int GetFileDescriptor(const ZipArchiveHandle handle) {
1163 return ((ZipArchive*) handle)->fd;
1164 }
1165
1166