• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2008 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *    http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 /*
18  * Read-only access to Zip archives, with minimal heap allocation.
19  */
20 
21 #define LOG_TAG "ziparchive"
22 
23 #include "ziparchive/zip_archive.h"
24 
25 #include <errno.h>
26 #include <fcntl.h>
27 #include <inttypes.h>
28 #include <limits.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <time.h>
32 #include <unistd.h>
33 
34 #ifdef __linux__
35 #include <linux/fs.h>
36 #include <sys/ioctl.h>
37 #include <sys/mman.h>
38 #endif
39 
40 #include <memory>
41 #include <optional>
42 #include <span>
43 #include <vector>
44 
45 #if defined(__APPLE__)
46 #define lseek64 lseek
47 #endif
48 
49 #if defined(__BIONIC__)
50 #include <android/fdsan.h>
51 #endif
52 
53 #include <android-base/file.h>
54 #include <android-base/logging.h>
55 #include <android-base/macros.h>  // TEMP_FAILURE_RETRY may or may not be in unistd
56 #include <android-base/mapped_file.h>
57 #include <android-base/memory.h>
58 #include <android-base/strings.h>
59 #include <android-base/utf8.h>
60 #include <log/log.h>
61 
62 #include "entry_name_utils-inl.h"
63 #include "incfs_support/signal_handling.h"
64 #include "incfs_support/util.h"
65 #include "zip_archive_common.h"
66 #include "zip_archive_private.h"
67 #include "zlib.h"
68 
69 // Used to turn on crc checks - verify that the content CRC matches the values
70 // specified in the local file header and the central directory.
71 static constexpr bool kCrcChecksEnabled = false;
72 
73 // The maximum number of bytes to scan backwards for the EOCD start.
74 static const uint32_t kMaxEOCDSearch = kMaxCommentLen + sizeof(EocdRecord);
75 
// Largest zip file size that is accepted: 256 GiB (2^38 bytes). With this cap,
// every offset and size parsed from the central directory or the local headers
// remains valid when held in a signed 64-bit integer.
static constexpr uint64_t kMaxFileLength = uint64_t{1} << 38;
79 
80 /*
81  * A Read-only Zip archive.
82  *
83  * We want "open" and "find entry by name" to be fast operations, and
84  * we want to use as little memory as possible.  We memory-map the zip
85  * central directory, and load a hash table with pointers to the filenames
86  * (which aren't null-terminated).  The other fields are at a fixed offset
87  * from the filename, so we don't need to extract those (but we do need
88  * to byte-read and endian-swap them every time we want them).
89  *
90  * It's possible that somebody has handed us a massive (~1GB) zip archive,
91  * so we can't expect to mmap the entire file.
92  *
93  * To speed comparisons when doing a lookup by name, we could make the mapping
94  * "private" (copy-on-write) and null-terminate the filenames after verifying
95  * the record structure.  However, this requires a private mapping of
96  * every page that the Central Directory touches.  Easier to tuck a copy
97  * of the string length into the hash table entry.
98  */
99 
// Page granularity used by the alignment helpers below.
constexpr auto kPageSize = 4096;

// Rounds |ptr_int| down to the closest multiple of kPageSize.
[[maybe_unused]] static constexpr uintptr_t pageAlignDown(uintptr_t ptr_int) {
  const uintptr_t in_page_mask = kPageSize - 1;
  return ptr_int & ~in_page_mask;
}

// Rounds |ptr_int| up to the closest multiple of kPageSize; values that are
// already aligned are returned unchanged.
[[maybe_unused]] static constexpr uintptr_t pageAlignUp(uintptr_t ptr_int) {
  return pageAlignDown(ptr_int + (kPageSize - 1));
}

// Widens the byte range [ptr, ptr + size) outwards to whole pages and returns
// the aligned start address together with the aligned length.
[[maybe_unused]] static std::pair<void*, size_t> expandToPageBounds(void* ptr, size_t size) {
  const auto range_begin = reinterpret_cast<uintptr_t>(ptr);
  const auto page_begin = pageAlignDown(range_begin);
  const auto page_end = pageAlignUp(range_begin + size);
  return {reinterpret_cast<void*>(page_begin), page_end - page_begin};
}
116 
maybePrefetch(const void * ptr,size_t size)117 [[maybe_unused]] static void maybePrefetch([[maybe_unused]] const void* ptr,
118                                            [[maybe_unused]] size_t size) {
119 #ifdef __linux__
120   // Let's only ask for a readahead explicitly if there's enough pages to read. A regular OS
121   // readahead implementation would take care of the smaller requests, and it would also involve
122   // only a single kernel transition, just an implicit one from the page fault.
123   //
124   // Note: there's no implementation for other OSes, as the prefetch logic is highly specific
125   // to the memory manager, and we don't have any well defined benchmarks on Windows/Mac;
126   // it also mostly matters only for the cold OS boot where no files are in the page cache yet,
127   // but we rarely would hit this situation outside of the device startup.
128   auto [aligned_ptr, aligned_size] = expandToPageBounds(const_cast<void*>(ptr), size);
129   if (aligned_size > 32 * kPageSize) {
130     if (::madvise(aligned_ptr, aligned_size, MADV_WILLNEED)) {
131       ALOGW("Zip: madvise(file, WILLNEED) failed: %s (%d)", strerror(errno), errno);
132     }
133   }
134 #endif
135 }
136 
maybePrepareSequentialReading(const void * ptr,size_t size)137 [[maybe_unused]] static void maybePrepareSequentialReading([[maybe_unused]] const void* ptr,
138                                                            [[maybe_unused]] size_t size) {
139 #ifdef __linux__
140   auto [aligned_ptr, aligned_size] = expandToPageBounds(const_cast<void*>(ptr), size);
141   if (::madvise(reinterpret_cast<void*>(aligned_ptr), aligned_size, MADV_SEQUENTIAL)) {
142     ALOGW("Zip: madvise(file, SEQUENTIAL) failed: %s (%d)", strerror(errno), errno);
143   }
144 #endif
145 }
146 
#if defined(__BIONIC__)
// Builds the fdsan owner tag for |archive| from the ZIPARCHIVE owner type and
// the archive object's address.
static uint64_t GetOwnerTag(const ZipArchive* archive) {
  const auto archive_id = reinterpret_cast<uint64_t>(archive);
  return android_fdsan_create_owner_tag(ANDROID_FDSAN_OWNER_TYPE_ZIPARCHIVE, archive_id);
}
#endif
153 
ZipArchive(MappedZipFile && map,bool assume_ownership)154 ZipArchive::ZipArchive(MappedZipFile&& map, bool assume_ownership)
155     : mapped_zip(std::move(map)),
156       close_file(assume_ownership),
157       directory_offset(0),
158       central_directory(),
159       directory_map(),
160       num_entries(0) {
161 #if defined(__BIONIC__)
162   if (assume_ownership) {
163     CHECK(mapped_zip.GetFileDescriptor() >= 0 || !mapped_zip.GetBasePtr());
164     android_fdsan_exchange_owner_tag(mapped_zip.GetFileDescriptor(), 0, GetOwnerTag(this));
165   }
166 #endif
167 }
168 
ZipArchive(const void * address,size_t length)169 ZipArchive::ZipArchive(const void* address, size_t length)
170     : mapped_zip(address, length),
171       close_file(false),
172       directory_offset(0),
173       central_directory(),
174       directory_map(),
175       num_entries(0) {}
176 
~ZipArchive()177 ZipArchive::~ZipArchive() {
178   if (close_file && mapped_zip.GetFileDescriptor() >= 0) {
179 #if defined(__BIONIC__)
180     android_fdsan_close_with_tag(mapped_zip.GetFileDescriptor(), GetOwnerTag(this));
181 #else
182     close(mapped_zip.GetFileDescriptor());
183 #endif
184   }
185 }
186 
// Location and extent of the central directory, as read from the EOCD record
// (or from the zip64 EOCD record).
struct CentralDirectoryInfo {
  // Number of records reported by the EOCD.
  uint64_t num_records;
  // Size of the central directory, in bytes.
  uint64_t cd_size;
  // Offset at which the central directory starts, counted from the start of
  // the file.
  uint64_t cd_start_offset;
};
195 
196 // Reads |T| at |readPtr| and increments |readPtr|. Returns std::nullopt if the boundary check
197 // fails.
198 template <typename T>
TryConsumeUnaligned(uint8_t ** readPtr,const uint8_t * bufStart,size_t bufSize)199 static std::optional<T> TryConsumeUnaligned(uint8_t** readPtr, const uint8_t* bufStart,
200                                             size_t bufSize) {
201   if (bufSize < sizeof(T) || *readPtr - bufStart > bufSize - sizeof(T)) {
202     ALOGW("Zip: %zu byte read exceeds the boundary of allocated buf, offset %zu, bufSize %zu",
203           sizeof(T), *readPtr - bufStart, bufSize);
204     return std::nullopt;
205   }
206   return ConsumeUnaligned<T>(readPtr);
207 }
208 
FindCentralDirectoryInfoForZip64(const char * debugFileName,ZipArchive * archive,off64_t eocdOffset,CentralDirectoryInfo * cdInfo)209 static ZipError FindCentralDirectoryInfoForZip64(const char* debugFileName, ZipArchive* archive,
210                                                  off64_t eocdOffset, CentralDirectoryInfo* cdInfo) {
211   if (eocdOffset <= sizeof(Zip64EocdLocator)) {
212     ALOGW("Zip: %s: Not enough space for zip64 eocd locator", debugFileName);
213     return kInvalidFile;
214   }
215   // We expect to find the zip64 eocd locator immediately before the zip eocd.
216   const int64_t locatorOffset = eocdOffset - sizeof(Zip64EocdLocator);
217   Zip64EocdLocator zip64EocdLocatorBuf;
218   const auto zip64EocdLocator = reinterpret_cast<const Zip64EocdLocator*>(
219       archive->mapped_zip.ReadAtOffset(reinterpret_cast<uint8_t*>((&zip64EocdLocatorBuf)),
220                                        sizeof(zip64EocdLocatorBuf), locatorOffset));
221   if (!zip64EocdLocator) {
222     ALOGW("Zip: %s: Read %zu from offset %" PRId64 " failed %s", debugFileName,
223           sizeof(zip64EocdLocatorBuf), locatorOffset, debugFileName);
224     return kIoError;
225   }
226 
227   if (zip64EocdLocator->locator_signature != Zip64EocdLocator::kSignature) {
228     ALOGW("Zip: %s: Zip64 eocd locator signature not found at offset %" PRId64, debugFileName,
229           locatorOffset);
230     return kInvalidFile;
231   }
232 
233   const int64_t zip64EocdOffset = zip64EocdLocator->zip64_eocd_offset;
234   if (locatorOffset <= sizeof(Zip64EocdRecord) ||
235       zip64EocdOffset > locatorOffset - sizeof(Zip64EocdRecord)) {
236     ALOGW("Zip: %s: Bad zip64 eocd offset %" PRId64 ", eocd locator offset %" PRId64, debugFileName,
237           zip64EocdOffset, locatorOffset);
238     return kInvalidOffset;
239   }
240 
241   Zip64EocdRecord zip64EocdRecordBuf;
242   const auto zip64EocdRecord = reinterpret_cast<const Zip64EocdRecord*>(
243       archive->mapped_zip.ReadAtOffset(reinterpret_cast<uint8_t*>(&zip64EocdRecordBuf),
244                                        sizeof(zip64EocdRecordBuf), zip64EocdOffset));
245   if (!zip64EocdRecord) {
246     ALOGW("Zip: %s: read %zu from offset %" PRId64 " failed %s", debugFileName,
247           sizeof(zip64EocdRecordBuf), zip64EocdOffset, debugFileName);
248     return kIoError;
249   }
250 
251   if (zip64EocdRecord->record_signature != Zip64EocdRecord::kSignature) {
252     ALOGW("Zip: %s: Zip64 eocd record signature not found at offset %" PRId64, debugFileName,
253           zip64EocdOffset);
254     return kInvalidFile;
255   }
256 
257   if (zip64EocdOffset <= zip64EocdRecord->cd_size ||
258       zip64EocdRecord->cd_start_offset > zip64EocdOffset - zip64EocdRecord->cd_size) {
259     ALOGW("Zip: %s: Bad offset for zip64 central directory. cd offset %" PRIu64 ", cd size %" PRIu64
260           ", zip64 eocd offset %" PRIu64,
261           debugFileName, zip64EocdRecord->cd_start_offset, zip64EocdRecord->cd_size,
262           zip64EocdOffset);
263     return kInvalidOffset;
264   }
265 
266   *cdInfo = {.num_records = zip64EocdRecord->num_records,
267              .cd_size = zip64EocdRecord->cd_size,
268              .cd_start_offset = zip64EocdRecord->cd_start_offset};
269 
270   return kSuccess;
271 }
272 
FindCentralDirectoryInfo(const char * debug_file_name,ZipArchive * archive,off64_t file_length,std::span<uint8_t> scan_buffer,CentralDirectoryInfo * cdInfo)273 static ZipError FindCentralDirectoryInfo(const char* debug_file_name,
274                                          ZipArchive* archive,
275                                          off64_t file_length,
276                                          std::span<uint8_t> scan_buffer,
277                                          CentralDirectoryInfo* cdInfo) {
278   const auto read_amount = static_cast<uint32_t>(scan_buffer.size());
279   const off64_t search_start = file_length - read_amount;
280 
281   const auto data = archive->mapped_zip.ReadAtOffset(scan_buffer.data(), read_amount, search_start);
282   if (!data) {
283     ALOGE("Zip: read %" PRId64 " from offset %" PRId64 " failed", static_cast<int64_t>(read_amount),
284           static_cast<int64_t>(search_start));
285     return kIoError;
286   }
287 
288   /*
289    * Scan backward for the EOCD magic.  In an archive without a trailing
290    * comment, we'll find it on the first try.  (We may want to consider
291    * doing an initial minimal read; if we don't find it, retry with a
292    * second read as above.)
293    */
294   CHECK_LE(read_amount, std::numeric_limits<int32_t>::max());
295   int32_t i = read_amount - sizeof(EocdRecord);
296   for (; i >= 0; i--) {
297     if (data[i] == 0x50) {
298       const uint32_t* sig_addr = reinterpret_cast<const uint32_t*>(&data[i]);
299       if (android::base::get_unaligned<uint32_t>(sig_addr) == EocdRecord::kSignature) {
300         ALOGV("+++ Found EOCD at buf+%d", i);
301         break;
302       }
303     }
304   }
305   if (i < 0) {
306     ALOGD("Zip: EOCD not found, %s is not zip", debug_file_name);
307     return kInvalidFile;
308   }
309 
310   const off64_t eocd_offset = search_start + i;
311   auto eocd = reinterpret_cast<const EocdRecord*>(data + i);
312   /*
313    * Verify that there's no trailing space at the end of the central directory
314    * and its comment.
315    */
316   const off64_t calculated_length = eocd_offset + sizeof(EocdRecord) + eocd->comment_length;
317   if (calculated_length != file_length) {
318     ALOGW("Zip: %" PRId64 " extraneous bytes at the end of the central directory",
319           static_cast<int64_t>(file_length - calculated_length));
320     return kInvalidFile;
321   }
322 
323   // One of the field is 0xFFFFFFFF, look for the zip64 EOCD instead.
324   if (eocd->cd_size == UINT32_MAX || eocd->cd_start_offset == UINT32_MAX) {
325     ALOGV("Looking for the zip64 EOCD, cd_size: %" PRIu32 "cd_start_offset: %" PRId32,
326           eocd->cd_size, eocd->cd_start_offset);
327     return FindCentralDirectoryInfoForZip64(debug_file_name, archive, eocd_offset, cdInfo);
328   }
329 
330   /*
331    * Grab the CD offset and size, and the number of entries in the
332    * archive and verify that they look reasonable.
333    */
334   if (static_cast<off64_t>(eocd->cd_start_offset) + eocd->cd_size > eocd_offset) {
335     ALOGW("Zip: bad offsets (dir %" PRIu32 ", size %" PRIu32 ", eocd %" PRId64 ")",
336           eocd->cd_start_offset, eocd->cd_size, static_cast<int64_t>(eocd_offset));
337     return kInvalidOffset;
338   }
339 
340   *cdInfo = {.num_records = eocd->num_records,
341              .cd_size = eocd->cd_size,
342              .cd_start_offset = eocd->cd_start_offset};
343   return kSuccess;
344 }
345 
346 /*
347  * Find the zip Central Directory and memory-map it.
348  *
349  * On success, returns kSuccess after populating fields from the EOCD area:
350  *   directory_offset
351  *   directory_ptr
352  *   num_entries
353  */
MapCentralDirectory(const char * debug_file_name,ZipArchive * archive)354 static ZipError MapCentralDirectory(const char* debug_file_name, ZipArchive* archive) {
355   // Test file length. We want to make sure the file is small enough to be a zip
356   // file.
357   off64_t file_length = archive->mapped_zip.GetFileLength();
358   if (file_length == -1) {
359     return kInvalidFile;
360   }
361 
362   if (file_length > kMaxFileLength) {
363     ALOGV("Zip: zip file too long %" PRId64, static_cast<int64_t>(file_length));
364     return kInvalidFile;
365   }
366 
367   if (file_length < static_cast<off64_t>(sizeof(EocdRecord))) {
368     ALOGV("Zip: length %" PRId64 " is too small to be zip", static_cast<int64_t>(file_length));
369     return kInvalidFile;
370   }
371 
372   /*
373    * Perform the traditional EOCD snipe hunt.
374    *
375    * We're searching for the End of Central Directory magic number,
376    * which appears at the start of the EOCD block.  It's followed by
377    * 18 bytes of EOCD stuff and up to 64KB of archive comment.  We
378    * need to read the last part of the file into a buffer, dig through
379    * it to find the magic number, parse some values out, and use those
380    * to determine the extent of the CD.
381    *
382    * We start by pulling in the last part of the file.
383    */
384   const auto read_amount = uint32_t(std::min<off64_t>(file_length, kMaxEOCDSearch));
385 
386   CentralDirectoryInfo cdInfo = {};
387   std::vector<uint8_t> scan_buffer(read_amount);
388 
389   SCOPED_SIGBUS_HANDLER({
390     incfs::util::clearAndFree(scan_buffer);
391     return kIoError;
392   });
393 
394   if (auto result = FindCentralDirectoryInfo(debug_file_name, archive,
395                                              file_length, scan_buffer, &cdInfo);
396       result != kSuccess) {
397     return result;
398   }
399 
400   scan_buffer.clear();
401 
402   if (cdInfo.num_records == 0) {
403 #if defined(__ANDROID__)
404     ALOGW("Zip: empty archive?");
405 #endif
406     return kEmptyArchive;
407   }
408 
409   if (cdInfo.cd_size >= SIZE_MAX) {
410     ALOGW("Zip: The size of central directory doesn't fit in range of size_t: %" PRIu64,
411           cdInfo.cd_size);
412     return kInvalidFile;
413   }
414 
415   ALOGV("+++ num_entries=%" PRIu64 " dir_size=%" PRIu64 " dir_offset=%" PRIu64, cdInfo.num_records,
416         cdInfo.cd_size, cdInfo.cd_start_offset);
417 
418   // It all looks good.  Create a mapping for the CD, and set the fields in archive.
419   if (!archive->InitializeCentralDirectory(static_cast<off64_t>(cdInfo.cd_start_offset),
420                                            static_cast<size_t>(cdInfo.cd_size))) {
421     return kMmapFailed;
422   }
423 
424   archive->num_entries = cdInfo.num_records;
425   archive->directory_offset = cdInfo.cd_start_offset;
426 
427   return kSuccess;
428 }
429 
ParseZip64ExtendedInfoInExtraField(const uint8_t * extraFieldStart,uint16_t extraFieldLength,uint32_t zip32UncompressedSize,uint32_t zip32CompressedSize,std::optional<uint32_t> zip32LocalFileHeaderOffset,Zip64ExtendedInfo * zip64Info)430 static ZipError ParseZip64ExtendedInfoInExtraField(
431     const uint8_t* extraFieldStart, uint16_t extraFieldLength, uint32_t zip32UncompressedSize,
432     uint32_t zip32CompressedSize, std::optional<uint32_t> zip32LocalFileHeaderOffset,
433     Zip64ExtendedInfo* zip64Info) {
434   if (extraFieldLength <= 4) {
435     ALOGW("Zip: Extra field isn't large enough to hold zip64 info, size %" PRIu16,
436           extraFieldLength);
437     return kInvalidFile;
438   }
439 
440   // Each header MUST consist of:
441   // Header ID - 2 bytes
442   // Data Size - 2 bytes
443   uint16_t offset = 0;
444   while (offset < extraFieldLength - 4) {
445     auto readPtr = const_cast<uint8_t*>(extraFieldStart + offset);
446     auto headerId = ConsumeUnaligned<uint16_t>(&readPtr);
447     auto dataSize = ConsumeUnaligned<uint16_t>(&readPtr);
448 
449     offset += 4;
450     if (dataSize > extraFieldLength - offset) {
451       ALOGW("Zip: Data size exceeds the boundary of extra field, data size %" PRIu16, dataSize);
452       return kInvalidOffset;
453     }
454 
455     // Skip the other types of extensible data fields. Details in
456     // https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT section 4.5
457     if (headerId != Zip64ExtendedInfo::kHeaderId) {
458       offset += dataSize;
459       continue;
460     }
461     // Layout for Zip64 extended info (not include first 4 bytes of header)
462     // Original
463     // Size       8 bytes    Original uncompressed file size
464 
465     // Compressed
466     // Size       8 bytes    Size of compressed data
467 
468     // Relative Header
469     // Offset     8 bytes    Offset of local header record
470 
471     // Disk Start
472     // Number     4 bytes    Number of the disk on which
473     //                       this file starts
474     if (dataSize == 8 * 3 + 4) {
475       ALOGW(
476           "Zip: Found `Disk Start Number` field in extra block. Ignoring it.");
477       dataSize -= 4;
478     }
479     // Sometimes, only a subset of {uncompressed size, compressed size, relative
480     // header offset} is presents. but golang's zip writer will write out all
481     // 3 even if only 1 is necessary. We should parse all 3 fields if they are
482     // there.
483     const bool completeField = dataSize == 8 * 3;
484 
485     std::optional<uint64_t> uncompressedFileSize;
486     std::optional<uint64_t> compressedFileSize;
487     std::optional<uint64_t> localHeaderOffset;
488     if (zip32UncompressedSize == UINT32_MAX || completeField) {
489       uncompressedFileSize = TryConsumeUnaligned<uint64_t>(
490           &readPtr, extraFieldStart, extraFieldLength);
491       if (!uncompressedFileSize.has_value()) return kInvalidOffset;
492     }
493     if (zip32CompressedSize == UINT32_MAX || completeField) {
494       compressedFileSize = TryConsumeUnaligned<uint64_t>(
495           &readPtr, extraFieldStart, extraFieldLength);
496       if (!compressedFileSize.has_value()) return kInvalidOffset;
497     }
498     if (zip32LocalFileHeaderOffset == UINT32_MAX || completeField) {
499       localHeaderOffset = TryConsumeUnaligned<uint64_t>(
500           &readPtr, extraFieldStart, extraFieldLength);
501       if (!localHeaderOffset.has_value()) return kInvalidOffset;
502     }
503 
504     // calculate how many bytes we read after the data size field.
505     size_t bytesRead = readPtr - (extraFieldStart + offset);
506     if (bytesRead == 0) {
507       ALOGW("Zip: Data size should not be 0 in zip64 extended field");
508       return kInvalidFile;
509     }
510 
511     if (dataSize != bytesRead) {
512       auto localOffsetString = zip32LocalFileHeaderOffset.has_value()
513                                    ? std::to_string(zip32LocalFileHeaderOffset.value())
514                                    : "missing";
515       ALOGW("Zip: Invalid data size in zip64 extended field, expect %zu , get %" PRIu16
516             ", uncompressed size %" PRIu32 ", compressed size %" PRIu32 ", local header offset %s",
517             bytesRead, dataSize, zip32UncompressedSize, zip32CompressedSize,
518             localOffsetString.c_str());
519       return kInvalidFile;
520     }
521 
522     zip64Info->uncompressed_file_size = uncompressedFileSize;
523     zip64Info->compressed_file_size = compressedFileSize;
524     zip64Info->local_header_offset = localHeaderOffset;
525     return kSuccess;
526   }
527 
528   ALOGW("Zip: zip64 extended info isn't found in the extra field.");
529   return kInvalidFile;
530 }
531 
532 /*
533  * Parses the Zip archive's Central Directory.  Allocates and populates the
534  * hash table.
535  *
536  * Returns 0 on success.
537  */
ParseZipArchive(ZipArchive * archive)538 static ZipError ParseZipArchive(ZipArchive* archive) {
539   SCOPED_SIGBUS_HANDLER(return kIoError);
540 
541   maybePrefetch(archive->central_directory.GetBasePtr(), archive->central_directory.GetMapLength());
542   const uint8_t* const cd_ptr = archive->central_directory.GetBasePtr();
543   const size_t cd_length = archive->central_directory.GetMapLength();
544   const uint8_t* const cd_end = cd_ptr + cd_length;
545   const uint64_t num_entries = archive->num_entries;
546   const uint8_t* ptr = cd_ptr;
547   uint16_t max_file_name_length = 0;
548 
549   /* Walk through the central directory and verify values */
550   for (uint64_t i = 0; i < num_entries; i++) {
551     if (ptr > cd_end - sizeof(CentralDirectoryRecord)) {
552       ALOGW("Zip: ran off the end (item #%" PRIu64 ", %zu bytes of central directory)", i,
553             cd_length);
554 #if defined(__ANDROID__)
555       android_errorWriteLog(0x534e4554, "36392138");
556 #endif
557       return kInvalidFile;
558     }
559 
560     auto cdr = reinterpret_cast<const CentralDirectoryRecord*>(ptr);
561     if (cdr->record_signature != CentralDirectoryRecord::kSignature) {
562       ALOGW("Zip: missed a central dir sig (at %" PRIu64 ")", i);
563       return kInvalidFile;
564     }
565 
566     const uint16_t file_name_length = cdr->file_name_length;
567     const uint16_t extra_length = cdr->extra_field_length;
568     const uint16_t comment_length = cdr->comment_length;
569     const uint8_t* file_name = ptr + sizeof(CentralDirectoryRecord);
570 
571     if (file_name_length >= cd_length || file_name > cd_end - file_name_length) {
572       ALOGW("Zip: file name for entry %" PRIu64
573             " exceeds the central directory range, file_name_length: %" PRIu16 ", cd_length: %zu",
574             i, file_name_length, cd_length);
575       return kInvalidEntryName;
576     }
577 
578     max_file_name_length = std::max(max_file_name_length, file_name_length);
579 
580     const uint8_t* extra_field = file_name + file_name_length;
581     if (extra_length >= cd_length || extra_field > cd_end - extra_length) {
582       ALOGW("Zip: extra field for entry %" PRIu64
583             " exceeds the central directory range, file_name_length: %" PRIu16 ", cd_length: %zu",
584             i, extra_length, cd_length);
585       return kInvalidFile;
586     }
587 
588     off64_t local_header_offset = cdr->local_file_header_offset;
589     if (local_header_offset == UINT32_MAX) {
590       Zip64ExtendedInfo zip64_info{};
591       if (auto status = ParseZip64ExtendedInfoInExtraField(
592               extra_field, extra_length, cdr->uncompressed_size, cdr->compressed_size,
593               cdr->local_file_header_offset, &zip64_info);
594           status != kSuccess) {
595         return status;
596       }
597       CHECK(zip64_info.local_header_offset.has_value());
598       local_header_offset = zip64_info.local_header_offset.value();
599     }
600 
601     if (local_header_offset >= archive->directory_offset) {
602       ALOGW("Zip: bad LFH offset %" PRId64 " at entry %" PRIu64,
603             static_cast<int64_t>(local_header_offset), i);
604       return kInvalidFile;
605     }
606 
607     // Check that file name is valid UTF-8 and doesn't contain NUL (U+0000) characters.
608     if (!IsValidEntryName(file_name, file_name_length)) {
609       ALOGW("Zip: invalid file name at entry %" PRIu64, i);
610       return kInvalidEntryName;
611     }
612 
613     ptr += sizeof(CentralDirectoryRecord) + file_name_length + extra_length + comment_length;
614     if ((ptr - cd_ptr) > static_cast<int64_t>(cd_length)) {
615       ALOGW("Zip: bad CD advance (%tu vs %zu) at entry %" PRIu64, ptr - cd_ptr, cd_length, i);
616       return kInvalidFile;
617     }
618   }
619 
620   /* Create memory efficient entry map */
621   archive->cd_entry_map = CdEntryMapInterface::Create(num_entries, cd_length, max_file_name_length);
622   if (archive->cd_entry_map == nullptr) {
623     return kAllocationFailed;
624   }
625 
626   /* Central directory verified, now add entries to the hash table */
627   ptr = cd_ptr;
628   for (uint64_t i = 0; i < num_entries; i++) {
629     auto cdr = reinterpret_cast<const CentralDirectoryRecord*>(ptr);
630     std::string_view entry_name{reinterpret_cast<const char*>(ptr + sizeof(*cdr)),
631                                 cdr->file_name_length};
632     auto add_result = archive->cd_entry_map->AddToMap(entry_name, cd_ptr);
633     if (add_result != 0) {
634       ALOGW("Zip: Error adding entry to hash table %d", add_result);
635       return add_result;
636     }
637     ptr += sizeof(*cdr) + cdr->file_name_length + cdr->extra_field_length + cdr->comment_length;
638   }
639 
640   uint32_t lfh_start_bytes_buf;
641   auto lfh_start_bytes = reinterpret_cast<const uint32_t*>(archive->mapped_zip.ReadAtOffset(
642       reinterpret_cast<uint8_t*>(&lfh_start_bytes_buf), sizeof(lfh_start_bytes_buf), 0));
643   if (!lfh_start_bytes) {
644     ALOGW("Zip: Unable to read header for entry at offset == 0.");
645     return kInvalidFile;
646   }
647 
648   if (*lfh_start_bytes != LocalFileHeader::kSignature) {
649     ALOGW("Zip: Entry at offset zero has invalid LFH signature %" PRIx32, *lfh_start_bytes);
650 #if defined(__ANDROID__)
651     android_errorWriteLog(0x534e4554, "64211847");
652 #endif
653     return kInvalidFile;
654   }
655 
656   ALOGV("+++ zip good scan %" PRIu64 " entries", num_entries);
657 
658   return kSuccess;
659 }
660 
OpenArchiveInternal(ZipArchive * archive,const char * debug_file_name)661 static int32_t OpenArchiveInternal(ZipArchive* archive, const char* debug_file_name) {
662   int32_t result = MapCentralDirectory(debug_file_name, archive);
663   return result != kSuccess ? result : ParseZipArchive(archive);
664 }
665 
OpenArchiveFd(int fd,const char * debug_file_name,ZipArchiveHandle * handle,bool assume_ownership)666 int32_t OpenArchiveFd(int fd, const char* debug_file_name, ZipArchiveHandle* handle,
667                       bool assume_ownership) {
668   ZipArchive* archive = new ZipArchive(MappedZipFile(fd), assume_ownership);
669   *handle = archive;
670   return OpenArchiveInternal(archive, debug_file_name);
671 }
672 
OpenArchiveFdRange(int fd,const char * debug_file_name,ZipArchiveHandle * handle,off64_t length,off64_t offset,bool assume_ownership)673 int32_t OpenArchiveFdRange(int fd, const char* debug_file_name, ZipArchiveHandle* handle,
674                            off64_t length, off64_t offset, bool assume_ownership) {
675   ZipArchive* archive = new ZipArchive(MappedZipFile(fd, length, offset), assume_ownership);
676   *handle = archive;
677 
678   if (length < 0) {
679     ALOGW("Invalid zip length %" PRId64, length);
680     return kIoError;
681   }
682 
683   if (offset < 0) {
684     ALOGW("Invalid zip offset %" PRId64, offset);
685     return kIoError;
686   }
687 
688   return OpenArchiveInternal(archive, debug_file_name);
689 }
690 
OpenArchive(const char * fileName,ZipArchiveHandle * handle)691 int32_t OpenArchive(const char* fileName, ZipArchiveHandle* handle) {
692   const int fd = ::android::base::utf8::open(fileName, O_RDONLY | O_BINARY | O_CLOEXEC, 0);
693   ZipArchive* archive = new ZipArchive(MappedZipFile(fd), true);
694   *handle = archive;
695 
696   if (fd < 0) {
697     ALOGW("Unable to open '%s': %s", fileName, strerror(errno));
698     return kIoError;
699   }
700 
701   return OpenArchiveInternal(archive, fileName);
702 }
703 
OpenArchiveFromMemory(const void * address,size_t length,const char * debug_file_name,ZipArchiveHandle * handle)704 int32_t OpenArchiveFromMemory(const void* address, size_t length, const char* debug_file_name,
705                               ZipArchiveHandle* handle) {
706   ZipArchive* archive = new ZipArchive(address, length);
707   *handle = archive;
708   return OpenArchiveInternal(archive, debug_file_name);
709 }
710 
GetArchiveInfo(ZipArchiveHandle archive)711 ZipArchiveInfo GetArchiveInfo(ZipArchiveHandle archive) {
712   ZipArchiveInfo result;
713   result.archive_size = archive->mapped_zip.GetFileLength();
714   result.entry_count = archive->num_entries;
715   return result;
716 }
717 
718 /*
719  * Close a ZipArchive, closing the file and freeing the contents.
720  */
CloseArchive(ZipArchiveHandle archive)721 void CloseArchive(ZipArchiveHandle archive) {
722   ALOGV("Closing archive %p", archive);
723   delete archive;
724 }
725 
ValidateDataDescriptor(MappedZipFile & mapped_zip,const ZipEntry64 * entry)726 static int32_t ValidateDataDescriptor(MappedZipFile& mapped_zip, const ZipEntry64* entry) {
727   SCOPED_SIGBUS_HANDLER(return kIoError);
728 
729   // Maximum possible size for data descriptor: 2 * 4 + 2 * 8 = 24 bytes
730   // The zip format doesn't specify the size of data descriptor. But we won't read OOB here even
731   // if the descriptor isn't present. Because the size cd + eocd in the end of the zipfile is
732   // larger than 24 bytes. And if the descriptor contains invalid data, we'll abort due to
733   // kInconsistentInformation.
734   uint8_t ddBuf[24];
735   off64_t offset = entry->offset;
736   if (entry->method != kCompressStored) {
737     offset += entry->compressed_length;
738   } else {
739     offset += entry->uncompressed_length;
740   }
741 
742   const auto ddPtr = mapped_zip.ReadAtOffset(ddBuf, sizeof(ddBuf), offset);
743   if (!ddPtr) {
744     return kIoError;
745   }
746 
747   const uint32_t ddSignature = *(reinterpret_cast<const uint32_t*>(ddPtr));
748   const uint8_t* ddReadPtr = (ddSignature == DataDescriptor::kOptSignature) ? ddPtr + 4 : ddPtr;
749   DataDescriptor descriptor{};
750   descriptor.crc32 = ConsumeUnaligned<uint32_t>(&ddReadPtr);
751   // Don't use entry->zip64_format_size, because that is set to true even if
752   // both compressed/uncompressed size are < 0xFFFFFFFF.
753   constexpr auto u32max = std::numeric_limits<uint32_t>::max();
754   if (entry->compressed_length >= u32max ||
755       entry->uncompressed_length >= u32max) {
756     descriptor.compressed_size = ConsumeUnaligned<uint64_t>(&ddReadPtr);
757     descriptor.uncompressed_size = ConsumeUnaligned<uint64_t>(&ddReadPtr);
758   } else {
759     descriptor.compressed_size = ConsumeUnaligned<uint32_t>(&ddReadPtr);
760     descriptor.uncompressed_size = ConsumeUnaligned<uint32_t>(&ddReadPtr);
761   }
762 
763   // Validate that the values in the data descriptor match those in the central
764   // directory.
765   if (entry->compressed_length != descriptor.compressed_size ||
766       entry->uncompressed_length != descriptor.uncompressed_size ||
767       entry->crc32 != descriptor.crc32) {
768     ALOGW("Zip: size/crc32 mismatch. expected {%" PRIu64 ", %" PRIu64 ", %" PRIx32
769           "}, was {%" PRIu64 ", %" PRIu64 ", %" PRIx32 "}",
770           entry->compressed_length, entry->uncompressed_length, entry->crc32,
771           descriptor.compressed_size, descriptor.uncompressed_size, descriptor.crc32);
772     return kInconsistentInformation;
773   }
774 
775   return 0;
776 }
777 
FindEntry(const ZipArchive * archive,std::string_view entryName,const uint64_t nameOffset,ZipEntry64 * data)778 static int32_t FindEntry(const ZipArchive* archive, std::string_view entryName,
779                          const uint64_t nameOffset, ZipEntry64* data) {
780   std::vector<uint8_t> buffer;
781   SCOPED_SIGBUS_HANDLER({
782     incfs::util::clearAndFree(buffer);
783     return kIoError;
784   });
785 
786   // Recover the start of the central directory entry from the filename
787   // pointer.  The filename is the first entry past the fixed-size data,
788   // so we can just subtract back from that.
789   const uint8_t* base_ptr = archive->central_directory.GetBasePtr();
790   const uint8_t* ptr = base_ptr + nameOffset;
791   ptr -= sizeof(CentralDirectoryRecord);
792 
793   // This is the base of our mmapped region, we have to check that
794   // the name that's in the hash table is a pointer to a location within
795   // this mapped region.
796   if (ptr < base_ptr || ptr > base_ptr + archive->central_directory.GetMapLength()) {
797     ALOGW("Zip: Invalid entry pointer");
798     return kInvalidOffset;
799   }
800 
801   auto cdr = reinterpret_cast<const CentralDirectoryRecord*>(ptr);
802 
803   // The offset of the start of the central directory in the zipfile.
804   // We keep this lying around so that we can check all our lengths
805   // and our per-file structures.
806   const off64_t cd_offset = archive->directory_offset;
807 
808   // Fill out the compression method, modification time, crc32
809   // and other interesting attributes from the central directory. These
810   // will later be compared against values from the local file header.
811   data->method = cdr->compression_method;
812   data->mod_time = cdr->last_mod_date << 16 | cdr->last_mod_time;
813   data->crc32 = cdr->crc32;
814   data->compressed_length = cdr->compressed_size;
815   data->uncompressed_length = cdr->uncompressed_size;
816 
817   // Figure out the local header offset from the central directory. The
818   // actual file data will begin after the local header and the name /
819   // extra comments.
820   off64_t local_header_offset = cdr->local_file_header_offset;
821   // One of the info field is UINT32_MAX, try to parse the real value in the zip64 extended info in
822   // the extra field.
823   if (cdr->uncompressed_size == UINT32_MAX || cdr->compressed_size == UINT32_MAX ||
824       cdr->local_file_header_offset == UINT32_MAX) {
825     const uint8_t* extra_field = ptr + sizeof(CentralDirectoryRecord) + cdr->file_name_length;
826     Zip64ExtendedInfo zip64_info{};
827     if (auto status = ParseZip64ExtendedInfoInExtraField(
828             extra_field, cdr->extra_field_length, cdr->uncompressed_size, cdr->compressed_size,
829             cdr->local_file_header_offset, &zip64_info);
830         status != kSuccess) {
831       return status;
832     }
833 
834     data->uncompressed_length = zip64_info.uncompressed_file_size.value_or(cdr->uncompressed_size);
835     data->compressed_length = zip64_info.compressed_file_size.value_or(cdr->compressed_size);
836     local_header_offset = zip64_info.local_header_offset.value_or(local_header_offset);
837     data->zip64_format_size =
838         cdr->uncompressed_size == UINT32_MAX || cdr->compressed_size == UINT32_MAX;
839   }
840 
841   off64_t local_header_end;
842   if (__builtin_add_overflow(local_header_offset, sizeof(LocalFileHeader), &local_header_end) ||
843       local_header_end >= cd_offset) {
844     // We tested >= because the name that follows can't be zero length.
845     ALOGW("Zip: bad local hdr offset in zip");
846     return kInvalidOffset;
847   }
848 
849   uint8_t lfh_buf[sizeof(LocalFileHeader)];
850   const auto lfh = reinterpret_cast<const LocalFileHeader*>(
851       archive->mapped_zip.ReadAtOffset(lfh_buf, sizeof(lfh_buf), local_header_offset));
852   if (!lfh) {
853     ALOGW("Zip: failed reading lfh name from offset %" PRId64,
854           static_cast<int64_t>(local_header_offset));
855     return kIoError;
856   }
857 
858   if (lfh->lfh_signature != LocalFileHeader::kSignature) {
859     ALOGW("Zip: didn't find signature at start of lfh, offset=%" PRId64,
860           static_cast<int64_t>(local_header_offset));
861     return kInvalidOffset;
862   }
863 
864   // Check that the local file header name matches the declared name in the central directory.
865   CHECK_LE(entryName.size(), UINT16_MAX);
866   auto name_length = static_cast<uint16_t>(entryName.size());
867   if (lfh->file_name_length != name_length) {
868     ALOGW("Zip: lfh name length did not match central directory for %s: %" PRIu16 " %" PRIu16,
869           std::string(entryName).c_str(), lfh->file_name_length, name_length);
870     return kInconsistentInformation;
871   }
872   off64_t name_offset;
873   if (__builtin_add_overflow(local_header_offset, sizeof(LocalFileHeader), &name_offset)) {
874     ALOGW("Zip: lfh name offset invalid");
875     return kInvalidOffset;
876   }
877   off64_t name_end;
878   if (__builtin_add_overflow(name_offset, name_length, &name_end) || name_end > cd_offset) {
879     // We tested > cd_offset here because the file data that follows can be zero length.
880     ALOGW("Zip: lfh name length invalid");
881     return kInvalidOffset;
882   }
883 
884   // An optimization: get enough memory on the stack to be able to use it later without an extra
885   // allocation when reading the zip64 extended info. Reasonable names should be under half the
886   // MAX_PATH (256 chars), and Zip64 header size is 32 bytes; archives often have some other extras,
887   // e.g. alignment, so 128 bytes is outght to be enough for (almost) anybody. If it's not we'll
888   // reallocate later anyway.
889   uint8_t static_buf[128];
890   auto name_buf = static_buf;
891   if (name_length > std::size(static_buf)) {
892     buffer.resize(name_length);
893     name_buf = buffer.data();
894   }
895   const auto read_name = archive->mapped_zip.ReadAtOffset(name_buf, name_length, name_offset);
896   if (!read_name) {
897     ALOGW("Zip: failed reading lfh name from offset %" PRId64, static_cast<int64_t>(name_offset));
898     return kIoError;
899   }
900   if (memcmp(entryName.data(), read_name, name_length) != 0) {
901     ALOGW("Zip: lfh name did not match central directory");
902     return kInconsistentInformation;
903   }
904 
905   uint64_t lfh_uncompressed_size = lfh->uncompressed_size;
906   uint64_t lfh_compressed_size = lfh->compressed_size;
907   if (lfh_uncompressed_size == UINT32_MAX || lfh_compressed_size == UINT32_MAX) {
908     if (lfh_uncompressed_size != UINT32_MAX || lfh_compressed_size != UINT32_MAX) {
909       ALOGW(
910           "Zip: The zip64 extended field in the local header MUST include BOTH original and "
911           "compressed file size fields.");
912       return kInvalidFile;
913     }
914 
915     const off64_t lfh_extra_field_offset = name_offset + lfh->file_name_length;
916     const uint16_t lfh_extra_field_size = lfh->extra_field_length;
917     if (lfh_extra_field_offset > cd_offset - lfh_extra_field_size) {
918       ALOGW("Zip: extra field has a bad size for entry %s", std::string(entryName).c_str());
919       return kInvalidOffset;
920     }
921 
922     auto lfh_extra_field_buf = static_buf;
923     if (lfh_extra_field_size > std::size(static_buf)) {
924       // Make sure vector won't try to copy existing data if it needs to reallocate.
925       buffer.clear();
926       buffer.resize(lfh_extra_field_size);
927       lfh_extra_field_buf = buffer.data();
928     }
929     const auto local_extra_field = archive->mapped_zip.ReadAtOffset(
930         lfh_extra_field_buf, lfh_extra_field_size, lfh_extra_field_offset);
931     if (!local_extra_field) {
932       ALOGW("Zip: failed reading lfh extra field from offset %" PRId64, lfh_extra_field_offset);
933       return kIoError;
934     }
935 
936     Zip64ExtendedInfo zip64_info{};
937     if (auto status = ParseZip64ExtendedInfoInExtraField(
938             local_extra_field, lfh_extra_field_size, lfh->uncompressed_size, lfh->compressed_size,
939             std::nullopt, &zip64_info);
940         status != kSuccess) {
941       return status;
942     }
943 
944     CHECK(zip64_info.uncompressed_file_size.has_value());
945     CHECK(zip64_info.compressed_file_size.has_value());
946     lfh_uncompressed_size = zip64_info.uncompressed_file_size.value();
947     lfh_compressed_size = zip64_info.compressed_file_size.value();
948   }
949 
950   // Paranoia: Match the values specified in the local file header
951   // to those specified in the central directory.
952 
953   // Warn if central directory and local file header don't agree on the use
954   // of a trailing Data Descriptor. The reference implementation is inconsistent
955   // and appears to use the LFH value during extraction (unzip) but the CD value
956   // while displayng information about archives (zipinfo). The spec remains
957   // silent on this inconsistency as well.
958   //
959   // For now, always use the version from the LFH but make sure that the values
960   // specified in the central directory match those in the data descriptor.
961   //
962   // NOTE: It's also worth noting that unzip *does* warn about inconsistencies in
963   // bit 11 (EFS: The language encoding flag, marking that filename and comment are
964   // encoded using UTF-8). This implementation does not check for the presence of
965   // that flag and always enforces that entry names are valid UTF-8.
966   if ((lfh->gpb_flags & kGPBDDFlagMask) != (cdr->gpb_flags & kGPBDDFlagMask)) {
967     ALOGW("Zip: gpb flag mismatch at bit 3. expected {%04" PRIx16 "}, was {%04" PRIx16 "}",
968           cdr->gpb_flags, lfh->gpb_flags);
969   }
970 
971   // If there is no trailing data descriptor, verify that the central directory and local file
972   // header agree on the crc, compressed, and uncompressed sizes of the entry.
973   if ((lfh->gpb_flags & kGPBDDFlagMask) == 0) {
974     data->has_data_descriptor = 0;
975     if (data->compressed_length != lfh_compressed_size ||
976         data->uncompressed_length != lfh_uncompressed_size || data->crc32 != lfh->crc32) {
977       ALOGW("Zip: size/crc32 mismatch. expected {%" PRIu64 ", %" PRIu64 ", %" PRIx32
978             "}, was {%" PRIu64 ", %" PRIu64 ", %" PRIx32 "}",
979             data->compressed_length, data->uncompressed_length, data->crc32, lfh_compressed_size,
980             lfh_uncompressed_size, lfh->crc32);
981       return kInconsistentInformation;
982     }
983   } else {
984     data->has_data_descriptor = 1;
985   }
986 
987   // 4.4.2.1: the upper byte of `version_made_by` gives the source OS. Unix is 3.
988   data->version_made_by = cdr->version_made_by;
989   data->external_file_attributes = cdr->external_file_attributes;
990   if ((data->version_made_by >> 8) == 3) {
991     data->unix_mode = (cdr->external_file_attributes >> 16) & 0xffff;
992   } else {
993     data->unix_mode = 0777;
994   }
995 
996   // 4.4.4: general purpose bit flags.
997   data->gpbf = lfh->gpb_flags;
998 
999   // 4.4.14: the lowest bit of the internal file attributes field indicates text.
1000   // Currently only needed to implement zipinfo.
1001   data->is_text = (cdr->internal_file_attributes & 1);
1002 
1003   const off64_t data_offset = local_header_offset + sizeof(LocalFileHeader) +
1004                               lfh->file_name_length + lfh->extra_field_length;
1005   if (data_offset > cd_offset) {
1006     ALOGW("Zip: bad data offset %" PRId64 " in zip", static_cast<int64_t>(data_offset));
1007     return kInvalidOffset;
1008   }
1009 
1010   if (data->compressed_length > cd_offset - data_offset) {
1011     ALOGW("Zip: bad compressed length in zip (%" PRId64 " + %" PRIu64 " > %" PRId64 ")",
1012           static_cast<int64_t>(data_offset), data->compressed_length,
1013           static_cast<int64_t>(cd_offset));
1014     return kInvalidOffset;
1015   }
1016 
1017   if (data->method == kCompressStored && data->uncompressed_length > cd_offset - data_offset) {
1018     ALOGW("Zip: bad uncompressed length in zip (%" PRId64 " + %" PRIu64 " > %" PRId64 ")",
1019           static_cast<int64_t>(data_offset), data->uncompressed_length,
1020           static_cast<int64_t>(cd_offset));
1021     return kInvalidOffset;
1022   }
1023 
1024   data->offset = data_offset;
1025   return 0;
1026 }
1027 
1028 struct IterationHandle {
1029   ZipArchive* archive;
1030 
1031   std::function<bool(std::string_view)> matcher;
1032 
1033   uint32_t position = 0;
1034 
IterationHandleIterationHandle1035   IterationHandle(ZipArchive* archive, std::function<bool(std::string_view)> in_matcher)
1036       : archive(archive), matcher(std::move(in_matcher)) {}
1037 
MatchIterationHandle1038   bool Match(std::string_view entry_name) const { return !matcher || matcher(entry_name); }
1039 };
1040 
StartIteration(ZipArchiveHandle archive,void ** cookie_ptr,const std::string_view optional_prefix,const std::string_view optional_suffix)1041 int32_t StartIteration(ZipArchiveHandle archive, void** cookie_ptr,
1042                        const std::string_view optional_prefix,
1043                        const std::string_view optional_suffix) {
1044   if (optional_prefix.size() > static_cast<size_t>(UINT16_MAX) ||
1045       optional_suffix.size() > static_cast<size_t>(UINT16_MAX)) {
1046     ALOGW("Zip: prefix/suffix too long");
1047     return kInvalidEntryName;
1048   }
1049   if (optional_prefix.empty() && optional_suffix.empty()) {
1050     return StartIteration(archive, cookie_ptr, std::function<bool(std::string_view)>{});
1051   }
1052   auto matcher = [prefix = std::string(optional_prefix),
1053                   suffix = std::string(optional_suffix)](std::string_view name) mutable {
1054     return android::base::StartsWith(name, prefix) && android::base::EndsWith(name, suffix);
1055   };
1056   return StartIteration(archive, cookie_ptr, std::move(matcher));
1057 }
1058 
StartIteration(ZipArchiveHandle archive,void ** cookie_ptr,std::function<bool (std::string_view)> matcher)1059 int32_t StartIteration(ZipArchiveHandle archive, void** cookie_ptr,
1060                        std::function<bool(std::string_view)> matcher) {
1061   if (archive == nullptr || archive->cd_entry_map == nullptr) {
1062     ALOGW("Zip: Invalid ZipArchiveHandle");
1063     return kInvalidHandle;
1064   }
1065 
1066   archive->cd_entry_map->ResetIteration();
1067   *cookie_ptr = new IterationHandle(archive, std::move(matcher));
1068   return 0;
1069 }
1070 
EndIteration(void * cookie)1071 void EndIteration(void* cookie) {
1072   delete reinterpret_cast<IterationHandle*>(cookie);
1073 }
1074 
CopyFromZipEntry64(ZipEntry * dst,const ZipEntry64 * src)1075 int32_t ZipEntry::CopyFromZipEntry64(ZipEntry* dst, const ZipEntry64* src) {
1076   if (src->compressed_length > UINT32_MAX || src->uncompressed_length > UINT32_MAX) {
1077     ALOGW(
1078         "Zip: the entry size is too large to fit into the 32 bits ZipEntry, uncompressed "
1079         "length %" PRIu64 ", compressed length %" PRIu64,
1080         src->uncompressed_length, src->compressed_length);
1081     return kUnsupportedEntrySize;
1082   }
1083 
1084   *dst = *src;
1085   dst->uncompressed_length = static_cast<uint32_t>(src->uncompressed_length);
1086   dst->compressed_length = static_cast<uint32_t>(src->compressed_length);
1087   return kSuccess;
1088 }
1089 
FindEntry(const ZipArchiveHandle archive,const std::string_view entryName,ZipEntry * data)1090 int32_t FindEntry(const ZipArchiveHandle archive, const std::string_view entryName,
1091                   ZipEntry* data) {
1092   ZipEntry64 entry64;
1093   if (auto status = FindEntry(archive, entryName, &entry64); status != kSuccess) {
1094     return status;
1095   }
1096 
1097   return ZipEntry::CopyFromZipEntry64(data, &entry64);
1098 }
1099 
FindEntry(const ZipArchiveHandle archive,const std::string_view entryName,ZipEntry64 * data)1100 int32_t FindEntry(const ZipArchiveHandle archive, const std::string_view entryName,
1101                   ZipEntry64* data) {
1102   if (entryName.empty() || entryName.size() > static_cast<size_t>(UINT16_MAX)) {
1103     ALOGW("Zip: Invalid filename of length %zu", entryName.size());
1104     return kInvalidEntryName;
1105   }
1106 
1107   const auto [result, offset] =
1108       archive->cd_entry_map->GetCdEntryOffset(entryName, archive->central_directory.GetBasePtr());
1109   if (result != 0) {
1110     ALOGV("Zip: Could not find entry %.*s", static_cast<int>(entryName.size()), entryName.data());
1111     return static_cast<int32_t>(result);  // kEntryNotFound is safe to truncate.
1112   }
1113   // We know there are at most hash_table_size entries, safe to truncate.
1114   return FindEntry(archive, entryName, offset, data);
1115 }
1116 
Next(void * cookie,ZipEntry * data,std::string * name)1117 int32_t Next(void* cookie, ZipEntry* data, std::string* name) {
1118   ZipEntry64 entry64;
1119   if (auto status = Next(cookie, &entry64, name); status != kSuccess) {
1120     return status;
1121   }
1122 
1123   return ZipEntry::CopyFromZipEntry64(data, &entry64);
1124 }
1125 
Next(void * cookie,ZipEntry * data,std::string_view * name)1126 int32_t Next(void* cookie, ZipEntry* data, std::string_view* name) {
1127   ZipEntry64 entry64;
1128   if (auto status = Next(cookie, &entry64, name); status != kSuccess) {
1129     return status;
1130   }
1131 
1132   return ZipEntry::CopyFromZipEntry64(data, &entry64);
1133 }
1134 
Next(void * cookie,ZipEntry64 * data,std::string * name)1135 int32_t Next(void* cookie, ZipEntry64* data, std::string* name) {
1136   std::string_view sv;
1137   int32_t result = Next(cookie, data, &sv);
1138   if (result == 0 && name) {
1139     *name = std::string(sv);
1140   }
1141   return result;
1142 }
1143 
Next(void * cookie,ZipEntry64 * data,std::string_view * name)1144 int32_t Next(void* cookie, ZipEntry64* data, std::string_view* name) {
1145   IterationHandle* handle = reinterpret_cast<IterationHandle*>(cookie);
1146   if (handle == nullptr) {
1147     ALOGW("Zip: Null ZipArchiveHandle");
1148     return kInvalidHandle;
1149   }
1150 
1151   ZipArchive* archive = handle->archive;
1152   if (archive == nullptr || archive->cd_entry_map == nullptr) {
1153     ALOGW("Zip: Invalid ZipArchiveHandle");
1154     return kInvalidHandle;
1155   }
1156 
1157   SCOPED_SIGBUS_HANDLER(return kIoError);
1158 
1159   auto entry = archive->cd_entry_map->Next(archive->central_directory.GetBasePtr());
1160   while (entry != std::pair<std::string_view, uint64_t>()) {
1161     const auto [entry_name, offset] = entry;
1162     if (handle->Match(entry_name)) {
1163       const int error = FindEntry(archive, entry_name, offset, data);
1164       if (!error && name) {
1165         *name = entry_name;
1166       }
1167       return error;
1168     }
1169     entry = archive->cd_entry_map->Next(archive->central_directory.GetBasePtr());
1170   }
1171 
1172   archive->cd_entry_map->ResetIteration();
1173   return kIterationEnd;
1174 }
1175 
1176 // A Writer that writes data to a fixed size memory region.
1177 // The size of the memory region must be equal to the total size of
1178 // the data appended to it.
1179 class MemoryWriter final : public zip_archive::Writer {
1180  public:
Create(uint8_t * buf,size_t size,const ZipEntry64 * entry)1181   static std::optional<MemoryWriter> Create(uint8_t* buf, size_t size,
1182                                             const ZipEntry64* entry) {
1183     const uint64_t declared_length = entry->uncompressed_length;
1184     if (declared_length > size) {
1185       ALOGW("Zip: file size %" PRIu64 " is larger than the buffer size %zu.", declared_length,
1186             size);
1187       return {};
1188     }
1189 
1190     return std::make_optional<MemoryWriter>(buf, size);
1191   }
1192 
Append(uint8_t * buf,size_t buf_size)1193   virtual bool Append(uint8_t* buf, size_t buf_size) override {
1194     if (buf_size == 0 || (buf >= buf_ && buf < buf_ + size_)) {
1195       return true;
1196     }
1197 
1198     if (size_ < buf_size || bytes_written_ > size_ - buf_size) {
1199       ALOGW("Zip: Unexpected size %zu (declared) vs %zu (actual)", size_,
1200             bytes_written_ + buf_size);
1201       return false;
1202     }
1203 
1204     memcpy(buf_ + bytes_written_, buf, buf_size);
1205     bytes_written_ += buf_size;
1206     return true;
1207   }
1208 
GetBuffer(size_t length)1209   Buffer GetBuffer(size_t length) override {
1210     if (length > size_) {
1211       // Special case for empty files: zlib wants at least some buffer but won't ever write there.
1212       if (size_ == 0 && length <= sizeof(bytes_written_)) {
1213         return {reinterpret_cast<uint8_t*>(&bytes_written_), length};
1214       }
1215       return {};
1216     }
1217     return {buf_, length};
1218   }
1219 
MemoryWriter(uint8_t * buf,size_t size)1220   MemoryWriter(uint8_t* buf, size_t size) : buf_(buf), size_(size), bytes_written_(0) {}
1221 
1222  private:
1223   uint8_t* const buf_{nullptr};
1224   const size_t size_;
1225   size_t bytes_written_;
1226 };
1227 
1228 // A Writer that appends data to a file |fd| at its current position.
1229 // The file will be truncated to the end of the written data.
1230 class FileWriter final : public zip_archive::Writer {
1231  public:
1232   // Creates a FileWriter for |fd| and prepare to write |entry| to it,
1233   // guaranteeing that the file descriptor is valid and that there's enough
1234   // space on the volume to write out the entry completely and that the file
1235   // is truncated to the correct length (no truncation if |fd| references a
1236   // block device).
1237   //
1238   // Returns a valid FileWriter on success, |nullopt| if an error occurred.
Create(int fd,const ZipEntry64 * entry)1239   static std::optional<FileWriter> Create(int fd, const ZipEntry64* entry) {
1240     const uint64_t declared_length = entry->uncompressed_length;
1241     const off64_t current_offset = lseek64(fd, 0, SEEK_CUR);
1242     if (current_offset == -1) {
1243       ALOGW("Zip: unable to seek to current location on fd %d: %s", fd, strerror(errno));
1244       return {};
1245     }
1246 
1247     if (declared_length > SIZE_MAX || declared_length > INT64_MAX) {
1248       ALOGW("Zip: file size %" PRIu64 " is too large to extract.", declared_length);
1249       return {};
1250     }
1251 
1252 #if defined(__linux__)
1253     if (declared_length > 0) {
1254       // Make sure we have enough space on the volume to extract the compressed
1255       // entry. Note that the call to ftruncate below will change the file size but
1256       // will not allocate space on disk and this call to fallocate will not
1257       // change the file size.
1258       // Note: fallocate is only supported by the following filesystems -
1259       // btrfs, ext4, ocfs2, and xfs. Therefore fallocate might fail with
1260       // EOPNOTSUPP error when issued in other filesystems.
1261       // Hence, check for the return error code before concluding that the
1262       // disk does not have enough space.
1263       long result = TEMP_FAILURE_RETRY(fallocate(fd, 0, current_offset, declared_length));
1264       if (result == -1 && errno == ENOSPC) {
1265         ALOGW("Zip: unable to allocate %" PRIu64 " bytes at offset %" PRId64 ": %s",
1266               declared_length, static_cast<int64_t>(current_offset), strerror(errno));
1267         return {};
1268       }
1269     }
1270 #endif  // __linux__
1271 
1272     struct stat sb;
1273     if (fstat(fd, &sb) == -1) {
1274       ALOGW("Zip: unable to fstat file: %s", strerror(errno));
1275       return {};
1276     }
1277 
1278     // Block device doesn't support ftruncate(2).
1279     if (!S_ISBLK(sb.st_mode)) {
1280       long result = TEMP_FAILURE_RETRY(ftruncate(fd, declared_length + current_offset));
1281       if (result == -1) {
1282         ALOGW("Zip: unable to truncate file to %" PRId64 ": %s",
1283               static_cast<int64_t>(declared_length + current_offset), strerror(errno));
1284         return {};
1285       }
1286     }
1287 
1288     return std::make_optional<FileWriter>(fd, declared_length);
1289   }
1290 
Append(uint8_t * buf,size_t buf_size)1291   virtual bool Append(uint8_t* buf, size_t buf_size) override {
1292     if (declared_length_ < buf_size || total_bytes_written_ > declared_length_ - buf_size) {
1293       ALOGW("Zip: Unexpected size %zu  (declared) vs %zu (actual)", declared_length_,
1294             total_bytes_written_ + buf_size);
1295       return false;
1296     }
1297 
1298     const bool result = android::base::WriteFully(fd_, buf, buf_size);
1299     if (result) {
1300       total_bytes_written_ += buf_size;
1301     } else {
1302       ALOGW("Zip: unable to write %zu bytes to file; %s", buf_size, strerror(errno));
1303     }
1304 
1305     return result;
1306   }
1307 
FileWriter(const int fd=-1,const uint64_t declared_length=0)1308   explicit FileWriter(const int fd = -1, const uint64_t declared_length = 0)
1309       : Writer(),
1310         fd_(fd),
1311         declared_length_(static_cast<size_t>(declared_length)),
1312         total_bytes_written_(0) {
1313     CHECK_LE(declared_length, SIZE_MAX);
1314   }
1315 
1316  private:
1317   int fd_;
1318   const size_t declared_length_;
1319   size_t total_bytes_written_;
1320 };
1321 
1322 class EntryReader final : public zip_archive::Reader {
1323  public:
EntryReader(const MappedZipFile & zip_file,const ZipEntry64 * entry)1324   EntryReader(const MappedZipFile& zip_file, const ZipEntry64* entry)
1325       : Reader(), zip_file_(zip_file), entry_(entry) {}
1326 
ReadAtOffset(uint8_t * buf,size_t len,off64_t offset) const1327   bool ReadAtOffset(uint8_t* buf, size_t len, off64_t offset) const override {
1328     const auto res = zip_file_.ReadAtOffset(buf, len, entry_->offset + offset);
1329     if (!res) return false;
1330     if (res != buf) {
1331       memcpy(buf, res, len);
1332     }
1333     return true;
1334   }
1335 
AccessAtOffset(uint8_t * buf,size_t len,off64_t offset) const1336   const uint8_t* AccessAtOffset(uint8_t* buf, size_t len, off64_t offset) const override {
1337     return zip_file_.ReadAtOffset(buf, len, entry_->offset + offset);
1338   }
1339 
IsZeroCopy() const1340   bool IsZeroCopy() const override { return zip_file_.GetBasePtr() != nullptr; }
1341 
1342  private:
1343   const MappedZipFile& zip_file_;
1344   const ZipEntry64* entry_;
1345 };
1346 
// This method is using libz macros with old-style-casts
//
// Thin wrapper that keeps the -Wold-style-cast suppression confined to this single call:
// forwards |stream| and |window_bits| to inflateInit2() and returns its result unchanged.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wold-style-cast"
static inline int zlib_inflateInit2(z_stream* stream, int window_bits) {
  return inflateInit2(stream, window_bits);
}
#pragma GCC diagnostic pop
1354 
1355 namespace zip_archive {
1356 
1357 // Moved out of line to avoid -Wweak-vtables.
GetBuffer(size_t)1358 auto Writer::GetBuffer(size_t) -> Buffer {
1359   return {};
1360 }
1361 
AccessAtOffset(uint8_t * buf,size_t len,off64_t offset) const1362 const uint8_t* Reader::AccessAtOffset(uint8_t* buf, size_t len, off64_t offset) const {
1363   return ReadAtOffset(buf, len, offset) ? buf : nullptr;
1364 }
1365 
IsZeroCopy() const1366 bool Reader::IsZeroCopy() const {
1367   return false;
1368 }
1369 
1370 }  // namespace zip_archive
1371 
bufferToSpan(zip_archive::Writer::Buffer buf)1372 static std::span<uint8_t> bufferToSpan(zip_archive::Writer::Buffer buf) {
1373   return {buf.first, ssize_t(buf.second)};
1374 }
1375 
1376 template <bool OnIncfs>
inflateImpl(const zip_archive::Reader & reader,const uint64_t compressed_length,const uint64_t uncompressed_length,zip_archive::Writer * writer,uint64_t * crc_out)1377 static int32_t inflateImpl(const zip_archive::Reader& reader,
1378                            const uint64_t compressed_length,
1379                            const uint64_t uncompressed_length,
1380                            zip_archive::Writer* writer, uint64_t* crc_out) {
1381   constexpr uint64_t kBufSize = 32768;
1382 
1383   std::vector<uint8_t> read_buf;
1384   uint64_t max_read_size;
1385   if (reader.IsZeroCopy()) {
1386     max_read_size = std::min<uint64_t>(std::numeric_limits<uint32_t>::max(), compressed_length);
1387   } else {
1388     max_read_size = std::min(compressed_length, kBufSize);
1389     read_buf.resize(static_cast<size_t>(max_read_size));
1390   }
1391 
1392   std::vector<uint8_t> write_buf;
1393   // For some files zlib needs more space than the uncompressed buffer size, e.g. when inflating
1394   // an empty file.
1395   const auto min_write_buffer_size = std::max(compressed_length, uncompressed_length);
1396   auto write_span = bufferToSpan(writer->GetBuffer(size_t(min_write_buffer_size)));
1397   bool direct_writer;
1398   if (write_span.size() >= min_write_buffer_size) {
1399     direct_writer = true;
1400   } else {
1401     direct_writer = false;
1402     write_buf.resize(static_cast<size_t>(std::min(min_write_buffer_size, kBufSize)));
1403     write_span = write_buf;
1404   }
1405 
1406   /*
1407    * Initialize the zlib stream struct.
1408    */
1409   z_stream zstream = {};
1410   zstream.zalloc = Z_NULL;
1411   zstream.zfree = Z_NULL;
1412   zstream.opaque = Z_NULL;
1413   zstream.next_in = NULL;
1414   zstream.avail_in = 0;
1415   zstream.next_out = write_span.data();
1416   zstream.avail_out = static_cast<uint32_t>(write_span.size());
1417   zstream.data_type = Z_UNKNOWN;
1418 
1419   /*
1420    * Use the undocumented "negative window bits" feature to tell zlib
1421    * that there's no zlib header waiting for it.
1422    */
1423   int zerr = zlib_inflateInit2(&zstream, -MAX_WBITS);
1424   if (zerr != Z_OK) {
1425     if (zerr == Z_VERSION_ERROR) {
1426       ALOGE("Installed zlib is not compatible with linked version (%s)", ZLIB_VERSION);
1427     } else {
1428       ALOGW("Call to inflateInit2 failed (zerr=%d)", zerr);
1429     }
1430 
1431     return kZlibError;
1432   }
1433 
1434   auto zstream_deleter = [](z_stream* stream) {
1435     inflateEnd(stream); /* free up any allocated structures */
1436   };
1437 
1438   std::unique_ptr<z_stream, decltype(zstream_deleter)> zstream_guard(&zstream, zstream_deleter);
1439   static_assert(sizeof(zstream_guard) == sizeof(void*));
1440 
1441   SCOPED_SIGBUS_HANDLER_CONDITIONAL(OnIncfs, {
1442     zstream_guard.reset();
1443     incfs::util::clearAndFree(read_buf);
1444     incfs::util::clearAndFree(write_buf);
1445     return kIoError;
1446   });
1447 
1448   const bool compute_crc = (crc_out != nullptr);
1449   uLong crc = 0;
1450   uint64_t remaining_bytes = compressed_length;
1451   uint64_t total_output = 0;
1452   do {
1453     /* read as much as we can */
1454     if (zstream.avail_in == 0) {
1455       const auto read_size = static_cast<uint32_t>(std::min(remaining_bytes, max_read_size));
1456       const off64_t offset = (compressed_length - remaining_bytes);
1457       auto buf = reader.AccessAtOffset(read_buf.data(), read_size, offset);
1458       if (!buf) {
1459         ALOGW("Zip: inflate read failed, getSize = %u: %s", read_size, strerror(errno));
1460         return kIoError;
1461       }
1462 
1463       remaining_bytes -= read_size;
1464 
1465       zstream.next_in = buf;
1466       zstream.avail_in = read_size;
1467     }
1468 
1469     /* uncompress the data */
1470     zerr = inflate(&zstream, Z_NO_FLUSH);
1471     if (zerr != Z_OK && zerr != Z_STREAM_END) {
1472       ALOGW("Zip: inflate zerr=%d (nIn=%p aIn=%u nOut=%p aOut=%u)", zerr, zstream.next_in,
1473             zstream.avail_in, zstream.next_out, zstream.avail_out);
1474       return kZlibError;
1475     }
1476 
1477     /* write when we're full or when we're done */
1478     if (zstream.avail_out == 0 ||
1479         (zerr == Z_STREAM_END && zstream.avail_out != write_span.size())) {
1480       const size_t write_size = zstream.next_out - write_span.data();
1481       if (compute_crc) {
1482         DCHECK_LE(write_size, write_span.size());
1483         crc = crc32(crc, write_span.data(), static_cast<uint32_t>(write_size));
1484       }
1485       total_output += write_span.size() - zstream.avail_out;
1486 
1487       if (direct_writer) {
1488         write_span = write_span.subspan(write_size);
1489       } else if (!writer->Append(write_span.data(), write_size)) {
1490         return kIoError;
1491       }
1492 
1493       if (zstream.avail_out == 0) {
1494         zstream.next_out = write_span.data();
1495         zstream.avail_out = static_cast<uint32_t>(write_span.size());
1496       }
1497     }
1498   } while (zerr == Z_OK);
1499 
1500   CHECK_EQ(zerr, Z_STREAM_END); /* other errors should've been caught */
1501 
1502   // NOTE: zstream.adler is always set to 0, because we're using the -MAX_WBITS
1503   // "feature" of zlib to tell it there won't be a zlib file header. zlib
1504   // doesn't bother calculating the checksum in that scenario. We just do
1505   // it ourselves above because there are no additional gains to be made by
1506   // having zlib calculate it for us, since they do it by calling crc32 in
1507   // the same manner that we have above.
1508   if (compute_crc) {
1509     *crc_out = crc;
1510   }
1511   if (total_output != uncompressed_length || remaining_bytes != 0) {
1512     ALOGW("Zip: size mismatch on inflated file (%lu vs %" PRIu64 ")", zstream.total_out,
1513           uncompressed_length);
1514     return kInconsistentInformation;
1515   }
1516 
1517   return 0;
1518 }
1519 
InflateEntryToWriter(MappedZipFile & mapped_zip,const ZipEntry64 * entry,zip_archive::Writer * writer,uint64_t * crc_out)1520 static int32_t InflateEntryToWriter(MappedZipFile& mapped_zip, const ZipEntry64* entry,
1521                                     zip_archive::Writer* writer, uint64_t* crc_out) {
1522   const EntryReader reader(mapped_zip, entry);
1523   return inflateImpl<true>(reader, entry->compressed_length,
1524                            entry->uncompressed_length, writer, crc_out);
1525 }
1526 
CopyEntryToWriter(MappedZipFile & mapped_zip,const ZipEntry64 * entry,zip_archive::Writer * writer,uint64_t * crc_out)1527 static int32_t CopyEntryToWriter(MappedZipFile& mapped_zip, const ZipEntry64* entry,
1528                                  zip_archive::Writer* writer, uint64_t* crc_out) {
1529   constexpr uint64_t kBufSize = 32768;
1530   std::vector<uint8_t> buf;
1531   std::span<uint8_t> write_span{};
1532   uint64_t max_read_size;
1533   if (mapped_zip.GetBasePtr() == nullptr ||
1534       mapped_zip.GetFileLength() < entry->uncompressed_length) {
1535     // Check if we can read directly into the writer.
1536     write_span = bufferToSpan(writer->GetBuffer(size_t(entry->uncompressed_length)));
1537     if (write_span.size() >= entry->uncompressed_length) {
1538       max_read_size = entry->uncompressed_length;
1539     } else {
1540       max_read_size = std::min(entry->uncompressed_length, kBufSize);
1541       buf.resize((static_cast<size_t>(max_read_size)));
1542       write_span = buf;
1543     }
1544   } else {
1545     max_read_size = entry->uncompressed_length;
1546   }
1547 
1548   SCOPED_SIGBUS_HANDLER({
1549     incfs::util::clearAndFree(buf);
1550     return kIoError;
1551   });
1552 
1553   const uint64_t length = entry->uncompressed_length;
1554   uint64_t count = 0;
1555   uLong crc = 0;
1556   while (count < length) {
1557     uint64_t remaining = length - count;
1558     off64_t offset = entry->offset + count;
1559 
1560     // Safe conversion because even kBufSize is narrow enough for a 32 bit signed value.
1561     const auto block_size = static_cast<uint32_t>(std::min(remaining, max_read_size));
1562 
1563     const auto read_buf = mapped_zip.ReadAtOffset(write_span.data(), block_size, offset);
1564     if (!read_buf) {
1565       ALOGW("CopyFileToFile: copy read failed, block_size = %u, offset = %" PRId64 ": %s",
1566             block_size, static_cast<int64_t>(offset), strerror(errno));
1567       return kIoError;
1568     }
1569 
1570     if (!writer->Append(const_cast<uint8_t*>(read_buf), block_size)) {
1571       return kIoError;
1572     }
1573     // Advance our span if it's a direct buffer (there's a span but local buffer's empty).
1574     if (!write_span.empty() && buf.empty()) {
1575       write_span = write_span.subspan(block_size);
1576     }
1577     if (crc_out) {
1578       crc = crc32(crc, read_buf, block_size);
1579     }
1580     count += block_size;
1581   }
1582 
1583   if (crc_out) {
1584     *crc_out = crc;
1585   }
1586 
1587   return 0;
1588 }
1589 
extractToWriter(ZipArchiveHandle handle,const ZipEntry64 * entry,zip_archive::Writer * writer)1590 static int32_t extractToWriter(ZipArchiveHandle handle, const ZipEntry64* entry,
1591                                zip_archive::Writer* writer) {
1592   const uint16_t method = entry->method;
1593 
1594   // this should default to kUnknownCompressionMethod.
1595   int32_t return_value = -1;
1596   uint64_t crc = 0;
1597   if (method == kCompressStored) {
1598     return_value =
1599         CopyEntryToWriter(handle->mapped_zip, entry, writer, kCrcChecksEnabled ? &crc : nullptr);
1600   } else if (method == kCompressDeflated) {
1601     return_value =
1602         InflateEntryToWriter(handle->mapped_zip, entry, writer, kCrcChecksEnabled ? &crc : nullptr);
1603   }
1604 
1605   if (!return_value && entry->has_data_descriptor) {
1606     return_value = ValidateDataDescriptor(handle->mapped_zip, entry);
1607     if (return_value) {
1608       return return_value;
1609     }
1610   }
1611 
1612   // Validate that the CRC matches the calculated value.
1613   if (kCrcChecksEnabled && (entry->crc32 != static_cast<uint32_t>(crc))) {
1614     ALOGW("Zip: crc mismatch: expected %" PRIu32 ", was %" PRIu64, entry->crc32, crc);
1615     return kInconsistentInformation;
1616   }
1617 
1618   return return_value;
1619 }
1620 
ExtractToMemory(ZipArchiveHandle archive,const ZipEntry * entry,uint8_t * begin,size_t size)1621 int32_t ExtractToMemory(ZipArchiveHandle archive, const ZipEntry* entry, uint8_t* begin,
1622                         size_t size) {
1623   ZipEntry64 entry64(*entry);
1624   return ExtractToMemory(archive, &entry64, begin, size);
1625 }
1626 
ExtractToMemory(ZipArchiveHandle archive,const ZipEntry64 * entry,uint8_t * begin,size_t size)1627 int32_t ExtractToMemory(ZipArchiveHandle archive, const ZipEntry64* entry, uint8_t* begin,
1628                         size_t size) {
1629   auto writer = MemoryWriter::Create(begin, size, entry);
1630   if (!writer) {
1631     return kIoError;
1632   }
1633   return extractToWriter(archive, entry, &writer.value());
1634 }
1635 
ExtractEntryToFile(ZipArchiveHandle archive,const ZipEntry * entry,int fd)1636 int32_t ExtractEntryToFile(ZipArchiveHandle archive, const ZipEntry* entry, int fd) {
1637   ZipEntry64 entry64(*entry);
1638   return ExtractEntryToFile(archive, &entry64, fd);
1639 }
1640 
ExtractEntryToFile(ZipArchiveHandle archive,const ZipEntry64 * entry,int fd)1641 int32_t ExtractEntryToFile(ZipArchiveHandle archive, const ZipEntry64* entry, int fd) {
1642   auto writer = FileWriter::Create(fd, entry);
1643   if (!writer) {
1644     return kIoError;
1645   }
1646   return extractToWriter(archive, entry, &writer.value());
1647 }
1648 
GetFileDescriptor(const ZipArchiveHandle archive)1649 int GetFileDescriptor(const ZipArchiveHandle archive) {
1650   return archive->mapped_zip.GetFileDescriptor();
1651 }
1652 
GetFileDescriptorOffset(const ZipArchiveHandle archive)1653 off64_t GetFileDescriptorOffset(const ZipArchiveHandle archive) {
1654   return archive->mapped_zip.GetFileOffset();
1655 }
1656 
1657 //
1658 // ZIPARCHIVE_DISABLE_CALLBACK_API disables all APIs that accept user callbacks.
1659 // It gets defined for the incfs-supporting version of libziparchive, where one
1660 // has to control all the code accessing the archive. See more at
1661 // incfs_support/signal_handling.h
1662 //
1663 #if !ZIPARCHIVE_DISABLE_CALLBACK_API && !defined(_WIN32)
1664 class ProcessWriter final : public zip_archive::Writer {
1665  public:
ProcessWriter(ProcessZipEntryFunction func,void * cookie)1666   ProcessWriter(ProcessZipEntryFunction func, void* cookie)
1667       : Writer(), proc_function_(func), cookie_(cookie) {}
1668 
Append(uint8_t * buf,size_t buf_size)1669   virtual bool Append(uint8_t* buf, size_t buf_size) override {
1670     return proc_function_(buf, buf_size, cookie_);
1671   }
1672 
1673  private:
1674   ProcessZipEntryFunction proc_function_;
1675   void* cookie_;
1676 };
1677 
ProcessZipEntryContents(ZipArchiveHandle archive,const ZipEntry * entry,ProcessZipEntryFunction func,void * cookie)1678 int32_t ProcessZipEntryContents(ZipArchiveHandle archive, const ZipEntry* entry,
1679                                 ProcessZipEntryFunction func, void* cookie) {
1680   ZipEntry64 entry64(*entry);
1681   return ProcessZipEntryContents(archive, &entry64, func, cookie);
1682 }
1683 
ProcessZipEntryContents(ZipArchiveHandle archive,const ZipEntry64 * entry,ProcessZipEntryFunction func,void * cookie)1684 int32_t ProcessZipEntryContents(ZipArchiveHandle archive, const ZipEntry64* entry,
1685                                 ProcessZipEntryFunction func, void* cookie) {
1686   ProcessWriter writer(func, cookie);
1687   return extractToWriter(archive, entry, &writer);
1688 }
1689 
1690 #endif  // !ZIPARCHIVE_DISABLE_CALLBACK_API && !defined(_WIN32)
1691 
MappedZipFile(int fd,off64_t length,off64_t offset)1692 MappedZipFile::MappedZipFile(int fd, off64_t length, off64_t offset)
1693     : fd_(fd), fd_offset_(offset), data_length_(length) {
1694   // TODO(b/287285733): restore mmap() when the cold cache regression is fixed.
1695 #if 0
1696   // Only try to mmap all files in 64-bit+ processes as it's too easy to use up the whole
1697   // virtual address space on 32-bits, causing out of memory errors later.
1698   if constexpr (sizeof(void*) >= 8) {
1699     // Note: GetFileLength() here fills |data_length_| if it was empty.
1700     // TODO(b/261875471): remove the incfs exclusion when the driver deadlock is fixed.
1701     if (fd >= 0 && !incfs::util::isIncfsFd(fd) && GetFileLength() > 0 &&
1702         GetFileLength() < std::numeric_limits<size_t>::max()) {
1703       mapped_file_ =
1704           android::base::MappedFile::FromFd(fd, fd_offset_, size_t(data_length_), PROT_READ);
1705       if (mapped_file_) {
1706         maybePrepareSequentialReading(mapped_file_->data(), size_t(data_length_));
1707         base_ptr_ = mapped_file_->data();
1708       }
1709     }
1710   }
1711 #endif  // 0
1712 }
1713 
GetFileDescriptor() const1714 int MappedZipFile::GetFileDescriptor() const {
1715   return fd_;
1716 }
1717 
GetBasePtr() const1718 const void* MappedZipFile::GetBasePtr() const {
1719   return base_ptr_;
1720 }
1721 
GetFileOffset() const1722 off64_t MappedZipFile::GetFileOffset() const {
1723   return fd_offset_;
1724 }
1725 
GetFileLength() const1726 off64_t MappedZipFile::GetFileLength() const {
1727   if (data_length_ >= 0) {
1728     return data_length_;
1729   }
1730   if (fd_ < 0) {
1731     ALOGE("Zip: invalid file map");
1732   } else {
1733     struct stat st;
1734     if (fstat(fd_, &st)) {
1735       ALOGE("Zip: fstat(%d) failed: %s", fd_, strerror(errno));
1736     } else {
1737       if (S_ISBLK(st.st_mode)) {
1738 #if defined(__linux__)
1739         // Block devices are special - they report 0 as st_size.
1740         uint64_t size;
1741         if (ioctl(fd_, BLKGETSIZE64, &size)) {
1742           ALOGE("Zip: ioctl(%d, BLKGETSIZE64) failed: %s", fd_, strerror(errno));
1743         } else {
1744           data_length_ = size - fd_offset_;
1745         }
1746 #endif
1747       } else {
1748         data_length_ = st.st_size - fd_offset_;
1749       }
1750     }
1751   }
1752   return data_length_;
1753 }
1754 
1755 // Attempts to read |len| bytes into |buf| at offset |off|.
ReadAtOffset(uint8_t * buf,size_t len,off64_t off) const1756 const uint8_t* MappedZipFile::ReadAtOffset(uint8_t* buf, size_t len, off64_t off) const {
1757   if (base_ptr_) {
1758     if (off < 0 || data_length_ < len || off > data_length_ - len) {
1759       ALOGE("Zip: invalid offset: %" PRId64 ", read length: %zu, data length: %" PRId64, off, len,
1760             data_length_);
1761       return nullptr;
1762     }
1763     maybePrefetch(static_cast<const uint8_t*>(base_ptr_) + off, len);
1764     return static_cast<const uint8_t*>(base_ptr_) + off;
1765   }
1766   if (fd_ < 0) {
1767     ALOGE("Zip: invalid zip file");
1768     return nullptr;
1769   }
1770 
1771   if (off < 0) {
1772     ALOGE("Zip: invalid offset %" PRId64, off);
1773     return nullptr;
1774   }
1775 
1776   off64_t read_offset;
1777   if (__builtin_add_overflow(fd_offset_, off, &read_offset)) {
1778     ALOGE("Zip: invalid read offset %" PRId64 " overflows, fd offset %" PRId64, off, fd_offset_);
1779     return nullptr;
1780   }
1781 
1782   if (data_length_ != -1) {
1783     off64_t read_end;
1784     if (len > std::numeric_limits<off64_t>::max() ||
1785         __builtin_add_overflow(off, static_cast<off64_t>(len), &read_end)) {
1786       ALOGE("Zip: invalid read length %" PRId64 " overflows, offset %" PRId64,
1787             static_cast<off64_t>(len), off);
1788       return nullptr;
1789     }
1790 
1791     if (read_end > data_length_) {
1792       ALOGE("Zip: invalid read length %" PRId64 " exceeds data length %" PRId64 ", offset %" PRId64,
1793             static_cast<off64_t>(len), data_length_, off);
1794       return nullptr;
1795     }
1796   }
1797 
1798   // Make sure to read at offset to ensure concurrent access to the fd.
1799   if (!android::base::ReadFullyAtOffset(fd_, buf, len, read_offset)) {
1800     ALOGE("Zip: failed to read at offset %" PRId64, off);
1801     return nullptr;
1802   }
1803   return buf;
1804 }
1805 
Initialize(const void * map_base_ptr,off64_t cd_start_offset,size_t cd_size)1806 void CentralDirectory::Initialize(const void* map_base_ptr, off64_t cd_start_offset,
1807                                   size_t cd_size) {
1808   base_ptr_ = static_cast<const uint8_t*>(map_base_ptr) + cd_start_offset;
1809   length_ = cd_size;
1810 }
1811 
InitializeCentralDirectory(off64_t cd_start_offset,size_t cd_size)1812 bool ZipArchive::InitializeCentralDirectory(off64_t cd_start_offset, size_t cd_size) {
1813   if (!mapped_zip.GetBasePtr()) {
1814     directory_map = android::base::MappedFile::FromFd(mapped_zip.GetFileDescriptor(),
1815                                                       mapped_zip.GetFileOffset() + cd_start_offset,
1816                                                       cd_size, PROT_READ);
1817     if (!directory_map) {
1818       ALOGE("Zip: failed to map central directory (offset %" PRId64 ", size %zu): %s",
1819             cd_start_offset, cd_size, strerror(errno));
1820       return false;
1821     }
1822 
1823     CHECK_EQ(directory_map->size(), cd_size);
1824     central_directory.Initialize(directory_map->data(), 0 /*offset*/, cd_size);
1825   } else {
1826     if (mapped_zip.GetBasePtr() == nullptr) {
1827       ALOGE(
1828           "Zip: Failed to map central directory, bad mapped_zip base "
1829           "pointer");
1830       return false;
1831     }
1832     if (static_cast<off64_t>(cd_start_offset) + static_cast<off64_t>(cd_size) >
1833         mapped_zip.GetFileLength()) {
1834       ALOGE(
1835           "Zip: Failed to map central directory, offset exceeds mapped memory region (start_offset "
1836           "%" PRId64 ", cd_size %zu, mapped_region_size %" PRId64 ")",
1837           static_cast<int64_t>(cd_start_offset), cd_size, mapped_zip.GetFileLength());
1838       return false;
1839     }
1840 
1841     central_directory.Initialize(mapped_zip.GetBasePtr(), cd_start_offset, cd_size);
1842   }
1843   return true;
1844 }
1845 
1846 // This function returns the embedded timestamp as is and doesn't perform validation.
GetModificationTime() const1847 tm ZipEntryCommon::GetModificationTime() const {
1848   tm t = {};
1849 
1850   t.tm_hour = (mod_time >> 11) & 0x1f;
1851   t.tm_min = (mod_time >> 5) & 0x3f;
1852   t.tm_sec = (mod_time & 0x1f) << 1;
1853 
1854   t.tm_year = ((mod_time >> 25) & 0x7f) + 80;
1855   t.tm_mon = ((mod_time >> 21) & 0xf) - 1;
1856   t.tm_mday = (mod_time >> 16) & 0x1f;
1857 
1858   return t;
1859 }
1860 
1861 namespace zip_archive {
1862 
Inflate(const Reader & reader,const uint64_t compressed_length,const uint64_t uncompressed_length,Writer * writer,uint64_t * crc_out)1863 int32_t Inflate(const Reader& reader, const uint64_t compressed_length,
1864                 const uint64_t uncompressed_length, Writer* writer,
1865                 uint64_t* crc_out) {
1866   return inflateImpl<false>(reader, compressed_length, uncompressed_length,
1867                             writer, crc_out);
1868 }
1869 
1870 //
1871 // ZIPARCHIVE_DISABLE_CALLBACK_API disables all APIs that accept user callbacks.
1872 // It gets defined for the incfs-supporting version of libziparchive, where one
1873 // has to control all the code accessing the archive. See more at
1874 // incfs_support/signal_handling.h
1875 //
1876 #if !ZIPARCHIVE_DISABLE_CALLBACK_API
1877 
ExtractToWriter(ZipArchiveHandle handle,const ZipEntry64 * entry,zip_archive::Writer * writer)1878 int32_t ExtractToWriter(ZipArchiveHandle handle, const ZipEntry64* entry,
1879                         zip_archive::Writer* writer) {
1880   return extractToWriter(handle, entry, writer);
1881 }
1882 
1883 #endif  // !ZIPARCHIVE_DISABLE_CALLBACK_API
1884 
1885 }  // namespace zip_archive
1886