• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2008 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *    http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 /*
18  * Read-only access to Zip archives, with minimal heap allocation.
19  */
20 
21 #define LOG_TAG "ziparchive"
22 
23 #include "ziparchive/zip_archive.h"
24 
25 #include <errno.h>
26 #include <fcntl.h>
27 #include <inttypes.h>
28 #include <limits.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <time.h>
32 #include <unistd.h>
33 
34 #ifdef __linux__
35 #include <linux/fs.h>
36 #include <sys/ioctl.h>
37 #include <sys/mman.h>
38 #endif
39 
40 #include <memory>
41 #include <optional>
42 #include <span>
43 #include <vector>
44 
45 #if defined(__APPLE__)
46 #define lseek64 lseek
47 #endif
48 
49 #if defined(__BIONIC__)
50 #include <android/fdsan.h>
51 #endif
52 
53 #include <android-base/file.h>
54 #include <android-base/logging.h>
55 #include <android-base/macros.h>  // TEMP_FAILURE_RETRY may or may not be in unistd
56 #include <android-base/mapped_file.h>
57 #include <android-base/memory.h>
58 #include <android-base/strings.h>
59 #include <android-base/utf8.h>
60 #include <log/log.h>
61 
62 #include "entry_name_utils-inl.h"
63 #include "incfs_support/signal_handling.h"
64 #include "incfs_support/util.h"
65 #include "zip_archive_common.h"
66 #include "zip_archive_private.h"
67 #include "zlib.h"
68 
69 // Used to turn on crc checks - verify that the content CRC matches the values
70 // specified in the local file header and the central directory.
71 static constexpr bool kCrcChecksEnabled = false;
72 
73 // The maximum number of bytes to scan backwards for the EOCD start.
74 static const uint32_t kMaxEOCDSearch = kMaxCommentLen + sizeof(EocdRecord);
75 
76 // Set a reasonable cap (256 GiB) for the zip file size. So the data is always valid when
77 // we parse the fields in cd or local headers as 64 bits signed integers.
78 static constexpr uint64_t kMaxFileLength = 256 * static_cast<uint64_t>(1u << 30u);
79 
80 /*
81  * A Read-only Zip archive.
82  *
83  * We want "open" and "find entry by name" to be fast operations, and
84  * we want to use as little memory as possible.  We memory-map the zip
85  * central directory, and load a hash table with pointers to the filenames
86  * (which aren't null-terminated).  The other fields are at a fixed offset
87  * from the filename, so we don't need to extract those (but we do need
88  * to byte-read and endian-swap them every time we want them).
89  *
90  * It's possible that somebody has handed us a massive (~1GB) zip archive,
91  * so we can't expect to mmap the entire file.
92  *
93  * To speed comparisons when doing a lookup by name, we could make the mapping
94  * "private" (copy-on-write) and null-terminate the filenames after verifying
95  * the record structure.  However, this requires a private mapping of
96  * every page that the Central Directory touches.  Easier to tuck a copy
97  * of the string length into the hash table entry.
98  */
99 
// Page size used by the page-alignment helpers below: obtained from getpagesize() on Linux,
// fixed at 4096 bytes on every other platform.
#ifdef __linux__
static const size_t kPageSize = static_cast<size_t>(getpagesize());
#else
constexpr size_t kPageSize = 4096;
#endif
105 
pageAlignDown(uintptr_t ptr_int)106 [[maybe_unused]] static uintptr_t pageAlignDown(uintptr_t ptr_int) {
107   return ptr_int & ~(kPageSize - 1);
108 }
109 
pageAlignUp(uintptr_t ptr_int)110 [[maybe_unused]] static uintptr_t pageAlignUp(uintptr_t ptr_int) {
111   return pageAlignDown(ptr_int + kPageSize - 1);
112 }
113 
expandToPageBounds(void * ptr,size_t size)114 [[maybe_unused]] static std::pair<void*, size_t> expandToPageBounds(void* ptr, size_t size) {
115   const auto ptr_int = reinterpret_cast<uintptr_t>(ptr);
116   const auto aligned_ptr_int = pageAlignDown(ptr_int);
117   const auto aligned_size = pageAlignUp(ptr_int + size) - aligned_ptr_int;
118   return {reinterpret_cast<void*>(aligned_ptr_int), aligned_size};
119 }
120 
maybePrefetch(const void * ptr,size_t size)121 [[maybe_unused]] static void maybePrefetch([[maybe_unused]] const void* ptr,
122                                            [[maybe_unused]] size_t size) {
123 #ifdef __linux__
124   // Let's only ask for a readahead explicitly if there's enough pages to read. A regular OS
125   // readahead implementation would take care of the smaller requests, and it would also involve
126   // only a single kernel transition, just an implicit one from the page fault.
127   //
128   // Note: there's no implementation for other OSes, as the prefetch logic is highly specific
129   // to the memory manager, and we don't have any well defined benchmarks on Windows/Mac;
130   // it also mostly matters only for the cold OS boot where no files are in the page cache yet,
131   // but we rarely would hit this situation outside of the device startup.
132   auto [aligned_ptr, aligned_size] = expandToPageBounds(const_cast<void*>(ptr), size);
133   if (aligned_size > 32 * kPageSize) {
134     if (::madvise(aligned_ptr, aligned_size, MADV_WILLNEED)) {
135       ALOGW("Zip: madvise(file, WILLNEED) failed: %s (%d)", strerror(errno), errno);
136     }
137   }
138 #endif
139 }
140 
maybePrepareSequentialReading(const void * ptr,size_t size)141 [[maybe_unused]] static void maybePrepareSequentialReading([[maybe_unused]] const void* ptr,
142                                                            [[maybe_unused]] size_t size) {
143 #ifdef __linux__
144   auto [aligned_ptr, aligned_size] = expandToPageBounds(const_cast<void*>(ptr), size);
145   if (::madvise(reinterpret_cast<void*>(aligned_ptr), aligned_size, MADV_SEQUENTIAL)) {
146     ALOGW("Zip: madvise(file, SEQUENTIAL) failed: %s (%d)", strerror(errno), errno);
147   }
148 #endif
149 }
150 
#if defined(__BIONIC__)
// Builds the fdsan owner tag for |archive|: a tag of type ANDROID_FDSAN_OWNER_TYPE_ZIPARCHIVE
// whose value is the archive's address.
static uint64_t GetOwnerTag(const ZipArchive* archive) {
  const auto archive_id = reinterpret_cast<uint64_t>(archive);
  return android_fdsan_create_owner_tag(ANDROID_FDSAN_OWNER_TYPE_ZIPARCHIVE, archive_id);
}
#endif
157 
ZipArchive(MappedZipFile && map,bool assume_ownership)158 ZipArchive::ZipArchive(MappedZipFile&& map, bool assume_ownership)
159     : mapped_zip(std::move(map)),
160       close_file(assume_ownership),
161       directory_offset(0),
162       central_directory(),
163       directory_map(),
164       num_entries(0) {
165 #if defined(__BIONIC__)
166   if (assume_ownership) {
167     CHECK(mapped_zip.GetFileDescriptor() >= 0 || !mapped_zip.GetBasePtr());
168     android_fdsan_exchange_owner_tag(mapped_zip.GetFileDescriptor(), 0, GetOwnerTag(this));
169   }
170 #endif
171 }
172 
ZipArchive(const void * address,size_t length)173 ZipArchive::ZipArchive(const void* address, size_t length)
174     : mapped_zip(address, length),
175       close_file(false),
176       directory_offset(0),
177       central_directory(),
178       directory_map(),
179       num_entries(0) {}
180 
~ZipArchive()181 ZipArchive::~ZipArchive() {
182   if (close_file && mapped_zip.GetFileDescriptor() >= 0) {
183 #if defined(__BIONIC__)
184     android_fdsan_close_with_tag(mapped_zip.GetFileDescriptor(), GetOwnerTag(this));
185 #else
186     close(mapped_zip.GetFileDescriptor());
187 #endif
188   }
189 }
190 
// Location and size of the central directory, as read from the (zip64) EOCD record.
struct CentralDirectoryInfo {
  // Number of records in the central directory.
  uint64_t num_records;
  // Size of the central directory, in bytes.
  uint64_t cd_size;
  // Offset of the first byte of the central directory, measured from the start of the file.
  uint64_t cd_start_offset;
};
199 
200 // Reads |T| at |readPtr| and increments |readPtr|. Returns std::nullopt if the boundary check
201 // fails.
202 template <typename T>
TryConsumeUnaligned(uint8_t ** readPtr,const uint8_t * bufStart,size_t bufSize)203 static std::optional<T> TryConsumeUnaligned(uint8_t** readPtr, const uint8_t* bufStart,
204                                             size_t bufSize) {
205   if (bufSize < sizeof(T) || *readPtr - bufStart > bufSize - sizeof(T)) {
206     ALOGW("Zip: %zu byte read exceeds the boundary of allocated buf, offset %zu, bufSize %zu",
207           sizeof(T), *readPtr - bufStart, bufSize);
208     return std::nullopt;
209   }
210   return ConsumeUnaligned<T>(readPtr);
211 }
212 
FindCentralDirectoryInfoForZip64(const char * debugFileName,ZipArchive * archive,off64_t eocdOffset,CentralDirectoryInfo * cdInfo)213 static ZipError FindCentralDirectoryInfoForZip64(const char* debugFileName, ZipArchive* archive,
214                                                  off64_t eocdOffset, CentralDirectoryInfo* cdInfo) {
215   if (eocdOffset <= sizeof(Zip64EocdLocator)) {
216     ALOGW("Zip: %s: Not enough space for zip64 eocd locator", debugFileName);
217     return kInvalidFile;
218   }
219   // We expect to find the zip64 eocd locator immediately before the zip eocd.
220   const int64_t locatorOffset = eocdOffset - sizeof(Zip64EocdLocator);
221   Zip64EocdLocator zip64EocdLocatorBuf;
222   const auto zip64EocdLocator = reinterpret_cast<const Zip64EocdLocator*>(
223       archive->mapped_zip.ReadAtOffset(reinterpret_cast<uint8_t*>((&zip64EocdLocatorBuf)),
224                                        sizeof(zip64EocdLocatorBuf), locatorOffset));
225   if (!zip64EocdLocator) {
226     ALOGW("Zip: %s: Read %zu from offset %" PRId64 " failed %s", debugFileName,
227           sizeof(zip64EocdLocatorBuf), locatorOffset, debugFileName);
228     return kIoError;
229   }
230 
231   if (zip64EocdLocator->locator_signature != Zip64EocdLocator::kSignature) {
232     ALOGW("Zip: %s: Zip64 eocd locator signature not found at offset %" PRId64, debugFileName,
233           locatorOffset);
234     return kInvalidFile;
235   }
236 
237   const int64_t zip64EocdOffset = zip64EocdLocator->zip64_eocd_offset;
238   if (locatorOffset <= sizeof(Zip64EocdRecord) ||
239       zip64EocdOffset > locatorOffset - sizeof(Zip64EocdRecord)) {
240     ALOGW("Zip: %s: Bad zip64 eocd offset %" PRId64 ", eocd locator offset %" PRId64, debugFileName,
241           zip64EocdOffset, locatorOffset);
242     return kInvalidOffset;
243   }
244 
245   Zip64EocdRecord zip64EocdRecordBuf;
246   const auto zip64EocdRecord = reinterpret_cast<const Zip64EocdRecord*>(
247       archive->mapped_zip.ReadAtOffset(reinterpret_cast<uint8_t*>(&zip64EocdRecordBuf),
248                                        sizeof(zip64EocdRecordBuf), zip64EocdOffset));
249   if (!zip64EocdRecord) {
250     ALOGW("Zip: %s: read %zu from offset %" PRId64 " failed %s", debugFileName,
251           sizeof(zip64EocdRecordBuf), zip64EocdOffset, debugFileName);
252     return kIoError;
253   }
254 
255   if (zip64EocdRecord->record_signature != Zip64EocdRecord::kSignature) {
256     ALOGW("Zip: %s: Zip64 eocd record signature not found at offset %" PRId64, debugFileName,
257           zip64EocdOffset);
258     return kInvalidFile;
259   }
260 
261   if (zip64EocdOffset <= zip64EocdRecord->cd_size ||
262       zip64EocdRecord->cd_start_offset > zip64EocdOffset - zip64EocdRecord->cd_size) {
263     ALOGW("Zip: %s: Bad offset for zip64 central directory. cd offset %" PRIu64 ", cd size %" PRIu64
264           ", zip64 eocd offset %" PRIu64,
265           debugFileName, zip64EocdRecord->cd_start_offset, zip64EocdRecord->cd_size,
266           zip64EocdOffset);
267     return kInvalidOffset;
268   }
269 
270   *cdInfo = {.num_records = zip64EocdRecord->num_records,
271              .cd_size = zip64EocdRecord->cd_size,
272              .cd_start_offset = zip64EocdRecord->cd_start_offset};
273 
274   return kSuccess;
275 }
276 
FindCentralDirectoryInfo(const char * debug_file_name,ZipArchive * archive,off64_t file_length,std::span<uint8_t> scan_buffer,CentralDirectoryInfo * cdInfo)277 static ZipError FindCentralDirectoryInfo(const char* debug_file_name,
278                                          ZipArchive* archive,
279                                          off64_t file_length,
280                                          std::span<uint8_t> scan_buffer,
281                                          CentralDirectoryInfo* cdInfo) {
282   const auto read_amount = static_cast<uint32_t>(scan_buffer.size());
283   const off64_t search_start = file_length - read_amount;
284 
285   const auto data = archive->mapped_zip.ReadAtOffset(scan_buffer.data(), read_amount, search_start);
286   if (!data) {
287     ALOGE("Zip: read %" PRId64 " from offset %" PRId64 " failed", static_cast<int64_t>(read_amount),
288           static_cast<int64_t>(search_start));
289     return kIoError;
290   }
291 
292   /*
293    * Scan backward for the EOCD magic.  In an archive without a trailing
294    * comment, we'll find it on the first try.  (We may want to consider
295    * doing an initial minimal read; if we don't find it, retry with a
296    * second read as above.)
297    */
298   CHECK_LE(read_amount, std::numeric_limits<int32_t>::max());
299   int32_t i = read_amount - sizeof(EocdRecord);
300   for (; i >= 0; i--) {
301     if (data[i] == 0x50) {
302       const uint32_t* sig_addr = reinterpret_cast<const uint32_t*>(&data[i]);
303       if (android::base::get_unaligned<uint32_t>(sig_addr) == EocdRecord::kSignature) {
304         ALOGV("+++ Found EOCD at buf+%d", i);
305         break;
306       }
307     }
308   }
309   if (i < 0) {
310     ALOGD("Zip: EOCD not found, %s is not zip", debug_file_name);
311     return kInvalidFile;
312   }
313 
314   const off64_t eocd_offset = search_start + i;
315   auto eocd = reinterpret_cast<const EocdRecord*>(data + i);
316   /*
317    * Verify that there's no trailing space at the end of the central directory
318    * and its comment.
319    */
320   const off64_t calculated_length = eocd_offset + sizeof(EocdRecord) + eocd->comment_length;
321   if (calculated_length != file_length) {
322     ALOGW("Zip: %" PRId64 " extraneous bytes at the end of the central directory",
323           static_cast<int64_t>(file_length - calculated_length));
324     return kInvalidFile;
325   }
326 
327   // One of the field is 0xFFFFFFFF, look for the zip64 EOCD instead.
328   if (eocd->num_records_on_disk == UINT16_MAX || eocd->num_records == UINT16_MAX ||
329       eocd->cd_size == UINT32_MAX || eocd->cd_start_offset == UINT32_MAX ||
330       eocd->comment_length == UINT16_MAX) {
331     ALOGV("Looking for the zip64 EOCD (cd_size: %" PRIu32 ", cd_start_offset: %" PRIu32
332           ", comment_length: %" PRIu16 ", num_records: %" PRIu16 ", num_records_on_disk: %" PRIu16
333           ")",
334           eocd->cd_size, eocd->cd_start_offset, eocd->comment_length, eocd->num_records,
335           eocd->num_records_on_disk);
336     return FindCentralDirectoryInfoForZip64(debug_file_name, archive, eocd_offset, cdInfo);
337   }
338 
339   /*
340    * Grab the CD offset and size, and the number of entries in the
341    * archive and verify that they look reasonable.
342    */
343   if (static_cast<off64_t>(eocd->cd_start_offset) + eocd->cd_size > eocd_offset) {
344     ALOGW("Zip: bad offsets (dir %" PRIu32 ", size %" PRIu32 ", eocd %" PRId64 ")",
345           eocd->cd_start_offset, eocd->cd_size, static_cast<int64_t>(eocd_offset));
346     return kInvalidOffset;
347   }
348 
349   *cdInfo = {.num_records = eocd->num_records,
350              .cd_size = eocd->cd_size,
351              .cd_start_offset = eocd->cd_start_offset};
352   return kSuccess;
353 }
354 
355 /*
356  * Find the zip Central Directory and memory-map it.
357  *
358  * On success, returns kSuccess after populating fields from the EOCD area:
359  *   directory_offset
360  *   directory_ptr
361  *   num_entries
362  */
MapCentralDirectory(const char * debug_file_name,ZipArchive * archive)363 static ZipError MapCentralDirectory(const char* debug_file_name, ZipArchive* archive) {
364   // Test file length. We want to make sure the file is small enough to be a zip
365   // file.
366   off64_t file_length = archive->mapped_zip.GetFileLength();
367   if (file_length == -1) {
368     return kInvalidFile;
369   }
370 
371   if (file_length > kMaxFileLength) {
372     ALOGV("Zip: zip file too long %" PRId64, static_cast<int64_t>(file_length));
373     return kInvalidFile;
374   }
375 
376   if (file_length < static_cast<off64_t>(sizeof(EocdRecord))) {
377     ALOGV("Zip: length %" PRId64 " is too small to be zip", static_cast<int64_t>(file_length));
378     return kInvalidFile;
379   }
380 
381   /*
382    * Perform the traditional EOCD snipe hunt.
383    *
384    * We're searching for the End of Central Directory magic number,
385    * which appears at the start of the EOCD block.  It's followed by
386    * 18 bytes of EOCD stuff and up to 64KB of archive comment.  We
387    * need to read the last part of the file into a buffer, dig through
388    * it to find the magic number, parse some values out, and use those
389    * to determine the extent of the CD.
390    *
391    * We start by pulling in the last part of the file.
392    */
393   const auto read_amount = uint32_t(std::min<off64_t>(file_length, kMaxEOCDSearch));
394 
395   CentralDirectoryInfo cdInfo = {};
396   std::vector<uint8_t> scan_buffer(read_amount);
397 
398   SCOPED_SIGBUS_HANDLER({
399     incfs::util::clearAndFree(scan_buffer);
400     return kIoError;
401   });
402 
403   if (auto result = FindCentralDirectoryInfo(debug_file_name, archive,
404                                              file_length, scan_buffer, &cdInfo);
405       result != kSuccess) {
406     return result;
407   }
408 
409   scan_buffer.clear();
410 
411   if (cdInfo.num_records == 0) {
412 #if defined(__ANDROID__)
413     ALOGW("Zip: empty archive?");
414 #endif
415     return kEmptyArchive;
416   }
417 
418   if (cdInfo.cd_size >= SIZE_MAX) {
419     ALOGW("Zip: The size of central directory doesn't fit in range of size_t: %" PRIu64,
420           cdInfo.cd_size);
421     return kInvalidFile;
422   }
423 
424   ALOGV("+++ num_entries=%" PRIu64 " dir_size=%" PRIu64 " dir_offset=%" PRIu64, cdInfo.num_records,
425         cdInfo.cd_size, cdInfo.cd_start_offset);
426 
427   // It all looks good.  Create a mapping for the CD, and set the fields in archive.
428   if (!archive->InitializeCentralDirectory(static_cast<off64_t>(cdInfo.cd_start_offset),
429                                            static_cast<size_t>(cdInfo.cd_size))) {
430     return kMmapFailed;
431   }
432 
433   archive->num_entries = cdInfo.num_records;
434   archive->directory_offset = cdInfo.cd_start_offset;
435 
436   return kSuccess;
437 }
438 
ParseZip64ExtendedInfoInExtraField(const uint8_t * extraFieldStart,uint16_t extraFieldLength,uint32_t zip32UncompressedSize,uint32_t zip32CompressedSize,std::optional<uint32_t> zip32LocalFileHeaderOffset,Zip64ExtendedInfo * zip64Info)439 static ZipError ParseZip64ExtendedInfoInExtraField(
440     const uint8_t* extraFieldStart, uint16_t extraFieldLength, uint32_t zip32UncompressedSize,
441     uint32_t zip32CompressedSize, std::optional<uint32_t> zip32LocalFileHeaderOffset,
442     Zip64ExtendedInfo* zip64Info) {
443   if (extraFieldLength <= 4) {
444     ALOGW("Zip: Extra field isn't large enough to hold zip64 info, size %" PRIu16,
445           extraFieldLength);
446     return kInvalidFile;
447   }
448 
449   // Each header MUST consist of:
450   // Header ID - 2 bytes
451   // Data Size - 2 bytes
452   uint16_t offset = 0;
453   while (offset < extraFieldLength - 4) {
454     auto readPtr = const_cast<uint8_t*>(extraFieldStart + offset);
455     auto headerId = ConsumeUnaligned<uint16_t>(&readPtr);
456     auto dataSize = ConsumeUnaligned<uint16_t>(&readPtr);
457 
458     offset += 4;
459     if (dataSize > extraFieldLength - offset) {
460       ALOGW("Zip: Data size exceeds the boundary of extra field, data size %" PRIu16, dataSize);
461       return kInvalidOffset;
462     }
463 
464     // Skip the other types of extensible data fields. Details in
465     // https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT section 4.5
466     if (headerId != Zip64ExtendedInfo::kHeaderId) {
467       offset += dataSize;
468       continue;
469     }
470     // Layout for Zip64 extended info (not include first 4 bytes of header)
471     // Original
472     // Size       8 bytes    Original uncompressed file size
473 
474     // Compressed
475     // Size       8 bytes    Size of compressed data
476 
477     // Relative Header
478     // Offset     8 bytes    Offset of local header record
479 
480     // Disk Start
481     // Number     4 bytes    Number of the disk on which
482     //                       this file starts
483     if (dataSize == 8 * 3 + 4) {
484       ALOGW(
485           "Zip: Found `Disk Start Number` field in extra block. Ignoring it.");
486       dataSize -= 4;
487     }
488     // Sometimes, only a subset of {uncompressed size, compressed size, relative
489     // header offset} is presents. but golang's zip writer will write out all
490     // 3 even if only 1 is necessary. We should parse all 3 fields if they are
491     // there.
492     const bool completeField = dataSize == 8 * 3;
493 
494     std::optional<uint64_t> uncompressedFileSize;
495     std::optional<uint64_t> compressedFileSize;
496     std::optional<uint64_t> localHeaderOffset;
497     if (zip32UncompressedSize == UINT32_MAX || completeField) {
498       uncompressedFileSize = TryConsumeUnaligned<uint64_t>(
499           &readPtr, extraFieldStart, extraFieldLength);
500       if (!uncompressedFileSize.has_value()) return kInvalidOffset;
501     }
502     if (zip32CompressedSize == UINT32_MAX || completeField) {
503       compressedFileSize = TryConsumeUnaligned<uint64_t>(
504           &readPtr, extraFieldStart, extraFieldLength);
505       if (!compressedFileSize.has_value()) return kInvalidOffset;
506     }
507     if (zip32LocalFileHeaderOffset == UINT32_MAX || completeField) {
508       localHeaderOffset = TryConsumeUnaligned<uint64_t>(
509           &readPtr, extraFieldStart, extraFieldLength);
510       if (!localHeaderOffset.has_value()) return kInvalidOffset;
511     }
512 
513     // calculate how many bytes we read after the data size field.
514     size_t bytesRead = readPtr - (extraFieldStart + offset);
515     if (bytesRead == 0) {
516       ALOGW("Zip: Data size should not be 0 in zip64 extended field");
517       return kInvalidFile;
518     }
519 
520     if (dataSize != bytesRead) {
521       auto localOffsetString = zip32LocalFileHeaderOffset.has_value()
522                                    ? std::to_string(zip32LocalFileHeaderOffset.value())
523                                    : "missing";
524       ALOGW("Zip: Invalid data size in zip64 extended field, expect %zu , get %" PRIu16
525             ", uncompressed size %" PRIu32 ", compressed size %" PRIu32 ", local header offset %s",
526             bytesRead, dataSize, zip32UncompressedSize, zip32CompressedSize,
527             localOffsetString.c_str());
528       return kInvalidFile;
529     }
530 
531     zip64Info->uncompressed_file_size = uncompressedFileSize;
532     zip64Info->compressed_file_size = compressedFileSize;
533     zip64Info->local_header_offset = localHeaderOffset;
534     return kSuccess;
535   }
536 
537   ALOGW("Zip: zip64 extended info isn't found in the extra field.");
538   return kInvalidFile;
539 }
540 
541 /*
542  * Parses the Zip archive's Central Directory.  Allocates and populates the
543  * hash table.
544  *
545  * Returns 0 on success.
546  */
ParseZipArchive(ZipArchive * archive)547 static ZipError ParseZipArchive(ZipArchive* archive) {
548   SCOPED_SIGBUS_HANDLER(return kIoError);
549 
550   maybePrefetch(archive->central_directory.GetBasePtr(), archive->central_directory.GetMapLength());
551   const uint8_t* const cd_ptr = archive->central_directory.GetBasePtr();
552   const size_t cd_length = archive->central_directory.GetMapLength();
553   const uint8_t* const cd_end = cd_ptr + cd_length;
554   const uint64_t num_entries = archive->num_entries;
555   const uint8_t* ptr = cd_ptr;
556   uint16_t max_file_name_length = 0;
557 
558   /* Walk through the central directory and verify values */
559   for (uint64_t i = 0; i < num_entries; i++) {
560     if (ptr > cd_end - sizeof(CentralDirectoryRecord)) {
561       ALOGW("Zip: ran off the end (item #%" PRIu64 ", %zu bytes of central directory)", i,
562             cd_length);
563 #if defined(__ANDROID__)
564       android_errorWriteLog(0x534e4554, "36392138");
565 #endif
566       return kInvalidFile;
567     }
568 
569     auto cdr = reinterpret_cast<const CentralDirectoryRecord*>(ptr);
570     if (cdr->record_signature != CentralDirectoryRecord::kSignature) {
571       ALOGW("Zip: missed a central dir sig (at %" PRIu64 ")", i);
572       return kInvalidFile;
573     }
574 
575     const uint16_t file_name_length = cdr->file_name_length;
576     const uint16_t extra_length = cdr->extra_field_length;
577     const uint16_t comment_length = cdr->comment_length;
578     const uint8_t* file_name = ptr + sizeof(CentralDirectoryRecord);
579 
580     if (file_name_length >= cd_length || file_name > cd_end - file_name_length) {
581       ALOGW("Zip: file name for entry %" PRIu64
582             " exceeds the central directory range, file_name_length: %" PRIu16 ", cd_length: %zu",
583             i, file_name_length, cd_length);
584       return kInvalidEntryName;
585     }
586 
587     max_file_name_length = std::max(max_file_name_length, file_name_length);
588 
589     const uint8_t* extra_field = file_name + file_name_length;
590     if (extra_length >= cd_length || extra_field > cd_end - extra_length) {
591       ALOGW("Zip: extra field for entry %" PRIu64
592             " exceeds the central directory range, file_name_length: %" PRIu16 ", cd_length: %zu",
593             i, extra_length, cd_length);
594       return kInvalidFile;
595     }
596 
597     off64_t local_header_offset = cdr->local_file_header_offset;
598     if (local_header_offset == UINT32_MAX) {
599       Zip64ExtendedInfo zip64_info{};
600       if (auto status = ParseZip64ExtendedInfoInExtraField(
601               extra_field, extra_length, cdr->uncompressed_size, cdr->compressed_size,
602               cdr->local_file_header_offset, &zip64_info);
603           status != kSuccess) {
604         return status;
605       }
606       CHECK(zip64_info.local_header_offset.has_value());
607       local_header_offset = zip64_info.local_header_offset.value();
608     }
609 
610     if (local_header_offset >= archive->directory_offset) {
611       ALOGW("Zip: bad LFH offset %" PRId64 " at entry %" PRIu64,
612             static_cast<int64_t>(local_header_offset), i);
613       return kInvalidFile;
614     }
615 
616     // Check that file name is valid UTF-8 and doesn't contain NUL (U+0000) characters.
617     if (!IsValidEntryName(file_name, file_name_length)) {
618       ALOGW("Zip: invalid file name at entry %" PRIu64, i);
619       return kInvalidEntryName;
620     }
621 
622     ptr += sizeof(CentralDirectoryRecord) + file_name_length + extra_length + comment_length;
623     if ((ptr - cd_ptr) > static_cast<int64_t>(cd_length)) {
624       ALOGW("Zip: bad CD advance (%tu vs %zu) at entry %" PRIu64, ptr - cd_ptr, cd_length, i);
625       return kInvalidFile;
626     }
627   }
628 
629   /* Create memory efficient entry map */
630   archive->cd_entry_map = CdEntryMapInterface::Create(num_entries, cd_length, max_file_name_length);
631   if (archive->cd_entry_map == nullptr) {
632     return kAllocationFailed;
633   }
634 
635   /* Central directory verified, now add entries to the hash table */
636   ptr = cd_ptr;
637   for (uint64_t i = 0; i < num_entries; i++) {
638     auto cdr = reinterpret_cast<const CentralDirectoryRecord*>(ptr);
639     std::string_view entry_name{reinterpret_cast<const char*>(ptr + sizeof(*cdr)),
640                                 cdr->file_name_length};
641     auto add_result = archive->cd_entry_map->AddToMap(entry_name, cd_ptr);
642     if (add_result != 0) {
643       ALOGW("Zip: Error adding entry to hash table %d", add_result);
644       return add_result;
645     }
646     ptr += sizeof(*cdr) + cdr->file_name_length + cdr->extra_field_length + cdr->comment_length;
647   }
648 
649   uint32_t lfh_start_bytes_buf;
650   auto lfh_start_bytes = reinterpret_cast<const uint32_t*>(archive->mapped_zip.ReadAtOffset(
651       reinterpret_cast<uint8_t*>(&lfh_start_bytes_buf), sizeof(lfh_start_bytes_buf), 0));
652   if (!lfh_start_bytes) {
653     ALOGW("Zip: Unable to read header for entry at offset == 0.");
654     return kInvalidFile;
655   }
656 
657   if (*lfh_start_bytes != LocalFileHeader::kSignature) {
658     ALOGW("Zip: Entry at offset zero has invalid LFH signature %" PRIx32, *lfh_start_bytes);
659 #if defined(__ANDROID__)
660     android_errorWriteLog(0x534e4554, "64211847");
661 #endif
662     return kInvalidFile;
663   }
664 
665   ALOGV("+++ zip good scan %" PRIu64 " entries", num_entries);
666 
667   return kSuccess;
668 }
669 
OpenArchiveInternal(ZipArchive * archive,const char * debug_file_name)670 static int32_t OpenArchiveInternal(ZipArchive* archive, const char* debug_file_name) {
671   int32_t result = MapCentralDirectory(debug_file_name, archive);
672   return result != kSuccess ? result : ParseZipArchive(archive);
673 }
674 
OpenArchiveFd(int fd,const char * debug_file_name,ZipArchiveHandle * handle,bool assume_ownership)675 int32_t OpenArchiveFd(int fd, const char* debug_file_name, ZipArchiveHandle* handle,
676                       bool assume_ownership) {
677   ZipArchive* archive = new ZipArchive(MappedZipFile(fd), assume_ownership);
678   *handle = archive;
679   return OpenArchiveInternal(archive, debug_file_name);
680 }
681 
OpenArchiveFdRange(int fd,const char * debug_file_name,ZipArchiveHandle * handle,off64_t length,off64_t offset,bool assume_ownership)682 int32_t OpenArchiveFdRange(int fd, const char* debug_file_name, ZipArchiveHandle* handle,
683                            off64_t length, off64_t offset, bool assume_ownership) {
684   ZipArchive* archive = new ZipArchive(MappedZipFile(fd, length, offset), assume_ownership);
685   *handle = archive;
686 
687   if (length < 0) {
688     ALOGW("Invalid zip length %" PRId64, length);
689     return kIoError;
690   }
691 
692   if (offset < 0) {
693     ALOGW("Invalid zip offset %" PRId64, offset);
694     return kIoError;
695   }
696 
697   return OpenArchiveInternal(archive, debug_file_name);
698 }
699 
OpenArchive(const char * fileName,ZipArchiveHandle * handle)700 int32_t OpenArchive(const char* fileName, ZipArchiveHandle* handle) {
701   const int fd = ::android::base::utf8::open(fileName, O_RDONLY | O_BINARY | O_CLOEXEC, 0);
702   ZipArchive* archive = new ZipArchive(MappedZipFile(fd), true);
703   *handle = archive;
704 
705   if (fd < 0) {
706     ALOGW("Unable to open '%s': %s", fileName, strerror(errno));
707     return kIoError;
708   }
709 
710   return OpenArchiveInternal(archive, fileName);
711 }
712 
OpenArchiveFromMemory(const void * address,size_t length,const char * debug_file_name,ZipArchiveHandle * handle)713 int32_t OpenArchiveFromMemory(const void* address, size_t length, const char* debug_file_name,
714                               ZipArchiveHandle* handle) {
715   ZipArchive* archive = new ZipArchive(address, length);
716   *handle = archive;
717   return OpenArchiveInternal(archive, debug_file_name);
718 }
719 
GetArchiveInfo(ZipArchiveHandle archive)720 ZipArchiveInfo GetArchiveInfo(ZipArchiveHandle archive) {
721   ZipArchiveInfo result;
722   result.archive_size = archive->mapped_zip.GetFileLength();
723   result.entry_count = archive->num_entries;
724   return result;
725 }
726 
/*
 * Close a ZipArchive, closing the file and freeing the contents.
 *
 * This deletes the ZipArchive object behind |archive|; the handle must not be
 * used afterwards.
 */
void CloseArchive(ZipArchiveHandle archive) {
  ALOGV("Closing archive %p", archive);
  delete archive;
}
734 
ValidateDataDescriptor(MappedZipFile & mapped_zip,const ZipEntry64 * entry)735 static int32_t ValidateDataDescriptor(MappedZipFile& mapped_zip, const ZipEntry64* entry) {
736   SCOPED_SIGBUS_HANDLER(return kIoError);
737 
738   // Maximum possible size for data descriptor: 2 * 4 + 2 * 8 = 24 bytes
739   // The zip format doesn't specify the size of data descriptor. But we won't read OOB here even
740   // if the descriptor isn't present. Because the size cd + eocd in the end of the zipfile is
741   // larger than 24 bytes. And if the descriptor contains invalid data, we'll abort due to
742   // kInconsistentInformation.
743   uint8_t ddBuf[24];
744   off64_t offset = entry->offset;
745   if (entry->method != kCompressStored) {
746     offset += entry->compressed_length;
747   } else {
748     offset += entry->uncompressed_length;
749   }
750 
751   const auto ddPtr = mapped_zip.ReadAtOffset(ddBuf, sizeof(ddBuf), offset);
752   if (!ddPtr) {
753     return kIoError;
754   }
755 
756   const uint32_t ddSignature = *(reinterpret_cast<const uint32_t*>(ddPtr));
757   const uint8_t* ddReadPtr = (ddSignature == DataDescriptor::kOptSignature) ? ddPtr + 4 : ddPtr;
758   DataDescriptor descriptor{};
759   descriptor.crc32 = ConsumeUnaligned<uint32_t>(&ddReadPtr);
760   // Don't use entry->zip64_format_size, because that is set to true even if
761   // both compressed/uncompressed size are < 0xFFFFFFFF.
762   constexpr auto u32max = std::numeric_limits<uint32_t>::max();
763   if (entry->compressed_length >= u32max ||
764       entry->uncompressed_length >= u32max) {
765     descriptor.compressed_size = ConsumeUnaligned<uint64_t>(&ddReadPtr);
766     descriptor.uncompressed_size = ConsumeUnaligned<uint64_t>(&ddReadPtr);
767   } else {
768     descriptor.compressed_size = ConsumeUnaligned<uint32_t>(&ddReadPtr);
769     descriptor.uncompressed_size = ConsumeUnaligned<uint32_t>(&ddReadPtr);
770   }
771 
772   // Validate that the values in the data descriptor match those in the central
773   // directory.
774   if (entry->compressed_length != descriptor.compressed_size ||
775       entry->uncompressed_length != descriptor.uncompressed_size ||
776       entry->crc32 != descriptor.crc32) {
777     ALOGW("Zip: size/crc32 mismatch. expected {%" PRIu64 ", %" PRIu64 ", %" PRIx32
778           "}, was {%" PRIu64 ", %" PRIu64 ", %" PRIx32 "}",
779           entry->compressed_length, entry->uncompressed_length, entry->crc32,
780           descriptor.compressed_size, descriptor.uncompressed_size, descriptor.crc32);
781     return kInconsistentInformation;
782   }
783 
784   return 0;
785 }
786 
// Populates |data| for the entry named |entryName|, whose file name is stored
// |nameOffset| bytes from the start of the central directory mapping.
//
// Attributes are read from the central directory record and then cross-checked
// against the entry's local file header: signature, file name, and (when no
// trailing data descriptor is flagged) crc32 and sizes. Every offset derived
// along the way is required to stay in front of the central directory.
//
// Returns 0 on success; otherwise kIoError, kInvalidOffset, kInvalidFile,
// kInconsistentInformation, or the status reported by
// ParseZip64ExtendedInfoInExtraField.
static int32_t FindEntry(const ZipArchive* archive, std::string_view entryName,
                         const uint64_t nameOffset, ZipEntry64* data) {
  // Heap fallback for names / extra fields that don't fit in the stack buffer
  // declared further down.
  std::vector<uint8_t> buffer;
  // If the handler fires, release the heap buffer and report an I/O error.
  SCOPED_SIGBUS_HANDLER({
    incfs::util::clearAndFree(buffer);
    return kIoError;
  });

  // Recover the start of the central directory entry from the filename
  // pointer.  The filename is the first entry past the fixed-size data,
  // so we can just subtract back from that.
  const uint8_t* base_ptr = archive->central_directory.GetBasePtr();
  const uint8_t* ptr = base_ptr + nameOffset;
  ptr -= sizeof(CentralDirectoryRecord);

  // This is the base of our mmapped region, we have to check that
  // the name that's in the hash table is a pointer to a location within
  // this mapped region.
  if (ptr < base_ptr || ptr > base_ptr + archive->central_directory.GetMapLength()) {
    ALOGW("Zip: Invalid entry pointer");
    return kInvalidOffset;
  }

  auto cdr = reinterpret_cast<const CentralDirectoryRecord*>(ptr);

  // The offset of the start of the central directory in the zipfile.
  // We keep this lying around so that we can check all our lengths
  // and our per-file structures.
  const off64_t cd_offset = archive->directory_offset;

  // Fill out the compression method, modification time, crc32
  // and other interesting attributes from the central directory. These
  // will later be compared against values from the local file header.
  data->method = cdr->compression_method;
  // Date goes in the upper 16 bits, time in the lower 16 bits.
  data->mod_time = cdr->last_mod_date << 16 | cdr->last_mod_time;
  data->crc32 = cdr->crc32;
  data->compressed_length = cdr->compressed_size;
  data->uncompressed_length = cdr->uncompressed_size;

  // Figure out the local header offset from the central directory. The
  // actual file data will begin after the local header and the name /
  // extra comments.
  off64_t local_header_offset = cdr->local_file_header_offset;
  // One of the info field is UINT32_MAX, try to parse the real value in the zip64 extended info in
  // the extra field.
  if (cdr->uncompressed_size == UINT32_MAX || cdr->compressed_size == UINT32_MAX ||
      cdr->local_file_header_offset == UINT32_MAX) {
    // In the central directory the extra field follows the file name.
    const uint8_t* extra_field = ptr + sizeof(CentralDirectoryRecord) + cdr->file_name_length;
    Zip64ExtendedInfo zip64_info{};
    if (auto status = ParseZip64ExtendedInfoInExtraField(
            extra_field, cdr->extra_field_length, cdr->uncompressed_size, cdr->compressed_size,
            cdr->local_file_header_offset, &zip64_info);
        status != kSuccess) {
      return status;
    }

    // Fall back to the 32-bit values for any field the extended info did not provide.
    data->uncompressed_length = zip64_info.uncompressed_file_size.value_or(cdr->uncompressed_size);
    data->compressed_length = zip64_info.compressed_file_size.value_or(cdr->compressed_size);
    local_header_offset = zip64_info.local_header_offset.value_or(local_header_offset);
    data->zip64_format_size =
        cdr->uncompressed_size == UINT32_MAX || cdr->compressed_size == UINT32_MAX;
  }

  off64_t local_header_end;
  if (__builtin_add_overflow(local_header_offset, sizeof(LocalFileHeader), &local_header_end) ||
      local_header_end >= cd_offset) {
    // We tested >= because the name that follows can't be zero length.
    ALOGW("Zip: bad local hdr offset in zip");
    return kInvalidOffset;
  }

  // Read the fixed-size part of the local file header. The returned pointer is
  // what must be used from here on.
  uint8_t lfh_buf[sizeof(LocalFileHeader)];
  const auto lfh = reinterpret_cast<const LocalFileHeader*>(
      archive->mapped_zip.ReadAtOffset(lfh_buf, sizeof(lfh_buf), local_header_offset));
  if (!lfh) {
    ALOGW("Zip: failed reading lfh name from offset %" PRId64,
          static_cast<int64_t>(local_header_offset));
    return kIoError;
  }

  if (lfh->lfh_signature != LocalFileHeader::kSignature) {
    ALOGW("Zip: didn't find signature at start of lfh, offset=%" PRId64,
          static_cast<int64_t>(local_header_offset));
    return kInvalidOffset;
  }

  // Check that the local file header name matches the declared name in the central directory.
  CHECK_LE(entryName.size(), UINT16_MAX);
  auto name_length = static_cast<uint16_t>(entryName.size());
  if (lfh->file_name_length != name_length) {
    ALOGW("Zip: lfh name length did not match central directory for %s: %" PRIu16 " %" PRIu16,
          std::string(entryName).c_str(), lfh->file_name_length, name_length);
    return kInconsistentInformation;
  }
  off64_t name_offset;
  if (__builtin_add_overflow(local_header_offset, sizeof(LocalFileHeader), &name_offset)) {
    ALOGW("Zip: lfh name offset invalid");
    return kInvalidOffset;
  }
  off64_t name_end;
  if (__builtin_add_overflow(name_offset, name_length, &name_end) || name_end > cd_offset) {
    // We tested > cd_offset here because the file data that follows can be zero length.
    ALOGW("Zip: lfh name length invalid");
    return kInvalidOffset;
  }

  // An optimization: get enough memory on the stack to be able to use it later without an extra
  // allocation when reading the zip64 extended info. Reasonable names should be under half the
  // MAX_PATH (256 chars), and Zip64 header size is 32 bytes; archives often have some other extras,
  // e.g. alignment, so 128 bytes ought to be enough for (almost) anybody. If it's not we'll
  // reallocate later anyway.
  uint8_t static_buf[128];
  auto name_buf = static_buf;
  if (name_length > std::size(static_buf)) {
    buffer.resize(name_length);
    name_buf = buffer.data();
  }
  const auto read_name = archive->mapped_zip.ReadAtOffset(name_buf, name_length, name_offset);
  if (!read_name) {
    ALOGW("Zip: failed reading lfh name from offset %" PRId64, static_cast<int64_t>(name_offset));
    return kIoError;
  }
  if (memcmp(entryName.data(), read_name, name_length) != 0) {
    ALOGW("Zip: lfh name did not match central directory");
    return kInconsistentInformation;
  }

  // Check the extra field length, regardless of whether it's used, or what it's used for.
  const off64_t lfh_extra_field_offset = name_offset + lfh->file_name_length;
  const uint16_t lfh_extra_field_size = lfh->extra_field_length;
  if (lfh_extra_field_offset > cd_offset - lfh_extra_field_size) {
    ALOGW("Zip: extra field has a bad size for entry %s", std::string(entryName).c_str());
    return kInvalidOffset;
  }

  data->extra_field_size = lfh_extra_field_size;

  // Check whether the extra field is being used for zip64.
  uint64_t lfh_uncompressed_size = lfh->uncompressed_size;
  uint64_t lfh_compressed_size = lfh->compressed_size;
  if (lfh_uncompressed_size == UINT32_MAX || lfh_compressed_size == UINT32_MAX) {
    if (lfh_uncompressed_size != UINT32_MAX || lfh_compressed_size != UINT32_MAX) {
      ALOGW(
          "Zip: zip64 on Android requires both compressed and uncompressed length to be "
          "UINT32_MAX");
      return kInvalidFile;
    }

    // The name has already been compared, so its storage can be reused for the
    // extra field.
    auto lfh_extra_field_buf = static_buf;
    if (lfh_extra_field_size > std::size(static_buf)) {
      // Make sure vector won't try to copy existing data if it needs to reallocate.
      buffer.clear();
      buffer.resize(lfh_extra_field_size);
      lfh_extra_field_buf = buffer.data();
    }
    const auto local_extra_field = archive->mapped_zip.ReadAtOffset(
        lfh_extra_field_buf, lfh_extra_field_size, lfh_extra_field_offset);
    if (!local_extra_field) {
      ALOGW("Zip: failed reading lfh extra field from offset %" PRId64, lfh_extra_field_offset);
      return kIoError;
    }

    Zip64ExtendedInfo zip64_info{};
    if (auto status = ParseZip64ExtendedInfoInExtraField(
            local_extra_field, lfh_extra_field_size, lfh->uncompressed_size, lfh->compressed_size,
            std::nullopt, &zip64_info);
        status != kSuccess) {
      return status;
    }

    CHECK(zip64_info.uncompressed_file_size.has_value());
    CHECK(zip64_info.compressed_file_size.has_value());
    lfh_uncompressed_size = zip64_info.uncompressed_file_size.value();
    lfh_compressed_size = zip64_info.compressed_file_size.value();
  }

  // Paranoia: Match the values specified in the local file header
  // to those specified in the central directory.

  // Warn if central directory and local file header don't agree on the use
  // of a trailing Data Descriptor. The reference implementation is inconsistent
  // and appears to use the LFH value during extraction (unzip) but the CD value
  // while displaying information about archives (zipinfo). The spec remains
  // silent on this inconsistency as well.
  //
  // For now, always use the version from the LFH but make sure that the values
  // specified in the central directory match those in the data descriptor.
  //
  // NOTE: It's also worth noting that unzip *does* warn about inconsistencies in
  // bit 11 (EFS: The language encoding flag, marking that filename and comment are
  // encoded using UTF-8). This implementation does not check for the presence of
  // that flag and always enforces that entry names are valid UTF-8.
  if ((lfh->gpb_flags & kGPBDDFlagMask) != (cdr->gpb_flags & kGPBDDFlagMask)) {
    ALOGW("Zip: gpb flag mismatch at bit 3. expected {%04" PRIx16 "}, was {%04" PRIx16 "}",
          cdr->gpb_flags, lfh->gpb_flags);
  }

  // If there is no trailing data descriptor, verify that the central directory and local file
  // header agree on the crc, compressed, and uncompressed sizes of the entry.
  if ((lfh->gpb_flags & kGPBDDFlagMask) == 0) {
    data->has_data_descriptor = 0;
    if (data->compressed_length != lfh_compressed_size ||
        data->uncompressed_length != lfh_uncompressed_size || data->crc32 != lfh->crc32) {
      ALOGW("Zip: size/crc32 mismatch. expected {%" PRIu64 ", %" PRIu64 ", %" PRIx32
            "}, was {%" PRIu64 ", %" PRIu64 ", %" PRIx32 "}",
            data->compressed_length, data->uncompressed_length, data->crc32, lfh_compressed_size,
            lfh_uncompressed_size, lfh->crc32);
      return kInconsistentInformation;
    }
  } else {
    data->has_data_descriptor = 1;
  }

  // 4.4.2.1: the upper byte of `version_made_by` gives the source OS. Unix is 3.
  data->version_made_by = cdr->version_made_by;
  data->external_file_attributes = cdr->external_file_attributes;
  if ((data->version_made_by >> 8) == 3) {
    // For Unix-made archives the mode is taken from the upper 16 bits of the
    // external attributes.
    data->unix_mode = (cdr->external_file_attributes >> 16) & 0xffff;
  } else {
    data->unix_mode = 0777;
  }

  // 4.4.4: general purpose bit flags.
  data->gpbf = lfh->gpb_flags;

  // 4.4.14: the lowest bit of the internal file attributes field indicates text.
  // Currently only needed to implement zipinfo.
  data->is_text = (cdr->internal_file_attributes & 1);

  // The payload starts right after the local header, its name and its extra field.
  const off64_t data_offset = local_header_offset + sizeof(LocalFileHeader) +
                              lfh->file_name_length + lfh->extra_field_length;
  if (data_offset > cd_offset) {
    ALOGW("Zip: bad data offset %" PRId64 " in zip", static_cast<int64_t>(data_offset));
    return kInvalidOffset;
  }

  // The payload must end before the central directory starts.
  if (data->compressed_length > cd_offset - data_offset) {
    ALOGW("Zip: bad compressed length in zip (%" PRId64 " + %" PRIu64 " > %" PRId64 ")",
          static_cast<int64_t>(data_offset), data->compressed_length,
          static_cast<int64_t>(cd_offset));
    return kInvalidOffset;
  }

  if (data->method == kCompressStored && data->uncompressed_length > cd_offset - data_offset) {
    ALOGW("Zip: bad uncompressed length in zip (%" PRId64 " + %" PRIu64 " > %" PRId64 ")",
          static_cast<int64_t>(data_offset), data->uncompressed_length,
          static_cast<int64_t>(cd_offset));
    return kInvalidOffset;
  }

  data->offset = data_offset;
  return 0;
}
1040 
1041 struct IterationHandle {
1042   ZipArchive* archive;
1043 
1044   std::function<bool(std::string_view)> matcher;
1045 
1046   uint32_t position = 0;
1047 
IterationHandleIterationHandle1048   IterationHandle(ZipArchive* archive, std::function<bool(std::string_view)> in_matcher)
1049       : archive(archive), matcher(std::move(in_matcher)) {}
1050 
MatchIterationHandle1051   bool Match(std::string_view entry_name) const { return !matcher || matcher(entry_name); }
1052 };
1053 
StartIteration(ZipArchiveHandle archive,void ** cookie_ptr,const std::string_view optional_prefix,const std::string_view optional_suffix)1054 int32_t StartIteration(ZipArchiveHandle archive, void** cookie_ptr,
1055                        const std::string_view optional_prefix,
1056                        const std::string_view optional_suffix) {
1057   if (optional_prefix.size() > static_cast<size_t>(UINT16_MAX) ||
1058       optional_suffix.size() > static_cast<size_t>(UINT16_MAX)) {
1059     ALOGW("Zip: prefix/suffix too long");
1060     return kInvalidEntryName;
1061   }
1062   if (optional_prefix.empty() && optional_suffix.empty()) {
1063     return StartIteration(archive, cookie_ptr, std::function<bool(std::string_view)>{});
1064   }
1065   auto matcher = [prefix = std::string(optional_prefix),
1066                   suffix = std::string(optional_suffix)](std::string_view name) mutable {
1067     return android::base::StartsWith(name, prefix) && android::base::EndsWith(name, suffix);
1068   };
1069   return StartIteration(archive, cookie_ptr, std::move(matcher));
1070 }
1071 
StartIteration(ZipArchiveHandle archive,void ** cookie_ptr,std::function<bool (std::string_view)> matcher)1072 int32_t StartIteration(ZipArchiveHandle archive, void** cookie_ptr,
1073                        std::function<bool(std::string_view)> matcher) {
1074   if (archive == nullptr || archive->cd_entry_map == nullptr) {
1075     ALOGW("Zip: Invalid ZipArchiveHandle");
1076     return kInvalidHandle;
1077   }
1078 
1079   archive->cd_entry_map->ResetIteration();
1080   *cookie_ptr = new IterationHandle(archive, std::move(matcher));
1081   return 0;
1082 }
1083 
EndIteration(void * cookie)1084 void EndIteration(void* cookie) {
1085   delete reinterpret_cast<IterationHandle*>(cookie);
1086 }
1087 
CopyFromZipEntry64(ZipEntry * dst,const ZipEntry64 * src)1088 int32_t ZipEntry::CopyFromZipEntry64(ZipEntry* dst, const ZipEntry64* src) {
1089   if (src->compressed_length > UINT32_MAX || src->uncompressed_length > UINT32_MAX) {
1090     ALOGW(
1091         "Zip: the entry size is too large to fit into the 32 bits ZipEntry, uncompressed "
1092         "length %" PRIu64 ", compressed length %" PRIu64,
1093         src->uncompressed_length, src->compressed_length);
1094     return kUnsupportedEntrySize;
1095   }
1096 
1097   *dst = *src;
1098   dst->uncompressed_length = static_cast<uint32_t>(src->uncompressed_length);
1099   dst->compressed_length = static_cast<uint32_t>(src->compressed_length);
1100   return kSuccess;
1101 }
1102 
FindEntry(const ZipArchiveHandle archive,const std::string_view entryName,ZipEntry * data)1103 int32_t FindEntry(const ZipArchiveHandle archive, const std::string_view entryName,
1104                   ZipEntry* data) {
1105   ZipEntry64 entry64;
1106   if (auto status = FindEntry(archive, entryName, &entry64); status != kSuccess) {
1107     return status;
1108   }
1109 
1110   return ZipEntry::CopyFromZipEntry64(data, &entry64);
1111 }
1112 
FindEntry(const ZipArchiveHandle archive,const std::string_view entryName,ZipEntry64 * data)1113 int32_t FindEntry(const ZipArchiveHandle archive, const std::string_view entryName,
1114                   ZipEntry64* data) {
1115   if (entryName.empty() || entryName.size() > static_cast<size_t>(UINT16_MAX)) {
1116     ALOGW("Zip: Invalid filename of length %zu", entryName.size());
1117     return kInvalidEntryName;
1118   }
1119 
1120   const auto [result, offset] =
1121       archive->cd_entry_map->GetCdEntryOffset(entryName, archive->central_directory.GetBasePtr());
1122   if (result != 0) {
1123     ALOGV("Zip: Could not find entry %.*s", static_cast<int>(entryName.size()), entryName.data());
1124     return static_cast<int32_t>(result);  // kEntryNotFound is safe to truncate.
1125   }
1126   // We know there are at most hash_table_size entries, safe to truncate.
1127   return FindEntry(archive, entryName, offset, data);
1128 }
1129 
Next(void * cookie,ZipEntry * data,std::string * name)1130 int32_t Next(void* cookie, ZipEntry* data, std::string* name) {
1131   ZipEntry64 entry64;
1132   if (auto status = Next(cookie, &entry64, name); status != kSuccess) {
1133     return status;
1134   }
1135 
1136   return ZipEntry::CopyFromZipEntry64(data, &entry64);
1137 }
1138 
Next(void * cookie,ZipEntry * data,std::string_view * name)1139 int32_t Next(void* cookie, ZipEntry* data, std::string_view* name) {
1140   ZipEntry64 entry64;
1141   if (auto status = Next(cookie, &entry64, name); status != kSuccess) {
1142     return status;
1143   }
1144 
1145   return ZipEntry::CopyFromZipEntry64(data, &entry64);
1146 }
1147 
Next(void * cookie,ZipEntry64 * data,std::string * name)1148 int32_t Next(void* cookie, ZipEntry64* data, std::string* name) {
1149   std::string_view sv;
1150   int32_t result = Next(cookie, data, &sv);
1151   if (result == 0 && name) {
1152     *name = std::string(sv);
1153   }
1154   return result;
1155 }
1156 
Next(void * cookie,ZipEntry64 * data,std::string_view * name)1157 int32_t Next(void* cookie, ZipEntry64* data, std::string_view* name) {
1158   IterationHandle* handle = reinterpret_cast<IterationHandle*>(cookie);
1159   if (handle == nullptr) {
1160     ALOGW("Zip: Null ZipArchiveHandle");
1161     return kInvalidHandle;
1162   }
1163 
1164   ZipArchive* archive = handle->archive;
1165   if (archive == nullptr || archive->cd_entry_map == nullptr) {
1166     ALOGW("Zip: Invalid ZipArchiveHandle");
1167     return kInvalidHandle;
1168   }
1169 
1170   SCOPED_SIGBUS_HANDLER(return kIoError);
1171 
1172   auto entry = archive->cd_entry_map->Next(archive->central_directory.GetBasePtr());
1173   while (entry != std::pair<std::string_view, uint64_t>()) {
1174     const auto [entry_name, offset] = entry;
1175     if (handle->Match(entry_name)) {
1176       const int error = FindEntry(archive, entry_name, offset, data);
1177       if (!error && name) {
1178         *name = entry_name;
1179       }
1180       return error;
1181     }
1182     entry = archive->cd_entry_map->Next(archive->central_directory.GetBasePtr());
1183   }
1184 
1185   archive->cd_entry_map->ResetIteration();
1186   return kIterationEnd;
1187 }
1188 
1189 // A Writer that writes data to a fixed size memory region.
1190 // The size of the memory region must be equal to the total size of
1191 // the data appended to it.
1192 class MemoryWriter final : public zip_archive::Writer {
1193  public:
Create(uint8_t * buf,size_t size,const ZipEntry64 * entry)1194   static std::optional<MemoryWriter> Create(uint8_t* buf, size_t size,
1195                                             const ZipEntry64* entry) {
1196     const uint64_t declared_length = entry->uncompressed_length;
1197     if (declared_length > size) {
1198       ALOGE("Zip: file size %" PRIu64 " is larger than the buffer size %zu.", declared_length,
1199             size);
1200       return {};
1201     }
1202 
1203     return std::make_optional<MemoryWriter>(buf, size);
1204   }
1205 
Append(uint8_t * buf,size_t buf_size)1206   virtual bool Append(uint8_t* buf, size_t buf_size) override {
1207     if (buf_size == 0 || (buf >= buf_ && buf < buf_ + size_)) {
1208       return true;
1209     }
1210 
1211     if (size_ < buf_size || bytes_written_ > size_ - buf_size) {
1212       ALOGE("Zip: Unexpected size %zu (declared) vs %zu (actual)", size_,
1213             bytes_written_ + buf_size);
1214       return false;
1215     }
1216 
1217     memcpy(buf_ + bytes_written_, buf, buf_size);
1218     bytes_written_ += buf_size;
1219     return true;
1220   }
1221 
GetBuffer(size_t length)1222   Buffer GetBuffer(size_t length) override {
1223     if (length > size_) {
1224       // Special case for empty files: zlib wants at least some buffer but won't ever write there.
1225       if (size_ == 0 && length <= sizeof(bytes_written_)) {
1226         return {reinterpret_cast<uint8_t*>(&bytes_written_), length};
1227       }
1228       return {};
1229     }
1230     return {buf_, length};
1231   }
1232 
MemoryWriter(uint8_t * buf,size_t size)1233   MemoryWriter(uint8_t* buf, size_t size) : buf_(buf), size_(size), bytes_written_(0) {}
1234 
1235  private:
1236   uint8_t* const buf_{nullptr};
1237   const size_t size_;
1238   size_t bytes_written_;
1239 };
1240 
1241 // A Writer that appends data to a file |fd| at its current position.
1242 // The file will be truncated to the end of the written data.
1243 class FileWriter final : public zip_archive::Writer {
1244  public:
1245   // Creates a FileWriter for |fd| and prepare to write |entry| to it,
1246   // guaranteeing that the file descriptor is valid and that there's enough
1247   // space on the volume to write out the entry completely and that the file
1248   // is truncated to the correct length (no truncation if |fd| references a
1249   // block device).
1250   //
1251   // Returns a valid FileWriter on success, |nullopt| if an error occurred.
Create(int fd,const ZipEntry64 * entry)1252   static std::optional<FileWriter> Create(int fd, const ZipEntry64* entry) {
1253     const uint64_t declared_length = entry->uncompressed_length;
1254     const off64_t current_offset = lseek64(fd, 0, SEEK_CUR);
1255     if (current_offset == -1) {
1256       ALOGE("Zip: unable to seek to current location on fd %d: %s", fd, strerror(errno));
1257       return {};
1258     }
1259 
1260     if (declared_length > SIZE_MAX || declared_length > INT64_MAX) {
1261       ALOGE("Zip: file size %" PRIu64 " is too large to extract.", declared_length);
1262       return {};
1263     }
1264 
1265 #if defined(__linux__)
1266     if (declared_length > 0) {
1267       // Make sure we have enough space on the volume to extract the compressed
1268       // entry. Note that the call to ftruncate below will change the file size but
1269       // will not allocate space on disk and this call to fallocate will not
1270       // change the file size.
1271       // Note: fallocate is only supported by the following filesystems -
1272       // btrfs, ext4, ocfs2, and xfs. Therefore fallocate might fail with
1273       // EOPNOTSUPP error when issued in other filesystems.
1274       // Hence, check for the return error code before concluding that the
1275       // disk does not have enough space.
1276       long result = TEMP_FAILURE_RETRY(fallocate(fd, 0, current_offset, declared_length));
1277       if (result == -1 && errno == ENOSPC) {
1278         ALOGE("Zip: unable to allocate %" PRIu64 " bytes at offset %" PRId64 ": %s",
1279               declared_length, static_cast<int64_t>(current_offset), strerror(errno));
1280         return {};
1281       }
1282     }
1283 #endif  // __linux__
1284 
1285     struct stat sb;
1286     if (fstat(fd, &sb) == -1) {
1287       ALOGE("Zip: unable to fstat file: %s", strerror(errno));
1288       return {};
1289     }
1290 
1291     // Block device doesn't support ftruncate(2).
1292     if (!S_ISBLK(sb.st_mode)) {
1293       long result = TEMP_FAILURE_RETRY(ftruncate(fd, declared_length + current_offset));
1294       if (result == -1) {
1295         ALOGE("Zip: unable to truncate file to %" PRId64 ": %s",
1296               static_cast<int64_t>(declared_length + current_offset), strerror(errno));
1297         return {};
1298       }
1299     }
1300 
1301     return std::make_optional<FileWriter>(fd, declared_length);
1302   }
1303 
Append(uint8_t * buf,size_t buf_size)1304   virtual bool Append(uint8_t* buf, size_t buf_size) override {
1305     if (declared_length_ < buf_size || total_bytes_written_ > declared_length_ - buf_size) {
1306       ALOGE("Zip: Unexpected size %zu  (declared) vs %zu (actual)", declared_length_,
1307             total_bytes_written_ + buf_size);
1308       return false;
1309     }
1310 
1311     const bool result = android::base::WriteFully(fd_, buf, buf_size);
1312     if (result) {
1313       total_bytes_written_ += buf_size;
1314     } else {
1315       ALOGE("Zip: unable to write %zu bytes to file; %s", buf_size, strerror(errno));
1316     }
1317 
1318     return result;
1319   }
1320 
FileWriter(const int fd=-1,const uint64_t declared_length=0)1321   explicit FileWriter(const int fd = -1, const uint64_t declared_length = 0)
1322       : Writer(),
1323         fd_(fd),
1324         declared_length_(static_cast<size_t>(declared_length)),
1325         total_bytes_written_(0) {
1326     CHECK_LE(declared_length, SIZE_MAX);
1327   }
1328 
1329  private:
1330   int fd_;
1331   const size_t declared_length_;
1332   size_t total_bytes_written_;
1333 };
1334 
1335 class EntryReader final : public zip_archive::Reader {
1336  public:
EntryReader(const MappedZipFile & zip_file,const ZipEntry64 * entry)1337   EntryReader(const MappedZipFile& zip_file, const ZipEntry64* entry)
1338       : Reader(), zip_file_(zip_file), entry_(entry) {}
1339 
ReadAtOffset(uint8_t * buf,size_t len,off64_t offset) const1340   bool ReadAtOffset(uint8_t* buf, size_t len, off64_t offset) const override {
1341     const auto res = zip_file_.ReadAtOffset(buf, len, entry_->offset + offset);
1342     if (!res) return false;
1343     if (res != buf) {
1344       memcpy(buf, res, len);
1345     }
1346     return true;
1347   }
1348 
AccessAtOffset(uint8_t * buf,size_t len,off64_t offset) const1349   const uint8_t* AccessAtOffset(uint8_t* buf, size_t len, off64_t offset) const override {
1350     return zip_file_.ReadAtOffset(buf, len, entry_->offset + offset);
1351   }
1352 
IsZeroCopy() const1353   bool IsZeroCopy() const override { return zip_file_.GetBasePtr() != nullptr; }
1354 
1355  private:
1356   const MappedZipFile& zip_file_;
1357   const ZipEntry64* entry_;
1358 };
1359 
// This method is using libz macros with old-style-casts, so the
// -Wold-style-cast diagnostic is silenced for just this wrapper and restored
// right after it.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wold-style-cast"
// Forwards |stream| and |window_bits| to inflateInit2 and returns its result.
static inline int zlib_inflateInit2(z_stream* stream, int window_bits) {
  return inflateInit2(stream, window_bits);
}
#pragma GCC diagnostic pop
1367 
1368 namespace zip_archive {
1369 
1370 // Moved out of line to avoid -Wweak-vtables.
GetBuffer(size_t)1371 auto Writer::GetBuffer(size_t) -> Buffer {
1372   return {};
1373 }
1374 
AccessAtOffset(uint8_t * buf,size_t len,off64_t offset) const1375 const uint8_t* Reader::AccessAtOffset(uint8_t* buf, size_t len, off64_t offset) const {
1376   return ReadAtOffset(buf, len, offset) ? buf : nullptr;
1377 }
1378 
IsZeroCopy() const1379 bool Reader::IsZeroCopy() const {
1380   return false;
1381 }
1382 
1383 }  // namespace zip_archive
1384 
bufferToSpan(zip_archive::Writer::Buffer buf)1385 static std::span<uint8_t> bufferToSpan(zip_archive::Writer::Buffer buf) {
1386   return std::span<uint8_t>(buf.first, buf.second);
1387 }
1388 
1389 template <bool OnIncfs>
inflateImpl(const zip_archive::Reader & reader,const uint64_t compressed_length,const uint64_t uncompressed_length,zip_archive::Writer * writer,uint64_t * crc_out)1390 static int32_t inflateImpl(const zip_archive::Reader& reader,
1391                            const uint64_t compressed_length,
1392                            const uint64_t uncompressed_length,
1393                            zip_archive::Writer* writer, uint64_t* crc_out) {
1394   constexpr uint64_t kBufSize = 32768;
1395 
1396   std::vector<uint8_t> read_buf;
1397   uint64_t max_read_size;
1398   if (reader.IsZeroCopy()) {
1399     max_read_size = std::min<uint64_t>(std::numeric_limits<uint32_t>::max(), compressed_length);
1400   } else {
1401     max_read_size = std::min(compressed_length, kBufSize);
1402     read_buf.resize(static_cast<size_t>(max_read_size));
1403   }
1404 
1405   std::vector<uint8_t> write_buf;
1406   // For some files zlib needs more space than the uncompressed buffer size, e.g. when inflating
1407   // an empty file.
1408   const auto min_write_buffer_size = std::max(compressed_length, uncompressed_length);
1409   auto write_span = bufferToSpan(writer->GetBuffer(size_t(min_write_buffer_size)));
1410   bool direct_writer;
1411   if (write_span.size() >= min_write_buffer_size) {
1412     direct_writer = true;
1413   } else {
1414     direct_writer = false;
1415     write_buf.resize(static_cast<size_t>(std::min(min_write_buffer_size, kBufSize)));
1416     write_span = write_buf;
1417   }
1418 
1419   /*
1420    * Initialize the zlib stream struct.
1421    */
1422   z_stream zstream = {};
1423   zstream.zalloc = Z_NULL;
1424   zstream.zfree = Z_NULL;
1425   zstream.opaque = Z_NULL;
1426   zstream.next_in = NULL;
1427   zstream.avail_in = 0;
1428   zstream.next_out = write_span.data();
1429   zstream.avail_out = static_cast<uint32_t>(write_span.size());
1430   zstream.data_type = Z_UNKNOWN;
1431 
1432   /*
1433    * Use the undocumented "negative window bits" feature to tell zlib
1434    * that there's no zlib header waiting for it.
1435    */
1436   int zerr = zlib_inflateInit2(&zstream, -MAX_WBITS);
1437   if (zerr != Z_OK) {
1438     if (zerr == Z_VERSION_ERROR) {
1439       ALOGE("Installed zlib is not compatible with linked version (%s)", ZLIB_VERSION);
1440     } else {
1441       ALOGW("Call to inflateInit2 failed (zerr=%d)", zerr);
1442     }
1443 
1444     return kZlibError;
1445   }
1446 
1447   auto zstream_deleter = [](z_stream* stream) {
1448     inflateEnd(stream); /* free up any allocated structures */
1449   };
1450 
1451   std::unique_ptr<z_stream, decltype(zstream_deleter)> zstream_guard(&zstream, zstream_deleter);
1452   static_assert(sizeof(zstream_guard) == sizeof(void*));
1453 
1454   SCOPED_SIGBUS_HANDLER_CONDITIONAL(OnIncfs, {
1455     zstream_guard.reset();
1456     incfs::util::clearAndFree(read_buf);
1457     incfs::util::clearAndFree(write_buf);
1458     return kIoError;
1459   });
1460 
1461   const bool compute_crc = (crc_out != nullptr);
1462   uLong crc = 0;
1463   uint64_t remaining_bytes = compressed_length;
1464   uint64_t total_output = 0;
1465   do {
1466     /* read as much as we can */
1467     if (zstream.avail_in == 0) {
1468       const auto read_size = static_cast<uint32_t>(std::min(remaining_bytes, max_read_size));
1469       const off64_t offset = (compressed_length - remaining_bytes);
1470       auto buf = reader.AccessAtOffset(read_buf.data(), read_size, offset);
1471       if (!buf) {
1472         ALOGW("Zip: inflate read failed, getSize = %u: %s", read_size, strerror(errno));
1473         return kIoError;
1474       }
1475 
1476       remaining_bytes -= read_size;
1477 
1478       zstream.next_in = buf;
1479       zstream.avail_in = read_size;
1480     }
1481 
1482     /* uncompress the data */
1483     zerr = inflate(&zstream, Z_NO_FLUSH);
1484     if (zerr != Z_OK && zerr != Z_STREAM_END) {
1485       ALOGW("Zip: inflate zerr=%d (nIn=%p aIn=%u nOut=%p aOut=%u)", zerr, zstream.next_in,
1486             zstream.avail_in, zstream.next_out, zstream.avail_out);
1487       return kZlibError;
1488     }
1489 
1490     /* write when we're full or when we're done */
1491     if (zstream.avail_out == 0 ||
1492         (zerr == Z_STREAM_END && zstream.avail_out != write_span.size())) {
1493       const size_t write_size = zstream.next_out - write_span.data();
1494       if (compute_crc) {
1495         DCHECK_LE(write_size, write_span.size());
1496         crc = crc32(crc, write_span.data(), static_cast<uint32_t>(write_size));
1497       }
1498       total_output += write_span.size() - zstream.avail_out;
1499 
1500       if (direct_writer) {
1501         write_span = write_span.subspan(write_size);
1502       } else if (!writer->Append(write_span.data(), write_size)) {
1503         return kIoError;
1504       }
1505 
1506       if (zstream.avail_out == 0) {
1507         zstream.next_out = write_span.data();
1508         zstream.avail_out = static_cast<uint32_t>(write_span.size());
1509       }
1510     }
1511   } while (zerr == Z_OK);
1512 
1513   CHECK_EQ(zerr, Z_STREAM_END); /* other errors should've been caught */
1514 
1515   // NOTE: zstream.adler is always set to 0, because we're using the -MAX_WBITS
1516   // "feature" of zlib to tell it there won't be a zlib file header. zlib
1517   // doesn't bother calculating the checksum in that scenario. We just do
1518   // it ourselves above because there are no additional gains to be made by
1519   // having zlib calculate it for us, since they do it by calling crc32 in
1520   // the same manner that we have above.
1521   if (compute_crc) {
1522     *crc_out = crc;
1523   }
1524   if (total_output != uncompressed_length || remaining_bytes != 0) {
1525     ALOGW("Zip: size mismatch on inflated file (%lu vs %" PRIu64 ")", zstream.total_out,
1526           uncompressed_length);
1527     return kInconsistentInformation;
1528   }
1529 
1530   return 0;
1531 }
1532 
InflateEntryToWriter(MappedZipFile & mapped_zip,const ZipEntry64 * entry,zip_archive::Writer * writer,uint64_t * crc_out)1533 static int32_t InflateEntryToWriter(MappedZipFile& mapped_zip, const ZipEntry64* entry,
1534                                     zip_archive::Writer* writer, uint64_t* crc_out) {
1535   const EntryReader reader(mapped_zip, entry);
1536   return inflateImpl<true>(reader, entry->compressed_length,
1537                            entry->uncompressed_length, writer, crc_out);
1538 }
1539 
CopyEntryToWriter(MappedZipFile & mapped_zip,const ZipEntry64 * entry,zip_archive::Writer * writer,uint64_t * crc_out)1540 static int32_t CopyEntryToWriter(MappedZipFile& mapped_zip, const ZipEntry64* entry,
1541                                  zip_archive::Writer* writer, uint64_t* crc_out) {
1542   constexpr uint64_t kBufSize = 32768;
1543   std::vector<uint8_t> buf;
1544   std::span<uint8_t> write_span{};
1545   uint64_t max_read_size;
1546   if (mapped_zip.GetBasePtr() == nullptr ||
1547       mapped_zip.GetFileLength() < entry->uncompressed_length) {
1548     // Check if we can read directly into the writer.
1549     write_span = bufferToSpan(writer->GetBuffer(size_t(entry->uncompressed_length)));
1550     if (write_span.size() >= entry->uncompressed_length) {
1551       max_read_size = entry->uncompressed_length;
1552     } else {
1553       max_read_size = std::min(entry->uncompressed_length, kBufSize);
1554       buf.resize((static_cast<size_t>(max_read_size)));
1555       write_span = buf;
1556     }
1557   } else {
1558     max_read_size = entry->uncompressed_length;
1559   }
1560 
1561   SCOPED_SIGBUS_HANDLER({
1562     incfs::util::clearAndFree(buf);
1563     return kIoError;
1564   });
1565 
1566   const uint64_t length = entry->uncompressed_length;
1567   uint64_t count = 0;
1568   uLong crc = 0;
1569   while (count < length) {
1570     uint64_t remaining = length - count;
1571     off64_t offset = entry->offset + count;
1572 
1573     // Safe conversion because even kBufSize is narrow enough for a 32 bit signed value.
1574     const auto block_size = static_cast<uint32_t>(std::min(remaining, max_read_size));
1575 
1576     const auto read_buf = mapped_zip.ReadAtOffset(write_span.data(), block_size, offset);
1577     if (!read_buf) {
1578       ALOGW("CopyFileToFile: copy read failed, block_size = %u, offset = %" PRId64 ": %s",
1579             block_size, static_cast<int64_t>(offset), strerror(errno));
1580       return kIoError;
1581     }
1582 
1583     if (!writer->Append(const_cast<uint8_t*>(read_buf), block_size)) {
1584       return kIoError;
1585     }
1586     // Advance our span if it's a direct buffer (there's a span but local buffer's empty).
1587     if (!write_span.empty() && buf.empty()) {
1588       write_span = write_span.subspan(block_size);
1589     }
1590     if (crc_out) {
1591       crc = crc32(crc, read_buf, block_size);
1592     }
1593     count += block_size;
1594   }
1595 
1596   if (crc_out) {
1597     *crc_out = crc;
1598   }
1599 
1600   return 0;
1601 }
1602 
extractToWriter(ZipArchiveHandle handle,const ZipEntry64 * entry,zip_archive::Writer * writer)1603 static int32_t extractToWriter(ZipArchiveHandle handle, const ZipEntry64* entry,
1604                                zip_archive::Writer* writer) {
1605   const uint16_t method = entry->method;
1606 
1607   // this should default to kUnknownCompressionMethod.
1608   int32_t return_value = -1;
1609   uint64_t crc = 0;
1610   if (method == kCompressStored) {
1611     return_value =
1612         CopyEntryToWriter(handle->mapped_zip, entry, writer, kCrcChecksEnabled ? &crc : nullptr);
1613   } else if (method == kCompressDeflated) {
1614     return_value =
1615         InflateEntryToWriter(handle->mapped_zip, entry, writer, kCrcChecksEnabled ? &crc : nullptr);
1616   }
1617 
1618   if (!return_value && entry->has_data_descriptor) {
1619     return_value = ValidateDataDescriptor(handle->mapped_zip, entry);
1620     if (return_value) {
1621       return return_value;
1622     }
1623   }
1624 
1625   // Validate that the CRC matches the calculated value.
1626   if (kCrcChecksEnabled && (entry->crc32 != static_cast<uint32_t>(crc))) {
1627     ALOGW("Zip: crc mismatch: expected %" PRIu32 ", was %" PRIu64, entry->crc32, crc);
1628     return kInconsistentInformation;
1629   }
1630 
1631   return return_value;
1632 }
1633 
ExtractToMemory(ZipArchiveHandle archive,const ZipEntry * entry,uint8_t * begin,size_t size)1634 int32_t ExtractToMemory(ZipArchiveHandle archive, const ZipEntry* entry, uint8_t* begin,
1635                         size_t size) {
1636   ZipEntry64 entry64(*entry);
1637   return ExtractToMemory(archive, &entry64, begin, size);
1638 }
1639 
ExtractToMemory(ZipArchiveHandle archive,const ZipEntry64 * entry,uint8_t * begin,size_t size)1640 int32_t ExtractToMemory(ZipArchiveHandle archive, const ZipEntry64* entry, uint8_t* begin,
1641                         size_t size) {
1642   auto writer = MemoryWriter::Create(begin, size, entry);
1643   if (!writer) {
1644     return kIoError;
1645   }
1646   return extractToWriter(archive, entry, &writer.value());
1647 }
1648 
ExtractEntryToFile(ZipArchiveHandle archive,const ZipEntry * entry,int fd)1649 int32_t ExtractEntryToFile(ZipArchiveHandle archive, const ZipEntry* entry, int fd) {
1650   ZipEntry64 entry64(*entry);
1651   return ExtractEntryToFile(archive, &entry64, fd);
1652 }
1653 
ExtractEntryToFile(ZipArchiveHandle archive,const ZipEntry64 * entry,int fd)1654 int32_t ExtractEntryToFile(ZipArchiveHandle archive, const ZipEntry64* entry, int fd) {
1655   auto writer = FileWriter::Create(fd, entry);
1656   if (!writer) {
1657     return kIoError;
1658   }
1659   return extractToWriter(archive, entry, &writer.value());
1660 }
1661 
GetFileDescriptor(const ZipArchiveHandle archive)1662 int GetFileDescriptor(const ZipArchiveHandle archive) {
1663   return archive->mapped_zip.GetFileDescriptor();
1664 }
1665 
GetFileDescriptorOffset(const ZipArchiveHandle archive)1666 off64_t GetFileDescriptorOffset(const ZipArchiveHandle archive) {
1667   return archive->mapped_zip.GetFileOffset();
1668 }
1669 
1670 //
1671 // ZIPARCHIVE_DISABLE_CALLBACK_API disables all APIs that accept user callbacks.
1672 // It gets defined for the incfs-supporting version of libziparchive, where one
1673 // has to control all the code accessing the archive. See more at
1674 // incfs_support/signal_handling.h
1675 //
1676 #if !ZIPARCHIVE_DISABLE_CALLBACK_API && !defined(_WIN32)
1677 class ProcessWriter final : public zip_archive::Writer {
1678  public:
ProcessWriter(ProcessZipEntryFunction func,void * cookie)1679   ProcessWriter(ProcessZipEntryFunction func, void* cookie)
1680       : Writer(), proc_function_(func), cookie_(cookie) {}
1681 
Append(uint8_t * buf,size_t buf_size)1682   virtual bool Append(uint8_t* buf, size_t buf_size) override {
1683     return proc_function_(buf, buf_size, cookie_);
1684   }
1685 
1686  private:
1687   ProcessZipEntryFunction proc_function_;
1688   void* cookie_;
1689 };
1690 
ProcessZipEntryContents(ZipArchiveHandle archive,const ZipEntry * entry,ProcessZipEntryFunction func,void * cookie)1691 int32_t ProcessZipEntryContents(ZipArchiveHandle archive, const ZipEntry* entry,
1692                                 ProcessZipEntryFunction func, void* cookie) {
1693   ZipEntry64 entry64(*entry);
1694   return ProcessZipEntryContents(archive, &entry64, func, cookie);
1695 }
1696 
ProcessZipEntryContents(ZipArchiveHandle archive,const ZipEntry64 * entry,ProcessZipEntryFunction func,void * cookie)1697 int32_t ProcessZipEntryContents(ZipArchiveHandle archive, const ZipEntry64* entry,
1698                                 ProcessZipEntryFunction func, void* cookie) {
1699   ProcessWriter writer(func, cookie);
1700   return extractToWriter(archive, entry, &writer);
1701 }
1702 
1703 #endif  // !ZIPARCHIVE_DISABLE_CALLBACK_API && !defined(_WIN32)
1704 
MappedZipFile(int fd,off64_t length,off64_t offset)1705 MappedZipFile::MappedZipFile(int fd, off64_t length, off64_t offset)
1706     : fd_(fd), fd_offset_(offset), data_length_(length) {
1707   // TODO(b/287285733): restore mmap() when the cold cache regression is fixed.
1708 #if 0
1709   // Only try to mmap all files in 64-bit+ processes as it's too easy to use up the whole
1710   // virtual address space on 32-bits, causing out of memory errors later.
1711   if constexpr (sizeof(void*) >= 8) {
1712     // Note: GetFileLength() here fills |data_length_| if it was empty.
1713     // TODO(b/261875471): remove the incfs exclusion when the driver deadlock is fixed.
1714     if (fd >= 0 && !incfs::util::isIncfsFd(fd) && GetFileLength() > 0 &&
1715         GetFileLength() < std::numeric_limits<size_t>::max()) {
1716       mapped_file_ =
1717           android::base::MappedFile::FromFd(fd, fd_offset_, size_t(data_length_), PROT_READ);
1718       if (mapped_file_) {
1719         maybePrepareSequentialReading(mapped_file_->data(), size_t(data_length_));
1720         base_ptr_ = mapped_file_->data();
1721       }
1722     }
1723   }
1724 #endif  // 0
1725 }
1726 
GetFileDescriptor() const1727 int MappedZipFile::GetFileDescriptor() const {
1728   return fd_;
1729 }
1730 
GetBasePtr() const1731 const void* MappedZipFile::GetBasePtr() const {
1732   return base_ptr_;
1733 }
1734 
GetFileOffset() const1735 off64_t MappedZipFile::GetFileOffset() const {
1736   return fd_offset_;
1737 }
1738 
GetFileLength() const1739 off64_t MappedZipFile::GetFileLength() const {
1740   if (data_length_ >= 0) {
1741     return data_length_;
1742   }
1743   if (fd_ < 0) {
1744     ALOGE("Zip: invalid file map");
1745   } else {
1746     struct stat st;
1747     if (fstat(fd_, &st)) {
1748       ALOGE("Zip: fstat(%d) failed: %s", fd_, strerror(errno));
1749     } else {
1750       if (S_ISBLK(st.st_mode)) {
1751 #if defined(__linux__)
1752         // Block devices are special - they report 0 as st_size.
1753         uint64_t size;
1754         if (ioctl(fd_, BLKGETSIZE64, &size)) {
1755           ALOGE("Zip: ioctl(%d, BLKGETSIZE64) failed: %s", fd_, strerror(errno));
1756         } else {
1757           data_length_ = size - fd_offset_;
1758         }
1759 #endif
1760       } else {
1761         data_length_ = st.st_size - fd_offset_;
1762       }
1763     }
1764   }
1765   return data_length_;
1766 }
1767 
1768 // Attempts to read |len| bytes into |buf| at offset |off|.
ReadAtOffset(uint8_t * buf,size_t len,off64_t off) const1769 const uint8_t* MappedZipFile::ReadAtOffset(uint8_t* buf, size_t len, off64_t off) const {
1770   if (base_ptr_) {
1771     if (off < 0 || data_length_ < len || off > data_length_ - len) {
1772       ALOGE("Zip: invalid offset: %" PRId64 ", read length: %zu, data length: %" PRId64, off, len,
1773             data_length_);
1774       return nullptr;
1775     }
1776     maybePrefetch(static_cast<const uint8_t*>(base_ptr_) + off, len);
1777     return static_cast<const uint8_t*>(base_ptr_) + off;
1778   }
1779   if (fd_ < 0) {
1780     ALOGE("Zip: invalid zip file");
1781     return nullptr;
1782   }
1783 
1784   if (off < 0) {
1785     ALOGE("Zip: invalid offset %" PRId64, off);
1786     return nullptr;
1787   }
1788 
1789   off64_t read_offset;
1790   if (__builtin_add_overflow(fd_offset_, off, &read_offset)) {
1791     ALOGE("Zip: invalid read offset %" PRId64 " overflows, fd offset %" PRId64, off, fd_offset_);
1792     return nullptr;
1793   }
1794 
1795   if (data_length_ != -1) {
1796     off64_t read_end;
1797     if (len > std::numeric_limits<off64_t>::max() ||
1798         __builtin_add_overflow(off, static_cast<off64_t>(len), &read_end)) {
1799       ALOGE("Zip: invalid read length %" PRId64 " overflows, offset %" PRId64,
1800             static_cast<off64_t>(len), off);
1801       return nullptr;
1802     }
1803 
1804     if (read_end > data_length_) {
1805       ALOGE("Zip: invalid read length %" PRId64 " exceeds data length %" PRId64 ", offset %" PRId64,
1806             static_cast<off64_t>(len), data_length_, off);
1807       return nullptr;
1808     }
1809   }
1810 
1811   // Make sure to read at offset to ensure concurrent access to the fd.
1812   if (!android::base::ReadFullyAtOffset(fd_, buf, len, read_offset)) {
1813     ALOGE("Zip: failed to read at offset %" PRId64, off);
1814     return nullptr;
1815   }
1816   return buf;
1817 }
1818 
Initialize(const void * map_base_ptr,off64_t cd_start_offset,size_t cd_size)1819 void CentralDirectory::Initialize(const void* map_base_ptr, off64_t cd_start_offset,
1820                                   size_t cd_size) {
1821   base_ptr_ = static_cast<const uint8_t*>(map_base_ptr) + cd_start_offset;
1822   length_ = cd_size;
1823 }
1824 
InitializeCentralDirectory(off64_t cd_start_offset,size_t cd_size)1825 bool ZipArchive::InitializeCentralDirectory(off64_t cd_start_offset, size_t cd_size) {
1826   if (!mapped_zip.GetBasePtr()) {
1827     directory_map = android::base::MappedFile::FromFd(mapped_zip.GetFileDescriptor(),
1828                                                       mapped_zip.GetFileOffset() + cd_start_offset,
1829                                                       cd_size, PROT_READ);
1830     if (!directory_map) {
1831       ALOGE("Zip: failed to map central directory (offset %" PRId64 ", size %zu): %s",
1832             cd_start_offset, cd_size, strerror(errno));
1833       return false;
1834     }
1835 
1836     CHECK_EQ(directory_map->size(), cd_size);
1837     central_directory.Initialize(directory_map->data(), 0 /*offset*/, cd_size);
1838   } else {
1839     if (mapped_zip.GetBasePtr() == nullptr) {
1840       ALOGE(
1841           "Zip: Failed to map central directory, bad mapped_zip base "
1842           "pointer");
1843       return false;
1844     }
1845     if (static_cast<off64_t>(cd_start_offset) + static_cast<off64_t>(cd_size) >
1846         mapped_zip.GetFileLength()) {
1847       ALOGE(
1848           "Zip: Failed to map central directory, offset exceeds mapped memory region (start_offset "
1849           "%" PRId64 ", cd_size %zu, mapped_region_size %" PRId64 ")",
1850           static_cast<int64_t>(cd_start_offset), cd_size, mapped_zip.GetFileLength());
1851       return false;
1852     }
1853 
1854     central_directory.Initialize(mapped_zip.GetBasePtr(), cd_start_offset, cd_size);
1855   }
1856   return true;
1857 }
1858 
1859 // This function returns the embedded timestamp as is and doesn't perform validation.
GetModificationTime() const1860 tm ZipEntryCommon::GetModificationTime() const {
1861   tm t = {};
1862 
1863   t.tm_hour = (mod_time >> 11) & 0x1f;
1864   t.tm_min = (mod_time >> 5) & 0x3f;
1865   t.tm_sec = (mod_time & 0x1f) << 1;
1866 
1867   t.tm_year = ((mod_time >> 25) & 0x7f) + 80;
1868   t.tm_mon = ((mod_time >> 21) & 0xf) - 1;
1869   t.tm_mday = (mod_time >> 16) & 0x1f;
1870 
1871   return t;
1872 }
1873 
1874 namespace zip_archive {
1875 
Inflate(const Reader & reader,const uint64_t compressed_length,const uint64_t uncompressed_length,Writer * writer,uint64_t * crc_out)1876 int32_t Inflate(const Reader& reader, const uint64_t compressed_length,
1877                 const uint64_t uncompressed_length, Writer* writer,
1878                 uint64_t* crc_out) {
1879   return inflateImpl<false>(reader, compressed_length, uncompressed_length,
1880                             writer, crc_out);
1881 }
1882 
1883 //
1884 // ZIPARCHIVE_DISABLE_CALLBACK_API disables all APIs that accept user callbacks.
1885 // It gets defined for the incfs-supporting version of libziparchive, where one
1886 // has to control all the code accessing the archive. See more at
1887 // incfs_support/signal_handling.h
1888 //
1889 #if !ZIPARCHIVE_DISABLE_CALLBACK_API
1890 
ExtractToWriter(ZipArchiveHandle handle,const ZipEntry64 * entry,zip_archive::Writer * writer)1891 int32_t ExtractToWriter(ZipArchiveHandle handle, const ZipEntry64* entry,
1892                         zip_archive::Writer* writer) {
1893   return extractToWriter(handle, entry, writer);
1894 }
1895 
1896 #endif  // !ZIPARCHIVE_DISABLE_CALLBACK_API
1897 
1898 }  // namespace zip_archive
1899