1 /* 2 * Copyright (C) 2013 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #pragma once 18 19 /* 20 * Read-only access to Zip archives, with minimal heap allocation. 21 */ 22 23 #include <stdint.h> 24 #include <string.h> 25 #include <sys/cdefs.h> 26 #include <sys/types.h> 27 28 #include <functional> 29 #include <string> 30 #include <string_view> 31 #include <utility> 32 33 #include "android-base/macros.h" 34 #include "android-base/off64_t.h" 35 36 /* Zip compression methods we support */ 37 enum { 38 kCompressStored = 0, // no compression 39 kCompressDeflated = 8, // standard deflate 40 }; 41 42 // This struct holds the common information of a zip entry other than the 43 // the entry size. The compressed and uncompressed length will be handled 44 // separately in the derived class. 45 struct ZipEntryCommon { 46 // Compression method. One of kCompressStored or kCompressDeflated. 47 // See also `gpbf` for deflate subtypes. 48 uint16_t method; 49 50 // Modification time. The zipfile format specifies 51 // that the first two little endian bytes contain the time 52 // and the last two little endian bytes contain the date. 53 // See `GetModificationTime`. Use signed integer to avoid the 54 // sub-overflow. 55 // TODO: should be overridden by extra time field, if present. 56 int32_t mod_time; 57 58 // Returns `mod_time` as a broken-down struct tm. 59 struct tm GetModificationTime() const; 60 61 // Suggested Unix mode for this entry, from the zip archive if created on 62 // Unix, or a default otherwise. See also `external_file_attributes`. 63 mode_t unix_mode; 64 65 // 1 if this entry contains a data descriptor segment, 0 66 // otherwise. 67 uint8_t has_data_descriptor; 68 69 // Crc32 value of this ZipEntry. This information might 70 // either be stored in the local file header or in a special 71 // Data descriptor footer at the end of the file entry. 72 uint32_t crc32; 73 74 // If the value of uncompressed length and compressed length are stored in 75 // the zip64 extended info of the extra field. 76 bool zip64_format_size{false}; 77 78 // The offset to the start of data for this ZipEntry. 79 off64_t offset; 80 81 // The version of zip and the host file system this came from (for zipinfo). 82 uint16_t version_made_by; 83 84 // The raw attributes, whose interpretation depends on the host 85 // file system in `version_made_by` (for zipinfo). See also `unix_mode`. 86 uint32_t external_file_attributes; 87 88 // Specifics about the deflation (for zipinfo). 89 uint16_t gpbf; 90 // Whether this entry is believed to be text or binary (for zipinfo). 91 bool is_text; 92 93 // extra field size 94 uint16_t extra_field_size; 95 }; 96 97 struct ZipEntry64; 98 // Many users of the library assume the entry size is capped at UNIT32_MAX. So we keep 99 // the interface for the old ZipEntry here; and we could switch them over to the new 100 // ZipEntry64 later. 101 struct ZipEntry : public ZipEntryCommon { 102 // Compressed length of this ZipEntry. The maximum value is UNIT32_MAX. 103 // Might be present either in the local file header or in the data 104 // descriptor footer. 105 uint32_t compressed_length{0}; 106 107 // Uncompressed length of this ZipEntry. The maximum value is UNIT32_MAX. 108 // Might be present either in the local file header or in the data 109 // descriptor footer. 110 uint32_t uncompressed_length{0}; 111 112 // Copies the contents of a ZipEntry64 object to a 32 bits ZipEntry. Returns 0 if the 113 // size of the entry fits into uint32_t, returns a negative error code 114 // (kUnsupportedEntrySize) otherwise. 115 static int32_t CopyFromZipEntry64(ZipEntry* dst, const ZipEntry64* src); 116 117 private: 118 ZipEntry& operator=(const ZipEntryCommon& other) { 119 ZipEntryCommon::operator=(other); 120 return *this; 121 } 122 }; 123 124 // Represents information about a zip entry in a zip file. 125 struct ZipEntry64 : public ZipEntryCommon { 126 // Compressed length of this ZipEntry. The maximum value is UNIT64_MAX. 127 // Might be present either in the local file header, the zip64 extended field, 128 // or in the data descriptor footer. 129 uint64_t compressed_length{0}; 130 131 // Uncompressed length of this ZipEntry. The maximum value is UNIT64_MAX. 132 // Might be present either in the local file header, the zip64 extended field, 133 // or in the data descriptor footer. 134 uint64_t uncompressed_length{0}; 135 136 explicit ZipEntry64() = default; ZipEntry64ZipEntry64137 explicit ZipEntry64(const ZipEntry& zip_entry) : ZipEntryCommon(zip_entry) { 138 compressed_length = zip_entry.compressed_length; 139 uncompressed_length = zip_entry.uncompressed_length; 140 } 141 }; 142 143 struct ZipArchive; 144 typedef ZipArchive* ZipArchiveHandle; 145 146 /* 147 * Open a Zip archive, and sets handle to the value of the opaque 148 * handle for the file. This handle must be released by calling 149 * CloseArchive with this handle. 150 * 151 * Returns 0 on success, and negative values on failure. 152 */ 153 int32_t OpenArchive(const char* fileName, ZipArchiveHandle* handle); 154 155 /* 156 * Like OpenArchive, but takes a file descriptor open for reading 157 * at the start of the file. The descriptor must be mappable (this does 158 * not allow access to a stream). 159 * 160 * Sets handle to the value of the opaque handle for this file descriptor. 161 * This handle must be released by calling CloseArchive with this handle. 162 * 163 * If assume_ownership parameter is 'true' calling CloseArchive will close 164 * the file. 165 * 166 * This function maps and scans the central directory and builds a table 167 * of entries for future lookups. 168 * 169 * "debugFileName" will appear in error messages, but is not otherwise used. 170 * 171 * Returns 0 on success, and negative values on failure. 172 */ 173 int32_t OpenArchiveFd(const int fd, const char* debugFileName, ZipArchiveHandle* handle, 174 bool assume_ownership = true); 175 176 int32_t OpenArchiveFdRange(const int fd, const char* debugFileName, ZipArchiveHandle* handle, 177 off64_t length, off64_t offset, bool assume_ownership = true); 178 179 int32_t OpenArchiveFromMemory(const void* address, size_t length, const char* debugFileName, 180 ZipArchiveHandle* handle); 181 /* 182 * Close archive, releasing resources associated with it. This will 183 * unmap the central directory of the zipfile and free all internal 184 * data structures associated with the file. It is an error to use 185 * this handle for any further operations without an intervening 186 * call to one of the OpenArchive variants. 187 */ 188 void CloseArchive(ZipArchiveHandle archive); 189 190 /** See GetArchiveInfo(). */ 191 struct ZipArchiveInfo { 192 /** The size in bytes of the archive itself. Used by zipinfo. */ 193 off64_t archive_size; 194 /** The number of entries in the archive. */ 195 uint64_t entry_count; 196 }; 197 198 /** 199 * Returns information about the given archive. 200 */ 201 ZipArchiveInfo GetArchiveInfo(ZipArchiveHandle archive); 202 203 /* 204 * Find an entry in the Zip archive, by name. |data| must be non-null. 205 * 206 * Returns 0 if an entry is found, and populates |data| with information 207 * about this entry. Returns negative values otherwise. 208 * 209 * It's important to note that |data->crc32|, |data->compLen| and 210 * |data->uncompLen| might be set to values from the central directory 211 * if this file entry contains a data descriptor footer. To verify crc32s 212 * and length, a call to VerifyCrcAndLengths must be made after entry data 213 * has been processed. 214 * 215 * On non-Windows platforms this method does not modify internal state and 216 * can be called concurrently. 217 */ 218 int32_t FindEntry(const ZipArchiveHandle archive, const std::string_view entryName, 219 ZipEntry64* data); 220 221 /* 222 * Start iterating over all entries of a zip file. The order of iteration 223 * is not guaranteed to be the same as the order of elements 224 * in the central directory but is stable for a given zip file. |cookie| will 225 * contain the value of an opaque cookie which can be used to make one or more 226 * calls to Next. All calls to StartIteration must be matched by a call to 227 * EndIteration to free any allocated memory. 228 * 229 * This method also accepts optional prefix and suffix to restrict iteration to 230 * entry names that start with |optional_prefix| or end with |optional_suffix|. 231 * 232 * Returns 0 on success and negative values on failure. 233 */ 234 int32_t StartIteration(ZipArchiveHandle archive, void** cookie_ptr, 235 const std::string_view optional_prefix = "", 236 const std::string_view optional_suffix = ""); 237 238 /* 239 * Start iterating over all entries of a zip file. Use the matcher functor to 240 * restrict iteration to entry names that make the functor return true. 241 * 242 * Returns 0 on success and negative values on failure. 243 */ 244 int32_t StartIteration(ZipArchiveHandle archive, void** cookie_ptr, 245 std::function<bool(std::string_view entry_name)> matcher); 246 247 /* 248 * Advance to the next element in the zipfile in iteration order. 249 * 250 * Returns 0 on success, -1 if there are no more elements in this 251 * archive and lower negative values on failure. 252 */ 253 int32_t Next(void* cookie, ZipEntry64* data, std::string_view* name); 254 int32_t Next(void* cookie, ZipEntry64* data, std::string* name); 255 256 /* 257 * End iteration over all entries of a zip file and frees the memory allocated 258 * in StartIteration. 259 */ 260 void EndIteration(void* cookie); 261 262 /* 263 * Uncompress and write an entry to an open file identified by |fd|. 264 * |entry->uncompressed_length| bytes will be written to the file at 265 * its current offset, and the file will be truncated at the end of 266 * the uncompressed data (no truncation if |fd| references a block 267 * device). 268 * 269 * Returns 0 on success and negative values on failure. 270 */ 271 int32_t ExtractEntryToFile(ZipArchiveHandle archive, const ZipEntry64* entry, int fd); 272 273 /** 274 * Uncompress a given zip entry to the memory region at |begin| and of 275 * size |size|. This size is expected to be the same as the *declared* 276 * uncompressed length of the zip entry. It is an error if the *actual* 277 * number of uncompressed bytes differs from this number. 278 * 279 * Returns 0 on success and negative values on failure. 280 */ 281 int32_t ExtractToMemory(ZipArchiveHandle archive, const ZipEntry64* entry, uint8_t* begin, 282 size_t size); 283 284 int GetFileDescriptor(const ZipArchiveHandle archive); 285 286 /** 287 * Returns the offset of the zip archive in the backing file descriptor, or 0 if the zip archive is 288 * not backed by a file descriptor. 289 */ 290 off64_t GetFileDescriptorOffset(const ZipArchiveHandle archive); 291 292 const char* ErrorCodeString(int32_t error_code); 293 294 // Many users of libziparchive assume the entry size to be 32 bits long. So we keep these 295 // interfaces that use 32 bit ZipEntry to make old code work. TODO(xunchang) Remove the 32 bit 296 // wrapper functions once we switch all users to recognize ZipEntry64. 297 int32_t FindEntry(const ZipArchiveHandle archive, const std::string_view entryName, ZipEntry* data); 298 int32_t Next(void* cookie, ZipEntry* data, std::string* name); 299 int32_t Next(void* cookie, ZipEntry* data, std::string_view* name); 300 int32_t ExtractEntryToFile(ZipArchiveHandle archive, const ZipEntry* entry, int fd); 301 int32_t ExtractToMemory(ZipArchiveHandle archive, const ZipEntry* entry, uint8_t* begin, 302 size_t size); 303 304 // 305 // This gets defined for the version of the library that need to control all 306 // code accessing the zip file. Details in incfs_support/signal_handling.h 307 // 308 #if !ZIPARCHIVE_DISABLE_CALLBACK_API 309 310 #if !defined(_WIN32) 311 typedef bool (*ProcessZipEntryFunction)(const uint8_t* buf, size_t buf_size, void* cookie); 312 313 /* 314 * Stream the uncompressed data through the supplied function, 315 * passing cookie to it each time it gets called. 316 */ 317 int32_t ProcessZipEntryContents(ZipArchiveHandle archive, const ZipEntry* entry, 318 ProcessZipEntryFunction func, void* cookie); 319 int32_t ProcessZipEntryContents(ZipArchiveHandle archive, const ZipEntry64* entry, 320 ProcessZipEntryFunction func, void* cookie); 321 #endif // !defined(_WIN32) 322 323 #endif // !ZIPARCHIVE_DISABLE_CALLBACK_API 324 325 namespace zip_archive { 326 327 class Writer { 328 public: 329 virtual bool Append(uint8_t* buf, size_t buf_size) = 0; 330 331 // Returns the internal buffer that can we written into directly. 332 using Buffer = std::pair<uint8_t*, size_t>; 333 virtual Buffer GetBuffer(size_t length); 334 335 protected: 336 Writer() = default; 337 ~Writer() = default; 338 339 private: 340 DISALLOW_COPY_AND_ASSIGN(Writer); 341 }; 342 343 class LowLevelReader { 344 public: 345 // Get |len| bytes of data starting at |offset|, either by copying them into the supplied |buf|, 346 // or returning an internal buffer directly. 347 // Returns a pointer to the data (which can be different from |buf|), or |nullptr| on error. 348 virtual const uint8_t* AccessAtOffset(uint8_t* buf, size_t len, off64_t offset) const = 0; 349 350 // Returns |true| if the reader doesn't need an external buffer but instead returns its own one. 351 virtual bool IsZeroCopy() const = 0; 352 353 protected: 354 LowLevelReader() = default; 355 ~LowLevelReader() = default; 356 357 private: 358 DISALLOW_COPY_AND_ASSIGN(LowLevelReader); 359 }; 360 361 class Reader : public LowLevelReader { 362 public: 363 virtual bool ReadAtOffset(uint8_t* buf, size_t len, off64_t offset) const = 0; 364 365 // Ensure the existing classes implementing Reader don't need to bother with 366 // the new method. 367 const uint8_t* AccessAtOffset(uint8_t* buf, size_t len, off64_t offset) const override; 368 bool IsZeroCopy() const override; 369 370 protected: 371 Reader() = default; 372 ~Reader() = default; 373 374 private: 375 DISALLOW_COPY_AND_ASSIGN(Reader); 376 }; 377 378 // 379 // This gets defined for the version of the library that need to control all 380 // code accessing the zip file. Details in incfs_support/signal_handling.h 381 // 382 #if !ZIPARCHIVE_DISABLE_CALLBACK_API 383 384 /** 385 * Uncompress a given zip entry to given |writer|. 386 * 387 * Returns 0 on success and negative values on failure. 388 */ 389 int32_t ExtractToWriter(ZipArchiveHandle handle, const ZipEntry64* entry, 390 zip_archive::Writer* writer); 391 392 #endif // !ZIPARCHIVE_DISABLE_CALLBACK_API 393 394 /* 395 * Inflates the first |compressed_length| bytes of |reader| to a given |writer|. 396 * |crc_out| is set to the CRC32 checksum of the uncompressed data. 397 * 398 * Returns 0 on success and negative values on failure, for example if |reader| 399 * cannot supply the right amount of data, or if the number of bytes written to 400 * data does not match |uncompressed_length|. 401 * 402 * If |crc_out| is not nullptr, it is set to the crc32 checksum of the 403 * uncompressed data. 404 * 405 * NOTE: in the IncFS version of the library this function remains 406 * unprotected, because the data |reader| is supplying is under the full reader's 407 * control; it's the reader's duty to ensure it is available and OK to access. 408 */ 409 int32_t Inflate(const Reader& reader, const uint64_t compressed_length, 410 const uint64_t uncompressed_length, Writer* writer, uint64_t* crc_out); 411 412 } // namespace zip_archive 413