• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2013 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 /*
20  * Read-only access to Zip archives, with minimal heap allocation.
21  */
22 
23 #include <stdint.h>
24 #include <string.h>
25 #include <sys/cdefs.h>
26 #include <sys/types.h>
27 
28 #include <functional>
29 #include <string>
30 #include <string_view>
31 #include <utility>
32 
33 #include "android-base/macros.h"
34 #include "android-base/off64_t.h"
35 
// Zip compression methods supported by this library.
enum {
  // Entry data is stored as-is, with no compression.
  kCompressStored = 0,
  // Entry data is compressed with standard deflate.
  kCompressDeflated = 8,
};
41 
// This struct holds the common information of a zip entry other than the
// entry size. The compressed and uncompressed length are handled separately
// in the derived structs (ZipEntry and ZipEntry64).
//
// Every field carries a default member initializer so that a
// default-constructed entry never exposes indeterminate values.
struct ZipEntryCommon {
  // Compression method. One of kCompressStored or kCompressDeflated.
  // See also `gpbf` for deflate subtypes.
  uint16_t method{0};

  // Modification time. The zipfile format specifies
  // that the first two little endian bytes contain the time
  // and the last two little endian bytes contain the date.
  // See `GetModificationTime`. Use signed integer to avoid the
  // sub-overflow.
  // TODO: should be overridden by extra time field, if present.
  int32_t mod_time{0};

  // Returns `mod_time` as a broken-down struct tm.
  struct tm GetModificationTime() const;

  // Suggested Unix mode for this entry, from the zip archive if created on
  // Unix, or a default otherwise. See also `external_file_attributes`.
  mode_t unix_mode{0};

  // 1 if this entry contains a data descriptor segment, 0
  // otherwise.
  uint8_t has_data_descriptor{0};

  // Crc32 value of this ZipEntry. This information might
  // either be stored in the local file header or in a special
  // Data descriptor footer at the end of the file entry.
  uint32_t crc32{0};

  // True if the values of the uncompressed length and compressed length are
  // stored in the zip64 extended info of the extra field.
  bool zip64_format_size{false};

  // The offset to the start of data for this ZipEntry.
  off64_t offset{0};

  // The version of zip and the host file system this came from (for zipinfo).
  uint16_t version_made_by{0};

  // The raw attributes, whose interpretation depends on the host
  // file system in `version_made_by` (for zipinfo). See also `unix_mode`.
  uint32_t external_file_attributes{0};

  // Specifics about the deflation (for zipinfo).
  uint16_t gpbf{0};

  // Whether this entry is believed to be text or binary (for zipinfo).
  bool is_text{false};
};
93 
94 struct ZipEntry64;
95 // Many users of the library assume the entry size is capped at UNIT32_MAX. So we keep
96 // the interface for the old ZipEntry here; and we could switch them over to the new
97 // ZipEntry64 later.
98 struct ZipEntry : public ZipEntryCommon {
99   // Compressed length of this ZipEntry. The maximum value is UNIT32_MAX.
100   // Might be present either in the local file header or in the data
101   // descriptor footer.
102   uint32_t compressed_length{0};
103 
104   // Uncompressed length of this ZipEntry. The maximum value is UNIT32_MAX.
105   // Might be present either in the local file header or in the data
106   // descriptor footer.
107   uint32_t uncompressed_length{0};
108 
109   // Copies the contents of a ZipEntry64 object to a 32 bits ZipEntry. Returns 0 if the
110   // size of the entry fits into uint32_t, returns a negative error code
111   // (kUnsupportedEntrySize) otherwise.
112   static int32_t CopyFromZipEntry64(ZipEntry* dst, const ZipEntry64* src);
113 
114  private:
115   ZipEntry& operator=(const ZipEntryCommon& other) {
116     ZipEntryCommon::operator=(other);
117     return *this;
118   }
119 };
120 
121 // Represents information about a zip entry in a zip file.
122 struct ZipEntry64 : public ZipEntryCommon {
123   // Compressed length of this ZipEntry. The maximum value is UNIT64_MAX.
124   // Might be present either in the local file header, the zip64 extended field,
125   // or in the data descriptor footer.
126   uint64_t compressed_length{0};
127 
128   // Uncompressed length of this ZipEntry. The maximum value is UNIT64_MAX.
129   // Might be present either in the local file header, the zip64 extended field,
130   // or in the data descriptor footer.
131   uint64_t uncompressed_length{0};
132 
133   explicit ZipEntry64() = default;
ZipEntry64ZipEntry64134   explicit ZipEntry64(const ZipEntry& zip_entry) : ZipEntryCommon(zip_entry) {
135     compressed_length = zip_entry.compressed_length;
136     uncompressed_length = zip_entry.uncompressed_length;
137   }
138 };
139 
// ZipArchive is only forward-declared in this header; callers refer to an
// archive through the opaque handle type below.
struct ZipArchive;
using ZipArchiveHandle = ZipArchive*;
142 
143 /*
144  * Open a Zip archive, and sets handle to the value of the opaque
145  * handle for the file. This handle must be released by calling
146  * CloseArchive with this handle.
147  *
148  * Returns 0 on success, and negative values on failure.
149  */
150 int32_t OpenArchive(const char* fileName, ZipArchiveHandle* handle);
151 
152 /*
153  * Like OpenArchive, but takes a file descriptor open for reading
154  * at the start of the file.  The descriptor must be mappable (this does
155  * not allow access to a stream).
156  *
157  * Sets handle to the value of the opaque handle for this file descriptor.
158  * This handle must be released by calling CloseArchive with this handle.
159  *
160  * If assume_ownership parameter is 'true' calling CloseArchive will close
161  * the file.
162  *
163  * This function maps and scans the central directory and builds a table
164  * of entries for future lookups.
165  *
166  * "debugFileName" will appear in error messages, but is not otherwise used.
167  *
168  * Returns 0 on success, and negative values on failure.
169  */
170 int32_t OpenArchiveFd(const int fd, const char* debugFileName, ZipArchiveHandle* handle,
171                       bool assume_ownership = true);
172 
173 int32_t OpenArchiveFdRange(const int fd, const char* debugFileName, ZipArchiveHandle* handle,
174                            off64_t length, off64_t offset, bool assume_ownership = true);
175 
176 int32_t OpenArchiveFromMemory(const void* address, size_t length, const char* debugFileName,
177                               ZipArchiveHandle* handle);
178 /*
179  * Close archive, releasing resources associated with it. This will
180  * unmap the central directory of the zipfile and free all internal
181  * data structures associated with the file. It is an error to use
182  * this handle for any further operations without an intervening
183  * call to one of the OpenArchive variants.
184  */
185 void CloseArchive(ZipArchiveHandle archive);
186 
/**
 * Summary information about an archive. See GetArchiveInfo().
 *
 * Both fields are zero-initialized by default so that a default-constructed
 * ZipArchiveInfo never holds indeterminate values.
 */
struct ZipArchiveInfo {
  /** The size in bytes of the archive itself. Used by zipinfo. */
  off64_t archive_size{0};
  /** The number of entries in the archive. */
  uint64_t entry_count{0};
};
194 
195 /**
196  * Returns information about the given archive.
197  */
198 ZipArchiveInfo GetArchiveInfo(ZipArchiveHandle archive);
199 
200 /*
201  * Find an entry in the Zip archive, by name. |data| must be non-null.
202  *
203  * Returns 0 if an entry is found, and populates |data| with information
204  * about this entry. Returns negative values otherwise.
205  *
206  * It's important to note that |data->crc32|, |data->compLen| and
207  * |data->uncompLen| might be set to values from the central directory
208  * if this file entry contains a data descriptor footer. To verify crc32s
209  * and length, a call to VerifyCrcAndLengths must be made after entry data
210  * has been processed.
211  *
212  * On non-Windows platforms this method does not modify internal state and
213  * can be called concurrently.
214  */
215 int32_t FindEntry(const ZipArchiveHandle archive, const std::string_view entryName,
216                   ZipEntry64* data);
217 
218 /*
219  * Start iterating over all entries of a zip file. The order of iteration
220  * is not guaranteed to be the same as the order of elements
221  * in the central directory but is stable for a given zip file. |cookie| will
222  * contain the value of an opaque cookie which can be used to make one or more
223  * calls to Next. All calls to StartIteration must be matched by a call to
224  * EndIteration to free any allocated memory.
225  *
226  * This method also accepts optional prefix and suffix to restrict iteration to
227  * entry names that start with |optional_prefix| or end with |optional_suffix|.
228  *
229  * Returns 0 on success and negative values on failure.
230  */
231 int32_t StartIteration(ZipArchiveHandle archive, void** cookie_ptr,
232                        const std::string_view optional_prefix = "",
233                        const std::string_view optional_suffix = "");
234 
235 /*
236  * Start iterating over all entries of a zip file. Use the matcher functor to
237  * restrict iteration to entry names that make the functor return true.
238  *
239  * Returns 0 on success and negative values on failure.
240  */
241 int32_t StartIteration(ZipArchiveHandle archive, void** cookie_ptr,
242                        std::function<bool(std::string_view entry_name)> matcher);
243 
244 /*
245  * Advance to the next element in the zipfile in iteration order.
246  *
247  * Returns 0 on success, -1 if there are no more elements in this
248  * archive and lower negative values on failure.
249  */
250 int32_t Next(void* cookie, ZipEntry64* data, std::string_view* name);
251 int32_t Next(void* cookie, ZipEntry64* data, std::string* name);
252 
/*
 * Ends an iteration over the entries of a zip file and frees the memory that
 * was allocated in StartIteration.
 */
void EndIteration(void* cookie);
258 
259 /*
260  * Uncompress and write an entry to an open file identified by |fd|.
261  * |entry->uncompressed_length| bytes will be written to the file at
262  * its current offset, and the file will be truncated at the end of
263  * the uncompressed data (no truncation if |fd| references a block
264  * device).
265  *
266  * Returns 0 on success and negative values on failure.
267  */
268 int32_t ExtractEntryToFile(ZipArchiveHandle archive, const ZipEntry64* entry, int fd);
269 
270 /**
271  * Uncompress a given zip entry to the memory region at |begin| and of
272  * size |size|. This size is expected to be the same as the *declared*
273  * uncompressed length of the zip entry. It is an error if the *actual*
274  * number of uncompressed bytes differs from this number.
275  *
276  * Returns 0 on success and negative values on failure.
277  */
278 int32_t ExtractToMemory(ZipArchiveHandle archive, const ZipEntry64* entry, uint8_t* begin,
279                         size_t size);
280 
281 int GetFileDescriptor(const ZipArchiveHandle archive);
282 
283 /**
284  * Returns the offset of the zip archive in the backing file descriptor, or 0 if the zip archive is
285  * not backed by a file descriptor.
286  */
287 off64_t GetFileDescriptorOffset(const ZipArchiveHandle archive);
288 
/**
 * Returns a string for |error_code|.
 * NOTE(review): presumably |error_code| is one of the negative values returned
 * by the functions in this header -- confirm against the implementation.
 */
const char* ErrorCodeString(int32_t error_code);
290 
291 // Many users of libziparchive assume the entry size to be 32 bits long. So we keep these
292 // interfaces that use 32 bit ZipEntry to make old code work. TODO(xunchang) Remove the 32 bit
293 // wrapper functions once we switch all users to recognize ZipEntry64.
294 int32_t FindEntry(const ZipArchiveHandle archive, const std::string_view entryName, ZipEntry* data);
295 int32_t Next(void* cookie, ZipEntry* data, std::string* name);
296 int32_t Next(void* cookie, ZipEntry* data, std::string_view* name);
297 int32_t ExtractEntryToFile(ZipArchiveHandle archive, const ZipEntry* entry, int fd);
298 int32_t ExtractToMemory(ZipArchiveHandle archive, const ZipEntry* entry, uint8_t* begin,
299                         size_t size);
300 
301 //
302 // This gets defined for the version of the library that need to control all
303 // code accessing the zip file. Details in incfs_support/signal_handling.h
304 //
305 #if !ZIPARCHIVE_DISABLE_CALLBACK_API
306 
307 #if !defined(_WIN32)
308 typedef bool (*ProcessZipEntryFunction)(const uint8_t* buf, size_t buf_size, void* cookie);
309 
310 /*
311  * Stream the uncompressed data through the supplied function,
312  * passing cookie to it each time it gets called.
313  */
314 int32_t ProcessZipEntryContents(ZipArchiveHandle archive, const ZipEntry* entry,
315                                 ProcessZipEntryFunction func, void* cookie);
316 int32_t ProcessZipEntryContents(ZipArchiveHandle archive, const ZipEntry64* entry,
317                                 ProcessZipEntryFunction func, void* cookie);
318 #endif  // !defined(_WIN32)
319 
320 #endif  // !ZIPARCHIVE_DISABLE_CALLBACK_API
321 
322 namespace zip_archive {
323 
324 class Writer {
325  public:
326   virtual bool Append(uint8_t* buf, size_t buf_size) = 0;
327 
328   // Returns the internal buffer that can we written into directly.
329   using Buffer = std::pair<uint8_t*, size_t>;
330   virtual Buffer GetBuffer(size_t length);
331 
332  protected:
333   Writer() = default;
334   ~Writer() = default;
335 
336  private:
337   DISALLOW_COPY_AND_ASSIGN(Writer);
338 };
339 
340 class LowLevelReader {
341  public:
342   // Get |len| bytes of data starting at |offset|, either by copying them into the supplied |buf|,
343   // or returning an internal buffer directly.
344   // Returns a pointer to the data (which can be different from |buf|), or |nullptr| on error.
345   virtual const uint8_t* AccessAtOffset(uint8_t* buf, size_t len, off64_t offset) const = 0;
346 
347   // Returns |true| if the reader doesn't need an external buffer but instead returns its own one.
348   virtual bool IsZeroCopy() const = 0;
349 
350  protected:
351   LowLevelReader() = default;
352   ~LowLevelReader() = default;
353 
354  private:
355   DISALLOW_COPY_AND_ASSIGN(LowLevelReader);
356 };
357 
358 class Reader : public LowLevelReader {
359  public:
360   virtual bool ReadAtOffset(uint8_t* buf, size_t len, off64_t offset) const = 0;
361 
362   // Ensure the existing classes implementing Reader don't need to bother with
363   // the new method.
364   const uint8_t* AccessAtOffset(uint8_t* buf, size_t len, off64_t offset) const override;
365   bool IsZeroCopy() const override;
366 
367  protected:
368   Reader() = default;
369   ~Reader() = default;
370 
371  private:
372   DISALLOW_COPY_AND_ASSIGN(Reader);
373 };
374 
375 //
376 // This gets defined for the version of the library that need to control all
377 // code accessing the zip file. Details in incfs_support/signal_handling.h
378 //
379 #if !ZIPARCHIVE_DISABLE_CALLBACK_API
380 
381 /**
382  * Uncompress a given zip entry to given |writer|.
383  *
384  * Returns 0 on success and negative values on failure.
385  */
386 int32_t ExtractToWriter(ZipArchiveHandle handle, const ZipEntry64* entry,
387                         zip_archive::Writer* writer);
388 
389 #endif  // !ZIPARCHIVE_DISABLE_CALLBACK_API
390 
391 /*
392  * Inflates the first |compressed_length| bytes of |reader| to a given |writer|.
393  * |crc_out| is set to the CRC32 checksum of the uncompressed data.
394  *
395  * Returns 0 on success and negative values on failure, for example if |reader|
396  * cannot supply the right amount of data, or if the number of bytes written to
397  * data does not match |uncompressed_length|.
398  *
399  * If |crc_out| is not nullptr, it is set to the crc32 checksum of the
400  * uncompressed data.
401  *
402  * NOTE: in the IncFS version of the library this function remains
403  * unprotected, because the data |reader| is supplying is under the full reader's
404  * control; it's the reader's duty to ensure it is available and OK to access.
405  */
406 int32_t Inflate(const Reader& reader, const uint64_t compressed_length,
407                 const uint64_t uncompressed_length, Writer* writer, uint64_t* crc_out);
408 
409 }  // namespace zip_archive
410