• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 #ifndef THIRD_PARTY_ZLIB_GOOGLE_ZIP_READER_H_
5 #define THIRD_PARTY_ZLIB_GOOGLE_ZIP_READER_H_
6 
7 #include <stddef.h>
8 #include <stdint.h>
9 
10 #include <limits>
11 #include <memory>
12 #include <string>
13 
14 #include "base/callback.h"
15 #include "base/files/file.h"
16 #include "base/files/file_path.h"
17 #include "base/memory/weak_ptr.h"
18 #include "base/numerics/safe_conversions.h"
19 #include "base/time/time.h"
20 
21 #if defined(USE_SYSTEM_MINIZIP)
22 #include <minizip/unzip.h>
23 #else
24 #include "third_party/zlib/contrib/minizip/unzip.h"
25 #endif
26 
27 namespace zip {
28 
29 // A delegate interface used to stream out an entry; see
30 // ZipReader::ExtractCurrentEntry.
31 class WriterDelegate {
32  public:
~WriterDelegate()33   virtual ~WriterDelegate() {}
34 
35   // Invoked once before any data is streamed out to pave the way (e.g., to open
36   // the output file). Return false on failure to cancel extraction.
PrepareOutput()37   virtual bool PrepareOutput() { return true; }
38 
39   // Invoked to write the next chunk of data. Return false on failure to cancel
40   // extraction.
WriteBytes(const char * data,int num_bytes)41   virtual bool WriteBytes(const char* data, int num_bytes) { return true; }
42 
43   // Sets the last-modified time of the data.
SetTimeModified(const base::Time & time)44   virtual void SetTimeModified(const base::Time& time) {}
45 
46   // Called with the POSIX file permissions of the data; POSIX implementations
47   // may apply some of the permissions (for example, the executable bit) to the
48   // output file.
SetPosixFilePermissions(int mode)49   virtual void SetPosixFilePermissions(int mode) {}
50 
51   // Called if an error occurred while extracting the file. The WriterDelegate
52   // can then remove and clean up the partially extracted data.
OnError()53   virtual void OnError() {}
54 };
55 
56 // This class is used for reading ZIP archives. A typical use case of this class
57 // is to scan entries in a ZIP archive and extract them. The code will look
58 // like:
59 //
60 //   ZipReader reader;
61 //   if (!reader.Open(zip_path)) {
62 //     // Cannot open
63 //     return;
64 //   }
65 //
66 //   while (const ZipReader::entry* entry = reader.Next()) {
67 //     auto writer = CreateFilePathWriterDelegate(extract_dir, entry->path);
68 //     if (!reader.ExtractCurrentEntry(writer)) {
69 //           // Cannot extract
70 //           return;
71 //     }
72 //   }
73 //
74 //   if (!reader.ok()) {
75 //     // Error while enumerating entries
76 //     return;
77 //   }
78 //
79 class ZipReader {
80  public:
81   // A callback that is called when the operation is successful.
82   using SuccessCallback = base::OnceClosure;
83   // A callback that is called when the operation fails.
84   using FailureCallback = base::OnceClosure;
85   // A callback that is called periodically during the operation with the number
86   // of bytes that have been processed so far.
87   using ProgressCallback = base::RepeatingCallback<void(int64_t)>;
88 
89   // Information of an entry (file or directory) in a ZIP archive.
90   struct Entry {
91     // Path of this entry, in its original encoding as it is stored in the ZIP
92     // archive. The encoding is not specified here. It might or might not be
93     // UTF-8, and the caller needs to use other means to determine the encoding
94     // if it wants to interpret this path correctly.
95     std::string path_in_original_encoding;
96 
97     // Path of the entry, converted to Unicode. This path is usually relative
98     // (eg "foo/bar.txt"), but it can also be absolute (eg "/foo/bar.txt") or
99     // parent-relative (eg "../foo/bar.txt"). See also |is_unsafe|.
100     base::FilePath path;
101 
102     // Size of the original uncompressed file, or 0 if the entry is a directory.
103     // This value should not be trusted, because it is stored as metadata in the
104     // ZIP archive and can be different from the real uncompressed size.
105     int64_t original_size;
106 
107     // Last modified time. If the timestamp stored in the ZIP archive is not
108     // valid, the Unix epoch will be returned.
109     //
110     // The timestamp stored in the ZIP archive uses the MS-DOS date and time
111     // format.
112     //
113     // http://msdn.microsoft.com/en-us/library/ms724247(v=vs.85).aspx
114     //
115     // As such the following limitations apply:
116     // * Only years from 1980 to 2107 can be represented.
117     // * The timestamp has a 2-second resolution.
118     // * There is no timezone information, so the time is interpreted as UTC.
119     base::Time last_modified;
120 
121     // True if the entry is a directory.
122     // False if the entry is a file.
123     bool is_directory;
124 
125     // True if the entry path is considered unsafe, ie if it is absolute or if
126     // it contains "..".
127     bool is_unsafe;
128 
129     // True if the file content is encrypted.
130     bool is_encrypted;
131 
132     // Entry POSIX permissions (POSIX systems only).
133     int posix_mode;
134   };
135 
136   ZipReader();
137 
138   ZipReader(const ZipReader&) = delete;
139   ZipReader& operator=(const ZipReader&) = delete;
140 
141   ~ZipReader();
142 
143   // Opens the ZIP archive specified by |zip_path|. Returns true on
144   // success.
145   bool Open(const base::FilePath& zip_path);
146 
147   // Opens the ZIP archive referred to by the platform file |zip_fd|, without
148   // taking ownership of |zip_fd|. Returns true on success.
149   bool OpenFromPlatformFile(base::PlatformFile zip_fd);
150 
151   // Opens the zip data stored in |data|. This class uses a weak reference to
152   // the given sring while extracting files, i.e. the caller should keep the
153   // string until it finishes extracting files.
154   bool OpenFromString(const std::string& data);
155 
156   // Closes the currently opened ZIP archive. This function is called in the
157   // destructor of the class, so you usually don't need to call this.
158   void Close();
159 
160   // Sets the encoding of entry paths in the ZIP archive.
161   // By default, paths are assumed to be in UTF-8.
SetEncoding(std::string encoding)162   void SetEncoding(std::string encoding) { encoding_ = std::move(encoding); }
163 
164   // Sets the decryption password that will be used to decrypt encrypted file in
165   // the ZIP archive.
SetPassword(std::string password)166   void SetPassword(std::string password) { password_ = std::move(password); }
167 
168   // Gets the next entry. Returns null if there is no more entry, or if an error
169   // occurred while scanning entries. The returned Entry is owned by this
170   // ZipReader, and is valid until Next() is called again or until this
171   // ZipReader is closed.
172   //
173   // This function should be called before operations over the current entry
174   // like ExtractCurrentEntryToFile().
175   //
176   // while (const ZipReader::Entry* entry = reader.Next()) {
177   //   // Do something with the current entry here.
178   //   ...
179   // }
180   //
181   // // Finished scanning entries.
182   // // Check if the scanning stopped because of an error.
183   // if (!reader.ok()) {
184   //   // There was an error.
185   //   ...
186   // }
187   const Entry* Next();
188 
189   // Returns true if the enumeration of entries was successful, or false if it
190   // stopped because of an error.
ok()191   bool ok() const { return ok_; }
192 
193   // Extracts |num_bytes_to_extract| bytes of the current entry to |delegate|,
194   // starting from the beginning of the entry.
195   //
196   // Returns true if the entire file was extracted without error.
197   //
198   // Precondition: Next() returned a non-null Entry.
199   bool ExtractCurrentEntry(WriterDelegate* delegate,
200                            uint64_t num_bytes_to_extract =
201                                std::numeric_limits<uint64_t>::max()) const;
202 
203   // Asynchronously extracts the current entry to the given output file path. If
204   // the current entry is a directory it just creates the directory
205   // synchronously instead.
206   //
207   // |success_callback| will be called on success and |failure_callback| will be
208   // called on failure. |progress_callback| will be called at least once.
209   // Callbacks will be posted to the current MessageLoop in-order.
210   //
211   // Precondition: Next() returned a non-null Entry.
212   void ExtractCurrentEntryToFilePathAsync(
213       const base::FilePath& output_file_path,
214       SuccessCallback success_callback,
215       FailureCallback failure_callback,
216       ProgressCallback progress_callback);
217 
218   // Extracts the current entry into memory. If the current entry is a
219   // directory, |*output| is set to the empty string. If the current entry is a
220   // file, |*output| is filled with its contents.
221   //
222   // The value in |Entry::original_size| cannot be trusted, so the real size of
223   // the uncompressed contents can be different. |max_read_bytes| limits the
224   // amount of memory used to carry the entry.
225   //
226   // Returns true if the entire content is read without error. If the content is
227   // bigger than |max_read_bytes|, this function returns false and |*output| is
228   // filled with |max_read_bytes| of data. If an error occurs, this function
229   // returns false and |*output| contains the content extracted so far, which
230   // might be garbage data.
231   //
232   // Precondition: Next() returned a non-null Entry.
233   bool ExtractCurrentEntryToString(uint64_t max_read_bytes,
234                                    std::string* output) const;
235 
ExtractCurrentEntryToString(std::string * output)236   bool ExtractCurrentEntryToString(std::string* output) const {
237     return ExtractCurrentEntryToString(
238         base::checked_cast<uint64_t>(output->max_size()), output);
239   }
240 
241   // Returns the number of entries in the ZIP archive.
242   //
243   // Precondition: one of the Open() methods returned true.
num_entries()244   int num_entries() const { return num_entries_; }
245 
246  private:
247   // Common code used both in Open and OpenFromFd.
248   bool OpenInternal();
249 
250   // Resets the internal state.
251   void Reset();
252 
253   // Opens the current entry in the ZIP archive. On success, returns true and
254   // updates the current entry state |entry_|.
255   //
256   // Note that there is no matching CloseEntry(). The current entry state is
257   // reset automatically as needed.
258   bool OpenEntry();
259 
260   // Extracts a chunk of the file to the target.  Will post a task for the next
261   // chunk and success/failure/progress callbacks as necessary.
262   void ExtractChunk(base::File target_file,
263                     SuccessCallback success_callback,
264                     FailureCallback failure_callback,
265                     ProgressCallback progress_callback,
266                     const int64_t offset);
267 
268   std::string encoding_;
269   std::string password_;
270   unzFile zip_file_;
271   int num_entries_;
272   int next_index_;
273   bool reached_end_;
274   bool ok_;
275   Entry entry_;
276 
277   base::WeakPtrFactory<ZipReader> weak_ptr_factory_{this};
278 };
279 
280 // A writer delegate that writes to a given File. This file is expected to be
281 // initially empty.
282 class FileWriterDelegate : public WriterDelegate {
283  public:
284   // Constructs a FileWriterDelegate that manipulates |file|. The delegate will
285   // not own |file|, therefore the caller must guarantee |file| will outlive the
286   // delegate.
287   explicit FileWriterDelegate(base::File* file);
288 
289   // Constructs a FileWriterDelegate that takes ownership of |file|.
290   explicit FileWriterDelegate(base::File owned_file);
291 
292   FileWriterDelegate(const FileWriterDelegate&) = delete;
293   FileWriterDelegate& operator=(const FileWriterDelegate&) = delete;
294 
295   ~FileWriterDelegate() override;
296 
297   // Returns true if the file handle passed to the constructor is valid.
298   bool PrepareOutput() override;
299 
300   // Writes |num_bytes| bytes of |data| to the file, returning false on error or
301   // if not all bytes could be written.
302   bool WriteBytes(const char* data, int num_bytes) override;
303 
304   // Sets the last-modified time of the data.
305   void SetTimeModified(const base::Time& time) override;
306 
307   // On POSIX systems, sets the file to be executable if the source file was
308   // executable.
309   void SetPosixFilePermissions(int mode) override;
310 
311   // Empties the file to avoid leaving garbage data in it.
312   void OnError() override;
313 
314   // Gets the number of bytes written into the file.
file_length()315   int64_t file_length() { return file_length_; }
316 
317  protected:
318   // The delegate can optionally own the file it modifies, in which case
319   // owned_file_ is set and file_ is an alias for owned_file_.
320   base::File owned_file_;
321 
322   // The file the delegate modifies.
323   base::File* const file_ = &owned_file_;
324 
325   int64_t file_length_ = 0;
326 };
327 
328 // A writer delegate that creates and writes a file at a given path. This does
329 // not overwrite any existing file.
330 class FilePathWriterDelegate : public FileWriterDelegate {
331  public:
332   explicit FilePathWriterDelegate(base::FilePath output_file_path);
333 
334   FilePathWriterDelegate(const FilePathWriterDelegate&) = delete;
335   FilePathWriterDelegate& operator=(const FilePathWriterDelegate&) = delete;
336 
337   ~FilePathWriterDelegate() override;
338 
339   // Creates the output file and any necessary intermediate directories. Does
340   // not overwrite any existing file, and returns false if the output file
341   // cannot be created because another file conflicts with it.
342   bool PrepareOutput() override;
343 
344   // Deletes the output file.
345   void OnError() override;
346 
347  private:
348   const base::FilePath output_file_path_;
349 };
350 
351 }  // namespace zip
352 
353 #endif  // THIRD_PARTY_ZLIB_GOOGLE_ZIP_READER_H_
354