• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 #ifndef THIRD_PARTY_ZLIB_GOOGLE_ZIP_READER_H_
5 #define THIRD_PARTY_ZLIB_GOOGLE_ZIP_READER_H_
6 
7 #include <stddef.h>
8 #include <stdint.h>
9 
10 #include <limits>
11 #include <memory>
12 #include <string>
13 
14 #include "base/callback.h"
15 #include "base/files/file.h"
16 #include "base/files/file_path.h"
17 #include "base/memory/weak_ptr.h"
18 #include "base/numerics/safe_conversions.h"
19 #include "base/time/time.h"
20 
21 #if defined(USE_SYSTEM_MINIZIP)
22 #include <minizip/unzip.h>
23 #else
24 #include "third_party/zlib/contrib/minizip/unzip.h"
25 #endif
26 
27 namespace zip {
28 
29 // A delegate interface used to stream out an entry; see
30 // ZipReader::ExtractCurrentEntry.
31 class WriterDelegate {
32  public:
~WriterDelegate()33   virtual ~WriterDelegate() {}
34 
35   // Invoked once before any data is streamed out to pave the way (e.g., to open
36   // the output file). Return false on failure to cancel extraction.
PrepareOutput()37   virtual bool PrepareOutput() { return true; }
38 
39   // Invoked to write the next chunk of data. Return false on failure to cancel
40   // extraction.
WriteBytes(const char * data,int num_bytes)41   virtual bool WriteBytes(const char* data, int num_bytes) { return true; }
42 
43   // Sets the last-modified time of the data.
SetTimeModified(const base::Time & time)44   virtual void SetTimeModified(const base::Time& time) {}
45 
46   // Called with the POSIX file permissions of the data; POSIX implementations
47   // may apply some of the permissions (for example, the executable bit) to the
48   // output file.
SetPosixFilePermissions(int mode)49   virtual void SetPosixFilePermissions(int mode) {}
50 
51   // Called if an error occurred while extracting the file. The WriterDelegate
52   // can then remove and clean up the partially extracted data.
OnError()53   virtual void OnError() {}
54 };
55 
56 // This class is used for reading ZIP archives. A typical use case of this class
57 // is to scan entries in a ZIP archive and extract them. The code will look
58 // like:
59 //
60 //   ZipReader reader;
61 //   if (!reader.Open(zip_path)) {
62 //     // Cannot open
63 //     return;
64 //   }
65 //
66 //   while (const ZipReader::entry* entry = reader.Next()) {
67 //     auto writer = CreateFilePathWriterDelegate(extract_dir, entry->path);
68 //     if (!reader.ExtractCurrentEntry(writer)) {
69 //           // Cannot extract
70 //           return;
71 //     }
72 //   }
73 //
74 //   if (!reader.ok()) {
75 //     // Error while enumerating entries
76 //     return;
77 //   }
78 //
79 class ZipReader {
80  public:
81   // A callback that is called when the operation is successful.
82   using SuccessCallback = base::OnceClosure;
83   // A callback that is called when the operation fails.
84   using FailureCallback = base::OnceClosure;
85   // A callback that is called periodically during the operation with the number
86   // of bytes that have been processed so far.
87   using ProgressCallback = base::RepeatingCallback<void(int64_t)>;
88 
89   // Information of an entry (file or directory) in a ZIP archive.
90   struct Entry {
91     // Path of this entry, in its original encoding as it is stored in the ZIP
92     // archive. The encoding is not specified here. It might or might not be
93     // UTF-8, and the caller needs to use other means to determine the encoding
94     // if it wants to interpret this path correctly.
95     std::string path_in_original_encoding;
96 
97     // Path of the entry, converted to Unicode. This path is relative (eg
98     // "foo/bar.txt"). Absolute paths (eg "/foo/bar.txt") or paths containing
99     // ".." or "." components (eg "../foo/bar.txt") are converted to safe
100     // relative paths. Eg:
101     // (In ZIP) -> (Entry.path)
102     // /foo/bar -> ROOT/foo/bar
103     // ../a     -> UP/a
104     // ./a      -> DOT/a
105     base::FilePath path;
106 
107     // Size of the original uncompressed file, or 0 if the entry is a directory.
108     // This value should not be trusted, because it is stored as metadata in the
109     // ZIP archive and can be different from the real uncompressed size.
110     int64_t original_size;
111 
112     // Last modified time. If the timestamp stored in the ZIP archive is not
113     // valid, the Unix epoch will be returned.
114     //
115     // The timestamp stored in the ZIP archive uses the MS-DOS date and time
116     // format.
117     //
118     // http://msdn.microsoft.com/en-us/library/ms724247(v=vs.85).aspx
119     //
120     // As such the following limitations apply:
121     // * Only years from 1980 to 2107 can be represented.
122     // * The timestamp has a 2-second resolution.
123     // * There is no timezone information, so the time is interpreted as UTC.
124     base::Time last_modified;
125 
126     // True if the entry is a directory.
127     // False if the entry is a file.
128     bool is_directory;
129 
130     // True if the entry path cannot be converted to a safe relative path. This
131     // happens if a file entry (not a directory) has a filename "." or "..".
132     bool is_unsafe;
133 
134     // True if the file content is encrypted.
135     bool is_encrypted;
136 
137     // Entry POSIX permissions (POSIX systems only).
138     int posix_mode;
139   };
140 
141   ZipReader();
142 
143   ZipReader(const ZipReader&) = delete;
144   ZipReader& operator=(const ZipReader&) = delete;
145 
146   ~ZipReader();
147 
148   // Opens the ZIP archive specified by |zip_path|. Returns true on
149   // success.
150   bool Open(const base::FilePath& zip_path);
151 
152   // Opens the ZIP archive referred to by the platform file |zip_fd|, without
153   // taking ownership of |zip_fd|. Returns true on success.
154   bool OpenFromPlatformFile(base::PlatformFile zip_fd);
155 
156   // Opens the zip data stored in |data|. This class uses a weak reference to
157   // the given sring while extracting files, i.e. the caller should keep the
158   // string until it finishes extracting files.
159   bool OpenFromString(const std::string& data);
160 
161   // Closes the currently opened ZIP archive. This function is called in the
162   // destructor of the class, so you usually don't need to call this.
163   void Close();
164 
165   // Sets the encoding of entry paths in the ZIP archive.
166   // By default, paths are assumed to be in UTF-8.
SetEncoding(std::string encoding)167   void SetEncoding(std::string encoding) { encoding_ = std::move(encoding); }
168 
169   // Sets the decryption password that will be used to decrypt encrypted file in
170   // the ZIP archive.
SetPassword(std::string password)171   void SetPassword(std::string password) { password_ = std::move(password); }
172 
173   // Gets the next entry. Returns null if there is no more entry, or if an error
174   // occurred while scanning entries. The returned Entry is owned by this
175   // ZipReader, and is valid until Next() is called again or until this
176   // ZipReader is closed.
177   //
178   // This function should be called before operations over the current entry
179   // like ExtractCurrentEntryToFile().
180   //
181   // while (const ZipReader::Entry* entry = reader.Next()) {
182   //   // Do something with the current entry here.
183   //   ...
184   // }
185   //
186   // // Finished scanning entries.
187   // // Check if the scanning stopped because of an error.
188   // if (!reader.ok()) {
189   //   // There was an error.
190   //   ...
191   // }
192   const Entry* Next();
193 
194   // Returns true if the enumeration of entries was successful, or false if it
195   // stopped because of an error.
ok()196   bool ok() const { return ok_; }
197 
198   // Extracts |num_bytes_to_extract| bytes of the current entry to |delegate|,
199   // starting from the beginning of the entry.
200   //
201   // Returns true if the entire file was extracted without error.
202   //
203   // Precondition: Next() returned a non-null Entry.
204   bool ExtractCurrentEntry(WriterDelegate* delegate,
205                            uint64_t num_bytes_to_extract =
206                                std::numeric_limits<uint64_t>::max()) const;
207 
208   // Asynchronously extracts the current entry to the given output file path. If
209   // the current entry is a directory it just creates the directory
210   // synchronously instead.
211   //
212   // |success_callback| will be called on success and |failure_callback| will be
213   // called on failure. |progress_callback| will be called at least once.
214   // Callbacks will be posted to the current MessageLoop in-order.
215   //
216   // Precondition: Next() returned a non-null Entry.
217   void ExtractCurrentEntryToFilePathAsync(
218       const base::FilePath& output_file_path,
219       SuccessCallback success_callback,
220       FailureCallback failure_callback,
221       ProgressCallback progress_callback);
222 
223   // Extracts the current entry into memory. If the current entry is a
224   // directory, |*output| is set to the empty string. If the current entry is a
225   // file, |*output| is filled with its contents.
226   //
227   // The value in |Entry::original_size| cannot be trusted, so the real size of
228   // the uncompressed contents can be different. |max_read_bytes| limits the
229   // amount of memory used to carry the entry.
230   //
231   // Returns true if the entire content is read without error. If the content is
232   // bigger than |max_read_bytes|, this function returns false and |*output| is
233   // filled with |max_read_bytes| of data. If an error occurs, this function
234   // returns false and |*output| contains the content extracted so far, which
235   // might be garbage data.
236   //
237   // Precondition: Next() returned a non-null Entry.
238   bool ExtractCurrentEntryToString(uint64_t max_read_bytes,
239                                    std::string* output) const;
240 
ExtractCurrentEntryToString(std::string * output)241   bool ExtractCurrentEntryToString(std::string* output) const {
242     return ExtractCurrentEntryToString(
243         base::checked_cast<uint64_t>(output->max_size()), output);
244   }
245 
246   // Returns the number of entries in the ZIP archive.
247   //
248   // Precondition: one of the Open() methods returned true.
num_entries()249   int num_entries() const { return num_entries_; }
250 
251  private:
252   // Common code used both in Open and OpenFromFd.
253   bool OpenInternal();
254 
255   // Resets the internal state.
256   void Reset();
257 
258   // Opens the current entry in the ZIP archive. On success, returns true and
259   // updates the current entry state |entry_|.
260   //
261   // Note that there is no matching CloseEntry(). The current entry state is
262   // reset automatically as needed.
263   bool OpenEntry();
264 
265   // Normalizes the given path passed as UTF-16 string piece. Sets entry_.path,
266   // entry_.is_directory and entry_.is_unsafe.
267   void Normalize(base::StringPiece16 in);
268 
269   // Extracts a chunk of the file to the target.  Will post a task for the next
270   // chunk and success/failure/progress callbacks as necessary.
271   void ExtractChunk(base::File target_file,
272                     SuccessCallback success_callback,
273                     FailureCallback failure_callback,
274                     ProgressCallback progress_callback,
275                     const int64_t offset);
276 
277   std::string encoding_;
278   std::string password_;
279   unzFile zip_file_;
280   int num_entries_;
281   int next_index_;
282   bool reached_end_;
283   bool ok_;
284   Entry entry_;
285 
286   base::WeakPtrFactory<ZipReader> weak_ptr_factory_{this};
287 };
288 
289 // A writer delegate that writes to a given File. It is recommended that this
290 // file be initially empty.
291 class FileWriterDelegate : public WriterDelegate {
292  public:
293   // Constructs a FileWriterDelegate that manipulates |file|. The delegate will
294   // not own |file|, therefore the caller must guarantee |file| will outlive the
295   // delegate.
296   explicit FileWriterDelegate(base::File* file);
297 
298   // Constructs a FileWriterDelegate that takes ownership of |file|.
299   explicit FileWriterDelegate(base::File owned_file);
300 
301   FileWriterDelegate(const FileWriterDelegate&) = delete;
302   FileWriterDelegate& operator=(const FileWriterDelegate&) = delete;
303 
304   ~FileWriterDelegate() override;
305 
306   // Returns true if the file handle passed to the constructor is valid.
307   bool PrepareOutput() override;
308 
309   // Writes |num_bytes| bytes of |data| to the file, returning false on error or
310   // if not all bytes could be written.
311   bool WriteBytes(const char* data, int num_bytes) override;
312 
313   // Sets the last-modified time of the data.
314   void SetTimeModified(const base::Time& time) override;
315 
316   // On POSIX systems, sets the file to be executable if the source file was
317   // executable.
318   void SetPosixFilePermissions(int mode) override;
319 
320   // Empties the file to avoid leaving garbage data in it.
321   void OnError() override;
322 
323   // Gets the number of bytes written into the file.
file_length()324   int64_t file_length() { return file_length_; }
325 
326  protected:
327   // The delegate can optionally own the file it modifies, in which case
328   // owned_file_ is set and file_ is an alias for owned_file_.
329   base::File owned_file_;
330 
331   // The file the delegate modifies.
332   base::File* const file_ = &owned_file_;
333 
334   int64_t file_length_ = 0;
335 };
336 
337 // A writer delegate that creates and writes a file at a given path. This does
338 // not overwrite any existing file.
339 class FilePathWriterDelegate : public FileWriterDelegate {
340  public:
341   explicit FilePathWriterDelegate(base::FilePath output_file_path);
342 
343   FilePathWriterDelegate(const FilePathWriterDelegate&) = delete;
344   FilePathWriterDelegate& operator=(const FilePathWriterDelegate&) = delete;
345 
346   ~FilePathWriterDelegate() override;
347 
348   // Creates the output file and any necessary intermediate directories. Does
349   // not overwrite any existing file, and returns false if the output file
350   // cannot be created because another file conflicts with it.
351   bool PrepareOutput() override;
352 
353   // Deletes the output file.
354   void OnError() override;
355 
356  private:
357   const base::FilePath output_file_path_;
358 };
359 
360 }  // namespace zip
361 
362 #endif  // THIRD_PARTY_ZLIB_GOOGLE_ZIP_READER_H_
363