1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 #ifndef THIRD_PARTY_ZLIB_GOOGLE_ZIP_READER_H_ 5 #define THIRD_PARTY_ZLIB_GOOGLE_ZIP_READER_H_ 6 7 #include <stddef.h> 8 #include <stdint.h> 9 10 #include <limits> 11 #include <memory> 12 #include <string> 13 14 #include "base/callback.h" 15 #include "base/files/file.h" 16 #include "base/files/file_path.h" 17 #include "base/memory/weak_ptr.h" 18 #include "base/numerics/safe_conversions.h" 19 #include "base/time/time.h" 20 21 #if defined(USE_SYSTEM_MINIZIP) 22 #include <minizip/unzip.h> 23 #else 24 #include "third_party/zlib/contrib/minizip/unzip.h" 25 #endif 26 27 namespace zip { 28 29 // A delegate interface used to stream out an entry; see 30 // ZipReader::ExtractCurrentEntry. 31 class WriterDelegate { 32 public: ~WriterDelegate()33 virtual ~WriterDelegate() {} 34 35 // Invoked once before any data is streamed out to pave the way (e.g., to open 36 // the output file). Return false on failure to cancel extraction. PrepareOutput()37 virtual bool PrepareOutput() { return true; } 38 39 // Invoked to write the next chunk of data. Return false on failure to cancel 40 // extraction. WriteBytes(const char * data,int num_bytes)41 virtual bool WriteBytes(const char* data, int num_bytes) { return true; } 42 43 // Sets the last-modified time of the data. SetTimeModified(const base::Time & time)44 virtual void SetTimeModified(const base::Time& time) {} 45 46 // Called with the POSIX file permissions of the data; POSIX implementations 47 // may apply some of the permissions (for example, the executable bit) to the 48 // output file. SetPosixFilePermissions(int mode)49 virtual void SetPosixFilePermissions(int mode) {} 50 51 // Called if an error occurred while extracting the file. The WriterDelegate 52 // can then remove and clean up the partially extracted data. OnError()53 virtual void OnError() {} 54 }; 55 56 // This class is used for reading ZIP archives. A typical use case of this class 57 // is to scan entries in a ZIP archive and extract them. The code will look 58 // like: 59 // 60 // ZipReader reader; 61 // if (!reader.Open(zip_path)) { 62 // // Cannot open 63 // return; 64 // } 65 // 66 // while (const ZipReader::entry* entry = reader.Next()) { 67 // auto writer = CreateFilePathWriterDelegate(extract_dir, entry->path); 68 // if (!reader.ExtractCurrentEntry(writer)) { 69 // // Cannot extract 70 // return; 71 // } 72 // } 73 // 74 // if (!reader.ok()) { 75 // // Error while enumerating entries 76 // return; 77 // } 78 // 79 class ZipReader { 80 public: 81 // A callback that is called when the operation is successful. 82 using SuccessCallback = base::OnceClosure; 83 // A callback that is called when the operation fails. 84 using FailureCallback = base::OnceClosure; 85 // A callback that is called periodically during the operation with the number 86 // of bytes that have been processed so far. 87 using ProgressCallback = base::RepeatingCallback<void(int64_t)>; 88 89 // Information of an entry (file or directory) in a ZIP archive. 90 struct Entry { 91 // Path of this entry, in its original encoding as it is stored in the ZIP 92 // archive. The encoding is not specified here. It might or might not be 93 // UTF-8, and the caller needs to use other means to determine the encoding 94 // if it wants to interpret this path correctly. 95 std::string path_in_original_encoding; 96 97 // Path of the entry, converted to Unicode. This path is usually relative 98 // (eg "foo/bar.txt"), but it can also be absolute (eg "/foo/bar.txt") or 99 // parent-relative (eg "../foo/bar.txt"). See also |is_unsafe|. 100 base::FilePath path; 101 102 // Size of the original uncompressed file, or 0 if the entry is a directory. 103 // This value should not be trusted, because it is stored as metadata in the 104 // ZIP archive and can be different from the real uncompressed size. 105 int64_t original_size; 106 107 // Last modified time. If the timestamp stored in the ZIP archive is not 108 // valid, the Unix epoch will be returned. 109 // 110 // The timestamp stored in the ZIP archive uses the MS-DOS date and time 111 // format. 112 // 113 // http://msdn.microsoft.com/en-us/library/ms724247(v=vs.85).aspx 114 // 115 // As such the following limitations apply: 116 // * Only years from 1980 to 2107 can be represented. 117 // * The timestamp has a 2-second resolution. 118 // * There is no timezone information, so the time is interpreted as UTC. 119 base::Time last_modified; 120 121 // True if the entry is a directory. 122 // False if the entry is a file. 123 bool is_directory; 124 125 // True if the entry path is considered unsafe, ie if it is absolute or if 126 // it contains "..". 127 bool is_unsafe; 128 129 // True if the file content is encrypted. 130 bool is_encrypted; 131 132 // Entry POSIX permissions (POSIX systems only). 133 int posix_mode; 134 }; 135 136 ZipReader(); 137 138 ZipReader(const ZipReader&) = delete; 139 ZipReader& operator=(const ZipReader&) = delete; 140 141 ~ZipReader(); 142 143 // Opens the ZIP archive specified by |zip_path|. Returns true on 144 // success. 145 bool Open(const base::FilePath& zip_path); 146 147 // Opens the ZIP archive referred to by the platform file |zip_fd|, without 148 // taking ownership of |zip_fd|. Returns true on success. 149 bool OpenFromPlatformFile(base::PlatformFile zip_fd); 150 151 // Opens the zip data stored in |data|. This class uses a weak reference to 152 // the given sring while extracting files, i.e. the caller should keep the 153 // string until it finishes extracting files. 154 bool OpenFromString(const std::string& data); 155 156 // Closes the currently opened ZIP archive. This function is called in the 157 // destructor of the class, so you usually don't need to call this. 158 void Close(); 159 160 // Sets the encoding of entry paths in the ZIP archive. 161 // By default, paths are assumed to be in UTF-8. SetEncoding(std::string encoding)162 void SetEncoding(std::string encoding) { encoding_ = std::move(encoding); } 163 164 // Sets the decryption password that will be used to decrypt encrypted file in 165 // the ZIP archive. SetPassword(std::string password)166 void SetPassword(std::string password) { password_ = std::move(password); } 167 168 // Gets the next entry. Returns null if there is no more entry, or if an error 169 // occurred while scanning entries. The returned Entry is owned by this 170 // ZipReader, and is valid until Next() is called again or until this 171 // ZipReader is closed. 172 // 173 // This function should be called before operations over the current entry 174 // like ExtractCurrentEntryToFile(). 175 // 176 // while (const ZipReader::Entry* entry = reader.Next()) { 177 // // Do something with the current entry here. 178 // ... 179 // } 180 // 181 // // Finished scanning entries. 182 // // Check if the scanning stopped because of an error. 183 // if (!reader.ok()) { 184 // // There was an error. 185 // ... 186 // } 187 const Entry* Next(); 188 189 // Returns true if the enumeration of entries was successful, or false if it 190 // stopped because of an error. ok()191 bool ok() const { return ok_; } 192 193 // Extracts |num_bytes_to_extract| bytes of the current entry to |delegate|, 194 // starting from the beginning of the entry. 195 // 196 // Returns true if the entire file was extracted without error. 197 // 198 // Precondition: Next() returned a non-null Entry. 199 bool ExtractCurrentEntry(WriterDelegate* delegate, 200 uint64_t num_bytes_to_extract = 201 std::numeric_limits<uint64_t>::max()) const; 202 203 // Asynchronously extracts the current entry to the given output file path. If 204 // the current entry is a directory it just creates the directory 205 // synchronously instead. 206 // 207 // |success_callback| will be called on success and |failure_callback| will be 208 // called on failure. |progress_callback| will be called at least once. 209 // Callbacks will be posted to the current MessageLoop in-order. 210 // 211 // Precondition: Next() returned a non-null Entry. 212 void ExtractCurrentEntryToFilePathAsync( 213 const base::FilePath& output_file_path, 214 SuccessCallback success_callback, 215 FailureCallback failure_callback, 216 ProgressCallback progress_callback); 217 218 // Extracts the current entry into memory. If the current entry is a 219 // directory, |*output| is set to the empty string. If the current entry is a 220 // file, |*output| is filled with its contents. 221 // 222 // The value in |Entry::original_size| cannot be trusted, so the real size of 223 // the uncompressed contents can be different. |max_read_bytes| limits the 224 // amount of memory used to carry the entry. 225 // 226 // Returns true if the entire content is read without error. If the content is 227 // bigger than |max_read_bytes|, this function returns false and |*output| is 228 // filled with |max_read_bytes| of data. If an error occurs, this function 229 // returns false and |*output| contains the content extracted so far, which 230 // might be garbage data. 231 // 232 // Precondition: Next() returned a non-null Entry. 233 bool ExtractCurrentEntryToString(uint64_t max_read_bytes, 234 std::string* output) const; 235 ExtractCurrentEntryToString(std::string * output)236 bool ExtractCurrentEntryToString(std::string* output) const { 237 return ExtractCurrentEntryToString( 238 base::checked_cast<uint64_t>(output->max_size()), output); 239 } 240 241 // Returns the number of entries in the ZIP archive. 242 // 243 // Precondition: one of the Open() methods returned true. num_entries()244 int num_entries() const { return num_entries_; } 245 246 private: 247 // Common code used both in Open and OpenFromFd. 248 bool OpenInternal(); 249 250 // Resets the internal state. 251 void Reset(); 252 253 // Opens the current entry in the ZIP archive. On success, returns true and 254 // updates the current entry state |entry_|. 255 // 256 // Note that there is no matching CloseEntry(). The current entry state is 257 // reset automatically as needed. 258 bool OpenEntry(); 259 260 // Extracts a chunk of the file to the target. Will post a task for the next 261 // chunk and success/failure/progress callbacks as necessary. 262 void ExtractChunk(base::File target_file, 263 SuccessCallback success_callback, 264 FailureCallback failure_callback, 265 ProgressCallback progress_callback, 266 const int64_t offset); 267 268 std::string encoding_; 269 std::string password_; 270 unzFile zip_file_; 271 int num_entries_; 272 int next_index_; 273 bool reached_end_; 274 bool ok_; 275 Entry entry_; 276 277 base::WeakPtrFactory<ZipReader> weak_ptr_factory_{this}; 278 }; 279 280 // A writer delegate that writes to a given File. This file is expected to be 281 // initially empty. 282 class FileWriterDelegate : public WriterDelegate { 283 public: 284 // Constructs a FileWriterDelegate that manipulates |file|. The delegate will 285 // not own |file|, therefore the caller must guarantee |file| will outlive the 286 // delegate. 287 explicit FileWriterDelegate(base::File* file); 288 289 // Constructs a FileWriterDelegate that takes ownership of |file|. 290 explicit FileWriterDelegate(base::File owned_file); 291 292 FileWriterDelegate(const FileWriterDelegate&) = delete; 293 FileWriterDelegate& operator=(const FileWriterDelegate&) = delete; 294 295 ~FileWriterDelegate() override; 296 297 // Returns true if the file handle passed to the constructor is valid. 298 bool PrepareOutput() override; 299 300 // Writes |num_bytes| bytes of |data| to the file, returning false on error or 301 // if not all bytes could be written. 302 bool WriteBytes(const char* data, int num_bytes) override; 303 304 // Sets the last-modified time of the data. 305 void SetTimeModified(const base::Time& time) override; 306 307 // On POSIX systems, sets the file to be executable if the source file was 308 // executable. 309 void SetPosixFilePermissions(int mode) override; 310 311 // Empties the file to avoid leaving garbage data in it. 312 void OnError() override; 313 314 // Gets the number of bytes written into the file. file_length()315 int64_t file_length() { return file_length_; } 316 317 protected: 318 // The delegate can optionally own the file it modifies, in which case 319 // owned_file_ is set and file_ is an alias for owned_file_. 320 base::File owned_file_; 321 322 // The file the delegate modifies. 323 base::File* const file_ = &owned_file_; 324 325 int64_t file_length_ = 0; 326 }; 327 328 // A writer delegate that creates and writes a file at a given path. This does 329 // not overwrite any existing file. 330 class FilePathWriterDelegate : public FileWriterDelegate { 331 public: 332 explicit FilePathWriterDelegate(base::FilePath output_file_path); 333 334 FilePathWriterDelegate(const FilePathWriterDelegate&) = delete; 335 FilePathWriterDelegate& operator=(const FilePathWriterDelegate&) = delete; 336 337 ~FilePathWriterDelegate() override; 338 339 // Creates the output file and any necessary intermediate directories. Does 340 // not overwrite any existing file, and returns false if the output file 341 // cannot be created because another file conflicts with it. 342 bool PrepareOutput() override; 343 344 // Deletes the output file. 345 void OnError() override; 346 347 private: 348 const base::FilePath output_file_path_; 349 }; 350 351 } // namespace zip 352 353 #endif // THIRD_PARTY_ZLIB_GOOGLE_ZIP_READER_H_ 354