1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 #ifndef THIRD_PARTY_ZLIB_GOOGLE_ZIP_READER_H_ 5 #define THIRD_PARTY_ZLIB_GOOGLE_ZIP_READER_H_ 6 7 #include <stddef.h> 8 #include <stdint.h> 9 10 #include <limits> 11 #include <memory> 12 #include <string> 13 14 #include "base/callback.h" 15 #include "base/files/file.h" 16 #include "base/files/file_path.h" 17 #include "base/memory/weak_ptr.h" 18 #include "base/numerics/safe_conversions.h" 19 #include "base/time/time.h" 20 21 #if defined(USE_SYSTEM_MINIZIP) 22 #include <minizip/unzip.h> 23 #else 24 #include "third_party/zlib/contrib/minizip/unzip.h" 25 #endif 26 27 namespace zip { 28 29 // A delegate interface used to stream out an entry; see 30 // ZipReader::ExtractCurrentEntry. 31 class WriterDelegate { 32 public: ~WriterDelegate()33 virtual ~WriterDelegate() {} 34 35 // Invoked once before any data is streamed out to pave the way (e.g., to open 36 // the output file). Return false on failure to cancel extraction. PrepareOutput()37 virtual bool PrepareOutput() { return true; } 38 39 // Invoked to write the next chunk of data. Return false on failure to cancel 40 // extraction. WriteBytes(const char * data,int num_bytes)41 virtual bool WriteBytes(const char* data, int num_bytes) { return true; } 42 43 // Sets the last-modified time of the data. SetTimeModified(const base::Time & time)44 virtual void SetTimeModified(const base::Time& time) {} 45 46 // Called with the POSIX file permissions of the data; POSIX implementations 47 // may apply some of the permissions (for example, the executable bit) to the 48 // output file. SetPosixFilePermissions(int mode)49 virtual void SetPosixFilePermissions(int mode) {} 50 51 // Called if an error occurred while extracting the file. The WriterDelegate 52 // can then remove and clean up the partially extracted data. OnError()53 virtual void OnError() {} 54 }; 55 56 // This class is used for reading ZIP archives. A typical use case of this class 57 // is to scan entries in a ZIP archive and extract them. The code will look 58 // like: 59 // 60 // ZipReader reader; 61 // if (!reader.Open(zip_path)) { 62 // // Cannot open 63 // return; 64 // } 65 // 66 // while (const ZipReader::entry* entry = reader.Next()) { 67 // auto writer = CreateFilePathWriterDelegate(extract_dir, entry->path); 68 // if (!reader.ExtractCurrentEntry(writer)) { 69 // // Cannot extract 70 // return; 71 // } 72 // } 73 // 74 // if (!reader.ok()) { 75 // // Error while enumerating entries 76 // return; 77 // } 78 // 79 class ZipReader { 80 public: 81 // A callback that is called when the operation is successful. 82 using SuccessCallback = base::OnceClosure; 83 // A callback that is called when the operation fails. 84 using FailureCallback = base::OnceClosure; 85 // A callback that is called periodically during the operation with the number 86 // of bytes that have been processed so far. 87 using ProgressCallback = base::RepeatingCallback<void(int64_t)>; 88 89 // Information of an entry (file or directory) in a ZIP archive. 90 struct Entry { 91 // Path of this entry, in its original encoding as it is stored in the ZIP 92 // archive. The encoding is not specified here. It might or might not be 93 // UTF-8, and the caller needs to use other means to determine the encoding 94 // if it wants to interpret this path correctly. 95 std::string path_in_original_encoding; 96 97 // Path of the entry, converted to Unicode. This path is relative (eg 98 // "foo/bar.txt"). Absolute paths (eg "/foo/bar.txt") or paths containing 99 // ".." or "." components (eg "../foo/bar.txt") are converted to safe 100 // relative paths. Eg: 101 // (In ZIP) -> (Entry.path) 102 // /foo/bar -> ROOT/foo/bar 103 // ../a -> UP/a 104 // ./a -> DOT/a 105 base::FilePath path; 106 107 // Size of the original uncompressed file, or 0 if the entry is a directory. 108 // This value should not be trusted, because it is stored as metadata in the 109 // ZIP archive and can be different from the real uncompressed size. 110 int64_t original_size; 111 112 // Last modified time. If the timestamp stored in the ZIP archive is not 113 // valid, the Unix epoch will be returned. 114 // 115 // The timestamp stored in the ZIP archive uses the MS-DOS date and time 116 // format. 117 // 118 // http://msdn.microsoft.com/en-us/library/ms724247(v=vs.85).aspx 119 // 120 // As such the following limitations apply: 121 // * Only years from 1980 to 2107 can be represented. 122 // * The timestamp has a 2-second resolution. 123 // * There is no timezone information, so the time is interpreted as UTC. 124 base::Time last_modified; 125 126 // True if the entry is a directory. 127 // False if the entry is a file. 128 bool is_directory; 129 130 // True if the entry path cannot be converted to a safe relative path. This 131 // happens if a file entry (not a directory) has a filename "." or "..". 132 bool is_unsafe; 133 134 // True if the file content is encrypted. 135 bool is_encrypted; 136 137 // Entry POSIX permissions (POSIX systems only). 138 int posix_mode; 139 }; 140 141 ZipReader(); 142 143 ZipReader(const ZipReader&) = delete; 144 ZipReader& operator=(const ZipReader&) = delete; 145 146 ~ZipReader(); 147 148 // Opens the ZIP archive specified by |zip_path|. Returns true on 149 // success. 150 bool Open(const base::FilePath& zip_path); 151 152 // Opens the ZIP archive referred to by the platform file |zip_fd|, without 153 // taking ownership of |zip_fd|. Returns true on success. 154 bool OpenFromPlatformFile(base::PlatformFile zip_fd); 155 156 // Opens the zip data stored in |data|. This class uses a weak reference to 157 // the given sring while extracting files, i.e. the caller should keep the 158 // string until it finishes extracting files. 159 bool OpenFromString(const std::string& data); 160 161 // Closes the currently opened ZIP archive. This function is called in the 162 // destructor of the class, so you usually don't need to call this. 163 void Close(); 164 165 // Sets the encoding of entry paths in the ZIP archive. 166 // By default, paths are assumed to be in UTF-8. SetEncoding(std::string encoding)167 void SetEncoding(std::string encoding) { encoding_ = std::move(encoding); } 168 169 // Sets the decryption password that will be used to decrypt encrypted file in 170 // the ZIP archive. SetPassword(std::string password)171 void SetPassword(std::string password) { password_ = std::move(password); } 172 173 // Gets the next entry. Returns null if there is no more entry, or if an error 174 // occurred while scanning entries. The returned Entry is owned by this 175 // ZipReader, and is valid until Next() is called again or until this 176 // ZipReader is closed. 177 // 178 // This function should be called before operations over the current entry 179 // like ExtractCurrentEntryToFile(). 180 // 181 // while (const ZipReader::Entry* entry = reader.Next()) { 182 // // Do something with the current entry here. 183 // ... 184 // } 185 // 186 // // Finished scanning entries. 187 // // Check if the scanning stopped because of an error. 188 // if (!reader.ok()) { 189 // // There was an error. 190 // ... 191 // } 192 const Entry* Next(); 193 194 // Returns true if the enumeration of entries was successful, or false if it 195 // stopped because of an error. ok()196 bool ok() const { return ok_; } 197 198 // Extracts |num_bytes_to_extract| bytes of the current entry to |delegate|, 199 // starting from the beginning of the entry. 200 // 201 // Returns true if the entire file was extracted without error. 202 // 203 // Precondition: Next() returned a non-null Entry. 204 bool ExtractCurrentEntry(WriterDelegate* delegate, 205 uint64_t num_bytes_to_extract = 206 std::numeric_limits<uint64_t>::max()) const; 207 208 // Asynchronously extracts the current entry to the given output file path. If 209 // the current entry is a directory it just creates the directory 210 // synchronously instead. 211 // 212 // |success_callback| will be called on success and |failure_callback| will be 213 // called on failure. |progress_callback| will be called at least once. 214 // Callbacks will be posted to the current MessageLoop in-order. 215 // 216 // Precondition: Next() returned a non-null Entry. 217 void ExtractCurrentEntryToFilePathAsync( 218 const base::FilePath& output_file_path, 219 SuccessCallback success_callback, 220 FailureCallback failure_callback, 221 ProgressCallback progress_callback); 222 223 // Extracts the current entry into memory. If the current entry is a 224 // directory, |*output| is set to the empty string. If the current entry is a 225 // file, |*output| is filled with its contents. 226 // 227 // The value in |Entry::original_size| cannot be trusted, so the real size of 228 // the uncompressed contents can be different. |max_read_bytes| limits the 229 // amount of memory used to carry the entry. 230 // 231 // Returns true if the entire content is read without error. If the content is 232 // bigger than |max_read_bytes|, this function returns false and |*output| is 233 // filled with |max_read_bytes| of data. If an error occurs, this function 234 // returns false and |*output| contains the content extracted so far, which 235 // might be garbage data. 236 // 237 // Precondition: Next() returned a non-null Entry. 238 bool ExtractCurrentEntryToString(uint64_t max_read_bytes, 239 std::string* output) const; 240 ExtractCurrentEntryToString(std::string * output)241 bool ExtractCurrentEntryToString(std::string* output) const { 242 return ExtractCurrentEntryToString( 243 base::checked_cast<uint64_t>(output->max_size()), output); 244 } 245 246 // Returns the number of entries in the ZIP archive. 247 // 248 // Precondition: one of the Open() methods returned true. num_entries()249 int num_entries() const { return num_entries_; } 250 251 private: 252 // Common code used both in Open and OpenFromFd. 253 bool OpenInternal(); 254 255 // Resets the internal state. 256 void Reset(); 257 258 // Opens the current entry in the ZIP archive. On success, returns true and 259 // updates the current entry state |entry_|. 260 // 261 // Note that there is no matching CloseEntry(). The current entry state is 262 // reset automatically as needed. 263 bool OpenEntry(); 264 265 // Normalizes the given path passed as UTF-16 string piece. Sets entry_.path, 266 // entry_.is_directory and entry_.is_unsafe. 267 void Normalize(base::StringPiece16 in); 268 269 // Extracts a chunk of the file to the target. Will post a task for the next 270 // chunk and success/failure/progress callbacks as necessary. 271 void ExtractChunk(base::File target_file, 272 SuccessCallback success_callback, 273 FailureCallback failure_callback, 274 ProgressCallback progress_callback, 275 const int64_t offset); 276 277 std::string encoding_; 278 std::string password_; 279 unzFile zip_file_; 280 int num_entries_; 281 int next_index_; 282 bool reached_end_; 283 bool ok_; 284 Entry entry_; 285 286 base::WeakPtrFactory<ZipReader> weak_ptr_factory_{this}; 287 }; 288 289 // A writer delegate that writes to a given File. It is recommended that this 290 // file be initially empty. 291 class FileWriterDelegate : public WriterDelegate { 292 public: 293 // Constructs a FileWriterDelegate that manipulates |file|. The delegate will 294 // not own |file|, therefore the caller must guarantee |file| will outlive the 295 // delegate. 296 explicit FileWriterDelegate(base::File* file); 297 298 // Constructs a FileWriterDelegate that takes ownership of |file|. 299 explicit FileWriterDelegate(base::File owned_file); 300 301 FileWriterDelegate(const FileWriterDelegate&) = delete; 302 FileWriterDelegate& operator=(const FileWriterDelegate&) = delete; 303 304 ~FileWriterDelegate() override; 305 306 // Returns true if the file handle passed to the constructor is valid. 307 bool PrepareOutput() override; 308 309 // Writes |num_bytes| bytes of |data| to the file, returning false on error or 310 // if not all bytes could be written. 311 bool WriteBytes(const char* data, int num_bytes) override; 312 313 // Sets the last-modified time of the data. 314 void SetTimeModified(const base::Time& time) override; 315 316 // On POSIX systems, sets the file to be executable if the source file was 317 // executable. 318 void SetPosixFilePermissions(int mode) override; 319 320 // Empties the file to avoid leaving garbage data in it. 321 void OnError() override; 322 323 // Gets the number of bytes written into the file. file_length()324 int64_t file_length() { return file_length_; } 325 326 protected: 327 // The delegate can optionally own the file it modifies, in which case 328 // owned_file_ is set and file_ is an alias for owned_file_. 329 base::File owned_file_; 330 331 // The file the delegate modifies. 332 base::File* const file_ = &owned_file_; 333 334 int64_t file_length_ = 0; 335 }; 336 337 // A writer delegate that creates and writes a file at a given path. This does 338 // not overwrite any existing file. 339 class FilePathWriterDelegate : public FileWriterDelegate { 340 public: 341 explicit FilePathWriterDelegate(base::FilePath output_file_path); 342 343 FilePathWriterDelegate(const FilePathWriterDelegate&) = delete; 344 FilePathWriterDelegate& operator=(const FilePathWriterDelegate&) = delete; 345 346 ~FilePathWriterDelegate() override; 347 348 // Creates the output file and any necessary intermediate directories. Does 349 // not overwrite any existing file, and returns false if the output file 350 // cannot be created because another file conflicts with it. 351 bool PrepareOutput() override; 352 353 // Deletes the output file. 354 void OnError() override; 355 356 private: 357 const base::FilePath output_file_path_; 358 }; 359 360 } // namespace zip 361 362 #endif // THIRD_PARTY_ZLIB_GOOGLE_ZIP_READER_H_ 363