1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef THIRD_PARTY_ZLIB_GOOGLE_ZIP_READER_H_ 6 #define THIRD_PARTY_ZLIB_GOOGLE_ZIP_READER_H_ 7 8 #include <string> 9 10 #include "base/basictypes.h" 11 #include "base/file_util.h" 12 #include "base/files/file_path.h" 13 #include "base/memory/scoped_ptr.h" 14 #include "base/platform_file.h" 15 #include "base/time/time.h" 16 17 #if defined(USE_SYSTEM_MINIZIP) 18 #include <minizip/unzip.h> 19 #else 20 #include "third_party/zlib/contrib/minizip/unzip.h" 21 #endif 22 23 namespace zip { 24 25 // This class is used for reading zip files. A typical use case of this 26 // class is to scan entries in a zip file and extract them. The code will 27 // look like: 28 // 29 // ZipReader reader; 30 // reader.Open(zip_file_path); 31 // while (reader.HasMore()) { 32 // reader.OpenCurrentEntryInZip(); 33 // reader.ExtractCurrentEntryToDirectory(output_directory_path); 34 // reader.AdvanceToNextEntry(); 35 // } 36 // 37 // For simplicty, error checking is omitted in the example code above. The 38 // production code should check return values from all of these functions. 39 // 40 // This calls can also be used for random access of contents in a zip file 41 // using LocateAndOpenEntry(). 42 // 43 class ZipReader { 44 public: 45 // This class represents information of an entry (file or directory) in 46 // a zip file. 47 class EntryInfo { 48 public: 49 EntryInfo(const std::string& filename_in_zip, 50 const unz_file_info& raw_file_info); 51 52 // Returns the file path. The path is usually relative like 53 // "foo/bar.txt", but if it's absolute, is_unsafe() returns true. file_path()54 const base::FilePath& file_path() const { return file_path_; } 55 56 // Returns the size of the original file (i.e. after uncompressed). 57 // Returns 0 if the entry is a directory. original_size()58 int64 original_size() const { return original_size_; } 59 60 // Returns the last modified time. last_modified()61 base::Time last_modified() const { return last_modified_; } 62 63 // Returns true if the entry is a directory. is_directory()64 bool is_directory() const { return is_directory_; } 65 66 // Returns true if the entry is unsafe, like having ".." or invalid 67 // UTF-8 characters in its file name, or the file path is absolute. is_unsafe()68 bool is_unsafe() const { return is_unsafe_; } 69 70 private: 71 const base::FilePath file_path_; 72 int64 original_size_; 73 base::Time last_modified_; 74 bool is_directory_; 75 bool is_unsafe_; 76 DISALLOW_COPY_AND_ASSIGN(EntryInfo); 77 }; 78 79 ZipReader(); 80 ~ZipReader(); 81 82 // Opens the zip file specified by |zip_file_path|. Returns true on 83 // success. 84 bool Open(const base::FilePath& zip_file_path); 85 86 // Opens the zip file referred to by the platform file |zip_fd|. 87 // Returns true on success. 88 bool OpenFromPlatformFile(base::PlatformFile zip_fd); 89 90 // Opens the zip data stored in |data|. This class uses a weak reference to 91 // the given sring while extracting files, i.e. the caller should keep the 92 // string until it finishes extracting files. 93 bool OpenFromString(const std::string& data); 94 95 // Closes the currently opened zip file. This function is called in the 96 // destructor of the class, so you usually don't need to call this. 97 void Close(); 98 99 // Returns true if there is at least one entry to read. This function is 100 // used to scan entries with AdvanceToNextEntry(), like: 101 // 102 // while (reader.HasMore()) { 103 // // Do something with the current file here. 104 // reader.AdvanceToNextEntry(); 105 // } 106 bool HasMore(); 107 108 // Advances the next entry. Returns true on success. 109 bool AdvanceToNextEntry(); 110 111 // Opens the current entry in the zip file. On success, returns true and 112 // updates the the current entry state (i.e. current_entry_info() is 113 // updated). This function should be called before operations over the 114 // current entry like ExtractCurrentEntryToFile(). 115 // 116 // Note that there is no CloseCurrentEntryInZip(). The the current entry 117 // state is reset automatically as needed. 118 bool OpenCurrentEntryInZip(); 119 120 // Locates an entry in the zip file and opens it. Returns true on 121 // success. This function internally calls OpenCurrentEntryInZip() on 122 // success. On failure, current_entry_info() becomes NULL. 123 bool LocateAndOpenEntry(const base::FilePath& path_in_zip); 124 125 // Extracts the current entry to the given output file path. If the 126 // current file is a directory, just creates a directory 127 // instead. Returns true on success. OpenCurrentEntryInZip() must be 128 // called beforehand. 129 // 130 // This function does not preserve the timestamp of the original entry. 131 bool ExtractCurrentEntryToFilePath(const base::FilePath& output_file_path); 132 133 // Extracts the current entry to the given output directory path using 134 // ExtractCurrentEntryToFilePath(). Sub directories are created as needed 135 // based on the file path of the current entry. For example, if the file 136 // path in zip is "foo/bar.txt", and the output directory is "output", 137 // "output/foo/bar.txt" will be created. 138 // 139 // Returns true on success. OpenCurrentEntryInZip() must be called 140 // beforehand. 141 bool ExtractCurrentEntryIntoDirectory( 142 const base::FilePath& output_directory_path); 143 144 #if defined(OS_POSIX) 145 // Extracts the current entry by writing directly to a file descriptor. 146 // Does not close the file descriptor. Returns true on success. 147 bool ExtractCurrentEntryToFd(int fd); 148 #endif 149 150 // Returns the current entry info. Returns NULL if the current entry is 151 // not yet opened. OpenCurrentEntryInZip() must be called beforehand. current_entry_info()152 EntryInfo* current_entry_info() const { 153 return current_entry_info_.get(); 154 } 155 156 // Returns the number of entries in the zip file. 157 // Open() must be called beforehand. num_entries()158 int num_entries() const { return num_entries_; } 159 160 private: 161 // Common code used both in Open and OpenFromFd. 162 bool OpenInternal(); 163 164 // Resets the internal state. 165 void Reset(); 166 167 unzFile zip_file_; 168 int num_entries_; 169 bool reached_end_; 170 scoped_ptr<EntryInfo> current_entry_info_; 171 172 DISALLOW_COPY_AND_ASSIGN(ZipReader); 173 }; 174 175 } // namespace zip 176 177 #endif // THIRD_PARTY_ZLIB_GOOGLE_ZIP_READER_H_ 178