1 /* 2 * Copyright (c) 2025 Huawei Device Co., Ltd. 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 16 #ifndef LIBZIPARCHIVE_EXTRACTORTOOL_ZIP_FILE_H 17 #define LIBZIPARCHIVE_EXTRACTORTOOL_ZIP_FILE_H 18 19 #include <memory> 20 #include <set> 21 #include <string> 22 #include <unordered_map> 23 #include <vector> 24 25 #include "unzip.h" 26 27 #include "file_mapper.h" 28 #include "libpandabase/os/mutex.h" 29 #include "libpandabase/utils/logger.h" 30 31 namespace ark::extractor { 32 class ZipFileReader; 33 struct CentralDirEntry; 34 struct ZipEntry; 35 using ZipPos = ZPOS64_T; 36 using ZipEntryMap = std::unordered_map<std::string, ZipEntry>; 37 using BytePtr = Byte *; 38 39 // Local file header: descript in APPNOTE-6.3.4 40 // local file header signature 4 bytes (0x04034b50) 41 // version needed to extract 2 bytes 42 // general purpose bit flag 2 bytes 43 // compression method 2 bytes 10 44 // last mod file time 2 bytes 45 // last mod file date 2 bytes 46 // crc-32 4 bytes 47 // compressed size 4 bytes 22 48 // uncompressed size 4 bytes 49 // file name length 2 bytes 50 // extra field length 2 bytes 30 51 struct __attribute__((packed)) LocalHeader { 52 uint32_t signature = 0; 53 uint16_t versionNeeded = 0; 54 uint16_t flags = 0; 55 uint16_t compressionMethod = 0; 56 uint16_t modifiedTime = 0; 57 uint16_t modifiedDate = 0; 58 uint32_t crc = 0; 59 uint32_t compressedSize = 0; 60 uint32_t uncompressedSize = 0; 61 uint16_t nameSize = 0; 62 uint16_t 
extraSize = 0; 63 }; 64 65 // central file header 66 // Central File header: 67 // central file header signature 4 bytes (0x02014b50) 68 // version made by 2 bytes 69 // version needed to extract 2 bytes 70 // general purpose bit flag 2 bytes 10 71 // compression method 2 bytes 72 // last mod file time 2 bytes 73 // last mod file date 2 bytes 74 // crc-32 4 bytes 20 75 // compressed size 4 bytes 76 // uncompressed size 4 bytes 77 // file name length 2 bytes 30 78 // extra field length 2 bytes 79 // file comment length 2 bytes 80 // disk number start 2 bytes 81 // internal file attributes 2 bytes 82 // external file attributes 4 bytes 83 // relative offset of local header 4 bytes 46byte 84 struct __attribute__((packed)) CentralDirEntry { 85 uint32_t signature = 0; 86 uint16_t versionMade = 0; 87 uint16_t versionNeeded = 0; 88 uint16_t flags = 0; // general purpose bit flag 89 uint16_t compressionMethod = 0; 90 uint16_t modifiedTime = 0; 91 uint16_t modifiedDate = 0; 92 uint32_t crc = 0; 93 uint32_t compressedSize = 0; 94 uint32_t uncompressedSize = 0; 95 uint16_t nameSize = 0; 96 uint16_t extraSize = 0; 97 uint16_t commentSize = 0; 98 uint16_t diskNumStart = 0; 99 uint16_t internalAttr = 0; 100 uint32_t externalAttr = 0; 101 uint32_t localHeaderOffset = 0; 102 }; 103 104 // end of central directory packed structure 105 // end of central dir signature 4 bytes (0x06054b50) 106 // number of this disk 2 bytes 107 // number of the disk with the 108 // start of the central directory 2 bytes 109 // total number of entries in the 110 // central directory on this disk 2 bytes 111 // total number of entries in 112 // the central directory 2 bytes 113 // size of the central directory 4 bytes 114 // offset of start of central 115 // directory with respect to 116 // the starting disk number 4 bytes 117 // .ZIP file comment length 2 bytes 118 struct __attribute__((packed)) EndDir { 119 uint32_t signature = 0; 120 uint16_t numDisk = 0; 121 uint16_t startDiskOfCentralDir = 0; 122 
uint16_t totalEntriesInThisDisk = 0; 123 uint16_t totalEntries = 0; 124 uint32_t sizeOfCentralDir = 0; 125 uint32_t offset = 0; 126 uint16_t commentLen = 0; 127 }; 128 129 // Data descriptor: 130 // data descriptor signature 4 bytes (0x06054b50) 131 // crc-32 4 bytes 132 // compressed size 4 bytes 133 // uncompressed size 4 bytes 134 // This descriptor MUST exist if bit 3 of the general purpose bit flag is set (see below). 135 // It is byte aligned and immediately follows the last byte of compressed data. 136 struct __attribute__((packed)) DataDesc { 137 uint32_t signature = 0; 138 uint32_t crc = 0; 139 uint32_t compressedSize = 0; 140 uint32_t uncompressedSize = 0; 141 }; 142 143 struct ZipEntry { // NOLINT(cppcoreguidelines-special-member-functions) 144 ZipEntry() = default; 145 explicit ZipEntry(const CentralDirEntry ¢ralEntry); 146 ~ZipEntry() = default; 147 148 uint16_t compressionMethod = 0; // NOLINT(misc-non-private-member-variables-in-classes) 149 uint32_t uncompressedSize = 0; // NOLINT(misc-non-private-member-variables-in-classes) 150 uint32_t compressedSize = 0; // NOLINT(misc-non-private-member-variables-in-classes) 151 uint32_t localHeaderOffset = 0; // NOLINT(misc-non-private-member-variables-in-classes) 152 uint32_t crc = 0; // NOLINT(misc-non-private-member-variables-in-classes) 153 uint16_t flags = 0; // NOLINT(misc-non-private-member-variables-in-classes) 154 uint16_t modifiedTime = 0; // NOLINT(misc-non-private-member-variables-in-classes) 155 uint16_t modifiedDate = 0; // NOLINT(misc-non-private-member-variables-in-classes) 156 std::string fileName; // NOLINT(misc-non-private-member-variables-in-classes) 157 }; 158 159 struct DirTreeNode { 160 bool isDir = false; 161 std::unordered_map<std::string, std::shared_ptr<DirTreeNode>> children; 162 }; 163 164 enum class CacheMode : uint32_t { 165 CACHE_NONE = 0, 166 CACHE_CASE, // This mode depends on file amount in hap. 167 CACHE_ALL 168 }; 169 170 // zip file extract class for bundle format. 
class ZipFile {  // NOLINT(cppcoreguidelines-special-member-functions)
public:
    /**
     * @brief Construct a ZipFile for the given path.
     * @param pathName Indicates the path of the zip file.
     */
    explicit ZipFile(const std::string &pathName);
    ~ZipFile();
    /**
     * @brief Open zip file.
     * @return Returns true if the zip file is successfully opened; returns false otherwise.
     */
    bool Open();
    /**
     * @brief Close zip file.
     */
    void Close();
    /**
     * @brief Get all entries in the zip file.
     * @return Returns the ZipEntryMap object containing all entries.
     */
    const ZipEntryMap &GetAllEntries() const;
    /**
     * @brief Has entry by name.
     * @param entryName Indicates the entry name.
     * @return Returns true if the ZipEntry is successfully found; returns false otherwise.
     */
    bool HasEntry(const std::string &entryName) const;

    // Directory queries. NOTE(review): each has a *Cache and a *Normal private variant below;
    // presumably the variant is chosen via UseDirCache() - confirm against the implementation.
    bool IsDirExist(const std::string &dir);
    void GetAllFileList(const std::string &srcPath, std::vector<std::string> &assetList);
    void GetChildNames(const std::string &srcPath, std::set<std::string> &fileSet);

    /**
     * @brief Get entry by name.
     * @param entryName Indicates the entry name.
     * @param resultEntry Indicates the obtained ZipEntry object.
     * @return Returns true if the ZipEntry is successfully found; returns false otherwise.
     */
    bool GetEntry(const std::string &entryName, ZipEntry &resultEntry) const;
    /**
     * @brief Get the data offset and length of an entry.
     * @param zipEntry Indicates the ZipEntry object.
     * @param offset Indicates the obtained offset.
     * @param length Indicates the obtained length.
     * @return Returns a bool; presumably true on success - TODO confirm against the implementation.
     */
    bool GetDataOffsetRelative(const ZipEntry &zipEntry, ZipPos &offset, uint32_t &length) const;
    /**
     * @brief Extract a file from mapped memory into a buffer.
     * @param file Indicates the name of the file to extract.
     * @param mmapDataPtr Indicates the mapped data pointer.
     * @param dataPtr Indicates the obtained data buffer.
     * @param len Indicates the obtained length.
     * @return Returns a bool; presumably true on success - TODO confirm against the implementation.
     */
    bool ExtractFileFromMMap(const std::string &file, void *mmapDataPtr,
                             std::unique_ptr<uint8_t[]> &dataPtr,  // NOLINT(modernize-avoid-c-arrays)
                             size_t &len) const;

    /**
     * @brief Create a FileMapper for the named file.
     * @param fileName Indicates the file name.
     * @param type Indicates the FileMapperType.
     * @return Returns the created FileMapper (presumably null on failure - TODO confirm).
     */
    std::unique_ptr<FileMapper> CreateFileMapper(const std::string &fileName, FileMapperType type) const;
    /**
     * @brief Extract the named file into a buffer.
     * @param fileName Indicates the file name.
     * @param dataPtr Indicates the obtained data buffer.
     * @param len Indicates the obtained length.
     * @return Returns a bool; presumably true on success - TODO confirm against the implementation.
     */
    // NOLINTNEXTLINE(modernize-avoid-c-arrays)
    bool ExtractToBufByName(const std::string &fileName, std::unique_ptr<uint8_t[]> &dataPtr, size_t &len) const;
    /**
     * @brief Set the cache mode.
     * @param cacheMode Indicates the CacheMode to use.
     */
    void SetCacheMode(CacheMode cacheMode);
    bool UseDirCache() const;

private:
    /**
     * @brief Check the EndDir object.
     * @param endDir Indicates the EndDir object to check.
     * @return Returns true if successfully checked; returns false otherwise.
     */
    bool CheckEndDir(const EndDir &endDir) const;
    /**
     * @brief Parse the EndDir.
     * @return Returns true if successfully parsed; returns false otherwise.
     */
    bool ParseEndDirectory();
    /**
     * @brief Parse one entry.
     * @param entryPtr Indicates the entry pointer; taken by reference, so it may be updated by the call.
     * @return Returns true if successfully parsed; returns false otherwise.
     */
    bool ParseOneEntry(uint8_t *&entryPtr);
    /**
     * @brief Parse all Entries.
     * @return Returns true if successfully parsed; returns false otherwise.
     */
    bool ParseAllEntries();
    /**
     * @brief Get LocalHeader object size.
     * @param nameSize Indicates the nameSize.
     * @param extraSize Indicates the extraSize.
     * @return Returns size of LocalHeader.
     */
    size_t GetLocalHeaderSize(const uint16_t nameSize = 0, const uint16_t extraSize = 0) const;
    /**
     * @brief Get entry data offset.
     * @param zipEntry Indicates the ZipEntry object.
     * @param extraSize Indicates the extraSize.
     * @return Returns position.
     */
    ZipPos GetEntryDataOffset(const ZipEntry &zipEntry, const uint16_t extraSize) const;
    /**
     * @brief Check data description.
     * @param zipEntry Indicates the ZipEntry object.
     * @param localHeader Indicates the localHeader object.
     * @return Returns true if successfully checked; returns false otherwise.
     */
    bool CheckDataDesc(const ZipEntry &zipEntry, const LocalHeader &localHeader) const;
    /**
     * @brief Check coherency LocalHeader object.
     * @param zipEntry Indicates the ZipEntry object.
     * @param extraSize Indicates the obtained size.
     * @return Returns true if successfully checked; returns false otherwise.
     */
    bool CheckCoherencyLocalHeader(const ZipEntry &zipEntry, uint16_t &extraSize) const;
    /**
     * @brief Get Entry start.
     * @param zipEntry Indicates the ZipEntry object.
     * @param extraSize Indicates the extra size.
     * @return Returns a size_t; presumably the start position of the entry - TODO confirm against the
     *         implementation (an earlier note described a boolean result, which does not match the return type).
     */
    size_t GetEntryStart(const ZipEntry &zipEntry, const uint16_t extraSize) const;
    /**
     * @brief Init zlib stream.
     * @param zstream Indicates the obtained z_stream object.
     * @return Returns true if successfully init; returns false otherwise.
     */
    bool InitZStream(z_stream &zstream) const;
    // Helpers for extracting entry data from mapped memory; see the implementation for exact contracts.
    bool UnzipWithInflatedFromMMap(const ZipEntry &zipEntry, const uint16_t extraSize, void *mmapDataPtr,
                                   // NOLINTNEXTLINE(modernize-avoid-c-arrays)
                                   std::unique_ptr<uint8_t[]> &dataPtr, size_t &len) const;
    bool CopyInflateOut(z_stream &zstream, size_t inflateLen, uint8_t **dstDataPtr, BytePtr bufOut,
                        uint8_t &errorTimes) const;
    bool ReadZStreamFromMMap(const BytePtr &buffer, void *&dataPtr, z_stream &zstream,
                             uint32_t &remainCompressedSize) const;

    // Directory tree access and construction.
    std::shared_ptr<DirTreeNode> GetDirRoot();
    std::shared_ptr<DirTreeNode> MakeDirTree() const;

    // Cache-based variants of the public directory queries.
    bool IsDirExistCache(const std::string &dir);
    void GetAllFileListCache(const std::string &srcPath, std::vector<std::string> &assetList);
    void GetChildNamesCache(const std::string &srcPath, std::set<std::string> &fileSet);

    // Non-cache variants of the public directory queries.
    bool IsDirExistNormal(const std::string &dir);
    void GetAllFileListNormal(const std::string &srcPath, std::vector<std::string> &assetList);
    void GetChildNamesNormal(const std::string &srcPath, std::set<std::string> &fileSet);

private:
    std::string pathName_;
    std::shared_ptr<ZipFileReader> zipFileReader_;
    EndDir endDir_;
    ZipEntryMap entriesMap_;
    // NOTE(review): presumably guards dirRoot_ - confirm against the implementation.
    os::memory::Mutex dirRootMutex_;
    std::shared_ptr<DirTreeNode> dirRoot_;
    // offset of central directory relative to zip file.
    ZipPos centralDirPos_ = 0;
    // this zip content start offset relative to zip file.
    ZipPos fileStartPos_ = 0;
    // this zip content length in the zip file.
    ZipPos fileLength_ = 0;
    bool isOpen_ = false;
    CacheMode cacheMode_ = CacheMode::CACHE_CASE;
};
}  // namespace ark::extractor
#endif  // LIBZIPARCHIVE_EXTRACTORTOOL_ZIP_FILE_H