• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2025 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #ifndef LIBZIPARCHIVE_EXTRACTORTOOL_ZIP_FILE_H
17 #define LIBZIPARCHIVE_EXTRACTORTOOL_ZIP_FILE_H
18 
19 #include <memory>
20 #include <set>
21 #include <string>
22 #include <unordered_map>
23 #include <vector>
24 
25 #include "unzip.h"
26 
27 #include "file_mapper.h"
28 #include "libpandabase/os/mutex.h"
29 #include "libpandabase/utils/logger.h"
30 
31 namespace ark::extractor {
32 class ZipFileReader;
33 struct CentralDirEntry;
34 struct ZipEntry;
35 using ZipPos = ZPOS64_T;
36 using ZipEntryMap = std::unordered_map<std::string, ZipEntry>;
37 using BytePtr = Byte *;
38 
// Local file header, as described in the PKWARE APPNOTE (6.3.4).
// The trailing number on some rows is the running byte total up to and including that field.
//    local file header signature     4 bytes  (0x04034b50)
//    version needed to extract       2 bytes
//    general purpose bit flag        2 bytes
//    compression method              2 bytes  10
//    last mod file time              2 bytes
//    last mod file date              2 bytes
//    crc-32                          4 bytes
//    compressed size                 4 bytes  22
//    uncompressed size               4 bytes
//    file name length                2 bytes
//    extra field length              2 bytes  30
// Only the fixed-size part is modelled here; the nameSize and extraSize fields give the lengths
// of the variable-size file name and extra field.
struct __attribute__((packed)) LocalHeader {
    uint32_t signature = 0;
    uint16_t versionNeeded = 0;
    uint16_t flags = 0;  // general purpose bit flag
    uint16_t compressionMethod = 0;
    uint16_t modifiedTime = 0;
    uint16_t modifiedDate = 0;
    uint32_t crc = 0;
    uint32_t compressedSize = 0;
    uint32_t uncompressedSize = 0;
    uint16_t nameSize = 0;
    uint16_t extraSize = 0;
};
// Compile-time guard: the struct must stay byte-for-byte identical to the 30-byte record above.
// NOLINTNEXTLINE(readability-magic-numbers)
static_assert(sizeof(LocalHeader) == 30, "LocalHeader must match the 30-byte local file header layout");
64 
// Central directory file header, as described in the PKWARE APPNOTE.
// The trailing number on some rows is the running byte total up to and including that field.
//    central file header signature   4 bytes  (0x02014b50)
//    version made by                 2 bytes
//    version needed to extract       2 bytes
//    general purpose bit flag        2 bytes  10
//    compression method              2 bytes
//    last mod file time              2 bytes
//    last mod file date              2 bytes
//    crc-32                          4 bytes  20
//    compressed size                 4 bytes
//    uncompressed size               4 bytes
//    file name length                2 bytes  30
//    extra field length              2 bytes
//    file comment length             2 bytes
//    disk number start               2 bytes
//    internal file attributes        2 bytes
//    external file attributes        4 bytes
//    relative offset of local header 4 bytes  46
// Only the fixed-size part is modelled here; nameSize, extraSize and commentSize give the lengths
// of the variable-size file name, extra field and file comment.
struct __attribute__((packed)) CentralDirEntry {
    uint32_t signature = 0;
    uint16_t versionMade = 0;
    uint16_t versionNeeded = 0;
    uint16_t flags = 0;  // general purpose bit flag
    uint16_t compressionMethod = 0;
    uint16_t modifiedTime = 0;
    uint16_t modifiedDate = 0;
    uint32_t crc = 0;
    uint32_t compressedSize = 0;
    uint32_t uncompressedSize = 0;
    uint16_t nameSize = 0;
    uint16_t extraSize = 0;
    uint16_t commentSize = 0;
    uint16_t diskNumStart = 0;
    uint16_t internalAttr = 0;
    uint32_t externalAttr = 0;
    uint32_t localHeaderOffset = 0;
};
// Compile-time guard: the struct must stay byte-for-byte identical to the 46-byte record above.
// NOLINTNEXTLINE(readability-magic-numbers)
static_assert(sizeof(CentralDirEntry) == 46, "CentralDirEntry must match the 46-byte central file header layout");
103 
// End of central directory record (packed structure), as described in the PKWARE APPNOTE.
//    end of central dir signature    4 bytes  (0x06054b50)
//    number of this disk             2 bytes
//    number of the disk with the
//    start of the central directory  2 bytes
//    total number of entries in the
//    central directory on this disk  2 bytes
//    total number of entries in
//    the central directory           2 bytes
//    size of the central directory   4 bytes
//    offset of start of central
//    directory with respect to
//    the starting disk number        4 bytes
//    .ZIP file comment length        2 bytes
// Only the fixed-size part (22 bytes) is modelled here; commentLen gives the length of the
// variable-size .ZIP file comment.
struct __attribute__((packed)) EndDir {
    uint32_t signature = 0;
    uint16_t numDisk = 0;
    uint16_t startDiskOfCentralDir = 0;
    uint16_t totalEntriesInThisDisk = 0;
    uint16_t totalEntries = 0;
    uint32_t sizeOfCentralDir = 0;
    uint32_t offset = 0;  // offset of start of central directory
    uint16_t commentLen = 0;
};
// Compile-time guard: the struct must stay byte-for-byte identical to the 22-byte record above.
// NOLINTNEXTLINE(readability-magic-numbers)
static_assert(sizeof(EndDir) == 22, "EndDir must match the 22-byte end of central directory layout");
128 
// Data descriptor, as described in the PKWARE APPNOTE:
//    data descriptor signature       4 bytes  (0x08074b50)
//    crc-32                          4 bytes
//    compressed size                 4 bytes
//    uncompressed size               4 bytes
// This descriptor MUST exist if bit 3 of the general purpose bit flag is set (see the APPNOTE).
// It is byte aligned and immediately follows the last byte of compressed data.
// Per the APPNOTE the signature was not originally part of the record, so archives may be
// encountered with or without it; this struct models the variant that carries the signature.
struct __attribute__((packed)) DataDesc {
    uint32_t signature = 0;
    uint32_t crc = 0;
    uint32_t compressedSize = 0;
    uint32_t uncompressedSize = 0;
};
// Compile-time guard: the struct must stay byte-for-byte identical to the 16-byte record above.
// NOLINTNEXTLINE(readability-magic-numbers)
static_assert(sizeof(DataDesc) == 16, "DataDesc must match the 16-byte data descriptor layout");
142 
143 struct ZipEntry {  // NOLINT(cppcoreguidelines-special-member-functions)
144     ZipEntry() = default;
145     explicit ZipEntry(const CentralDirEntry &centralEntry);
146     ~ZipEntry() = default;
147 
148     uint16_t compressionMethod = 0;  // NOLINT(misc-non-private-member-variables-in-classes)
149     uint32_t uncompressedSize = 0;   // NOLINT(misc-non-private-member-variables-in-classes)
150     uint32_t compressedSize = 0;     // NOLINT(misc-non-private-member-variables-in-classes)
151     uint32_t localHeaderOffset = 0;  // NOLINT(misc-non-private-member-variables-in-classes)
152     uint32_t crc = 0;                // NOLINT(misc-non-private-member-variables-in-classes)
153     uint16_t flags = 0;              // NOLINT(misc-non-private-member-variables-in-classes)
154     uint16_t modifiedTime = 0;       // NOLINT(misc-non-private-member-variables-in-classes)
155     uint16_t modifiedDate = 0;       // NOLINT(misc-non-private-member-variables-in-classes)
156     std::string fileName;            // NOLINT(misc-non-private-member-variables-in-classes)
157 };
158 
// A single node of a directory tree; nodes own their children through shared pointers.
struct DirTreeNode {
    // Directory flag for this node; false unless set explicitly.
    bool isDir {false};
    // Child nodes keyed by a string (presumably the child's name - confirm in the implementation).
    std::unordered_map<std::string, std::shared_ptr<DirTreeNode>> children {};
};
163 
// Cache policy selector consumed by ZipFile::SetCacheMode.
enum class CacheMode : uint32_t {
    CACHE_NONE = 0U,
    // This mode depends on the number of files in the hap.
    CACHE_CASE = 1U,
    CACHE_ALL = 2U,
};
169 
170 // zip file extract class for bundle format.
171 class ZipFile {  // NOLINT(cppcoreguidelines-special-member-functions)
172 public:
173     explicit ZipFile(const std::string &pathName);
174     ~ZipFile();
175     /**
176      * @brief Open zip file.
177      * @return Returns true if the zip file is successfully opened; returns false otherwise.
178      */
179     bool Open();
180     void Close();
181     /**
182      * @brief Get all entries in the zip file.
183      * @param start Indicates the zip content location start position.
184      * @param length Indicates the zip content length.
185      * @return Returns the ZipEntryMap object cotain all entries.
186      */
187     const ZipEntryMap &GetAllEntries() const;
188     /**
189      * @brief Has entry by name.
190      * @param entryName Indicates the entry name.
191      * @return Returns true if the ZipEntry is successfully finded; returns false otherwise.
192      */
193     bool HasEntry(const std::string &entryName) const;
194 
195     bool IsDirExist(const std::string &dir);
196     void GetAllFileList(const std::string &srcPath, std::vector<std::string> &assetList);
197     void GetChildNames(const std::string &srcPath, std::set<std::string> &fileSet);
198 
199     /**
200      * @brief Get entry by name.
201      * @param entryName Indicates the entry name.
202      * @param resultEntry Indicates the obtained ZipEntry object.
203      * @return Returns true if the ZipEntry is successfully finded; returns false otherwise.
204      */
205     bool GetEntry(const std::string &entryName, ZipEntry &resultEntry) const;
206     bool GetDataOffsetRelative(const ZipEntry &zipEntry, ZipPos &offset, uint32_t &length) const;
207     bool ExtractFileFromMMap(const std::string &file, void *mmapDataPtr,
208                              std::unique_ptr<uint8_t[]> &dataPtr,  // NOLINT(modernize-avoid-c-arrays)
209                              size_t &len) const;
210 
211     std::unique_ptr<FileMapper> CreateFileMapper(const std::string &fileName, FileMapperType type) const;
212     // NOLINTNEXTLINE(modernize-avoid-c-arrays)
213     bool ExtractToBufByName(const std::string &fileName, std::unique_ptr<uint8_t[]> &dataPtr, size_t &len) const;
214     void SetCacheMode(CacheMode cacheMode);
215     bool UseDirCache() const;
216 
217 private:
218     /**
219      * @brief Check the EndDir object.
220      * @param endDir Indicates the EndDir object to check.
221      * @return Returns true if  successfully checked; returns false otherwise.
222      */
223     bool CheckEndDir(const EndDir &endDir) const;
224     /**
225      * @brief Parse the EndDir.
226      * @return Returns true if  successfully Parsed; returns false otherwise.
227      */
228     bool ParseEndDirectory();
229     /**
230      * @brief Parse one entry.
231      * @return Returns true if successfully parsed; returns false otherwise.
232      */
233     bool ParseOneEntry(uint8_t *&entryPtr);
234     /**
235      * @brief Parse all Entries.
236      * @return Returns true if successfully parsed; returns false otherwise.
237      */
238     bool ParseAllEntries();
239     /**
240      * @brief Get LocalHeader object size.
241      * @param nameSize Indicates the nameSize.
242      * @param extraSize Indicates the extraSize.
243      * @return Returns size of LocalHeader.
244      */
245     size_t GetLocalHeaderSize(const uint16_t nameSize = 0, const uint16_t extraSize = 0) const;
246     /**
247      * @brief Get entry data offset.
248      * @param zipEntry Indicates the ZipEntry object.
249      * @param extraSize Indicates the extraSize.
250      * @return Returns position.
251      */
252     ZipPos GetEntryDataOffset(const ZipEntry &zipEntry, const uint16_t extraSize) const;
253     /**
254      * @brief Check data description.
255      * @param zipEntry Indicates the ZipEntry object.
256      * @param localHeader Indicates the localHeader object.
257      * @return Returns true if successfully checked; returns false otherwise.
258      */
259     bool CheckDataDesc(const ZipEntry &zipEntry, const LocalHeader &localHeader) const;
260     /**
261      * @brief Check coherency LocalHeader object.
262      * @param zipEntry Indicates the ZipEntry object.
263      * @param extraSize Indicates the obtained size.
264      * @return Returns true if successfully checked; returns false otherwise.
265      */
266     bool CheckCoherencyLocalHeader(const ZipEntry &zipEntry, uint16_t &extraSize) const;
267     /**
268      * @brief Get Entry start.
269      * @param zipEntry Indicates the ZipEntry object.
270      * @param extraSize Indicates the extra size.
271      * @return Returns true if successfully Seeked; returns false otherwise.
272      */
273     size_t GetEntryStart(const ZipEntry &zipEntry, const uint16_t extraSize) const;
274     /**
275      * @brief Init zlib stream.
276      * @param zstream Indicates the obtained z_stream object.
277      * @return Returns true if successfully init; returns false otherwise.
278      */
279     bool InitZStream(z_stream &zstream) const;
280     bool UnzipWithInflatedFromMMap(const ZipEntry &zipEntry, const uint16_t extraSize, void *mmapDataPtr,
281                                    // NOLINTNEXTLINE(modernize-avoid-c-arrays)
282                                    std::unique_ptr<uint8_t[]> &dataPtr, size_t &len) const;
283     bool CopyInflateOut(z_stream &zstream, size_t inflateLen, uint8_t **dstDataPtr, BytePtr bufOut,
284                         uint8_t &errorTimes) const;
285     bool ReadZStreamFromMMap(const BytePtr &buffer, void *&dataPtr, z_stream &zstream,
286                              uint32_t &remainCompressedSize) const;
287 
288     std::shared_ptr<DirTreeNode> GetDirRoot();
289     std::shared_ptr<DirTreeNode> MakeDirTree() const;
290 
291     bool IsDirExistCache(const std::string &dir);
292     void GetAllFileListCache(const std::string &srcPath, std::vector<std::string> &assetList);
293     void GetChildNamesCache(const std::string &srcPath, std::set<std::string> &fileSet);
294 
295     bool IsDirExistNormal(const std::string &dir);
296     void GetAllFileListNormal(const std::string &srcPath, std::vector<std::string> &assetList);
297     void GetChildNamesNormal(const std::string &srcPath, std::set<std::string> &fileSet);
298 
299 private:
300     std::string pathName_;
301     std::shared_ptr<ZipFileReader> zipFileReader_;
302     EndDir endDir_;
303     ZipEntryMap entriesMap_;
304     os::memory::Mutex dirRootMutex_;
305     std::shared_ptr<DirTreeNode> dirRoot_;
306     // offset of central directory relative to zip file.
307     ZipPos centralDirPos_ = 0;
308     // this zip content start offset relative to zip file.
309     ZipPos fileStartPos_ = 0;
310     // this zip content length in the zip file.
311     ZipPos fileLength_ = 0;
312     bool isOpen_ = false;
313     CacheMode cacheMode_ = CacheMode::CACHE_CASE;
314 };
315 }  // namespace ark::extractor
316 #endif
317