• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2025 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "zip_file.h"
17 
18 #include <ostream>
19 #include <string>
20 
21 #include <securec.h>
22 
23 #include "file_mapper.h"
24 #include "zip_file_reader.h"
25 
26 namespace ark::extractor {
27 namespace {
// Entry names of MAX_FILE_NAME bytes or more are truncated to MAX_FILE_NAME - 1 when parsed.
constexpr uint32_t MAX_FILE_NAME = 4096;

// Sizes of the scratch buffers used while inflating an entry.
constexpr uint32_t UNZIP_BUFFER_SIZE = 1024;
constexpr uint32_t UNZIP_BUF_IN_LEN = 160 * UNZIP_BUFFER_SIZE;   // in  buffer length: 160KB
constexpr uint32_t UNZIP_BUF_OUT_LEN = 320 * UNZIP_BUFFER_SIZE;  // out buffer length: 320KB

// Signatures compared against the `signature` field of the corresponding archive records.
constexpr uint32_t LOCAL_HEADER_SIGNATURE = 0x04034b50;
constexpr uint32_t CENTRAL_SIGNATURE = 0x02014b50;
constexpr uint32_t EOCD_SIGNATURE = 0x06054b50;
constexpr uint32_t DATA_DESC_SIGNATURE = 0x08074b50;

// Local-header flag bit: sizes and crc are taken from a trailing data descriptor.
constexpr uint32_t FLAG_DATA_DESC = 0x8;
// Number of consecutive inflate rounds without output after which the data is treated as abnormal.
constexpr uint8_t INFLATE_ERROR_TIMES = 5;
// NOTE(review): not referenced in the visible part of this file - confirm where it is used.
constexpr uint8_t MAP_FILE_SUFFIX = 4;

// Path separator used inside the archive, and the doubled form that is rejected as malformed.
constexpr char FILE_SEPARATOR_CHAR = '/';
constexpr const char *WRONG_FILE_SEPARATOR = "//";

// In CacheMode::CACHE_CASE the directory tree is used once the archive has at least this many entries.
constexpr uint32_t CACHE_CASE_THRESHOLD = 10000;
42 
43 // NOLINTNEXTLINE(performance-unnecessary-value-param)
GetTreeFileList(std::shared_ptr<DirTreeNode> root,const std::string & rootPath,std::vector<std::string> & assetList)44 void GetTreeFileList(std::shared_ptr<DirTreeNode> root, const std::string &rootPath,
45                      std::vector<std::string> &assetList)
46 {
47     if (root == nullptr) {
48         return;
49     }
50     if (!root->isDir && !rootPath.empty()) {
51         assetList.push_back(rootPath);
52     } else {
53         std::string prefix = rootPath;
54         if (!prefix.empty()) {
55             prefix.push_back(FILE_SEPARATOR_CHAR);
56         }
57         for (const auto &child : root->children) {
58             GetTreeFileList(child.second, prefix + child.first, assetList);
59         }
60     }
61 }
62 
63 // NOLINTNEXTLINE(performance-unnecessary-value-param)
AddEntryToTree(const std::string & fileName,std::shared_ptr<DirTreeNode> root)64 void AddEntryToTree(const std::string &fileName, std::shared_ptr<DirTreeNode> root)
65 {
66     if (root == nullptr) {
67         return;
68     }
69     size_t cur = 0;
70     auto parent = root;
71     do {
72         while (cur < fileName.size() && fileName[cur] == FILE_SEPARATOR_CHAR) {
73             cur++;
74         }
75         if (cur >= fileName.size()) {
76             break;
77         }
78         auto next = fileName.find(FILE_SEPARATOR_CHAR, cur);
79         auto nodeName = fileName.substr(cur, next - cur);
80         auto it = parent->children.find(nodeName);
81         if (it != parent->children.end()) {
82             parent = it->second;
83         } else {
84             auto node = std::make_shared<DirTreeNode>();
85             node->isDir = next != std::string::npos;
86             parent->children.emplace(nodeName, node);
87             parent = node;
88         }
89         cur = next;
90     } while (cur != std::string::npos);
91 }
92 
IsRootDir(const std::string & dirName)93 inline bool IsRootDir(const std::string &dirName)
94 {
95     return dirName.size() == 1 && dirName.back() == FILE_SEPARATOR_CHAR;
96 }
97 }  // namespace
98 
ZipEntry(const CentralDirEntry & centralEntry)99 ZipEntry::ZipEntry(const CentralDirEntry &centralEntry)
100 {
101     compressionMethod = centralEntry.compressionMethod;
102     uncompressedSize = centralEntry.uncompressedSize;
103     compressedSize = centralEntry.compressedSize;
104     localHeaderOffset = centralEntry.localHeaderOffset;
105     crc = centralEntry.crc;
106     flags = centralEntry.flags;
107     modifiedTime = centralEntry.modifiedTime;
108     modifiedDate = centralEntry.modifiedDate;
109 }
110 
ZipFile(const std::string & pathName)111 ZipFile::ZipFile(const std::string &pathName) : pathName_(pathName) {}  // NOLINT(modernize-pass-by-value)
112 
~ZipFile()113 ZipFile::~ZipFile()
114 {
115     Close();
116 }
117 
CheckEndDir(const EndDir & endDir) const118 bool ZipFile::CheckEndDir(const EndDir &endDir) const
119 {
120     size_t lenEndDir = sizeof(EndDir);
121     if ((endDir.numDisk != 0) || (endDir.signature != EOCD_SIGNATURE) || (endDir.startDiskOfCentralDir != 0) ||
122         (endDir.offset >= fileLength_) || (endDir.totalEntriesInThisDisk != endDir.totalEntries) ||
123         (endDir.commentLen != 0) ||
124         // central dir can't overlap end of central dir
125         ((endDir.offset + endDir.sizeOfCentralDir + lenEndDir) > fileLength_)) {
126         LOG(WARNING, ZIPARCHIVE) << "failed: fileLen: " << fileLength_ << ", signature: " << endDir.signature
127                                  << ", numDisk: " << endDir.numDisk
128                                  << ", startDiskOfCentralDir: " << endDir.startDiskOfCentralDir
129                                  << ", totalEntriesInThisDisk: " << endDir.totalEntriesInThisDisk
130                                  << ", totalEntries: " << endDir.totalEntries
131                                  << ", sizeOfCentralDir: " << endDir.sizeOfCentralDir << ", offset: " << endDir.offset
132                                  << ", commentLen: " << endDir.commentLen;
133         return false;
134     }
135     return true;
136 }
137 
ParseEndDirectory()138 bool ZipFile::ParseEndDirectory()
139 {
140     size_t endDirLen = sizeof(EndDir);
141     size_t endFilePos = fileStartPos_ + fileLength_;
142 
143     if (fileLength_ <= endDirLen) {
144         LOG(ERROR, ZIPARCHIVE) << "fileStartPos_:" << fileStartPos_ << " <= fileLength_:" << fileLength_;
145         return false;
146     }
147 
148     size_t eocdPos = endFilePos - endDirLen;
149     if (!zipFileReader_->ReadBuffer(reinterpret_cast<uint8_t *>(&endDir_), eocdPos, sizeof(EndDir))) {
150         LOG(ERROR, ZIPARCHIVE) << "read EOCD failed";
151         return false;
152     }
153 
154     centralDirPos_ = endDir_.offset + fileStartPos_;
155 
156     return CheckEndDir(endDir_);
157 }
158 
ParseOneEntry(uint8_t * & entryPtr)159 bool ZipFile::ParseOneEntry(uint8_t *&entryPtr)
160 {
161     if (entryPtr == nullptr) {
162         LOG(ERROR, ZIPARCHIVE) << "null entryPtr";
163         return false;
164     }
165 
166     CentralDirEntry directoryEntry;
167     if (memcpy_s(&directoryEntry, sizeof(CentralDirEntry), entryPtr, sizeof(CentralDirEntry)) != EOK) {
168         LOG(ERROR, ZIPARCHIVE) << "Mem copy directory entry failed";
169         return false;
170     }
171 
172     if (directoryEntry.signature != CENTRAL_SIGNATURE) {
173         LOG(ERROR, ZIPARCHIVE) << "check signature failed";
174         return false;
175     }
176 
177     entryPtr += sizeof(CentralDirEntry);  // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
178     size_t fileLength = (directoryEntry.nameSize >= MAX_FILE_NAME) ? (MAX_FILE_NAME - 1) : directoryEntry.nameSize;
179     std::string fileName(fileLength, 0);
180     if (memcpy_s(&(fileName[0]), fileLength, entryPtr, fileLength) != EOK) {
181         LOG(ERROR, ZIPARCHIVE) << "Mem copy file name failed";
182         return false;
183     }
184 
185     ZipEntry currentEntry(directoryEntry);
186     currentEntry.fileName = fileName;
187     entriesMap_[fileName] = currentEntry;
188     // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
189     entryPtr += directoryEntry.nameSize + directoryEntry.extraSize + directoryEntry.commentSize;
190     return true;
191 }
192 
MakeDirTree() const193 std::shared_ptr<DirTreeNode> ZipFile::MakeDirTree() const
194 {
195     auto root = std::make_shared<DirTreeNode>();
196     root->isDir = true;
197     for (const auto &[fileName, entry] : entriesMap_) {
198         AddEntryToTree(fileName, root);
199     }
200     return root;
201 }
202 
GetDirRoot()203 std::shared_ptr<DirTreeNode> ZipFile::GetDirRoot()
204 {
205     if (!isOpen_) {
206         return nullptr;
207     }
208     os::memory::LockHolder lock(dirRootMutex_);
209     if (dirRoot_ == nullptr) {
210         dirRoot_ = MakeDirTree();
211     }
212     return dirRoot_;
213 }
214 
ParseAllEntries()215 bool ZipFile::ParseAllEntries()
216 {
217     auto centralData =
218         zipFileReader_->ReadBuffer(static_cast<size_t>(centralDirPos_), static_cast<size_t>(endDir_.sizeOfCentralDir));
219     if (centralData.empty()) {
220         LOG(ERROR, ZIPARCHIVE) << "centralData empty for " << pathName_ << " failed";
221         return false;
222     }
223 
224     bool ret = true;
225     auto *entryPtr = reinterpret_cast<uint8_t *>(centralData.data());
226     for (uint16_t i = 0; i < endDir_.totalEntries; i++) {
227         if (!ParseOneEntry(entryPtr)) {
228             LOG(ERROR, ZIPARCHIVE) << "Parse entry" << i << " failed";
229             ret = false;
230             break;
231         }
232     }
233 
234     return ret;
235 }
236 
Open()237 bool ZipFile::Open()
238 {
239     if (isOpen_) {
240         return true;
241     }
242 
243     if (pathName_.length() > PATH_MAX) {
244         LOG(ERROR, ZIPARCHIVE) << "pathName length > PATH_MAX";
245         return false;
246     }
247 
248     zipFileReader_ = ZipFileReader::CreateZipFileReader(pathName_);
249     if (!zipFileReader_) {
250         LOG(ERROR, ZIPARCHIVE) << "open file error: " << pathName_ << ", errno: " << errno;
251         return false;
252     }
253 
254     if (fileLength_ == 0) {
255         auto fileLength = zipFileReader_->GetFileLen();
256         fileLength_ = static_cast<ZipPos>(fileLength);
257         if (fileStartPos_ >= fileLength_) {
258             zipFileReader_.reset();
259             return false;
260         }
261 
262         fileLength_ -= fileStartPos_;
263     }
264 
265     bool result = ParseEndDirectory();
266     if (result) {
267         result = ParseAllEntries();
268     }
269     // it means open file success.
270     isOpen_ = true;
271     return result;
272 }
273 
Close()274 void ZipFile::Close()
275 {
276     if (!isOpen_ || zipFileReader_ == nullptr) {
277         return;
278     }
279 
280     isOpen_ = false;
281     entriesMap_.clear();
282     {
283         os::memory::LockHolder lock(dirRootMutex_);
284         dirRoot_.reset();
285     }
286     pathName_ = "";
287 
288     zipFileReader_.reset();
289 }
290 
291 // Get all file zipEntry in this file
GetAllEntries() const292 const ZipEntryMap &ZipFile::GetAllEntries() const
293 {
294     return entriesMap_;
295 }
296 
HasEntry(const std::string & entryName) const297 bool ZipFile::HasEntry(const std::string &entryName) const
298 {
299     return entriesMap_.find(entryName) != entriesMap_.end();
300 }
301 
SetCacheMode(CacheMode cacheMode)302 void ZipFile::SetCacheMode(CacheMode cacheMode)
303 {
304     os::memory::LockHolder lock(dirRootMutex_);
305     cacheMode_ = cacheMode;
306     if (!UseDirCache()) {
307         dirRoot_.reset();
308     }
309 }
310 
UseDirCache() const311 bool ZipFile::UseDirCache() const
312 {
313     auto mode = cacheMode_;
314     bool useCache = mode == CacheMode::CACHE_ALL;
315     if (mode == CacheMode::CACHE_CASE && entriesMap_.size() >= CACHE_CASE_THRESHOLD) {
316         useCache = true;
317     }
318     return useCache;
319 }
320 
IsDirExist(const std::string & dir)321 bool ZipFile::IsDirExist(const std::string &dir)
322 {
323     if (dir.empty()) {
324         LOG(ERROR, ZIPARCHIVE) << "dir is empty";
325         return false;
326     }
327     if (IsRootDir(dir)) {
328         return true;
329     }
330     if (dir.find(WRONG_FILE_SEPARATOR) != std::string::npos) {
331         LOG(WARNING, ZIPARCHIVE) << "Wrong format";
332         return false;
333     }
334 
335     auto tmpDir = dir;
336     if (tmpDir.front() == FILE_SEPARATOR_CHAR) {
337         tmpDir.erase(tmpDir.begin());
338     }
339     if (tmpDir.back() != FILE_SEPARATOR_CHAR) {
340         tmpDir.push_back(FILE_SEPARATOR_CHAR);
341     }
342     if (entriesMap_.count(tmpDir) > 0) {
343         return true;
344     }
345     tmpDir.pop_back();
346     if (entriesMap_.count(tmpDir) > 0) {
347         LOG(WARNING, ZIPARCHIVE) << "file not dir";
348         return false;
349     }
350 
351     if (UseDirCache()) {
352         return IsDirExistCache(tmpDir);
353     }
354     return IsDirExistNormal(tmpDir);
355 }
356 
GetAllFileList(const std::string & srcPath,std::vector<std::string> & assetList)357 void ZipFile::GetAllFileList(const std::string &srcPath, std::vector<std::string> &assetList)
358 {
359     if (srcPath.empty()) {
360         LOG(ERROR, ZIPARCHIVE) << "dir is empty";
361         return;
362     }
363     if (IsRootDir(srcPath)) {
364         for (const auto &[fileName, fileInfo] : entriesMap_) {
365             if (!fileName.empty() && fileName.back() != FILE_SEPARATOR_CHAR) {
366                 assetList.push_back(fileName);
367             }
368         }
369         return;
370     }
371     if (srcPath.find(WRONG_FILE_SEPARATOR) != std::string::npos) {
372         LOG(WARNING, ZIPARCHIVE) << "Wrong format";
373         return;
374     }
375 
376     auto tmpDir = srcPath;
377     if (tmpDir.front() == FILE_SEPARATOR_CHAR) {
378         tmpDir.erase(tmpDir.begin());
379     }
380     if (tmpDir.back() != FILE_SEPARATOR_CHAR) {
381         tmpDir.push_back(FILE_SEPARATOR_CHAR);
382     }
383     if (entriesMap_.count(tmpDir) > 0) {
384         return;
385     }
386     tmpDir.pop_back();
387     if (entriesMap_.count(tmpDir) > 0) {
388         LOG(WARNING, ZIPARCHIVE) << "file not dir";
389         return;
390     }
391 
392     if (UseDirCache()) {
393         GetAllFileListCache(tmpDir, assetList);
394     } else {
395         GetAllFileListNormal(tmpDir, assetList);
396     }
397 }
398 
GetChildNames(const std::string & srcPath,std::set<std::string> & fileSet)399 void ZipFile::GetChildNames(const std::string &srcPath, std::set<std::string> &fileSet)
400 {
401     if (srcPath.empty()) {
402         LOG(ERROR, ZIPARCHIVE) << "dir is empty";
403         return;
404     }
405     if (srcPath.find(WRONG_FILE_SEPARATOR) != std::string::npos) {
406         LOG(WARNING, ZIPARCHIVE) << "Wrong format";
407         return;
408     }
409     auto tmpDir = srcPath;
410     if (!IsRootDir(tmpDir)) {
411         if (tmpDir.front() == FILE_SEPARATOR_CHAR) {
412             tmpDir.erase(tmpDir.begin());
413         }
414         if (tmpDir.back() != FILE_SEPARATOR_CHAR) {
415             tmpDir.push_back(FILE_SEPARATOR_CHAR);
416         }
417         if (entriesMap_.count(tmpDir) > 0) {
418             return;
419         }
420         tmpDir.pop_back();
421         if (entriesMap_.count(tmpDir) > 0) {
422             LOG(WARNING, ZIPARCHIVE) << "file not dir";
423             return;
424         }
425     }
426 
427     if (UseDirCache()) {
428         GetChildNamesCache(tmpDir, fileSet);
429     } else {
430         GetChildNamesNormal(tmpDir, fileSet);
431     }
432 }
433 
IsDirExistCache(const std::string & dir)434 bool ZipFile::IsDirExistCache(const std::string &dir)
435 {
436     auto parent = GetDirRoot();
437     if (parent == nullptr) {
438         LOG(ERROR, ZIPARCHIVE) << "null parent";
439         return false;
440     }
441     size_t cur = 0;
442     do {
443         while (cur < dir.size() && dir[cur] == FILE_SEPARATOR_CHAR) {
444             cur++;
445         }
446         if (cur >= dir.size()) {
447             break;
448         }
449         auto next = dir.find(FILE_SEPARATOR_CHAR, cur);
450         auto nodeName = dir.substr(cur, next - cur);
451         auto it = parent->children.find(nodeName);
452         if (it == parent->children.end()) {
453             LOG(ERROR, ZIPARCHIVE) << "dir not found, dir : " << dir;
454             return false;
455         }
456         parent = it->second;
457         cur = next;
458     } while (cur != std::string::npos);
459 
460     return true;
461 }
462 
GetAllFileListCache(const std::string & srcPath,std::vector<std::string> & assetList)463 void ZipFile::GetAllFileListCache(const std::string &srcPath, std::vector<std::string> &assetList)
464 {
465     auto parent = GetDirRoot();
466     if (parent == nullptr) {
467         LOG(ERROR, ZIPARCHIVE) << "null parent";
468         return;
469     }
470 
471     auto rootName = srcPath.back() == FILE_SEPARATOR_CHAR ? srcPath.substr(0, srcPath.length() - 1) : srcPath;
472 
473     size_t cur = 0;
474     do {
475         while (cur < rootName.size() && rootName[cur] == FILE_SEPARATOR_CHAR) {
476             cur++;
477         }
478         if (cur >= rootName.size()) {
479             break;
480         }
481         auto next = rootName.find(FILE_SEPARATOR_CHAR, cur);
482         auto nodeName = rootName.substr(cur, next - cur);
483         auto it = parent->children.find(nodeName);
484         if (it == parent->children.end()) {
485             LOG(ERROR, ZIPARCHIVE) << "srcPath not found, srcPath : " << rootName;
486             return;
487         }
488         parent = it->second;
489         cur = next;
490     } while (cur != std::string::npos);
491 
492     GetTreeFileList(parent, rootName, assetList);
493 }
494 
GetChildNamesCache(const std::string & srcPath,std::set<std::string> & fileSet)495 void ZipFile::GetChildNamesCache(const std::string &srcPath, std::set<std::string> &fileSet)
496 {
497     size_t cur = 0;
498     auto parent = GetDirRoot();
499     if (parent == nullptr) {
500         LOG(ERROR, ZIPARCHIVE) << "null parent";
501         return;
502     }
503     do {
504         while (cur < srcPath.size() && srcPath[cur] == FILE_SEPARATOR_CHAR) {
505             cur++;
506         }
507         if (cur >= srcPath.size()) {
508             break;
509         }
510         auto next = srcPath.find(FILE_SEPARATOR_CHAR, cur);
511         auto nodeName = srcPath.substr(cur, next - cur);
512         auto it = parent->children.find(nodeName);
513         if (it == parent->children.end()) {
514             LOG(ERROR, ZIPARCHIVE) << "srcPath not found, srcPath : " << srcPath;
515             return;
516         }
517         parent = it->second;
518         cur = next;
519     } while (cur != std::string::npos);
520 
521     for (const auto &child : parent->children) {
522         fileSet.insert(child.first);
523     }
524 }
525 
IsDirExistNormal(const std::string & dir)526 bool ZipFile::IsDirExistNormal(const std::string &dir)
527 {
528     auto targetDir = dir;
529     if (targetDir.back() != FILE_SEPARATOR_CHAR) {
530         targetDir.push_back(FILE_SEPARATOR_CHAR);
531     }
532     for (const auto &[fileName, fileInfo] : entriesMap_) {
533         if (fileName.size() > targetDir.size() && fileName.substr(0, targetDir.size()) == targetDir) {
534             return true;
535         }
536     }
537     return false;
538 }
539 
GetAllFileListNormal(const std::string & srcPath,std::vector<std::string> & assetList)540 void ZipFile::GetAllFileListNormal(const std::string &srcPath, std::vector<std::string> &assetList)
541 {
542     auto targetDir = srcPath;
543     if (targetDir.back() != FILE_SEPARATOR_CHAR) {
544         targetDir.push_back(FILE_SEPARATOR_CHAR);
545     }
546     for (const auto &[fileName, fileInfo] : entriesMap_) {
547         if (fileName.size() > targetDir.size() && fileName.back() != FILE_SEPARATOR_CHAR &&
548             fileName.substr(0, targetDir.size()) == targetDir) {
549             assetList.push_back(fileName);
550         }
551     }
552 }
553 
GetChildNamesNormal(const std::string & srcPath,std::set<std::string> & fileSet)554 void ZipFile::GetChildNamesNormal(const std::string &srcPath, std::set<std::string> &fileSet)
555 {
556     auto targetDir = srcPath;
557     if (targetDir.back() != FILE_SEPARATOR_CHAR) {
558         targetDir.push_back(FILE_SEPARATOR_CHAR);
559     }
560     if (IsRootDir(srcPath)) {
561         for (const auto &[fileName, fileInfo] : entriesMap_) {
562             auto nextPos = fileName.find(FILE_SEPARATOR_CHAR);
563             fileSet.insert(nextPos == std::string::npos ? fileName : fileName.substr(0, nextPos));
564         }
565         return;
566     }
567     for (const auto &[fileName, fileInfo] : entriesMap_) {
568         if (fileName.size() > targetDir.size() && fileName.substr(0, targetDir.size()) == targetDir) {
569             fileSet.insert(fileName.substr(targetDir.size(),
570                                            fileName.find(FILE_SEPARATOR_CHAR, targetDir.size()) - targetDir.size()));
571         }
572     }
573 }
574 
GetEntry(const std::string & entryName,ZipEntry & resultEntry) const575 bool ZipFile::GetEntry(const std::string &entryName, ZipEntry &resultEntry) const
576 {
577     auto iter = entriesMap_.find(entryName);
578     if (iter != entriesMap_.end()) {
579         resultEntry = iter->second;
580         return true;
581     }
582     return false;
583 }
584 
GetLocalHeaderSize(const uint16_t nameSize,const uint16_t extraSize) const585 size_t ZipFile::GetLocalHeaderSize(const uint16_t nameSize, const uint16_t extraSize) const
586 {
587     return sizeof(LocalHeader) + nameSize + extraSize;
588 }
589 
CheckDataDesc(const ZipEntry & zipEntry,const LocalHeader & localHeader) const590 bool ZipFile::CheckDataDesc(const ZipEntry &zipEntry, const LocalHeader &localHeader) const
591 {
592     uint32_t crcLocal = 0;
593     uint32_t compressedLocal = 0;
594     uint32_t uncompressedLocal = 0;
595 
596     if ((localHeader.flags & FLAG_DATA_DESC) != 0U) {  // use data desc
597         DataDesc dataDesc;
598         auto descPos = zipEntry.localHeaderOffset + GetLocalHeaderSize(localHeader.nameSize, localHeader.extraSize);
599         descPos += fileStartPos_ + zipEntry.compressedSize;
600 
601         if (!zipFileReader_->ReadBuffer(reinterpret_cast<uint8_t *>(&dataDesc), descPos, sizeof(DataDesc))) {
602             LOG(ERROR, ZIPARCHIVE) << "ReadBuffer failed";
603             return false;
604         }
605 
606         if (dataDesc.signature != DATA_DESC_SIGNATURE) {
607             LOG(ERROR, ZIPARCHIVE) << "check signature failed";
608             return false;
609         }
610 
611         crcLocal = dataDesc.crc;
612         compressedLocal = dataDesc.compressedSize;
613         uncompressedLocal = dataDesc.uncompressedSize;
614     } else {
615         crcLocal = localHeader.crc;
616         compressedLocal = localHeader.compressedSize;
617         uncompressedLocal = localHeader.uncompressedSize;
618     }
619 
620     if ((zipEntry.crc != crcLocal) || (zipEntry.compressedSize != compressedLocal) ||
621         (zipEntry.uncompressedSize != uncompressedLocal)) {
622         LOG(ERROR, ZIPARCHIVE) << "size corrupted";
623         return false;
624     }
625 
626     return true;
627 }
628 
CheckCoherencyLocalHeader(const ZipEntry & zipEntry,uint16_t & extraSize) const629 bool ZipFile::CheckCoherencyLocalHeader(const ZipEntry &zipEntry, uint16_t &extraSize) const
630 {
631     // current only support store and Z_DEFLATED method
632     if ((zipEntry.compressionMethod != Z_DEFLATED) && (zipEntry.compressionMethod != 0)) {
633         LOG(ERROR, ZIPARCHIVE) << "compressionMethod " << zipEntry.compressionMethod << " not support";
634         return false;
635     }
636 
637     auto nameSize = zipEntry.fileName.length();
638     auto startPos = fileStartPos_ + zipEntry.localHeaderOffset;
639     size_t buffSize = sizeof(LocalHeader) + nameSize;
640     auto buff = zipFileReader_->ReadBuffer(startPos, buffSize);
641     if (buff.size() < buffSize) {
642         LOG(ERROR, ZIPARCHIVE) << "read header failed";
643         return false;
644     }
645 
646     LocalHeader localHeader = {0};
647     if (memcpy_s(&localHeader, sizeof(LocalHeader), buff.data(), sizeof(LocalHeader)) != EOK) {
648         LOG(ERROR, ZIPARCHIVE) << "memcpy localheader failed";
649         return false;
650     }
651     if ((localHeader.signature != LOCAL_HEADER_SIGNATURE) ||
652         (zipEntry.compressionMethod != localHeader.compressionMethod)) {
653         LOG(ERROR, ZIPARCHIVE) << "signature or compressionMethod failed";
654         return false;
655     }
656 
657     if (localHeader.nameSize != nameSize && nameSize < MAX_FILE_NAME - 1) {
658         LOG(ERROR, ZIPARCHIVE) << "name corrupted";
659         return false;
660     }
661     std::string fileName = buff.substr(sizeof(LocalHeader));
662     if (zipEntry.fileName != fileName) {
663         LOG(ERROR, ZIPARCHIVE) << "name corrupted";
664         return false;
665     }
666 
667     if (!CheckDataDesc(zipEntry, localHeader)) {
668         LOG(ERROR, ZIPARCHIVE) << "check data desc failed";
669         return false;
670     }
671 
672     extraSize = localHeader.extraSize;
673     return true;
674 }
675 
GetEntryStart(const ZipEntry & zipEntry,const uint16_t extraSize) const676 size_t ZipFile::GetEntryStart(const ZipEntry &zipEntry, const uint16_t extraSize) const
677 {
678     ZipPos startOffset = zipEntry.localHeaderOffset;
679     // get data offset, add signature+localheader+namesize+extrasize
680     startOffset += GetLocalHeaderSize(zipEntry.fileName.length(), extraSize);
681     startOffset += fileStartPos_;  // add file start relative to file stream
682 
683     return startOffset;
684 }
685 
InitZStream(z_stream & zstream) const686 bool ZipFile::InitZStream(z_stream &zstream) const
687 {
688     // init zlib stream
689     if (memset_s(&zstream, sizeof(z_stream), 0, sizeof(z_stream)) != 0) {
690         LOG(ERROR, ZIPARCHIVE) << "stream buffer init failed";
691         return false;
692     }
693     int32_t zlibErr = inflateInit2(&zstream, -MAX_WBITS);
694     if (zlibErr != Z_OK) {
695         LOG(ERROR, ZIPARCHIVE) << "init failed";
696         return false;
697     }
698 
699     BytePtr bufOut = new (std::nothrow) Byte[UNZIP_BUF_OUT_LEN];  // NOLINT(modernize-use-auto)
700     if (bufOut == nullptr) {
701         LOG(ERROR, ZIPARCHIVE) << "null bufOut";
702         return false;
703     }
704 
705     BytePtr bufIn = new (std::nothrow) Byte[UNZIP_BUF_IN_LEN];  // NOLINT(modernize-use-auto)
706     if (bufIn == nullptr) {
707         LOG(ERROR, ZIPARCHIVE) << "null bufIn";
708         delete[] bufOut;
709         return false;
710     }
711     zstream.next_out = bufOut;
712     zstream.next_in = bufIn;
713     zstream.avail_out = UNZIP_BUF_OUT_LEN;
714     return true;
715 }
716 
GetEntryDataOffset(const ZipEntry & zipEntry,const uint16_t extraSize) const717 ZipPos ZipFile::GetEntryDataOffset(const ZipEntry &zipEntry, const uint16_t extraSize) const
718 {
719     // get entry data offset relative file
720     ZipPos offset = zipEntry.localHeaderOffset;
721 
722     offset += GetLocalHeaderSize(zipEntry.fileName.length(), extraSize);
723     offset += fileStartPos_;
724 
725     return offset;
726 }
727 
GetDataOffsetRelative(const ZipEntry & zipEntry,ZipPos & offset,uint32_t & length) const728 bool ZipFile::GetDataOffsetRelative(const ZipEntry &zipEntry, ZipPos &offset, uint32_t &length) const
729 {
730     uint16_t extraSize = 0;
731     if (!CheckCoherencyLocalHeader(zipEntry, extraSize)) {
732         LOG(ERROR, ZIPARCHIVE) << "check coherency local header failed";
733         return false;
734     }
735 
736     offset = GetEntryDataOffset(zipEntry, extraSize);
737     length = zipEntry.compressedSize;
738     return true;
739 }
740 
741 // NOLINTNEXTLINE(modernize-avoid-c-arrays)
ExtractFileFromMMap(const std::string & file,void * mmapDataPtr,std::unique_ptr<uint8_t[]> & dataPtr,size_t & len) const742 bool ZipFile::ExtractFileFromMMap(const std::string &file, void *mmapDataPtr, std::unique_ptr<uint8_t[]> &dataPtr,
743                                   size_t &len) const
744 {
745     ZipEntry zipEntry;
746     if (!GetEntry(file, zipEntry)) {
747         LOG(ERROR, ZIPARCHIVE) << "not find file";
748         return false;
749     }
750 
751     if (zipEntry.compressionMethod == 0U) {
752         LOG(ERROR, ZIPARCHIVE) << "file is not extracted, file: " << file;
753         return false;
754     }
755 
756     uint16_t extraSize = 0;
757     if (!CheckCoherencyLocalHeader(zipEntry, extraSize)) {
758         LOG(ERROR, ZIPARCHIVE) << "check coherency local header failed";
759         return false;
760     }
761 
762     return UnzipWithInflatedFromMMap(zipEntry, extraSize, mmapDataPtr, dataPtr, len);
763 }
764 
UnzipWithInflatedFromMMap(const ZipEntry & zipEntry,const uint16_t extraSize,void * mmapDataPtr,std::unique_ptr<uint8_t[]> & dataPtr,size_t & len) const765 bool ZipFile::UnzipWithInflatedFromMMap(const ZipEntry &zipEntry, [[maybe_unused]] const uint16_t extraSize,
766                                         // NOLINTNEXTLINE(modernize-avoid-c-arrays)
767                                         void *mmapDataPtr, std::unique_ptr<uint8_t[]> &dataPtr, size_t &len) const
768 {
769     z_stream zstream;
770     if (!InitZStream(zstream)) {
771         LOG(ERROR, ZIPARCHIVE) << "init zstream failed";
772         return false;
773     }
774 
775     BytePtr bufIn = zstream.next_in;
776     BytePtr bufOut = zstream.next_out;
777 
778     bool ret = true;
779     int32_t zlibErr = Z_OK;
780     uint32_t remainCompressedSize = zipEntry.compressedSize;
781     size_t inflateLen = 0;
782     uint8_t errorTimes = 0;
783 
784     len = zipEntry.uncompressedSize;
785     dataPtr = std::make_unique<uint8_t[]>(len);  // NOLINT(modernize-avoid-c-arrays)
786     auto *dstDataPtr = static_cast<uint8_t *>(dataPtr.get());
787     void *mmapSrcDataPtr = mmapDataPtr;
788 
789     while ((remainCompressedSize > 0) || (zstream.avail_in > 0)) {
790         if (!ReadZStreamFromMMap(bufIn, mmapSrcDataPtr, zstream, remainCompressedSize)) {
791             ret = false;
792             break;
793         }
794 
795         zlibErr = inflate(&zstream, Z_SYNC_FLUSH);
796         if ((zlibErr >= Z_OK) && (zstream.msg != nullptr)) {
797             LOG(ERROR, ZIPARCHIVE) << "unzip failed, zlibErr: " << zlibErr << ", msg: " << zstream.msg;
798             ret = false;
799             break;
800         }
801 
802         inflateLen = UNZIP_BUF_OUT_LEN - zstream.avail_out;
803         if (!CopyInflateOut(zstream, inflateLen, &dstDataPtr, bufOut, errorTimes)) {
804             break;
805         }
806     }
807 
808     // free all dynamically allocated data structures except the next_in and next_out for this stream.
809     zlibErr = inflateEnd(&zstream);
810     if (zlibErr != Z_OK) {
811         LOG(ERROR, ZIPARCHIVE) << "inflateEnd failed, zlibErr: " << zlibErr;
812         ret = false;
813     }
814 
815     delete[] bufOut;
816     delete[] bufIn;
817     return ret;
818 }
819 
CopyInflateOut(z_stream & zstream,size_t inflateLen,uint8_t ** dstDataPtr,BytePtr bufOut,uint8_t & errorTimes) const820 bool ZipFile::CopyInflateOut(z_stream &zstream, size_t inflateLen, uint8_t **dstDataPtr, BytePtr bufOut,
821                              uint8_t &errorTimes) const
822 {
823     if (inflateLen > 0) {
824         if (memcpy_s(*dstDataPtr, inflateLen, bufOut, inflateLen) != EOK) {
825             LOG(ERROR, ZIPARCHIVE) << "memcpy failed";
826             return false;
827         }
828 
829         *dstDataPtr += inflateLen;  // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
830         zstream.next_out = bufOut;
831         zstream.avail_out = UNZIP_BUF_OUT_LEN;
832         errorTimes = 0;
833     } else {
834         errorTimes++;
835     }
836     if (errorTimes >= INFLATE_ERROR_TIMES) {
837         LOG(ERROR, ZIPARCHIVE) << "data is abnormal";
838         return false;
839     }
840 
841     return true;
842 }
843 
ReadZStreamFromMMap(const BytePtr & buffer,void * & dataPtr,z_stream & zstream,uint32_t & remainCompressedSize) const844 bool ZipFile::ReadZStreamFromMMap(const BytePtr &buffer, void *&dataPtr, z_stream &zstream,
845                                   uint32_t &remainCompressedSize) const
846 {
847     if (dataPtr == nullptr) {
848         LOG(ERROR, ZIPARCHIVE) << "dataPtr is nullptr";
849         return false;
850     }
851 
852     auto *srcDataPtr = static_cast<uint8_t *>(dataPtr);
853     if (zstream.avail_in == 0) {
854         size_t remainBytes = (remainCompressedSize > UNZIP_BUF_IN_LEN) ? UNZIP_BUF_IN_LEN : remainCompressedSize;
855         size_t readBytes = sizeof(Byte) * remainBytes;
856         if (memcpy_s(buffer, readBytes, srcDataPtr, readBytes) != EOK) {
857             LOG(ERROR, ZIPARCHIVE) << "memcpy failed";
858             return false;
859         }
860         srcDataPtr += readBytes;  // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
861         remainCompressedSize -= remainBytes;
862         zstream.avail_in = remainBytes;
863         zstream.next_in = buffer;
864     }
865     dataPtr = srcDataPtr;
866     return true;
867 }
868 
CreateFileMapper(const std::string & fileName,FileMapperType type) const869 std::unique_ptr<FileMapper> ZipFile::CreateFileMapper(const std::string &fileName, FileMapperType type) const
870 {
871     ZipEntry zipEntry;
872     if (!GetEntry(fileName, zipEntry)) {
873         return nullptr;
874     }
875 
876     ZipPos offset = 0;
877     uint32_t length = 0;
878     if (!GetDataOffsetRelative(zipEntry, offset, length)) {
879         LOG(ERROR, ZIPARCHIVE) << "GetDataOffsetRelative failed hapPath: " << fileName;
880         return nullptr;
881     }
882     bool compress = zipEntry.compressionMethod > 0;
883     if (type == FileMapperType::SAFE_ABC && compress) {
884         LOG(WARNING, ZIPARCHIVE) << "Entry is compressed for safe: " << fileName;
885     }
886     std::unique_ptr<FileMapper> fileMapper = std::make_unique<FileMapper>();
887     if (zipFileReader_ == nullptr) {
888         LOG(ERROR, ZIPARCHIVE) << "zipFileReader_ is nullptr";
889         return nullptr;
890     }
891     auto result = false;
892     if (type == FileMapperType::NORMAL_MEM) {
893         result = fileMapper->CreateFileMapper(zipFileReader_, fileName, offset, length, compress);
894     } else {
895         result = fileMapper->CreateFileMapper(fileName, compress, zipFileReader_->GetFd(), offset, length, type);
896         if (result && type == FileMapperType::SAFE_ABC) {
897             zipFileReader_->SetClosable(false);
898         }
899     }
900 
901     if (!result) {
902         return nullptr;
903     }
904     return fileMapper;
905 }
906 
907 // NOLINTNEXTLINE(modernize-avoid-c-arrays)
ExtractToBufByName(const std::string & fileName,std::unique_ptr<uint8_t[]> & dataPtr,size_t & len) const908 bool ZipFile::ExtractToBufByName(const std::string &fileName, std::unique_ptr<uint8_t[]> &dataPtr, size_t &len) const
909 {
910     ZipEntry zipEntry;
911     if (!GetEntry(fileName, zipEntry)) {
912         if (fileName.length() > MAP_FILE_SUFFIX && fileName.substr(fileName.length() - MAP_FILE_SUFFIX) != ".map") {
913             LOG(ERROR, ZIPARCHIVE) << "GetEntry failed hapPath: " << fileName;
914         }
915         return false;
916     }
917     uint16_t extraSize = 0;
918     if (!CheckCoherencyLocalHeader(zipEntry, extraSize)) {
919         LOG(ERROR, ZIPARCHIVE) << "check coherency local header failed";
920         return false;
921     }
922     if (zipFileReader_ == nullptr) {
923         LOG(ERROR, ZIPARCHIVE) << "zipFileReader_ is nullptr";
924         return false;
925     }
926     ZipPos offset = GetEntryDataOffset(zipEntry, extraSize);
927     uint32_t length = zipEntry.compressedSize;
928     auto dataTmp = std::make_unique<uint8_t[]>(length);  // NOLINT(modernize-avoid-c-arrays)
929     if (!zipFileReader_->ReadBuffer(dataTmp.get(), offset, length)) {
930         LOG(ERROR, ZIPARCHIVE) << "read file failed, len: " << length << ", fileName: " << fileName
931                                << ", offset: " << offset;
932         dataTmp.reset();
933         return false;
934     }
935 
936     if (zipEntry.compressionMethod > 0) {
937         return UnzipWithInflatedFromMMap(zipEntry, extraSize, dataTmp.get(), dataPtr, len);
938     }
939 
940     len = length;
941     dataPtr = std::move(dataTmp);
942 
943     return true;
944 }
945 }  // namespace ark::extractor
946