1 /*
2 * Copyright (c) 2025 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "zip_file.h"
17
18 #include <ostream>
19 #include <string>
20
21 #include <securec.h>
22
23 #include "file_mapper.h"
24 #include "zip_file_reader.h"
25
26 namespace ark::extractor {
27 namespace {
// Entry names of this length or longer are truncated to MAX_FILE_NAME - 1 bytes when parsed.
constexpr uint32_t MAX_FILE_NAME = 4096;
// Unit (in bytes) used to size the two inflate buffers below.
constexpr uint32_t UNZIP_BUFFER_SIZE = 1024;
constexpr uint32_t UNZIP_BUF_IN_LEN = 160 * UNZIP_BUFFER_SIZE; // in buffer length: 160KB
constexpr uint32_t UNZIP_BUF_OUT_LEN = 320 * UNZIP_BUFFER_SIZE; // out buffer length: 320KB
// Signature values compared against the local header, central directory entry,
// end-of-central-directory record and data descriptor respectively.
constexpr uint32_t LOCAL_HEADER_SIGNATURE = 0x04034b50;
constexpr uint32_t CENTRAL_SIGNATURE = 0x02014b50;
constexpr uint32_t EOCD_SIGNATURE = 0x06054b50;
constexpr uint32_t DATA_DESC_SIGNATURE = 0x08074b50;
// Bit tested in the local header flags: when set, crc and sizes are read from a data descriptor.
constexpr uint32_t FLAG_DATA_DESC = 0x8;
// Number of consecutive inflate rounds producing no output after which decompression is aborted.
constexpr uint8_t INFLATE_ERROR_TIMES = 5;
// Length of the ".map" suffix checked before logging a missing entry.
constexpr uint8_t MAP_FILE_SUFFIX = 4;
constexpr char FILE_SEPARATOR_CHAR = '/';
// Paths containing this sequence are rejected as malformed.
constexpr const char *WRONG_FILE_SEPARATOR = "//";
// In CacheMode::CACHE_CASE the directory tree is used once the archive has at least this many entries.
constexpr uint32_t CACHE_CASE_THRESHOLD = 10000;
42
43 // NOLINTNEXTLINE(performance-unnecessary-value-param)
GetTreeFileList(std::shared_ptr<DirTreeNode> root,const std::string & rootPath,std::vector<std::string> & assetList)44 void GetTreeFileList(std::shared_ptr<DirTreeNode> root, const std::string &rootPath,
45 std::vector<std::string> &assetList)
46 {
47 if (root == nullptr) {
48 return;
49 }
50 if (!root->isDir && !rootPath.empty()) {
51 assetList.push_back(rootPath);
52 } else {
53 std::string prefix = rootPath;
54 if (!prefix.empty()) {
55 prefix.push_back(FILE_SEPARATOR_CHAR);
56 }
57 for (const auto &child : root->children) {
58 GetTreeFileList(child.second, prefix + child.first, assetList);
59 }
60 }
61 }
62
63 // NOLINTNEXTLINE(performance-unnecessary-value-param)
AddEntryToTree(const std::string & fileName,std::shared_ptr<DirTreeNode> root)64 void AddEntryToTree(const std::string &fileName, std::shared_ptr<DirTreeNode> root)
65 {
66 if (root == nullptr) {
67 return;
68 }
69 size_t cur = 0;
70 auto parent = root;
71 do {
72 while (cur < fileName.size() && fileName[cur] == FILE_SEPARATOR_CHAR) {
73 cur++;
74 }
75 if (cur >= fileName.size()) {
76 break;
77 }
78 auto next = fileName.find(FILE_SEPARATOR_CHAR, cur);
79 auto nodeName = fileName.substr(cur, next - cur);
80 auto it = parent->children.find(nodeName);
81 if (it != parent->children.end()) {
82 parent = it->second;
83 } else {
84 auto node = std::make_shared<DirTreeNode>();
85 node->isDir = next != std::string::npos;
86 parent->children.emplace(nodeName, node);
87 parent = node;
88 }
89 cur = next;
90 } while (cur != std::string::npos);
91 }
92
IsRootDir(const std::string & dirName)93 inline bool IsRootDir(const std::string &dirName)
94 {
95 return dirName.size() == 1 && dirName.back() == FILE_SEPARATOR_CHAR;
96 }
97 } // namespace
98
ZipEntry(const CentralDirEntry & centralEntry)99 ZipEntry::ZipEntry(const CentralDirEntry ¢ralEntry)
100 {
101 compressionMethod = centralEntry.compressionMethod;
102 uncompressedSize = centralEntry.uncompressedSize;
103 compressedSize = centralEntry.compressedSize;
104 localHeaderOffset = centralEntry.localHeaderOffset;
105 crc = centralEntry.crc;
106 flags = centralEntry.flags;
107 modifiedTime = centralEntry.modifiedTime;
108 modifiedDate = centralEntry.modifiedDate;
109 }
110
ZipFile(const std::string & pathName)111 ZipFile::ZipFile(const std::string &pathName) : pathName_(pathName) {} // NOLINT(modernize-pass-by-value)
112
~ZipFile()113 ZipFile::~ZipFile()
114 {
115 Close();
116 }
117
CheckEndDir(const EndDir & endDir) const118 bool ZipFile::CheckEndDir(const EndDir &endDir) const
119 {
120 size_t lenEndDir = sizeof(EndDir);
121 if ((endDir.numDisk != 0) || (endDir.signature != EOCD_SIGNATURE) || (endDir.startDiskOfCentralDir != 0) ||
122 (endDir.offset >= fileLength_) || (endDir.totalEntriesInThisDisk != endDir.totalEntries) ||
123 (endDir.commentLen != 0) ||
124 // central dir can't overlap end of central dir
125 ((endDir.offset + endDir.sizeOfCentralDir + lenEndDir) > fileLength_)) {
126 LOG(WARNING, ZIPARCHIVE) << "failed: fileLen: " << fileLength_ << ", signature: " << endDir.signature
127 << ", numDisk: " << endDir.numDisk
128 << ", startDiskOfCentralDir: " << endDir.startDiskOfCentralDir
129 << ", totalEntriesInThisDisk: " << endDir.totalEntriesInThisDisk
130 << ", totalEntries: " << endDir.totalEntries
131 << ", sizeOfCentralDir: " << endDir.sizeOfCentralDir << ", offset: " << endDir.offset
132 << ", commentLen: " << endDir.commentLen;
133 return false;
134 }
135 return true;
136 }
137
ParseEndDirectory()138 bool ZipFile::ParseEndDirectory()
139 {
140 size_t endDirLen = sizeof(EndDir);
141 size_t endFilePos = fileStartPos_ + fileLength_;
142
143 if (fileLength_ <= endDirLen) {
144 LOG(ERROR, ZIPARCHIVE) << "fileStartPos_:" << fileStartPos_ << " <= fileLength_:" << fileLength_;
145 return false;
146 }
147
148 size_t eocdPos = endFilePos - endDirLen;
149 if (!zipFileReader_->ReadBuffer(reinterpret_cast<uint8_t *>(&endDir_), eocdPos, sizeof(EndDir))) {
150 LOG(ERROR, ZIPARCHIVE) << "read EOCD failed";
151 return false;
152 }
153
154 centralDirPos_ = endDir_.offset + fileStartPos_;
155
156 return CheckEndDir(endDir_);
157 }
158
ParseOneEntry(uint8_t * & entryPtr)159 bool ZipFile::ParseOneEntry(uint8_t *&entryPtr)
160 {
161 if (entryPtr == nullptr) {
162 LOG(ERROR, ZIPARCHIVE) << "null entryPtr";
163 return false;
164 }
165
166 CentralDirEntry directoryEntry;
167 if (memcpy_s(&directoryEntry, sizeof(CentralDirEntry), entryPtr, sizeof(CentralDirEntry)) != EOK) {
168 LOG(ERROR, ZIPARCHIVE) << "Mem copy directory entry failed";
169 return false;
170 }
171
172 if (directoryEntry.signature != CENTRAL_SIGNATURE) {
173 LOG(ERROR, ZIPARCHIVE) << "check signature failed";
174 return false;
175 }
176
177 entryPtr += sizeof(CentralDirEntry); // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
178 size_t fileLength = (directoryEntry.nameSize >= MAX_FILE_NAME) ? (MAX_FILE_NAME - 1) : directoryEntry.nameSize;
179 std::string fileName(fileLength, 0);
180 if (memcpy_s(&(fileName[0]), fileLength, entryPtr, fileLength) != EOK) {
181 LOG(ERROR, ZIPARCHIVE) << "Mem copy file name failed";
182 return false;
183 }
184
185 ZipEntry currentEntry(directoryEntry);
186 currentEntry.fileName = fileName;
187 entriesMap_[fileName] = currentEntry;
188 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
189 entryPtr += directoryEntry.nameSize + directoryEntry.extraSize + directoryEntry.commentSize;
190 return true;
191 }
192
MakeDirTree() const193 std::shared_ptr<DirTreeNode> ZipFile::MakeDirTree() const
194 {
195 auto root = std::make_shared<DirTreeNode>();
196 root->isDir = true;
197 for (const auto &[fileName, entry] : entriesMap_) {
198 AddEntryToTree(fileName, root);
199 }
200 return root;
201 }
202
GetDirRoot()203 std::shared_ptr<DirTreeNode> ZipFile::GetDirRoot()
204 {
205 if (!isOpen_) {
206 return nullptr;
207 }
208 os::memory::LockHolder lock(dirRootMutex_);
209 if (dirRoot_ == nullptr) {
210 dirRoot_ = MakeDirTree();
211 }
212 return dirRoot_;
213 }
214
ParseAllEntries()215 bool ZipFile::ParseAllEntries()
216 {
217 auto centralData =
218 zipFileReader_->ReadBuffer(static_cast<size_t>(centralDirPos_), static_cast<size_t>(endDir_.sizeOfCentralDir));
219 if (centralData.empty()) {
220 LOG(ERROR, ZIPARCHIVE) << "centralData empty for " << pathName_ << " failed";
221 return false;
222 }
223
224 bool ret = true;
225 auto *entryPtr = reinterpret_cast<uint8_t *>(centralData.data());
226 for (uint16_t i = 0; i < endDir_.totalEntries; i++) {
227 if (!ParseOneEntry(entryPtr)) {
228 LOG(ERROR, ZIPARCHIVE) << "Parse entry" << i << " failed";
229 ret = false;
230 break;
231 }
232 }
233
234 return ret;
235 }
236
Open()237 bool ZipFile::Open()
238 {
239 if (isOpen_) {
240 return true;
241 }
242
243 if (pathName_.length() > PATH_MAX) {
244 LOG(ERROR, ZIPARCHIVE) << "pathName length > PATH_MAX";
245 return false;
246 }
247
248 zipFileReader_ = ZipFileReader::CreateZipFileReader(pathName_);
249 if (!zipFileReader_) {
250 LOG(ERROR, ZIPARCHIVE) << "open file error: " << pathName_ << ", errno: " << errno;
251 return false;
252 }
253
254 if (fileLength_ == 0) {
255 auto fileLength = zipFileReader_->GetFileLen();
256 fileLength_ = static_cast<ZipPos>(fileLength);
257 if (fileStartPos_ >= fileLength_) {
258 zipFileReader_.reset();
259 return false;
260 }
261
262 fileLength_ -= fileStartPos_;
263 }
264
265 bool result = ParseEndDirectory();
266 if (result) {
267 result = ParseAllEntries();
268 }
269 // it means open file success.
270 isOpen_ = true;
271 return result;
272 }
273
Close()274 void ZipFile::Close()
275 {
276 if (!isOpen_ || zipFileReader_ == nullptr) {
277 return;
278 }
279
280 isOpen_ = false;
281 entriesMap_.clear();
282 {
283 os::memory::LockHolder lock(dirRootMutex_);
284 dirRoot_.reset();
285 }
286 pathName_ = "";
287
288 zipFileReader_.reset();
289 }
290
291 // Get all file zipEntry in this file
GetAllEntries() const292 const ZipEntryMap &ZipFile::GetAllEntries() const
293 {
294 return entriesMap_;
295 }
296
HasEntry(const std::string & entryName) const297 bool ZipFile::HasEntry(const std::string &entryName) const
298 {
299 return entriesMap_.find(entryName) != entriesMap_.end();
300 }
301
SetCacheMode(CacheMode cacheMode)302 void ZipFile::SetCacheMode(CacheMode cacheMode)
303 {
304 os::memory::LockHolder lock(dirRootMutex_);
305 cacheMode_ = cacheMode;
306 if (!UseDirCache()) {
307 dirRoot_.reset();
308 }
309 }
310
UseDirCache() const311 bool ZipFile::UseDirCache() const
312 {
313 auto mode = cacheMode_;
314 bool useCache = mode == CacheMode::CACHE_ALL;
315 if (mode == CacheMode::CACHE_CASE && entriesMap_.size() >= CACHE_CASE_THRESHOLD) {
316 useCache = true;
317 }
318 return useCache;
319 }
320
IsDirExist(const std::string & dir)321 bool ZipFile::IsDirExist(const std::string &dir)
322 {
323 if (dir.empty()) {
324 LOG(ERROR, ZIPARCHIVE) << "dir is empty";
325 return false;
326 }
327 if (IsRootDir(dir)) {
328 return true;
329 }
330 if (dir.find(WRONG_FILE_SEPARATOR) != std::string::npos) {
331 LOG(WARNING, ZIPARCHIVE) << "Wrong format";
332 return false;
333 }
334
335 auto tmpDir = dir;
336 if (tmpDir.front() == FILE_SEPARATOR_CHAR) {
337 tmpDir.erase(tmpDir.begin());
338 }
339 if (tmpDir.back() != FILE_SEPARATOR_CHAR) {
340 tmpDir.push_back(FILE_SEPARATOR_CHAR);
341 }
342 if (entriesMap_.count(tmpDir) > 0) {
343 return true;
344 }
345 tmpDir.pop_back();
346 if (entriesMap_.count(tmpDir) > 0) {
347 LOG(WARNING, ZIPARCHIVE) << "file not dir";
348 return false;
349 }
350
351 if (UseDirCache()) {
352 return IsDirExistCache(tmpDir);
353 }
354 return IsDirExistNormal(tmpDir);
355 }
356
GetAllFileList(const std::string & srcPath,std::vector<std::string> & assetList)357 void ZipFile::GetAllFileList(const std::string &srcPath, std::vector<std::string> &assetList)
358 {
359 if (srcPath.empty()) {
360 LOG(ERROR, ZIPARCHIVE) << "dir is empty";
361 return;
362 }
363 if (IsRootDir(srcPath)) {
364 for (const auto &[fileName, fileInfo] : entriesMap_) {
365 if (!fileName.empty() && fileName.back() != FILE_SEPARATOR_CHAR) {
366 assetList.push_back(fileName);
367 }
368 }
369 return;
370 }
371 if (srcPath.find(WRONG_FILE_SEPARATOR) != std::string::npos) {
372 LOG(WARNING, ZIPARCHIVE) << "Wrong format";
373 return;
374 }
375
376 auto tmpDir = srcPath;
377 if (tmpDir.front() == FILE_SEPARATOR_CHAR) {
378 tmpDir.erase(tmpDir.begin());
379 }
380 if (tmpDir.back() != FILE_SEPARATOR_CHAR) {
381 tmpDir.push_back(FILE_SEPARATOR_CHAR);
382 }
383 if (entriesMap_.count(tmpDir) > 0) {
384 return;
385 }
386 tmpDir.pop_back();
387 if (entriesMap_.count(tmpDir) > 0) {
388 LOG(WARNING, ZIPARCHIVE) << "file not dir";
389 return;
390 }
391
392 if (UseDirCache()) {
393 GetAllFileListCache(tmpDir, assetList);
394 } else {
395 GetAllFileListNormal(tmpDir, assetList);
396 }
397 }
398
GetChildNames(const std::string & srcPath,std::set<std::string> & fileSet)399 void ZipFile::GetChildNames(const std::string &srcPath, std::set<std::string> &fileSet)
400 {
401 if (srcPath.empty()) {
402 LOG(ERROR, ZIPARCHIVE) << "dir is empty";
403 return;
404 }
405 if (srcPath.find(WRONG_FILE_SEPARATOR) != std::string::npos) {
406 LOG(WARNING, ZIPARCHIVE) << "Wrong format";
407 return;
408 }
409 auto tmpDir = srcPath;
410 if (!IsRootDir(tmpDir)) {
411 if (tmpDir.front() == FILE_SEPARATOR_CHAR) {
412 tmpDir.erase(tmpDir.begin());
413 }
414 if (tmpDir.back() != FILE_SEPARATOR_CHAR) {
415 tmpDir.push_back(FILE_SEPARATOR_CHAR);
416 }
417 if (entriesMap_.count(tmpDir) > 0) {
418 return;
419 }
420 tmpDir.pop_back();
421 if (entriesMap_.count(tmpDir) > 0) {
422 LOG(WARNING, ZIPARCHIVE) << "file not dir";
423 return;
424 }
425 }
426
427 if (UseDirCache()) {
428 GetChildNamesCache(tmpDir, fileSet);
429 } else {
430 GetChildNamesNormal(tmpDir, fileSet);
431 }
432 }
433
IsDirExistCache(const std::string & dir)434 bool ZipFile::IsDirExistCache(const std::string &dir)
435 {
436 auto parent = GetDirRoot();
437 if (parent == nullptr) {
438 LOG(ERROR, ZIPARCHIVE) << "null parent";
439 return false;
440 }
441 size_t cur = 0;
442 do {
443 while (cur < dir.size() && dir[cur] == FILE_SEPARATOR_CHAR) {
444 cur++;
445 }
446 if (cur >= dir.size()) {
447 break;
448 }
449 auto next = dir.find(FILE_SEPARATOR_CHAR, cur);
450 auto nodeName = dir.substr(cur, next - cur);
451 auto it = parent->children.find(nodeName);
452 if (it == parent->children.end()) {
453 LOG(ERROR, ZIPARCHIVE) << "dir not found, dir : " << dir;
454 return false;
455 }
456 parent = it->second;
457 cur = next;
458 } while (cur != std::string::npos);
459
460 return true;
461 }
462
GetAllFileListCache(const std::string & srcPath,std::vector<std::string> & assetList)463 void ZipFile::GetAllFileListCache(const std::string &srcPath, std::vector<std::string> &assetList)
464 {
465 auto parent = GetDirRoot();
466 if (parent == nullptr) {
467 LOG(ERROR, ZIPARCHIVE) << "null parent";
468 return;
469 }
470
471 auto rootName = srcPath.back() == FILE_SEPARATOR_CHAR ? srcPath.substr(0, srcPath.length() - 1) : srcPath;
472
473 size_t cur = 0;
474 do {
475 while (cur < rootName.size() && rootName[cur] == FILE_SEPARATOR_CHAR) {
476 cur++;
477 }
478 if (cur >= rootName.size()) {
479 break;
480 }
481 auto next = rootName.find(FILE_SEPARATOR_CHAR, cur);
482 auto nodeName = rootName.substr(cur, next - cur);
483 auto it = parent->children.find(nodeName);
484 if (it == parent->children.end()) {
485 LOG(ERROR, ZIPARCHIVE) << "srcPath not found, srcPath : " << rootName;
486 return;
487 }
488 parent = it->second;
489 cur = next;
490 } while (cur != std::string::npos);
491
492 GetTreeFileList(parent, rootName, assetList);
493 }
494
GetChildNamesCache(const std::string & srcPath,std::set<std::string> & fileSet)495 void ZipFile::GetChildNamesCache(const std::string &srcPath, std::set<std::string> &fileSet)
496 {
497 size_t cur = 0;
498 auto parent = GetDirRoot();
499 if (parent == nullptr) {
500 LOG(ERROR, ZIPARCHIVE) << "null parent";
501 return;
502 }
503 do {
504 while (cur < srcPath.size() && srcPath[cur] == FILE_SEPARATOR_CHAR) {
505 cur++;
506 }
507 if (cur >= srcPath.size()) {
508 break;
509 }
510 auto next = srcPath.find(FILE_SEPARATOR_CHAR, cur);
511 auto nodeName = srcPath.substr(cur, next - cur);
512 auto it = parent->children.find(nodeName);
513 if (it == parent->children.end()) {
514 LOG(ERROR, ZIPARCHIVE) << "srcPath not found, srcPath : " << srcPath;
515 return;
516 }
517 parent = it->second;
518 cur = next;
519 } while (cur != std::string::npos);
520
521 for (const auto &child : parent->children) {
522 fileSet.insert(child.first);
523 }
524 }
525
IsDirExistNormal(const std::string & dir)526 bool ZipFile::IsDirExistNormal(const std::string &dir)
527 {
528 auto targetDir = dir;
529 if (targetDir.back() != FILE_SEPARATOR_CHAR) {
530 targetDir.push_back(FILE_SEPARATOR_CHAR);
531 }
532 for (const auto &[fileName, fileInfo] : entriesMap_) {
533 if (fileName.size() > targetDir.size() && fileName.substr(0, targetDir.size()) == targetDir) {
534 return true;
535 }
536 }
537 return false;
538 }
539
GetAllFileListNormal(const std::string & srcPath,std::vector<std::string> & assetList)540 void ZipFile::GetAllFileListNormal(const std::string &srcPath, std::vector<std::string> &assetList)
541 {
542 auto targetDir = srcPath;
543 if (targetDir.back() != FILE_SEPARATOR_CHAR) {
544 targetDir.push_back(FILE_SEPARATOR_CHAR);
545 }
546 for (const auto &[fileName, fileInfo] : entriesMap_) {
547 if (fileName.size() > targetDir.size() && fileName.back() != FILE_SEPARATOR_CHAR &&
548 fileName.substr(0, targetDir.size()) == targetDir) {
549 assetList.push_back(fileName);
550 }
551 }
552 }
553
GetChildNamesNormal(const std::string & srcPath,std::set<std::string> & fileSet)554 void ZipFile::GetChildNamesNormal(const std::string &srcPath, std::set<std::string> &fileSet)
555 {
556 auto targetDir = srcPath;
557 if (targetDir.back() != FILE_SEPARATOR_CHAR) {
558 targetDir.push_back(FILE_SEPARATOR_CHAR);
559 }
560 if (IsRootDir(srcPath)) {
561 for (const auto &[fileName, fileInfo] : entriesMap_) {
562 auto nextPos = fileName.find(FILE_SEPARATOR_CHAR);
563 fileSet.insert(nextPos == std::string::npos ? fileName : fileName.substr(0, nextPos));
564 }
565 return;
566 }
567 for (const auto &[fileName, fileInfo] : entriesMap_) {
568 if (fileName.size() > targetDir.size() && fileName.substr(0, targetDir.size()) == targetDir) {
569 fileSet.insert(fileName.substr(targetDir.size(),
570 fileName.find(FILE_SEPARATOR_CHAR, targetDir.size()) - targetDir.size()));
571 }
572 }
573 }
574
GetEntry(const std::string & entryName,ZipEntry & resultEntry) const575 bool ZipFile::GetEntry(const std::string &entryName, ZipEntry &resultEntry) const
576 {
577 auto iter = entriesMap_.find(entryName);
578 if (iter != entriesMap_.end()) {
579 resultEntry = iter->second;
580 return true;
581 }
582 return false;
583 }
584
GetLocalHeaderSize(const uint16_t nameSize,const uint16_t extraSize) const585 size_t ZipFile::GetLocalHeaderSize(const uint16_t nameSize, const uint16_t extraSize) const
586 {
587 return sizeof(LocalHeader) + nameSize + extraSize;
588 }
589
CheckDataDesc(const ZipEntry & zipEntry,const LocalHeader & localHeader) const590 bool ZipFile::CheckDataDesc(const ZipEntry &zipEntry, const LocalHeader &localHeader) const
591 {
592 uint32_t crcLocal = 0;
593 uint32_t compressedLocal = 0;
594 uint32_t uncompressedLocal = 0;
595
596 if ((localHeader.flags & FLAG_DATA_DESC) != 0U) { // use data desc
597 DataDesc dataDesc;
598 auto descPos = zipEntry.localHeaderOffset + GetLocalHeaderSize(localHeader.nameSize, localHeader.extraSize);
599 descPos += fileStartPos_ + zipEntry.compressedSize;
600
601 if (!zipFileReader_->ReadBuffer(reinterpret_cast<uint8_t *>(&dataDesc), descPos, sizeof(DataDesc))) {
602 LOG(ERROR, ZIPARCHIVE) << "ReadBuffer failed";
603 return false;
604 }
605
606 if (dataDesc.signature != DATA_DESC_SIGNATURE) {
607 LOG(ERROR, ZIPARCHIVE) << "check signature failed";
608 return false;
609 }
610
611 crcLocal = dataDesc.crc;
612 compressedLocal = dataDesc.compressedSize;
613 uncompressedLocal = dataDesc.uncompressedSize;
614 } else {
615 crcLocal = localHeader.crc;
616 compressedLocal = localHeader.compressedSize;
617 uncompressedLocal = localHeader.uncompressedSize;
618 }
619
620 if ((zipEntry.crc != crcLocal) || (zipEntry.compressedSize != compressedLocal) ||
621 (zipEntry.uncompressedSize != uncompressedLocal)) {
622 LOG(ERROR, ZIPARCHIVE) << "size corrupted";
623 return false;
624 }
625
626 return true;
627 }
628
CheckCoherencyLocalHeader(const ZipEntry & zipEntry,uint16_t & extraSize) const629 bool ZipFile::CheckCoherencyLocalHeader(const ZipEntry &zipEntry, uint16_t &extraSize) const
630 {
631 // current only support store and Z_DEFLATED method
632 if ((zipEntry.compressionMethod != Z_DEFLATED) && (zipEntry.compressionMethod != 0)) {
633 LOG(ERROR, ZIPARCHIVE) << "compressionMethod " << zipEntry.compressionMethod << " not support";
634 return false;
635 }
636
637 auto nameSize = zipEntry.fileName.length();
638 auto startPos = fileStartPos_ + zipEntry.localHeaderOffset;
639 size_t buffSize = sizeof(LocalHeader) + nameSize;
640 auto buff = zipFileReader_->ReadBuffer(startPos, buffSize);
641 if (buff.size() < buffSize) {
642 LOG(ERROR, ZIPARCHIVE) << "read header failed";
643 return false;
644 }
645
646 LocalHeader localHeader = {0};
647 if (memcpy_s(&localHeader, sizeof(LocalHeader), buff.data(), sizeof(LocalHeader)) != EOK) {
648 LOG(ERROR, ZIPARCHIVE) << "memcpy localheader failed";
649 return false;
650 }
651 if ((localHeader.signature != LOCAL_HEADER_SIGNATURE) ||
652 (zipEntry.compressionMethod != localHeader.compressionMethod)) {
653 LOG(ERROR, ZIPARCHIVE) << "signature or compressionMethod failed";
654 return false;
655 }
656
657 if (localHeader.nameSize != nameSize && nameSize < MAX_FILE_NAME - 1) {
658 LOG(ERROR, ZIPARCHIVE) << "name corrupted";
659 return false;
660 }
661 std::string fileName = buff.substr(sizeof(LocalHeader));
662 if (zipEntry.fileName != fileName) {
663 LOG(ERROR, ZIPARCHIVE) << "name corrupted";
664 return false;
665 }
666
667 if (!CheckDataDesc(zipEntry, localHeader)) {
668 LOG(ERROR, ZIPARCHIVE) << "check data desc failed";
669 return false;
670 }
671
672 extraSize = localHeader.extraSize;
673 return true;
674 }
675
GetEntryStart(const ZipEntry & zipEntry,const uint16_t extraSize) const676 size_t ZipFile::GetEntryStart(const ZipEntry &zipEntry, const uint16_t extraSize) const
677 {
678 ZipPos startOffset = zipEntry.localHeaderOffset;
679 // get data offset, add signature+localheader+namesize+extrasize
680 startOffset += GetLocalHeaderSize(zipEntry.fileName.length(), extraSize);
681 startOffset += fileStartPos_; // add file start relative to file stream
682
683 return startOffset;
684 }
685
InitZStream(z_stream & zstream) const686 bool ZipFile::InitZStream(z_stream &zstream) const
687 {
688 // init zlib stream
689 if (memset_s(&zstream, sizeof(z_stream), 0, sizeof(z_stream)) != 0) {
690 LOG(ERROR, ZIPARCHIVE) << "stream buffer init failed";
691 return false;
692 }
693 int32_t zlibErr = inflateInit2(&zstream, -MAX_WBITS);
694 if (zlibErr != Z_OK) {
695 LOG(ERROR, ZIPARCHIVE) << "init failed";
696 return false;
697 }
698
699 BytePtr bufOut = new (std::nothrow) Byte[UNZIP_BUF_OUT_LEN]; // NOLINT(modernize-use-auto)
700 if (bufOut == nullptr) {
701 LOG(ERROR, ZIPARCHIVE) << "null bufOut";
702 return false;
703 }
704
705 BytePtr bufIn = new (std::nothrow) Byte[UNZIP_BUF_IN_LEN]; // NOLINT(modernize-use-auto)
706 if (bufIn == nullptr) {
707 LOG(ERROR, ZIPARCHIVE) << "null bufIn";
708 delete[] bufOut;
709 return false;
710 }
711 zstream.next_out = bufOut;
712 zstream.next_in = bufIn;
713 zstream.avail_out = UNZIP_BUF_OUT_LEN;
714 return true;
715 }
716
GetEntryDataOffset(const ZipEntry & zipEntry,const uint16_t extraSize) const717 ZipPos ZipFile::GetEntryDataOffset(const ZipEntry &zipEntry, const uint16_t extraSize) const
718 {
719 // get entry data offset relative file
720 ZipPos offset = zipEntry.localHeaderOffset;
721
722 offset += GetLocalHeaderSize(zipEntry.fileName.length(), extraSize);
723 offset += fileStartPos_;
724
725 return offset;
726 }
727
GetDataOffsetRelative(const ZipEntry & zipEntry,ZipPos & offset,uint32_t & length) const728 bool ZipFile::GetDataOffsetRelative(const ZipEntry &zipEntry, ZipPos &offset, uint32_t &length) const
729 {
730 uint16_t extraSize = 0;
731 if (!CheckCoherencyLocalHeader(zipEntry, extraSize)) {
732 LOG(ERROR, ZIPARCHIVE) << "check coherency local header failed";
733 return false;
734 }
735
736 offset = GetEntryDataOffset(zipEntry, extraSize);
737 length = zipEntry.compressedSize;
738 return true;
739 }
740
741 // NOLINTNEXTLINE(modernize-avoid-c-arrays)
ExtractFileFromMMap(const std::string & file,void * mmapDataPtr,std::unique_ptr<uint8_t[]> & dataPtr,size_t & len) const742 bool ZipFile::ExtractFileFromMMap(const std::string &file, void *mmapDataPtr, std::unique_ptr<uint8_t[]> &dataPtr,
743 size_t &len) const
744 {
745 ZipEntry zipEntry;
746 if (!GetEntry(file, zipEntry)) {
747 LOG(ERROR, ZIPARCHIVE) << "not find file";
748 return false;
749 }
750
751 if (zipEntry.compressionMethod == 0U) {
752 LOG(ERROR, ZIPARCHIVE) << "file is not extracted, file: " << file;
753 return false;
754 }
755
756 uint16_t extraSize = 0;
757 if (!CheckCoherencyLocalHeader(zipEntry, extraSize)) {
758 LOG(ERROR, ZIPARCHIVE) << "check coherency local header failed";
759 return false;
760 }
761
762 return UnzipWithInflatedFromMMap(zipEntry, extraSize, mmapDataPtr, dataPtr, len);
763 }
764
UnzipWithInflatedFromMMap(const ZipEntry & zipEntry,const uint16_t extraSize,void * mmapDataPtr,std::unique_ptr<uint8_t[]> & dataPtr,size_t & len) const765 bool ZipFile::UnzipWithInflatedFromMMap(const ZipEntry &zipEntry, [[maybe_unused]] const uint16_t extraSize,
766 // NOLINTNEXTLINE(modernize-avoid-c-arrays)
767 void *mmapDataPtr, std::unique_ptr<uint8_t[]> &dataPtr, size_t &len) const
768 {
769 z_stream zstream;
770 if (!InitZStream(zstream)) {
771 LOG(ERROR, ZIPARCHIVE) << "init zstream failed";
772 return false;
773 }
774
775 BytePtr bufIn = zstream.next_in;
776 BytePtr bufOut = zstream.next_out;
777
778 bool ret = true;
779 int32_t zlibErr = Z_OK;
780 uint32_t remainCompressedSize = zipEntry.compressedSize;
781 size_t inflateLen = 0;
782 uint8_t errorTimes = 0;
783
784 len = zipEntry.uncompressedSize;
785 dataPtr = std::make_unique<uint8_t[]>(len); // NOLINT(modernize-avoid-c-arrays)
786 auto *dstDataPtr = static_cast<uint8_t *>(dataPtr.get());
787 void *mmapSrcDataPtr = mmapDataPtr;
788
789 while ((remainCompressedSize > 0) || (zstream.avail_in > 0)) {
790 if (!ReadZStreamFromMMap(bufIn, mmapSrcDataPtr, zstream, remainCompressedSize)) {
791 ret = false;
792 break;
793 }
794
795 zlibErr = inflate(&zstream, Z_SYNC_FLUSH);
796 if ((zlibErr >= Z_OK) && (zstream.msg != nullptr)) {
797 LOG(ERROR, ZIPARCHIVE) << "unzip failed, zlibErr: " << zlibErr << ", msg: " << zstream.msg;
798 ret = false;
799 break;
800 }
801
802 inflateLen = UNZIP_BUF_OUT_LEN - zstream.avail_out;
803 if (!CopyInflateOut(zstream, inflateLen, &dstDataPtr, bufOut, errorTimes)) {
804 break;
805 }
806 }
807
808 // free all dynamically allocated data structures except the next_in and next_out for this stream.
809 zlibErr = inflateEnd(&zstream);
810 if (zlibErr != Z_OK) {
811 LOG(ERROR, ZIPARCHIVE) << "inflateEnd failed, zlibErr: " << zlibErr;
812 ret = false;
813 }
814
815 delete[] bufOut;
816 delete[] bufIn;
817 return ret;
818 }
819
CopyInflateOut(z_stream & zstream,size_t inflateLen,uint8_t ** dstDataPtr,BytePtr bufOut,uint8_t & errorTimes) const820 bool ZipFile::CopyInflateOut(z_stream &zstream, size_t inflateLen, uint8_t **dstDataPtr, BytePtr bufOut,
821 uint8_t &errorTimes) const
822 {
823 if (inflateLen > 0) {
824 if (memcpy_s(*dstDataPtr, inflateLen, bufOut, inflateLen) != EOK) {
825 LOG(ERROR, ZIPARCHIVE) << "memcpy failed";
826 return false;
827 }
828
829 *dstDataPtr += inflateLen; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
830 zstream.next_out = bufOut;
831 zstream.avail_out = UNZIP_BUF_OUT_LEN;
832 errorTimes = 0;
833 } else {
834 errorTimes++;
835 }
836 if (errorTimes >= INFLATE_ERROR_TIMES) {
837 LOG(ERROR, ZIPARCHIVE) << "data is abnormal";
838 return false;
839 }
840
841 return true;
842 }
843
ReadZStreamFromMMap(const BytePtr & buffer,void * & dataPtr,z_stream & zstream,uint32_t & remainCompressedSize) const844 bool ZipFile::ReadZStreamFromMMap(const BytePtr &buffer, void *&dataPtr, z_stream &zstream,
845 uint32_t &remainCompressedSize) const
846 {
847 if (dataPtr == nullptr) {
848 LOG(ERROR, ZIPARCHIVE) << "dataPtr is nullptr";
849 return false;
850 }
851
852 auto *srcDataPtr = static_cast<uint8_t *>(dataPtr);
853 if (zstream.avail_in == 0) {
854 size_t remainBytes = (remainCompressedSize > UNZIP_BUF_IN_LEN) ? UNZIP_BUF_IN_LEN : remainCompressedSize;
855 size_t readBytes = sizeof(Byte) * remainBytes;
856 if (memcpy_s(buffer, readBytes, srcDataPtr, readBytes) != EOK) {
857 LOG(ERROR, ZIPARCHIVE) << "memcpy failed";
858 return false;
859 }
860 srcDataPtr += readBytes; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
861 remainCompressedSize -= remainBytes;
862 zstream.avail_in = remainBytes;
863 zstream.next_in = buffer;
864 }
865 dataPtr = srcDataPtr;
866 return true;
867 }
868
CreateFileMapper(const std::string & fileName,FileMapperType type) const869 std::unique_ptr<FileMapper> ZipFile::CreateFileMapper(const std::string &fileName, FileMapperType type) const
870 {
871 ZipEntry zipEntry;
872 if (!GetEntry(fileName, zipEntry)) {
873 return nullptr;
874 }
875
876 ZipPos offset = 0;
877 uint32_t length = 0;
878 if (!GetDataOffsetRelative(zipEntry, offset, length)) {
879 LOG(ERROR, ZIPARCHIVE) << "GetDataOffsetRelative failed hapPath: " << fileName;
880 return nullptr;
881 }
882 bool compress = zipEntry.compressionMethod > 0;
883 if (type == FileMapperType::SAFE_ABC && compress) {
884 LOG(WARNING, ZIPARCHIVE) << "Entry is compressed for safe: " << fileName;
885 }
886 std::unique_ptr<FileMapper> fileMapper = std::make_unique<FileMapper>();
887 if (zipFileReader_ == nullptr) {
888 LOG(ERROR, ZIPARCHIVE) << "zipFileReader_ is nullptr";
889 return nullptr;
890 }
891 auto result = false;
892 if (type == FileMapperType::NORMAL_MEM) {
893 result = fileMapper->CreateFileMapper(zipFileReader_, fileName, offset, length, compress);
894 } else {
895 result = fileMapper->CreateFileMapper(fileName, compress, zipFileReader_->GetFd(), offset, length, type);
896 if (result && type == FileMapperType::SAFE_ABC) {
897 zipFileReader_->SetClosable(false);
898 }
899 }
900
901 if (!result) {
902 return nullptr;
903 }
904 return fileMapper;
905 }
906
907 // NOLINTNEXTLINE(modernize-avoid-c-arrays)
ExtractToBufByName(const std::string & fileName,std::unique_ptr<uint8_t[]> & dataPtr,size_t & len) const908 bool ZipFile::ExtractToBufByName(const std::string &fileName, std::unique_ptr<uint8_t[]> &dataPtr, size_t &len) const
909 {
910 ZipEntry zipEntry;
911 if (!GetEntry(fileName, zipEntry)) {
912 if (fileName.length() > MAP_FILE_SUFFIX && fileName.substr(fileName.length() - MAP_FILE_SUFFIX) != ".map") {
913 LOG(ERROR, ZIPARCHIVE) << "GetEntry failed hapPath: " << fileName;
914 }
915 return false;
916 }
917 uint16_t extraSize = 0;
918 if (!CheckCoherencyLocalHeader(zipEntry, extraSize)) {
919 LOG(ERROR, ZIPARCHIVE) << "check coherency local header failed";
920 return false;
921 }
922 if (zipFileReader_ == nullptr) {
923 LOG(ERROR, ZIPARCHIVE) << "zipFileReader_ is nullptr";
924 return false;
925 }
926 ZipPos offset = GetEntryDataOffset(zipEntry, extraSize);
927 uint32_t length = zipEntry.compressedSize;
928 auto dataTmp = std::make_unique<uint8_t[]>(length); // NOLINT(modernize-avoid-c-arrays)
929 if (!zipFileReader_->ReadBuffer(dataTmp.get(), offset, length)) {
930 LOG(ERROR, ZIPARCHIVE) << "read file failed, len: " << length << ", fileName: " << fileName
931 << ", offset: " << offset;
932 dataTmp.reset();
933 return false;
934 }
935
936 if (zipEntry.compressionMethod > 0) {
937 return UnzipWithInflatedFromMMap(zipEntry, extraSize, dataTmp.get(), dataPtr, len);
938 }
939
940 len = length;
941 dataPtr = std::move(dataTmp);
942
943 return true;
944 }
945 } // namespace ark::extractor
946