• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "third_party/zlib/google/zip_reader.h"
6 
7 #include <algorithm>
8 #include <utility>
9 
10 #include "base/bind.h"
11 #include "base/check.h"
12 #include "base/files/file.h"
13 #include "base/files/file_util.h"
14 #include "base/i18n/icu_string_conversions.h"
15 #include "base/logging.h"
16 #include "base/numerics/safe_conversions.h"
17 #include "base/strings/strcat.h"
18 #include "base/strings/string_piece.h"
19 #include "base/strings/string_util.h"
20 #include "base/strings/utf_string_conversions.h"
21 #include "base/threading/sequenced_task_runner_handle.h"
22 #include "build/build_config.h"
23 #include "third_party/zlib/google/redact.h"
24 #include "third_party/zlib/google/zip_internal.h"
25 
26 #if defined(USE_SYSTEM_MINIZIP)
27 #include <minizip/unzip.h>
28 #else
29 #include "third_party/zlib/contrib/minizip/unzip.h"
30 #if defined(OS_WIN)
31 #include "third_party/zlib/contrib/minizip/iowin32.h"
32 #endif  // defined(OS_WIN)
33 #endif  // defined(USE_SYSTEM_MINIZIP)
34 
35 #if defined(OS_POSIX)
36 #include <sys/stat.h>
37 #endif
38 
39 namespace zip {
40 namespace {
41 
42 enum UnzipError : int;
43 
operator <<(std::ostream & out,UnzipError error)44 std::ostream& operator<<(std::ostream& out, UnzipError error) {
45 #define SWITCH_ERR(X) \
46   case X:             \
47     return out << #X;
48   switch (error) {
49     SWITCH_ERR(UNZ_OK);
50     SWITCH_ERR(UNZ_END_OF_LIST_OF_FILE);
51     SWITCH_ERR(UNZ_ERRNO);
52     SWITCH_ERR(UNZ_PARAMERROR);
53     SWITCH_ERR(UNZ_BADZIPFILE);
54     SWITCH_ERR(UNZ_INTERNALERROR);
55     SWITCH_ERR(UNZ_CRCERROR);
56     default:
57       return out << "UNZ" << static_cast<int>(error);
58   }
59 #undef SWITCH_ERR
60 }
61 
62 // A writer delegate that writes to a given string.
63 class StringWriterDelegate : public WriterDelegate {
64  public:
StringWriterDelegate(std::string * output)65   explicit StringWriterDelegate(std::string* output) : output_(output) {}
66 
67   // WriterDelegate methods:
WriteBytes(const char * data,int num_bytes)68   bool WriteBytes(const char* data, int num_bytes) override {
69     output_->append(data, num_bytes);
70     return true;
71   }
72 
73  private:
74   std::string* const output_;
75 };
76 
77 #if defined(OS_POSIX)
SetPosixFilePermissions(int fd,int mode)78 void SetPosixFilePermissions(int fd, int mode) {
79   base::stat_wrapper_t sb;
80   if (base::File::Fstat(fd, &sb)) {
81     return;
82   }
83   mode_t new_mode = sb.st_mode;
84   // Transfer the executable bit only if the file is readable.
85   if ((sb.st_mode & S_IRUSR) == S_IRUSR && (mode & S_IXUSR) == S_IXUSR) {
86     new_mode |= S_IXUSR;
87   }
88   if ((sb.st_mode & S_IRGRP) == S_IRGRP && (mode & S_IXGRP) == S_IXGRP) {
89     new_mode |= S_IXGRP;
90   }
91   if ((sb.st_mode & S_IROTH) == S_IROTH && (mode & S_IXOTH) == S_IXOTH) {
92     new_mode |= S_IXOTH;
93   }
94   if (new_mode != sb.st_mode) {
95     fchmod(fd, new_mode);
96   }
97 }
98 #endif
99 
100 }  // namespace
101 
ZipReader()102 ZipReader::ZipReader() {
103   Reset();
104 }
105 
~ZipReader()106 ZipReader::~ZipReader() {
107   Close();
108 }
109 
Open(const base::FilePath & zip_path)110 bool ZipReader::Open(const base::FilePath& zip_path) {
111   DCHECK(!zip_file_);
112 
113   // Use of "Unsafe" function does not look good, but there is no way to do
114   // this safely on Linux. See file_util.h for details.
115   zip_file_ = internal::OpenForUnzipping(zip_path.AsUTF8Unsafe());
116   if (!zip_file_) {
117     LOG(ERROR) << "Cannot open ZIP archive " << Redact(zip_path);
118     return false;
119   }
120 
121   return OpenInternal();
122 }
123 
OpenFromPlatformFile(base::PlatformFile zip_fd)124 bool ZipReader::OpenFromPlatformFile(base::PlatformFile zip_fd) {
125   DCHECK(!zip_file_);
126 
127 #if defined(OS_POSIX) || defined(OS_FUCHSIA)
128   zip_file_ = internal::OpenFdForUnzipping(zip_fd);
129 #elif defined(OS_WIN)
130   zip_file_ = internal::OpenHandleForUnzipping(zip_fd);
131 #endif
132   if (!zip_file_) {
133     LOG(ERROR) << "Cannot open ZIP from file handle " << zip_fd;
134     return false;
135   }
136 
137   return OpenInternal();
138 }
139 
OpenFromString(const std::string & data)140 bool ZipReader::OpenFromString(const std::string& data) {
141   zip_file_ = internal::PrepareMemoryForUnzipping(data);
142   if (!zip_file_)
143     return false;
144   return OpenInternal();
145 }
146 
Close()147 void ZipReader::Close() {
148   if (zip_file_) {
149     if (const UnzipError err{unzClose(zip_file_)}; err != UNZ_OK) {
150       LOG(ERROR) << "Error while closing ZIP archive: " << err;
151     }
152   }
153   Reset();
154 }
155 
Next()156 const ZipReader::Entry* ZipReader::Next() {
157   DCHECK(zip_file_);
158 
159   if (reached_end_)
160     return nullptr;
161 
162   DCHECK(ok_);
163 
164   // Move to the next entry if we're not trying to open the first entry.
165   if (next_index_ > 0) {
166     if (const UnzipError err{unzGoToNextFile(zip_file_)}; err != UNZ_OK) {
167       reached_end_ = true;
168       if (err != UNZ_END_OF_LIST_OF_FILE) {
169         LOG(ERROR) << "Cannot go to next entry in ZIP: " << err;
170         ok_ = false;
171       }
172       return nullptr;
173     }
174   }
175 
176   next_index_++;
177 
178   if (!OpenEntry()) {
179     reached_end_ = true;
180     ok_ = false;
181     return nullptr;
182   }
183 
184   return &entry_;
185 }
186 
OpenEntry()187 bool ZipReader::OpenEntry() {
188   DCHECK(zip_file_);
189 
190   // Get entry info.
191   unz_file_info64 info = {};
192   char path_in_zip[internal::kZipMaxPath] = {};
193   if (const UnzipError err{unzGetCurrentFileInfo64(
194           zip_file_, &info, path_in_zip, sizeof(path_in_zip) - 1, nullptr, 0,
195           nullptr, 0)};
196       err != UNZ_OK) {
197     LOG(ERROR) << "Cannot get entry from ZIP: " << err;
198     return false;
199   }
200 
201   entry_.path_in_original_encoding = path_in_zip;
202 
203   // Convert path from original encoding to Unicode.
204   std::u16string path_in_utf16;
205   const char* const encoding = encoding_.empty() ? "UTF-8" : encoding_.c_str();
206   if (!base::CodepageToUTF16(entry_.path_in_original_encoding, encoding,
207                              base::OnStringConversionError::SUBSTITUTE,
208                              &path_in_utf16)) {
209     LOG(ERROR) << "Cannot convert path from encoding " << encoding;
210     return false;
211   }
212 
213   entry_.path = base::FilePath::FromUTF16Unsafe(path_in_utf16);
214   entry_.original_size = info.uncompressed_size;
215 
216   // Directory entries in ZIP have a path ending with "/".
217   entry_.is_directory = base::EndsWith(path_in_utf16, u"/");
218 
219   // Check the entry path for directory traversal issues. We consider entry
220   // paths unsafe if they are absolute or if they contain "..". On Windows,
221   // IsAbsolute() returns false for paths starting with "/".
222   entry_.is_unsafe = entry_.path.ReferencesParent() ||
223                      entry_.path.IsAbsolute() ||
224                      base::StartsWith(path_in_utf16, u"/");
225 
226   // The file content of this entry is encrypted if flag bit 0 is set.
227   entry_.is_encrypted = info.flag & 1;
228 
229   // Construct the last modified time. The timezone info is not present in ZIP
230   // archives, so we construct the time as UTC.
231   base::Time::Exploded exploded_time = {};
232   exploded_time.year = info.tmu_date.tm_year;
233   exploded_time.month = info.tmu_date.tm_mon + 1;  // 0-based vs 1-based
234   exploded_time.day_of_month = info.tmu_date.tm_mday;
235   exploded_time.hour = info.tmu_date.tm_hour;
236   exploded_time.minute = info.tmu_date.tm_min;
237   exploded_time.second = info.tmu_date.tm_sec;
238   exploded_time.millisecond = 0;
239 
240   if (!base::Time::FromUTCExploded(exploded_time, &entry_.last_modified))
241     entry_.last_modified = base::Time::UnixEpoch();
242 
243 #if defined(OS_POSIX)
244   entry_.posix_mode = (info.external_fa >> 16L) & (S_IRWXU | S_IRWXG | S_IRWXO);
245 #else
246   entry_.posix_mode = 0;
247 #endif
248 
249   return true;
250 }
251 
ExtractCurrentEntry(WriterDelegate * delegate,uint64_t num_bytes_to_extract) const252 bool ZipReader::ExtractCurrentEntry(WriterDelegate* delegate,
253                                     uint64_t num_bytes_to_extract) const {
254   DCHECK(zip_file_);
255   DCHECK_LT(0, next_index_);
256   DCHECK(ok_);
257   DCHECK(!reached_end_);
258 
259   // Use password only for encrypted files. For non-encrypted files, no password
260   // is needed, and must be nullptr.
261   const char* const password =
262       entry_.is_encrypted ? password_.c_str() : nullptr;
263   if (const UnzipError err{unzOpenCurrentFilePassword(zip_file_, password)};
264       err != UNZ_OK) {
265     LOG(ERROR) << "Cannot open file " << Redact(entry_.path)
266                << " from ZIP: " << err;
267     return false;
268   }
269 
270   DCHECK(delegate);
271   if (!delegate->PrepareOutput())
272     return false;
273 
274   uint64_t remaining_capacity = num_bytes_to_extract;
275   bool entire_file_extracted = false;
276 
277   while (remaining_capacity > 0) {
278     char buf[internal::kZipBufSize];
279     const int num_bytes_read =
280         unzReadCurrentFile(zip_file_, buf, internal::kZipBufSize);
281 
282     if (num_bytes_read == 0) {
283       entire_file_extracted = true;
284       break;
285     }
286 
287     if (num_bytes_read < 0) {
288       LOG(ERROR) << "Cannot read file " << Redact(entry_.path)
289                  << " from ZIP: " << UnzipError(num_bytes_read);
290       break;
291     }
292 
293     DCHECK_LT(0, num_bytes_read);
294     CHECK_LE(num_bytes_read, internal::kZipBufSize);
295 
296     uint64_t num_bytes_to_write = std::min<uint64_t>(
297         remaining_capacity, base::checked_cast<uint64_t>(num_bytes_read));
298     if (!delegate->WriteBytes(buf, num_bytes_to_write))
299       break;
300 
301     if (remaining_capacity == base::checked_cast<uint64_t>(num_bytes_read)) {
302       // Ensures function returns true if the entire file has been read.
303       const int n = unzReadCurrentFile(zip_file_, buf, 1);
304       entire_file_extracted = (n == 0);
305       LOG_IF(ERROR, n < 0) << "Cannot read file " << Redact(entry_.path)
306                            << " from ZIP: " << UnzipError(n);
307     }
308 
309     CHECK_GE(remaining_capacity, num_bytes_to_write);
310     remaining_capacity -= num_bytes_to_write;
311   }
312 
313   if (const UnzipError err{unzCloseCurrentFile(zip_file_)}; err != UNZ_OK) {
314     LOG(ERROR) << "Cannot extract file " << Redact(entry_.path)
315                << " from ZIP: " << err;
316     entire_file_extracted = false;
317   }
318 
319   if (entire_file_extracted) {
320     delegate->SetPosixFilePermissions(entry_.posix_mode);
321     if (entry_.last_modified != base::Time::UnixEpoch()) {
322       delegate->SetTimeModified(entry_.last_modified);
323     }
324   } else {
325     delegate->OnError();
326   }
327 
328   return entire_file_extracted;
329 }
330 
ExtractCurrentEntryToFilePathAsync(const base::FilePath & output_file_path,SuccessCallback success_callback,FailureCallback failure_callback,ProgressCallback progress_callback)331 void ZipReader::ExtractCurrentEntryToFilePathAsync(
332     const base::FilePath& output_file_path,
333     SuccessCallback success_callback,
334     FailureCallback failure_callback,
335     ProgressCallback progress_callback) {
336   DCHECK(zip_file_);
337   DCHECK_LT(0, next_index_);
338   DCHECK(ok_);
339   DCHECK(!reached_end_);
340 
341   // If this is a directory, just create it and return.
342   if (entry_.is_directory) {
343     if (base::CreateDirectory(output_file_path)) {
344       base::SequencedTaskRunnerHandle::Get()->PostTask(
345           FROM_HERE, std::move(success_callback));
346     } else {
347       LOG(ERROR) << "Cannot create directory " << Redact(output_file_path);
348       base::SequencedTaskRunnerHandle::Get()->PostTask(
349           FROM_HERE, std::move(failure_callback));
350     }
351     return;
352   }
353 
354   // Use password only for encrypted files. For non-encrypted files, no password
355   // is needed, and must be nullptr.
356   const char* const password =
357       entry_.is_encrypted ? password_.c_str() : nullptr;
358   if (const UnzipError err{unzOpenCurrentFilePassword(zip_file_, password)};
359       err != UNZ_OK) {
360     LOG(ERROR) << "Cannot open file " << Redact(entry_.path)
361                << " from ZIP: " << err;
362     base::SequencedTaskRunnerHandle::Get()->PostTask(
363         FROM_HERE, std::move(failure_callback));
364     return;
365   }
366 
367   base::FilePath output_dir_path = output_file_path.DirName();
368   if (!base::CreateDirectory(output_dir_path)) {
369     LOG(ERROR) << "Cannot create directory " << Redact(output_dir_path);
370     base::SequencedTaskRunnerHandle::Get()->PostTask(
371         FROM_HERE, std::move(failure_callback));
372     return;
373   }
374 
375   const int flags = base::File::FLAG_CREATE_ALWAYS | base::File::FLAG_WRITE;
376   base::File output_file(output_file_path, flags);
377 
378   if (!output_file.IsValid()) {
379     LOG(ERROR) << "Cannot create file " << Redact(output_file_path);
380     base::SequencedTaskRunnerHandle::Get()->PostTask(
381         FROM_HERE, std::move(failure_callback));
382     return;
383   }
384 
385   base::SequencedTaskRunnerHandle::Get()->PostTask(
386       FROM_HERE,
387       base::BindOnce(&ZipReader::ExtractChunk, weak_ptr_factory_.GetWeakPtr(),
388                      std::move(output_file), std::move(success_callback),
389                      std::move(failure_callback), std::move(progress_callback),
390                      0 /* initial offset */));
391 }
392 
ExtractCurrentEntryToString(uint64_t max_read_bytes,std::string * output) const393 bool ZipReader::ExtractCurrentEntryToString(uint64_t max_read_bytes,
394                                             std::string* output) const {
395   DCHECK(output);
396   DCHECK(zip_file_);
397   DCHECK_LT(0, next_index_);
398   DCHECK(ok_);
399   DCHECK(!reached_end_);
400 
401   output->clear();
402 
403   if (max_read_bytes == 0)
404     return true;
405 
406   if (entry_.is_directory)
407     return true;
408 
409   // The original_size is the best hint for the real size, so it saves doing
410   // reallocations for the common case when the uncompressed size is correct.
411   // However, we need to assume that the uncompressed size could be incorrect
412   // therefore this function needs to read as much data as possible.
413   output->reserve(base::checked_cast<size_t>(std::min<uint64_t>(
414       max_read_bytes, base::checked_cast<uint64_t>(entry_.original_size))));
415 
416   StringWriterDelegate writer(output);
417   return ExtractCurrentEntry(&writer, max_read_bytes);
418 }
419 
OpenInternal()420 bool ZipReader::OpenInternal() {
421   DCHECK(zip_file_);
422 
423   unz_global_info zip_info = {};  // Zero-clear.
424   if (const UnzipError err{unzGetGlobalInfo(zip_file_, &zip_info)};
425       err != UNZ_OK) {
426     LOG(ERROR) << "Cannot get ZIP info: " << err;
427     return false;
428   }
429 
430   num_entries_ = zip_info.number_entry;
431   reached_end_ = (num_entries_ <= 0);
432   ok_ = true;
433   return true;
434 }
435 
Reset()436 void ZipReader::Reset() {
437   zip_file_ = nullptr;
438   num_entries_ = 0;
439   next_index_ = 0;
440   reached_end_ = true;
441   ok_ = false;
442   entry_ = {};
443 }
444 
ExtractChunk(base::File output_file,SuccessCallback success_callback,FailureCallback failure_callback,ProgressCallback progress_callback,int64_t offset)445 void ZipReader::ExtractChunk(base::File output_file,
446                              SuccessCallback success_callback,
447                              FailureCallback failure_callback,
448                              ProgressCallback progress_callback,
449                              int64_t offset) {
450   char buffer[internal::kZipBufSize];
451 
452   const int num_bytes_read =
453       unzReadCurrentFile(zip_file_, buffer, internal::kZipBufSize);
454 
455   if (num_bytes_read == 0) {
456     if (const UnzipError err{unzCloseCurrentFile(zip_file_)}; err != UNZ_OK) {
457       LOG(ERROR) << "Cannot extract file " << Redact(entry_.path)
458                  << " from ZIP: " << err;
459       std::move(failure_callback).Run();
460       return;
461     }
462 
463     std::move(success_callback).Run();
464     return;
465   }
466 
467   if (num_bytes_read < 0) {
468     LOG(ERROR) << "Cannot read file " << Redact(entry_.path)
469                << " from ZIP: " << UnzipError(num_bytes_read);
470     std::move(failure_callback).Run();
471     return;
472   }
473 
474   if (num_bytes_read != output_file.Write(offset, buffer, num_bytes_read)) {
475     LOG(ERROR) << "Cannot write " << num_bytes_read
476                << " bytes to file at offset " << offset;
477     std::move(failure_callback).Run();
478     return;
479   }
480 
481   offset += num_bytes_read;
482   progress_callback.Run(offset);
483 
484   base::SequencedTaskRunnerHandle::Get()->PostTask(
485       FROM_HERE,
486       base::BindOnce(&ZipReader::ExtractChunk, weak_ptr_factory_.GetWeakPtr(),
487                      std::move(output_file), std::move(success_callback),
488                      std::move(failure_callback), std::move(progress_callback),
489                      offset));
490 }
491 
492 // FileWriterDelegate ----------------------------------------------------------
493 
FileWriterDelegate(base::File * file)494 FileWriterDelegate::FileWriterDelegate(base::File* file) : file_(file) {
495   DCHECK(file_);
496 }
497 
FileWriterDelegate(base::File owned_file)498 FileWriterDelegate::FileWriterDelegate(base::File owned_file)
499     : owned_file_(std::move(owned_file)) {
500   DCHECK_EQ(file_, &owned_file_);
501 }
502 
~FileWriterDelegate()503 FileWriterDelegate::~FileWriterDelegate() {}
504 
PrepareOutput()505 bool FileWriterDelegate::PrepareOutput() {
506   DCHECK(file_);
507   const bool ok = file_->IsValid();
508   if (ok) {
509     DCHECK_EQ(file_->GetLength(), 0)
510         << " The output file should be initially empty";
511   }
512   return ok;
513 }
514 
WriteBytes(const char * data,int num_bytes)515 bool FileWriterDelegate::WriteBytes(const char* data, int num_bytes) {
516   int bytes_written = file_->WriteAtCurrentPos(data, num_bytes);
517   if (bytes_written > 0)
518     file_length_ += bytes_written;
519   return bytes_written == num_bytes;
520 }
521 
SetTimeModified(const base::Time & time)522 void FileWriterDelegate::SetTimeModified(const base::Time& time) {
523   file_->SetTimes(base::Time::Now(), time);
524 }
525 
SetPosixFilePermissions(int mode)526 void FileWriterDelegate::SetPosixFilePermissions(int mode) {
527 #if defined(OS_POSIX)
528   zip::SetPosixFilePermissions(file_->GetPlatformFile(), mode);
529 #endif
530 }
531 
OnError()532 void FileWriterDelegate::OnError() {
533   file_length_ = 0;
534   file_->SetLength(0);
535 }
536 
537 // FilePathWriterDelegate ------------------------------------------------------
538 
FilePathWriterDelegate(base::FilePath output_file_path)539 FilePathWriterDelegate::FilePathWriterDelegate(base::FilePath output_file_path)
540     : FileWriterDelegate(base::File()),
541       output_file_path_(std::move(output_file_path)) {}
542 
~FilePathWriterDelegate()543 FilePathWriterDelegate::~FilePathWriterDelegate() {}
544 
PrepareOutput()545 bool FilePathWriterDelegate::PrepareOutput() {
546   // We can't rely on parent directory entries being specified in the
547   // zip, so we make sure they are created.
548   if (const base::FilePath dir = output_file_path_.DirName();
549       !base::CreateDirectory(dir)) {
550     PLOG(ERROR) << "Cannot create directory " << Redact(dir);
551     return false;
552   }
553 
554   owned_file_.Initialize(output_file_path_,
555                          base::File::FLAG_CREATE | base::File::FLAG_WRITE);
556   PLOG_IF(ERROR, !owned_file_.IsValid())
557       << "Cannot create file " << Redact(output_file_path_) << ": "
558       << base::File::ErrorToString(owned_file_.error_details());
559   return FileWriterDelegate::PrepareOutput();
560 }
561 
OnError()562 void FilePathWriterDelegate::OnError() {
563   FileWriterDelegate::OnError();
564   owned_file_.Close();
565 
566   if (!base::DeleteFile(output_file_path_)) {
567     LOG(ERROR) << "Cannot delete partially extracted file "
568                << Redact(output_file_path_);
569   }
570 }
571 
572 }  // namespace zip
573