1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "third_party/zlib/google/zip_reader.h"
6
7 #include <algorithm>
8 #include <utility>
9
10 #include "base/bind.h"
11 #include "base/check.h"
12 #include "base/files/file.h"
13 #include "base/files/file_util.h"
14 #include "base/i18n/icu_string_conversions.h"
15 #include "base/logging.h"
16 #include "base/numerics/safe_conversions.h"
17 #include "base/strings/strcat.h"
18 #include "base/strings/string_piece.h"
19 #include "base/strings/string_util.h"
20 #include "base/strings/utf_string_conversions.h"
21 #include "base/threading/sequenced_task_runner_handle.h"
22 #include "build/build_config.h"
23 #include "third_party/zlib/google/redact.h"
24 #include "third_party/zlib/google/zip_internal.h"
25
26 #if defined(USE_SYSTEM_MINIZIP)
27 #include <minizip/unzip.h>
28 #else
29 #include "third_party/zlib/contrib/minizip/unzip.h"
30 #if defined(OS_WIN)
31 #include "third_party/zlib/contrib/minizip/iowin32.h"
32 #endif // defined(OS_WIN)
33 #endif // defined(USE_SYSTEM_MINIZIP)
34
35 #if defined(OS_POSIX)
36 #include <sys/stat.h>
37 #endif
38
39 namespace zip {
40 namespace {
41
42 enum UnzipError : int;
43
operator <<(std::ostream & out,UnzipError error)44 std::ostream& operator<<(std::ostream& out, UnzipError error) {
45 #define SWITCH_ERR(X) \
46 case X: \
47 return out << #X;
48 switch (error) {
49 SWITCH_ERR(UNZ_OK);
50 SWITCH_ERR(UNZ_END_OF_LIST_OF_FILE);
51 SWITCH_ERR(UNZ_ERRNO);
52 SWITCH_ERR(UNZ_PARAMERROR);
53 SWITCH_ERR(UNZ_BADZIPFILE);
54 SWITCH_ERR(UNZ_INTERNALERROR);
55 SWITCH_ERR(UNZ_CRCERROR);
56 default:
57 return out << "UNZ" << static_cast<int>(error);
58 }
59 #undef SWITCH_ERR
60 }
61
62 // A writer delegate that writes to a given string.
63 class StringWriterDelegate : public WriterDelegate {
64 public:
StringWriterDelegate(std::string * output)65 explicit StringWriterDelegate(std::string* output) : output_(output) {}
66
67 // WriterDelegate methods:
WriteBytes(const char * data,int num_bytes)68 bool WriteBytes(const char* data, int num_bytes) override {
69 output_->append(data, num_bytes);
70 return true;
71 }
72
73 private:
74 std::string* const output_;
75 };
76
77 #if defined(OS_POSIX)
SetPosixFilePermissions(int fd,int mode)78 void SetPosixFilePermissions(int fd, int mode) {
79 base::stat_wrapper_t sb;
80 if (base::File::Fstat(fd, &sb)) {
81 return;
82 }
83 mode_t new_mode = sb.st_mode;
84 // Transfer the executable bit only if the file is readable.
85 if ((sb.st_mode & S_IRUSR) == S_IRUSR && (mode & S_IXUSR) == S_IXUSR) {
86 new_mode |= S_IXUSR;
87 }
88 if ((sb.st_mode & S_IRGRP) == S_IRGRP && (mode & S_IXGRP) == S_IXGRP) {
89 new_mode |= S_IXGRP;
90 }
91 if ((sb.st_mode & S_IROTH) == S_IROTH && (mode & S_IXOTH) == S_IXOTH) {
92 new_mode |= S_IXOTH;
93 }
94 if (new_mode != sb.st_mode) {
95 fchmod(fd, new_mode);
96 }
97 }
98 #endif
99
100 } // namespace
101
ZipReader()102 ZipReader::ZipReader() {
103 Reset();
104 }
105
~ZipReader()106 ZipReader::~ZipReader() {
107 Close();
108 }
109
Open(const base::FilePath & zip_path)110 bool ZipReader::Open(const base::FilePath& zip_path) {
111 DCHECK(!zip_file_);
112
113 // Use of "Unsafe" function does not look good, but there is no way to do
114 // this safely on Linux. See file_util.h for details.
115 zip_file_ = internal::OpenForUnzipping(zip_path.AsUTF8Unsafe());
116 if (!zip_file_) {
117 LOG(ERROR) << "Cannot open ZIP archive " << Redact(zip_path);
118 return false;
119 }
120
121 return OpenInternal();
122 }
123
OpenFromPlatformFile(base::PlatformFile zip_fd)124 bool ZipReader::OpenFromPlatformFile(base::PlatformFile zip_fd) {
125 DCHECK(!zip_file_);
126
127 #if defined(OS_POSIX) || defined(OS_FUCHSIA)
128 zip_file_ = internal::OpenFdForUnzipping(zip_fd);
129 #elif defined(OS_WIN)
130 zip_file_ = internal::OpenHandleForUnzipping(zip_fd);
131 #endif
132 if (!zip_file_) {
133 LOG(ERROR) << "Cannot open ZIP from file handle " << zip_fd;
134 return false;
135 }
136
137 return OpenInternal();
138 }
139
OpenFromString(const std::string & data)140 bool ZipReader::OpenFromString(const std::string& data) {
141 zip_file_ = internal::PrepareMemoryForUnzipping(data);
142 if (!zip_file_)
143 return false;
144 return OpenInternal();
145 }
146
Close()147 void ZipReader::Close() {
148 if (zip_file_) {
149 if (const UnzipError err{unzClose(zip_file_)}; err != UNZ_OK) {
150 LOG(ERROR) << "Error while closing ZIP archive: " << err;
151 }
152 }
153 Reset();
154 }
155
Next()156 const ZipReader::Entry* ZipReader::Next() {
157 DCHECK(zip_file_);
158
159 if (reached_end_)
160 return nullptr;
161
162 DCHECK(ok_);
163
164 // Move to the next entry if we're not trying to open the first entry.
165 if (next_index_ > 0) {
166 if (const UnzipError err{unzGoToNextFile(zip_file_)}; err != UNZ_OK) {
167 reached_end_ = true;
168 if (err != UNZ_END_OF_LIST_OF_FILE) {
169 LOG(ERROR) << "Cannot go to next entry in ZIP: " << err;
170 ok_ = false;
171 }
172 return nullptr;
173 }
174 }
175
176 next_index_++;
177
178 if (!OpenEntry()) {
179 reached_end_ = true;
180 ok_ = false;
181 return nullptr;
182 }
183
184 return &entry_;
185 }
186
OpenEntry()187 bool ZipReader::OpenEntry() {
188 DCHECK(zip_file_);
189
190 // Get entry info.
191 unz_file_info64 info = {};
192 char path_in_zip[internal::kZipMaxPath] = {};
193 if (const UnzipError err{unzGetCurrentFileInfo64(
194 zip_file_, &info, path_in_zip, sizeof(path_in_zip) - 1, nullptr, 0,
195 nullptr, 0)};
196 err != UNZ_OK) {
197 LOG(ERROR) << "Cannot get entry from ZIP: " << err;
198 return false;
199 }
200
201 entry_.path_in_original_encoding = path_in_zip;
202
203 // Convert path from original encoding to Unicode.
204 std::u16string path_in_utf16;
205 const char* const encoding = encoding_.empty() ? "UTF-8" : encoding_.c_str();
206 if (!base::CodepageToUTF16(entry_.path_in_original_encoding, encoding,
207 base::OnStringConversionError::SUBSTITUTE,
208 &path_in_utf16)) {
209 LOG(ERROR) << "Cannot convert path from encoding " << encoding;
210 return false;
211 }
212
213 entry_.path = base::FilePath::FromUTF16Unsafe(path_in_utf16);
214 entry_.original_size = info.uncompressed_size;
215
216 // Directory entries in ZIP have a path ending with "/".
217 entry_.is_directory = base::EndsWith(path_in_utf16, u"/");
218
219 // Check the entry path for directory traversal issues. We consider entry
220 // paths unsafe if they are absolute or if they contain "..". On Windows,
221 // IsAbsolute() returns false for paths starting with "/".
222 entry_.is_unsafe = entry_.path.ReferencesParent() ||
223 entry_.path.IsAbsolute() ||
224 base::StartsWith(path_in_utf16, u"/");
225
226 // The file content of this entry is encrypted if flag bit 0 is set.
227 entry_.is_encrypted = info.flag & 1;
228
229 // Construct the last modified time. The timezone info is not present in ZIP
230 // archives, so we construct the time as UTC.
231 base::Time::Exploded exploded_time = {};
232 exploded_time.year = info.tmu_date.tm_year;
233 exploded_time.month = info.tmu_date.tm_mon + 1; // 0-based vs 1-based
234 exploded_time.day_of_month = info.tmu_date.tm_mday;
235 exploded_time.hour = info.tmu_date.tm_hour;
236 exploded_time.minute = info.tmu_date.tm_min;
237 exploded_time.second = info.tmu_date.tm_sec;
238 exploded_time.millisecond = 0;
239
240 if (!base::Time::FromUTCExploded(exploded_time, &entry_.last_modified))
241 entry_.last_modified = base::Time::UnixEpoch();
242
243 #if defined(OS_POSIX)
244 entry_.posix_mode = (info.external_fa >> 16L) & (S_IRWXU | S_IRWXG | S_IRWXO);
245 #else
246 entry_.posix_mode = 0;
247 #endif
248
249 return true;
250 }
251
ExtractCurrentEntry(WriterDelegate * delegate,uint64_t num_bytes_to_extract) const252 bool ZipReader::ExtractCurrentEntry(WriterDelegate* delegate,
253 uint64_t num_bytes_to_extract) const {
254 DCHECK(zip_file_);
255 DCHECK_LT(0, next_index_);
256 DCHECK(ok_);
257 DCHECK(!reached_end_);
258
259 // Use password only for encrypted files. For non-encrypted files, no password
260 // is needed, and must be nullptr.
261 const char* const password =
262 entry_.is_encrypted ? password_.c_str() : nullptr;
263 if (const UnzipError err{unzOpenCurrentFilePassword(zip_file_, password)};
264 err != UNZ_OK) {
265 LOG(ERROR) << "Cannot open file " << Redact(entry_.path)
266 << " from ZIP: " << err;
267 return false;
268 }
269
270 DCHECK(delegate);
271 if (!delegate->PrepareOutput())
272 return false;
273
274 uint64_t remaining_capacity = num_bytes_to_extract;
275 bool entire_file_extracted = false;
276
277 while (remaining_capacity > 0) {
278 char buf[internal::kZipBufSize];
279 const int num_bytes_read =
280 unzReadCurrentFile(zip_file_, buf, internal::kZipBufSize);
281
282 if (num_bytes_read == 0) {
283 entire_file_extracted = true;
284 break;
285 }
286
287 if (num_bytes_read < 0) {
288 LOG(ERROR) << "Cannot read file " << Redact(entry_.path)
289 << " from ZIP: " << UnzipError(num_bytes_read);
290 break;
291 }
292
293 DCHECK_LT(0, num_bytes_read);
294 CHECK_LE(num_bytes_read, internal::kZipBufSize);
295
296 uint64_t num_bytes_to_write = std::min<uint64_t>(
297 remaining_capacity, base::checked_cast<uint64_t>(num_bytes_read));
298 if (!delegate->WriteBytes(buf, num_bytes_to_write))
299 break;
300
301 if (remaining_capacity == base::checked_cast<uint64_t>(num_bytes_read)) {
302 // Ensures function returns true if the entire file has been read.
303 const int n = unzReadCurrentFile(zip_file_, buf, 1);
304 entire_file_extracted = (n == 0);
305 LOG_IF(ERROR, n < 0) << "Cannot read file " << Redact(entry_.path)
306 << " from ZIP: " << UnzipError(n);
307 }
308
309 CHECK_GE(remaining_capacity, num_bytes_to_write);
310 remaining_capacity -= num_bytes_to_write;
311 }
312
313 if (const UnzipError err{unzCloseCurrentFile(zip_file_)}; err != UNZ_OK) {
314 LOG(ERROR) << "Cannot extract file " << Redact(entry_.path)
315 << " from ZIP: " << err;
316 entire_file_extracted = false;
317 }
318
319 if (entire_file_extracted) {
320 delegate->SetPosixFilePermissions(entry_.posix_mode);
321 if (entry_.last_modified != base::Time::UnixEpoch()) {
322 delegate->SetTimeModified(entry_.last_modified);
323 }
324 } else {
325 delegate->OnError();
326 }
327
328 return entire_file_extracted;
329 }
330
ExtractCurrentEntryToFilePathAsync(const base::FilePath & output_file_path,SuccessCallback success_callback,FailureCallback failure_callback,ProgressCallback progress_callback)331 void ZipReader::ExtractCurrentEntryToFilePathAsync(
332 const base::FilePath& output_file_path,
333 SuccessCallback success_callback,
334 FailureCallback failure_callback,
335 ProgressCallback progress_callback) {
336 DCHECK(zip_file_);
337 DCHECK_LT(0, next_index_);
338 DCHECK(ok_);
339 DCHECK(!reached_end_);
340
341 // If this is a directory, just create it and return.
342 if (entry_.is_directory) {
343 if (base::CreateDirectory(output_file_path)) {
344 base::SequencedTaskRunnerHandle::Get()->PostTask(
345 FROM_HERE, std::move(success_callback));
346 } else {
347 LOG(ERROR) << "Cannot create directory " << Redact(output_file_path);
348 base::SequencedTaskRunnerHandle::Get()->PostTask(
349 FROM_HERE, std::move(failure_callback));
350 }
351 return;
352 }
353
354 // Use password only for encrypted files. For non-encrypted files, no password
355 // is needed, and must be nullptr.
356 const char* const password =
357 entry_.is_encrypted ? password_.c_str() : nullptr;
358 if (const UnzipError err{unzOpenCurrentFilePassword(zip_file_, password)};
359 err != UNZ_OK) {
360 LOG(ERROR) << "Cannot open file " << Redact(entry_.path)
361 << " from ZIP: " << err;
362 base::SequencedTaskRunnerHandle::Get()->PostTask(
363 FROM_HERE, std::move(failure_callback));
364 return;
365 }
366
367 base::FilePath output_dir_path = output_file_path.DirName();
368 if (!base::CreateDirectory(output_dir_path)) {
369 LOG(ERROR) << "Cannot create directory " << Redact(output_dir_path);
370 base::SequencedTaskRunnerHandle::Get()->PostTask(
371 FROM_HERE, std::move(failure_callback));
372 return;
373 }
374
375 const int flags = base::File::FLAG_CREATE_ALWAYS | base::File::FLAG_WRITE;
376 base::File output_file(output_file_path, flags);
377
378 if (!output_file.IsValid()) {
379 LOG(ERROR) << "Cannot create file " << Redact(output_file_path);
380 base::SequencedTaskRunnerHandle::Get()->PostTask(
381 FROM_HERE, std::move(failure_callback));
382 return;
383 }
384
385 base::SequencedTaskRunnerHandle::Get()->PostTask(
386 FROM_HERE,
387 base::BindOnce(&ZipReader::ExtractChunk, weak_ptr_factory_.GetWeakPtr(),
388 std::move(output_file), std::move(success_callback),
389 std::move(failure_callback), std::move(progress_callback),
390 0 /* initial offset */));
391 }
392
ExtractCurrentEntryToString(uint64_t max_read_bytes,std::string * output) const393 bool ZipReader::ExtractCurrentEntryToString(uint64_t max_read_bytes,
394 std::string* output) const {
395 DCHECK(output);
396 DCHECK(zip_file_);
397 DCHECK_LT(0, next_index_);
398 DCHECK(ok_);
399 DCHECK(!reached_end_);
400
401 output->clear();
402
403 if (max_read_bytes == 0)
404 return true;
405
406 if (entry_.is_directory)
407 return true;
408
409 // The original_size is the best hint for the real size, so it saves doing
410 // reallocations for the common case when the uncompressed size is correct.
411 // However, we need to assume that the uncompressed size could be incorrect
412 // therefore this function needs to read as much data as possible.
413 output->reserve(base::checked_cast<size_t>(std::min<uint64_t>(
414 max_read_bytes, base::checked_cast<uint64_t>(entry_.original_size))));
415
416 StringWriterDelegate writer(output);
417 return ExtractCurrentEntry(&writer, max_read_bytes);
418 }
419
OpenInternal()420 bool ZipReader::OpenInternal() {
421 DCHECK(zip_file_);
422
423 unz_global_info zip_info = {}; // Zero-clear.
424 if (const UnzipError err{unzGetGlobalInfo(zip_file_, &zip_info)};
425 err != UNZ_OK) {
426 LOG(ERROR) << "Cannot get ZIP info: " << err;
427 return false;
428 }
429
430 num_entries_ = zip_info.number_entry;
431 reached_end_ = (num_entries_ <= 0);
432 ok_ = true;
433 return true;
434 }
435
Reset()436 void ZipReader::Reset() {
437 zip_file_ = nullptr;
438 num_entries_ = 0;
439 next_index_ = 0;
440 reached_end_ = true;
441 ok_ = false;
442 entry_ = {};
443 }
444
ExtractChunk(base::File output_file,SuccessCallback success_callback,FailureCallback failure_callback,ProgressCallback progress_callback,int64_t offset)445 void ZipReader::ExtractChunk(base::File output_file,
446 SuccessCallback success_callback,
447 FailureCallback failure_callback,
448 ProgressCallback progress_callback,
449 int64_t offset) {
450 char buffer[internal::kZipBufSize];
451
452 const int num_bytes_read =
453 unzReadCurrentFile(zip_file_, buffer, internal::kZipBufSize);
454
455 if (num_bytes_read == 0) {
456 if (const UnzipError err{unzCloseCurrentFile(zip_file_)}; err != UNZ_OK) {
457 LOG(ERROR) << "Cannot extract file " << Redact(entry_.path)
458 << " from ZIP: " << err;
459 std::move(failure_callback).Run();
460 return;
461 }
462
463 std::move(success_callback).Run();
464 return;
465 }
466
467 if (num_bytes_read < 0) {
468 LOG(ERROR) << "Cannot read file " << Redact(entry_.path)
469 << " from ZIP: " << UnzipError(num_bytes_read);
470 std::move(failure_callback).Run();
471 return;
472 }
473
474 if (num_bytes_read != output_file.Write(offset, buffer, num_bytes_read)) {
475 LOG(ERROR) << "Cannot write " << num_bytes_read
476 << " bytes to file at offset " << offset;
477 std::move(failure_callback).Run();
478 return;
479 }
480
481 offset += num_bytes_read;
482 progress_callback.Run(offset);
483
484 base::SequencedTaskRunnerHandle::Get()->PostTask(
485 FROM_HERE,
486 base::BindOnce(&ZipReader::ExtractChunk, weak_ptr_factory_.GetWeakPtr(),
487 std::move(output_file), std::move(success_callback),
488 std::move(failure_callback), std::move(progress_callback),
489 offset));
490 }
491
492 // FileWriterDelegate ----------------------------------------------------------
493
FileWriterDelegate(base::File * file)494 FileWriterDelegate::FileWriterDelegate(base::File* file) : file_(file) {
495 DCHECK(file_);
496 }
497
FileWriterDelegate(base::File owned_file)498 FileWriterDelegate::FileWriterDelegate(base::File owned_file)
499 : owned_file_(std::move(owned_file)) {
500 DCHECK_EQ(file_, &owned_file_);
501 }
502
~FileWriterDelegate()503 FileWriterDelegate::~FileWriterDelegate() {}
504
PrepareOutput()505 bool FileWriterDelegate::PrepareOutput() {
506 DCHECK(file_);
507 const bool ok = file_->IsValid();
508 if (ok) {
509 DCHECK_EQ(file_->GetLength(), 0)
510 << " The output file should be initially empty";
511 }
512 return ok;
513 }
514
WriteBytes(const char * data,int num_bytes)515 bool FileWriterDelegate::WriteBytes(const char* data, int num_bytes) {
516 int bytes_written = file_->WriteAtCurrentPos(data, num_bytes);
517 if (bytes_written > 0)
518 file_length_ += bytes_written;
519 return bytes_written == num_bytes;
520 }
521
SetTimeModified(const base::Time & time)522 void FileWriterDelegate::SetTimeModified(const base::Time& time) {
523 file_->SetTimes(base::Time::Now(), time);
524 }
525
SetPosixFilePermissions(int mode)526 void FileWriterDelegate::SetPosixFilePermissions(int mode) {
527 #if defined(OS_POSIX)
528 zip::SetPosixFilePermissions(file_->GetPlatformFile(), mode);
529 #endif
530 }
531
OnError()532 void FileWriterDelegate::OnError() {
533 file_length_ = 0;
534 file_->SetLength(0);
535 }
536
537 // FilePathWriterDelegate ------------------------------------------------------
538
FilePathWriterDelegate(base::FilePath output_file_path)539 FilePathWriterDelegate::FilePathWriterDelegate(base::FilePath output_file_path)
540 : FileWriterDelegate(base::File()),
541 output_file_path_(std::move(output_file_path)) {}
542
~FilePathWriterDelegate()543 FilePathWriterDelegate::~FilePathWriterDelegate() {}
544
PrepareOutput()545 bool FilePathWriterDelegate::PrepareOutput() {
546 // We can't rely on parent directory entries being specified in the
547 // zip, so we make sure they are created.
548 if (const base::FilePath dir = output_file_path_.DirName();
549 !base::CreateDirectory(dir)) {
550 PLOG(ERROR) << "Cannot create directory " << Redact(dir);
551 return false;
552 }
553
554 owned_file_.Initialize(output_file_path_,
555 base::File::FLAG_CREATE | base::File::FLAG_WRITE);
556 PLOG_IF(ERROR, !owned_file_.IsValid())
557 << "Cannot create file " << Redact(output_file_path_) << ": "
558 << base::File::ErrorToString(owned_file_.error_details());
559 return FileWriterDelegate::PrepareOutput();
560 }
561
OnError()562 void FilePathWriterDelegate::OnError() {
563 FileWriterDelegate::OnError();
564 owned_file_.Close();
565
566 if (!base::DeleteFile(output_file_path_)) {
567 LOG(ERROR) << "Cannot delete partially extracted file "
568 << Redact(output_file_path_);
569 }
570 }
571
572 } // namespace zip
573