1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "third_party/zlib/google/zip_reader.h"
6
7 #include <utility>
8
9 #include "base/bind.h"
10 #include "base/files/file.h"
11 #include "base/logging.h"
12 #include "base/macros.h"
13 #include "base/single_thread_task_runner.h"
14 #include "base/strings/string_util.h"
15 #include "base/strings/utf_string_conversions.h"
16 #include "base/threading/thread_task_runner_handle.h"
17 #include "build/build_config.h"
18 #include "third_party/zlib/google/zip_internal.h"
19
20 #if defined(USE_SYSTEM_MINIZIP)
21 #include <minizip/unzip.h>
22 #else
23 #include "third_party/zlib/contrib/minizip/unzip.h"
24 #if defined(OS_WIN)
25 #include "third_party/zlib/contrib/minizip/iowin32.h"
26 #endif // defined(OS_WIN)
27 #endif // defined(USE_SYSTEM_MINIZIP)
28
29 namespace zip {
30
31 namespace {
32
33 // StringWriterDelegate --------------------------------------------------------
34
35 // A writer delegate that writes no more than |max_read_bytes| to a given
36 // std::string.
37 class StringWriterDelegate : public WriterDelegate {
38 public:
39 StringWriterDelegate(size_t max_read_bytes, std::string* output);
40 ~StringWriterDelegate() override;
41
42 // WriterDelegate methods:
43
44 // Returns true.
45 bool PrepareOutput() override;
46
47 // Appends |num_bytes| bytes from |data| to the output string. Returns false
48 // if |num_bytes| will cause the string to exceed |max_read_bytes|.
49 bool WriteBytes(const char* data, int num_bytes) override;
50
51 void SetTimeModified(const base::Time& time) override;
52
53 private:
54 size_t max_read_bytes_;
55 std::string* output_;
56
57 DISALLOW_COPY_AND_ASSIGN(StringWriterDelegate);
58 };
59
StringWriterDelegate(size_t max_read_bytes,std::string * output)60 StringWriterDelegate::StringWriterDelegate(size_t max_read_bytes,
61 std::string* output)
62 : max_read_bytes_(max_read_bytes),
63 output_(output) {
64 }
65
~StringWriterDelegate()66 StringWriterDelegate::~StringWriterDelegate() {
67 }
68
PrepareOutput()69 bool StringWriterDelegate::PrepareOutput() {
70 return true;
71 }
72
WriteBytes(const char * data,int num_bytes)73 bool StringWriterDelegate::WriteBytes(const char* data, int num_bytes) {
74 if (output_->size() + num_bytes > max_read_bytes_)
75 return false;
76 output_->append(data, num_bytes);
77 return true;
78 }
79
SetTimeModified(const base::Time & time)80 void StringWriterDelegate::SetTimeModified(const base::Time& time) {
81 // Do nothing.
82 }
83
84 } // namespace
85
86 // TODO(satorux): The implementation assumes that file names in zip files
87 // are encoded in UTF-8. This is true for zip files created by Zip()
88 // function in zip.h, but not true for user-supplied random zip files.
EntryInfo(const std::string & file_name_in_zip,const unz_file_info & raw_file_info)89 ZipReader::EntryInfo::EntryInfo(const std::string& file_name_in_zip,
90 const unz_file_info& raw_file_info)
91 : file_path_(base::FilePath::FromUTF8Unsafe(file_name_in_zip)),
92 is_directory_(false),
93 is_unsafe_(false),
94 is_encrypted_(false) {
95 original_size_ = raw_file_info.uncompressed_size;
96
97 // Directory entries in zip files end with "/".
98 is_directory_ = base::EndsWith(file_name_in_zip, "/",
99 base::CompareCase::INSENSITIVE_ASCII);
100
101 // Check the file name here for directory traversal issues.
102 is_unsafe_ = file_path_.ReferencesParent();
103
104 // We also consider that the file name is unsafe, if it's invalid UTF-8.
105 base::string16 file_name_utf16;
106 if (!base::UTF8ToUTF16(file_name_in_zip.data(), file_name_in_zip.size(),
107 &file_name_utf16)) {
108 is_unsafe_ = true;
109 }
110
111 // We also consider that the file name is unsafe, if it's absolute.
112 // On Windows, IsAbsolute() returns false for paths starting with "/".
113 if (file_path_.IsAbsolute() ||
114 base::StartsWith(file_name_in_zip, "/",
115 base::CompareCase::INSENSITIVE_ASCII))
116 is_unsafe_ = true;
117
118 // Whether the file is encrypted is bit 0 of the flag.
119 is_encrypted_ = raw_file_info.flag & 1;
120
121 // Construct the last modified time. The timezone info is not present in
122 // zip files, so we construct the time as local time.
123 base::Time::Exploded exploded_time = {}; // Zero-clear.
124 exploded_time.year = raw_file_info.tmu_date.tm_year;
125 // The month in zip file is 0-based, whereas ours is 1-based.
126 exploded_time.month = raw_file_info.tmu_date.tm_mon + 1;
127 exploded_time.day_of_month = raw_file_info.tmu_date.tm_mday;
128 exploded_time.hour = raw_file_info.tmu_date.tm_hour;
129 exploded_time.minute = raw_file_info.tmu_date.tm_min;
130 exploded_time.second = raw_file_info.tmu_date.tm_sec;
131 exploded_time.millisecond = 0;
132
133 if (!base::Time::FromLocalExploded(exploded_time, &last_modified_))
134 last_modified_ = base::Time::UnixEpoch();
135 }
136
ZipReader()137 ZipReader::ZipReader() {
138 Reset();
139 }
140
~ZipReader()141 ZipReader::~ZipReader() {
142 Close();
143 }
144
Open(const base::FilePath & zip_file_path)145 bool ZipReader::Open(const base::FilePath& zip_file_path) {
146 DCHECK(!zip_file_);
147
148 // Use of "Unsafe" function does not look good, but there is no way to do
149 // this safely on Linux. See file_util.h for details.
150 zip_file_ = internal::OpenForUnzipping(zip_file_path.AsUTF8Unsafe());
151 if (!zip_file_) {
152 return false;
153 }
154
155 return OpenInternal();
156 }
157
OpenFromPlatformFile(base::PlatformFile zip_fd)158 bool ZipReader::OpenFromPlatformFile(base::PlatformFile zip_fd) {
159 DCHECK(!zip_file_);
160
161 #if defined(OS_POSIX)
162 zip_file_ = internal::OpenFdForUnzipping(zip_fd);
163 #elif defined(OS_WIN)
164 zip_file_ = internal::OpenHandleForUnzipping(zip_fd);
165 #endif
166 if (!zip_file_) {
167 return false;
168 }
169
170 return OpenInternal();
171 }
172
OpenFromString(const std::string & data)173 bool ZipReader::OpenFromString(const std::string& data) {
174 zip_file_ = internal::PrepareMemoryForUnzipping(data);
175 if (!zip_file_)
176 return false;
177 return OpenInternal();
178 }
179
Close()180 void ZipReader::Close() {
181 if (zip_file_) {
182 unzClose(zip_file_);
183 }
184 Reset();
185 }
186
HasMore()187 bool ZipReader::HasMore() {
188 return !reached_end_;
189 }
190
AdvanceToNextEntry()191 bool ZipReader::AdvanceToNextEntry() {
192 DCHECK(zip_file_);
193
194 // Should not go further if we already reached the end.
195 if (reached_end_)
196 return false;
197
198 unz_file_pos position = {};
199 if (unzGetFilePos(zip_file_, &position) != UNZ_OK)
200 return false;
201 const int current_entry_index = position.num_of_file;
202 // If we are currently at the last entry, then the next position is the
203 // end of the zip file, so mark that we reached the end.
204 if (current_entry_index + 1 == num_entries_) {
205 reached_end_ = true;
206 } else {
207 DCHECK_LT(current_entry_index + 1, num_entries_);
208 if (unzGoToNextFile(zip_file_) != UNZ_OK) {
209 return false;
210 }
211 }
212 current_entry_info_.reset();
213 return true;
214 }
215
OpenCurrentEntryInZip()216 bool ZipReader::OpenCurrentEntryInZip() {
217 DCHECK(zip_file_);
218
219 unz_file_info raw_file_info = {};
220 char raw_file_name_in_zip[internal::kZipMaxPath] = {};
221 const int result = unzGetCurrentFileInfo(zip_file_,
222 &raw_file_info,
223 raw_file_name_in_zip,
224 sizeof(raw_file_name_in_zip) - 1,
225 NULL, // extraField.
226 0, // extraFieldBufferSize.
227 NULL, // szComment.
228 0); // commentBufferSize.
229 if (result != UNZ_OK)
230 return false;
231 if (raw_file_name_in_zip[0] == '\0')
232 return false;
233 current_entry_info_.reset(
234 new EntryInfo(raw_file_name_in_zip, raw_file_info));
235 return true;
236 }
237
ExtractCurrentEntry(WriterDelegate * delegate,uint64_t num_bytes_to_extract) const238 bool ZipReader::ExtractCurrentEntry(WriterDelegate* delegate,
239 uint64_t num_bytes_to_extract) const {
240 DCHECK(zip_file_);
241
242 const int open_result = unzOpenCurrentFile(zip_file_);
243 if (open_result != UNZ_OK)
244 return false;
245
246 if (!delegate->PrepareOutput())
247 return false;
248 std::unique_ptr<char[]> buf(new char[internal::kZipBufSize]);
249
250 uint64_t remaining_capacity = num_bytes_to_extract;
251 bool entire_file_extracted = false;
252
253 while (remaining_capacity > 0) {
254 const int num_bytes_read =
255 unzReadCurrentFile(zip_file_, buf.get(), internal::kZipBufSize);
256
257 if (num_bytes_read == 0) {
258 entire_file_extracted = true;
259 break;
260 } else if (num_bytes_read < 0) {
261 // If num_bytes_read < 0, then it's a specific UNZ_* error code.
262 break;
263 } else if (num_bytes_read > 0) {
264 uint64_t num_bytes_to_write = std::min<uint64_t>(
265 remaining_capacity, base::checked_cast<uint64_t>(num_bytes_read));
266 if (!delegate->WriteBytes(buf.get(), num_bytes_to_write))
267 break;
268 if (remaining_capacity == base::checked_cast<uint64_t>(num_bytes_read)) {
269 // Ensures function returns true if the entire file has been read.
270 entire_file_extracted =
271 (unzReadCurrentFile(zip_file_, buf.get(), 1) == 0);
272 }
273 CHECK_GE(remaining_capacity, num_bytes_to_write);
274 remaining_capacity -= num_bytes_to_write;
275 }
276 }
277
278 unzCloseCurrentFile(zip_file_);
279
280 if (entire_file_extracted &&
281 current_entry_info()->last_modified() != base::Time::UnixEpoch()) {
282 delegate->SetTimeModified(current_entry_info()->last_modified());
283 }
284
285 return entire_file_extracted;
286 }
287
ExtractCurrentEntryToFilePathAsync(const base::FilePath & output_file_path,SuccessCallback success_callback,FailureCallback failure_callback,const ProgressCallback & progress_callback)288 void ZipReader::ExtractCurrentEntryToFilePathAsync(
289 const base::FilePath& output_file_path,
290 SuccessCallback success_callback,
291 FailureCallback failure_callback,
292 const ProgressCallback& progress_callback) {
293 DCHECK(zip_file_);
294 DCHECK(current_entry_info_.get());
295
296 // If this is a directory, just create it and return.
297 if (current_entry_info()->is_directory()) {
298 if (base::CreateDirectory(output_file_path)) {
299 base::ThreadTaskRunnerHandle::Get()->PostTask(
300 FROM_HERE, std::move(success_callback));
301 } else {
302 DVLOG(1) << "Unzip failed: unable to create directory.";
303 base::ThreadTaskRunnerHandle::Get()->PostTask(
304 FROM_HERE, std::move(failure_callback));
305 }
306 return;
307 }
308
309 if (unzOpenCurrentFile(zip_file_) != UNZ_OK) {
310 DVLOG(1) << "Unzip failed: unable to open current zip entry.";
311 base::ThreadTaskRunnerHandle::Get()->PostTask(FROM_HERE,
312 std::move(failure_callback));
313 return;
314 }
315
316 base::FilePath output_dir_path = output_file_path.DirName();
317 if (!base::CreateDirectory(output_dir_path)) {
318 DVLOG(1) << "Unzip failed: unable to create containing directory.";
319 base::ThreadTaskRunnerHandle::Get()->PostTask(FROM_HERE,
320 std::move(failure_callback));
321 return;
322 }
323
324 const int flags = base::File::FLAG_CREATE_ALWAYS | base::File::FLAG_WRITE;
325 base::File output_file(output_file_path, flags);
326
327 if (!output_file.IsValid()) {
328 DVLOG(1) << "Unzip failed: unable to create platform file at "
329 << output_file_path.value();
330 base::ThreadTaskRunnerHandle::Get()->PostTask(FROM_HERE,
331 std::move(failure_callback));
332 return;
333 }
334
335 base::ThreadTaskRunnerHandle::Get()->PostTask(
336 FROM_HERE,
337 base::BindOnce(&ZipReader::ExtractChunk, weak_ptr_factory_.GetWeakPtr(),
338 Passed(std::move(output_file)),
339 std::move(success_callback), std::move(failure_callback),
340 progress_callback, 0 /* initial offset */));
341 }
342
ExtractCurrentEntryToString(uint64_t max_read_bytes,std::string * output) const343 bool ZipReader::ExtractCurrentEntryToString(uint64_t max_read_bytes,
344 std::string* output) const {
345 DCHECK(output);
346 DCHECK(zip_file_);
347
348 if (max_read_bytes == 0) {
349 output->clear();
350 return true;
351 }
352
353 if (current_entry_info()->is_directory()) {
354 output->clear();
355 return true;
356 }
357
358 // The original_size() is the best hint for the real size, so it saves
359 // doing reallocations for the common case when the uncompressed size is
360 // correct. However, we need to assume that the uncompressed size could be
361 // incorrect therefore this function needs to read as much data as possible.
362 std::string contents;
363 contents.reserve(
364 static_cast<size_t>(std::min(base::checked_cast<int64_t>(max_read_bytes),
365 current_entry_info()->original_size())));
366
367 StringWriterDelegate writer(max_read_bytes, &contents);
368 if (!ExtractCurrentEntry(&writer, max_read_bytes)) {
369 if (contents.length() < max_read_bytes) {
370 // There was an error in extracting entry. If ExtractCurrentEntry()
371 // returns false, the entire file was not read - in which case
372 // contents.length() should equal |max_read_bytes| unless an error
373 // occurred which caused extraction to be aborted.
374 output->clear();
375 } else {
376 // |num_bytes| is less than the length of current entry.
377 output->swap(contents);
378 }
379 return false;
380 }
381 output->swap(contents);
382 return true;
383 }
384
OpenInternal()385 bool ZipReader::OpenInternal() {
386 DCHECK(zip_file_);
387
388 unz_global_info zip_info = {}; // Zero-clear.
389 if (unzGetGlobalInfo(zip_file_, &zip_info) != UNZ_OK) {
390 return false;
391 }
392 num_entries_ = zip_info.number_entry;
393 if (num_entries_ < 0)
394 return false;
395
396 // We are already at the end if the zip file is empty.
397 reached_end_ = (num_entries_ == 0);
398 return true;
399 }
400
Reset()401 void ZipReader::Reset() {
402 zip_file_ = NULL;
403 num_entries_ = 0;
404 reached_end_ = false;
405 current_entry_info_.reset();
406 }
407
ExtractChunk(base::File output_file,SuccessCallback success_callback,FailureCallback failure_callback,const ProgressCallback & progress_callback,const int64_t offset)408 void ZipReader::ExtractChunk(base::File output_file,
409 SuccessCallback success_callback,
410 FailureCallback failure_callback,
411 const ProgressCallback& progress_callback,
412 const int64_t offset) {
413 char buffer[internal::kZipBufSize];
414
415 const int num_bytes_read = unzReadCurrentFile(zip_file_,
416 buffer,
417 internal::kZipBufSize);
418
419 if (num_bytes_read == 0) {
420 unzCloseCurrentFile(zip_file_);
421 std::move(success_callback).Run();
422 } else if (num_bytes_read < 0) {
423 DVLOG(1) << "Unzip failed: error while reading zipfile "
424 << "(" << num_bytes_read << ")";
425 std::move(failure_callback).Run();
426 } else {
427 if (num_bytes_read != output_file.Write(offset, buffer, num_bytes_read)) {
428 DVLOG(1) << "Unzip failed: unable to write all bytes to target.";
429 std::move(failure_callback).Run();
430 return;
431 }
432
433 int64_t current_progress = offset + num_bytes_read;
434
435 progress_callback.Run(current_progress);
436
437 base::ThreadTaskRunnerHandle::Get()->PostTask(
438 FROM_HERE,
439 base::BindOnce(&ZipReader::ExtractChunk, weak_ptr_factory_.GetWeakPtr(),
440 Passed(std::move(output_file)),
441 std::move(success_callback), std::move(failure_callback),
442 progress_callback, current_progress));
443 }
444 }
445
446 // FileWriterDelegate ----------------------------------------------------------
447
FileWriterDelegate(base::File * file)448 FileWriterDelegate::FileWriterDelegate(base::File* file) : file_(file) {}
449
FileWriterDelegate(std::unique_ptr<base::File> file)450 FileWriterDelegate::FileWriterDelegate(std::unique_ptr<base::File> file)
451 : file_(file.get()), owned_file_(std::move(file)) {}
452
~FileWriterDelegate()453 FileWriterDelegate::~FileWriterDelegate() {
454 if (!file_->SetLength(file_length_)) {
455 DVPLOG(1) << "Failed updating length of written file";
456 }
457 }
458
PrepareOutput()459 bool FileWriterDelegate::PrepareOutput() {
460 return file_->Seek(base::File::FROM_BEGIN, 0) >= 0;
461 }
462
WriteBytes(const char * data,int num_bytes)463 bool FileWriterDelegate::WriteBytes(const char* data, int num_bytes) {
464 int bytes_written = file_->WriteAtCurrentPos(data, num_bytes);
465 if (bytes_written > 0)
466 file_length_ += bytes_written;
467 return bytes_written == num_bytes;
468 }
469
SetTimeModified(const base::Time & time)470 void FileWriterDelegate::SetTimeModified(const base::Time& time) {
471 file_->SetTimes(base::Time::Now(), time);
472 }
473
474 // FilePathWriterDelegate ------------------------------------------------------
475
FilePathWriterDelegate(const base::FilePath & output_file_path)476 FilePathWriterDelegate::FilePathWriterDelegate(
477 const base::FilePath& output_file_path)
478 : output_file_path_(output_file_path) {}
479
~FilePathWriterDelegate()480 FilePathWriterDelegate::~FilePathWriterDelegate() {}
481
PrepareOutput()482 bool FilePathWriterDelegate::PrepareOutput() {
483 // We can't rely on parent directory entries being specified in the
484 // zip, so we make sure they are created.
485 if (!base::CreateDirectory(output_file_path_.DirName()))
486 return false;
487
488 file_.Initialize(output_file_path_,
489 base::File::FLAG_CREATE_ALWAYS | base::File::FLAG_WRITE);
490 return file_.IsValid();
491 }
492
WriteBytes(const char * data,int num_bytes)493 bool FilePathWriterDelegate::WriteBytes(const char* data, int num_bytes) {
494 return num_bytes == file_.WriteAtCurrentPos(data, num_bytes);
495 }
496
SetTimeModified(const base::Time & time)497 void FilePathWriterDelegate::SetTimeModified(const base::Time& time) {
498 file_.Close();
499 base::TouchFile(output_file_path_, base::Time::Now(), time);
500 }
501
502 } // namespace zip
503