• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "third_party/zlib/google/zip_reader.h"
6 
7 #include <utility>
8 
9 #include "base/bind.h"
10 #include "base/files/file.h"
11 #include "base/logging.h"
12 #include "base/macros.h"
13 #include "base/single_thread_task_runner.h"
14 #include "base/strings/string_util.h"
15 #include "base/strings/utf_string_conversions.h"
16 #include "base/threading/thread_task_runner_handle.h"
17 #include "build/build_config.h"
18 #include "third_party/zlib/google/zip_internal.h"
19 
20 #if defined(USE_SYSTEM_MINIZIP)
21 #include <minizip/unzip.h>
22 #else
23 #include "third_party/zlib/contrib/minizip/unzip.h"
24 #if defined(OS_WIN)
25 #include "third_party/zlib/contrib/minizip/iowin32.h"
26 #endif  // defined(OS_WIN)
27 #endif  // defined(USE_SYSTEM_MINIZIP)
28 
29 namespace zip {
30 
31 namespace {
32 
33 // StringWriterDelegate --------------------------------------------------------
34 
35 // A writer delegate that writes no more than |max_read_bytes| to a given
36 // std::string.
37 class StringWriterDelegate : public WriterDelegate {
38  public:
39   StringWriterDelegate(size_t max_read_bytes, std::string* output);
40   ~StringWriterDelegate() override;
41 
42   // WriterDelegate methods:
43 
44   // Returns true.
45   bool PrepareOutput() override;
46 
47   // Appends |num_bytes| bytes from |data| to the output string. Returns false
48   // if |num_bytes| will cause the string to exceed |max_read_bytes|.
49   bool WriteBytes(const char* data, int num_bytes) override;
50 
51   void SetTimeModified(const base::Time& time) override;
52 
53  private:
54   size_t max_read_bytes_;
55   std::string* output_;
56 
57   DISALLOW_COPY_AND_ASSIGN(StringWriterDelegate);
58 };
59 
StringWriterDelegate(size_t max_read_bytes,std::string * output)60 StringWriterDelegate::StringWriterDelegate(size_t max_read_bytes,
61                                            std::string* output)
62     : max_read_bytes_(max_read_bytes),
63       output_(output) {
64 }
65 
~StringWriterDelegate()66 StringWriterDelegate::~StringWriterDelegate() {
67 }
68 
PrepareOutput()69 bool StringWriterDelegate::PrepareOutput() {
70   return true;
71 }
72 
WriteBytes(const char * data,int num_bytes)73 bool StringWriterDelegate::WriteBytes(const char* data, int num_bytes) {
74   if (output_->size() + num_bytes > max_read_bytes_)
75     return false;
76   output_->append(data, num_bytes);
77   return true;
78 }
79 
SetTimeModified(const base::Time & time)80 void StringWriterDelegate::SetTimeModified(const base::Time& time) {
81   // Do nothing.
82 }
83 
84 }  // namespace
85 
86 // TODO(satorux): The implementation assumes that file names in zip files
87 // are encoded in UTF-8. This is true for zip files created by Zip()
88 // function in zip.h, but not true for user-supplied random zip files.
EntryInfo(const std::string & file_name_in_zip,const unz_file_info & raw_file_info)89 ZipReader::EntryInfo::EntryInfo(const std::string& file_name_in_zip,
90                                 const unz_file_info& raw_file_info)
91     : file_path_(base::FilePath::FromUTF8Unsafe(file_name_in_zip)),
92       is_directory_(false),
93       is_unsafe_(false),
94       is_encrypted_(false) {
95   original_size_ = raw_file_info.uncompressed_size;
96 
97   // Directory entries in zip files end with "/".
98   is_directory_ = base::EndsWith(file_name_in_zip, "/",
99                                  base::CompareCase::INSENSITIVE_ASCII);
100 
101   // Check the file name here for directory traversal issues.
102   is_unsafe_ = file_path_.ReferencesParent();
103 
104   // We also consider that the file name is unsafe, if it's invalid UTF-8.
105   base::string16 file_name_utf16;
106   if (!base::UTF8ToUTF16(file_name_in_zip.data(), file_name_in_zip.size(),
107                          &file_name_utf16)) {
108     is_unsafe_ = true;
109   }
110 
111   // We also consider that the file name is unsafe, if it's absolute.
112   // On Windows, IsAbsolute() returns false for paths starting with "/".
113   if (file_path_.IsAbsolute() ||
114       base::StartsWith(file_name_in_zip, "/",
115                        base::CompareCase::INSENSITIVE_ASCII))
116     is_unsafe_ = true;
117 
118   // Whether the file is encrypted is bit 0 of the flag.
119   is_encrypted_ = raw_file_info.flag & 1;
120 
121   // Construct the last modified time. The timezone info is not present in
122   // zip files, so we construct the time as local time.
123   base::Time::Exploded exploded_time = {};  // Zero-clear.
124   exploded_time.year = raw_file_info.tmu_date.tm_year;
125   // The month in zip file is 0-based, whereas ours is 1-based.
126   exploded_time.month = raw_file_info.tmu_date.tm_mon + 1;
127   exploded_time.day_of_month = raw_file_info.tmu_date.tm_mday;
128   exploded_time.hour = raw_file_info.tmu_date.tm_hour;
129   exploded_time.minute = raw_file_info.tmu_date.tm_min;
130   exploded_time.second = raw_file_info.tmu_date.tm_sec;
131   exploded_time.millisecond = 0;
132 
133   if (!base::Time::FromLocalExploded(exploded_time, &last_modified_))
134     last_modified_ = base::Time::UnixEpoch();
135 }
136 
ZipReader()137 ZipReader::ZipReader() {
138   Reset();
139 }
140 
~ZipReader()141 ZipReader::~ZipReader() {
142   Close();
143 }
144 
Open(const base::FilePath & zip_file_path)145 bool ZipReader::Open(const base::FilePath& zip_file_path) {
146   DCHECK(!zip_file_);
147 
148   // Use of "Unsafe" function does not look good, but there is no way to do
149   // this safely on Linux. See file_util.h for details.
150   zip_file_ = internal::OpenForUnzipping(zip_file_path.AsUTF8Unsafe());
151   if (!zip_file_) {
152     return false;
153   }
154 
155   return OpenInternal();
156 }
157 
OpenFromPlatformFile(base::PlatformFile zip_fd)158 bool ZipReader::OpenFromPlatformFile(base::PlatformFile zip_fd) {
159   DCHECK(!zip_file_);
160 
161 #if defined(OS_POSIX)
162   zip_file_ = internal::OpenFdForUnzipping(zip_fd);
163 #elif defined(OS_WIN)
164   zip_file_ = internal::OpenHandleForUnzipping(zip_fd);
165 #endif
166   if (!zip_file_) {
167     return false;
168   }
169 
170   return OpenInternal();
171 }
172 
OpenFromString(const std::string & data)173 bool ZipReader::OpenFromString(const std::string& data) {
174   zip_file_ = internal::PrepareMemoryForUnzipping(data);
175   if (!zip_file_)
176     return false;
177   return OpenInternal();
178 }
179 
Close()180 void ZipReader::Close() {
181   if (zip_file_) {
182     unzClose(zip_file_);
183   }
184   Reset();
185 }
186 
HasMore()187 bool ZipReader::HasMore() {
188   return !reached_end_;
189 }
190 
AdvanceToNextEntry()191 bool ZipReader::AdvanceToNextEntry() {
192   DCHECK(zip_file_);
193 
194   // Should not go further if we already reached the end.
195   if (reached_end_)
196     return false;
197 
198   unz_file_pos position = {};
199   if (unzGetFilePos(zip_file_, &position) != UNZ_OK)
200     return false;
201   const int current_entry_index = position.num_of_file;
202   // If we are currently at the last entry, then the next position is the
203   // end of the zip file, so mark that we reached the end.
204   if (current_entry_index + 1 == num_entries_) {
205     reached_end_ = true;
206   } else {
207     DCHECK_LT(current_entry_index + 1, num_entries_);
208     if (unzGoToNextFile(zip_file_) != UNZ_OK) {
209       return false;
210     }
211   }
212   current_entry_info_.reset();
213   return true;
214 }
215 
OpenCurrentEntryInZip()216 bool ZipReader::OpenCurrentEntryInZip() {
217   DCHECK(zip_file_);
218 
219   unz_file_info raw_file_info = {};
220   char raw_file_name_in_zip[internal::kZipMaxPath] = {};
221   const int result = unzGetCurrentFileInfo(zip_file_,
222                                            &raw_file_info,
223                                            raw_file_name_in_zip,
224                                            sizeof(raw_file_name_in_zip) - 1,
225                                            NULL,  // extraField.
226                                            0,  // extraFieldBufferSize.
227                                            NULL,  // szComment.
228                                            0);  // commentBufferSize.
229   if (result != UNZ_OK)
230     return false;
231   if (raw_file_name_in_zip[0] == '\0')
232     return false;
233   current_entry_info_.reset(
234       new EntryInfo(raw_file_name_in_zip, raw_file_info));
235   return true;
236 }
237 
ExtractCurrentEntry(WriterDelegate * delegate,uint64_t num_bytes_to_extract) const238 bool ZipReader::ExtractCurrentEntry(WriterDelegate* delegate,
239                                     uint64_t num_bytes_to_extract) const {
240   DCHECK(zip_file_);
241 
242   const int open_result = unzOpenCurrentFile(zip_file_);
243   if (open_result != UNZ_OK)
244     return false;
245 
246   if (!delegate->PrepareOutput())
247     return false;
248   std::unique_ptr<char[]> buf(new char[internal::kZipBufSize]);
249 
250   uint64_t remaining_capacity = num_bytes_to_extract;
251   bool entire_file_extracted = false;
252 
253   while (remaining_capacity > 0) {
254     const int num_bytes_read =
255         unzReadCurrentFile(zip_file_, buf.get(), internal::kZipBufSize);
256 
257     if (num_bytes_read == 0) {
258       entire_file_extracted = true;
259       break;
260     } else if (num_bytes_read < 0) {
261       // If num_bytes_read < 0, then it's a specific UNZ_* error code.
262       break;
263     } else if (num_bytes_read > 0) {
264       uint64_t num_bytes_to_write = std::min<uint64_t>(
265           remaining_capacity, base::checked_cast<uint64_t>(num_bytes_read));
266       if (!delegate->WriteBytes(buf.get(), num_bytes_to_write))
267         break;
268       if (remaining_capacity == base::checked_cast<uint64_t>(num_bytes_read)) {
269         // Ensures function returns true if the entire file has been read.
270         entire_file_extracted =
271             (unzReadCurrentFile(zip_file_, buf.get(), 1) == 0);
272       }
273       CHECK_GE(remaining_capacity, num_bytes_to_write);
274       remaining_capacity -= num_bytes_to_write;
275     }
276   }
277 
278   unzCloseCurrentFile(zip_file_);
279 
280   if (entire_file_extracted &&
281       current_entry_info()->last_modified() != base::Time::UnixEpoch()) {
282     delegate->SetTimeModified(current_entry_info()->last_modified());
283   }
284 
285   return entire_file_extracted;
286 }
287 
ExtractCurrentEntryToFilePathAsync(const base::FilePath & output_file_path,SuccessCallback success_callback,FailureCallback failure_callback,const ProgressCallback & progress_callback)288 void ZipReader::ExtractCurrentEntryToFilePathAsync(
289     const base::FilePath& output_file_path,
290     SuccessCallback success_callback,
291     FailureCallback failure_callback,
292     const ProgressCallback& progress_callback) {
293   DCHECK(zip_file_);
294   DCHECK(current_entry_info_.get());
295 
296   // If this is a directory, just create it and return.
297   if (current_entry_info()->is_directory()) {
298     if (base::CreateDirectory(output_file_path)) {
299       base::ThreadTaskRunnerHandle::Get()->PostTask(
300           FROM_HERE, std::move(success_callback));
301     } else {
302       DVLOG(1) << "Unzip failed: unable to create directory.";
303       base::ThreadTaskRunnerHandle::Get()->PostTask(
304           FROM_HERE, std::move(failure_callback));
305     }
306     return;
307   }
308 
309   if (unzOpenCurrentFile(zip_file_) != UNZ_OK) {
310     DVLOG(1) << "Unzip failed: unable to open current zip entry.";
311     base::ThreadTaskRunnerHandle::Get()->PostTask(FROM_HERE,
312                                                   std::move(failure_callback));
313     return;
314   }
315 
316   base::FilePath output_dir_path = output_file_path.DirName();
317   if (!base::CreateDirectory(output_dir_path)) {
318     DVLOG(1) << "Unzip failed: unable to create containing directory.";
319     base::ThreadTaskRunnerHandle::Get()->PostTask(FROM_HERE,
320                                                   std::move(failure_callback));
321     return;
322   }
323 
324   const int flags = base::File::FLAG_CREATE_ALWAYS | base::File::FLAG_WRITE;
325   base::File output_file(output_file_path, flags);
326 
327   if (!output_file.IsValid()) {
328     DVLOG(1) << "Unzip failed: unable to create platform file at "
329              << output_file_path.value();
330     base::ThreadTaskRunnerHandle::Get()->PostTask(FROM_HERE,
331                                                   std::move(failure_callback));
332     return;
333   }
334 
335   base::ThreadTaskRunnerHandle::Get()->PostTask(
336       FROM_HERE,
337       base::BindOnce(&ZipReader::ExtractChunk, weak_ptr_factory_.GetWeakPtr(),
338                      Passed(std::move(output_file)),
339                      std::move(success_callback), std::move(failure_callback),
340                      progress_callback, 0 /* initial offset */));
341 }
342 
ExtractCurrentEntryToString(uint64_t max_read_bytes,std::string * output) const343 bool ZipReader::ExtractCurrentEntryToString(uint64_t max_read_bytes,
344                                             std::string* output) const {
345   DCHECK(output);
346   DCHECK(zip_file_);
347 
348   if (max_read_bytes == 0) {
349     output->clear();
350     return true;
351   }
352 
353   if (current_entry_info()->is_directory()) {
354     output->clear();
355     return true;
356   }
357 
358   // The original_size() is the best hint for the real size, so it saves
359   // doing reallocations for the common case when the uncompressed size is
360   // correct. However, we need to assume that the uncompressed size could be
361   // incorrect therefore this function needs to read as much data as possible.
362   std::string contents;
363   contents.reserve(
364       static_cast<size_t>(std::min(base::checked_cast<int64_t>(max_read_bytes),
365                                    current_entry_info()->original_size())));
366 
367   StringWriterDelegate writer(max_read_bytes, &contents);
368   if (!ExtractCurrentEntry(&writer, max_read_bytes)) {
369     if (contents.length() < max_read_bytes) {
370       // There was an error in extracting entry. If ExtractCurrentEntry()
371       // returns false, the entire file was not read - in which case
372       // contents.length() should equal |max_read_bytes| unless an error
373       // occurred which caused extraction to be aborted.
374       output->clear();
375     } else {
376       // |num_bytes| is less than the length of current entry.
377       output->swap(contents);
378     }
379     return false;
380   }
381   output->swap(contents);
382   return true;
383 }
384 
OpenInternal()385 bool ZipReader::OpenInternal() {
386   DCHECK(zip_file_);
387 
388   unz_global_info zip_info = {};  // Zero-clear.
389   if (unzGetGlobalInfo(zip_file_, &zip_info) != UNZ_OK) {
390     return false;
391   }
392   num_entries_ = zip_info.number_entry;
393   if (num_entries_ < 0)
394     return false;
395 
396   // We are already at the end if the zip file is empty.
397   reached_end_ = (num_entries_ == 0);
398   return true;
399 }
400 
Reset()401 void ZipReader::Reset() {
402   zip_file_ = NULL;
403   num_entries_ = 0;
404   reached_end_ = false;
405   current_entry_info_.reset();
406 }
407 
ExtractChunk(base::File output_file,SuccessCallback success_callback,FailureCallback failure_callback,const ProgressCallback & progress_callback,const int64_t offset)408 void ZipReader::ExtractChunk(base::File output_file,
409                              SuccessCallback success_callback,
410                              FailureCallback failure_callback,
411                              const ProgressCallback& progress_callback,
412                              const int64_t offset) {
413   char buffer[internal::kZipBufSize];
414 
415   const int num_bytes_read = unzReadCurrentFile(zip_file_,
416                                                 buffer,
417                                                 internal::kZipBufSize);
418 
419   if (num_bytes_read == 0) {
420     unzCloseCurrentFile(zip_file_);
421     std::move(success_callback).Run();
422   } else if (num_bytes_read < 0) {
423     DVLOG(1) << "Unzip failed: error while reading zipfile "
424              << "(" << num_bytes_read << ")";
425     std::move(failure_callback).Run();
426   } else {
427     if (num_bytes_read != output_file.Write(offset, buffer, num_bytes_read)) {
428       DVLOG(1) << "Unzip failed: unable to write all bytes to target.";
429       std::move(failure_callback).Run();
430       return;
431     }
432 
433     int64_t current_progress = offset + num_bytes_read;
434 
435     progress_callback.Run(current_progress);
436 
437     base::ThreadTaskRunnerHandle::Get()->PostTask(
438         FROM_HERE,
439         base::BindOnce(&ZipReader::ExtractChunk, weak_ptr_factory_.GetWeakPtr(),
440                        Passed(std::move(output_file)),
441                        std::move(success_callback), std::move(failure_callback),
442                        progress_callback, current_progress));
443   }
444 }
445 
446 // FileWriterDelegate ----------------------------------------------------------
447 
FileWriterDelegate(base::File * file)448 FileWriterDelegate::FileWriterDelegate(base::File* file) : file_(file) {}
449 
FileWriterDelegate(std::unique_ptr<base::File> file)450 FileWriterDelegate::FileWriterDelegate(std::unique_ptr<base::File> file)
451     : file_(file.get()), owned_file_(std::move(file)) {}
452 
~FileWriterDelegate()453 FileWriterDelegate::~FileWriterDelegate() {
454   if (!file_->SetLength(file_length_)) {
455     DVPLOG(1) << "Failed updating length of written file";
456   }
457 }
458 
PrepareOutput()459 bool FileWriterDelegate::PrepareOutput() {
460   return file_->Seek(base::File::FROM_BEGIN, 0) >= 0;
461 }
462 
WriteBytes(const char * data,int num_bytes)463 bool FileWriterDelegate::WriteBytes(const char* data, int num_bytes) {
464   int bytes_written = file_->WriteAtCurrentPos(data, num_bytes);
465   if (bytes_written > 0)
466     file_length_ += bytes_written;
467   return bytes_written == num_bytes;
468 }
469 
SetTimeModified(const base::Time & time)470 void FileWriterDelegate::SetTimeModified(const base::Time& time) {
471   file_->SetTimes(base::Time::Now(), time);
472 }
473 
474 // FilePathWriterDelegate ------------------------------------------------------
475 
FilePathWriterDelegate(const base::FilePath & output_file_path)476 FilePathWriterDelegate::FilePathWriterDelegate(
477     const base::FilePath& output_file_path)
478     : output_file_path_(output_file_path) {}
479 
~FilePathWriterDelegate()480 FilePathWriterDelegate::~FilePathWriterDelegate() {}
481 
PrepareOutput()482 bool FilePathWriterDelegate::PrepareOutput() {
483   // We can't rely on parent directory entries being specified in the
484   // zip, so we make sure they are created.
485   if (!base::CreateDirectory(output_file_path_.DirName()))
486     return false;
487 
488   file_.Initialize(output_file_path_,
489                    base::File::FLAG_CREATE_ALWAYS | base::File::FLAG_WRITE);
490   return file_.IsValid();
491 }
492 
WriteBytes(const char * data,int num_bytes)493 bool FilePathWriterDelegate::WriteBytes(const char* data, int num_bytes) {
494   return num_bytes == file_.WriteAtCurrentPos(data, num_bytes);
495 }
496 
SetTimeModified(const base::Time & time)497 void FilePathWriterDelegate::SetTimeModified(const base::Time& time) {
498   file_.Close();
499   base::TouchFile(output_file_path_, base::Time::Now(), time);
500 }
501 
502 }  // namespace zip
503