1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "third_party/zlib/google/zip_reader.h"
6
7 #include "base/bind.h"
8 #include "base/files/file.h"
9 #include "base/logging.h"
10 #include "base/message_loop/message_loop.h"
11 #include "base/strings/string_util.h"
12 #include "base/strings/utf_string_conversions.h"
13 #include "third_party/zlib/google/zip_internal.h"
14
15 #if defined(USE_SYSTEM_MINIZIP)
16 #include <minizip/unzip.h>
17 #else
18 #include "third_party/zlib/contrib/minizip/unzip.h"
19 #if defined(OS_WIN)
20 #include "third_party/zlib/contrib/minizip/iowin32.h"
21 #endif // defined(OS_WIN)
22 #endif // defined(USE_SYSTEM_MINIZIP)
23
24 namespace zip {
25
26 // TODO(satorux): The implementation assumes that file names in zip files
27 // are encoded in UTF-8. This is true for zip files created by Zip()
28 // function in zip.h, but not true for user-supplied random zip files.
EntryInfo(const std::string & file_name_in_zip,const unz_file_info & raw_file_info)29 ZipReader::EntryInfo::EntryInfo(const std::string& file_name_in_zip,
30 const unz_file_info& raw_file_info)
31 : file_path_(base::FilePath::FromUTF8Unsafe(file_name_in_zip)),
32 is_directory_(false) {
33 original_size_ = raw_file_info.uncompressed_size;
34
35 // Directory entries in zip files end with "/".
36 is_directory_ = EndsWith(file_name_in_zip, "/", false);
37
38 // Check the file name here for directory traversal issues.
39 is_unsafe_ = file_path_.ReferencesParent();
40
41 // We also consider that the file name is unsafe, if it's invalid UTF-8.
42 base::string16 file_name_utf16;
43 if (!base::UTF8ToUTF16(file_name_in_zip.data(), file_name_in_zip.size(),
44 &file_name_utf16)) {
45 is_unsafe_ = true;
46 }
47
48 // We also consider that the file name is unsafe, if it's absolute.
49 // On Windows, IsAbsolute() returns false for paths starting with "/".
50 if (file_path_.IsAbsolute() || StartsWithASCII(file_name_in_zip, "/", false))
51 is_unsafe_ = true;
52
53 // Construct the last modified time. The timezone info is not present in
54 // zip files, so we construct the time as local time.
55 base::Time::Exploded exploded_time = {}; // Zero-clear.
56 exploded_time.year = raw_file_info.tmu_date.tm_year;
57 // The month in zip file is 0-based, whereas ours is 1-based.
58 exploded_time.month = raw_file_info.tmu_date.tm_mon + 1;
59 exploded_time.day_of_month = raw_file_info.tmu_date.tm_mday;
60 exploded_time.hour = raw_file_info.tmu_date.tm_hour;
61 exploded_time.minute = raw_file_info.tmu_date.tm_min;
62 exploded_time.second = raw_file_info.tmu_date.tm_sec;
63 exploded_time.millisecond = 0;
64 if (exploded_time.HasValidValues()) {
65 last_modified_ = base::Time::FromLocalExploded(exploded_time);
66 } else {
67 // Use Unix time epoch if the time stamp data is invalid.
68 last_modified_ = base::Time::UnixEpoch();
69 }
70 }
71
ZipReader()72 ZipReader::ZipReader()
73 : weak_ptr_factory_(this) {
74 Reset();
75 }
76
~ZipReader()77 ZipReader::~ZipReader() {
78 Close();
79 }
80
Open(const base::FilePath & zip_file_path)81 bool ZipReader::Open(const base::FilePath& zip_file_path) {
82 DCHECK(!zip_file_);
83
84 // Use of "Unsafe" function does not look good, but there is no way to do
85 // this safely on Linux. See file_util.h for details.
86 zip_file_ = internal::OpenForUnzipping(zip_file_path.AsUTF8Unsafe());
87 if (!zip_file_) {
88 return false;
89 }
90
91 return OpenInternal();
92 }
93
OpenFromPlatformFile(base::PlatformFile zip_fd)94 bool ZipReader::OpenFromPlatformFile(base::PlatformFile zip_fd) {
95 DCHECK(!zip_file_);
96
97 #if defined(OS_POSIX)
98 zip_file_ = internal::OpenFdForUnzipping(zip_fd);
99 #elif defined(OS_WIN)
100 zip_file_ = internal::OpenHandleForUnzipping(zip_fd);
101 #endif
102 if (!zip_file_) {
103 return false;
104 }
105
106 return OpenInternal();
107 }
108
OpenFromString(const std::string & data)109 bool ZipReader::OpenFromString(const std::string& data) {
110 zip_file_ = internal::PrepareMemoryForUnzipping(data);
111 if (!zip_file_)
112 return false;
113 return OpenInternal();
114 }
115
Close()116 void ZipReader::Close() {
117 if (zip_file_) {
118 unzClose(zip_file_);
119 }
120 Reset();
121 }
122
HasMore()123 bool ZipReader::HasMore() {
124 return !reached_end_;
125 }
126
AdvanceToNextEntry()127 bool ZipReader::AdvanceToNextEntry() {
128 DCHECK(zip_file_);
129
130 // Should not go further if we already reached the end.
131 if (reached_end_)
132 return false;
133
134 unz_file_pos position = {};
135 if (unzGetFilePos(zip_file_, &position) != UNZ_OK)
136 return false;
137 const int current_entry_index = position.num_of_file;
138 // If we are currently at the last entry, then the next position is the
139 // end of the zip file, so mark that we reached the end.
140 if (current_entry_index + 1 == num_entries_) {
141 reached_end_ = true;
142 } else {
143 DCHECK_LT(current_entry_index + 1, num_entries_);
144 if (unzGoToNextFile(zip_file_) != UNZ_OK) {
145 return false;
146 }
147 }
148 current_entry_info_.reset();
149 return true;
150 }
151
OpenCurrentEntryInZip()152 bool ZipReader::OpenCurrentEntryInZip() {
153 DCHECK(zip_file_);
154
155 unz_file_info raw_file_info = {};
156 char raw_file_name_in_zip[internal::kZipMaxPath] = {};
157 const int result = unzGetCurrentFileInfo(zip_file_,
158 &raw_file_info,
159 raw_file_name_in_zip,
160 sizeof(raw_file_name_in_zip) - 1,
161 NULL, // extraField.
162 0, // extraFieldBufferSize.
163 NULL, // szComment.
164 0); // commentBufferSize.
165 if (result != UNZ_OK)
166 return false;
167 if (raw_file_name_in_zip[0] == '\0')
168 return false;
169 current_entry_info_.reset(
170 new EntryInfo(raw_file_name_in_zip, raw_file_info));
171 return true;
172 }
173
LocateAndOpenEntry(const base::FilePath & path_in_zip)174 bool ZipReader::LocateAndOpenEntry(const base::FilePath& path_in_zip) {
175 DCHECK(zip_file_);
176
177 current_entry_info_.reset();
178 reached_end_ = false;
179 const int kDefaultCaseSensivityOfOS = 0;
180 const int result = unzLocateFile(zip_file_,
181 path_in_zip.AsUTF8Unsafe().c_str(),
182 kDefaultCaseSensivityOfOS);
183 if (result != UNZ_OK)
184 return false;
185
186 // Then Open the entry.
187 return OpenCurrentEntryInZip();
188 }
189
ExtractCurrentEntryToFilePath(const base::FilePath & output_file_path)190 bool ZipReader::ExtractCurrentEntryToFilePath(
191 const base::FilePath& output_file_path) {
192 DCHECK(zip_file_);
193
194 // If this is a directory, just create it and return.
195 if (current_entry_info()->is_directory())
196 return base::CreateDirectory(output_file_path);
197
198 const int open_result = unzOpenCurrentFile(zip_file_);
199 if (open_result != UNZ_OK)
200 return false;
201
202 // We can't rely on parent directory entries being specified in the
203 // zip, so we make sure they are created.
204 base::FilePath output_dir_path = output_file_path.DirName();
205 if (!base::CreateDirectory(output_dir_path))
206 return false;
207
208 base::File file(output_file_path,
209 base::File::FLAG_CREATE_ALWAYS | base::File::FLAG_WRITE);
210 if (!file.IsValid())
211 return false;
212
213 bool success = true; // This becomes false when something bad happens.
214 while (true) {
215 char buf[internal::kZipBufSize];
216 const int num_bytes_read = unzReadCurrentFile(zip_file_, buf,
217 internal::kZipBufSize);
218 if (num_bytes_read == 0) {
219 // Reached the end of the file.
220 break;
221 } else if (num_bytes_read < 0) {
222 // If num_bytes_read < 0, then it's a specific UNZ_* error code.
223 success = false;
224 break;
225 } else if (num_bytes_read > 0) {
226 // Some data is read. Write it to the output file.
227 if (num_bytes_read != file.WriteAtCurrentPos(buf, num_bytes_read)) {
228 success = false;
229 break;
230 }
231 }
232 }
233
234 file.Close();
235 unzCloseCurrentFile(zip_file_);
236
237 if (current_entry_info()->last_modified() != base::Time::UnixEpoch())
238 base::TouchFile(output_file_path,
239 base::Time::Now(),
240 current_entry_info()->last_modified());
241
242 return success;
243 }
244
ExtractCurrentEntryToFilePathAsync(const base::FilePath & output_file_path,const SuccessCallback & success_callback,const FailureCallback & failure_callback,const ProgressCallback & progress_callback)245 void ZipReader::ExtractCurrentEntryToFilePathAsync(
246 const base::FilePath& output_file_path,
247 const SuccessCallback& success_callback,
248 const FailureCallback& failure_callback,
249 const ProgressCallback& progress_callback) {
250 DCHECK(zip_file_);
251 DCHECK(current_entry_info_.get());
252
253 // If this is a directory, just create it and return.
254 if (current_entry_info()->is_directory()) {
255 if (base::CreateDirectory(output_file_path)) {
256 base::MessageLoopProxy::current()->PostTask(FROM_HERE, success_callback);
257 } else {
258 DVLOG(1) << "Unzip failed: unable to create directory.";
259 base::MessageLoopProxy::current()->PostTask(FROM_HERE, failure_callback);
260 }
261 return;
262 }
263
264 if (unzOpenCurrentFile(zip_file_) != UNZ_OK) {
265 DVLOG(1) << "Unzip failed: unable to open current zip entry.";
266 base::MessageLoopProxy::current()->PostTask(FROM_HERE, failure_callback);
267 return;
268 }
269
270 base::FilePath output_dir_path = output_file_path.DirName();
271 if (!base::CreateDirectory(output_dir_path)) {
272 DVLOG(1) << "Unzip failed: unable to create containing directory.";
273 base::MessageLoopProxy::current()->PostTask(FROM_HERE, failure_callback);
274 return;
275 }
276
277 const int flags = base::File::FLAG_CREATE_ALWAYS | base::File::FLAG_WRITE;
278 base::File output_file(output_file_path, flags);
279
280 if (!output_file.IsValid()) {
281 DVLOG(1) << "Unzip failed: unable to create platform file at "
282 << output_file_path.value();
283 base::MessageLoopProxy::current()->PostTask(FROM_HERE, failure_callback);
284 return;
285 }
286
287 base::MessageLoop::current()->PostTask(
288 FROM_HERE,
289 base::Bind(&ZipReader::ExtractChunk,
290 weak_ptr_factory_.GetWeakPtr(),
291 Passed(output_file.Pass()),
292 success_callback,
293 failure_callback,
294 progress_callback,
295 0 /* initial offset */));
296 }
297
ExtractCurrentEntryIntoDirectory(const base::FilePath & output_directory_path)298 bool ZipReader::ExtractCurrentEntryIntoDirectory(
299 const base::FilePath& output_directory_path) {
300 DCHECK(current_entry_info_.get());
301
302 base::FilePath output_file_path = output_directory_path.Append(
303 current_entry_info()->file_path());
304 return ExtractCurrentEntryToFilePath(output_file_path);
305 }
306
#if defined(OS_POSIX)
// Decompresses the current entry and writes it to the file descriptor |fd|.
// Returns false for directory entries (there is nothing to write), when the
// entry cannot be opened, or when reading or writing fails.
bool ZipReader::ExtractCurrentEntryToFd(const int fd) {
  DCHECK(zip_file_);

  // A directory has no content that could be written to the descriptor.
  if (current_entry_info()->is_directory())
    return false;

  if (unzOpenCurrentFile(zip_file_) != UNZ_OK)
    return false;

  bool success = true;  // This becomes false when something bad happens.
  char buf[internal::kZipBufSize];
  for (;;) {
    const int num_bytes_read = unzReadCurrentFile(zip_file_, buf,
                                                  internal::kZipBufSize);
    // Zero bytes means the end of the entry was reached.
    if (num_bytes_read == 0)
      break;

    // A negative value is a specific UNZ_* error code; otherwise the chunk
    // must be written to the descriptor in full.
    if (num_bytes_read < 0 ||
        num_bytes_read != base::WriteFileDescriptor(fd, buf, num_bytes_read)) {
      success = false;
      break;
    }
  }

  unzCloseCurrentFile(zip_file_);
  return success;
}
#endif  // defined(OS_POSIX)
346
ExtractCurrentEntryToString(size_t max_read_bytes,std::string * output) const347 bool ZipReader::ExtractCurrentEntryToString(
348 size_t max_read_bytes,
349 std::string* output) const {
350 DCHECK(output);
351 DCHECK(zip_file_);
352 DCHECK(max_read_bytes != 0);
353
354 if (current_entry_info()->is_directory()) {
355 output->clear();
356 return true;
357 }
358
359 const int open_result = unzOpenCurrentFile(zip_file_);
360 if (open_result != UNZ_OK)
361 return false;
362
363 // The original_size() is the best hint for the real size, so it saves
364 // doing reallocations for the common case when the uncompressed size is
365 // correct. However, we need to assume that the uncompressed size could be
366 // incorrect therefore this function needs to read as much data as possible.
367 std::string contents;
368 contents.reserve(std::min<size_t>(
369 max_read_bytes, current_entry_info()->original_size()));
370
371 bool success = true; // This becomes false when something bad happens.
372 char buf[internal::kZipBufSize];
373 while (true) {
374 const int num_bytes_read = unzReadCurrentFile(zip_file_, buf,
375 internal::kZipBufSize);
376 if (num_bytes_read == 0) {
377 // Reached the end of the file.
378 break;
379 } else if (num_bytes_read < 0) {
380 // If num_bytes_read < 0, then it's a specific UNZ_* error code.
381 success = false;
382 break;
383 } else if (num_bytes_read > 0) {
384 if (contents.size() + num_bytes_read > max_read_bytes) {
385 success = false;
386 break;
387 }
388 contents.append(buf, num_bytes_read);
389 }
390 }
391
392 unzCloseCurrentFile(zip_file_);
393 if (success)
394 output->swap(contents);
395
396 return success;
397 }
398
OpenInternal()399 bool ZipReader::OpenInternal() {
400 DCHECK(zip_file_);
401
402 unz_global_info zip_info = {}; // Zero-clear.
403 if (unzGetGlobalInfo(zip_file_, &zip_info) != UNZ_OK) {
404 return false;
405 }
406 num_entries_ = zip_info.number_entry;
407 if (num_entries_ < 0)
408 return false;
409
410 // We are already at the end if the zip file is empty.
411 reached_end_ = (num_entries_ == 0);
412 return true;
413 }
414
Reset()415 void ZipReader::Reset() {
416 zip_file_ = NULL;
417 num_entries_ = 0;
418 reached_end_ = false;
419 current_entry_info_.reset();
420 }
421
ExtractChunk(base::File output_file,const SuccessCallback & success_callback,const FailureCallback & failure_callback,const ProgressCallback & progress_callback,const int64 offset)422 void ZipReader::ExtractChunk(base::File output_file,
423 const SuccessCallback& success_callback,
424 const FailureCallback& failure_callback,
425 const ProgressCallback& progress_callback,
426 const int64 offset) {
427 char buffer[internal::kZipBufSize];
428
429 const int num_bytes_read = unzReadCurrentFile(zip_file_,
430 buffer,
431 internal::kZipBufSize);
432
433 if (num_bytes_read == 0) {
434 unzCloseCurrentFile(zip_file_);
435 success_callback.Run();
436 } else if (num_bytes_read < 0) {
437 DVLOG(1) << "Unzip failed: error while reading zipfile "
438 << "(" << num_bytes_read << ")";
439 failure_callback.Run();
440 } else {
441 if (num_bytes_read != output_file.Write(offset, buffer, num_bytes_read)) {
442 DVLOG(1) << "Unzip failed: unable to write all bytes to target.";
443 failure_callback.Run();
444 return;
445 }
446
447 int64 current_progress = offset + num_bytes_read;
448
449 progress_callback.Run(current_progress);
450
451 base::MessageLoop::current()->PostTask(
452 FROM_HERE,
453 base::Bind(&ZipReader::ExtractChunk,
454 weak_ptr_factory_.GetWeakPtr(),
455 Passed(output_file.Pass()),
456 success_callback,
457 failure_callback,
458 progress_callback,
459 current_progress));
460
461 }
462 }
463
464
465 } // namespace zip
466