1 //! Types for reading ZIP archives
2
3 use crate::compression::CompressionMethod;
4 use crate::crc32::Crc32Reader;
5 use crate::result::{InvalidPassword, ZipError, ZipResult};
6 use crate::spec;
7 use crate::zipcrypto::ZipCryptoReader;
8 use crate::zipcrypto::ZipCryptoReaderValid;
9 use std::borrow::Cow;
10 use std::collections::HashMap;
11 use std::io::{self, prelude::*};
12 use std::path::{Component, Path};
13
14 use crate::cp437::FromCp437;
15 use crate::types::{DateTime, System, ZipFileData};
16 use byteorder::{LittleEndian, ReadBytesExt};
17
18 #[cfg(any(
19 feature = "deflate",
20 feature = "deflate-miniz",
21 feature = "deflate-zlib"
22 ))]
23 use flate2::read::DeflateDecoder;
24
25 #[cfg(feature = "bzip2")]
26 use bzip2::read::BzDecoder;
27
/// Unix file-type bits combined with permission bits when a mode is
/// synthesised for entries that carry DOS attributes.
mod ffi {
    /// File-type bits for a directory.
    pub const S_IFDIR: u32 = 0o040_000;
    /// File-type bits for a regular file.
    pub const S_IFREG: u32 = 0o100_000;
}
32
33 /// ZIP archive reader
34 ///
35 /// ```no_run
36 /// use std::io::prelude::*;
37 /// fn list_zip_contents(reader: impl Read + Seek) -> zip::result::ZipResult<()> {
38 /// let mut zip = zip::ZipArchive::new(reader)?;
39 ///
40 /// for i in 0..zip.len() {
41 /// let mut file = zip.by_index(i)?;
42 /// println!("Filename: {}", file.name());
43 /// std::io::copy(&mut file, &mut std::io::stdout());
44 /// }
45 ///
46 /// Ok(())
47 /// }
48 /// ```
49 #[derive(Clone, Debug)]
50 pub struct ZipArchive<R: Read + io::Seek> {
51 reader: R,
52 files: Vec<ZipFileData>,
53 names_map: HashMap<String, usize>,
54 offset: u64,
55 comment: Vec<u8>,
56 }
57
58 enum CryptoReader<'a> {
59 Plaintext(io::Take<&'a mut dyn Read>),
60 ZipCrypto(ZipCryptoReaderValid<io::Take<&'a mut dyn Read>>),
61 }
62
63 impl<'a> Read for CryptoReader<'a> {
read(&mut self, buf: &mut [u8]) -> io::Result<usize>64 fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
65 match self {
66 CryptoReader::Plaintext(r) => r.read(buf),
67 CryptoReader::ZipCrypto(r) => r.read(buf),
68 }
69 }
70 }
71
72 impl<'a> CryptoReader<'a> {
73 /// Consumes this decoder, returning the underlying reader.
into_inner(self) -> io::Take<&'a mut dyn Read>74 pub fn into_inner(self) -> io::Take<&'a mut dyn Read> {
75 match self {
76 CryptoReader::Plaintext(r) => r,
77 CryptoReader::ZipCrypto(r) => r.into_inner(),
78 }
79 }
80 }
81
82 enum ZipFileReader<'a> {
83 NoReader,
84 Raw(io::Take<&'a mut dyn io::Read>),
85 Stored(Crc32Reader<CryptoReader<'a>>),
86 #[cfg(any(
87 feature = "deflate",
88 feature = "deflate-miniz",
89 feature = "deflate-zlib"
90 ))]
91 Deflated(Crc32Reader<flate2::read::DeflateDecoder<CryptoReader<'a>>>),
92 #[cfg(feature = "bzip2")]
93 Bzip2(Crc32Reader<BzDecoder<CryptoReader<'a>>>),
94 }
95
96 impl<'a> Read for ZipFileReader<'a> {
read(&mut self, buf: &mut [u8]) -> io::Result<usize>97 fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
98 match self {
99 ZipFileReader::NoReader => panic!("ZipFileReader was in an invalid state"),
100 ZipFileReader::Raw(r) => r.read(buf),
101 ZipFileReader::Stored(r) => r.read(buf),
102 #[cfg(any(
103 feature = "deflate",
104 feature = "deflate-miniz",
105 feature = "deflate-zlib"
106 ))]
107 ZipFileReader::Deflated(r) => r.read(buf),
108 #[cfg(feature = "bzip2")]
109 ZipFileReader::Bzip2(r) => r.read(buf),
110 }
111 }
112 }
113
114 impl<'a> ZipFileReader<'a> {
115 /// Consumes this decoder, returning the underlying reader.
into_inner(self) -> io::Take<&'a mut dyn Read>116 pub fn into_inner(self) -> io::Take<&'a mut dyn Read> {
117 match self {
118 ZipFileReader::NoReader => panic!("ZipFileReader was in an invalid state"),
119 ZipFileReader::Raw(r) => r,
120 ZipFileReader::Stored(r) => r.into_inner().into_inner(),
121 #[cfg(any(
122 feature = "deflate",
123 feature = "deflate-miniz",
124 feature = "deflate-zlib"
125 ))]
126 ZipFileReader::Deflated(r) => r.into_inner().into_inner().into_inner(),
127 #[cfg(feature = "bzip2")]
128 ZipFileReader::Bzip2(r) => r.into_inner().into_inner().into_inner(),
129 }
130 }
131 }
132
133 /// A struct for reading a zip file
134 pub struct ZipFile<'a> {
135 data: Cow<'a, ZipFileData>,
136 crypto_reader: Option<CryptoReader<'a>>,
137 reader: ZipFileReader<'a>,
138 }
139
find_content<'a>( data: &mut ZipFileData, reader: &'a mut (impl Read + Seek), ) -> ZipResult<io::Take<&'a mut dyn Read>>140 fn find_content<'a>(
141 data: &mut ZipFileData,
142 reader: &'a mut (impl Read + Seek),
143 ) -> ZipResult<io::Take<&'a mut dyn Read>> {
144 // Parse local header
145 reader.seek(io::SeekFrom::Start(data.header_start))?;
146 let signature = reader.read_u32::<LittleEndian>()?;
147 if signature != spec::LOCAL_FILE_HEADER_SIGNATURE {
148 return Err(ZipError::InvalidArchive("Invalid local file header"));
149 }
150
151 reader.seek(io::SeekFrom::Current(22))?;
152 let file_name_length = reader.read_u16::<LittleEndian>()? as u64;
153 let extra_field_length = reader.read_u16::<LittleEndian>()? as u64;
154 let magic_and_header = 4 + 22 + 2 + 2;
155 data.data_start = data.header_start + magic_and_header + file_name_length + extra_field_length;
156
157 reader.seek(io::SeekFrom::Start(data.data_start))?;
158 Ok((reader as &mut dyn Read).take(data.compressed_size))
159 }
160
make_crypto_reader<'a>( compression_method: crate::compression::CompressionMethod, crc32: u32, reader: io::Take<&'a mut dyn io::Read>, password: Option<&[u8]>, ) -> ZipResult<Result<CryptoReader<'a>, InvalidPassword>>161 fn make_crypto_reader<'a>(
162 compression_method: crate::compression::CompressionMethod,
163 crc32: u32,
164 reader: io::Take<&'a mut dyn io::Read>,
165 password: Option<&[u8]>,
166 ) -> ZipResult<Result<CryptoReader<'a>, InvalidPassword>> {
167 #[allow(deprecated)]
168 {
169 if let CompressionMethod::Unsupported(_) = compression_method {
170 return unsupported_zip_error("Compression method not supported");
171 }
172 }
173
174 let reader = match password {
175 None => CryptoReader::Plaintext(reader),
176 Some(password) => match ZipCryptoReader::new(reader, password).validate(crc32)? {
177 None => return Ok(Err(InvalidPassword)),
178 Some(r) => CryptoReader::ZipCrypto(r),
179 },
180 };
181 Ok(Ok(reader))
182 }
183
make_reader<'a>( compression_method: CompressionMethod, crc32: u32, reader: CryptoReader<'a>, ) -> ZipFileReader<'a>184 fn make_reader<'a>(
185 compression_method: CompressionMethod,
186 crc32: u32,
187 reader: CryptoReader<'a>,
188 ) -> ZipFileReader<'a> {
189 match compression_method {
190 CompressionMethod::Stored => ZipFileReader::Stored(Crc32Reader::new(reader, crc32)),
191 #[cfg(any(
192 feature = "deflate",
193 feature = "deflate-miniz",
194 feature = "deflate-zlib"
195 ))]
196 CompressionMethod::Deflated => {
197 let deflate_reader = DeflateDecoder::new(reader);
198 ZipFileReader::Deflated(Crc32Reader::new(deflate_reader, crc32))
199 }
200 #[cfg(feature = "bzip2")]
201 CompressionMethod::Bzip2 => {
202 let bzip2_reader = BzDecoder::new(reader);
203 ZipFileReader::Bzip2(Crc32Reader::new(bzip2_reader, crc32))
204 }
205 _ => panic!("Compression method not supported"),
206 }
207 }
208
209 impl<R: Read + io::Seek> ZipArchive<R> {
210 /// Get the directory start offset and number of files. This is done in a
211 /// separate function to ease the control flow design.
get_directory_counts( reader: &mut R, footer: &spec::CentralDirectoryEnd, cde_start_pos: u64, ) -> ZipResult<(u64, u64, usize)>212 fn get_directory_counts(
213 reader: &mut R,
214 footer: &spec::CentralDirectoryEnd,
215 cde_start_pos: u64,
216 ) -> ZipResult<(u64, u64, usize)> {
217 // See if there's a ZIP64 footer. The ZIP64 locator if present will
218 // have its signature 20 bytes in front of the standard footer. The
219 // standard footer, in turn, is 22+N bytes large, where N is the
220 // comment length. Therefore:
221 let zip64locator = if reader
222 .seek(io::SeekFrom::End(
223 -(20 + 22 + footer.zip_file_comment.len() as i64),
224 ))
225 .is_ok()
226 {
227 match spec::Zip64CentralDirectoryEndLocator::parse(reader) {
228 Ok(loc) => Some(loc),
229 Err(ZipError::InvalidArchive(_)) => {
230 // No ZIP64 header; that's actually fine. We're done here.
231 None
232 }
233 Err(e) => {
234 // Yikes, a real problem
235 return Err(e);
236 }
237 }
238 } else {
239 // Empty Zip files will have nothing else so this error might be fine. If
240 // not, we'll find out soon.
241 None
242 };
243
244 match zip64locator {
245 None => {
246 // Some zip files have data prepended to them, resulting in the
247 // offsets all being too small. Get the amount of error by comparing
248 // the actual file position we found the CDE at with the offset
249 // recorded in the CDE.
250 let archive_offset = cde_start_pos
251 .checked_sub(footer.central_directory_size as u64)
252 .and_then(|x| x.checked_sub(footer.central_directory_offset as u64))
253 .ok_or(ZipError::InvalidArchive(
254 "Invalid central directory size or offset",
255 ))?;
256
257 let directory_start = footer.central_directory_offset as u64 + archive_offset;
258 let number_of_files = footer.number_of_files_on_this_disk as usize;
259 Ok((archive_offset, directory_start, number_of_files))
260 }
261 Some(locator64) => {
262 // If we got here, this is indeed a ZIP64 file.
263
264 if footer.disk_number as u32 != locator64.disk_with_central_directory {
265 return unsupported_zip_error(
266 "Support for multi-disk files is not implemented",
267 );
268 }
269
270 // We need to reassess `archive_offset`. We know where the ZIP64
271 // central-directory-end structure *should* be, but unfortunately we
272 // don't know how to precisely relate that location to our current
273 // actual offset in the file, since there may be junk at its
274 // beginning. Therefore we need to perform another search, as in
275 // read::CentralDirectoryEnd::find_and_parse, except now we search
276 // forward.
277
278 let search_upper_bound = cde_start_pos
279 .checked_sub(60) // minimum size of Zip64CentralDirectoryEnd + Zip64CentralDirectoryEndLocator
280 .ok_or(ZipError::InvalidArchive(
281 "File cannot contain ZIP64 central directory end",
282 ))?;
283 let (footer, archive_offset) = spec::Zip64CentralDirectoryEnd::find_and_parse(
284 reader,
285 locator64.end_of_central_directory_offset,
286 search_upper_bound,
287 )?;
288
289 if footer.disk_number != footer.disk_with_central_directory {
290 return unsupported_zip_error(
291 "Support for multi-disk files is not implemented",
292 );
293 }
294
295 let directory_start = footer
296 .central_directory_offset
297 .checked_add(archive_offset)
298 .ok_or_else(|| {
299 ZipError::InvalidArchive("Invalid central directory size or offset")
300 })?;
301
302 Ok((
303 archive_offset,
304 directory_start,
305 footer.number_of_files as usize,
306 ))
307 }
308 }
309 }
310
311 /// Read a ZIP archive, collecting the files it contains
312 ///
313 /// This uses the central directory record of the ZIP file, and ignores local file headers
new(mut reader: R) -> ZipResult<ZipArchive<R>>314 pub fn new(mut reader: R) -> ZipResult<ZipArchive<R>> {
315 let (footer, cde_start_pos) = spec::CentralDirectoryEnd::find_and_parse(&mut reader)?;
316
317 if footer.disk_number != footer.disk_with_central_directory {
318 return unsupported_zip_error("Support for multi-disk files is not implemented");
319 }
320
321 let (archive_offset, directory_start, number_of_files) =
322 Self::get_directory_counts(&mut reader, &footer, cde_start_pos)?;
323
324 let mut files = Vec::new();
325 let mut names_map = HashMap::new();
326
327 if let Err(_) = reader.seek(io::SeekFrom::Start(directory_start)) {
328 return Err(ZipError::InvalidArchive(
329 "Could not seek to start of central directory",
330 ));
331 }
332
333 for _ in 0..number_of_files {
334 let file = central_header_to_zip_file(&mut reader, archive_offset)?;
335 names_map.insert(file.file_name.clone(), files.len());
336 files.push(file);
337 }
338
339 Ok(ZipArchive {
340 reader,
341 files,
342 names_map,
343 offset: archive_offset,
344 comment: footer.zip_file_comment,
345 })
346 }
347 /// Extract a Zip archive into a directory, overwriting files if they
348 /// already exist. Paths are sanitized with [`ZipFile::enclosed_name`].
349 ///
350 /// Extraction is not atomic; If an error is encountered, some of the files
351 /// may be left on disk.
extract<P: AsRef<Path>>(&mut self, directory: P) -> ZipResult<()>352 pub fn extract<P: AsRef<Path>>(&mut self, directory: P) -> ZipResult<()> {
353 use std::fs;
354
355 for i in 0..self.len() {
356 let mut file = self.by_index(i)?;
357 let filepath = file
358 .enclosed_name()
359 .ok_or(ZipError::InvalidArchive("Invalid file path"))?;
360
361 let outpath = directory.as_ref().join(filepath);
362
363 if file.name().ends_with('/') {
364 fs::create_dir_all(&outpath)?;
365 } else {
366 if let Some(p) = outpath.parent() {
367 if !p.exists() {
368 fs::create_dir_all(&p)?;
369 }
370 }
371 let mut outfile = fs::File::create(&outpath)?;
372 io::copy(&mut file, &mut outfile)?;
373 }
374 // Get and Set permissions
375 #[cfg(unix)]
376 {
377 use std::os::unix::fs::PermissionsExt;
378 if let Some(mode) = file.unix_mode() {
379 fs::set_permissions(&outpath, fs::Permissions::from_mode(mode))?;
380 }
381 }
382 }
383 Ok(())
384 }
385
386 /// Number of files contained in this zip.
len(&self) -> usize387 pub fn len(&self) -> usize {
388 self.files.len()
389 }
390
391 /// Whether this zip archive contains no files
is_empty(&self) -> bool392 pub fn is_empty(&self) -> bool {
393 self.len() == 0
394 }
395
396 /// Get the offset from the beginning of the underlying reader that this zip begins at, in bytes.
397 ///
398 /// Normally this value is zero, but if the zip has arbitrary data prepended to it, then this value will be the size
399 /// of that prepended data.
offset(&self) -> u64400 pub fn offset(&self) -> u64 {
401 self.offset
402 }
403
404 /// Get the comment of the zip archive.
comment(&self) -> &[u8]405 pub fn comment(&self) -> &[u8] {
406 &self.comment
407 }
408
409 /// Returns an iterator over all the file and directory names in this archive.
file_names(&self) -> impl Iterator<Item = &str>410 pub fn file_names(&self) -> impl Iterator<Item = &str> {
411 self.names_map.keys().map(|s| s.as_str())
412 }
413
414 /// Search for a file entry by name, decrypt with given password
by_name_decrypt<'a>( &'a mut self, name: &str, password: &[u8], ) -> ZipResult<Result<ZipFile<'a>, InvalidPassword>>415 pub fn by_name_decrypt<'a>(
416 &'a mut self,
417 name: &str,
418 password: &[u8],
419 ) -> ZipResult<Result<ZipFile<'a>, InvalidPassword>> {
420 self.by_name_with_optional_password(name, Some(password))
421 }
422
423 /// Search for a file entry by name
by_name<'a>(&'a mut self, name: &str) -> ZipResult<ZipFile<'a>>424 pub fn by_name<'a>(&'a mut self, name: &str) -> ZipResult<ZipFile<'a>> {
425 Ok(self.by_name_with_optional_password(name, None)?.unwrap())
426 }
427
by_name_with_optional_password<'a>( &'a mut self, name: &str, password: Option<&[u8]>, ) -> ZipResult<Result<ZipFile<'a>, InvalidPassword>>428 fn by_name_with_optional_password<'a>(
429 &'a mut self,
430 name: &str,
431 password: Option<&[u8]>,
432 ) -> ZipResult<Result<ZipFile<'a>, InvalidPassword>> {
433 let index = match self.names_map.get(name) {
434 Some(index) => *index,
435 None => {
436 return Err(ZipError::FileNotFound);
437 }
438 };
439 self.by_index_with_optional_password(index, password)
440 }
441
442 /// Get a contained file by index, decrypt with given password
by_index_decrypt<'a>( &'a mut self, file_number: usize, password: &[u8], ) -> ZipResult<Result<ZipFile<'a>, InvalidPassword>>443 pub fn by_index_decrypt<'a>(
444 &'a mut self,
445 file_number: usize,
446 password: &[u8],
447 ) -> ZipResult<Result<ZipFile<'a>, InvalidPassword>> {
448 self.by_index_with_optional_password(file_number, Some(password))
449 }
450
451 /// Get a contained file by index
by_index<'a>(&'a mut self, file_number: usize) -> ZipResult<ZipFile<'a>>452 pub fn by_index<'a>(&'a mut self, file_number: usize) -> ZipResult<ZipFile<'a>> {
453 Ok(self
454 .by_index_with_optional_password(file_number, None)?
455 .unwrap())
456 }
457
458 /// Get a contained file by index without decompressing it
by_index_raw<'a>(&'a mut self, file_number: usize) -> ZipResult<ZipFile<'a>>459 pub fn by_index_raw<'a>(&'a mut self, file_number: usize) -> ZipResult<ZipFile<'a>> {
460 let reader = &mut self.reader;
461 self.files
462 .get_mut(file_number)
463 .ok_or(ZipError::FileNotFound)
464 .and_then(move |data| {
465 Ok(ZipFile {
466 crypto_reader: None,
467 reader: ZipFileReader::Raw(find_content(data, reader)?),
468 data: Cow::Borrowed(data),
469 })
470 })
471 }
472
by_index_with_optional_password<'a>( &'a mut self, file_number: usize, mut password: Option<&[u8]>, ) -> ZipResult<Result<ZipFile<'a>, InvalidPassword>>473 fn by_index_with_optional_password<'a>(
474 &'a mut self,
475 file_number: usize,
476 mut password: Option<&[u8]>,
477 ) -> ZipResult<Result<ZipFile<'a>, InvalidPassword>> {
478 if file_number >= self.files.len() {
479 return Err(ZipError::FileNotFound);
480 }
481 let data = &mut self.files[file_number];
482
483 match (password, data.encrypted) {
484 (None, true) => {
485 return Err(ZipError::UnsupportedArchive(
486 "Password required to decrypt file",
487 ))
488 }
489 (Some(_), false) => password = None, //Password supplied, but none needed! Discard.
490 _ => {}
491 }
492 let limit_reader = find_content(data, &mut self.reader)?;
493
494 match make_crypto_reader(data.compression_method, data.crc32, limit_reader, password) {
495 Ok(Ok(crypto_reader)) => Ok(Ok(ZipFile {
496 crypto_reader: Some(crypto_reader),
497 reader: ZipFileReader::NoReader,
498 data: Cow::Borrowed(data),
499 })),
500 Err(e) => Err(e),
501 Ok(Err(e)) => Ok(Err(e)),
502 }
503 }
504
505 /// Unwrap and return the inner reader object
506 ///
507 /// The position of the reader is undefined.
into_inner(self) -> R508 pub fn into_inner(self) -> R {
509 self.reader
510 }
511 }
512
unsupported_zip_error<T>(detail: &'static str) -> ZipResult<T>513 fn unsupported_zip_error<T>(detail: &'static str) -> ZipResult<T> {
514 Err(ZipError::UnsupportedArchive(detail))
515 }
516
central_header_to_zip_file<R: Read + io::Seek>( reader: &mut R, archive_offset: u64, ) -> ZipResult<ZipFileData>517 fn central_header_to_zip_file<R: Read + io::Seek>(
518 reader: &mut R,
519 archive_offset: u64,
520 ) -> ZipResult<ZipFileData> {
521 let central_header_start = reader.seek(io::SeekFrom::Current(0))?;
522 // Parse central header
523 let signature = reader.read_u32::<LittleEndian>()?;
524 if signature != spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE {
525 return Err(ZipError::InvalidArchive("Invalid Central Directory header"));
526 }
527
528 let version_made_by = reader.read_u16::<LittleEndian>()?;
529 let _version_to_extract = reader.read_u16::<LittleEndian>()?;
530 let flags = reader.read_u16::<LittleEndian>()?;
531 let encrypted = flags & 1 == 1;
532 let is_utf8 = flags & (1 << 11) != 0;
533 let compression_method = reader.read_u16::<LittleEndian>()?;
534 let last_mod_time = reader.read_u16::<LittleEndian>()?;
535 let last_mod_date = reader.read_u16::<LittleEndian>()?;
536 let crc32 = reader.read_u32::<LittleEndian>()?;
537 let compressed_size = reader.read_u32::<LittleEndian>()?;
538 let uncompressed_size = reader.read_u32::<LittleEndian>()?;
539 let file_name_length = reader.read_u16::<LittleEndian>()? as usize;
540 let extra_field_length = reader.read_u16::<LittleEndian>()? as usize;
541 let file_comment_length = reader.read_u16::<LittleEndian>()? as usize;
542 let _disk_number = reader.read_u16::<LittleEndian>()?;
543 let _internal_file_attributes = reader.read_u16::<LittleEndian>()?;
544 let external_file_attributes = reader.read_u32::<LittleEndian>()?;
545 let offset = reader.read_u32::<LittleEndian>()? as u64;
546 let mut file_name_raw = vec![0; file_name_length];
547 reader.read_exact(&mut file_name_raw)?;
548 let mut extra_field = vec![0; extra_field_length];
549 reader.read_exact(&mut extra_field)?;
550 let mut file_comment_raw = vec![0; file_comment_length];
551 reader.read_exact(&mut file_comment_raw)?;
552
553 let file_name = match is_utf8 {
554 true => String::from_utf8_lossy(&*file_name_raw).into_owned(),
555 false => file_name_raw.clone().from_cp437(),
556 };
557 let file_comment = match is_utf8 {
558 true => String::from_utf8_lossy(&*file_comment_raw).into_owned(),
559 false => file_comment_raw.from_cp437(),
560 };
561
562 // Construct the result
563 let mut result = ZipFileData {
564 system: System::from_u8((version_made_by >> 8) as u8),
565 version_made_by: version_made_by as u8,
566 encrypted,
567 compression_method: {
568 #[allow(deprecated)]
569 CompressionMethod::from_u16(compression_method)
570 },
571 last_modified_time: DateTime::from_msdos(last_mod_date, last_mod_time),
572 crc32,
573 compressed_size: compressed_size as u64,
574 uncompressed_size: uncompressed_size as u64,
575 file_name,
576 file_name_raw,
577 file_comment,
578 header_start: offset,
579 central_header_start,
580 data_start: 0,
581 external_attributes: external_file_attributes,
582 };
583
584 match parse_extra_field(&mut result, &*extra_field) {
585 Ok(..) | Err(ZipError::Io(..)) => {}
586 Err(e) => return Err(e),
587 }
588
589 // Account for shifted zip offsets.
590 result.header_start += archive_offset;
591
592 Ok(result)
593 }
594
parse_extra_field(file: &mut ZipFileData, data: &[u8]) -> ZipResult<()>595 fn parse_extra_field(file: &mut ZipFileData, data: &[u8]) -> ZipResult<()> {
596 let mut reader = io::Cursor::new(data);
597
598 while (reader.position() as usize) < data.len() {
599 let kind = reader.read_u16::<LittleEndian>()?;
600 let len = reader.read_u16::<LittleEndian>()?;
601 let mut len_left = len as i64;
602 // Zip64 extended information extra field
603 if kind == 0x0001 {
604 if file.uncompressed_size == 0xFFFFFFFF {
605 file.uncompressed_size = reader.read_u64::<LittleEndian>()?;
606 len_left -= 8;
607 }
608 if file.compressed_size == 0xFFFFFFFF {
609 file.compressed_size = reader.read_u64::<LittleEndian>()?;
610 len_left -= 8;
611 }
612 if file.header_start == 0xFFFFFFFF {
613 file.header_start = reader.read_u64::<LittleEndian>()?;
614 len_left -= 8;
615 }
616 // Unparsed fields:
617 // u32: disk start number
618 }
619
620 // We could also check for < 0 to check for errors
621 if len_left > 0 {
622 reader.seek(io::SeekFrom::Current(len_left))?;
623 }
624 }
625 Ok(())
626 }
627
628 /// Methods for retrieving information on zip files
629 impl<'a> ZipFile<'a> {
get_reader(&mut self) -> &mut ZipFileReader<'a>630 fn get_reader(&mut self) -> &mut ZipFileReader<'a> {
631 if let ZipFileReader::NoReader = self.reader {
632 let data = &self.data;
633 let crypto_reader = self.crypto_reader.take().expect("Invalid reader state");
634 self.reader = make_reader(data.compression_method, data.crc32, crypto_reader)
635 }
636 &mut self.reader
637 }
638
get_raw_reader(&mut self) -> &mut dyn Read639 pub(crate) fn get_raw_reader(&mut self) -> &mut dyn Read {
640 if let ZipFileReader::NoReader = self.reader {
641 let crypto_reader = self.crypto_reader.take().expect("Invalid reader state");
642 self.reader = ZipFileReader::Raw(crypto_reader.into_inner())
643 }
644 &mut self.reader
645 }
646
647 /// Get the version of the file
version_made_by(&self) -> (u8, u8)648 pub fn version_made_by(&self) -> (u8, u8) {
649 (
650 self.data.version_made_by / 10,
651 self.data.version_made_by % 10,
652 )
653 }
654
655 /// Get the name of the file
656 ///
657 /// # Warnings
658 ///
659 /// It is dangerous to use this name directly when extracting an archive.
660 /// It may contain an absolute path (`/etc/shadow`), or break out of the
661 /// current directory (`../runtime`). Carelessly writing to these paths
662 /// allows an attacker to craft a ZIP archive that will overwrite critical
663 /// files.
664 ///
665 /// You can use the [`ZipFile::enclosed_name`] method to validate the name
666 /// as a safe path.
name(&self) -> &str667 pub fn name(&self) -> &str {
668 &self.data.file_name
669 }
670
671 /// Get the name of the file, in the raw (internal) byte representation.
672 ///
673 /// The encoding of this data is currently undefined.
name_raw(&self) -> &[u8]674 pub fn name_raw(&self) -> &[u8] {
675 &self.data.file_name_raw
676 }
677
678 /// Get the name of the file in a sanitized form. It truncates the name to the first NULL byte,
679 /// removes a leading '/' and removes '..' parts.
680 #[deprecated(
681 since = "0.5.7",
682 note = "by stripping `..`s from the path, the meaning of paths can change.
683 `mangled_name` can be used if this behaviour is desirable"
684 )]
sanitized_name(&self) -> ::std::path::PathBuf685 pub fn sanitized_name(&self) -> ::std::path::PathBuf {
686 self.mangled_name()
687 }
688
689 /// Rewrite the path, ignoring any path components with special meaning.
690 ///
691 /// - Absolute paths are made relative
692 /// - [`ParentDir`]s are ignored
693 /// - Truncates the filename at a NULL byte
694 ///
695 /// This is appropriate if you need to be able to extract *something* from
696 /// any archive, but will easily misrepresent trivial paths like
697 /// `foo/../bar` as `foo/bar` (instead of `bar`). Because of this,
698 /// [`ZipFile::enclosed_name`] is the better option in most scenarios.
699 ///
700 /// [`ParentDir`]: `Component::ParentDir`
mangled_name(&self) -> ::std::path::PathBuf701 pub fn mangled_name(&self) -> ::std::path::PathBuf {
702 self.data.file_name_sanitized()
703 }
704
705 /// Ensure the file path is safe to use as a [`Path`].
706 ///
707 /// - It can't contain NULL bytes
708 /// - It can't resolve to a path outside the current directory
709 /// > `foo/../bar` is fine, `foo/../../bar` is not.
710 /// - It can't be an absolute path
711 ///
712 /// This will read well-formed ZIP files correctly, and is resistant
713 /// to path-based exploits. It is recommended over
714 /// [`ZipFile::mangled_name`].
enclosed_name(&self) -> Option<&Path>715 pub fn enclosed_name(&self) -> Option<&Path> {
716 if self.data.file_name.contains('\0') {
717 return None;
718 }
719 let path = Path::new(&self.data.file_name);
720 let mut depth = 0usize;
721 for component in path.components() {
722 match component {
723 Component::Prefix(_) | Component::RootDir => return None,
724 Component::ParentDir => depth = depth.checked_sub(1)?,
725 Component::Normal(_) => depth += 1,
726 Component::CurDir => (),
727 }
728 }
729 Some(path)
730 }
731
732 /// Get the comment of the file
comment(&self) -> &str733 pub fn comment(&self) -> &str {
734 &self.data.file_comment
735 }
736
737 /// Get the compression method used to store the file
compression(&self) -> CompressionMethod738 pub fn compression(&self) -> CompressionMethod {
739 self.data.compression_method
740 }
741
742 /// Get the size of the file in the archive
compressed_size(&self) -> u64743 pub fn compressed_size(&self) -> u64 {
744 self.data.compressed_size
745 }
746
747 /// Get the size of the file when uncompressed
size(&self) -> u64748 pub fn size(&self) -> u64 {
749 self.data.uncompressed_size
750 }
751
752 /// Get the time the file was last modified
last_modified(&self) -> DateTime753 pub fn last_modified(&self) -> DateTime {
754 self.data.last_modified_time
755 }
756 /// Returns whether the file is actually a directory
is_dir(&self) -> bool757 pub fn is_dir(&self) -> bool {
758 self.name()
759 .chars()
760 .rev()
761 .next()
762 .map_or(false, |c| c == '/' || c == '\\')
763 }
764
765 /// Returns whether the file is a regular file
is_file(&self) -> bool766 pub fn is_file(&self) -> bool {
767 !self.is_dir()
768 }
769
770 /// Get unix mode for the file
unix_mode(&self) -> Option<u32>771 pub fn unix_mode(&self) -> Option<u32> {
772 if self.data.external_attributes == 0 {
773 return None;
774 }
775
776 match self.data.system {
777 System::Unix => Some(self.data.external_attributes >> 16),
778 System::Dos => {
779 // Interpret MSDOS directory bit
780 let mut mode = if 0x10 == (self.data.external_attributes & 0x10) {
781 ffi::S_IFDIR | 0o0775
782 } else {
783 ffi::S_IFREG | 0o0664
784 };
785 if 0x01 == (self.data.external_attributes & 0x01) {
786 // Read-only bit; strip write permissions
787 mode &= 0o0555;
788 }
789 Some(mode)
790 }
791 _ => None,
792 }
793 }
794
795 /// Get the CRC32 hash of the original file
crc32(&self) -> u32796 pub fn crc32(&self) -> u32 {
797 self.data.crc32
798 }
799
800 /// Get the starting offset of the data of the compressed file
data_start(&self) -> u64801 pub fn data_start(&self) -> u64 {
802 self.data.data_start
803 }
804
805 /// Get the starting offset of the zip header for this file
header_start(&self) -> u64806 pub fn header_start(&self) -> u64 {
807 self.data.header_start
808 }
809 /// Get the starting offset of the zip header in the central directory for this file
central_header_start(&self) -> u64810 pub fn central_header_start(&self) -> u64 {
811 self.data.central_header_start
812 }
813 }
814
815 impl<'a> Read for ZipFile<'a> {
read(&mut self, buf: &mut [u8]) -> io::Result<usize>816 fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
817 self.get_reader().read(buf)
818 }
819 }
820
821 impl<'a> Drop for ZipFile<'a> {
drop(&mut self)822 fn drop(&mut self) {
823 // self.data is Owned, this reader is constructed by a streaming reader.
824 // In this case, we want to exhaust the reader so that the next file is accessible.
825 if let Cow::Owned(_) = self.data {
826 let mut buffer = [0; 1 << 16];
827
828 // Get the inner `Take` reader so all decryption, decompression and CRC calculation is skipped.
829 let mut reader: std::io::Take<&mut dyn std::io::Read> = match &mut self.reader {
830 ZipFileReader::NoReader => {
831 let innerreader = ::std::mem::replace(&mut self.crypto_reader, None);
832 innerreader.expect("Invalid reader state").into_inner()
833 }
834 reader => {
835 let innerreader = ::std::mem::replace(reader, ZipFileReader::NoReader);
836 innerreader.into_inner()
837 }
838 };
839
840 loop {
841 match reader.read(&mut buffer) {
842 Ok(0) => break,
843 Ok(_) => (),
844 Err(e) => panic!(
845 "Could not consume all of the output of the current ZipFile: {:?}",
846 e
847 ),
848 }
849 }
850 }
851 }
852 }
853
854 /// Read ZipFile structures from a non-seekable reader.
855 ///
856 /// This is an alternative method to read a zip file. If possible, use the ZipArchive functions
857 /// as some information will be missing when reading this manner.
858 ///
859 /// Reads a file header from the start of the stream. Will return `Ok(Some(..))` if a file is
860 /// present at the start of the stream. Returns `Ok(None)` if the start of the central directory
861 /// is encountered. No more files should be read after this.
862 ///
863 /// The Drop implementation of ZipFile ensures that the reader will be correctly positioned after
864 /// the structure is done.
865 ///
866 /// Missing fields are:
867 /// * `comment`: set to an empty string
868 /// * `data_start`: set to 0
869 /// * `external_attributes`: `unix_mode()`: will return None
read_zipfile_from_stream<'a, R: io::Read>( reader: &'a mut R, ) -> ZipResult<Option<ZipFile<'_>>>870 pub fn read_zipfile_from_stream<'a, R: io::Read>(
871 reader: &'a mut R,
872 ) -> ZipResult<Option<ZipFile<'_>>> {
873 let signature = reader.read_u32::<LittleEndian>()?;
874
875 match signature {
876 spec::LOCAL_FILE_HEADER_SIGNATURE => (),
877 spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE => return Ok(None),
878 _ => return Err(ZipError::InvalidArchive("Invalid local file header")),
879 }
880
881 let version_made_by = reader.read_u16::<LittleEndian>()?;
882 let flags = reader.read_u16::<LittleEndian>()?;
883 let encrypted = flags & 1 == 1;
884 let is_utf8 = flags & (1 << 11) != 0;
885 let using_data_descriptor = flags & (1 << 3) != 0;
886 #[allow(deprecated)]
887 let compression_method = CompressionMethod::from_u16(reader.read_u16::<LittleEndian>()?);
888 let last_mod_time = reader.read_u16::<LittleEndian>()?;
889 let last_mod_date = reader.read_u16::<LittleEndian>()?;
890 let crc32 = reader.read_u32::<LittleEndian>()?;
891 let compressed_size = reader.read_u32::<LittleEndian>()?;
892 let uncompressed_size = reader.read_u32::<LittleEndian>()?;
893 let file_name_length = reader.read_u16::<LittleEndian>()? as usize;
894 let extra_field_length = reader.read_u16::<LittleEndian>()? as usize;
895
896 let mut file_name_raw = vec![0; file_name_length];
897 reader.read_exact(&mut file_name_raw)?;
898 let mut extra_field = vec![0; extra_field_length];
899 reader.read_exact(&mut extra_field)?;
900
901 let file_name = match is_utf8 {
902 true => String::from_utf8_lossy(&*file_name_raw).into_owned(),
903 false => file_name_raw.clone().from_cp437(),
904 };
905
906 let mut result = ZipFileData {
907 system: System::from_u8((version_made_by >> 8) as u8),
908 version_made_by: version_made_by as u8,
909 encrypted,
910 compression_method,
911 last_modified_time: DateTime::from_msdos(last_mod_date, last_mod_time),
912 crc32,
913 compressed_size: compressed_size as u64,
914 uncompressed_size: uncompressed_size as u64,
915 file_name,
916 file_name_raw,
917 file_comment: String::new(), // file comment is only available in the central directory
918 // header_start and data start are not available, but also don't matter, since seeking is
919 // not available.
920 header_start: 0,
921 data_start: 0,
922 central_header_start: 0,
923 // The external_attributes field is only available in the central directory.
924 // We set this to zero, which should be valid as the docs state 'If input came
925 // from standard input, this field is set to zero.'
926 external_attributes: 0,
927 };
928
929 match parse_extra_field(&mut result, &extra_field) {
930 Ok(..) | Err(ZipError::Io(..)) => {}
931 Err(e) => return Err(e),
932 }
933
934 if encrypted {
935 return unsupported_zip_error("Encrypted files are not supported");
936 }
937 if using_data_descriptor {
938 return unsupported_zip_error("The file length is not available in the local header");
939 }
940
941 let limit_reader = (reader as &'a mut dyn io::Read).take(result.compressed_size as u64);
942
943 let result_crc32 = result.crc32;
944 let result_compression_method = result.compression_method;
945 let crypto_reader =
946 make_crypto_reader(result_compression_method, result_crc32, limit_reader, None)?.unwrap();
947
948 Ok(Some(ZipFile {
949 data: Cow::Owned(result),
950 crypto_reader: None,
951 reader: make_reader(result_compression_method, result_crc32, crypto_reader),
952 }))
953 }
954
#[cfg(test)]
mod test {
    use super::{read_zipfile_from_stream, ZipArchive};
    use std::io::{self, Read};

    /// Copies fixture bytes into an owned, seekable in-memory reader.
    fn cursor(bytes: &[u8]) -> io::Cursor<Vec<u8>> {
        io::Cursor::new(bytes.to_vec())
    }

    /// Opening `invalid_offset.zip` must fail instead of yielding an archive.
    #[test]
    fn invalid_offset() {
        let source = cursor(include_bytes!("../tests/data/invalid_offset.zip"));
        assert!(ZipArchive::new(source).is_err());
    }

    /// Opening `invalid_offset2.zip` must fail instead of yielding an archive.
    #[test]
    fn invalid_offset2() {
        let source = cursor(include_bytes!("../tests/data/invalid_offset2.zip"));
        assert!(ZipArchive::new(source).is_err());
    }

    /// `zip64_demo.zip` opens successfully and contains exactly one entry.
    #[test]
    fn zip64_with_leading_junk() {
        let source = cursor(include_bytes!("../tests/data/zip64_demo.zip"));
        let reader = ZipArchive::new(source).unwrap();
        assert_eq!(reader.len(), 1);
    }

    /// `mimetype.zip` has an empty archive comment and its first entry's central header
    /// starts at offset 77.
    #[test]
    fn zip_contents() {
        let source = cursor(include_bytes!("../tests/data/mimetype.zip"));
        let mut reader = ZipArchive::new(source).unwrap();
        assert!(reader.comment().is_empty());
        assert_eq!(reader.by_index(0).unwrap().central_header_start(), 77);
    }

    /// The streaming reader walks `mimetype.zip` entry by entry without error until it
    /// reports the central directory with `None`.
    #[test]
    fn zip_read_streaming() {
        let mut reader = cursor(include_bytes!("../tests/data/mimetype.zip"));
        // Each returned entry is dropped at the end of the condition, before the next call.
        while read_zipfile_from_stream(&mut reader).unwrap().is_some() {}
    }

    /// A cloned archive reads independently of the original: interleaved reads on both
    /// yield the same byte sequence.
    #[test]
    fn zip_clone() {
        let source = cursor(include_bytes!("../tests/data/mimetype.zip"));
        let mut reader1 = ZipArchive::new(source).unwrap();
        let mut reader2 = reader1.clone();

        let mut file1 = reader1.by_index(0).unwrap();
        let mut file2 = reader2.by_index(0).unwrap();

        let t = file1.last_modified();
        assert_eq!(
            (
                t.year(),
                t.month(),
                t.day(),
                t.hour(),
                t.minute(),
                t.second()
            ),
            (1980, 1, 1, 0, 0, 0)
        );

        let mut buf1 = [0; 5];
        let mut buf2 = [0; 5];
        let mut buf3 = [0; 5];
        let mut buf4 = [0; 5];

        // `read_exact` rather than `read`: a short read would otherwise leave part of a
        // buffer zeroed and the comparisons below would not be checking file contents.
        file1.read_exact(&mut buf1).unwrap();
        file2.read_exact(&mut buf2).unwrap();
        file1.read_exact(&mut buf3).unwrap();
        file2.read_exact(&mut buf4).unwrap();

        assert_eq!(buf1, buf2);
        assert_eq!(buf3, buf4);
        // The second chunk must differ from the first, proving each reader advanced.
        assert_ne!(buf1, buf3);
    }

    /// Every entry of `files_and_dirs.zip` whose name starts with `dir` is reported as a
    /// directory and every entry whose name starts with `file` is reported as a file.
    #[test]
    fn file_and_dir_predicates() {
        let source = cursor(include_bytes!("../tests/data/files_and_dirs.zip"));
        let mut zip = ZipArchive::new(source).unwrap();

        for i in 0..zip.len() {
            let zip_file = zip.by_index(i).unwrap();
            let full_name = zip_file.enclosed_name().unwrap();
            let file_name = full_name.file_name().unwrap().to_str().unwrap();
            assert!(
                (file_name.starts_with("dir") && zip_file.is_dir())
                    || (file_name.starts_with("file") && zip_file.is_file())
            );
        }
    }
}
1079