• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //! Types for reading ZIP archives
2 
3 use crate::compression::CompressionMethod;
4 use crate::crc32::Crc32Reader;
5 use crate::result::{InvalidPassword, ZipError, ZipResult};
6 use crate::spec;
7 use crate::zipcrypto::ZipCryptoReader;
8 use crate::zipcrypto::ZipCryptoReaderValid;
9 use std::borrow::Cow;
10 use std::collections::HashMap;
11 use std::io::{self, prelude::*};
12 use std::path::{Component, Path};
13 
14 use crate::cp437::FromCp437;
15 use crate::types::{DateTime, System, ZipFileData};
16 use byteorder::{LittleEndian, ReadBytesExt};
17 
18 #[cfg(any(
19     feature = "deflate",
20     feature = "deflate-miniz",
21     feature = "deflate-zlib"
22 ))]
23 use flate2::read::DeflateDecoder;
24 
25 #[cfg(feature = "bzip2")]
26 use bzip2::read::BzDecoder;
27 
/// Unix file-type bit patterns, used by `ZipFile::unix_mode` when it builds
/// a mode from MS-DOS attributes.
mod ffi {
    /// File-type bits marking a directory.
    pub const S_IFDIR: u32 = 0o040_000;
    /// File-type bits marking a regular file.
    pub const S_IFREG: u32 = 0o100_000;
}
32 
33 /// ZIP archive reader
34 ///
35 /// ```no_run
36 /// use std::io::prelude::*;
37 /// fn list_zip_contents(reader: impl Read + Seek) -> zip::result::ZipResult<()> {
38 ///     let mut zip = zip::ZipArchive::new(reader)?;
39 ///
40 ///     for i in 0..zip.len() {
41 ///         let mut file = zip.by_index(i)?;
42 ///         println!("Filename: {}", file.name());
43 ///         std::io::copy(&mut file, &mut std::io::stdout());
44 ///     }
45 ///
46 ///     Ok(())
47 /// }
48 /// ```
49 #[derive(Clone, Debug)]
50 pub struct ZipArchive<R: Read + io::Seek> {
51     reader: R,
52     files: Vec<ZipFileData>,
53     names_map: HashMap<String, usize>,
54     offset: u64,
55     comment: Vec<u8>,
56 }
57 
58 enum CryptoReader<'a> {
59     Plaintext(io::Take<&'a mut dyn Read>),
60     ZipCrypto(ZipCryptoReaderValid<io::Take<&'a mut dyn Read>>),
61 }
62 
63 impl<'a> Read for CryptoReader<'a> {
read(&mut self, buf: &mut [u8]) -> io::Result<usize>64     fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
65         match self {
66             CryptoReader::Plaintext(r) => r.read(buf),
67             CryptoReader::ZipCrypto(r) => r.read(buf),
68         }
69     }
70 }
71 
72 impl<'a> CryptoReader<'a> {
73     /// Consumes this decoder, returning the underlying reader.
into_inner(self) -> io::Take<&'a mut dyn Read>74     pub fn into_inner(self) -> io::Take<&'a mut dyn Read> {
75         match self {
76             CryptoReader::Plaintext(r) => r,
77             CryptoReader::ZipCrypto(r) => r.into_inner(),
78         }
79     }
80 }
81 
82 enum ZipFileReader<'a> {
83     NoReader,
84     Raw(io::Take<&'a mut dyn io::Read>),
85     Stored(Crc32Reader<CryptoReader<'a>>),
86     #[cfg(any(
87         feature = "deflate",
88         feature = "deflate-miniz",
89         feature = "deflate-zlib"
90     ))]
91     Deflated(Crc32Reader<flate2::read::DeflateDecoder<CryptoReader<'a>>>),
92     #[cfg(feature = "bzip2")]
93     Bzip2(Crc32Reader<BzDecoder<CryptoReader<'a>>>),
94 }
95 
96 impl<'a> Read for ZipFileReader<'a> {
read(&mut self, buf: &mut [u8]) -> io::Result<usize>97     fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
98         match self {
99             ZipFileReader::NoReader => panic!("ZipFileReader was in an invalid state"),
100             ZipFileReader::Raw(r) => r.read(buf),
101             ZipFileReader::Stored(r) => r.read(buf),
102             #[cfg(any(
103                 feature = "deflate",
104                 feature = "deflate-miniz",
105                 feature = "deflate-zlib"
106             ))]
107             ZipFileReader::Deflated(r) => r.read(buf),
108             #[cfg(feature = "bzip2")]
109             ZipFileReader::Bzip2(r) => r.read(buf),
110         }
111     }
112 }
113 
114 impl<'a> ZipFileReader<'a> {
115     /// Consumes this decoder, returning the underlying reader.
into_inner(self) -> io::Take<&'a mut dyn Read>116     pub fn into_inner(self) -> io::Take<&'a mut dyn Read> {
117         match self {
118             ZipFileReader::NoReader => panic!("ZipFileReader was in an invalid state"),
119             ZipFileReader::Raw(r) => r,
120             ZipFileReader::Stored(r) => r.into_inner().into_inner(),
121             #[cfg(any(
122                 feature = "deflate",
123                 feature = "deflate-miniz",
124                 feature = "deflate-zlib"
125             ))]
126             ZipFileReader::Deflated(r) => r.into_inner().into_inner().into_inner(),
127             #[cfg(feature = "bzip2")]
128             ZipFileReader::Bzip2(r) => r.into_inner().into_inner().into_inner(),
129         }
130     }
131 }
132 
133 /// A struct for reading a zip file
134 pub struct ZipFile<'a> {
135     data: Cow<'a, ZipFileData>,
136     crypto_reader: Option<CryptoReader<'a>>,
137     reader: ZipFileReader<'a>,
138 }
139 
find_content<'a>( data: &mut ZipFileData, reader: &'a mut (impl Read + Seek), ) -> ZipResult<io::Take<&'a mut dyn Read>>140 fn find_content<'a>(
141     data: &mut ZipFileData,
142     reader: &'a mut (impl Read + Seek),
143 ) -> ZipResult<io::Take<&'a mut dyn Read>> {
144     // Parse local header
145     reader.seek(io::SeekFrom::Start(data.header_start))?;
146     let signature = reader.read_u32::<LittleEndian>()?;
147     if signature != spec::LOCAL_FILE_HEADER_SIGNATURE {
148         return Err(ZipError::InvalidArchive("Invalid local file header"));
149     }
150 
151     reader.seek(io::SeekFrom::Current(22))?;
152     let file_name_length = reader.read_u16::<LittleEndian>()? as u64;
153     let extra_field_length = reader.read_u16::<LittleEndian>()? as u64;
154     let magic_and_header = 4 + 22 + 2 + 2;
155     data.data_start = data.header_start + magic_and_header + file_name_length + extra_field_length;
156 
157     reader.seek(io::SeekFrom::Start(data.data_start))?;
158     Ok((reader as &mut dyn Read).take(data.compressed_size))
159 }
160 
make_crypto_reader<'a>( compression_method: crate::compression::CompressionMethod, crc32: u32, reader: io::Take<&'a mut dyn io::Read>, password: Option<&[u8]>, ) -> ZipResult<Result<CryptoReader<'a>, InvalidPassword>>161 fn make_crypto_reader<'a>(
162     compression_method: crate::compression::CompressionMethod,
163     crc32: u32,
164     reader: io::Take<&'a mut dyn io::Read>,
165     password: Option<&[u8]>,
166 ) -> ZipResult<Result<CryptoReader<'a>, InvalidPassword>> {
167     #[allow(deprecated)]
168     {
169         if let CompressionMethod::Unsupported(_) = compression_method {
170             return unsupported_zip_error("Compression method not supported");
171         }
172     }
173 
174     let reader = match password {
175         None => CryptoReader::Plaintext(reader),
176         Some(password) => match ZipCryptoReader::new(reader, password).validate(crc32)? {
177             None => return Ok(Err(InvalidPassword)),
178             Some(r) => CryptoReader::ZipCrypto(r),
179         },
180     };
181     Ok(Ok(reader))
182 }
183 
make_reader<'a>( compression_method: CompressionMethod, crc32: u32, reader: CryptoReader<'a>, ) -> ZipFileReader<'a>184 fn make_reader<'a>(
185     compression_method: CompressionMethod,
186     crc32: u32,
187     reader: CryptoReader<'a>,
188 ) -> ZipFileReader<'a> {
189     match compression_method {
190         CompressionMethod::Stored => ZipFileReader::Stored(Crc32Reader::new(reader, crc32)),
191         #[cfg(any(
192             feature = "deflate",
193             feature = "deflate-miniz",
194             feature = "deflate-zlib"
195         ))]
196         CompressionMethod::Deflated => {
197             let deflate_reader = DeflateDecoder::new(reader);
198             ZipFileReader::Deflated(Crc32Reader::new(deflate_reader, crc32))
199         }
200         #[cfg(feature = "bzip2")]
201         CompressionMethod::Bzip2 => {
202             let bzip2_reader = BzDecoder::new(reader);
203             ZipFileReader::Bzip2(Crc32Reader::new(bzip2_reader, crc32))
204         }
205         _ => panic!("Compression method not supported"),
206     }
207 }
208 
209 impl<R: Read + io::Seek> ZipArchive<R> {
210     /// Get the directory start offset and number of files. This is done in a
211     /// separate function to ease the control flow design.
get_directory_counts( reader: &mut R, footer: &spec::CentralDirectoryEnd, cde_start_pos: u64, ) -> ZipResult<(u64, u64, usize)>212     fn get_directory_counts(
213         reader: &mut R,
214         footer: &spec::CentralDirectoryEnd,
215         cde_start_pos: u64,
216     ) -> ZipResult<(u64, u64, usize)> {
217         // See if there's a ZIP64 footer. The ZIP64 locator if present will
218         // have its signature 20 bytes in front of the standard footer. The
219         // standard footer, in turn, is 22+N bytes large, where N is the
220         // comment length. Therefore:
221         let zip64locator = if reader
222             .seek(io::SeekFrom::End(
223                 -(20 + 22 + footer.zip_file_comment.len() as i64),
224             ))
225             .is_ok()
226         {
227             match spec::Zip64CentralDirectoryEndLocator::parse(reader) {
228                 Ok(loc) => Some(loc),
229                 Err(ZipError::InvalidArchive(_)) => {
230                     // No ZIP64 header; that's actually fine. We're done here.
231                     None
232                 }
233                 Err(e) => {
234                     // Yikes, a real problem
235                     return Err(e);
236                 }
237             }
238         } else {
239             // Empty Zip files will have nothing else so this error might be fine. If
240             // not, we'll find out soon.
241             None
242         };
243 
244         match zip64locator {
245             None => {
246                 // Some zip files have data prepended to them, resulting in the
247                 // offsets all being too small. Get the amount of error by comparing
248                 // the actual file position we found the CDE at with the offset
249                 // recorded in the CDE.
250                 let archive_offset = cde_start_pos
251                     .checked_sub(footer.central_directory_size as u64)
252                     .and_then(|x| x.checked_sub(footer.central_directory_offset as u64))
253                     .ok_or(ZipError::InvalidArchive(
254                         "Invalid central directory size or offset",
255                     ))?;
256 
257                 let directory_start = footer.central_directory_offset as u64 + archive_offset;
258                 let number_of_files = footer.number_of_files_on_this_disk as usize;
259                 Ok((archive_offset, directory_start, number_of_files))
260             }
261             Some(locator64) => {
262                 // If we got here, this is indeed a ZIP64 file.
263 
264                 if footer.disk_number as u32 != locator64.disk_with_central_directory {
265                     return unsupported_zip_error(
266                         "Support for multi-disk files is not implemented",
267                     );
268                 }
269 
270                 // We need to reassess `archive_offset`. We know where the ZIP64
271                 // central-directory-end structure *should* be, but unfortunately we
272                 // don't know how to precisely relate that location to our current
273                 // actual offset in the file, since there may be junk at its
274                 // beginning. Therefore we need to perform another search, as in
275                 // read::CentralDirectoryEnd::find_and_parse, except now we search
276                 // forward.
277 
278                 let search_upper_bound = cde_start_pos
279                     .checked_sub(60) // minimum size of Zip64CentralDirectoryEnd + Zip64CentralDirectoryEndLocator
280                     .ok_or(ZipError::InvalidArchive(
281                         "File cannot contain ZIP64 central directory end",
282                     ))?;
283                 let (footer, archive_offset) = spec::Zip64CentralDirectoryEnd::find_and_parse(
284                     reader,
285                     locator64.end_of_central_directory_offset,
286                     search_upper_bound,
287                 )?;
288 
289                 if footer.disk_number != footer.disk_with_central_directory {
290                     return unsupported_zip_error(
291                         "Support for multi-disk files is not implemented",
292                     );
293                 }
294 
295                 let directory_start = footer
296                     .central_directory_offset
297                     .checked_add(archive_offset)
298                     .ok_or_else(|| {
299                         ZipError::InvalidArchive("Invalid central directory size or offset")
300                     })?;
301 
302                 Ok((
303                     archive_offset,
304                     directory_start,
305                     footer.number_of_files as usize,
306                 ))
307             }
308         }
309     }
310 
311     /// Read a ZIP archive, collecting the files it contains
312     ///
313     /// This uses the central directory record of the ZIP file, and ignores local file headers
new(mut reader: R) -> ZipResult<ZipArchive<R>>314     pub fn new(mut reader: R) -> ZipResult<ZipArchive<R>> {
315         let (footer, cde_start_pos) = spec::CentralDirectoryEnd::find_and_parse(&mut reader)?;
316 
317         if footer.disk_number != footer.disk_with_central_directory {
318             return unsupported_zip_error("Support for multi-disk files is not implemented");
319         }
320 
321         let (archive_offset, directory_start, number_of_files) =
322             Self::get_directory_counts(&mut reader, &footer, cde_start_pos)?;
323 
324         let mut files = Vec::new();
325         let mut names_map = HashMap::new();
326 
327         if let Err(_) = reader.seek(io::SeekFrom::Start(directory_start)) {
328             return Err(ZipError::InvalidArchive(
329                 "Could not seek to start of central directory",
330             ));
331         }
332 
333         for _ in 0..number_of_files {
334             let file = central_header_to_zip_file(&mut reader, archive_offset)?;
335             names_map.insert(file.file_name.clone(), files.len());
336             files.push(file);
337         }
338 
339         Ok(ZipArchive {
340             reader,
341             files,
342             names_map,
343             offset: archive_offset,
344             comment: footer.zip_file_comment,
345         })
346     }
347     /// Extract a Zip archive into a directory, overwriting files if they
348     /// already exist. Paths are sanitized with [`ZipFile::enclosed_name`].
349     ///
350     /// Extraction is not atomic; If an error is encountered, some of the files
351     /// may be left on disk.
extract<P: AsRef<Path>>(&mut self, directory: P) -> ZipResult<()>352     pub fn extract<P: AsRef<Path>>(&mut self, directory: P) -> ZipResult<()> {
353         use std::fs;
354 
355         for i in 0..self.len() {
356             let mut file = self.by_index(i)?;
357             let filepath = file
358                 .enclosed_name()
359                 .ok_or(ZipError::InvalidArchive("Invalid file path"))?;
360 
361             let outpath = directory.as_ref().join(filepath);
362 
363             if file.name().ends_with('/') {
364                 fs::create_dir_all(&outpath)?;
365             } else {
366                 if let Some(p) = outpath.parent() {
367                     if !p.exists() {
368                         fs::create_dir_all(&p)?;
369                     }
370                 }
371                 let mut outfile = fs::File::create(&outpath)?;
372                 io::copy(&mut file, &mut outfile)?;
373             }
374             // Get and Set permissions
375             #[cfg(unix)]
376             {
377                 use std::os::unix::fs::PermissionsExt;
378                 if let Some(mode) = file.unix_mode() {
379                     fs::set_permissions(&outpath, fs::Permissions::from_mode(mode))?;
380                 }
381             }
382         }
383         Ok(())
384     }
385 
386     /// Number of files contained in this zip.
len(&self) -> usize387     pub fn len(&self) -> usize {
388         self.files.len()
389     }
390 
391     /// Whether this zip archive contains no files
is_empty(&self) -> bool392     pub fn is_empty(&self) -> bool {
393         self.len() == 0
394     }
395 
396     /// Get the offset from the beginning of the underlying reader that this zip begins at, in bytes.
397     ///
398     /// Normally this value is zero, but if the zip has arbitrary data prepended to it, then this value will be the size
399     /// of that prepended data.
offset(&self) -> u64400     pub fn offset(&self) -> u64 {
401         self.offset
402     }
403 
404     /// Get the comment of the zip archive.
comment(&self) -> &[u8]405     pub fn comment(&self) -> &[u8] {
406         &self.comment
407     }
408 
409     /// Returns an iterator over all the file and directory names in this archive.
file_names(&self) -> impl Iterator<Item = &str>410     pub fn file_names(&self) -> impl Iterator<Item = &str> {
411         self.names_map.keys().map(|s| s.as_str())
412     }
413 
414     /// Search for a file entry by name, decrypt with given password
by_name_decrypt<'a>( &'a mut self, name: &str, password: &[u8], ) -> ZipResult<Result<ZipFile<'a>, InvalidPassword>>415     pub fn by_name_decrypt<'a>(
416         &'a mut self,
417         name: &str,
418         password: &[u8],
419     ) -> ZipResult<Result<ZipFile<'a>, InvalidPassword>> {
420         self.by_name_with_optional_password(name, Some(password))
421     }
422 
423     /// Search for a file entry by name
by_name<'a>(&'a mut self, name: &str) -> ZipResult<ZipFile<'a>>424     pub fn by_name<'a>(&'a mut self, name: &str) -> ZipResult<ZipFile<'a>> {
425         Ok(self.by_name_with_optional_password(name, None)?.unwrap())
426     }
427 
by_name_with_optional_password<'a>( &'a mut self, name: &str, password: Option<&[u8]>, ) -> ZipResult<Result<ZipFile<'a>, InvalidPassword>>428     fn by_name_with_optional_password<'a>(
429         &'a mut self,
430         name: &str,
431         password: Option<&[u8]>,
432     ) -> ZipResult<Result<ZipFile<'a>, InvalidPassword>> {
433         let index = match self.names_map.get(name) {
434             Some(index) => *index,
435             None => {
436                 return Err(ZipError::FileNotFound);
437             }
438         };
439         self.by_index_with_optional_password(index, password)
440     }
441 
442     /// Get a contained file by index, decrypt with given password
by_index_decrypt<'a>( &'a mut self, file_number: usize, password: &[u8], ) -> ZipResult<Result<ZipFile<'a>, InvalidPassword>>443     pub fn by_index_decrypt<'a>(
444         &'a mut self,
445         file_number: usize,
446         password: &[u8],
447     ) -> ZipResult<Result<ZipFile<'a>, InvalidPassword>> {
448         self.by_index_with_optional_password(file_number, Some(password))
449     }
450 
451     /// Get a contained file by index
by_index<'a>(&'a mut self, file_number: usize) -> ZipResult<ZipFile<'a>>452     pub fn by_index<'a>(&'a mut self, file_number: usize) -> ZipResult<ZipFile<'a>> {
453         Ok(self
454             .by_index_with_optional_password(file_number, None)?
455             .unwrap())
456     }
457 
458     /// Get a contained file by index without decompressing it
by_index_raw<'a>(&'a mut self, file_number: usize) -> ZipResult<ZipFile<'a>>459     pub fn by_index_raw<'a>(&'a mut self, file_number: usize) -> ZipResult<ZipFile<'a>> {
460         let reader = &mut self.reader;
461         self.files
462             .get_mut(file_number)
463             .ok_or(ZipError::FileNotFound)
464             .and_then(move |data| {
465                 Ok(ZipFile {
466                     crypto_reader: None,
467                     reader: ZipFileReader::Raw(find_content(data, reader)?),
468                     data: Cow::Borrowed(data),
469                 })
470             })
471     }
472 
by_index_with_optional_password<'a>( &'a mut self, file_number: usize, mut password: Option<&[u8]>, ) -> ZipResult<Result<ZipFile<'a>, InvalidPassword>>473     fn by_index_with_optional_password<'a>(
474         &'a mut self,
475         file_number: usize,
476         mut password: Option<&[u8]>,
477     ) -> ZipResult<Result<ZipFile<'a>, InvalidPassword>> {
478         if file_number >= self.files.len() {
479             return Err(ZipError::FileNotFound);
480         }
481         let data = &mut self.files[file_number];
482 
483         match (password, data.encrypted) {
484             (None, true) => {
485                 return Err(ZipError::UnsupportedArchive(
486                     "Password required to decrypt file",
487                 ))
488             }
489             (Some(_), false) => password = None, //Password supplied, but none needed! Discard.
490             _ => {}
491         }
492         let limit_reader = find_content(data, &mut self.reader)?;
493 
494         match make_crypto_reader(data.compression_method, data.crc32, limit_reader, password) {
495             Ok(Ok(crypto_reader)) => Ok(Ok(ZipFile {
496                 crypto_reader: Some(crypto_reader),
497                 reader: ZipFileReader::NoReader,
498                 data: Cow::Borrowed(data),
499             })),
500             Err(e) => Err(e),
501             Ok(Err(e)) => Ok(Err(e)),
502         }
503     }
504 
505     /// Unwrap and return the inner reader object
506     ///
507     /// The position of the reader is undefined.
into_inner(self) -> R508     pub fn into_inner(self) -> R {
509         self.reader
510     }
511 }
512 
unsupported_zip_error<T>(detail: &'static str) -> ZipResult<T>513 fn unsupported_zip_error<T>(detail: &'static str) -> ZipResult<T> {
514     Err(ZipError::UnsupportedArchive(detail))
515 }
516 
central_header_to_zip_file<R: Read + io::Seek>( reader: &mut R, archive_offset: u64, ) -> ZipResult<ZipFileData>517 fn central_header_to_zip_file<R: Read + io::Seek>(
518     reader: &mut R,
519     archive_offset: u64,
520 ) -> ZipResult<ZipFileData> {
521     let central_header_start = reader.seek(io::SeekFrom::Current(0))?;
522     // Parse central header
523     let signature = reader.read_u32::<LittleEndian>()?;
524     if signature != spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE {
525         return Err(ZipError::InvalidArchive("Invalid Central Directory header"));
526     }
527 
528     let version_made_by = reader.read_u16::<LittleEndian>()?;
529     let _version_to_extract = reader.read_u16::<LittleEndian>()?;
530     let flags = reader.read_u16::<LittleEndian>()?;
531     let encrypted = flags & 1 == 1;
532     let is_utf8 = flags & (1 << 11) != 0;
533     let compression_method = reader.read_u16::<LittleEndian>()?;
534     let last_mod_time = reader.read_u16::<LittleEndian>()?;
535     let last_mod_date = reader.read_u16::<LittleEndian>()?;
536     let crc32 = reader.read_u32::<LittleEndian>()?;
537     let compressed_size = reader.read_u32::<LittleEndian>()?;
538     let uncompressed_size = reader.read_u32::<LittleEndian>()?;
539     let file_name_length = reader.read_u16::<LittleEndian>()? as usize;
540     let extra_field_length = reader.read_u16::<LittleEndian>()? as usize;
541     let file_comment_length = reader.read_u16::<LittleEndian>()? as usize;
542     let _disk_number = reader.read_u16::<LittleEndian>()?;
543     let _internal_file_attributes = reader.read_u16::<LittleEndian>()?;
544     let external_file_attributes = reader.read_u32::<LittleEndian>()?;
545     let offset = reader.read_u32::<LittleEndian>()? as u64;
546     let mut file_name_raw = vec![0; file_name_length];
547     reader.read_exact(&mut file_name_raw)?;
548     let mut extra_field = vec![0; extra_field_length];
549     reader.read_exact(&mut extra_field)?;
550     let mut file_comment_raw = vec![0; file_comment_length];
551     reader.read_exact(&mut file_comment_raw)?;
552 
553     let file_name = match is_utf8 {
554         true => String::from_utf8_lossy(&*file_name_raw).into_owned(),
555         false => file_name_raw.clone().from_cp437(),
556     };
557     let file_comment = match is_utf8 {
558         true => String::from_utf8_lossy(&*file_comment_raw).into_owned(),
559         false => file_comment_raw.from_cp437(),
560     };
561 
562     // Construct the result
563     let mut result = ZipFileData {
564         system: System::from_u8((version_made_by >> 8) as u8),
565         version_made_by: version_made_by as u8,
566         encrypted,
567         compression_method: {
568             #[allow(deprecated)]
569             CompressionMethod::from_u16(compression_method)
570         },
571         last_modified_time: DateTime::from_msdos(last_mod_date, last_mod_time),
572         crc32,
573         compressed_size: compressed_size as u64,
574         uncompressed_size: uncompressed_size as u64,
575         file_name,
576         file_name_raw,
577         file_comment,
578         header_start: offset,
579         central_header_start,
580         data_start: 0,
581         external_attributes: external_file_attributes,
582     };
583 
584     match parse_extra_field(&mut result, &*extra_field) {
585         Ok(..) | Err(ZipError::Io(..)) => {}
586         Err(e) => return Err(e),
587     }
588 
589     // Account for shifted zip offsets.
590     result.header_start += archive_offset;
591 
592     Ok(result)
593 }
594 
parse_extra_field(file: &mut ZipFileData, data: &[u8]) -> ZipResult<()>595 fn parse_extra_field(file: &mut ZipFileData, data: &[u8]) -> ZipResult<()> {
596     let mut reader = io::Cursor::new(data);
597 
598     while (reader.position() as usize) < data.len() {
599         let kind = reader.read_u16::<LittleEndian>()?;
600         let len = reader.read_u16::<LittleEndian>()?;
601         let mut len_left = len as i64;
602         // Zip64 extended information extra field
603         if kind == 0x0001 {
604             if file.uncompressed_size == 0xFFFFFFFF {
605                 file.uncompressed_size = reader.read_u64::<LittleEndian>()?;
606                 len_left -= 8;
607             }
608             if file.compressed_size == 0xFFFFFFFF {
609                 file.compressed_size = reader.read_u64::<LittleEndian>()?;
610                 len_left -= 8;
611             }
612             if file.header_start == 0xFFFFFFFF {
613                 file.header_start = reader.read_u64::<LittleEndian>()?;
614                 len_left -= 8;
615             }
616             // Unparsed fields:
617             // u32: disk start number
618         }
619 
620         // We could also check for < 0 to check for errors
621         if len_left > 0 {
622             reader.seek(io::SeekFrom::Current(len_left))?;
623         }
624     }
625     Ok(())
626 }
627 
628 /// Methods for retrieving information on zip files
629 impl<'a> ZipFile<'a> {
get_reader(&mut self) -> &mut ZipFileReader<'a>630     fn get_reader(&mut self) -> &mut ZipFileReader<'a> {
631         if let ZipFileReader::NoReader = self.reader {
632             let data = &self.data;
633             let crypto_reader = self.crypto_reader.take().expect("Invalid reader state");
634             self.reader = make_reader(data.compression_method, data.crc32, crypto_reader)
635         }
636         &mut self.reader
637     }
638 
get_raw_reader(&mut self) -> &mut dyn Read639     pub(crate) fn get_raw_reader(&mut self) -> &mut dyn Read {
640         if let ZipFileReader::NoReader = self.reader {
641             let crypto_reader = self.crypto_reader.take().expect("Invalid reader state");
642             self.reader = ZipFileReader::Raw(crypto_reader.into_inner())
643         }
644         &mut self.reader
645     }
646 
647     /// Get the version of the file
version_made_by(&self) -> (u8, u8)648     pub fn version_made_by(&self) -> (u8, u8) {
649         (
650             self.data.version_made_by / 10,
651             self.data.version_made_by % 10,
652         )
653     }
654 
655     /// Get the name of the file
656     ///
657     /// # Warnings
658     ///
659     /// It is dangerous to use this name directly when extracting an archive.
660     /// It may contain an absolute path (`/etc/shadow`), or break out of the
661     /// current directory (`../runtime`). Carelessly writing to these paths
662     /// allows an attacker to craft a ZIP archive that will overwrite critical
663     /// files.
664     ///
665     /// You can use the [`ZipFile::enclosed_name`] method to validate the name
666     /// as a safe path.
name(&self) -> &str667     pub fn name(&self) -> &str {
668         &self.data.file_name
669     }
670 
671     /// Get the name of the file, in the raw (internal) byte representation.
672     ///
673     /// The encoding of this data is currently undefined.
name_raw(&self) -> &[u8]674     pub fn name_raw(&self) -> &[u8] {
675         &self.data.file_name_raw
676     }
677 
678     /// Get the name of the file in a sanitized form. It truncates the name to the first NULL byte,
679     /// removes a leading '/' and removes '..' parts.
680     #[deprecated(
681         since = "0.5.7",
682         note = "by stripping `..`s from the path, the meaning of paths can change.
683                 `mangled_name` can be used if this behaviour is desirable"
684     )]
sanitized_name(&self) -> ::std::path::PathBuf685     pub fn sanitized_name(&self) -> ::std::path::PathBuf {
686         self.mangled_name()
687     }
688 
689     /// Rewrite the path, ignoring any path components with special meaning.
690     ///
691     /// - Absolute paths are made relative
692     /// - [`ParentDir`]s are ignored
693     /// - Truncates the filename at a NULL byte
694     ///
695     /// This is appropriate if you need to be able to extract *something* from
696     /// any archive, but will easily misrepresent trivial paths like
697     /// `foo/../bar` as `foo/bar` (instead of `bar`). Because of this,
698     /// [`ZipFile::enclosed_name`] is the better option in most scenarios.
699     ///
700     /// [`ParentDir`]: `Component::ParentDir`
mangled_name(&self) -> ::std::path::PathBuf701     pub fn mangled_name(&self) -> ::std::path::PathBuf {
702         self.data.file_name_sanitized()
703     }
704 
705     /// Ensure the file path is safe to use as a [`Path`].
706     ///
707     /// - It can't contain NULL bytes
708     /// - It can't resolve to a path outside the current directory
709     ///   > `foo/../bar` is fine, `foo/../../bar` is not.
710     /// - It can't be an absolute path
711     ///
712     /// This will read well-formed ZIP files correctly, and is resistant
713     /// to path-based exploits. It is recommended over
714     /// [`ZipFile::mangled_name`].
enclosed_name(&self) -> Option<&Path>715     pub fn enclosed_name(&self) -> Option<&Path> {
716         if self.data.file_name.contains('\0') {
717             return None;
718         }
719         let path = Path::new(&self.data.file_name);
720         let mut depth = 0usize;
721         for component in path.components() {
722             match component {
723                 Component::Prefix(_) | Component::RootDir => return None,
724                 Component::ParentDir => depth = depth.checked_sub(1)?,
725                 Component::Normal(_) => depth += 1,
726                 Component::CurDir => (),
727             }
728         }
729         Some(path)
730     }
731 
732     /// Get the comment of the file
comment(&self) -> &str733     pub fn comment(&self) -> &str {
734         &self.data.file_comment
735     }
736 
737     /// Get the compression method used to store the file
compression(&self) -> CompressionMethod738     pub fn compression(&self) -> CompressionMethod {
739         self.data.compression_method
740     }
741 
742     /// Get the size of the file in the archive
compressed_size(&self) -> u64743     pub fn compressed_size(&self) -> u64 {
744         self.data.compressed_size
745     }
746 
747     /// Get the size of the file when uncompressed
size(&self) -> u64748     pub fn size(&self) -> u64 {
749         self.data.uncompressed_size
750     }
751 
752     /// Get the time the file was last modified
last_modified(&self) -> DateTime753     pub fn last_modified(&self) -> DateTime {
754         self.data.last_modified_time
755     }
756     /// Returns whether the file is actually a directory
is_dir(&self) -> bool757     pub fn is_dir(&self) -> bool {
758         self.name()
759             .chars()
760             .rev()
761             .next()
762             .map_or(false, |c| c == '/' || c == '\\')
763     }
764 
765     /// Returns whether the file is a regular file
is_file(&self) -> bool766     pub fn is_file(&self) -> bool {
767         !self.is_dir()
768     }
769 
770     /// Get unix mode for the file
unix_mode(&self) -> Option<u32>771     pub fn unix_mode(&self) -> Option<u32> {
772         if self.data.external_attributes == 0 {
773             return None;
774         }
775 
776         match self.data.system {
777             System::Unix => Some(self.data.external_attributes >> 16),
778             System::Dos => {
779                 // Interpret MSDOS directory bit
780                 let mut mode = if 0x10 == (self.data.external_attributes & 0x10) {
781                     ffi::S_IFDIR | 0o0775
782                 } else {
783                     ffi::S_IFREG | 0o0664
784                 };
785                 if 0x01 == (self.data.external_attributes & 0x01) {
786                     // Read-only bit; strip write permissions
787                     mode &= 0o0555;
788                 }
789                 Some(mode)
790             }
791             _ => None,
792         }
793     }
794 
795     /// Get the CRC32 hash of the original file
crc32(&self) -> u32796     pub fn crc32(&self) -> u32 {
797         self.data.crc32
798     }
799 
800     /// Get the starting offset of the data of the compressed file
data_start(&self) -> u64801     pub fn data_start(&self) -> u64 {
802         self.data.data_start
803     }
804 
805     /// Get the starting offset of the zip header for this file
header_start(&self) -> u64806     pub fn header_start(&self) -> u64 {
807         self.data.header_start
808     }
809     /// Get the starting offset of the zip header in the central directory for this file
central_header_start(&self) -> u64810     pub fn central_header_start(&self) -> u64 {
811         self.data.central_header_start
812     }
813 }
814 
815 impl<'a> Read for ZipFile<'a> {
read(&mut self, buf: &mut [u8]) -> io::Result<usize>816     fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
817         self.get_reader().read(buf)
818     }
819 }
820 
821 impl<'a> Drop for ZipFile<'a> {
drop(&mut self)822     fn drop(&mut self) {
823         // self.data is Owned, this reader is constructed by a streaming reader.
824         // In this case, we want to exhaust the reader so that the next file is accessible.
825         if let Cow::Owned(_) = self.data {
826             let mut buffer = [0; 1 << 16];
827 
828             // Get the inner `Take` reader so all decryption, decompression and CRC calculation is skipped.
829             let mut reader: std::io::Take<&mut dyn std::io::Read> = match &mut self.reader {
830                 ZipFileReader::NoReader => {
831                     let innerreader = ::std::mem::replace(&mut self.crypto_reader, None);
832                     innerreader.expect("Invalid reader state").into_inner()
833                 }
834                 reader => {
835                     let innerreader = ::std::mem::replace(reader, ZipFileReader::NoReader);
836                     innerreader.into_inner()
837                 }
838             };
839 
840             loop {
841                 match reader.read(&mut buffer) {
842                     Ok(0) => break,
843                     Ok(_) => (),
844                     Err(e) => panic!(
845                         "Could not consume all of the output of the current ZipFile: {:?}",
846                         e
847                     ),
848                 }
849             }
850         }
851     }
852 }
853 
854 /// Read ZipFile structures from a non-seekable reader.
855 ///
856 /// This is an alternative method to read a zip file. If possible, use the ZipArchive functions
857 /// as some information will be missing when reading this manner.
858 ///
859 /// Reads a file header from the start of the stream. Will return `Ok(Some(..))` if a file is
860 /// present at the start of the stream. Returns `Ok(None)` if the start of the central directory
861 /// is encountered. No more files should be read after this.
862 ///
863 /// The Drop implementation of ZipFile ensures that the reader will be correctly positioned after
864 /// the structure is done.
865 ///
866 /// Missing fields are:
867 /// * `comment`: set to an empty string
868 /// * `data_start`: set to 0
869 /// * `external_attributes`: `unix_mode()`: will return None
read_zipfile_from_stream<'a, R: io::Read>( reader: &'a mut R, ) -> ZipResult<Option<ZipFile<'_>>>870 pub fn read_zipfile_from_stream<'a, R: io::Read>(
871     reader: &'a mut R,
872 ) -> ZipResult<Option<ZipFile<'_>>> {
873     let signature = reader.read_u32::<LittleEndian>()?;
874 
875     match signature {
876         spec::LOCAL_FILE_HEADER_SIGNATURE => (),
877         spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE => return Ok(None),
878         _ => return Err(ZipError::InvalidArchive("Invalid local file header")),
879     }
880 
881     let version_made_by = reader.read_u16::<LittleEndian>()?;
882     let flags = reader.read_u16::<LittleEndian>()?;
883     let encrypted = flags & 1 == 1;
884     let is_utf8 = flags & (1 << 11) != 0;
885     let using_data_descriptor = flags & (1 << 3) != 0;
886     #[allow(deprecated)]
887     let compression_method = CompressionMethod::from_u16(reader.read_u16::<LittleEndian>()?);
888     let last_mod_time = reader.read_u16::<LittleEndian>()?;
889     let last_mod_date = reader.read_u16::<LittleEndian>()?;
890     let crc32 = reader.read_u32::<LittleEndian>()?;
891     let compressed_size = reader.read_u32::<LittleEndian>()?;
892     let uncompressed_size = reader.read_u32::<LittleEndian>()?;
893     let file_name_length = reader.read_u16::<LittleEndian>()? as usize;
894     let extra_field_length = reader.read_u16::<LittleEndian>()? as usize;
895 
896     let mut file_name_raw = vec![0; file_name_length];
897     reader.read_exact(&mut file_name_raw)?;
898     let mut extra_field = vec![0; extra_field_length];
899     reader.read_exact(&mut extra_field)?;
900 
901     let file_name = match is_utf8 {
902         true => String::from_utf8_lossy(&*file_name_raw).into_owned(),
903         false => file_name_raw.clone().from_cp437(),
904     };
905 
906     let mut result = ZipFileData {
907         system: System::from_u8((version_made_by >> 8) as u8),
908         version_made_by: version_made_by as u8,
909         encrypted,
910         compression_method,
911         last_modified_time: DateTime::from_msdos(last_mod_date, last_mod_time),
912         crc32,
913         compressed_size: compressed_size as u64,
914         uncompressed_size: uncompressed_size as u64,
915         file_name,
916         file_name_raw,
917         file_comment: String::new(), // file comment is only available in the central directory
918         // header_start and data start are not available, but also don't matter, since seeking is
919         // not available.
920         header_start: 0,
921         data_start: 0,
922         central_header_start: 0,
923         // The external_attributes field is only available in the central directory.
924         // We set this to zero, which should be valid as the docs state 'If input came
925         // from standard input, this field is set to zero.'
926         external_attributes: 0,
927     };
928 
929     match parse_extra_field(&mut result, &extra_field) {
930         Ok(..) | Err(ZipError::Io(..)) => {}
931         Err(e) => return Err(e),
932     }
933 
934     if encrypted {
935         return unsupported_zip_error("Encrypted files are not supported");
936     }
937     if using_data_descriptor {
938         return unsupported_zip_error("The file length is not available in the local header");
939     }
940 
941     let limit_reader = (reader as &'a mut dyn io::Read).take(result.compressed_size as u64);
942 
943     let result_crc32 = result.crc32;
944     let result_compression_method = result.compression_method;
945     let crypto_reader =
946         make_crypto_reader(result_compression_method, result_crc32, limit_reader, None)?.unwrap();
947 
948     Ok(Some(ZipFile {
949         data: Cow::Owned(result),
950         crypto_reader: None,
951         reader: make_reader(result_compression_method, result_crc32, crypto_reader),
952     }))
953 }
954 
#[cfg(test)]
mod test {
    use super::{read_zipfile_from_stream, ZipArchive};
    use std::io::{self, Read};

    /// The archive in `invalid_offset.zip` must be rejected when opened.
    #[test]
    fn invalid_offset() {
        let bytes = include_bytes!("../tests/data/invalid_offset.zip").to_vec();
        let opened = ZipArchive::new(io::Cursor::new(bytes));
        assert!(opened.is_err());
    }

    /// The archive in `invalid_offset2.zip` must be rejected when opened.
    #[test]
    fn invalid_offset2() {
        let bytes = include_bytes!("../tests/data/invalid_offset2.zip").to_vec();
        let opened = ZipArchive::new(io::Cursor::new(bytes));
        assert!(opened.is_err());
    }

    /// `zip64_demo.zip` opens successfully and exposes exactly one entry.
    #[test]
    fn zip64_with_leading_junk() {
        let bytes = include_bytes!("../tests/data/zip64_demo.zip").to_vec();
        let archive = ZipArchive::new(io::Cursor::new(bytes)).unwrap();
        assert_eq!(archive.len(), 1);
    }

    /// Checks the archive comment and the central header offset of the first entry.
    #[test]
    fn zip_contents() {
        let bytes = include_bytes!("../tests/data/mimetype.zip").to_vec();
        let mut archive = ZipArchive::new(io::Cursor::new(bytes)).unwrap();
        assert!(archive.comment().is_empty());
        assert_eq!(archive.by_index(0).unwrap().central_header_start(), 77);
    }

    /// Streams through every entry until the central directory is reached.
    #[test]
    fn zip_read_streaming() {
        let bytes = include_bytes!("../tests/data/mimetype.zip").to_vec();
        let mut stream = io::Cursor::new(bytes);
        // Each returned entry is dropped right away, which drains its data and
        // leaves the stream at the next header.
        while read_zipfile_from_stream(&mut stream).unwrap().is_some() {}
    }

    /// A cloned archive reads the same data independently of the original.
    #[test]
    fn zip_clone() {
        let bytes = include_bytes!("../tests/data/mimetype.zip").to_vec();
        let mut original = ZipArchive::new(io::Cursor::new(bytes)).unwrap();
        let mut duplicate = original.clone();

        let mut file1 = original.by_index(0).unwrap();
        let mut file2 = duplicate.by_index(0).unwrap();

        let stamp = file1.last_modified();
        let observed = (
            stamp.year(),
            stamp.month(),
            stamp.day(),
            stamp.hour(),
            stamp.minute(),
            stamp.second(),
        );
        assert_eq!(observed, (1980, 1, 1, 0, 0, 0));

        let mut first_a = [0; 5];
        let mut first_b = [0; 5];
        let mut second_a = [0; 5];
        let mut second_b = [0; 5];

        // Interleave the reads so each handle has to keep its own position.
        file1.read(&mut first_a).unwrap();
        file2.read(&mut first_b).unwrap();
        file1.read(&mut second_a).unwrap();
        file2.read(&mut second_b).unwrap();

        assert_eq!(first_a, first_b);
        assert_eq!(second_a, second_b);
        assert_ne!(first_a, second_a);
    }

    /// Entries named `dir*` must report as directories and `file*` as files.
    #[test]
    fn file_and_dir_predicates() {
        let bytes = include_bytes!("../tests/data/files_and_dirs.zip").to_vec();
        let mut archive = ZipArchive::new(io::Cursor::new(bytes)).unwrap();

        for index in 0..archive.len() {
            let entry = archive.by_index(index).unwrap();
            let full_name = entry.enclosed_name().unwrap();
            let file_name = full_name.file_name().unwrap().to_str().unwrap();
            let looks_like_dir = file_name.starts_with("dir") && entry.is_dir();
            let looks_like_file = file_name.starts_with("file") && entry.is_file();
            assert!(looks_like_dir || looks_like_file);
        }
    }
}
1079