• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements.  See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership.  The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License.  You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing,
13  * software distributed under the License is distributed on an
14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15  * KIND, either express or implied.  See the License for the
16  * specific language governing permissions and limitations
17  * under the License.
18  */
19 package org.apache.commons.compress.archivers.zip;
20 
21 import java.io.ByteArrayInputStream;
22 import java.io.ByteArrayOutputStream;
23 import java.io.EOFException;
24 import java.io.IOException;
25 import java.io.InputStream;
26 import java.io.PushbackInputStream;
27 import java.math.BigInteger;
28 import java.nio.ByteBuffer;
29 import java.util.Arrays;
30 import java.util.zip.CRC32;
31 import java.util.zip.DataFormatException;
32 import java.util.zip.Inflater;
33 import java.util.zip.ZipEntry;
34 import java.util.zip.ZipException;
35 
36 import org.apache.commons.compress.archivers.ArchiveEntry;
37 import org.apache.commons.compress.archivers.ArchiveInputStream;
38 import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
39 import org.apache.commons.compress.compressors.deflate64.Deflate64CompressorInputStream;
40 import org.apache.commons.compress.utils.ArchiveUtils;
41 import org.apache.commons.compress.utils.IOUtils;
42 import org.apache.commons.compress.utils.InputStreamStatistics;
43 
44 import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD;
45 import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT;
46 import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD;
47 import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC;
48 
49 /**
50  * Implements an input stream that can read Zip archives.
51  *
52  * <p>As of Apache Commons Compress it transparently supports Zip64
53  * extensions and thus individual entries and archives larger than 4
54  * GB or with more than 65536 entries.</p>
55  *
56  * <p>The {@link ZipFile} class is preferred when reading from files
57  * as {@link ZipArchiveInputStream} is limited by not being able to
58  * read the central directory header before returning entries.  In
59  * particular {@link ZipArchiveInputStream}</p>
60  *
61  * <ul>
62  *
63  *  <li>may return entries that are not part of the central directory
64  *  at all and shouldn't be considered part of the archive.</li>
65  *
66  *  <li>may return several entries with the same name.</li>
67  *
68  *  <li>will not return internal or external attributes.</li>
69  *
70  *  <li>may return incomplete extra field data.</li>
71  *
72  *  <li>may return unknown sizes and CRC values for entries until the
73  *  next entry has been reached if the archive uses the data
74  *  descriptor feature.</li>
75  *
76  * </ul>
77  *
78  * @see ZipFile
79  * @NotThreadSafe
80  */
81 public class ZipArchiveInputStream extends ArchiveInputStream implements InputStreamStatistics {
82 
83     /** The zip encoding to use for filenames and the file comment. */
84     private final ZipEncoding zipEncoding;
85 
86     // the provided encoding (for unit tests)
87     final String encoding;
88 
89     /** Whether to look for and use Unicode extra fields. */
90     private final boolean useUnicodeExtraFields;
91 
92     /** Wrapped stream, will always be a PushbackInputStream. */
93     private final InputStream in;
94 
95     /** Inflater used for all deflated entries. */
96     private final Inflater inf = new Inflater(true);
97 
98     /** Buffer used to read from the wrapped stream. */
99     private final ByteBuffer buf = ByteBuffer.allocate(ZipArchiveOutputStream.BUFFER_SIZE);
100 
101     /** The entry that is currently being read. */
102     private CurrentEntry current = null;
103 
104     /** Whether the stream has been closed. */
105     private boolean closed = false;
106 
107     /** Whether the stream has reached the central directory - and thus found all entries. */
108     private boolean hitCentralDirectory = false;
109 
110     /**
111      * When reading a stored entry that uses the data descriptor this
112      * stream has to read the full entry and caches it.  This is the
113      * cache.
114      */
115     private ByteArrayInputStream lastStoredEntry = null;
116 
117     /** Whether the stream will try to read STORED entries that use a data descriptor. */
118     private boolean allowStoredEntriesWithDataDescriptor = false;
119 
120     /** Count decompressed bytes for current entry */
121     private long uncompressedCount = 0;
122 
123     private static final int LFH_LEN = 30;
124     /*
125       local file header signature     WORD
126       version needed to extract       SHORT
127       general purpose bit flag        SHORT
128       compression method              SHORT
129       last mod file time              SHORT
130       last mod file date              SHORT
131       crc-32                          WORD
132       compressed size                 WORD
133       uncompressed size               WORD
134       file name length                SHORT
135       extra field length              SHORT
136     */
137 
138     private static final int CFH_LEN = 46;
139     /*
140         central file header signature   WORD
141         version made by                 SHORT
142         version needed to extract       SHORT
143         general purpose bit flag        SHORT
144         compression method              SHORT
145         last mod file time              SHORT
146         last mod file date              SHORT
147         crc-32                          WORD
148         compressed size                 WORD
149         uncompressed size               WORD
150         file name length                SHORT
151         extra field length              SHORT
152         file comment length             SHORT
153         disk number start               SHORT
154         internal file attributes        SHORT
155         external file attributes        WORD
156         relative offset of local header WORD
157     */
158 
159     private static final long TWO_EXP_32 = ZIP64_MAGIC + 1;
160 
161     // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection)
162     private final byte[] lfhBuf = new byte[LFH_LEN];
163     private final byte[] skipBuf = new byte[1024];
164     private final byte[] shortBuf = new byte[SHORT];
165     private final byte[] wordBuf = new byte[WORD];
166     private final byte[] twoDwordBuf = new byte[2 * DWORD];
167 
168     private int entriesRead = 0;
169 
/**
 * Creates a stream that uses UTF-8 for file names.
 *
 * @param inputStream the stream to wrap
 */
public ZipArchiveInputStream(final InputStream inputStream) {
    this(inputStream, ZipEncodingHelper.UTF8);
}
177 
/**
 * Creates a stream that uses the given encoding for file names and
 * honours Unicode extra fields.
 *
 * @param inputStream the stream to wrap
 * @param encoding the encoding to use for file names, use null
 * for the platform's default encoding
 * @since 1.5
 */
public ZipArchiveInputStream(final InputStream inputStream, final String encoding) {
    this(inputStream, encoding, true);
}
188 
/**
 * Creates a stream that uses the given encoding for file names and
 * does not try to read STORED entries that use a data descriptor.
 *
 * @param inputStream the stream to wrap
 * @param encoding the encoding to use for file names, use null
 * for the platform's default encoding
 * @param useUnicodeExtraFields whether to use InfoZIP Unicode
 * Extra Fields (if present) to set the file names.
 */
public ZipArchiveInputStream(final InputStream inputStream,
                             final String encoding,
                             final boolean useUnicodeExtraFields) {
    this(inputStream, encoding, useUnicodeExtraFields, false);
}
200 
/**
 * Creates a stream with every option spelled out; all other
 * constructors delegate to this one.
 *
 * @param inputStream the stream to wrap
 * @param encoding the encoding to use for file names, use null
 * for the platform's default encoding
 * @param useUnicodeExtraFields whether to use InfoZIP Unicode
 * Extra Fields (if present) to set the file names.
 * @param allowStoredEntriesWithDataDescriptor whether the stream
 * will try to read STORED entries that use a data descriptor
 * @since 1.1
 */
public ZipArchiveInputStream(final InputStream inputStream,
                             final String encoding,
                             final boolean useUnicodeExtraFields,
                             final boolean allowStoredEntriesWithDataDescriptor) {
    this.encoding = encoding;
    this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
    this.useUnicodeExtraFields = useUnicodeExtraFields;
    this.allowStoredEntriesWithDataDescriptor = allowStoredEntriesWithDataDescriptor;
    // the pushback capacity equals the read buffer's capacity
    this.in = new PushbackInputStream(inputStream, buf.capacity());
    // nothing has been read so far, so the buffer starts out empty
    buf.limit(0);
}
225 
getNextZipEntry()226     public ZipArchiveEntry getNextZipEntry() throws IOException {
227         uncompressedCount = 0;
228 
229         boolean firstEntry = true;
230         if (closed || hitCentralDirectory) {
231             return null;
232         }
233         if (current != null) {
234             closeEntry();
235             firstEntry = false;
236         }
237 
238         long currentHeaderOffset = getBytesRead();
239         try {
240             if (firstEntry) {
241                 // split archives have a special signature before the
242                 // first local file header - look for it and fail with
243                 // the appropriate error message if this is a split
244                 // archive.
245                 readFirstLocalFileHeader(lfhBuf);
246             } else {
247                 readFully(lfhBuf);
248             }
249         } catch (final EOFException e) { //NOSONAR
250             return null;
251         }
252 
253         final ZipLong sig = new ZipLong(lfhBuf);
254         if (!sig.equals(ZipLong.LFH_SIG)) {
255             if (sig.equals(ZipLong.CFH_SIG) || sig.equals(ZipLong.AED_SIG) || isApkSigningBlock(lfhBuf)) {
256                 hitCentralDirectory = true;
257                 skipRemainderOfArchive();
258                 return null;
259             }
260             throw new ZipException(String.format("Unexpected record signature: 0X%X", sig.getValue()));
261         }
262 
263         int off = WORD;
264         current = new CurrentEntry();
265 
266         final int versionMadeBy = ZipShort.getValue(lfhBuf, off);
267         off += SHORT;
268         current.entry.setPlatform((versionMadeBy >> ZipFile.BYTE_SHIFT) & ZipFile.NIBLET_MASK);
269 
270         final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(lfhBuf, off);
271         final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
272         final ZipEncoding entryEncoding = hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
273         current.hasDataDescriptor = gpFlag.usesDataDescriptor();
274         current.entry.setGeneralPurposeBit(gpFlag);
275 
276         off += SHORT;
277 
278         current.entry.setMethod(ZipShort.getValue(lfhBuf, off));
279         off += SHORT;
280 
281         final long time = ZipUtil.dosToJavaTime(ZipLong.getValue(lfhBuf, off));
282         current.entry.setTime(time);
283         off += WORD;
284 
285         ZipLong size = null, cSize = null;
286         if (!current.hasDataDescriptor) {
287             current.entry.setCrc(ZipLong.getValue(lfhBuf, off));
288             off += WORD;
289 
290             cSize = new ZipLong(lfhBuf, off);
291             off += WORD;
292 
293             size = new ZipLong(lfhBuf, off);
294             off += WORD;
295         } else {
296             off += 3 * WORD;
297         }
298 
299         final int fileNameLen = ZipShort.getValue(lfhBuf, off);
300 
301         off += SHORT;
302 
303         final int extraLen = ZipShort.getValue(lfhBuf, off);
304         off += SHORT; // NOSONAR - assignment as documentation
305 
306         final byte[] fileName = new byte[fileNameLen];
307         readFully(fileName);
308         current.entry.setName(entryEncoding.decode(fileName), fileName);
309         if (hasUTF8Flag) {
310             current.entry.setNameSource(ZipArchiveEntry.NameSource.NAME_WITH_EFS_FLAG);
311         }
312 
313         final byte[] extraData = new byte[extraLen];
314         readFully(extraData);
315         current.entry.setExtra(extraData);
316 
317         if (!hasUTF8Flag && useUnicodeExtraFields) {
318             ZipUtil.setNameAndCommentFromExtraFields(current.entry, fileName, null);
319         }
320 
321         processZip64Extra(size, cSize);
322 
323         current.entry.setLocalHeaderOffset(currentHeaderOffset);
324         current.entry.setDataOffset(getBytesRead());
325         current.entry.setStreamContiguous(true);
326 
327         ZipMethod m = ZipMethod.getMethodByCode(current.entry.getMethod());
328         if (current.entry.getCompressedSize() != ArchiveEntry.SIZE_UNKNOWN) {
329             if (ZipUtil.canHandleEntryData(current.entry) && m != ZipMethod.STORED && m != ZipMethod.DEFLATED) {
330                 InputStream bis = new BoundedInputStream(in, current.entry.getCompressedSize());
331                 switch (m) {
332                 case UNSHRINKING:
333                     current.in = new UnshrinkingInputStream(bis);
334                     break;
335                 case IMPLODING:
336                     current.in = new ExplodingInputStream(
337                         current.entry.getGeneralPurposeBit().getSlidingDictionarySize(),
338                         current.entry.getGeneralPurposeBit().getNumberOfShannonFanoTrees(),
339                         bis);
340                     break;
341                 case BZIP2:
342                     current.in = new BZip2CompressorInputStream(bis);
343                     break;
344                 case ENHANCED_DEFLATED:
345                     current.in = new Deflate64CompressorInputStream(bis);
346                     break;
347                 default:
348                     // we should never get here as all supported methods have been covered
349                     // will cause an error when read is invoked, don't throw an exception here so people can
350                     // skip unsupported entries
351                     break;
352                 }
353             }
354         } else if (m == ZipMethod.ENHANCED_DEFLATED) {
355             current.in = new Deflate64CompressorInputStream(in);
356         }
357 
358         entriesRead++;
359         return current.entry;
360     }
361 
362     /**
363      * Fills the given array with the first local file header and
364      * deals with splitting/spanning markers that may prefix the first
365      * LFH.
366      */
readFirstLocalFileHeader(final byte[] lfh)367     private void readFirstLocalFileHeader(final byte[] lfh) throws IOException {
368         readFully(lfh);
369         final ZipLong sig = new ZipLong(lfh);
370         if (sig.equals(ZipLong.DD_SIG)) {
371             throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.SPLITTING);
372         }
373 
374         if (sig.equals(ZipLong.SINGLE_SEGMENT_SPLIT_MARKER)) {
375             // The archive is not really split as only one segment was
376             // needed in the end.  Just skip over the marker.
377             final byte[] missedLfhBytes = new byte[4];
378             readFully(missedLfhBytes);
379             System.arraycopy(lfh, 4, lfh, 0, LFH_LEN - 4);
380             System.arraycopy(missedLfhBytes, 0, lfh, LFH_LEN - 4, 4);
381         }
382     }
383 
384     /**
385      * Records whether a Zip64 extra is present and sets the size
386      * information from it if sizes are 0xFFFFFFFF and the entry
387      * doesn't use a data descriptor.
388      */
processZip64Extra(final ZipLong size, final ZipLong cSize)389     private void processZip64Extra(final ZipLong size, final ZipLong cSize) {
390         final Zip64ExtendedInformationExtraField z64 =
391             (Zip64ExtendedInformationExtraField)
392             current.entry.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID);
393         current.usesZip64 = z64 != null;
394         if (!current.hasDataDescriptor) {
395             if (z64 != null // same as current.usesZip64 but avoids NPE warning
396                     && (cSize.equals(ZipLong.ZIP64_MAGIC) || size.equals(ZipLong.ZIP64_MAGIC)) ) {
397                 current.entry.setCompressedSize(z64.getCompressedSize().getLongValue());
398                 current.entry.setSize(z64.getSize().getLongValue());
399             } else {
400                 current.entry.setCompressedSize(cSize.getValue());
401                 current.entry.setSize(size.getValue());
402             }
403         }
404     }
405 
406     @Override
getNextEntry()407     public ArchiveEntry getNextEntry() throws IOException {
408         return getNextZipEntry();
409     }
410 
411     /**
412      * Whether this class is able to read the given entry.
413      *
414      * <p>May return false if it is set up to use encryption or a
415      * compression method that hasn't been implemented yet.</p>
416      * @since 1.1
417      */
418     @Override
canReadEntryData(final ArchiveEntry ae)419     public boolean canReadEntryData(final ArchiveEntry ae) {
420         if (ae instanceof ZipArchiveEntry) {
421             final ZipArchiveEntry ze = (ZipArchiveEntry) ae;
422             return ZipUtil.canHandleEntryData(ze)
423                 && supportsDataDescriptorFor(ze)
424                 && supportsCompressedSizeFor(ze);
425         }
426         return false;
427     }
428 
429     @Override
read(final byte[] buffer, final int offset, final int length)430     public int read(final byte[] buffer, final int offset, final int length) throws IOException {
431         if (closed) {
432             throw new IOException("The stream is closed");
433         }
434 
435         if (current == null) {
436             return -1;
437         }
438 
439         // avoid int overflow, check null buffer
440         if (offset > buffer.length || length < 0 || offset < 0 || buffer.length - offset < length) {
441             throw new ArrayIndexOutOfBoundsException();
442         }
443 
444         ZipUtil.checkRequestedFeatures(current.entry);
445         if (!supportsDataDescriptorFor(current.entry)) {
446             throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.DATA_DESCRIPTOR,
447                     current.entry);
448         }
449         if (!supportsCompressedSizeFor(current.entry)) {
450             throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.UNKNOWN_COMPRESSED_SIZE,
451                     current.entry);
452         }
453 
454         int read;
455         if (current.entry.getMethod() == ZipArchiveOutputStream.STORED) {
456             read = readStored(buffer, offset, length);
457         } else if (current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED) {
458             read = readDeflated(buffer, offset, length);
459         } else if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode()
460                 || current.entry.getMethod() == ZipMethod.IMPLODING.getCode()
461                 || current.entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode()
462                 || current.entry.getMethod() == ZipMethod.BZIP2.getCode()) {
463             read = current.in.read(buffer, offset, length);
464         } else {
465             throw new UnsupportedZipFeatureException(ZipMethod.getMethodByCode(current.entry.getMethod()),
466                     current.entry);
467         }
468 
469         if (read >= 0) {
470             current.crc.update(buffer, offset, read);
471             uncompressedCount += read;
472         }
473 
474         return read;
475     }
476 
477     /**
478      * @since 1.17
479      */
480     @Override
getCompressedCount()481     public long getCompressedCount() {
482         if (current.entry.getMethod() == ZipArchiveOutputStream.STORED) {
483             return current.bytesRead;
484         } else if (current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED) {
485             return getBytesInflated();
486         } else if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode()) {
487             return ((UnshrinkingInputStream) current.in).getCompressedCount();
488         } else if (current.entry.getMethod() == ZipMethod.IMPLODING.getCode()) {
489             return ((ExplodingInputStream) current.in).getCompressedCount();
490         } else if (current.entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode()) {
491             return ((Deflate64CompressorInputStream) current.in).getCompressedCount();
492         } else if (current.entry.getMethod() == ZipMethod.BZIP2.getCode()) {
493             return ((BZip2CompressorInputStream) current.in).getCompressedCount();
494         } else {
495             return -1;
496         }
497     }
498 
499     /**
500      * @since 1.17
501      */
502     @Override
getUncompressedCount()503     public long getUncompressedCount() {
504         return uncompressedCount;
505     }
506 
507     /**
508      * Implementation of read for STORED entries.
509      */
readStored(final byte[] buffer, final int offset, final int length)510     private int readStored(final byte[] buffer, final int offset, final int length) throws IOException {
511 
512         if (current.hasDataDescriptor) {
513             if (lastStoredEntry == null) {
514                 readStoredEntry();
515             }
516             return lastStoredEntry.read(buffer, offset, length);
517         }
518 
519         final long csize = current.entry.getSize();
520         if (current.bytesRead >= csize) {
521             return -1;
522         }
523 
524         if (buf.position() >= buf.limit()) {
525             buf.position(0);
526             final int l = in.read(buf.array());
527             if (l == -1) {
528                 buf.limit(0);
529                 throw new IOException("Truncated ZIP file");
530             }
531             buf.limit(l);
532 
533             count(l);
534             current.bytesReadFromStream += l;
535         }
536 
537         int toRead = Math.min(buf.remaining(), length);
538         if ((csize - current.bytesRead) < toRead) {
539             // if it is smaller than toRead then it fits into an int
540             toRead = (int) (csize - current.bytesRead);
541         }
542         buf.get(buffer, offset, toRead);
543         current.bytesRead += toRead;
544         return toRead;
545     }
546 
547     /**
548      * Implementation of read for DEFLATED entries.
549      */
readDeflated(final byte[] buffer, final int offset, final int length)550     private int readDeflated(final byte[] buffer, final int offset, final int length) throws IOException {
551         final int read = readFromInflater(buffer, offset, length);
552         if (read <= 0) {
553             if (inf.finished()) {
554                 return -1;
555             } else if (inf.needsDictionary()) {
556                 throw new ZipException("This archive needs a preset dictionary"
557                                        + " which is not supported by Commons"
558                                        + " Compress.");
559             } else if (read == -1) {
560                 throw new IOException("Truncated ZIP file");
561             }
562         }
563         return read;
564     }
565 
566     /**
567      * Potentially reads more bytes to fill the inflater's buffer and
568      * reads from it.
569      */
readFromInflater(final byte[] buffer, final int offset, final int length)570     private int readFromInflater(final byte[] buffer, final int offset, final int length) throws IOException {
571         int read = 0;
572         do {
573             if (inf.needsInput()) {
574                 final int l = fill();
575                 if (l > 0) {
576                     current.bytesReadFromStream += buf.limit();
577                 } else if (l == -1) {
578                     return -1;
579                 } else {
580                     break;
581                 }
582             }
583             try {
584                 read = inf.inflate(buffer, offset, length);
585             } catch (final DataFormatException e) {
586                 throw (IOException) new ZipException(e.getMessage()).initCause(e);
587             }
588         } while (read == 0 && inf.needsInput());
589         return read;
590     }
591 
592     @Override
close()593     public void close() throws IOException {
594         if (!closed) {
595             closed = true;
596             try {
597                 in.close();
598             } finally {
599                 inf.end();
600             }
601         }
602     }
603 
604     /**
605      * Skips over and discards value bytes of data from this input
606      * stream.
607      *
608      * <p>This implementation may end up skipping over some smaller
609      * number of bytes, possibly 0, if and only if it reaches the end
610      * of the underlying stream.</p>
611      *
612      * <p>The actual number of bytes skipped is returned.</p>
613      *
614      * @param value the number of bytes to be skipped.
615      * @return the actual number of bytes skipped.
616      * @throws IOException - if an I/O error occurs.
617      * @throws IllegalArgumentException - if value is negative.
618      */
619     @Override
skip(final long value)620     public long skip(final long value) throws IOException {
621         if (value >= 0) {
622             long skipped = 0;
623             while (skipped < value) {
624                 final long rem = value - skipped;
625                 final int x = read(skipBuf, 0, (int) (skipBuf.length > rem ? rem : skipBuf.length));
626                 if (x == -1) {
627                     return skipped;
628                 }
629                 skipped += x;
630             }
631             return skipped;
632         }
633         throw new IllegalArgumentException();
634     }
635 
636     /**
637      * Checks if the signature matches what is expected for a zip file.
638      * Does not currently handle self-extracting zips which may have arbitrary
639      * leading content.
640      *
641      * @param signature the bytes to check
642      * @param length    the number of bytes to check
643      * @return true, if this stream is a zip archive stream, false otherwise
644      */
matches(final byte[] signature, final int length)645     public static boolean matches(final byte[] signature, final int length) {
646         if (length < ZipArchiveOutputStream.LFH_SIG.length) {
647             return false;
648         }
649 
650         return checksig(signature, ZipArchiveOutputStream.LFH_SIG) // normal file
651             || checksig(signature, ZipArchiveOutputStream.EOCD_SIG) // empty zip
652             || checksig(signature, ZipArchiveOutputStream.DD_SIG) // split zip
653             || checksig(signature, ZipLong.SINGLE_SEGMENT_SPLIT_MARKER.getBytes());
654     }
655 
checksig(final byte[] signature, final byte[] expected)656     private static boolean checksig(final byte[] signature, final byte[] expected) {
657         for (int i = 0; i < expected.length; i++) {
658             if (signature[i] != expected[i]) {
659                 return false;
660             }
661         }
662         return true;
663     }
664 
665     /**
666      * Closes the current ZIP archive entry and positions the underlying
667      * stream to the beginning of the next entry. All per-entry variables
668      * and data structures are cleared.
669      * <p>
670      * If the compressed size of this entry is included in the entry header,
671      * then any outstanding bytes are simply skipped from the underlying
672      * stream without uncompressing them. This allows an entry to be safely
673      * closed even if the compression method is unsupported.
674      * <p>
675      * In case we don't know the compressed size of this entry or have
676      * already buffered too much data from the underlying stream to support
677      * uncompression, then the uncompression process is completed and the
678      * end position of the stream is adjusted based on the result of that
679      * process.
680      *
681      * @throws IOException if an error occurs
682      */
closeEntry()683     private void closeEntry() throws IOException {
684         if (closed) {
685             throw new IOException("The stream is closed");
686         }
687         if (current == null) {
688             return;
689         }
690 
691         // Ensure all entry bytes are read
692         if (currentEntryHasOutstandingBytes()) {
693             drainCurrentEntryData();
694         } else {
695             // this is guaranteed to exhaust the stream
696             skip(Long.MAX_VALUE); //NOSONAR
697 
698             final long inB = current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED
699                        ? getBytesInflated() : current.bytesRead;
700 
701             // this is at most a single read() operation and can't
702             // exceed the range of int
703             final int diff = (int) (current.bytesReadFromStream - inB);
704 
705             // Pushback any required bytes
706             if (diff > 0) {
707                 pushback(buf.array(), buf.limit() - diff, diff);
708                 current.bytesReadFromStream -= diff;
709             }
710 
711             // Drain remainder of entry if not all data bytes were required
712             if (currentEntryHasOutstandingBytes()) {
713                 drainCurrentEntryData();
714             }
715         }
716 
717         if (lastStoredEntry == null && current.hasDataDescriptor) {
718             readDataDescriptor();
719         }
720 
721         inf.reset();
722         buf.clear().flip();
723         current = null;
724         lastStoredEntry = null;
725     }
726 
727     /**
728      * If the compressed size of the current entry is included in the entry header
729      * and there are any outstanding bytes in the underlying stream, then
730      * this returns true.
731      *
732      * @return true, if current entry is determined to have outstanding bytes, false otherwise
733      */
currentEntryHasOutstandingBytes()734     private boolean currentEntryHasOutstandingBytes() {
735         return current.bytesReadFromStream <= current.entry.getCompressedSize()
736                 && !current.hasDataDescriptor;
737     }
738 
739     /**
740      * Read all data of the current entry from the underlying stream
741      * that hasn't been read, yet.
742      */
drainCurrentEntryData()743     private void drainCurrentEntryData() throws IOException {
744         long remaining = current.entry.getCompressedSize() - current.bytesReadFromStream;
745         while (remaining > 0) {
746             final long n = in.read(buf.array(), 0, (int) Math.min(buf.capacity(), remaining));
747             if (n < 0) {
748                 throw new EOFException("Truncated ZIP entry: "
749                                        + ArchiveUtils.sanitize(current.entry.getName()));
750             }
751             count(n);
752             remaining -= n;
753         }
754     }
755 
756     /**
757      * Get the number of bytes Inflater has actually processed.
758      *
759      * <p>for Java &lt; Java7 the getBytes* methods in
760      * Inflater/Deflater seem to return unsigned ints rather than
761      * longs that start over with 0 at 2^32.</p>
762      *
763      * <p>The stream knows how many bytes it has read, but not how
764      * many the Inflater actually consumed - it should be between the
765      * total number of bytes read for the entry and the total number
766      * minus the last read operation.  Here we just try to make the
767      * value close enough to the bytes we've read by assuming the
768      * number of bytes consumed must be smaller than (or equal to) the
769      * number of bytes read but not smaller by more than 2^32.</p>
770      */
getBytesInflated()771     private long getBytesInflated() {
772         long inB = inf.getBytesRead();
773         if (current.bytesReadFromStream >= TWO_EXP_32) {
774             while (inB + TWO_EXP_32 <= current.bytesReadFromStream) {
775                 inB += TWO_EXP_32;
776             }
777         }
778         return inB;
779     }
780 
fill()781     private int fill() throws IOException {
782         if (closed) {
783             throw new IOException("The stream is closed");
784         }
785         final int length = in.read(buf.array());
786         if (length > 0) {
787             buf.limit(length);
788             count(buf.limit());
789             inf.setInput(buf.array(), 0, buf.limit());
790         }
791         return length;
792     }
793 
readFully(final byte[] b)794     private void readFully(final byte[] b) throws IOException {
795         readFully(b, 0);
796     }
797 
readFully(final byte[] b, final int off)798     private void readFully(final byte[] b, final int off) throws IOException {
799         final int len = b.length - off;
800         final int count = IOUtils.readFully(in, b, off, len);
801         count(count);
802         if (count < len) {
803             throw new EOFException();
804         }
805     }
806 
readDataDescriptor()807     private void readDataDescriptor() throws IOException {
808         readFully(wordBuf);
809         ZipLong val = new ZipLong(wordBuf);
810         if (ZipLong.DD_SIG.equals(val)) {
811             // data descriptor with signature, skip sig
812             readFully(wordBuf);
813             val = new ZipLong(wordBuf);
814         }
815         current.entry.setCrc(val.getValue());
816 
817         // if there is a ZIP64 extra field, sizes are eight bytes
818         // each, otherwise four bytes each.  Unfortunately some
819         // implementations - namely Java7 - use eight bytes without
820         // using a ZIP64 extra field -
821         // https://bugs.sun.com/bugdatabase/view_bug.do?bug_id=7073588
822 
823         // just read 16 bytes and check whether bytes nine to twelve
824         // look like one of the signatures of what could follow a data
825         // descriptor (ignoring archive decryption headers for now).
826         // If so, push back eight bytes and assume sizes are four
827         // bytes, otherwise sizes are eight bytes each.
828         readFully(twoDwordBuf);
829         final ZipLong potentialSig = new ZipLong(twoDwordBuf, DWORD);
830         if (potentialSig.equals(ZipLong.CFH_SIG) || potentialSig.equals(ZipLong.LFH_SIG)) {
831             pushback(twoDwordBuf, DWORD, DWORD);
832             current.entry.setCompressedSize(ZipLong.getValue(twoDwordBuf));
833             current.entry.setSize(ZipLong.getValue(twoDwordBuf, WORD));
834         } else {
835             current.entry.setCompressedSize(ZipEightByteInteger.getLongValue(twoDwordBuf));
836             current.entry.setSize(ZipEightByteInteger.getLongValue(twoDwordBuf, DWORD));
837         }
838     }
839 
840     /**
841      * Whether this entry requires a data descriptor this library can work with.
842      *
843      * @return true if allowStoredEntriesWithDataDescriptor is true,
844      * the entry doesn't require any data descriptor or the method is
845      * DEFLATED or ENHANCED_DEFLATED.
846      */
supportsDataDescriptorFor(final ZipArchiveEntry entry)847     private boolean supportsDataDescriptorFor(final ZipArchiveEntry entry) {
848         return !entry.getGeneralPurposeBit().usesDataDescriptor()
849 
850                 || (allowStoredEntriesWithDataDescriptor && entry.getMethod() == ZipEntry.STORED)
851                 || entry.getMethod() == ZipEntry.DEFLATED
852                 || entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode();
853     }
854 
855     /**
856      * Whether the compressed size for the entry is either known or
857      * not required by the compression method being used.
858      */
supportsCompressedSizeFor(final ZipArchiveEntry entry)859     private boolean supportsCompressedSizeFor(final ZipArchiveEntry entry) {
860         return entry.getCompressedSize() != ArchiveEntry.SIZE_UNKNOWN
861             || entry.getMethod() == ZipEntry.DEFLATED
862             || entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode()
863             || (entry.getGeneralPurposeBit().usesDataDescriptor()
864                 && allowStoredEntriesWithDataDescriptor
865                 && entry.getMethod() == ZipEntry.STORED);
866     }
867 
868     /**
869      * Caches a stored entry that uses the data descriptor.
870      *
871      * <ul>
872      *   <li>Reads a stored entry until the signature of a local file
873      *     header, central directory header or data descriptor has been
874      *     found.</li>
875      *   <li>Stores all entry data in lastStoredEntry.</p>
876      *   <li>Rewinds the stream to position at the data
877      *     descriptor.</li>
878      *   <li>reads the data descriptor</li>
879      * </ul>
880      *
881      * <p>After calling this method the entry should know its size,
882      * the entry's data is cached and the stream is positioned at the
883      * next local file or central directory header.</p>
884      */
readStoredEntry()885     private void readStoredEntry() throws IOException {
886         final ByteArrayOutputStream bos = new ByteArrayOutputStream();
887         int off = 0;
888         boolean done = false;
889 
890         // length of DD without signature
891         final int ddLen = current.usesZip64 ? WORD + 2 * DWORD : 3 * WORD;
892 
893         while (!done) {
894             final int r = in.read(buf.array(), off, ZipArchiveOutputStream.BUFFER_SIZE - off);
895             if (r <= 0) {
896                 // read the whole archive without ever finding a
897                 // central directory
898                 throw new IOException("Truncated ZIP file");
899             }
900             if (r + off < 4) {
901                 // buffer too small to check for a signature, loop
902                 off += r;
903                 continue;
904             }
905 
906             done = bufferContainsSignature(bos, off, r, ddLen);
907             if (!done) {
908                 off = cacheBytesRead(bos, off, r, ddLen);
909             }
910         }
911 
912         final byte[] b = bos.toByteArray();
913         lastStoredEntry = new ByteArrayInputStream(b);
914     }
915 
916     private static final byte[] LFH = ZipLong.LFH_SIG.getBytes();
917     private static final byte[] CFH = ZipLong.CFH_SIG.getBytes();
918     private static final byte[] DD = ZipLong.DD_SIG.getBytes();
919 
920     /**
921      * Checks whether the current buffer contains the signature of a
922      * &quot;data descriptor&quot;, &quot;local file header&quot; or
923      * &quot;central directory entry&quot;.
924      *
925      * <p>If it contains such a signature, reads the data descriptor
926      * and positions the stream right after the data descriptor.</p>
927      */
bufferContainsSignature(final ByteArrayOutputStream bos, final int offset, final int lastRead, final int expectedDDLen)928     private boolean bufferContainsSignature(final ByteArrayOutputStream bos, final int offset, final int lastRead, final int expectedDDLen)
929             throws IOException {
930 
931         boolean done = false;
932         int readTooMuch = 0;
933         for (int i = 0; !done && i < offset + lastRead - 4; i++) {
934             if (buf.array()[i] == LFH[0] && buf.array()[i + 1] == LFH[1]) {
935                 if ((buf.array()[i + 2] == LFH[2] && buf.array()[i + 3] == LFH[3])
936                     || (buf.array()[i] == CFH[2] && buf.array()[i + 3] == CFH[3])) {
937                     // found a LFH or CFH:
938                     readTooMuch = offset + lastRead - i - expectedDDLen;
939                     done = true;
940                 }
941                 else if (buf.array()[i + 2] == DD[2] && buf.array()[i + 3] == DD[3]) {
942                     // found DD:
943                     readTooMuch = offset + lastRead - i;
944                     done = true;
945                 }
946                 if (done) {
947                     // * push back bytes read in excess as well as the data
948                     //   descriptor
949                     // * copy the remaining bytes to cache
950                     // * read data descriptor
951                     pushback(buf.array(), offset + lastRead - readTooMuch, readTooMuch);
952                     bos.write(buf.array(), 0, i);
953                     readDataDescriptor();
954                 }
955             }
956         }
957         return done;
958     }
959 
960     /**
961      * If the last read bytes could hold a data descriptor and an
962      * incomplete signature then save the last bytes to the front of
963      * the buffer and cache everything in front of the potential data
964      * descriptor into the given ByteArrayOutputStream.
965      *
966      * <p>Data descriptor plus incomplete signature (3 bytes in the
967      * worst case) can be 20 bytes max.</p>
968      */
cacheBytesRead(final ByteArrayOutputStream bos, int offset, final int lastRead, final int expecteDDLen)969     private int cacheBytesRead(final ByteArrayOutputStream bos, int offset, final int lastRead, final int expecteDDLen) {
970         final int cacheable = offset + lastRead - expecteDDLen - 3;
971         if (cacheable > 0) {
972             bos.write(buf.array(), 0, cacheable);
973             System.arraycopy(buf.array(), cacheable, buf.array(), 0, expecteDDLen + 3);
974             offset = expecteDDLen + 3;
975         } else {
976             offset += lastRead;
977         }
978         return offset;
979     }
980 
pushback(final byte[] buf, final int offset, final int length)981     private void pushback(final byte[] buf, final int offset, final int length) throws IOException {
982         ((PushbackInputStream) in).unread(buf, offset, length);
983         pushedBackBytes(length);
984     }
985 
986     // End of Central Directory Record
987     //   end of central dir signature    WORD
988     //   number of this disk             SHORT
989     //   number of the disk with the
990     //   start of the central directory  SHORT
991     //   total number of entries in the
992     //   central directory on this disk  SHORT
993     //   total number of entries in
994     //   the central directory           SHORT
995     //   size of the central directory   WORD
996     //   offset of start of central
997     //   directory with respect to
998     //   the starting disk number        WORD
999     //   .ZIP file comment length        SHORT
1000     //   .ZIP file comment               up to 64KB
1001     //
1002 
1003     /**
1004      * Reads the stream until it find the "End of central directory
1005      * record" and consumes it as well.
1006      */
skipRemainderOfArchive()1007     private void skipRemainderOfArchive() throws IOException {
1008         // skip over central directory. One LFH has been read too much
1009         // already.  The calculation discounts file names and extra
1010         // data so it will be too short.
1011         realSkip((long) entriesRead * CFH_LEN - LFH_LEN);
1012         findEocdRecord();
1013         realSkip((long) ZipFile.MIN_EOCD_SIZE - WORD /* signature */ - SHORT /* comment len */);
1014         readFully(shortBuf);
1015         // file comment
1016         realSkip(ZipShort.getValue(shortBuf));
1017     }
1018 
1019     /**
1020      * Reads forward until the signature of the &quot;End of central
1021      * directory&quot; record is found.
1022      */
findEocdRecord()1023     private void findEocdRecord() throws IOException {
1024         int currentByte = -1;
1025         boolean skipReadCall = false;
1026         while (skipReadCall || (currentByte = readOneByte()) > -1) {
1027             skipReadCall = false;
1028             if (!isFirstByteOfEocdSig(currentByte)) {
1029                 continue;
1030             }
1031             currentByte = readOneByte();
1032             if (currentByte != ZipArchiveOutputStream.EOCD_SIG[1]) {
1033                 if (currentByte == -1) {
1034                     break;
1035                 }
1036                 skipReadCall = isFirstByteOfEocdSig(currentByte);
1037                 continue;
1038             }
1039             currentByte = readOneByte();
1040             if (currentByte != ZipArchiveOutputStream.EOCD_SIG[2]) {
1041                 if (currentByte == -1) {
1042                     break;
1043                 }
1044                 skipReadCall = isFirstByteOfEocdSig(currentByte);
1045                 continue;
1046             }
1047             currentByte = readOneByte();
1048             if (currentByte == -1
1049                 || currentByte == ZipArchiveOutputStream.EOCD_SIG[3]) {
1050                 break;
1051             }
1052             skipReadCall = isFirstByteOfEocdSig(currentByte);
1053         }
1054     }
1055 
1056     /**
1057      * Skips bytes by reading from the underlying stream rather than
1058      * the (potentially inflating) archive stream - which {@link
1059      * #skip} would do.
1060      *
1061      * Also updates bytes-read counter.
1062      */
realSkip(final long value)1063     private void realSkip(final long value) throws IOException {
1064         if (value >= 0) {
1065             long skipped = 0;
1066             while (skipped < value) {
1067                 final long rem = value - skipped;
1068                 final int x = in.read(skipBuf, 0, (int) (skipBuf.length > rem ? rem : skipBuf.length));
1069                 if (x == -1) {
1070                     return;
1071                 }
1072                 count(x);
1073                 skipped += x;
1074             }
1075             return;
1076         }
1077         throw new IllegalArgumentException();
1078     }
1079 
1080     /**
1081      * Reads bytes by reading from the underlying stream rather than
1082      * the (potentially inflating) archive stream - which {@link #read} would do.
1083      *
1084      * Also updates bytes-read counter.
1085      */
readOneByte()1086     private int readOneByte() throws IOException {
1087         final int b = in.read();
1088         if (b != -1) {
1089             count(1);
1090         }
1091         return b;
1092     }
1093 
isFirstByteOfEocdSig(final int b)1094     private boolean isFirstByteOfEocdSig(final int b) {
1095         return b == ZipArchiveOutputStream.EOCD_SIG[0];
1096     }
1097 
1098     private static final byte[] APK_SIGNING_BLOCK_MAGIC = new byte[] {
1099         'A', 'P', 'K', ' ', 'S', 'i', 'g', ' ', 'B', 'l', 'o', 'c', 'k', ' ', '4', '2',
1100     };
1101     private static final BigInteger LONG_MAX = BigInteger.valueOf(Long.MAX_VALUE);
1102 
1103     /**
1104      * Checks whether this might be an APK Signing Block.
1105      *
1106      * <p>Unfortunately the APK signing block does not start with some kind of signature, it rather ends with one. It
1107      * starts with a length, so what we do is parse the suspect length, skip ahead far enough, look for the signature
1108      * and if we've found it, return true.</p>
1109      *
1110      * @param suspectLocalFileHeader the bytes read from the underlying stream in the expectation that they would hold
1111      * the local file header of the next entry.
1112      *
1113      * @return true if this looks like a APK signing block
1114      *
1115      * @see <a href="https://source.android.com/security/apksigning/v2">https://source.android.com/security/apksigning/v2</a>
1116      */
isApkSigningBlock(byte[] suspectLocalFileHeader)1117     private boolean isApkSigningBlock(byte[] suspectLocalFileHeader) throws IOException {
1118         // length of block excluding the size field itself
1119         BigInteger len = ZipEightByteInteger.getValue(suspectLocalFileHeader);
1120         // LFH has already been read and all but the first eight bytes contain (part of) the APK signing block,
1121         // also subtract 16 bytes in order to position us at the magic string
1122         BigInteger toSkip = len.add(BigInteger.valueOf(DWORD - suspectLocalFileHeader.length
1123             - (long) APK_SIGNING_BLOCK_MAGIC.length));
1124         byte[] magic = new byte[APK_SIGNING_BLOCK_MAGIC.length];
1125 
1126         try {
1127             if (toSkip.signum() < 0) {
1128                 // suspectLocalFileHeader contains the start of suspect magic string
1129                 int off = suspectLocalFileHeader.length + toSkip.intValue();
1130                 // length was shorter than magic length
1131                 if (off < DWORD) {
1132                     return false;
1133                 }
1134                 int bytesInBuffer = Math.abs(toSkip.intValue());
1135                 System.arraycopy(suspectLocalFileHeader, off, magic, 0, Math.min(bytesInBuffer, magic.length));
1136                 if (bytesInBuffer < magic.length) {
1137                     readFully(magic, bytesInBuffer);
1138                 }
1139             } else {
1140                 while (toSkip.compareTo(LONG_MAX) > 0) {
1141                     realSkip(Long.MAX_VALUE);
1142                     toSkip = toSkip.add(LONG_MAX.negate());
1143                 }
1144                 realSkip(toSkip.longValue());
1145                 readFully(magic);
1146             }
1147         } catch (EOFException ex) { //NOSONAR
1148             // length was invalid
1149             return false;
1150         }
1151         return Arrays.equals(magic, APK_SIGNING_BLOCK_MAGIC);
1152     }
1153 
1154     /**
1155      * Structure collecting information for the entry that is
1156      * currently being read.
1157      */
1158     private static final class CurrentEntry {
1159 
1160         /**
1161          * Current ZIP entry.
1162          */
1163         private final ZipArchiveEntry entry = new ZipArchiveEntry();
1164 
1165         /**
1166          * Does the entry use a data descriptor?
1167          */
1168         private boolean hasDataDescriptor;
1169 
1170         /**
1171          * Does the entry have a ZIP64 extended information extra field.
1172          */
1173         private boolean usesZip64;
1174 
1175         /**
1176          * Number of bytes of entry content read by the client if the
1177          * entry is STORED.
1178          */
1179         private long bytesRead;
1180 
1181         /**
1182          * Number of bytes of entry content read from the stream.
1183          *
1184          * <p>This may be more than the actual entry's length as some
1185          * stuff gets buffered up and needs to be pushed back when the
1186          * end of the entry has been reached.</p>
1187          */
1188         private long bytesReadFromStream;
1189 
1190         /**
1191          * The checksum calculated as the current entry is read.
1192          */
1193         private final CRC32 crc = new CRC32();
1194 
1195         /**
1196          * The input stream decompressing the data for shrunk and imploded entries.
1197          */
1198         private InputStream in;
1199     }
1200 
1201     /**
1202      * Bounded input stream adapted from commons-io
1203      */
1204     private class BoundedInputStream extends InputStream {
1205 
1206         /** the wrapped input stream */
1207         private final InputStream in;
1208 
1209         /** the max length to provide */
1210         private final long max;
1211 
1212         /** the number of bytes already returned */
1213         private long pos = 0;
1214 
1215         /**
1216          * Creates a new <code>BoundedInputStream</code> that wraps the given input
1217          * stream and limits it to a certain size.
1218          *
1219          * @param in The wrapped input stream
1220          * @param size The maximum number of bytes to return
1221          */
BoundedInputStream(final InputStream in, final long size)1222         public BoundedInputStream(final InputStream in, final long size) {
1223             this.max = size;
1224             this.in = in;
1225         }
1226 
1227         @Override
read()1228         public int read() throws IOException {
1229             if (max >= 0 && pos >= max) {
1230                 return -1;
1231             }
1232             final int result = in.read();
1233             pos++;
1234             count(1);
1235             current.bytesReadFromStream++;
1236             return result;
1237         }
1238 
1239         @Override
read(final byte[] b)1240         public int read(final byte[] b) throws IOException {
1241             return this.read(b, 0, b.length);
1242         }
1243 
1244         @Override
read(final byte[] b, final int off, final int len)1245         public int read(final byte[] b, final int off, final int len) throws IOException {
1246             if (max >= 0 && pos >= max) {
1247                 return -1;
1248             }
1249             final long maxRead = max >= 0 ? Math.min(len, max - pos) : len;
1250             final int bytesRead = in.read(b, off, (int) maxRead);
1251 
1252             if (bytesRead == -1) {
1253                 return -1;
1254             }
1255 
1256             pos += bytesRead;
1257             count(bytesRead);
1258             current.bytesReadFromStream += bytesRead;
1259             return bytesRead;
1260         }
1261 
1262         @Override
skip(final long n)1263         public long skip(final long n) throws IOException {
1264             final long toSkip = max >= 0 ? Math.min(n, max - pos) : n;
1265             final long skippedBytes = IOUtils.skip(in, toSkip);
1266             pos += skippedBytes;
1267             return skippedBytes;
1268         }
1269 
1270         @Override
available()1271         public int available() throws IOException {
1272             if (max >= 0 && pos >= max) {
1273                 return 0;
1274             }
1275             return in.available();
1276         }
1277     }
1278 }
1279