• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Licensed to the Apache Software Foundation (ASF) under one or more
3  *  contributor license agreements.  See the NOTICE file distributed with
4  *  this work for additional information regarding copyright ownership.
5  *  The ASF licenses this file to You under the Apache License, Version 2.0
6  *  (the "License"); you may not use this file except in compliance with
7  *  the License.  You may obtain a copy of the License at
8  *
9  *      http://www.apache.org/licenses/LICENSE-2.0
10  *
11  *  Unless required by applicable law or agreed to in writing, software
12  *  distributed under the License is distributed on an "AS IS" BASIS,
13  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  *  See the License for the specific language governing permissions and
15  *  limitations under the License.
16  *
17  */
18 package org.apache.commons.compress.archivers.zip;
19 
20 import java.io.BufferedInputStream;
21 import java.io.ByteArrayInputStream;
22 import java.io.Closeable;
23 import java.io.EOFException;
24 import java.io.File;
25 import java.io.IOException;
26 import java.io.InputStream;
27 import java.io.SequenceInputStream;
28 import java.nio.ByteBuffer;
29 import java.nio.channels.FileChannel;
30 import java.nio.channels.SeekableByteChannel;
31 import java.nio.file.Files;
32 import java.nio.file.StandardOpenOption;
33 import java.util.Arrays;
34 import java.util.Collections;
35 import java.util.Comparator;
36 import java.util.Enumeration;
37 import java.util.EnumSet;
38 import java.util.HashMap;
39 import java.util.LinkedList;
40 import java.util.List;
41 import java.util.Map;
42 import java.util.zip.Inflater;
43 import java.util.zip.ZipException;
44 
45 import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
46 import org.apache.commons.compress.compressors.deflate64.Deflate64CompressorInputStream;
47 import org.apache.commons.compress.utils.CountingInputStream;
48 import org.apache.commons.compress.utils.IOUtils;
49 import org.apache.commons.compress.utils.InputStreamStatistics;
50 
51 import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD;
52 import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT;
53 import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD;
54 import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC;
55 import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC_SHORT;
56 
57 /**
 * Replacement for <code>java.util.zip.ZipFile</code>.
59  *
 * <p>This class adds support for file name encodings other than UTF-8
 * (which is required to work on ZIP files created by native zip tools)
 * and is able to skip a preamble like the one found in self-extracting
 * archives.  Furthermore it returns instances of
 * <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code>
 * instead of <code>java.util.zip.ZipEntry</code>.</p>
66  *
67  * <p>It doesn't extend <code>java.util.zip.ZipFile</code> as it would
68  * have to reimplement all methods anyway.  Like
 * <code>java.util.zip.ZipFile</code>, it uses SeekableByteChannel under the
70  * covers and supports compressed and uncompressed entries.  As of
71  * Apache Commons Compress 1.3 it also transparently supports Zip64
72  * extensions and thus individual entries and archives larger than 4
73  * GB or with more than 65536 entries.</p>
74  *
75  * <p>The method signatures mimic the ones of
76  * <code>java.util.zip.ZipFile</code>, with a couple of exceptions:
77  *
78  * <ul>
79  *   <li>There is no getName method.</li>
80  *   <li>entries has been renamed to getEntries.</li>
81  *   <li>getEntries and getEntry return
82  *   <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code>
83  *   instances.</li>
84  *   <li>close is allowed to throw IOException.</li>
85  * </ul>
86  *
87  */
88 public class ZipFile implements Closeable {
    // Initial capacity handed to the nameMap HashMap below.
    private static final int HASH_SIZE = 509;
    // Mask and shift used to extract the platform from the "version made
    // by" field: (versionMadeBy >> BYTE_SHIFT) & NIBLET_MASK.
    static final int NIBLET_MASK = 0x0f;
    static final int BYTE_SHIFT = 8;
    // Small index constants; nothing in this part of the class references
    // them - presumably byte positions used further down (TODO confirm).
    private static final int POS_0 = 0;
    private static final int POS_1 = 1;
    private static final int POS_2 = 2;
    private static final int POS_3 = 3;
    // A single zero byte; getInputStream appends it to the data of
    // DEFLATED entries as the padding byte the nowrap Inflater expects.
    private static final byte[] ONE_ZERO_BYTE = new byte[1];

    /**
     * List of entries in the order they appear inside the central
     * directory.
     */
    private final List<ZipArchiveEntry> entries =
        new LinkedList<>();

    /**
     * Maps entry names to the list of ZipArchiveEntry instances
     * carrying that name.
     */
    private final Map<String, LinkedList<ZipArchiveEntry>> nameMap =
        new HashMap<>(HASH_SIZE);

    /**
     * The encoding to use for filenames and the file comment.
     *
     * <p>For a list of possible values see <a
     * href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html</a>.
     * Defaults to UTF-8.</p>
     */
    private final String encoding;

    /**
     * The zip encoding to use for filenames and the file comment,
     * obtained from {@link #encoding} by the constructor.
     */
    private final ZipEncoding zipEncoding;

    /**
     * File name of actual source.
     */
    private final String archiveName;

    /**
     * The actual data source.
     */
    private final SeekableByteChannel archive;

    /**
     * Whether to look for and use Unicode extra fields.
     */
    private final boolean useUnicodeExtraFields;

    /**
     * Whether the file is closed.
     *
     * <p>Starts out as {@code true}; the constructor sets it to
     * {@code false} once the archive's structure has been read
     * successfully.</p>
     */
    private volatile boolean closed = true;

    // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection)
    // Each ByteBuffer below wraps the byte array of the matching name, so
    // data read from the channel into the ByteBuffer can be parsed from
    // the array.  cfhBuf is sized by CFH_LEN, a constant declared further
    // down in this class.
    private final byte[] dwordBuf = new byte[DWORD];
    private final byte[] wordBuf = new byte[WORD];
    private final byte[] cfhBuf = new byte[CFH_LEN];
    private final byte[] shortBuf = new byte[SHORT];
    private final ByteBuffer dwordBbuf = ByteBuffer.wrap(dwordBuf);
    private final ByteBuffer wordBbuf = ByteBuffer.wrap(wordBuf);
    private final ByteBuffer cfhBbuf = ByteBuffer.wrap(cfhBuf);
153 
154     /**
155      * Opens the given file for reading, assuming "UTF8" for file names.
156      *
157      * @param f the archive.
158      *
159      * @throws IOException if an error occurs while reading the file.
160      */
    public ZipFile(final File f) throws IOException {
        // Same as the (File, String) constructor with UTF-8 as encoding.
        this(f, ZipEncodingHelper.UTF8);
    }
164 
165     /**
     * Opens the given file for reading, assuming "UTF8" for file names.
167      *
168      * @param name name of the archive.
169      *
170      * @throws IOException if an error occurs while reading the file.
171      */
    public ZipFile(final String name) throws IOException {
        // Wrap the name in a File and use UTF-8 as encoding.
        this(new File(name), ZipEncodingHelper.UTF8);
    }
175 
176     /**
177      * Opens the given file for reading, assuming the specified
178      * encoding for file names, scanning unicode extra fields.
179      *
180      * @param name name of the archive.
181      * @param encoding the encoding to use for file names, use null
182      * for the platform's default encoding
183      *
184      * @throws IOException if an error occurs while reading the file.
185      */
    public ZipFile(final String name, final String encoding) throws IOException {
        // true: Unicode extra fields are used for file names if present.
        this(new File(name), encoding, true);
    }
189 
190     /**
191      * Opens the given file for reading, assuming the specified
192      * encoding for file names and scanning for unicode extra fields.
193      *
194      * @param f the archive.
195      * @param encoding the encoding to use for file names, use null
196      * for the platform's default encoding
197      *
198      * @throws IOException if an error occurs while reading the file.
199      */
    public ZipFile(final File f, final String encoding) throws IOException {
        // true: Unicode extra fields are used for file names if present.
        this(f, encoding, true);
    }
203 
204     /**
205      * Opens the given file for reading, assuming the specified
206      * encoding for file names.
207      *
208      * @param f the archive.
209      * @param encoding the encoding to use for file names, use null
210      * for the platform's default encoding
211      * @param useUnicodeExtraFields whether to use InfoZIP Unicode
212      * Extra Fields (if present) to set the file names.
213      *
214      * @throws IOException if an error occurs while reading the file.
215      */
    public ZipFile(final File f, final String encoding, final boolean useUnicodeExtraFields)
        throws IOException {
        // The channel is opened here (read-only), so this class owns it:
        // the trailing "true" is closeOnError, which makes the private
        // constructor close the channel if reading the archive fails.
        // The file's absolute path serves as the archive name.
        this(Files.newByteChannel(f.toPath(), EnumSet.of(StandardOpenOption.READ)),
             f.getAbsolutePath(), encoding, useUnicodeExtraFields, true);
    }
221 
222     /**
223      * Opens the given channel for reading, assuming "UTF8" for file names.
224      *
225      * <p>{@link
226      * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
227      * allows you to read from an in-memory archive.</p>
228      *
229      * @param channel the archive.
230      *
231      * @throws IOException if an error occurs while reading the file.
232      * @since 1.13
233      */
    public ZipFile(final SeekableByteChannel channel)
            throws IOException {
        // A bare channel carries no name, so a placeholder archive name is
        // used; UTF-8 encoding and Unicode extra fields are enabled.
        this(channel, "unknown archive", ZipEncodingHelper.UTF8, true);
    }
238 
239     /**
240      * Opens the given channel for reading, assuming the specified
241      * encoding for file names.
242      *
243      * <p>{@link
244      * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
245      * allows you to read from an in-memory archive.</p>
246      *
247      * @param channel the archive.
248      * @param encoding the encoding to use for file names, use null
249      * for the platform's default encoding
250      *
251      * @throws IOException if an error occurs while reading the file.
252      * @since 1.13
253      */
    public ZipFile(final SeekableByteChannel channel, final String encoding)
        throws IOException {
        // A bare channel carries no name, so a placeholder archive name is
        // used; Unicode extra fields are enabled.
        this(channel, "unknown archive", encoding, true);
    }
258 
259     /**
260      * Opens the given channel for reading, assuming the specified
261      * encoding for file names.
262      *
263      * <p>{@link
264      * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
265      * allows you to read from an in-memory archive.</p>
266      *
267      * @param channel the archive.
268      * @param archiveName name of the archive, used for error messages only.
269      * @param encoding the encoding to use for file names, use null
270      * for the platform's default encoding
271      * @param useUnicodeExtraFields whether to use InfoZIP Unicode
272      * Extra Fields (if present) to set the file names.
273      *
274      * @throws IOException if an error occurs while reading the file.
275      * @since 1.13
276      */
    public ZipFile(final SeekableByteChannel channel, final String archiveName,
                   final String encoding, final boolean useUnicodeExtraFields)
        throws IOException {
        // The trailing "false" is closeOnError: the channel was supplied by
        // the caller, so it is not closed here when reading the archive fails.
        this(channel, archiveName, encoding, useUnicodeExtraFields, false);
    }
282 
ZipFile(final SeekableByteChannel channel, final String archiveName, final String encoding, final boolean useUnicodeExtraFields, final boolean closeOnError)283     private ZipFile(final SeekableByteChannel channel, final String archiveName,
284                     final String encoding, final boolean useUnicodeExtraFields,
285                     final boolean closeOnError)
286         throws IOException {
287         this.archiveName = archiveName;
288         this.encoding = encoding;
289         this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
290         this.useUnicodeExtraFields = useUnicodeExtraFields;
291         archive = channel;
292         boolean success = false;
293         try {
294             final Map<ZipArchiveEntry, NameAndComment> entriesWithoutUTF8Flag =
295                 populateFromCentralDirectory();
296             resolveLocalFileHeaderData(entriesWithoutUTF8Flag);
297             success = true;
298         } finally {
299             closed = !success;
300             if (!success && closeOnError) {
301                 IOUtils.closeQuietly(archive);
302             }
303         }
304     }
305 
306     /**
307      * The encoding to use for filenames and the file comment.
308      *
309      * @return null if using the platform's default character encoding.
310      */
getEncoding()311     public String getEncoding() {
312         return encoding;
313     }
314 
315     /**
316      * Closes the archive.
317      * @throws IOException if an error occurs closing the archive.
318      */
    @Override
    public void close() throws IOException {
        // The flag is set before the channel is closed, so it reads
        // "closed" even if archive.close() throws and finalize() will
        // not attempt a second close.
        // Apart from this method the flag is only written by the
        // constructor and only read by finalize(); it is volatile, no
        // further synchronization is used.
        closed = true;

        archive.close();
    }
328 
329     /**
     * Closes a zipfile quietly; throws no IOException and does nothing
     * on a null parameter.
332      * @param zipfile file to close, can be null
333      */
    public static void closeQuietly(final ZipFile zipfile) {
        // ZipFile implements Closeable, so the generic helper is used.
        IOUtils.closeQuietly(zipfile);
    }
337 
338     /**
339      * Returns all entries.
340      *
341      * <p>Entries will be returned in the same order they appear
342      * within the archive's central directory.</p>
343      *
344      * @return all entries as {@link ZipArchiveEntry} instances
345      */
    public Enumeration<ZipArchiveEntry> getEntries() {
        // Enumerates the internal list directly, no copy is made.
        return Collections.enumeration(entries);
    }
349 
350     /**
351      * Returns all entries in physical order.
352      *
353      * <p>Entries will be returned in the same order their contents
354      * appear within the archive.</p>
355      *
356      * @return all entries as {@link ZipArchiveEntry} instances
357      *
358      * @since 1.1
359      */
getEntriesInPhysicalOrder()360     public Enumeration<ZipArchiveEntry> getEntriesInPhysicalOrder() {
361         final ZipArchiveEntry[] allEntries = entries.toArray(new ZipArchiveEntry[entries.size()]);
362         Arrays.sort(allEntries, offsetComparator);
363         return Collections.enumeration(Arrays.asList(allEntries));
364     }
365 
366     /**
367      * Returns a named entry - or {@code null} if no entry by
368      * that name exists.
369      *
370      * <p>If multiple entries with the same name exist the first entry
371      * in the archive's central directory by that name is
372      * returned.</p>
373      *
374      * @param name name of the entry.
375      * @return the ZipArchiveEntry corresponding to the given name - or
376      * {@code null} if not present.
377      */
getEntry(final String name)378     public ZipArchiveEntry getEntry(final String name) {
379         final LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.get(name);
380         return entriesOfThatName != null ? entriesOfThatName.getFirst() : null;
381     }
382 
383     /**
384      * Returns all named entries in the same order they appear within
385      * the archive's central directory.
386      *
387      * @param name name of the entry.
388      * @return the Iterable&lt;ZipArchiveEntry&gt; corresponding to the
389      * given name
390      * @since 1.6
391      */
getEntries(final String name)392     public Iterable<ZipArchiveEntry> getEntries(final String name) {
393         final List<ZipArchiveEntry> entriesOfThatName = nameMap.get(name);
394         return entriesOfThatName != null ? entriesOfThatName
395             : Collections.<ZipArchiveEntry>emptyList();
396     }
397 
398     /**
399      * Returns all named entries in the same order their contents
400      * appear within the archive.
401      *
402      * @param name name of the entry.
403      * @return the Iterable&lt;ZipArchiveEntry&gt; corresponding to the
404      * given name
405      * @since 1.6
406      */
getEntriesInPhysicalOrder(final String name)407     public Iterable<ZipArchiveEntry> getEntriesInPhysicalOrder(final String name) {
408         ZipArchiveEntry[] entriesOfThatName = new ZipArchiveEntry[0];
409         if (nameMap.containsKey(name)) {
410             entriesOfThatName = nameMap.get(name).toArray(entriesOfThatName);
411             Arrays.sort(entriesOfThatName, offsetComparator);
412         }
413         return Arrays.asList(entriesOfThatName);
414     }
415 
416     /**
417      * Whether this class is able to read the given entry.
418      *
419      * <p>May return false if it is set up to use encryption or a
420      * compression method that hasn't been implemented yet.</p>
421      * @since 1.1
422      * @param ze the entry
423      * @return whether this class is able to read the given entry.
424      */
    public boolean canReadEntryData(final ZipArchiveEntry ze) {
        // The decision is made entirely by ZipUtil.
        return ZipUtil.canHandleEntryData(ze);
    }
428 
429     /**
430      * Expose the raw stream of the archive entry (compressed form).
431      *
432      * <p>This method does not relate to how/if we understand the payload in the
433      * stream, since we really only intend to move it on to somewhere else.</p>
434      *
435      * @param ze The entry to get the stream for
436      * @return The raw input stream containing (possibly) compressed data.
437      * @since 1.11
438      */
getRawInputStream(final ZipArchiveEntry ze)439     public InputStream getRawInputStream(final ZipArchiveEntry ze) {
440         if (!(ze instanceof Entry)) {
441             return null;
442         }
443         final long start = ze.getDataOffset();
444         return createBoundedInputStream(start, ze.getCompressedSize());
445     }
446 
447 
448     /**
     * Transfer selected entries from this zipfile to a given {@link ZipArchiveOutputStream}.
450      * Compression and all other attributes will be as in this file.
451      * <p>This method transfers entries based on the central directory of the zip file.</p>
452      *
453      * @param target The zipArchiveOutputStream to write the entries to
454      * @param predicate A predicate that selects which entries to write
455      * @throws IOException on error
456      */
copyRawEntries(final ZipArchiveOutputStream target, final ZipArchiveEntryPredicate predicate)457     public void copyRawEntries(final ZipArchiveOutputStream target, final ZipArchiveEntryPredicate predicate)
458             throws IOException {
459         final Enumeration<ZipArchiveEntry> src = getEntriesInPhysicalOrder();
460         while (src.hasMoreElements()) {
461             final ZipArchiveEntry entry = src.nextElement();
462             if (predicate.test( entry)) {
463                 target.addRawArchiveEntry(entry, getRawInputStream(entry));
464             }
465         }
466     }
467 
468     /**
469      * Returns an InputStream for reading the contents of the given entry.
470      *
471      * @param ze the entry to get the stream for.
472      * @return a stream to read the entry from. The returned stream
473      * implements {@link InputStreamStatistics}.
474      * @throws IOException if unable to create an input stream from the zipentry
475      */
getInputStream(final ZipArchiveEntry ze)476     public InputStream getInputStream(final ZipArchiveEntry ze)
477         throws IOException {
478         if (!(ze instanceof Entry)) {
479             return null;
480         }
481         // cast validity is checked just above
482         ZipUtil.checkRequestedFeatures(ze);
483         final long start = ze.getDataOffset();
484 
485         // doesn't get closed if the method is not supported - which
486         // should never happen because of the checkRequestedFeatures
487         // call above
488         final InputStream is =
489             new BufferedInputStream(createBoundedInputStream(start, ze.getCompressedSize())); //NOSONAR
490         switch (ZipMethod.getMethodByCode(ze.getMethod())) {
491             case STORED:
492                 return new StoredStatisticsStream(is);
493             case UNSHRINKING:
494                 return new UnshrinkingInputStream(is);
495             case IMPLODING:
496                 return new ExplodingInputStream(ze.getGeneralPurposeBit().getSlidingDictionarySize(),
497                         ze.getGeneralPurposeBit().getNumberOfShannonFanoTrees(), is);
498             case DEFLATED:
499                 final Inflater inflater = new Inflater(true);
500                 // Inflater with nowrap=true has this odd contract for a zero padding
501                 // byte following the data stream; this used to be zlib's requirement
502                 // and has been fixed a long time ago, but the contract persists so
503                 // we comply.
504                 // https://docs.oracle.com/javase/7/docs/api/java/util/zip/Inflater.html#Inflater(boolean)
505                 return new InflaterInputStreamWithStatistics(new SequenceInputStream(is, new ByteArrayInputStream(ONE_ZERO_BYTE)),
506                     inflater) {
507                     @Override
508                     public void close() throws IOException {
509                         try {
510                             super.close();
511                         } finally {
512                             inflater.end();
513                         }
514                     }
515                 };
516             case BZIP2:
517                 return new BZip2CompressorInputStream(is);
518             case ENHANCED_DEFLATED:
519                 return new Deflate64CompressorInputStream(is);
520             case AES_ENCRYPTED:
521             case EXPANDING_LEVEL_1:
522             case EXPANDING_LEVEL_2:
523             case EXPANDING_LEVEL_3:
524             case EXPANDING_LEVEL_4:
525             case JPEG:
526             case LZMA:
527             case PKWARE_IMPLODING:
528             case PPMD:
529             case TOKENIZATION:
530             case UNKNOWN:
531             case WAVPACK:
532             case XZ:
533             default:
534                 throw new ZipException("Found unsupported compression method "
535                                        + ze.getMethod());
536         }
537     }
538 
539     /**
540      * <p>
541      * Convenience method to return the entry's content as a String if isUnixSymlink()
542      * returns true for it, otherwise returns null.
543      * </p>
544      *
545      * <p>This method assumes the symbolic link's file name uses the
     * same encoding that has been specified for this ZipFile.</p>
547      *
548      * @param entry ZipArchiveEntry object that represents the symbolic link
549      * @return entry's content as a String
550      * @throws IOException problem with content's input stream
551      * @since 1.5
552      */
553     public String getUnixSymlink(final ZipArchiveEntry entry) throws IOException {
554         if (entry != null && entry.isUnixSymlink()) {
555             try (InputStream in = getInputStream(entry)) {
556                 return zipEncoding.decode(IOUtils.toByteArray(in));
557             }
558         }
559         return null;
560     }
561 
562     /**
563      * Ensures that the close method of this zipfile is called when
564      * there are no more references to it.
565      * @see #close()
566      */
567     @Override
568     protected void finalize() throws Throwable {
569         try {
570             if (!closed) {
571                 System.err.println("Cleaning up unclosed ZipFile for archive "
572                                    + archiveName);
573                 close();
574             }
575         } finally {
576             super.finalize();
577         }
578     }
579 
580     /**
581      * Length of a "central directory" entry structure without file
582      * name, extra fields or comment.
583      */
    private static final int CFH_LEN =
        // The signature that precedes each entry is not counted here:
        // populateFromCentralDirectory reads it separately before
        // readCentralDirectoryEntry reads CFH_LEN bytes.
        /* version made by                 */ SHORT
        /* version needed to extract       */ + SHORT
        /* general purpose bit flag        */ + SHORT
        /* compression method              */ + SHORT
        /* last mod file time              */ + SHORT
        /* last mod file date              */ + SHORT
        /* crc-32                          */ + WORD
        /* compressed size                 */ + WORD
        /* uncompressed size               */ + WORD
        /* filename length                 */ + SHORT
        /* extra field length              */ + SHORT
        /* file comment length             */ + SHORT
        /* disk number start               */ + SHORT
        /* internal file attributes        */ + SHORT
        /* external file attributes        */ + WORD
        /* relative offset of local header */ + WORD;
601 
    // Signature of a central directory entry as a long, taken from the
    // constant of the same name in ZipArchiveOutputStream; compared
    // against the four bytes read ahead of each entry.
    private static final long CFH_SIG =
        ZipLong.getValue(ZipArchiveOutputStream.CFH_SIG);
604 
605     /**
606      * Reads the central directory of the given archive and populates
607      * the internal tables with ZipArchiveEntry instances.
608      *
609      * <p>The ZipArchiveEntrys will know all data that can be obtained from
610      * the central directory alone, but not the data that requires the
611      * local file header or additional data to be read.</p>
612      *
613      * @return a map of zipentries that didn't have the language
614      * encoding flag set when read.
615      */
616     private Map<ZipArchiveEntry, NameAndComment> populateFromCentralDirectory()
617         throws IOException {
618         final HashMap<ZipArchiveEntry, NameAndComment> noUTF8Flag =
619             new HashMap<>();
620 
621         positionAtCentralDirectory();
622 
623         wordBbuf.rewind();
624         IOUtils.readFully(archive, wordBbuf);
625         long sig = ZipLong.getValue(wordBuf);
626 
627         if (sig != CFH_SIG && startsWithLocalFileHeader()) {
628             throw new IOException("central directory is empty, can't expand"
629                                   + " corrupt archive.");
630         }
631 
632         while (sig == CFH_SIG) {
633             readCentralDirectoryEntry(noUTF8Flag);
634             wordBbuf.rewind();
635             IOUtils.readFully(archive, wordBbuf);
636             sig = ZipLong.getValue(wordBuf);
637         }
638         return noUTF8Flag;
639     }
640 
641     /**
     * Reads an individual entry of the central directory, creates a
     * ZipArchiveEntry from it and adds it to the global maps.
644      *
645      * @param noUTF8Flag map used to collect entries that don't have
646      * their UTF-8 flag set and whose name will be set by data read
647      * from the local file header later.  The current entry may be
648      * added to this map.
649      */
    private void
        readCentralDirectoryEntry(final Map<ZipArchiveEntry, NameAndComment> noUTF8Flag)
        throws IOException {
        // Read the fixed-size part of the entry (CFH_LEN bytes) into cfhBuf
        // in one go; "off" then walks through that buffer field by field.
        cfhBbuf.rewind();
        IOUtils.readFully(archive, cfhBbuf);
        int off = 0;
        final Entry ze = new Entry();

        final int versionMadeBy = ZipShort.getValue(cfhBuf, off);
        off += SHORT;
        ze.setVersionMadeBy(versionMadeBy);
        // the platform is taken from the low nibble of the upper byte
        ze.setPlatform((versionMadeBy >> BYTE_SHIFT) & NIBLET_MASK);

        ze.setVersionRequired(ZipShort.getValue(cfhBuf, off));
        off += SHORT; // version required

        // The general purpose bit flag is read twice from the same offset:
        // once parsed, once as raw value.  "off" only advances afterwards.
        final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(cfhBuf, off);
        final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
        // with the UTF-8 flag set name and comment are decoded as UTF-8,
        // otherwise the encoding configured for this ZipFile is used
        final ZipEncoding entryEncoding =
            hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
        if (hasUTF8Flag) {
            ze.setNameSource(ZipArchiveEntry.NameSource.NAME_WITH_EFS_FLAG);
        }
        ze.setGeneralPurposeBit(gpFlag);
        ze.setRawFlag(ZipShort.getValue(cfhBuf, off));

        off += SHORT;

        //noinspection MagicConstant
        ze.setMethod(ZipShort.getValue(cfhBuf, off));
        off += SHORT;

        // last mod time and last mod date (a SHORT each) are read together
        // as a single WORD and converted from DOS format
        final long time = ZipUtil.dosToJavaTime(ZipLong.getValue(cfhBuf, off));
        ze.setTime(time);
        off += WORD;

        ze.setCrc(ZipLong.getValue(cfhBuf, off));
        off += WORD;

        ze.setCompressedSize(ZipLong.getValue(cfhBuf, off));
        off += WORD;

        ze.setSize(ZipLong.getValue(cfhBuf, off));
        off += WORD;

        // lengths of the three variable-size parts following the fixed part
        final int fileNameLen = ZipShort.getValue(cfhBuf, off);
        off += SHORT;

        final int extraLen = ZipShort.getValue(cfhBuf, off);
        off += SHORT;

        final int commentLen = ZipShort.getValue(cfhBuf, off);
        off += SHORT;

        // only used for the Zip64 handling below
        final int diskStart = ZipShort.getValue(cfhBuf, off);
        off += SHORT;

        ze.setInternalAttributes(ZipShort.getValue(cfhBuf, off));
        off += SHORT;

        ze.setExternalAttributes(ZipLong.getValue(cfhBuf, off));
        off += WORD;

        // variable-size part 1: the file name, read straight from the channel
        final byte[] fileName = new byte[fileNameLen];
        IOUtils.readFully(archive, ByteBuffer.wrap(fileName));
        ze.setName(entryEncoding.decode(fileName), fileName);

        // LFH offset,
        ze.setLocalHeaderOffset(ZipLong.getValue(cfhBuf, off));
        // data offset will be filled later
        entries.add(ze);

        // variable-size part 2: the extra fields
        final byte[] cdExtraData = new byte[extraLen];
        IOUtils.readFully(archive, ByteBuffer.wrap(cdExtraData));
        ze.setCentralDirectoryExtra(cdExtraData);

        // must come after sizes, local header offset and extra data have
        // been set, as it inspects all of them on the entry
        setSizesAndOffsetFromZip64Extra(ze, diskStart);

        // variable-size part 3: the entry comment
        final byte[] comment = new byte[commentLen];
        IOUtils.readFully(archive, ByteBuffer.wrap(comment));
        ze.setComment(entryEncoding.decode(comment));

        // remember the raw name and comment bytes of entries without the
        // UTF-8 flag if Unicode extra fields are to be used
        if (!hasUTF8Flag && useUnicodeExtraFields) {
            noUTF8Flag.put(ze, new NameAndComment(fileName, comment));
        }
    }
736 
737     /**
738      * If the entry holds a Zip64 extended information extra field,
739      * read sizes from there if the entry's sizes are set to
     * 0xFFFFFFFF, do the same for the offset of the local file
741      * header.
742      *
743      * <p>Ensures the Zip64 extra either knows both compressed and
744      * uncompressed size or neither of both as the internal logic in
745      * ExtraFieldUtils forces the field to create local header data
746      * even if they are never used - and here a field with only one
747      * size would be invalid.</p>
748      */
749     private void setSizesAndOffsetFromZip64Extra(final ZipArchiveEntry ze,
750                                                  final int diskStart)
751         throws IOException {
752         final Zip64ExtendedInformationExtraField z64 =
753             (Zip64ExtendedInformationExtraField)
754             ze.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID);
755         if (z64 != null) {
756             final boolean hasUncompressedSize = ze.getSize() == ZIP64_MAGIC;
757             final boolean hasCompressedSize = ze.getCompressedSize() == ZIP64_MAGIC;
758             final boolean hasRelativeHeaderOffset =
759                 ze.getLocalHeaderOffset() == ZIP64_MAGIC;
760             z64.reparseCentralDirectoryData(hasUncompressedSize,
761                                             hasCompressedSize,
762                                             hasRelativeHeaderOffset,
763                                             diskStart == ZIP64_MAGIC_SHORT);
764 
765             if (hasUncompressedSize) {
766                 ze.setSize(z64.getSize().getLongValue());
767             } else if (hasCompressedSize) {
768                 z64.setSize(new ZipEightByteInteger(ze.getSize()));
769             }
770 
771             if (hasCompressedSize) {
772                 ze.setCompressedSize(z64.getCompressedSize().getLongValue());
773             } else if (hasUncompressedSize) {
774                 z64.setCompressedSize(new ZipEightByteInteger(ze.getCompressedSize()));
775             }
776 
777             if (hasRelativeHeaderOffset) {
778                 ze.setLocalHeaderOffset(z64.getRelativeHeaderOffset().getLongValue());
779             }
780         }
781     }
782 
783     /**
784      * Length of the "End of central directory record" - which is
785      * supposed to be the last structure of the archive - without file
786      * comment.
787      */
788     static final int MIN_EOCD_SIZE =
789         /* end of central dir signature    */ WORD
790         /* number of this disk             */ + SHORT
791         /* number of the disk with the     */
792         /* start of the central directory  */ + SHORT
793         /* total number of entries in      */
794         /* the central dir on this disk    */ + SHORT
795         /* total number of entries in      */
796         /* the central dir                 */ + SHORT
797         /* size of the central directory   */ + WORD
798         /* offset of start of central      */
799         /* directory with respect to       */
800         /* the starting disk number        */ + WORD
801         /* zipfile comment length          */ + SHORT;
802 
803     /**
804      * Maximum length of the "End of central directory record" with a
805      * file comment.
806      */
807     private static final int MAX_EOCD_SIZE = MIN_EOCD_SIZE
808         /* maximum length of zipfile comment */ + ZIP64_MAGIC_SHORT;
809 
810     /**
811      * Offset of the field that holds the location of the first
812      * central directory entry inside the "End of central directory
813      * record" relative to the start of the "End of central directory
814      * record".
815      */
816     private static final int CFD_LOCATOR_OFFSET =
817         /* end of central dir signature    */ WORD
818         /* number of this disk             */ + SHORT
819         /* number of the disk with the     */
820         /* start of the central directory  */ + SHORT
821         /* total number of entries in      */
822         /* the central dir on this disk    */ + SHORT
823         /* total number of entries in      */
824         /* the central dir                 */ + SHORT
825         /* size of the central directory   */ + WORD;
826 
827     /**
828      * Length of the "Zip64 end of central directory locator" - which
829      * should be right in front of the "end of central directory
830      * record" if one is present at all.
831      */
832     private static final int ZIP64_EOCDL_LENGTH =
833         /* zip64 end of central dir locator sig */ WORD
834         /* number of the disk with the start    */
835         /* start of the zip64 end of            */
836         /* central directory                    */ + WORD
837         /* relative offset of the zip64         */
838         /* end of central directory record      */ + DWORD
839         /* total number of disks                */ + WORD;
840 
841     /**
842      * Offset of the field that holds the location of the "Zip64 end
843      * of central directory record" inside the "Zip64 end of central
844      * directory locator" relative to the start of the "Zip64 end of
845      * central directory locator".
846      */
847     private static final int ZIP64_EOCDL_LOCATOR_OFFSET =
848         /* zip64 end of central dir locator sig */ WORD
849         /* number of the disk with the start    */
850         /* start of the zip64 end of            */
851         /* central directory                    */ + WORD;
852 
853     /**
854      * Offset of the field that holds the location of the first
855      * central directory entry inside the "Zip64 end of central
856      * directory record" relative to the start of the "Zip64 end of
857      * central directory record".
858      */
859     private static final int ZIP64_EOCD_CFD_LOCATOR_OFFSET =
860         /* zip64 end of central dir        */
861         /* signature                       */ WORD
862         /* size of zip64 end of central    */
863         /* directory record                */ + DWORD
864         /* version made by                 */ + SHORT
865         /* version needed to extract       */ + SHORT
866         /* number of this disk             */ + WORD
867         /* number of the disk with the     */
868         /* start of the central directory  */ + WORD
869         /* total number of entries in the  */
870         /* central directory on this disk  */ + DWORD
871         /* total number of entries in the  */
872         /* central directory               */ + DWORD
873         /* size of the central directory   */ + DWORD;
874 
875     /**
876      * Searches for either the &quot;Zip64 end of central directory
877      * locator&quot; or the &quot;End of central dir record&quot;, parses
878      * it and positions the stream at the first central directory
879      * record.
880      */
881     private void positionAtCentralDirectory()
882         throws IOException {
883         positionAtEndOfCentralDirectoryRecord();
884         boolean found = false;
885         final boolean searchedForZip64EOCD =
886             archive.position() > ZIP64_EOCDL_LENGTH;
887         if (searchedForZip64EOCD) {
888             archive.position(archive.position() - ZIP64_EOCDL_LENGTH);
889             wordBbuf.rewind();
890             IOUtils.readFully(archive, wordBbuf);
891             found = Arrays.equals(ZipArchiveOutputStream.ZIP64_EOCD_LOC_SIG,
892                                   wordBuf);
893         }
894         if (!found) {
895             // not a ZIP64 archive
896             if (searchedForZip64EOCD) {
897                 skipBytes(ZIP64_EOCDL_LENGTH - WORD);
898             }
899             positionAtCentralDirectory32();
900         } else {
901             positionAtCentralDirectory64();
902         }
903     }
904 
905     /**
906      * Parses the &quot;Zip64 end of central directory locator&quot;,
907      * finds the &quot;Zip64 end of central directory record&quot; using the
908      * parsed information, parses that and positions the stream at the
909      * first central directory record.
910      *
911      * Expects stream to be positioned right behind the &quot;Zip64
912      * end of central directory locator&quot;'s signature.
913      */
914     private void positionAtCentralDirectory64()
915         throws IOException {
916         skipBytes(ZIP64_EOCDL_LOCATOR_OFFSET
917                   - WORD /* signature has already been read */);
918         dwordBbuf.rewind();
919         IOUtils.readFully(archive, dwordBbuf);
920         archive.position(ZipEightByteInteger.getLongValue(dwordBuf));
921         wordBbuf.rewind();
922         IOUtils.readFully(archive, wordBbuf);
923         if (!Arrays.equals(wordBuf, ZipArchiveOutputStream.ZIP64_EOCD_SIG)) {
924             throw new ZipException("archive's ZIP64 end of central "
925                                    + "directory locator is corrupt.");
926         }
927         skipBytes(ZIP64_EOCD_CFD_LOCATOR_OFFSET
928                   - WORD /* signature has already been read */);
929         dwordBbuf.rewind();
930         IOUtils.readFully(archive, dwordBbuf);
931         archive.position(ZipEightByteInteger.getLongValue(dwordBuf));
932     }
933 
934     /**
935      * Parses the &quot;End of central dir record&quot; and positions
936      * the stream at the first central directory record.
937      *
938      * Expects stream to be positioned at the beginning of the
939      * &quot;End of central dir record&quot;.
940      */
941     private void positionAtCentralDirectory32()
942         throws IOException {
943         skipBytes(CFD_LOCATOR_OFFSET);
944         wordBbuf.rewind();
945         IOUtils.readFully(archive, wordBbuf);
946         archive.position(ZipLong.getValue(wordBuf));
947     }
948 
949     /**
     * Searches for the &quot;End of central dir record&quot; and
     * positions the stream at its start.
952      */
953     private void positionAtEndOfCentralDirectoryRecord()
954         throws IOException {
955         final boolean found = tryToLocateSignature(MIN_EOCD_SIZE, MAX_EOCD_SIZE,
956                                              ZipArchiveOutputStream.EOCD_SIG);
957         if (!found) {
958             throw new ZipException("archive is not a ZIP archive");
959         }
960     }
961 
962     /**
     * Searches the archive backwards from minDistance to maxDistance
     * for the given signature, positions the archive channel right
     * at the signature if it has been found.
966      */
967     private boolean tryToLocateSignature(final long minDistanceFromEnd,
968                                          final long maxDistanceFromEnd,
969                                          final byte[] sig) throws IOException {
970         boolean found = false;
971         long off = archive.size() - minDistanceFromEnd;
972         final long stopSearching =
973             Math.max(0L, archive.size() - maxDistanceFromEnd);
974         if (off >= 0) {
975             for (; off >= stopSearching; off--) {
976                 archive.position(off);
977                 try {
978                     wordBbuf.rewind();
979                     IOUtils.readFully(archive, wordBbuf);
980                     wordBbuf.flip();
981                 } catch (EOFException ex) {
982                     break;
983                 }
984                 int curr = wordBbuf.get();
985                 if (curr == sig[POS_0]) {
986                     curr = wordBbuf.get();
987                     if (curr == sig[POS_1]) {
988                         curr = wordBbuf.get();
989                         if (curr == sig[POS_2]) {
990                             curr = wordBbuf.get();
991                             if (curr == sig[POS_3]) {
992                                 found = true;
993                                 break;
994                             }
995                         }
996                     }
997                 }
998             }
999         }
1000         if (found) {
1001             archive.position(off);
1002         }
1003         return found;
1004     }
1005 
1006     /**
1007      * Skips the given number of bytes or throws an EOFException if
1008      * skipping failed.
1009      */
1010     private void skipBytes(final int count) throws IOException {
1011         long currentPosition = archive.position();
1012         long newPosition = currentPosition + count;
1013         if (newPosition > archive.size()) {
1014             throw new EOFException();
1015         }
1016         archive.position(newPosition);
1017     }
1018 
1019     /**
1020      * Number of bytes in local file header up to the &quot;length of
1021      * filename&quot; entry.
1022      */
1023     private static final long LFH_OFFSET_FOR_FILENAME_LENGTH =
1024         /* local file header signature     */ WORD
1025         /* version needed to extract       */ + SHORT
1026         /* general purpose bit flag        */ + SHORT
1027         /* compression method              */ + SHORT
1028         /* last mod file time              */ + SHORT
1029         /* last mod file date              */ + SHORT
1030         /* crc-32                          */ + WORD
1031         /* compressed size                 */ + WORD
1032         /* uncompressed size               */ + (long) WORD;
1033 
1034     /**
1035      * Walks through all recorded entries and adds the data available
1036      * from the local file header.
1037      *
1038      * <p>Also records the offsets for the data to read from the
1039      * entries.</p>
1040      */
1041     private void resolveLocalFileHeaderData(final Map<ZipArchiveEntry, NameAndComment>
1042                                             entriesWithoutUTF8Flag)
1043         throws IOException {
1044         for (final ZipArchiveEntry zipArchiveEntry : entries) {
1045             // entries is filled in populateFromCentralDirectory and
1046             // never modified
1047             final Entry ze = (Entry) zipArchiveEntry;
1048             final long offset = ze.getLocalHeaderOffset();
1049             archive.position(offset + LFH_OFFSET_FOR_FILENAME_LENGTH);
1050             wordBbuf.rewind();
1051             IOUtils.readFully(archive, wordBbuf);
1052             wordBbuf.flip();
1053             wordBbuf.get(shortBuf);
1054             final int fileNameLen = ZipShort.getValue(shortBuf);
1055             wordBbuf.get(shortBuf);
1056             final int extraFieldLen = ZipShort.getValue(shortBuf);
1057             skipBytes(fileNameLen);
1058             final byte[] localExtraData = new byte[extraFieldLen];
1059             IOUtils.readFully(archive, ByteBuffer.wrap(localExtraData));
1060             ze.setExtra(localExtraData);
1061             ze.setDataOffset(offset + LFH_OFFSET_FOR_FILENAME_LENGTH
1062                 + SHORT + SHORT + fileNameLen + extraFieldLen);
1063             ze.setStreamContiguous(true);
1064 
1065             if (entriesWithoutUTF8Flag.containsKey(ze)) {
1066                 final NameAndComment nc = entriesWithoutUTF8Flag.get(ze);
1067                 ZipUtil.setNameAndCommentFromExtraFields(ze, nc.name,
1068                                                          nc.comment);
1069             }
1070 
1071             final String name = ze.getName();
1072             LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.get(name);
1073             if (entriesOfThatName == null) {
1074                 entriesOfThatName = new LinkedList<>();
1075                 nameMap.put(name, entriesOfThatName);
1076             }
1077             entriesOfThatName.addLast(ze);
1078         }
1079     }
1080 
1081     /**
1082      * Checks whether the archive starts with a LFH.  If it doesn't,
1083      * it may be an empty archive.
1084      */
1085     private boolean startsWithLocalFileHeader() throws IOException {
1086         archive.position(0);
1087         wordBbuf.rewind();
1088         IOUtils.readFully(archive, wordBbuf);
1089         return Arrays.equals(wordBuf, ZipArchiveOutputStream.LFH_SIG);
1090     }
1091 
1092     /**
1093      * Creates new BoundedInputStream, according to implementation of
1094      * underlying archive channel.
1095      */
1096     private BoundedInputStream createBoundedInputStream(long start, long remaining) {
1097         return archive instanceof FileChannel ?
1098             new BoundedFileChannelInputStream(start, remaining) :
1099             new BoundedInputStream(start, remaining);
1100     }
1101 
1102     /**
1103      * InputStream that delegates requests to the underlying
1104      * SeekableByteChannel, making sure that only bytes from a certain
1105      * range can be read.
1106      */
1107     private class BoundedInputStream extends InputStream {
1108         private ByteBuffer singleByteBuffer;
1109         private final long end;
1110         private long loc;
1111 
1112         BoundedInputStream(final long start, final long remaining) {
1113             this.end = start+remaining;
1114             if (this.end < start) {
1115                 // check for potential vulnerability due to overflow
1116                 throw new IllegalArgumentException("Invalid length of stream at offset="+start+", length="+remaining);
1117             }
1118             loc = start;
1119         }
1120 
1121         @Override
1122         public synchronized int read() throws IOException {
1123             if (loc >= end) {
1124                 return -1;
1125             }
1126             if (singleByteBuffer == null) {
1127                 singleByteBuffer = ByteBuffer.allocate(1);
1128             }
1129             else {
1130                 singleByteBuffer.rewind();
1131             }
1132             int read = read(loc, singleByteBuffer);
1133             if (read < 0) {
1134                 return read;
1135             }
1136             loc++;
1137             return singleByteBuffer.get() & 0xff;
1138         }
1139 
1140         @Override
1141         public synchronized int read(final byte[] b, final int off, int len) throws IOException {
1142             if (len <= 0) {
1143                 return 0;
1144             }
1145 
1146             if (len > end-loc) {
1147                 if (loc >= end) {
1148                     return -1;
1149                 }
1150                 len = (int)(end-loc);
1151             }
1152 
1153             ByteBuffer buf;
1154             buf = ByteBuffer.wrap(b, off, len);
1155             int ret = read(loc, buf);
1156             if (ret > 0) {
1157                 loc += ret;
1158                 return ret;
1159             }
1160             return ret;
1161         }
1162 
1163         protected int read(long pos, ByteBuffer buf) throws IOException {
1164             int read;
1165             synchronized (archive) {
1166                 archive.position(pos);
1167                 read = archive.read(buf);
1168             }
1169             buf.flip();
1170             return read;
1171         }
1172     }
1173 
1174     /**
1175      * Lock-free implementation of BoundedInputStream. The
1176      * implementation uses positioned reads on the underlying archive
1177      * file channel and therefore performs significantly faster in
1178      * concurrent environment.
1179      */
1180     private class BoundedFileChannelInputStream extends BoundedInputStream {
1181         private final FileChannel archive;
1182 
1183         BoundedFileChannelInputStream(final long start, final long remaining) {
1184             super(start, remaining);
1185             archive = (FileChannel)ZipFile.this.archive;
1186         }
1187 
1188         @Override
1189         protected int read(long pos, ByteBuffer buf) throws IOException {
1190             int read = archive.read(buf, pos);
1191             buf.flip();
1192             return read;
1193         }
1194     }
1195 
1196     private static final class NameAndComment {
1197         private final byte[] name;
1198         private final byte[] comment;
1199         private NameAndComment(final byte[] name, final byte[] comment) {
1200             this.name = name;
1201             this.comment = comment;
1202         }
1203     }
1204 
1205     /**
1206      * Compares two ZipArchiveEntries based on their offset within the archive.
1207      *
1208      * <p>Won't return any meaningful results if one of the entries
1209      * isn't part of the archive at all.</p>
1210      *
1211      * @since 1.1
1212      */
1213     private final Comparator<ZipArchiveEntry> offsetComparator =
1214         new Comparator<ZipArchiveEntry>() {
1215         @Override
1216         public int compare(final ZipArchiveEntry e1, final ZipArchiveEntry e2) {
1217             if (e1 == e2) {
1218                 return 0;
1219             }
1220 
1221             final Entry ent1 = e1 instanceof Entry ? (Entry) e1 : null;
1222             final Entry ent2 = e2 instanceof Entry ? (Entry) e2 : null;
1223             if (ent1 == null) {
1224                 return 1;
1225             }
1226             if (ent2 == null) {
1227                 return -1;
1228             }
1229             final long val = (ent1.getLocalHeaderOffset()
1230                         - ent2.getLocalHeaderOffset());
1231             return val == 0 ? 0 : val < 0 ? -1 : +1;
1232         }
1233     };
1234 
1235     /**
1236      * Extends ZipArchiveEntry to store the offset within the archive.
1237      */
1238     private static class Entry extends ZipArchiveEntry {
1239 
1240         Entry() {
1241         }
1242 
1243         @Override
1244         public int hashCode() {
1245             return 3 * super.hashCode()
1246                 + (int) getLocalHeaderOffset()+(int)(getLocalHeaderOffset()>>32);
1247         }
1248 
1249         @Override
1250         public boolean equals(final Object other) {
1251             if (super.equals(other)) {
1252                 // super.equals would return false if other were not an Entry
1253                 final Entry otherEntry = (Entry) other;
1254                 return getLocalHeaderOffset()
1255                         == otherEntry.getLocalHeaderOffset()
1256                     && getDataOffset()
1257                         == otherEntry.getDataOffset();
1258             }
1259             return false;
1260         }
1261     }
1262 
1263     private static class StoredStatisticsStream extends CountingInputStream implements InputStreamStatistics {
1264         StoredStatisticsStream(InputStream in) {
1265             super(in);
1266         }
1267 
1268         @Override
1269         public long getCompressedCount() {
1270             return super.getBytesRead();
1271         }
1272 
1273         @Override
1274         public long getUncompressedCount() {
1275             return getCompressedCount();
1276         }
1277     }
1278 }
1279