1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 */ 18 package org.apache.commons.compress.archivers.zip; 19 20 import java.io.BufferedInputStream; 21 import java.io.ByteArrayInputStream; 22 import java.io.Closeable; 23 import java.io.EOFException; 24 import java.io.File; 25 import java.io.IOException; 26 import java.io.InputStream; 27 import java.io.SequenceInputStream; 28 import java.nio.ByteBuffer; 29 import java.nio.channels.FileChannel; 30 import java.nio.channels.SeekableByteChannel; 31 import java.nio.file.Files; 32 import java.nio.file.StandardOpenOption; 33 import java.util.Arrays; 34 import java.util.Collections; 35 import java.util.Comparator; 36 import java.util.Enumeration; 37 import java.util.EnumSet; 38 import java.util.HashMap; 39 import java.util.LinkedList; 40 import java.util.List; 41 import java.util.Map; 42 import java.util.zip.Inflater; 43 import java.util.zip.ZipException; 44 45 import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; 46 import org.apache.commons.compress.compressors.deflate64.Deflate64CompressorInputStream; 47 import org.apache.commons.compress.utils.CountingInputStream; 48 import org.apache.commons.compress.utils.IOUtils; 49 import 
org.apache.commons.compress.utils.InputStreamStatistics; 50 51 import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD; 52 import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT; 53 import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD; 54 import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC; 55 import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC_SHORT; 56 57 /** 58 * Replacement for <code>java.util.zip.ZipFile</code>. 59 * 60 * <p>This class adds support for file name encodings other than UTF-8 61 * (which is required to work on ZIP files created by native zip tools) 62 * and is able to skip a preamble like the one found in self 63 * extracting archives. Furthermore it returns instances of 64 * <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code> 65 * instead of <code>java.util.zip.ZipEntry</code>.</p> 66 * 67 * <p>It doesn't extend <code>java.util.zip.ZipFile</code> as it would 68 * have to reimplement all methods anyway. Like 69 * <code>java.util.zip.ZipFile</code>, it uses SeekableByteChannel under the 70 * covers and supports compressed and uncompressed entries. 
As of 71 * Apache Commons Compress 1.3 it also transparently supports Zip64 72 * extensions and thus individual entries and archives larger than 4 73 * GB or with more than 65536 entries.</p> 74 * 75 * <p>The method signatures mimic the ones of 76 * <code>java.util.zip.ZipFile</code>, with a couple of exceptions: 77 * 78 * <ul> 79 * <li>There is no getName method.</li> 80 * <li>entries has been renamed to getEntries.</li> 81 * <li>getEntries and getEntry return 82 * <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code> 83 * instances.</li> 84 * <li>close is allowed to throw IOException.</li> 85 * </ul> 86 * 87 */ 88 public class ZipFile implements Closeable { 89 private static final int HASH_SIZE = 509; 90 static final int NIBLET_MASK = 0x0f; 91 static final int BYTE_SHIFT = 8; 92 private static final int POS_0 = 0; 93 private static final int POS_1 = 1; 94 private static final int POS_2 = 2; 95 private static final int POS_3 = 3; 96 private static final byte[] ONE_ZERO_BYTE = new byte[1]; 97 98 /** 99 * List of entries in the order they appear inside the central 100 * directory. 101 */ 102 private final List<ZipArchiveEntry> entries = 103 new LinkedList<>(); 104 105 /** 106 * Maps String to list of ZipArchiveEntrys, name -> actual entries. 107 */ 108 private final Map<String, LinkedList<ZipArchiveEntry>> nameMap = 109 new HashMap<>(HASH_SIZE); 110 111 /** 112 * The encoding to use for filenames and the file comment. 113 * 114 * <p>For a list of possible values see <a 115 * href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html</a>. 116 * Defaults to UTF-8.</p> 117 */ 118 private final String encoding; 119 120 /** 121 * The zip encoding to use for filenames and the file comment. 122 */ 123 private final ZipEncoding zipEncoding; 124 125 /** 126 * File name of actual source. 127 */ 128 private final String archiveName; 129 130 /** 131 * The actual data source. 
/**
 * The channel the archive is read from.
 */
private final SeekableByteChannel archive;

/**
 * Whether Unicode extra fields should be looked for and used.
 */
private final boolean useUnicodeExtraFields;

/**
 * Whether this archive is closed. Starts out {@code true}; the
 * private constructor clears it once the archive has been read
 * successfully.
 */
private volatile boolean closed = true;

// Scratch buffers, each byte array directly followed by the ByteBuffer
// wrapping it. They must only be used locally in this class
// (COMPRESS-172 - reduce garbage collection).
private final byte[] dwordBuf = new byte[DWORD];
private final ByteBuffer dwordBbuf = ByteBuffer.wrap(dwordBuf);
private final byte[] wordBuf = new byte[WORD];
private final ByteBuffer wordBbuf = ByteBuffer.wrap(wordBuf);
private final byte[] cfhBuf = new byte[CFH_LEN];
private final ByteBuffer cfhBbuf = ByteBuffer.wrap(cfhBuf);
private final byte[] shortBuf = new byte[SHORT];

/**
 * Opens the given file for reading, using UTF-8 for file names.
 *
 * <p>Delegates to {@link #ZipFile(File, String)} with
 * {@code ZipEncodingHelper.UTF8}.</p>
 *
 * @param f the archive.
 *
 * @throws IOException if an error occurs while reading the file.
 */
public ZipFile(final File f) throws IOException {
    this(f, ZipEncodingHelper.UTF8);
}

/**
 * Opens the file of the given name for reading, using UTF-8 for
 * file names.
 *
 * @param name name of the archive.
 *
 * @throws IOException if an error occurs while reading the file.
 */
public ZipFile(final String name) throws IOException {
    this(new File(name), ZipEncodingHelper.UTF8);
}
/**
 * Opens the file of the given name for reading with the given file
 * name encoding; Unicode extra fields are used.
 *
 * @param name name of the archive.
 * @param encoding the encoding to use for file names, use null
 * for the platform's default encoding
 *
 * @throws IOException if an error occurs while reading the file.
 */
public ZipFile(final String name, final String encoding) throws IOException {
    this(new File(name), encoding, true);
}

/**
 * Opens the given file for reading with the given file name
 * encoding; Unicode extra fields are used.
 *
 * @param f the archive.
 * @param encoding the encoding to use for file names, use null
 * for the platform's default encoding
 *
 * @throws IOException if an error occurs while reading the file.
 */
public ZipFile(final File f, final String encoding) throws IOException {
    this(f, encoding, true);
}

/**
 * Opens the given file for reading with the given file name
 * encoding.
 *
 * <p>A read-only channel is opened on the file and the file's
 * absolute path is used as archive name. The channel is handed to
 * the private constructor with {@code closeOnError} set, so it is
 * closed again if reading the archive fails.</p>
 *
 * @param f the archive.
 * @param encoding the encoding to use for file names, use null
 * for the platform's default encoding
 * @param useUnicodeExtraFields whether to use InfoZIP Unicode
 * Extra Fields (if present) to set the file names.
 *
 * @throws IOException if an error occurs while reading the file.
 */
public ZipFile(final File f, final String encoding, final boolean useUnicodeExtraFields)
    throws IOException {
    this(Files.newByteChannel(f.toPath(), EnumSet.of(StandardOpenOption.READ)),
         f.getAbsolutePath(),
         encoding,
         useUnicodeExtraFields,
         true);
}
/**
 * Opens the given channel for reading, using UTF-8 for file names.
 *
 * <p>The archive is reported as "unknown archive" and Unicode extra
 * fields are used.</p>
 *
 * <p>{@link
 * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
 * allows you to read from an in-memory archive.</p>
 *
 * @param channel the archive.
 *
 * @throws IOException if an error occurs while reading the file.
 * @since 1.13
 */
public ZipFile(final SeekableByteChannel channel)
    throws IOException {
    this(channel, "unknown archive", ZipEncodingHelper.UTF8, true);
}

/**
 * Opens the given channel for reading with the given file name
 * encoding.
 *
 * <p>The archive is reported as "unknown archive" and Unicode extra
 * fields are used.</p>
 *
 * <p>{@link
 * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
 * allows you to read from an in-memory archive.</p>
 *
 * @param channel the archive.
 * @param encoding the encoding to use for file names, use null
 * for the platform's default encoding
 *
 * @throws IOException if an error occurs while reading the file.
 * @since 1.13
 */
public ZipFile(final SeekableByteChannel channel, final String encoding)
    throws IOException {
    this(channel, "unknown archive", encoding, true);
}
275 * @since 1.13 276 */ ZipFile(final SeekableByteChannel channel, final String archiveName, final String encoding, final boolean useUnicodeExtraFields)277 public ZipFile(final SeekableByteChannel channel, final String archiveName, 278 final String encoding, final boolean useUnicodeExtraFields) 279 throws IOException { 280 this(channel, archiveName, encoding, useUnicodeExtraFields, false); 281 } 282 ZipFile(final SeekableByteChannel channel, final String archiveName, final String encoding, final boolean useUnicodeExtraFields, final boolean closeOnError)283 private ZipFile(final SeekableByteChannel channel, final String archiveName, 284 final String encoding, final boolean useUnicodeExtraFields, 285 final boolean closeOnError) 286 throws IOException { 287 this.archiveName = archiveName; 288 this.encoding = encoding; 289 this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding); 290 this.useUnicodeExtraFields = useUnicodeExtraFields; 291 archive = channel; 292 boolean success = false; 293 try { 294 final Map<ZipArchiveEntry, NameAndComment> entriesWithoutUTF8Flag = 295 populateFromCentralDirectory(); 296 resolveLocalFileHeaderData(entriesWithoutUTF8Flag); 297 success = true; 298 } finally { 299 closed = !success; 300 if (!success && closeOnError) { 301 IOUtils.closeQuietly(archive); 302 } 303 } 304 } 305 306 /** 307 * The encoding to use for filenames and the file comment. 308 * 309 * @return null if using the platform's default character encoding. 310 */ getEncoding()311 public String getEncoding() { 312 return encoding; 313 } 314 315 /** 316 * Closes the archive. 317 * @throws IOException if an error occurs closing the archive. 318 */ 319 @Override close()320 public void close() throws IOException { 321 // this flag is only written here and read in finalize() which 322 // can never be run in parallel. 323 // no synchronization needed. 
324 closed = true; 325 326 archive.close(); 327 } 328 329 /** 330 * close a zipfile quietly; throw no io fault, do nothing 331 * on a null parameter 332 * @param zipfile file to close, can be null 333 */ closeQuietly(final ZipFile zipfile)334 public static void closeQuietly(final ZipFile zipfile) { 335 IOUtils.closeQuietly(zipfile); 336 } 337 338 /** 339 * Returns all entries. 340 * 341 * <p>Entries will be returned in the same order they appear 342 * within the archive's central directory.</p> 343 * 344 * @return all entries as {@link ZipArchiveEntry} instances 345 */ getEntries()346 public Enumeration<ZipArchiveEntry> getEntries() { 347 return Collections.enumeration(entries); 348 } 349 350 /** 351 * Returns all entries in physical order. 352 * 353 * <p>Entries will be returned in the same order their contents 354 * appear within the archive.</p> 355 * 356 * @return all entries as {@link ZipArchiveEntry} instances 357 * 358 * @since 1.1 359 */ getEntriesInPhysicalOrder()360 public Enumeration<ZipArchiveEntry> getEntriesInPhysicalOrder() { 361 final ZipArchiveEntry[] allEntries = entries.toArray(new ZipArchiveEntry[entries.size()]); 362 Arrays.sort(allEntries, offsetComparator); 363 return Collections.enumeration(Arrays.asList(allEntries)); 364 } 365 366 /** 367 * Returns a named entry - or {@code null} if no entry by 368 * that name exists. 369 * 370 * <p>If multiple entries with the same name exist the first entry 371 * in the archive's central directory by that name is 372 * returned.</p> 373 * 374 * @param name name of the entry. 375 * @return the ZipArchiveEntry corresponding to the given name - or 376 * {@code null} if not present. 377 */ getEntry(final String name)378 public ZipArchiveEntry getEntry(final String name) { 379 final LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.get(name); 380 return entriesOfThatName != null ? 
entriesOfThatName.getFirst() : null; 381 } 382 383 /** 384 * Returns all named entries in the same order they appear within 385 * the archive's central directory. 386 * 387 * @param name name of the entry. 388 * @return the Iterable<ZipArchiveEntry> corresponding to the 389 * given name 390 * @since 1.6 391 */ getEntries(final String name)392 public Iterable<ZipArchiveEntry> getEntries(final String name) { 393 final List<ZipArchiveEntry> entriesOfThatName = nameMap.get(name); 394 return entriesOfThatName != null ? entriesOfThatName 395 : Collections.<ZipArchiveEntry>emptyList(); 396 } 397 398 /** 399 * Returns all named entries in the same order their contents 400 * appear within the archive. 401 * 402 * @param name name of the entry. 403 * @return the Iterable<ZipArchiveEntry> corresponding to the 404 * given name 405 * @since 1.6 406 */ getEntriesInPhysicalOrder(final String name)407 public Iterable<ZipArchiveEntry> getEntriesInPhysicalOrder(final String name) { 408 ZipArchiveEntry[] entriesOfThatName = new ZipArchiveEntry[0]; 409 if (nameMap.containsKey(name)) { 410 entriesOfThatName = nameMap.get(name).toArray(entriesOfThatName); 411 Arrays.sort(entriesOfThatName, offsetComparator); 412 } 413 return Arrays.asList(entriesOfThatName); 414 } 415 416 /** 417 * Whether this class is able to read the given entry. 418 * 419 * <p>May return false if it is set up to use encryption or a 420 * compression method that hasn't been implemented yet.</p> 421 * @since 1.1 422 * @param ze the entry 423 * @return whether this class is able to read the given entry. 424 */ canReadEntryData(final ZipArchiveEntry ze)425 public boolean canReadEntryData(final ZipArchiveEntry ze) { 426 return ZipUtil.canHandleEntryData(ze); 427 } 428 429 /** 430 * Expose the raw stream of the archive entry (compressed form). 
431 * 432 * <p>This method does not relate to how/if we understand the payload in the 433 * stream, since we really only intend to move it on to somewhere else.</p> 434 * 435 * @param ze The entry to get the stream for 436 * @return The raw input stream containing (possibly) compressed data. 437 * @since 1.11 438 */ getRawInputStream(final ZipArchiveEntry ze)439 public InputStream getRawInputStream(final ZipArchiveEntry ze) { 440 if (!(ze instanceof Entry)) { 441 return null; 442 } 443 final long start = ze.getDataOffset(); 444 return createBoundedInputStream(start, ze.getCompressedSize()); 445 } 446 447 448 /** 449 * Transfer selected entries from this zipfile to a given #ZipArchiveOutputStream. 450 * Compression and all other attributes will be as in this file. 451 * <p>This method transfers entries based on the central directory of the zip file.</p> 452 * 453 * @param target The zipArchiveOutputStream to write the entries to 454 * @param predicate A predicate that selects which entries to write 455 * @throws IOException on error 456 */ copyRawEntries(final ZipArchiveOutputStream target, final ZipArchiveEntryPredicate predicate)457 public void copyRawEntries(final ZipArchiveOutputStream target, final ZipArchiveEntryPredicate predicate) 458 throws IOException { 459 final Enumeration<ZipArchiveEntry> src = getEntriesInPhysicalOrder(); 460 while (src.hasMoreElements()) { 461 final ZipArchiveEntry entry = src.nextElement(); 462 if (predicate.test( entry)) { 463 target.addRawArchiveEntry(entry, getRawInputStream(entry)); 464 } 465 } 466 } 467 468 /** 469 * Returns an InputStream for reading the contents of the given entry. 470 * 471 * @param ze the entry to get the stream for. 472 * @return a stream to read the entry from. The returned stream 473 * implements {@link InputStreamStatistics}. 
474 * @throws IOException if unable to create an input stream from the zipentry 475 */ getInputStream(final ZipArchiveEntry ze)476 public InputStream getInputStream(final ZipArchiveEntry ze) 477 throws IOException { 478 if (!(ze instanceof Entry)) { 479 return null; 480 } 481 // cast validity is checked just above 482 ZipUtil.checkRequestedFeatures(ze); 483 final long start = ze.getDataOffset(); 484 485 // doesn't get closed if the method is not supported - which 486 // should never happen because of the checkRequestedFeatures 487 // call above 488 final InputStream is = 489 new BufferedInputStream(createBoundedInputStream(start, ze.getCompressedSize())); //NOSONAR 490 switch (ZipMethod.getMethodByCode(ze.getMethod())) { 491 case STORED: 492 return new StoredStatisticsStream(is); 493 case UNSHRINKING: 494 return new UnshrinkingInputStream(is); 495 case IMPLODING: 496 return new ExplodingInputStream(ze.getGeneralPurposeBit().getSlidingDictionarySize(), 497 ze.getGeneralPurposeBit().getNumberOfShannonFanoTrees(), is); 498 case DEFLATED: 499 final Inflater inflater = new Inflater(true); 500 // Inflater with nowrap=true has this odd contract for a zero padding 501 // byte following the data stream; this used to be zlib's requirement 502 // and has been fixed a long time ago, but the contract persists so 503 // we comply. 
504 // https://docs.oracle.com/javase/7/docs/api/java/util/zip/Inflater.html#Inflater(boolean) 505 return new InflaterInputStreamWithStatistics(new SequenceInputStream(is, new ByteArrayInputStream(ONE_ZERO_BYTE)), 506 inflater) { 507 @Override 508 public void close() throws IOException { 509 try { 510 super.close(); 511 } finally { 512 inflater.end(); 513 } 514 } 515 }; 516 case BZIP2: 517 return new BZip2CompressorInputStream(is); 518 case ENHANCED_DEFLATED: 519 return new Deflate64CompressorInputStream(is); 520 case AES_ENCRYPTED: 521 case EXPANDING_LEVEL_1: 522 case EXPANDING_LEVEL_2: 523 case EXPANDING_LEVEL_3: 524 case EXPANDING_LEVEL_4: 525 case JPEG: 526 case LZMA: 527 case PKWARE_IMPLODING: 528 case PPMD: 529 case TOKENIZATION: 530 case UNKNOWN: 531 case WAVPACK: 532 case XZ: 533 default: 534 throw new ZipException("Found unsupported compression method " 535 + ze.getMethod()); 536 } 537 } 538 539 /** 540 * <p> 541 * Convenience method to return the entry's content as a String if isUnixSymlink() 542 * returns true for it, otherwise returns null. 543 * </p> 544 * 545 * <p>This method assumes the symbolic link's file name uses the 546 * same encoding that as been specified for this ZipFile.</p> 547 * 548 * @param entry ZipArchiveEntry object that represents the symbolic link 549 * @return entry's content as a String 550 * @throws IOException problem with content's input stream 551 * @since 1.5 552 */ 553 public String getUnixSymlink(final ZipArchiveEntry entry) throws IOException { 554 if (entry != null && entry.isUnixSymlink()) { 555 try (InputStream in = getInputStream(entry)) { 556 return zipEncoding.decode(IOUtils.toByteArray(in)); 557 } 558 } 559 return null; 560 } 561 562 /** 563 * Ensures that the close method of this zipfile is called when 564 * there are no more references to it. 
565 * @see #close() 566 */ 567 @Override 568 protected void finalize() throws Throwable { 569 try { 570 if (!closed) { 571 System.err.println("Cleaning up unclosed ZipFile for archive " 572 + archiveName); 573 close(); 574 } 575 } finally { 576 super.finalize(); 577 } 578 } 579 580 /** 581 * Length of a "central directory" entry structure without file 582 * name, extra fields or comment. 583 */ 584 private static final int CFH_LEN = 585 /* version made by */ SHORT 586 /* version needed to extract */ + SHORT 587 /* general purpose bit flag */ + SHORT 588 /* compression method */ + SHORT 589 /* last mod file time */ + SHORT 590 /* last mod file date */ + SHORT 591 /* crc-32 */ + WORD 592 /* compressed size */ + WORD 593 /* uncompressed size */ + WORD 594 /* filename length */ + SHORT 595 /* extra field length */ + SHORT 596 /* file comment length */ + SHORT 597 /* disk number start */ + SHORT 598 /* internal file attributes */ + SHORT 599 /* external file attributes */ + WORD 600 /* relative offset of local header */ + WORD; 601 602 private static final long CFH_SIG = 603 ZipLong.getValue(ZipArchiveOutputStream.CFH_SIG); 604 605 /** 606 * Reads the central directory of the given archive and populates 607 * the internal tables with ZipArchiveEntry instances. 608 * 609 * <p>The ZipArchiveEntrys will know all data that can be obtained from 610 * the central directory alone, but not the data that requires the 611 * local file header or additional data to be read.</p> 612 * 613 * @return a map of zipentries that didn't have the language 614 * encoding flag set when read. 
615 */ 616 private Map<ZipArchiveEntry, NameAndComment> populateFromCentralDirectory() 617 throws IOException { 618 final HashMap<ZipArchiveEntry, NameAndComment> noUTF8Flag = 619 new HashMap<>(); 620 621 positionAtCentralDirectory(); 622 623 wordBbuf.rewind(); 624 IOUtils.readFully(archive, wordBbuf); 625 long sig = ZipLong.getValue(wordBuf); 626 627 if (sig != CFH_SIG && startsWithLocalFileHeader()) { 628 throw new IOException("central directory is empty, can't expand" 629 + " corrupt archive."); 630 } 631 632 while (sig == CFH_SIG) { 633 readCentralDirectoryEntry(noUTF8Flag); 634 wordBbuf.rewind(); 635 IOUtils.readFully(archive, wordBbuf); 636 sig = ZipLong.getValue(wordBuf); 637 } 638 return noUTF8Flag; 639 } 640 641 /** 642 * Reads an individual entry of the central directory, creats an 643 * ZipArchiveEntry from it and adds it to the global maps. 644 * 645 * @param noUTF8Flag map used to collect entries that don't have 646 * their UTF-8 flag set and whose name will be set by data read 647 * from the local file header later. The current entry may be 648 * added to this map. 649 */ 650 private void 651 readCentralDirectoryEntry(final Map<ZipArchiveEntry, NameAndComment> noUTF8Flag) 652 throws IOException { 653 cfhBbuf.rewind(); 654 IOUtils.readFully(archive, cfhBbuf); 655 int off = 0; 656 final Entry ze = new Entry(); 657 658 final int versionMadeBy = ZipShort.getValue(cfhBuf, off); 659 off += SHORT; 660 ze.setVersionMadeBy(versionMadeBy); 661 ze.setPlatform((versionMadeBy >> BYTE_SHIFT) & NIBLET_MASK); 662 663 ze.setVersionRequired(ZipShort.getValue(cfhBuf, off)); 664 off += SHORT; // version required 665 666 final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(cfhBuf, off); 667 final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames(); 668 final ZipEncoding entryEncoding = 669 hasUTF8Flag ? 
ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding; 670 if (hasUTF8Flag) { 671 ze.setNameSource(ZipArchiveEntry.NameSource.NAME_WITH_EFS_FLAG); 672 } 673 ze.setGeneralPurposeBit(gpFlag); 674 ze.setRawFlag(ZipShort.getValue(cfhBuf, off)); 675 676 off += SHORT; 677 678 //noinspection MagicConstant 679 ze.setMethod(ZipShort.getValue(cfhBuf, off)); 680 off += SHORT; 681 682 final long time = ZipUtil.dosToJavaTime(ZipLong.getValue(cfhBuf, off)); 683 ze.setTime(time); 684 off += WORD; 685 686 ze.setCrc(ZipLong.getValue(cfhBuf, off)); 687 off += WORD; 688 689 ze.setCompressedSize(ZipLong.getValue(cfhBuf, off)); 690 off += WORD; 691 692 ze.setSize(ZipLong.getValue(cfhBuf, off)); 693 off += WORD; 694 695 final int fileNameLen = ZipShort.getValue(cfhBuf, off); 696 off += SHORT; 697 698 final int extraLen = ZipShort.getValue(cfhBuf, off); 699 off += SHORT; 700 701 final int commentLen = ZipShort.getValue(cfhBuf, off); 702 off += SHORT; 703 704 final int diskStart = ZipShort.getValue(cfhBuf, off); 705 off += SHORT; 706 707 ze.setInternalAttributes(ZipShort.getValue(cfhBuf, off)); 708 off += SHORT; 709 710 ze.setExternalAttributes(ZipLong.getValue(cfhBuf, off)); 711 off += WORD; 712 713 final byte[] fileName = new byte[fileNameLen]; 714 IOUtils.readFully(archive, ByteBuffer.wrap(fileName)); 715 ze.setName(entryEncoding.decode(fileName), fileName); 716 717 // LFH offset, 718 ze.setLocalHeaderOffset(ZipLong.getValue(cfhBuf, off)); 719 // data offset will be filled later 720 entries.add(ze); 721 722 final byte[] cdExtraData = new byte[extraLen]; 723 IOUtils.readFully(archive, ByteBuffer.wrap(cdExtraData)); 724 ze.setCentralDirectoryExtra(cdExtraData); 725 726 setSizesAndOffsetFromZip64Extra(ze, diskStart); 727 728 final byte[] comment = new byte[commentLen]; 729 IOUtils.readFully(archive, ByteBuffer.wrap(comment)); 730 ze.setComment(entryEncoding.decode(comment)); 731 732 if (!hasUTF8Flag && useUnicodeExtraFields) { 733 noUTF8Flag.put(ze, new NameAndComment(fileName, comment)); 734 
} 735 } 736 737 /** 738 * If the entry holds a Zip64 extended information extra field, 739 * read sizes from there if the entry's sizes are set to 740 * 0xFFFFFFFFF, do the same for the offset of the local file 741 * header. 742 * 743 * <p>Ensures the Zip64 extra either knows both compressed and 744 * uncompressed size or neither of both as the internal logic in 745 * ExtraFieldUtils forces the field to create local header data 746 * even if they are never used - and here a field with only one 747 * size would be invalid.</p> 748 */ 749 private void setSizesAndOffsetFromZip64Extra(final ZipArchiveEntry ze, 750 final int diskStart) 751 throws IOException { 752 final Zip64ExtendedInformationExtraField z64 = 753 (Zip64ExtendedInformationExtraField) 754 ze.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID); 755 if (z64 != null) { 756 final boolean hasUncompressedSize = ze.getSize() == ZIP64_MAGIC; 757 final boolean hasCompressedSize = ze.getCompressedSize() == ZIP64_MAGIC; 758 final boolean hasRelativeHeaderOffset = 759 ze.getLocalHeaderOffset() == ZIP64_MAGIC; 760 z64.reparseCentralDirectoryData(hasUncompressedSize, 761 hasCompressedSize, 762 hasRelativeHeaderOffset, 763 diskStart == ZIP64_MAGIC_SHORT); 764 765 if (hasUncompressedSize) { 766 ze.setSize(z64.getSize().getLongValue()); 767 } else if (hasCompressedSize) { 768 z64.setSize(new ZipEightByteInteger(ze.getSize())); 769 } 770 771 if (hasCompressedSize) { 772 ze.setCompressedSize(z64.getCompressedSize().getLongValue()); 773 } else if (hasUncompressedSize) { 774 z64.setCompressedSize(new ZipEightByteInteger(ze.getCompressedSize())); 775 } 776 777 if (hasRelativeHeaderOffset) { 778 ze.setLocalHeaderOffset(z64.getRelativeHeaderOffset().getLongValue()); 779 } 780 } 781 } 782 783 /** 784 * Length of the "End of central directory record" - which is 785 * supposed to be the last structure of the archive - without file 786 * comment. 
787 */ 788 static final int MIN_EOCD_SIZE = 789 /* end of central dir signature */ WORD 790 /* number of this disk */ + SHORT 791 /* number of the disk with the */ 792 /* start of the central directory */ + SHORT 793 /* total number of entries in */ 794 /* the central dir on this disk */ + SHORT 795 /* total number of entries in */ 796 /* the central dir */ + SHORT 797 /* size of the central directory */ + WORD 798 /* offset of start of central */ 799 /* directory with respect to */ 800 /* the starting disk number */ + WORD 801 /* zipfile comment length */ + SHORT; 802 803 /** 804 * Maximum length of the "End of central directory record" with a 805 * file comment. 806 */ 807 private static final int MAX_EOCD_SIZE = MIN_EOCD_SIZE 808 /* maximum length of zipfile comment */ + ZIP64_MAGIC_SHORT; 809 810 /** 811 * Offset of the field that holds the location of the first 812 * central directory entry inside the "End of central directory 813 * record" relative to the start of the "End of central directory 814 * record". 815 */ 816 private static final int CFD_LOCATOR_OFFSET = 817 /* end of central dir signature */ WORD 818 /* number of this disk */ + SHORT 819 /* number of the disk with the */ 820 /* start of the central directory */ + SHORT 821 /* total number of entries in */ 822 /* the central dir on this disk */ + SHORT 823 /* total number of entries in */ 824 /* the central dir */ + SHORT 825 /* size of the central directory */ + WORD; 826 827 /** 828 * Length of the "Zip64 end of central directory locator" - which 829 * should be right in front of the "end of central directory 830 * record" if one is present at all. 
831 */ 832 private static final int ZIP64_EOCDL_LENGTH = 833 /* zip64 end of central dir locator sig */ WORD 834 /* number of the disk with the start */ 835 /* start of the zip64 end of */ 836 /* central directory */ + WORD 837 /* relative offset of the zip64 */ 838 /* end of central directory record */ + DWORD 839 /* total number of disks */ + WORD; 840 841 /** 842 * Offset of the field that holds the location of the "Zip64 end 843 * of central directory record" inside the "Zip64 end of central 844 * directory locator" relative to the start of the "Zip64 end of 845 * central directory locator". 846 */ 847 private static final int ZIP64_EOCDL_LOCATOR_OFFSET = 848 /* zip64 end of central dir locator sig */ WORD 849 /* number of the disk with the start */ 850 /* start of the zip64 end of */ 851 /* central directory */ + WORD; 852 853 /** 854 * Offset of the field that holds the location of the first 855 * central directory entry inside the "Zip64 end of central 856 * directory record" relative to the start of the "Zip64 end of 857 * central directory record". 858 */ 859 private static final int ZIP64_EOCD_CFD_LOCATOR_OFFSET = 860 /* zip64 end of central dir */ 861 /* signature */ WORD 862 /* size of zip64 end of central */ 863 /* directory record */ + DWORD 864 /* version made by */ + SHORT 865 /* version needed to extract */ + SHORT 866 /* number of this disk */ + WORD 867 /* number of the disk with the */ 868 /* start of the central directory */ + WORD 869 /* total number of entries in the */ 870 /* central directory on this disk */ + DWORD 871 /* total number of entries in the */ 872 /* central directory */ + DWORD 873 /* size of the central directory */ + DWORD; 874 875 /** 876 * Searches for either the "Zip64 end of central directory 877 * locator" or the "End of central dir record", parses 878 * it and positions the stream at the first central directory 879 * record. 
880 */ 881 private void positionAtCentralDirectory() 882 throws IOException { 883 positionAtEndOfCentralDirectoryRecord(); 884 boolean found = false; 885 final boolean searchedForZip64EOCD = 886 archive.position() > ZIP64_EOCDL_LENGTH; 887 if (searchedForZip64EOCD) { 888 archive.position(archive.position() - ZIP64_EOCDL_LENGTH); 889 wordBbuf.rewind(); 890 IOUtils.readFully(archive, wordBbuf); 891 found = Arrays.equals(ZipArchiveOutputStream.ZIP64_EOCD_LOC_SIG, 892 wordBuf); 893 } 894 if (!found) { 895 // not a ZIP64 archive 896 if (searchedForZip64EOCD) { 897 skipBytes(ZIP64_EOCDL_LENGTH - WORD); 898 } 899 positionAtCentralDirectory32(); 900 } else { 901 positionAtCentralDirectory64(); 902 } 903 } 904 905 /** 906 * Parses the "Zip64 end of central directory locator", 907 * finds the "Zip64 end of central directory record" using the 908 * parsed information, parses that and positions the stream at the 909 * first central directory record. 910 * 911 * Expects stream to be positioned right behind the "Zip64 912 * end of central directory locator"'s signature. 913 */ 914 private void positionAtCentralDirectory64() 915 throws IOException { 916 skipBytes(ZIP64_EOCDL_LOCATOR_OFFSET 917 - WORD /* signature has already been read */); 918 dwordBbuf.rewind(); 919 IOUtils.readFully(archive, dwordBbuf); 920 archive.position(ZipEightByteInteger.getLongValue(dwordBuf)); 921 wordBbuf.rewind(); 922 IOUtils.readFully(archive, wordBbuf); 923 if (!Arrays.equals(wordBuf, ZipArchiveOutputStream.ZIP64_EOCD_SIG)) { 924 throw new ZipException("archive's ZIP64 end of central " 925 + "directory locator is corrupt."); 926 } 927 skipBytes(ZIP64_EOCD_CFD_LOCATOR_OFFSET 928 - WORD /* signature has already been read */); 929 dwordBbuf.rewind(); 930 IOUtils.readFully(archive, dwordBbuf); 931 archive.position(ZipEightByteInteger.getLongValue(dwordBuf)); 932 } 933 934 /** 935 * Parses the "End of central dir record" and positions 936 * the stream at the first central directory record. 
937 * 938 * Expects stream to be positioned at the beginning of the 939 * "End of central dir record". 940 */ 941 private void positionAtCentralDirectory32() 942 throws IOException { 943 skipBytes(CFD_LOCATOR_OFFSET); 944 wordBbuf.rewind(); 945 IOUtils.readFully(archive, wordBbuf); 946 archive.position(ZipLong.getValue(wordBuf)); 947 } 948 949 /** 950 * Searches for the and positions the stream at the start of the 951 * "End of central dir record". 952 */ 953 private void positionAtEndOfCentralDirectoryRecord() 954 throws IOException { 955 final boolean found = tryToLocateSignature(MIN_EOCD_SIZE, MAX_EOCD_SIZE, 956 ZipArchiveOutputStream.EOCD_SIG); 957 if (!found) { 958 throw new ZipException("archive is not a ZIP archive"); 959 } 960 } 961 962 /** 963 * Searches the archive backwards from minDistance to maxDistance 964 * for the given signature, positions the RandomaccessFile right 965 * at the signature if it has been found. 966 */ 967 private boolean tryToLocateSignature(final long minDistanceFromEnd, 968 final long maxDistanceFromEnd, 969 final byte[] sig) throws IOException { 970 boolean found = false; 971 long off = archive.size() - minDistanceFromEnd; 972 final long stopSearching = 973 Math.max(0L, archive.size() - maxDistanceFromEnd); 974 if (off >= 0) { 975 for (; off >= stopSearching; off--) { 976 archive.position(off); 977 try { 978 wordBbuf.rewind(); 979 IOUtils.readFully(archive, wordBbuf); 980 wordBbuf.flip(); 981 } catch (EOFException ex) { 982 break; 983 } 984 int curr = wordBbuf.get(); 985 if (curr == sig[POS_0]) { 986 curr = wordBbuf.get(); 987 if (curr == sig[POS_1]) { 988 curr = wordBbuf.get(); 989 if (curr == sig[POS_2]) { 990 curr = wordBbuf.get(); 991 if (curr == sig[POS_3]) { 992 found = true; 993 break; 994 } 995 } 996 } 997 } 998 } 999 } 1000 if (found) { 1001 archive.position(off); 1002 } 1003 return found; 1004 } 1005 1006 /** 1007 * Skips the given number of bytes or throws an EOFException if 1008 * skipping failed. 
1009 */ 1010 private void skipBytes(final int count) throws IOException { 1011 long currentPosition = archive.position(); 1012 long newPosition = currentPosition + count; 1013 if (newPosition > archive.size()) { 1014 throw new EOFException(); 1015 } 1016 archive.position(newPosition); 1017 } 1018 1019 /** 1020 * Number of bytes in local file header up to the "length of 1021 * filename" entry. 1022 */ 1023 private static final long LFH_OFFSET_FOR_FILENAME_LENGTH = 1024 /* local file header signature */ WORD 1025 /* version needed to extract */ + SHORT 1026 /* general purpose bit flag */ + SHORT 1027 /* compression method */ + SHORT 1028 /* last mod file time */ + SHORT 1029 /* last mod file date */ + SHORT 1030 /* crc-32 */ + WORD 1031 /* compressed size */ + WORD 1032 /* uncompressed size */ + (long) WORD; 1033 1034 /** 1035 * Walks through all recorded entries and adds the data available 1036 * from the local file header. 1037 * 1038 * <p>Also records the offsets for the data to read from the 1039 * entries.</p> 1040 */ 1041 private void resolveLocalFileHeaderData(final Map<ZipArchiveEntry, NameAndComment> 1042 entriesWithoutUTF8Flag) 1043 throws IOException { 1044 for (final ZipArchiveEntry zipArchiveEntry : entries) { 1045 // entries is filled in populateFromCentralDirectory and 1046 // never modified 1047 final Entry ze = (Entry) zipArchiveEntry; 1048 final long offset = ze.getLocalHeaderOffset(); 1049 archive.position(offset + LFH_OFFSET_FOR_FILENAME_LENGTH); 1050 wordBbuf.rewind(); 1051 IOUtils.readFully(archive, wordBbuf); 1052 wordBbuf.flip(); 1053 wordBbuf.get(shortBuf); 1054 final int fileNameLen = ZipShort.getValue(shortBuf); 1055 wordBbuf.get(shortBuf); 1056 final int extraFieldLen = ZipShort.getValue(shortBuf); 1057 skipBytes(fileNameLen); 1058 final byte[] localExtraData = new byte[extraFieldLen]; 1059 IOUtils.readFully(archive, ByteBuffer.wrap(localExtraData)); 1060 ze.setExtra(localExtraData); 1061 ze.setDataOffset(offset + 
LFH_OFFSET_FOR_FILENAME_LENGTH 1062 + SHORT + SHORT + fileNameLen + extraFieldLen); 1063 ze.setStreamContiguous(true); 1064 1065 if (entriesWithoutUTF8Flag.containsKey(ze)) { 1066 final NameAndComment nc = entriesWithoutUTF8Flag.get(ze); 1067 ZipUtil.setNameAndCommentFromExtraFields(ze, nc.name, 1068 nc.comment); 1069 } 1070 1071 final String name = ze.getName(); 1072 LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.get(name); 1073 if (entriesOfThatName == null) { 1074 entriesOfThatName = new LinkedList<>(); 1075 nameMap.put(name, entriesOfThatName); 1076 } 1077 entriesOfThatName.addLast(ze); 1078 } 1079 } 1080 1081 /** 1082 * Checks whether the archive starts with a LFH. If it doesn't, 1083 * it may be an empty archive. 1084 */ 1085 private boolean startsWithLocalFileHeader() throws IOException { 1086 archive.position(0); 1087 wordBbuf.rewind(); 1088 IOUtils.readFully(archive, wordBbuf); 1089 return Arrays.equals(wordBuf, ZipArchiveOutputStream.LFH_SIG); 1090 } 1091 1092 /** 1093 * Creates new BoundedInputStream, according to implementation of 1094 * underlying archive channel. 1095 */ 1096 private BoundedInputStream createBoundedInputStream(long start, long remaining) { 1097 return archive instanceof FileChannel ? 1098 new BoundedFileChannelInputStream(start, remaining) : 1099 new BoundedInputStream(start, remaining); 1100 } 1101 1102 /** 1103 * InputStream that delegates requests to the underlying 1104 * SeekableByteChannel, making sure that only bytes from a certain 1105 * range can be read. 
1106 */ 1107 private class BoundedInputStream extends InputStream { 1108 private ByteBuffer singleByteBuffer; 1109 private final long end; 1110 private long loc; 1111 1112 BoundedInputStream(final long start, final long remaining) { 1113 this.end = start+remaining; 1114 if (this.end < start) { 1115 // check for potential vulnerability due to overflow 1116 throw new IllegalArgumentException("Invalid length of stream at offset="+start+", length="+remaining); 1117 } 1118 loc = start; 1119 } 1120 1121 @Override 1122 public synchronized int read() throws IOException { 1123 if (loc >= end) { 1124 return -1; 1125 } 1126 if (singleByteBuffer == null) { 1127 singleByteBuffer = ByteBuffer.allocate(1); 1128 } 1129 else { 1130 singleByteBuffer.rewind(); 1131 } 1132 int read = read(loc, singleByteBuffer); 1133 if (read < 0) { 1134 return read; 1135 } 1136 loc++; 1137 return singleByteBuffer.get() & 0xff; 1138 } 1139 1140 @Override 1141 public synchronized int read(final byte[] b, final int off, int len) throws IOException { 1142 if (len <= 0) { 1143 return 0; 1144 } 1145 1146 if (len > end-loc) { 1147 if (loc >= end) { 1148 return -1; 1149 } 1150 len = (int)(end-loc); 1151 } 1152 1153 ByteBuffer buf; 1154 buf = ByteBuffer.wrap(b, off, len); 1155 int ret = read(loc, buf); 1156 if (ret > 0) { 1157 loc += ret; 1158 return ret; 1159 } 1160 return ret; 1161 } 1162 1163 protected int read(long pos, ByteBuffer buf) throws IOException { 1164 int read; 1165 synchronized (archive) { 1166 archive.position(pos); 1167 read = archive.read(buf); 1168 } 1169 buf.flip(); 1170 return read; 1171 } 1172 } 1173 1174 /** 1175 * Lock-free implementation of BoundedInputStream. The 1176 * implementation uses positioned reads on the underlying archive 1177 * file channel and therefore performs significantly faster in 1178 * concurrent environment. 
1179 */ 1180 private class BoundedFileChannelInputStream extends BoundedInputStream { 1181 private final FileChannel archive; 1182 1183 BoundedFileChannelInputStream(final long start, final long remaining) { 1184 super(start, remaining); 1185 archive = (FileChannel)ZipFile.this.archive; 1186 } 1187 1188 @Override 1189 protected int read(long pos, ByteBuffer buf) throws IOException { 1190 int read = archive.read(buf, pos); 1191 buf.flip(); 1192 return read; 1193 } 1194 } 1195 1196 private static final class NameAndComment { 1197 private final byte[] name; 1198 private final byte[] comment; 1199 private NameAndComment(final byte[] name, final byte[] comment) { 1200 this.name = name; 1201 this.comment = comment; 1202 } 1203 } 1204 1205 /** 1206 * Compares two ZipArchiveEntries based on their offset within the archive. 1207 * 1208 * <p>Won't return any meaningful results if one of the entries 1209 * isn't part of the archive at all.</p> 1210 * 1211 * @since 1.1 1212 */ 1213 private final Comparator<ZipArchiveEntry> offsetComparator = 1214 new Comparator<ZipArchiveEntry>() { 1215 @Override 1216 public int compare(final ZipArchiveEntry e1, final ZipArchiveEntry e2) { 1217 if (e1 == e2) { 1218 return 0; 1219 } 1220 1221 final Entry ent1 = e1 instanceof Entry ? (Entry) e1 : null; 1222 final Entry ent2 = e2 instanceof Entry ? (Entry) e2 : null; 1223 if (ent1 == null) { 1224 return 1; 1225 } 1226 if (ent2 == null) { 1227 return -1; 1228 } 1229 final long val = (ent1.getLocalHeaderOffset() 1230 - ent2.getLocalHeaderOffset()); 1231 return val == 0 ? 0 : val < 0 ? -1 : +1; 1232 } 1233 }; 1234 1235 /** 1236 * Extends ZipArchiveEntry to store the offset within the archive. 
*/
    private static class Entry extends ZipArchiveEntry {

        Entry() {
        }

        /**
         * Mixes the local header offset (both 32-bit halves) into the
         * hash code of the superclass.
         */
        @Override
        public int hashCode() {
            final long headerOffset = getLocalHeaderOffset();
            final int lowBits = (int) headerOffset;
            final int highBits = (int) (headerOffset >> 32);
            return 3 * super.hashCode() + lowBits + highBits;
        }

        /**
         * Two entries are equal if the superclass considers them
         * equal and both their local header offsets and their data
         * offsets match.
         */
        @Override
        public boolean equals(final Object other) {
            if (!super.equals(other)) {
                return false;
            }
            // super.equals would return false if other were not an Entry
            final Entry that = (Entry) other;
            final boolean sameHeaderOffset =
                getLocalHeaderOffset() == that.getLocalHeaderOffset();
            return sameHeaderOffset
                && getDataOffset() == that.getDataOffset();
        }
    }

    /**
     * Counting stream whose compressed and uncompressed counts are
     * both the number of bytes read.
     */
    private static class StoredStatisticsStream extends CountingInputStream implements InputStreamStatistics {
        StoredStatisticsStream(InputStream in) {
            super(in);
        }

        @Override
        public long getCompressedCount() {
            final long bytesRead = super.getBytesRead();
            return bytesRead;
        }

        @Override
        public long getUncompressedCount() {
            // reported as identical to the compressed count
            return getCompressedCount();
        }
    }
}