/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.commons.compress.archivers.zip;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.io.PushbackInputStream;
import java.math.BigInteger;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.zip.CRC32;
import java.util.zip.DataFormatException;
import java.util.zip.Inflater;
import java.util.zip.ZipEntry;
import java.util.zip.ZipException;

import org.apache.commons.compress.archivers.ArchiveEntry;
import org.apache.commons.compress.archivers.ArchiveInputStream;
import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
import org.apache.commons.compress.compressors.deflate64.Deflate64CompressorInputStream;
import org.apache.commons.compress.utils.ArchiveUtils;
import org.apache.commons.compress.utils.IOUtils;
import org.apache.commons.compress.utils.InputStreamStatistics;

import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD;
import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT;
import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD;
import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC;

/**
 * Implements an input stream that can read Zip archives.
 *
 * <p>As of Apache Commons Compress it transparently supports Zip64
 * extensions and thus individual entries and archives larger than 4
 * GB or with more than 65536 entries.</p>
 *
 * <p>The {@link ZipFile} class is preferred when reading from files
 * as {@link ZipArchiveInputStream} is limited by not being able to
 * read the central directory header before returning entries. In
 * particular {@link ZipArchiveInputStream}</p>
 *
 * <ul>
 *
 * <li>may return entries that are not part of the central directory
 * at all and shouldn't be considered part of the archive.</li>
 *
 * <li>may return several entries with the same name.</li>
 *
 * <li>will not return internal or external attributes.</li>
 *
 * <li>may return incomplete extra field data.</li>
 *
 * <li>may return unknown sizes and CRC values for entries until the
 * next entry has been reached if the archive uses the data
 * descriptor feature.</li>
 *
 * </ul>
 *
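 * <p>A minimal read loop might look like the following sketch; how the
 * archive stream is obtained and how entry contents are consumed are
 * only illustrative:</p>
 *
 * <pre>{@code
 * try (ZipArchiveInputStream zin = new ZipArchiveInputStream(
 *          Files.newInputStream(Paths.get("archive.zip")), "UTF-8", true)) {
 *     ZipArchiveEntry entry;
 *     while ((entry = zin.getNextZipEntry()) != null) {
 *         if (!zin.canReadEntryData(entry)) {
 *             // encrypted or compressed with an unsupported method
 *             continue;
 *         }
 *         final byte[] buffer = new byte[8192];
 *         int n;
 *         while ((n = zin.read(buffer, 0, buffer.length)) != -1) {
 *             // process the n bytes just read for this entry
 *         }
 *     }
 * }}</pre>
 *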
 * @see ZipFile
 * @NotThreadSafe
 */
public class ZipArchiveInputStream extends ArchiveInputStream implements InputStreamStatistics {

    /** The zip encoding to use for filenames and the file comment. */
    private final ZipEncoding zipEncoding;

    // the provided encoding (for unit tests)
    final String encoding;

    /** Whether to look for and use Unicode extra fields. */
    private final boolean useUnicodeExtraFields;

    /** Wrapped stream, will always be a PushbackInputStream. */
    private final InputStream in;

    /** Inflater used for all deflated entries. */
    private final Inflater inf = new Inflater(true);

    /** Buffer used to read from the wrapped stream. */
    private final ByteBuffer buf = ByteBuffer.allocate(ZipArchiveOutputStream.BUFFER_SIZE);

    /** The entry that is currently being read. */
    private CurrentEntry current = null;

    /** Whether the stream has been closed. */
    private boolean closed = false;

    /** Whether the stream has reached the central directory - and thus found all entries. */
    private boolean hitCentralDirectory = false;

    /**
     * When reading a stored entry that uses the data descriptor this
     * stream has to read the full entry and caches it. This is the
     * cache.
     */
    private ByteArrayInputStream lastStoredEntry = null;

    /** Whether the stream will try to read STORED entries that use a data descriptor. */
    private boolean allowStoredEntriesWithDataDescriptor = false;

    /** Count decompressed bytes for current entry */
    private long uncompressedCount = 0;

    private static final int LFH_LEN = 30;
    /*
      local file header signature     WORD
      version needed to extract       SHORT
      general purpose bit flag        SHORT
      compression method              SHORT
      last mod file time              SHORT
      last mod file date              SHORT
      crc-32                          WORD
      compressed size                 WORD
      uncompressed size               WORD
      file name length                SHORT
      extra field length              SHORT
    */

    private static final int CFH_LEN = 46;
    /*
      central file header signature   WORD
      version made by                 SHORT
      version needed to extract       SHORT
      general purpose bit flag        SHORT
      compression method              SHORT
      last mod file time              SHORT
      last mod file date              SHORT
      crc-32                          WORD
      compressed size                 WORD
      uncompressed size               WORD
      file name length                SHORT
      extra field length              SHORT
      file comment length             SHORT
      disk number start               SHORT
      internal file attributes        SHORT
      external file attributes        WORD
      relative offset of local header WORD
    */

    private static final long TWO_EXP_32 = ZIP64_MAGIC + 1;

    // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection)
    private final byte[] lfhBuf = new byte[LFH_LEN];
    private final byte[] skipBuf = new byte[1024];
    private final byte[] shortBuf = new byte[SHORT];
    private final byte[] wordBuf = new byte[WORD];
    private final byte[] twoDwordBuf = new byte[2 * DWORD];

    private int entriesRead = 0;

    /**
     * Create an instance using UTF-8 encoding
     * @param inputStream the stream to wrap
     */
    public ZipArchiveInputStream(final InputStream inputStream) {
        this(inputStream, ZipEncodingHelper.UTF8);
    }

    /**
     * Create an instance using the specified encoding
     * @param inputStream the stream to wrap
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     * @since 1.5
     */
    public ZipArchiveInputStream(final InputStream inputStream, final String encoding) {
        this(inputStream, encoding, true);
    }

    /**
     * Create an instance using the specified encoding
     * @param inputStream the stream to wrap
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
     * Extra Fields (if present) to set the file names.
     */
    public ZipArchiveInputStream(final InputStream inputStream, final String encoding, final boolean useUnicodeExtraFields) {
        this(inputStream, encoding, useUnicodeExtraFields, false);
    }

    /**
     * Create an instance using the specified encoding
     * @param inputStream the stream to wrap
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
     * Extra Fields (if present) to set the file names.
     * @param allowStoredEntriesWithDataDescriptor whether the stream
     * will try to read STORED entries that use a data descriptor
     * @since 1.1
     */
    public ZipArchiveInputStream(final InputStream inputStream,
                                 final String encoding,
                                 final boolean useUnicodeExtraFields,
                                 final boolean allowStoredEntriesWithDataDescriptor) {
        this.encoding = encoding;
        zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
        this.useUnicodeExtraFields = useUnicodeExtraFields;
        in = new PushbackInputStream(inputStream, buf.capacity());
        this.allowStoredEntriesWithDataDescriptor =
            allowStoredEntriesWithDataDescriptor;
        // haven't read anything so far
        buf.limit(0);
    }

    public ZipArchiveEntry getNextZipEntry() throws IOException {
        uncompressedCount = 0;

        boolean firstEntry = true;
        if (closed || hitCentralDirectory) {
            return null;
        }
        if (current != null) {
            closeEntry();
            firstEntry = false;
        }

        long currentHeaderOffset = getBytesRead();
        try {
            if (firstEntry) {
                // split archives have a special signature before the
                // first local file header - look for it and fail with
                // the appropriate error message if this is a split
                // archive.
                readFirstLocalFileHeader(lfhBuf);
            } else {
                readFully(lfhBuf);
            }
        } catch (final EOFException e) { //NOSONAR
            return null;
        }

        final ZipLong sig = new ZipLong(lfhBuf);
        if (!sig.equals(ZipLong.LFH_SIG)) {
            if (sig.equals(ZipLong.CFH_SIG) || sig.equals(ZipLong.AED_SIG) || isApkSigningBlock(lfhBuf)) {
                hitCentralDirectory = true;
                skipRemainderOfArchive();
                return null;
            }
            throw new ZipException(String.format("Unexpected record signature: 0X%X", sig.getValue()));
        }

        int off = WORD;
        current = new CurrentEntry();

        final int versionMadeBy = ZipShort.getValue(lfhBuf, off);
        off += SHORT;
        current.entry.setPlatform((versionMadeBy >> ZipFile.BYTE_SHIFT) & ZipFile.NIBLET_MASK);

        final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(lfhBuf, off);
        final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
        final ZipEncoding entryEncoding = hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
        current.hasDataDescriptor = gpFlag.usesDataDescriptor();
        current.entry.setGeneralPurposeBit(gpFlag);

        off += SHORT;

        current.entry.setMethod(ZipShort.getValue(lfhBuf, off));
        off += SHORT;

        final long time = ZipUtil.dosToJavaTime(ZipLong.getValue(lfhBuf, off));
        current.entry.setTime(time);
        off += WORD;

        ZipLong size = null, cSize = null;
        if (!current.hasDataDescriptor) {
            current.entry.setCrc(ZipLong.getValue(lfhBuf, off));
            off += WORD;

            cSize = new ZipLong(lfhBuf, off);
            off += WORD;

            size = new ZipLong(lfhBuf, off);
            off += WORD;
        } else {
            off += 3 * WORD;
        }

        final int fileNameLen = ZipShort.getValue(lfhBuf, off);

        off += SHORT;

        final int extraLen = ZipShort.getValue(lfhBuf, off);
        off += SHORT; // NOSONAR - assignment as documentation

        final byte[] fileName = new byte[fileNameLen];
        readFully(fileName);
        current.entry.setName(entryEncoding.decode(fileName), fileName);
        if (hasUTF8Flag) {
            current.entry.setNameSource(ZipArchiveEntry.NameSource.NAME_WITH_EFS_FLAG);
        }

        final byte[] extraData = new byte[extraLen];
        readFully(extraData);
        current.entry.setExtra(extraData);

        if (!hasUTF8Flag && useUnicodeExtraFields) {
            ZipUtil.setNameAndCommentFromExtraFields(current.entry, fileName, null);
        }

        processZip64Extra(size, cSize);

        current.entry.setLocalHeaderOffset(currentHeaderOffset);
        current.entry.setDataOffset(getBytesRead());
        current.entry.setStreamContiguous(true);

        ZipMethod m = ZipMethod.getMethodByCode(current.entry.getMethod());
        if (current.entry.getCompressedSize() != ArchiveEntry.SIZE_UNKNOWN) {
            if (ZipUtil.canHandleEntryData(current.entry) && m != ZipMethod.STORED && m != ZipMethod.DEFLATED) {
                InputStream bis = new BoundedInputStream(in, current.entry.getCompressedSize());
                switch (m) {
                case UNSHRINKING:
                    current.in = new UnshrinkingInputStream(bis);
                    break;
                case IMPLODING:
                    current.in = new ExplodingInputStream(
                            current.entry.getGeneralPurposeBit().getSlidingDictionarySize(),
                            current.entry.getGeneralPurposeBit().getNumberOfShannonFanoTrees(),
                            bis);
                    break;
                case BZIP2:
                    current.in = new BZip2CompressorInputStream(bis);
                    break;
                case ENHANCED_DEFLATED:
                    current.in = new Deflate64CompressorInputStream(bis);
                    break;
                default:
                    // we should never get here as all supported methods have been covered
                    // will cause an error when read is invoked, don't throw an exception here so people can
                    // skip unsupported entries
                    break;
                }
            }
        } else if (m == ZipMethod.ENHANCED_DEFLATED) {
            current.in = new Deflate64CompressorInputStream(in);
        }

        entriesRead++;
        return current.entry;
    }

    /**
     * Fills the given array with the first local file header and
     * deals with splitting/spanning markers that may prefix the first
     * LFH.
     */
    private void readFirstLocalFileHeader(final byte[] lfh) throws IOException {
        readFully(lfh);
        final ZipLong sig = new ZipLong(lfh);
        if (sig.equals(ZipLong.DD_SIG)) {
            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.SPLITTING);
        }

        if (sig.equals(ZipLong.SINGLE_SEGMENT_SPLIT_MARKER)) {
            // The archive is not really split as only one segment was
            // needed in the end. Just skip over the marker.
            final byte[] missedLfhBytes = new byte[4];
            readFully(missedLfhBytes);
            System.arraycopy(lfh, 4, lfh, 0, LFH_LEN - 4);
            System.arraycopy(missedLfhBytes, 0, lfh, LFH_LEN - 4, 4);
        }
    }

    /**
     * Records whether a Zip64 extra is present and sets the size
     * information from it if sizes are 0xFFFFFFFF and the entry
     * doesn't use a data descriptor.
     */
    private void processZip64Extra(final ZipLong size, final ZipLong cSize) {
        final Zip64ExtendedInformationExtraField z64 =
            (Zip64ExtendedInformationExtraField)
            current.entry.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID);
        current.usesZip64 = z64 != null;
        if (!current.hasDataDescriptor) {
            if (z64 != null // same as current.usesZip64 but avoids NPE warning
                && (cSize.equals(ZipLong.ZIP64_MAGIC) || size.equals(ZipLong.ZIP64_MAGIC)) ) {
                current.entry.setCompressedSize(z64.getCompressedSize().getLongValue());
                current.entry.setSize(z64.getSize().getLongValue());
            } else {
                current.entry.setCompressedSize(cSize.getValue());
                current.entry.setSize(size.getValue());
            }
        }
    }

    @Override
    public ArchiveEntry getNextEntry() throws IOException {
        return getNextZipEntry();
    }

    /**
     * Whether this class is able to read the given entry.
     *
     * <p>May return false if it is set up to use encryption or a
     * compression method that hasn't been implemented yet.</p>
     * @since 1.1
     */
    @Override
    public boolean canReadEntryData(final ArchiveEntry ae) {
        if (ae instanceof ZipArchiveEntry) {
            final ZipArchiveEntry ze = (ZipArchiveEntry) ae;
            return ZipUtil.canHandleEntryData(ze)
                && supportsDataDescriptorFor(ze)
                && supportsCompressedSizeFor(ze);
        }
        return false;
    }

    @Override
    public int read(final byte[] buffer, final int offset, final int length) throws IOException {
        if (closed) {
            throw new IOException("The stream is closed");
        }

        if (current == null) {
            return -1;
        }

        // avoid int overflow, check null buffer
        if (offset > buffer.length || length < 0 || offset < 0 || buffer.length - offset < length) {
            throw new ArrayIndexOutOfBoundsException();
        }

        ZipUtil.checkRequestedFeatures(current.entry);
        if (!supportsDataDescriptorFor(current.entry)) {
            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.DATA_DESCRIPTOR,
                    current.entry);
        }
        if (!supportsCompressedSizeFor(current.entry)) {
            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.UNKNOWN_COMPRESSED_SIZE,
                    current.entry);
        }

        int read;
        if (current.entry.getMethod() == ZipArchiveOutputStream.STORED) {
            read = readStored(buffer, offset, length);
        } else if (current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED) {
            read = readDeflated(buffer, offset, length);
        } else if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode()
                || current.entry.getMethod() == ZipMethod.IMPLODING.getCode()
                || current.entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode()
                || current.entry.getMethod() == ZipMethod.BZIP2.getCode()) {
            read = current.in.read(buffer, offset, length);
        } else {
            throw new UnsupportedZipFeatureException(ZipMethod.getMethodByCode(current.entry.getMethod()),
                    current.entry);
        }

        if (read >= 0) {
            current.crc.update(buffer, offset, read);
            uncompressedCount += read;
        }

        return read;
    }

    /**
     * @since 1.17
     */
    @Override
    public long getCompressedCount() {
        if (current.entry.getMethod() == ZipArchiveOutputStream.STORED) {
            return current.bytesRead;
        } else if (current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED) {
            return getBytesInflated();
        } else if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode()) {
            return ((UnshrinkingInputStream) current.in).getCompressedCount();
        } else if (current.entry.getMethod() == ZipMethod.IMPLODING.getCode()) {
            return ((ExplodingInputStream) current.in).getCompressedCount();
        } else if (current.entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode()) {
            return ((Deflate64CompressorInputStream) current.in).getCompressedCount();
        } else if (current.entry.getMethod() == ZipMethod.BZIP2.getCode()) {
            return ((BZip2CompressorInputStream) current.in).getCompressedCount();
        } else {
            return -1;
        }
    }

    /**
     * @since 1.17
     */
    @Override
    public long getUncompressedCount() {
        return uncompressedCount;
    }

    /**
     * Implementation of read for STORED entries.
     */
    private int readStored(final byte[] buffer, final int offset, final int length) throws IOException {

        if (current.hasDataDescriptor) {
            if (lastStoredEntry == null) {
                readStoredEntry();
            }
            return lastStoredEntry.read(buffer, offset, length);
        }

        final long csize = current.entry.getSize();
        if (current.bytesRead >= csize) {
            return -1;
        }

        if (buf.position() >= buf.limit()) {
            buf.position(0);
            final int l = in.read(buf.array());
            if (l == -1) {
                buf.limit(0);
                throw new IOException("Truncated ZIP file");
            }
            buf.limit(l);

            count(l);
            current.bytesReadFromStream += l;
        }

        int toRead = Math.min(buf.remaining(), length);
        if ((csize - current.bytesRead) < toRead) {
            // if it is smaller than toRead then it fits into an int
            toRead = (int) (csize - current.bytesRead);
        }
        buf.get(buffer, offset, toRead);
        current.bytesRead += toRead;
        return toRead;
    }

    /**
     * Implementation of read for DEFLATED entries.
     */
    private int readDeflated(final byte[] buffer, final int offset, final int length) throws IOException {
        final int read = readFromInflater(buffer, offset, length);
        if (read <= 0) {
            if (inf.finished()) {
                return -1;
            } else if (inf.needsDictionary()) {
                throw new ZipException("This archive needs a preset dictionary"
                        + " which is not supported by Commons"
                        + " Compress.");
            } else if (read == -1) {
                throw new IOException("Truncated ZIP file");
            }
        }
        return read;
    }

    /**
     * Potentially reads more bytes to fill the inflater's buffer and
     * reads from it.
     */
    private int readFromInflater(final byte[] buffer, final int offset, final int length) throws IOException {
        int read = 0;
        do {
            if (inf.needsInput()) {
                final int l = fill();
                if (l > 0) {
                    current.bytesReadFromStream += buf.limit();
                } else if (l == -1) {
                    return -1;
                } else {
                    break;
                }
            }
            try {
                read = inf.inflate(buffer, offset, length);
            } catch (final DataFormatException e) {
                throw (IOException) new ZipException(e.getMessage()).initCause(e);
            }
        } while (read == 0 && inf.needsInput());
        return read;
    }

    @Override
    public void close() throws IOException {
        if (!closed) {
            closed = true;
            try {
                in.close();
            } finally {
                inf.end();
            }
        }
    }

    /**
     * Skips over and discards value bytes of data from this input
     * stream.
     *
     * <p>This implementation may end up skipping over some smaller
     * number of bytes, possibly 0, if and only if it reaches the end
     * of the underlying stream.</p>
     *
     * <p>The actual number of bytes skipped is returned.</p>
     *
     * @param value the number of bytes to be skipped.
     * @return the actual number of bytes skipped.
     * @throws IOException - if an I/O error occurs.
     * @throws IllegalArgumentException - if value is negative.
     */
    @Override
    public long skip(final long value) throws IOException {
        if (value >= 0) {
            long skipped = 0;
            while (skipped < value) {
                final long rem = value - skipped;
                final int x = read(skipBuf, 0, (int) (skipBuf.length > rem ? rem : skipBuf.length));
                if (x == -1) {
                    return skipped;
                }
                skipped += x;
            }
            return skipped;
        }
        throw new IllegalArgumentException();
    }

    /**
     * Checks if the signature matches what is expected for a zip file.
     * Does not currently handle self-extracting zips which may have arbitrary
     * leading content.
     *
     * @param signature the bytes to check
     * @param length the number of bytes to check
     * @return true, if this stream is a zip archive stream, false otherwise
     */
    public static boolean matches(final byte[] signature, final int length) {
        if (length < ZipArchiveOutputStream.LFH_SIG.length) {
            return false;
        }

        return checksig(signature, ZipArchiveOutputStream.LFH_SIG) // normal file
            || checksig(signature, ZipArchiveOutputStream.EOCD_SIG) // empty zip
            || checksig(signature, ZipArchiveOutputStream.DD_SIG) // split zip
            || checksig(signature, ZipLong.SINGLE_SEGMENT_SPLIT_MARKER.getBytes());
    }

    private static boolean checksig(final byte[] signature, final byte[] expected) {
        for (int i = 0; i < expected.length; i++) {
            if (signature[i] != expected[i]) {
                return false;
            }
        }
        return true;
    }

    /**
     * Closes the current ZIP archive entry and positions the underlying
     * stream to the beginning of the next entry. All per-entry variables
     * and data structures are cleared.
     * <p>
     * If the compressed size of this entry is included in the entry header,
     * then any outstanding bytes are simply skipped from the underlying
     * stream without uncompressing them. This allows an entry to be safely
     * closed even if the compression method is unsupported.
     * <p>
     * In case we don't know the compressed size of this entry or have
     * already buffered too much data from the underlying stream to support
     * uncompression, then the uncompression process is completed and the
     * end position of the stream is adjusted based on the result of that
     * process.
     *
     * @throws IOException if an error occurs
     */
    private void closeEntry() throws IOException {
        if (closed) {
            throw new IOException("The stream is closed");
        }
        if (current == null) {
            return;
        }

        // Ensure all entry bytes are read
        if (currentEntryHasOutstandingBytes()) {
            drainCurrentEntryData();
        } else {
            // this is guaranteed to exhaust the stream
            skip(Long.MAX_VALUE); //NOSONAR

            final long inB = current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED
                ? getBytesInflated() : current.bytesRead;

            // this is at most a single read() operation and can't
            // exceed the range of int
            final int diff = (int) (current.bytesReadFromStream - inB);

            // Pushback any required bytes
            if (diff > 0) {
                pushback(buf.array(), buf.limit() - diff, diff);
                current.bytesReadFromStream -= diff;
            }

            // Drain remainder of entry if not all data bytes were required
            if (currentEntryHasOutstandingBytes()) {
                drainCurrentEntryData();
            }
        }

        if (lastStoredEntry == null && current.hasDataDescriptor) {
            readDataDescriptor();
        }

        inf.reset();
        buf.clear().flip();
        current = null;
        lastStoredEntry = null;
    }

    /**
     * If the compressed size of the current entry is included in the entry header
     * and there are any outstanding bytes in the underlying stream, then
     * this returns true.
     *
     * @return true, if current entry is determined to have outstanding bytes, false otherwise
     */
    private boolean currentEntryHasOutstandingBytes() {
        return current.bytesReadFromStream <= current.entry.getCompressedSize()
            && !current.hasDataDescriptor;
    }

    /**
     * Read all data of the current entry from the underlying stream
     * that hasn't been read, yet.
     */
    private void drainCurrentEntryData() throws IOException {
        long remaining = current.entry.getCompressedSize() - current.bytesReadFromStream;
        while (remaining > 0) {
            final long n = in.read(buf.array(), 0, (int) Math.min(buf.capacity(), remaining));
            if (n < 0) {
                throw new EOFException("Truncated ZIP entry: "
                        + ArchiveUtils.sanitize(current.entry.getName()));
            }
            count(n);
            remaining -= n;
        }
    }

    /**
     * Get the number of bytes Inflater has actually processed.
     *
     * <p>for Java < Java7 the getBytes* methods in
     * Inflater/Deflater seem to return unsigned ints rather than
     * longs that start over with 0 at 2^32.</p>
     *
     * <p>The stream knows how many bytes it has read, but not how
     * many the Inflater actually consumed - it should be between the
     * total number of bytes read for the entry and the total number
     * minus the last read operation. Here we just try to make the
     * value close enough to the bytes we've read by assuming the
     * number of bytes consumed must be smaller than (or equal to) the
     * number of bytes read but not smaller by more than 2^32.</p>
     */
    private long getBytesInflated() {
        long inB = inf.getBytesRead();
        if (current.bytesReadFromStream >= TWO_EXP_32) {
            while (inB + TWO_EXP_32 <= current.bytesReadFromStream) {
                inB += TWO_EXP_32;
            }
        }
        return inB;
    }

    private int fill() throws IOException {
        if (closed) {
            throw new IOException("The stream is closed");
        }
        final int length = in.read(buf.array());
        if (length > 0) {
            buf.limit(length);
            count(buf.limit());
            inf.setInput(buf.array(), 0, buf.limit());
        }
        return length;
    }

    private void readFully(final byte[] b) throws IOException {
        readFully(b, 0);
    }

    private void readFully(final byte[] b, final int off) throws IOException {
        final int len = b.length - off;
        final int count = IOUtils.readFully(in, b, off, len);
        count(count);
        if (count < len) {
            throw new EOFException();
        }
    }

    private void readDataDescriptor() throws IOException {
        readFully(wordBuf);
        ZipLong val = new ZipLong(wordBuf);
        if (ZipLong.DD_SIG.equals(val)) {
            // data descriptor with signature, skip sig
            readFully(wordBuf);
            val = new ZipLong(wordBuf);
        }
        current.entry.setCrc(val.getValue());

        // if there is a ZIP64 extra field, sizes are eight bytes
        // each, otherwise four bytes each. Unfortunately some
        // implementations - namely Java7 - use eight bytes without
        // using a ZIP64 extra field -
        // https://bugs.sun.com/bugdatabase/view_bug.do?bug_id=7073588

        // just read 16 bytes and check whether bytes nine to twelve
        // look like one of the signatures of what could follow a data
        // descriptor (ignoring archive decryption headers for now).
        // If so, push back eight bytes and assume sizes are four
        // bytes, otherwise sizes are eight bytes each.
        readFully(twoDwordBuf);
        final ZipLong potentialSig = new ZipLong(twoDwordBuf, DWORD);
        if (potentialSig.equals(ZipLong.CFH_SIG) || potentialSig.equals(ZipLong.LFH_SIG)) {
            pushback(twoDwordBuf, DWORD, DWORD);
            current.entry.setCompressedSize(ZipLong.getValue(twoDwordBuf));
            current.entry.setSize(ZipLong.getValue(twoDwordBuf, WORD));
        } else {
            current.entry.setCompressedSize(ZipEightByteInteger.getLongValue(twoDwordBuf));
            current.entry.setSize(ZipEightByteInteger.getLongValue(twoDwordBuf, DWORD));
        }
    }

    /**
     * Whether this entry requires a data descriptor this library can work with.
     *
     * @return true if allowStoredEntriesWithDataDescriptor is true,
     * the entry doesn't require any data descriptor or the method is
     * DEFLATED or ENHANCED_DEFLATED.
     */
    private boolean supportsDataDescriptorFor(final ZipArchiveEntry entry) {
        return !entry.getGeneralPurposeBit().usesDataDescriptor()
                || (allowStoredEntriesWithDataDescriptor && entry.getMethod() == ZipEntry.STORED)
                || entry.getMethod() == ZipEntry.DEFLATED
                || entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode();
    }

    /**
     * Whether the compressed size for the entry is either known or
     * not required by the compression method being used.
     */
    private boolean supportsCompressedSizeFor(final ZipArchiveEntry entry) {
        return entry.getCompressedSize() != ArchiveEntry.SIZE_UNKNOWN
            || entry.getMethod() == ZipEntry.DEFLATED
            || entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode()
            || (entry.getGeneralPurposeBit().usesDataDescriptor()
                && allowStoredEntriesWithDataDescriptor
                && entry.getMethod() == ZipEntry.STORED);
    }

    /**
     * Caches a stored entry that uses the data descriptor.
     *
     * <ul>
     * <li>Reads a stored entry until the signature of a local file
     * header, central directory header or data descriptor has been
     * found.</li>
     * <li>Stores all entry data in lastStoredEntry.</li>
     * <li>Rewinds the stream to position at the data
     * descriptor.</li>
     * <li>reads the data descriptor</li>
     * </ul>
     *
     * <p>After calling this method the entry should know its size,
     * the entry's data is cached and the stream is positioned at the
     * next local file or central directory header.</p>
     */
    private void readStoredEntry() throws IOException {
        final ByteArrayOutputStream bos = new ByteArrayOutputStream();
        int off = 0;
        boolean done = false;

        // length of DD without signature
        final int ddLen = current.usesZip64 ? WORD + 2 * DWORD : 3 * WORD;

        while (!done) {
            final int r = in.read(buf.array(), off, ZipArchiveOutputStream.BUFFER_SIZE - off);
            if (r <= 0) {
                // read the whole archive without ever finding a
                // central directory
                throw new IOException("Truncated ZIP file");
            }
            if (r + off < 4) {
                // buffer too small to check for a signature, loop
                off += r;
                continue;
            }

            done = bufferContainsSignature(bos, off, r, ddLen);
            if (!done) {
                off = cacheBytesRead(bos, off, r, ddLen);
            }
        }

        final byte[] b = bos.toByteArray();
        lastStoredEntry = new ByteArrayInputStream(b);
    }

    private static final byte[] LFH = ZipLong.LFH_SIG.getBytes();
    private static final byte[] CFH = ZipLong.CFH_SIG.getBytes();
    private static final byte[] DD = ZipLong.DD_SIG.getBytes();

    /**
     * Checks whether the current buffer contains the signature of a
     * "data descriptor", "local file header" or
     * "central directory entry".
     *
     * <p>If it contains such a signature, reads the data descriptor
     * and positions the stream right after the data descriptor.</p>
     */
    private boolean bufferContainsSignature(final ByteArrayOutputStream bos, final int offset, final int lastRead, final int expectedDDLen)
        throws IOException {

        boolean done = false;
        int readTooMuch = 0;
        for (int i = 0; !done && i < offset + lastRead - 4; i++) {
            if (buf.array()[i] == LFH[0] && buf.array()[i + 1] == LFH[1]) {
                if ((buf.array()[i + 2] == LFH[2] && buf.array()[i + 3] == LFH[3])
                    || (buf.array()[i + 2] == CFH[2] && buf.array()[i + 3] == CFH[3])) {
                    // found a LFH or CFH:
                    readTooMuch = offset + lastRead - i - expectedDDLen;
                    done = true;
                }
                else if (buf.array()[i + 2] == DD[2] && buf.array()[i + 3] == DD[3]) {
                    // found DD:
                    readTooMuch = offset + lastRead - i;
                    done = true;
                }
                if (done) {
                    // * push back bytes read in excess as well as the data
                    //   descriptor
                    // * copy the remaining bytes to cache
                    // * read data descriptor
                    pushback(buf.array(), offset + lastRead - readTooMuch, readTooMuch);
                    bos.write(buf.array(), 0, i);
                    readDataDescriptor();
                }
            }
        }
        return done;
    }

    /**
     * If the last read bytes could hold a data descriptor and an
     * incomplete signature then save the last bytes to the front of
     * the buffer and cache everything in front of the potential data
     * descriptor into the given ByteArrayOutputStream.
     *
     * <p>Data descriptor plus incomplete signature (3 bytes in the
     * worst case) can be 20 bytes max.</p>
     */
    private int cacheBytesRead(final ByteArrayOutputStream bos, int offset, final int lastRead, final int expecteDDLen) {
        final int cacheable = offset + lastRead - expecteDDLen - 3;
        if (cacheable > 0) {
            bos.write(buf.array(), 0, cacheable);
            System.arraycopy(buf.array(), cacheable, buf.array(), 0, expecteDDLen + 3);
            offset = expecteDDLen + 3;
        } else {
            offset += lastRead;
        }
        return offset;
    }

    private void pushback(final byte[] buf, final int offset, final int length) throws IOException {
        ((PushbackInputStream) in).unread(buf, offset, length);
        pushedBackBytes(length);
    }

    // End of Central Directory Record
    //   end of central dir signature    WORD
    //   number of this disk             SHORT
    //   number of the disk with the
    //   start of the central directory  SHORT
    //   total number of entries in the
    //   central directory on this disk  SHORT
    //   total number of entries in
    //   the central directory           SHORT
    //   size of the central directory   WORD
    //   offset of start of central
    //   directory with respect to
    //   the starting disk number        WORD
    //   .ZIP file comment length        SHORT
    //   .ZIP file comment up to 64KB
    //

    /**
     * Reads the stream until it finds the "End of central directory
     * record" and consumes it as well.
     */
    private void skipRemainderOfArchive() throws IOException {
        // skip over central directory. One LFH has been read too much
        // already. The calculation discounts file names and extra
        // data so it will be too short.
        realSkip((long) entriesRead * CFH_LEN - LFH_LEN);
        findEocdRecord();
        realSkip((long) ZipFile.MIN_EOCD_SIZE - WORD /* signature */ - SHORT /* comment len */);
        readFully(shortBuf);
        // file comment
        realSkip(ZipShort.getValue(shortBuf));
    }

    /**
     * Reads forward until the signature of the "End of central
     * directory" record is found.
     */
    private void findEocdRecord() throws IOException {
        int currentByte = -1;
        boolean skipReadCall = false;
        while (skipReadCall || (currentByte = readOneByte()) > -1) {
            skipReadCall = false;
            if (!isFirstByteOfEocdSig(currentByte)) {
                continue;
            }
            currentByte = readOneByte();
            if (currentByte != ZipArchiveOutputStream.EOCD_SIG[1]) {
                if (currentByte == -1) {
                    break;
                }
                skipReadCall = isFirstByteOfEocdSig(currentByte);
                continue;
            }
            currentByte = readOneByte();
            if (currentByte != ZipArchiveOutputStream.EOCD_SIG[2]) {
                if (currentByte == -1) {
                    break;
                }
                skipReadCall = isFirstByteOfEocdSig(currentByte);
                continue;
            }
            currentByte = readOneByte();
            if (currentByte == -1
                || currentByte == ZipArchiveOutputStream.EOCD_SIG[3]) {
                break;
            }
            skipReadCall = isFirstByteOfEocdSig(currentByte);
        }
    }

    /**
     * Skips bytes by reading from the underlying stream rather than
     * the (potentially inflating) archive stream - which {@link
     * #skip} would do.
     *
     * Also updates bytes-read counter.
     */
    private void realSkip(final long value) throws IOException {
        if (value >= 0) {
            long skipped = 0;
            while (skipped < value) {
                final long rem = value - skipped;
                final int x = in.read(skipBuf, 0, (int) (skipBuf.length > rem ? rem : skipBuf.length));
                if (x == -1) {
                    return;
                }
                count(x);
                skipped += x;
            }
            return;
        }
        throw new IllegalArgumentException();
    }

    /**
     * Reads bytes by reading from the underlying stream rather than
     * the (potentially inflating) archive stream - which {@link #read} would do.
     *
     * Also updates bytes-read counter.
     */
    private int readOneByte() throws IOException {
        final int b = in.read();
        if (b != -1) {
            count(1);
        }
        return b;
    }

    private boolean isFirstByteOfEocdSig(final int b) {
        return b == ZipArchiveOutputStream.EOCD_SIG[0];
    }

    private static final byte[] APK_SIGNING_BLOCK_MAGIC = new byte[] {
        'A', 'P', 'K', ' ', 'S', 'i', 'g', ' ', 'B', 'l', 'o', 'c', 'k', ' ', '4', '2',
    };
    private static final BigInteger LONG_MAX = BigInteger.valueOf(Long.MAX_VALUE);

    /**
     * Checks whether this might be an APK Signing Block.
     *
     * <p>Unfortunately the APK signing block does not start with some kind of signature, it rather ends with one. It
     * starts with a length, so what we do is parse the suspect length, skip ahead far enough, look for the signature
     * and if we've found it, return true.</p>
     *
     * @param suspectLocalFileHeader the bytes read from the underlying stream in the expectation that they would hold
     * the local file header of the next entry.
     *
     * @return true if this looks like an APK signing block
     *
     * @see <a href="https://source.android.com/security/apksigning/v2">https://source.android.com/security/apksigning/v2</a>
     */
    private boolean isApkSigningBlock(byte[] suspectLocalFileHeader) throws IOException {
        // length of block excluding the size field itself
        BigInteger len = ZipEightByteInteger.getValue(suspectLocalFileHeader);
        // LFH has already been read and all but the first eight bytes contain (part of) the APK signing block,
        // also subtract 16 bytes in order to position us at the magic string
        BigInteger toSkip = len.add(BigInteger.valueOf(DWORD - suspectLocalFileHeader.length
            - (long) APK_SIGNING_BLOCK_MAGIC.length));
        byte[] magic = new byte[APK_SIGNING_BLOCK_MAGIC.length];

        try {
            if (toSkip.signum() < 0) {
                // suspectLocalFileHeader contains the start of suspect magic string
                int off = suspectLocalFileHeader.length + toSkip.intValue();
                // length was shorter than magic length
                if (off < DWORD) {
                    return false;
                }
                int bytesInBuffer = Math.abs(toSkip.intValue());
                System.arraycopy(suspectLocalFileHeader, off, magic, 0, Math.min(bytesInBuffer, magic.length));
                if (bytesInBuffer < magic.length) {
                    readFully(magic, bytesInBuffer);
                }
            } else {
                while (toSkip.compareTo(LONG_MAX) > 0) {
                    realSkip(Long.MAX_VALUE);
                    toSkip = toSkip.add(LONG_MAX.negate());
                }
                realSkip(toSkip.longValue());
                readFully(magic);
            }
        } catch (EOFException ex) { //NOSONAR
            // length was invalid
            return false;
        }
        return Arrays.equals(magic, APK_SIGNING_BLOCK_MAGIC);
    }

    /**
     * Structure collecting information for the entry that is
     * currently being read.
     */
    private static final class CurrentEntry {

        /**
         * Current ZIP entry.
         */
        private final ZipArchiveEntry entry = new ZipArchiveEntry();

        /**
         * Does the entry use a data descriptor?
         */
        private boolean hasDataDescriptor;

        /**
         * Does the entry have a ZIP64 extended information extra field.
         */
        private boolean usesZip64;

        /**
         * Number of bytes of entry content read by the client if the
         * entry is STORED.
         */
        private long bytesRead;

        /**
         * Number of bytes of entry content read from the stream.
         *
         * <p>This may be more than the actual entry's length as some
         * stuff gets buffered up and needs to be pushed back when the
         * end of the entry has been reached.</p>
         */
        private long bytesReadFromStream;

        /**
         * The checksum calculated as the current entry is read.
         */
        private final CRC32 crc = new CRC32();

        /**
         * The input stream decompressing the data for shrunk and imploded entries.
         */
        private InputStream in;
    }

    /**
     * Bounded input stream adapted from commons-io
     */
    private class BoundedInputStream extends InputStream {

        /** the wrapped input stream */
        private final InputStream in;

        /** the max length to provide */
        private final long max;

        /** the number of bytes already returned */
        private long pos = 0;

        /**
         * Creates a new <code>BoundedInputStream</code> that wraps the given input
         * stream and limits it to a certain size.
         *
         * @param in The wrapped input stream
         * @param size The maximum number of bytes to return
         */
        public BoundedInputStream(final InputStream in, final long size) {
            this.max = size;
            this.in = in;
        }

        @Override
        public int read() throws IOException {
            if (max >= 0 && pos >= max) {
                return -1;
            }
            final int result = in.read();
            pos++;
            count(1);
            current.bytesReadFromStream++;
            return result;
        }

        @Override
        public int read(final byte[] b) throws IOException {
            return this.read(b, 0, b.length);
        }

        @Override
        public int read(final byte[] b, final int off, final int len) throws IOException {
            if (max >= 0 && pos >= max) {
                return -1;
            }
            final long maxRead = max >= 0 ? Math.min(len, max - pos) : len;
            final int bytesRead = in.read(b, off, (int) maxRead);

            if (bytesRead == -1) {
                return -1;
            }

            pos += bytesRead;
            count(bytesRead);
            current.bytesReadFromStream += bytesRead;
            return bytesRead;
        }

        @Override
        public long skip(final long n) throws IOException {
            final long toSkip = max >= 0 ? Math.min(n, max - pos) : n;
            final long skippedBytes = IOUtils.skip(in, toSkip);
            pos += skippedBytes;
            return skippedBytes;
        }

        @Override
        public int available() throws IOException {
            if (max >= 0 && pos >= max) {
                return 0;
            }
            return in.available();
        }
    }
}