1 /* 2 * SeekableXZInputStream 3 * 4 * Author: Lasse Collin <lasse.collin@tukaani.org> 5 * 6 * This file has been put into the public domain. 7 * You can do whatever you want with this file. 8 */ 9 10 package org.tukaani.xz; 11 12 import java.util.Arrays; 13 import java.util.ArrayList; 14 import java.io.DataInputStream; 15 import java.io.IOException; 16 import java.io.EOFException; 17 import org.tukaani.xz.common.DecoderUtil; 18 import org.tukaani.xz.common.StreamFlags; 19 import org.tukaani.xz.check.Check; 20 import org.tukaani.xz.index.IndexDecoder; 21 import org.tukaani.xz.index.BlockInfo; 22 23 /** 24 * Decompresses a .xz file in random access mode. 25 * This supports decompressing concatenated .xz files. 26 * <p> 27 * Each .xz file consist of one or more Streams. Each Stream consist of zero 28 * or more Blocks. Each Stream contains an Index of Streams' Blocks. 29 * The Indexes from all Streams are loaded in RAM by a constructor of this 30 * class. A typical .xz file has only one Stream, and parsing its Index will 31 * need only three or four seeks. 32 * <p> 33 * To make random access possible, the data in a .xz file must be splitted 34 * into multiple Blocks of reasonable size. Decompression can only start at 35 * a Block boundary. When seeking to an uncompressed position that is not at 36 * a Block boundary, decompression starts at the beginning of the Block and 37 * throws away data until the target position is reached. Thus, smaller Blocks 38 * mean faster seeks to arbitrary uncompressed positions. On the other hand, 39 * smaller Blocks mean worse compression. So one has to make a compromise 40 * between random access speed and compression ratio. 41 * <p> 42 * Implementation note: This class uses linear search to locate the correct 43 * Stream from the data structures in RAM. It was the simplest to implement 44 * and should be fine as long as there aren't too many Streams. The correct 45 * Block inside a Stream is located using binary search and thus is fast 46 * even with a huge number of Blocks. 47 * 48 * <h4>Memory usage</h4> 49 * <p> 50 * The amount of memory needed for the Indexes is taken into account when 51 * checking the memory usage limit. Each Stream is calculated to need at 52 * least 1 KiB of memory and each Block 16 bytes of memory, rounded up 53 * to the next kibibyte. So unless the file has a huge number of Streams or 54 * Blocks, these don't take significant amount of memory. 55 * 56 * <h4>Creating random-accessible .xz files</h4> 57 * <p> 58 * When using {@link XZOutputStream}, a new Block can be started by calling 59 * its {@link XZOutputStream#endBlock() endBlock} method. If you know 60 * that the decompressor will only need to seek to certain uncompressed 61 * positions, it can be a good idea to start a new Block at (some of) these 62 * positions (and only at these positions to get better compression ratio). 63 * <p> 64 * liblzma in XZ Utils supports starting a new Block with 65 * <code>LZMA_FULL_FLUSH</code>. XZ Utils 5.1.1alpha added threaded 66 * compression which creates multi-Block .xz files. XZ Utils 5.1.1alpha 67 * also added the option <code>--block-size=SIZE</code> to the xz command 68 * line tool. XZ Utils 5.1.2alpha added a partial implementation of 69 * <code>--block-list=SIZES</code> which allows specifying sizes of 70 * individual Blocks. 71 * 72 * @see SeekableFileInputStream 73 * @see XZInputStream 74 * @see XZOutputStream 75 */ 76 public class SeekableXZInputStream extends SeekableInputStream { 77 /** 78 * The input stream containing XZ compressed data. 79 */ 80 private SeekableInputStream in; 81 82 /** 83 * Memory usage limit after the memory usage of the IndexDecoders have 84 * been substracted. 85 */ 86 private final int memoryLimit; 87 88 /** 89 * Memory usage of the IndexDecoders. 90 * <code>memoryLimit + indexMemoryUsage</code> equals the original 91 * memory usage limit that was passed to the constructor. 92 */ 93 private int indexMemoryUsage = 0; 94 95 /** 96 * List of IndexDecoders, one for each Stream in the file. 97 * The list is in reverse order: The first element is 98 * the last Stream in the file. 99 */ 100 private final ArrayList streams = new ArrayList(); 101 102 /** 103 * Bitmask of all Check IDs seen. 104 */ 105 private int checkTypes = 0; 106 107 /** 108 * Uncompressed size of the file (all Streams). 109 */ 110 private long uncompressedSize = 0; 111 112 /** 113 * Uncompressed size of the largest XZ Block in the file. 114 */ 115 private long largestBlockSize = 0; 116 117 /** 118 * Number of XZ Blocks in the file. 119 */ 120 private int blockCount = 0; 121 122 /** 123 * Size and position information about the current Block. 124 * If there are no Blocks, all values will be <code>-1</code>. 125 */ 126 private final BlockInfo curBlockInfo; 127 128 /** 129 * Temporary (and cached) information about the Block whose information 130 * is queried via <code>getBlockPos</code> and related functions. 131 */ 132 private final BlockInfo queriedBlockInfo; 133 134 /** 135 * Integrity Check in the current XZ Stream. The constructor leaves 136 * this to point to the Check of the first Stream. 137 */ 138 private Check check; 139 140 /** 141 * Flag indicating if the integrity checks will be verified. 142 */ 143 private final boolean verifyCheck; 144 145 /** 146 * Decoder of the current XZ Block, if any. 147 */ 148 private BlockInputStream blockDecoder = null; 149 150 /** 151 * Current uncompressed position. 152 */ 153 private long curPos = 0; 154 155 /** 156 * Target position for seeking. 157 */ 158 private long seekPos; 159 160 /** 161 * True when <code>seek(long)</code> has been called but the actual 162 * seeking hasn't been done yet. 163 */ 164 private boolean seekNeeded = false; 165 166 /** 167 * True when end of the file was reached. This can be cleared by 168 * calling <code>seek(long)</code>. 169 */ 170 private boolean endReached = false; 171 172 /** 173 * Pending exception from an earlier error. 174 */ 175 private IOException exception = null; 176 177 /** 178 * Temporary buffer for read(). This avoids reallocating memory 179 * on every read() call. 180 */ 181 private final byte[] tempBuf = new byte[1]; 182 183 /** 184 * Creates a new seekable XZ decompressor without a memory usage limit. 185 * 186 * @param in seekable input stream containing one or more 187 * XZ Streams; the whole input stream is used 188 * 189 * @throws XZFormatException 190 * input is not in the XZ format 191 * 192 * @throws CorruptedInputException 193 * XZ data is corrupt or truncated 194 * 195 * @throws UnsupportedOptionsException 196 * XZ headers seem valid but they specify 197 * options not supported by this implementation 198 * 199 * @throws EOFException 200 * less than 6 bytes of input was available 201 * from <code>in</code>, or (unlikely) the size 202 * of the underlying stream got smaller while 203 * this was reading from it 204 * 205 * @throws IOException may be thrown by <code>in</code> 206 */ SeekableXZInputStream(SeekableInputStream in)207 public SeekableXZInputStream(SeekableInputStream in) 208 throws IOException { 209 this(in, -1); 210 } 211 212 /** 213 * Creates a new seekable XZ decomporessor with an optional 214 * memory usage limit. 215 * 216 * @param in seekable input stream containing one or more 217 * XZ Streams; the whole input stream is used 218 * 219 * @param memoryLimit memory usage limit in kibibytes (KiB) 220 * or <code>-1</code> to impose no 221 * memory usage limit 222 * 223 * @throws XZFormatException 224 * input is not in the XZ format 225 * 226 * @throws CorruptedInputException 227 * XZ data is corrupt or truncated 228 * 229 * @throws UnsupportedOptionsException 230 * XZ headers seem valid but they specify 231 * options not supported by this implementation 232 * 233 * @throws MemoryLimitException 234 * decoded XZ Indexes would need more memory 235 * than allowed by the memory usage limit 236 * 237 * @throws EOFException 238 * less than 6 bytes of input was available 239 * from <code>in</code>, or (unlikely) the size 240 * of the underlying stream got smaller while 241 * this was reading from it 242 * 243 * @throws IOException may be thrown by <code>in</code> 244 */ SeekableXZInputStream(SeekableInputStream in, int memoryLimit)245 public SeekableXZInputStream(SeekableInputStream in, int memoryLimit) 246 throws IOException { 247 this(in, memoryLimit, true); 248 } 249 250 /** 251 * Creates a new seekable XZ decomporessor with an optional 252 * memory usage limit and ability to disable verification 253 * of integrity checks. 254 * <p> 255 * Note that integrity check verification should almost never be disabled. 256 * Possible reasons to disable integrity check verification: 257 * <ul> 258 * <li>Trying to recover data from a corrupt .xz file.</li> 259 * <li>Speeding up decompression. This matters mostly with SHA-256 260 * or with files that have compressed extremely well. It's recommended 261 * that integrity checking isn't disabled for performance reasons 262 * unless the file integrity is verified externally in some other 263 * way.</li> 264 * </ul> 265 * <p> 266 * <code>verifyCheck</code> only affects the integrity check of 267 * the actual compressed data. The CRC32 fields in the headers 268 * are always verified. 269 * 270 * @param in seekable input stream containing one or more 271 * XZ Streams; the whole input stream is used 272 * 273 * @param memoryLimit memory usage limit in kibibytes (KiB) 274 * or <code>-1</code> to impose no 275 * memory usage limit 276 * 277 * @param verifyCheck if <code>true</code>, the integrity checks 278 * will be verified; this should almost never 279 * be set to <code>false</code> 280 * 281 * @throws XZFormatException 282 * input is not in the XZ format 283 * 284 * @throws CorruptedInputException 285 * XZ data is corrupt or truncated 286 * 287 * @throws UnsupportedOptionsException 288 * XZ headers seem valid but they specify 289 * options not supported by this implementation 290 * 291 * @throws MemoryLimitException 292 * decoded XZ Indexes would need more memory 293 * than allowed by the memory usage limit 294 * 295 * @throws EOFException 296 * less than 6 bytes of input was available 297 * from <code>in</code>, or (unlikely) the size 298 * of the underlying stream got smaller while 299 * this was reading from it 300 * 301 * @throws IOException may be thrown by <code>in</code> 302 * 303 * @since 1.6 304 */ SeekableXZInputStream(SeekableInputStream in, int memoryLimit, boolean verifyCheck)305 public SeekableXZInputStream(SeekableInputStream in, int memoryLimit, 306 boolean verifyCheck) 307 throws IOException { 308 this.verifyCheck = verifyCheck; 309 this.in = in; 310 DataInputStream inData = new DataInputStream(in); 311 312 // Check the magic bytes in the beginning of the file. 313 { 314 in.seek(0); 315 byte[] buf = new byte[XZ.HEADER_MAGIC.length]; 316 inData.readFully(buf); 317 if (!Arrays.equals(buf, XZ.HEADER_MAGIC)) 318 throw new XZFormatException(); 319 } 320 321 // Get the file size and verify that it is a multiple of 4 bytes. 322 long pos = in.length(); 323 if ((pos & 3) != 0) 324 throw new CorruptedInputException( 325 "XZ file size is not a multiple of 4 bytes"); 326 327 // Parse the headers starting from the end of the file. 328 byte[] buf = new byte[DecoderUtil.STREAM_HEADER_SIZE]; 329 long streamPadding = 0; 330 331 while (pos > 0) { 332 if (pos < DecoderUtil.STREAM_HEADER_SIZE) 333 throw new CorruptedInputException(); 334 335 // Read the potential Stream Footer. 336 in.seek(pos - DecoderUtil.STREAM_HEADER_SIZE); 337 inData.readFully(buf); 338 339 // Skip Stream Padding four bytes at a time. 340 // Skipping more at once would be faster, 341 // but usually there isn't much Stream Padding. 342 if (buf[8] == 0x00 && buf[9] == 0x00 && buf[10] == 0x00 343 && buf[11] == 0x00) { 344 streamPadding += 4; 345 pos -= 4; 346 continue; 347 } 348 349 // It's not Stream Padding. Update pos. 350 pos -= DecoderUtil.STREAM_HEADER_SIZE; 351 352 // Decode the Stream Footer and check if Backward Size 353 // looks reasonable. 354 StreamFlags streamFooter = DecoderUtil.decodeStreamFooter(buf); 355 if (streamFooter.backwardSize >= pos) 356 throw new CorruptedInputException( 357 "Backward Size in XZ Stream Footer is too big"); 358 359 // Check that the Check ID is supported. Store it in case this 360 // is the first Stream in the file. 361 check = Check.getInstance(streamFooter.checkType); 362 363 // Remember which Check IDs have been seen. 364 checkTypes |= 1 << streamFooter.checkType; 365 366 // Seek to the beginning of the Index. 367 in.seek(pos - streamFooter.backwardSize); 368 369 // Decode the Index field. 370 IndexDecoder index; 371 try { 372 index = new IndexDecoder(in, streamFooter, streamPadding, 373 memoryLimit); 374 } catch (MemoryLimitException e) { 375 // IndexDecoder doesn't know how much memory we had 376 // already needed so we need to recreate the exception. 377 assert memoryLimit >= 0; 378 throw new MemoryLimitException( 379 e.getMemoryNeeded() + indexMemoryUsage, 380 memoryLimit + indexMemoryUsage); 381 } 382 383 // Update the memory usage and limit counters. 384 indexMemoryUsage += index.getMemoryUsage(); 385 if (memoryLimit >= 0) { 386 memoryLimit -= index.getMemoryUsage(); 387 assert memoryLimit >= 0; 388 } 389 390 // Remember the uncompressed size of the largest Block. 391 if (largestBlockSize < index.getLargestBlockSize()) 392 largestBlockSize = index.getLargestBlockSize(); 393 394 // Calculate the offset to the beginning of this XZ Stream and 395 // check that it looks sane. 396 long off = index.getStreamSize() - DecoderUtil.STREAM_HEADER_SIZE; 397 if (pos < off) 398 throw new CorruptedInputException("XZ Index indicates " 399 + "too big compressed size for the XZ Stream"); 400 401 // Seek to the beginning of this Stream. 402 pos -= off; 403 in.seek(pos); 404 405 // Decode the Stream Header. 406 inData.readFully(buf); 407 StreamFlags streamHeader = DecoderUtil.decodeStreamHeader(buf); 408 409 // Verify that the Stream Header matches the Stream Footer. 410 if (!DecoderUtil.areStreamFlagsEqual(streamHeader, streamFooter)) 411 throw new CorruptedInputException( 412 "XZ Stream Footer does not match Stream Header"); 413 414 // Update the total uncompressed size of the file and check that 415 // it doesn't overflow. 416 uncompressedSize += index.getUncompressedSize(); 417 if (uncompressedSize < 0) 418 throw new UnsupportedOptionsException("XZ file is too big"); 419 420 // Update the Block count and check that it fits into an int. 421 blockCount += index.getRecordCount(); 422 if (blockCount < 0) 423 throw new UnsupportedOptionsException( 424 "XZ file has over " + Integer.MAX_VALUE + " Blocks"); 425 426 // Add this Stream to the list of Streams. 427 streams.add(index); 428 429 // Reset to be ready to parse the next Stream. 430 streamPadding = 0; 431 } 432 433 assert pos == 0; 434 435 // Save it now that indexMemoryUsage has been substracted from it. 436 this.memoryLimit = memoryLimit; 437 438 // Store the relative offsets of the Streams. This way we don't 439 // need to recalculate them in this class when seeking; the 440 // IndexDecoder instances will handle them. 441 IndexDecoder prev = (IndexDecoder)streams.get(streams.size() - 1); 442 for (int i = streams.size() - 2; i >= 0; --i) { 443 IndexDecoder cur = (IndexDecoder)streams.get(i); 444 cur.setOffsets(prev); 445 prev = cur; 446 } 447 448 // Initialize curBlockInfo to point to the first Stream. 449 // The blockNumber will be left to -1 so that .hasNext() 450 // and .setNext() work to get the first Block when starting 451 // to decompress from the beginning of the file. 452 IndexDecoder first = (IndexDecoder)streams.get(streams.size() - 1); 453 curBlockInfo = new BlockInfo(first); 454 455 // queriedBlockInfo needs to be allocated too. The Stream used for 456 // initialization doesn't matter though. 457 queriedBlockInfo = new BlockInfo(first); 458 } 459 460 /** 461 * Gets the types of integrity checks used in the .xz file. 462 * Multiple checks are possible only if there are multiple 463 * concatenated XZ Streams. 464 * <p> 465 * The returned value has a bit set for every check type that is present. 466 * For example, if CRC64 and SHA-256 were used, the return value is 467 * <code>(1 << XZ.CHECK_CRC64) 468 * | (1 << XZ.CHECK_SHA256)</code>. 469 */ getCheckTypes()470 public int getCheckTypes() { 471 return checkTypes; 472 } 473 474 /** 475 * Gets the amount of memory in kibibytes (KiB) used by 476 * the data structures needed to locate the XZ Blocks. 477 * This is usually useless information but since it is calculated 478 * for memory usage limit anyway, it is nice to make it available to too. 479 */ getIndexMemoryUsage()480 public int getIndexMemoryUsage() { 481 return indexMemoryUsage; 482 } 483 484 /** 485 * Gets the uncompressed size of the largest XZ Block in bytes. 486 * This can be useful if you want to check that the file doesn't 487 * have huge XZ Blocks which could make seeking to arbitrary offsets 488 * very slow. Note that huge Blocks don't automatically mean that 489 * seeking would be slow, for example, seeking to the beginning of 490 * any Block is always fast. 491 */ getLargestBlockSize()492 public long getLargestBlockSize() { 493 return largestBlockSize; 494 } 495 496 /** 497 * Gets the number of Streams in the .xz file. 498 * 499 * @since 1.3 500 */ getStreamCount()501 public int getStreamCount() { 502 return streams.size(); 503 } 504 505 /** 506 * Gets the number of Blocks in the .xz file. 507 * 508 * @since 1.3 509 */ getBlockCount()510 public int getBlockCount() { 511 return blockCount; 512 } 513 514 /** 515 * Gets the uncompressed start position of the given Block. 516 * 517 * @throws IndexOutOfBoundsException if 518 * <code>blockNumber < 0</code> or 519 * <code>blockNumber >= getBlockCount()</code>. 520 * 521 * @since 1.3 522 */ getBlockPos(int blockNumber)523 public long getBlockPos(int blockNumber) { 524 locateBlockByNumber(queriedBlockInfo, blockNumber); 525 return queriedBlockInfo.uncompressedOffset; 526 } 527 528 /** 529 * Gets the uncompressed size of the given Block. 530 * 531 * @throws IndexOutOfBoundsException if 532 * <code>blockNumber < 0</code> or 533 * <code>blockNumber >= getBlockCount()</code>. 534 * 535 * @since 1.3 536 */ getBlockSize(int blockNumber)537 public long getBlockSize(int blockNumber) { 538 locateBlockByNumber(queriedBlockInfo, blockNumber); 539 return queriedBlockInfo.uncompressedSize; 540 } 541 542 /** 543 * Gets the position where the given compressed Block starts in 544 * the underlying .xz file. 545 * This information is rarely useful to the users of this class. 546 * 547 * @throws IndexOutOfBoundsException if 548 * <code>blockNumber < 0</code> or 549 * <code>blockNumber >= getBlockCount()</code>. 550 * 551 * @since 1.3 552 */ getBlockCompPos(int blockNumber)553 public long getBlockCompPos(int blockNumber) { 554 locateBlockByNumber(queriedBlockInfo, blockNumber); 555 return queriedBlockInfo.compressedOffset; 556 } 557 558 /** 559 * Gets the compressed size of the given Block. 560 * This together with the uncompressed size can be used to calculate 561 * the compression ratio of the specific Block. 562 * 563 * @throws IndexOutOfBoundsException if 564 * <code>blockNumber < 0</code> or 565 * <code>blockNumber >= getBlockCount()</code>. 566 * 567 * @since 1.3 568 */ getBlockCompSize(int blockNumber)569 public long getBlockCompSize(int blockNumber) { 570 locateBlockByNumber(queriedBlockInfo, blockNumber); 571 return (queriedBlockInfo.unpaddedSize + 3) & ~3; 572 } 573 574 /** 575 * Gets integrity check type (Check ID) of the given Block. 576 * 577 * @throws IndexOutOfBoundsException if 578 * <code>blockNumber < 0</code> or 579 * <code>blockNumber >= getBlockCount()</code>. 580 * 581 * @see #getCheckTypes() 582 * 583 * @since 1.3 584 */ getBlockCheckType(int blockNumber)585 public int getBlockCheckType(int blockNumber) { 586 locateBlockByNumber(queriedBlockInfo, blockNumber); 587 return queriedBlockInfo.getCheckType(); 588 } 589 590 /** 591 * Gets the number of the Block that contains the byte at the given 592 * uncompressed position. 593 * 594 * @throws IndexOutOfBoundsException if 595 * <code>pos < 0</code> or 596 * <code>pos >= length()</code>. 597 * 598 * @since 1.3 599 */ getBlockNumber(long pos)600 public int getBlockNumber(long pos) { 601 locateBlockByPos(queriedBlockInfo, pos); 602 return queriedBlockInfo.blockNumber; 603 } 604 605 /** 606 * Decompresses the next byte from this input stream. 607 * 608 * @return the next decompressed byte, or <code>-1</code> 609 * to indicate the end of the compressed stream 610 * 611 * @throws CorruptedInputException 612 * @throws UnsupportedOptionsException 613 * @throws MemoryLimitException 614 * 615 * @throws XZIOException if the stream has been closed 616 * 617 * @throws IOException may be thrown by <code>in</code> 618 */ read()619 public int read() throws IOException { 620 return read(tempBuf, 0, 1) == -1 ? -1 : (tempBuf[0] & 0xFF); 621 } 622 623 /** 624 * Decompresses into an array of bytes. 625 * <p> 626 * If <code>len</code> is zero, no bytes are read and <code>0</code> 627 * is returned. Otherwise this will try to decompress <code>len</code> 628 * bytes of uncompressed data. Less than <code>len</code> bytes may 629 * be read only in the following situations: 630 * <ul> 631 * <li>The end of the compressed data was reached successfully.</li> 632 * <li>An error is detected after at least one but less than 633 * <code>len</code> bytes have already been successfully 634 * decompressed. The next call with non-zero <code>len</code> 635 * will immediately throw the pending exception.</li> 636 * <li>An exception is thrown.</li> 637 * </ul> 638 * 639 * @param buf target buffer for uncompressed data 640 * @param off start offset in <code>buf</code> 641 * @param len maximum number of uncompressed bytes to read 642 * 643 * @return number of bytes read, or <code>-1</code> to indicate 644 * the end of the compressed stream 645 * 646 * @throws CorruptedInputException 647 * @throws UnsupportedOptionsException 648 * @throws MemoryLimitException 649 * 650 * @throws XZIOException if the stream has been closed 651 * 652 * @throws IOException may be thrown by <code>in</code> 653 */ read(byte[] buf, int off, int len)654 public int read(byte[] buf, int off, int len) throws IOException { 655 if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length) 656 throw new IndexOutOfBoundsException(); 657 658 if (len == 0) 659 return 0; 660 661 if (in == null) 662 throw new XZIOException("Stream closed"); 663 664 if (exception != null) 665 throw exception; 666 667 int size = 0; 668 669 try { 670 if (seekNeeded) 671 seek(); 672 673 if (endReached) 674 return -1; 675 676 while (len > 0) { 677 if (blockDecoder == null) { 678 seek(); 679 if (endReached) 680 break; 681 } 682 683 int ret = blockDecoder.read(buf, off, len); 684 685 if (ret > 0) { 686 curPos += ret; 687 size += ret; 688 off += ret; 689 len -= ret; 690 } else if (ret == -1) { 691 blockDecoder = null; 692 } 693 } 694 } catch (IOException e) { 695 // We know that the file isn't simply truncated because we could 696 // parse the Indexes in the constructor. So convert EOFException 697 // to CorruptedInputException. 698 if (e instanceof EOFException) 699 e = new CorruptedInputException(); 700 701 exception = e; 702 if (size == 0) 703 throw e; 704 } 705 706 return size; 707 } 708 709 /** 710 * Returns the number of uncompressed bytes that can be read 711 * without blocking. The value is returned with an assumption 712 * that the compressed input data will be valid. If the compressed 713 * data is corrupt, <code>CorruptedInputException</code> may get 714 * thrown before the number of bytes claimed to be available have 715 * been read from this input stream. 716 * 717 * @return the number of uncompressed bytes that can be read 718 * without blocking 719 */ available()720 public int available() throws IOException { 721 if (in == null) 722 throw new XZIOException("Stream closed"); 723 724 if (exception != null) 725 throw exception; 726 727 if (endReached || seekNeeded || blockDecoder == null) 728 return 0; 729 730 return blockDecoder.available(); 731 } 732 733 /** 734 * Closes the stream and calls <code>in.close()</code>. 735 * If the stream was already closed, this does nothing. 736 * 737 * @throws IOException if thrown by <code>in.close()</code> 738 */ close()739 public void close() throws IOException { 740 if (in != null) { 741 try { 742 in.close(); 743 } finally { 744 in = null; 745 } 746 } 747 } 748 749 /** 750 * Gets the uncompressed size of this input stream. If there are multiple 751 * XZ Streams, the total uncompressed size of all XZ Streams is returned. 752 */ length()753 public long length() { 754 return uncompressedSize; 755 } 756 757 /** 758 * Gets the current uncompressed position in this input stream. 759 * 760 * @throws XZIOException if the stream has been closed 761 */ position()762 public long position() throws IOException { 763 if (in == null) 764 throw new XZIOException("Stream closed"); 765 766 return seekNeeded ? seekPos : curPos; 767 } 768 769 /** 770 * Seeks to the specified absolute uncompressed position in the stream. 771 * This only stores the new position, so this function itself is always 772 * very fast. The actual seek is done when <code>read</code> is called 773 * to read at least one byte. 774 * <p> 775 * Seeking past the end of the stream is possible. In that case 776 * <code>read</code> will return <code>-1</code> to indicate 777 * the end of the stream. 778 * 779 * @param pos new uncompressed read position 780 * 781 * @throws XZIOException 782 * if <code>pos</code> is negative, or 783 * if stream has been closed 784 */ seek(long pos)785 public void seek(long pos) throws IOException { 786 if (in == null) 787 throw new XZIOException("Stream closed"); 788 789 if (pos < 0) 790 throw new XZIOException("Negative seek position: " + pos); 791 792 seekPos = pos; 793 seekNeeded = true; 794 } 795 796 /** 797 * Seeks to the beginning of the given XZ Block. 798 * 799 * @throws XZIOException 800 * if <code>blockNumber < 0</code> or 801 * <code>blockNumber >= getBlockCount()</code>, 802 * or if stream has been closed 803 * 804 * @since 1.3 805 */ seekToBlock(int blockNumber)806 public void seekToBlock(int blockNumber) throws IOException { 807 if (in == null) 808 throw new XZIOException("Stream closed"); 809 810 if (blockNumber < 0 || blockNumber >= blockCount) 811 throw new XZIOException("Invalid XZ Block number: " + blockNumber); 812 813 // This is a bit silly implementation. Here we locate the uncompressed 814 // offset of the specified Block, then when doing the actual seek in 815 // seek(), we need to find the Block number based on seekPos. 816 seekPos = getBlockPos(blockNumber); 817 seekNeeded = true; 818 } 819 820 /** 821 * Does the actual seeking. This is also called when <code>read</code> 822 * needs a new Block to decode. 823 */ seek()824 private void seek() throws IOException { 825 // If seek(long) wasn't called, we simply need to get the next Block 826 // from the same Stream. If there are no more Blocks in this Stream, 827 // then we behave as if seek(long) had been called. 828 if (!seekNeeded) { 829 if (curBlockInfo.hasNext()) { 830 curBlockInfo.setNext(); 831 initBlockDecoder(); 832 return; 833 } 834 835 seekPos = curPos; 836 } 837 838 seekNeeded = false; 839 840 // Check if we are seeking to or past the end of the file. 841 if (seekPos >= uncompressedSize) { 842 curPos = seekPos; 843 blockDecoder = null; 844 endReached = true; 845 return; 846 } 847 848 endReached = false; 849 850 // Locate the Block that contains the uncompressed target position. 851 locateBlockByPos(curBlockInfo, seekPos); 852 853 // Seek in the underlying stream and create a new Block decoder 854 // only if really needed. We can skip it if the current position 855 // is already in the correct Block and the target position hasn't 856 // been decompressed yet. 857 // 858 // NOTE: If curPos points to the beginning of this Block, it's 859 // because it was left there after decompressing an earlier Block. 860 // In that case, decoding of the current Block hasn't been started 861 // yet. (Decoding of a Block won't be started until at least one 862 // byte will also be read from it.) 863 if (!(curPos > curBlockInfo.uncompressedOffset && curPos <= seekPos)) { 864 // Seek to the beginning of the Block. 865 in.seek(curBlockInfo.compressedOffset); 866 867 // Since it is possible that this Block is from a different 868 // Stream than the previous Block, initialize a new Check. 869 check = Check.getInstance(curBlockInfo.getCheckType()); 870 871 // Create a new Block decoder. 872 initBlockDecoder(); 873 curPos = curBlockInfo.uncompressedOffset; 874 } 875 876 // If the target wasn't at a Block boundary, decompress and throw 877 // away data to reach the target position. 878 if (seekPos > curPos) { 879 // NOTE: The "if" below is there just in case. In this situation, 880 // blockDecoder.skip will always skip the requested amount 881 // or throw an exception. 882 long skipAmount = seekPos - curPos; 883 if (blockDecoder.skip(skipAmount) != skipAmount) 884 throw new CorruptedInputException(); 885 886 curPos = seekPos; 887 } 888 } 889 890 /** 891 * Locates the Block that contains the given uncompressed position. 892 */ locateBlockByPos(BlockInfo info, long pos)893 private void locateBlockByPos(BlockInfo info, long pos) { 894 if (pos < 0 || pos >= uncompressedSize) 895 throw new IndexOutOfBoundsException( 896 "Invalid uncompressed position: " + pos); 897 898 // Locate the Stream that contains the target position. 899 IndexDecoder index; 900 for (int i = 0; ; ++i) { 901 index = (IndexDecoder)streams.get(i); 902 if (index.hasUncompressedOffset(pos)) 903 break; 904 } 905 906 // Locate the Block from the Stream that contains the target position. 907 index.locateBlock(info, pos); 908 909 assert (info.compressedOffset & 3) == 0; 910 assert info.uncompressedSize > 0; 911 assert pos >= info.uncompressedOffset; 912 assert pos < info.uncompressedOffset + info.uncompressedSize; 913 } 914 915 /** 916 * Locates the given Block and stores information about it 917 * to <code>info</code>. 918 */ 919 private void locateBlockByNumber(BlockInfo info, int blockNumber) { 920 // Validate. 921 if (blockNumber < 0 || blockNumber >= blockCount) 922 throw new IndexOutOfBoundsException( 923 "Invalid XZ Block number: " + blockNumber); 924 925 // Skip the search if info already points to the correct Block. 926 if (info.blockNumber == blockNumber) 927 return; 928 929 // Search the Stream that contains the given Block and then 930 // search the Block from that Stream. 931 for (int i = 0; ; ++i) { 932 IndexDecoder index = (IndexDecoder)streams.get(i); 933 if (index.hasRecord(blockNumber)) { 934 index.setBlockInfo(info, blockNumber); 935 return; 936 } 937 } 938 } 939 940 /** 941 * Initializes a new BlockInputStream. This is a helper function for 942 * <code>seek()</code>. 943 */ 944 private void initBlockDecoder() throws IOException { 945 try { 946 // Set it to null first so that GC can collect it if memory 947 // runs tight when initializing a new BlockInputStream. 948 blockDecoder = null; 949 blockDecoder = new BlockInputStream( 950 in, check, verifyCheck, memoryLimit, 951 curBlockInfo.unpaddedSize, curBlockInfo.uncompressedSize); 952 } catch (MemoryLimitException e) { 953 // BlockInputStream doesn't know how much memory we had 954 // already needed so we need to recreate the exception. 955 assert memoryLimit >= 0; 956 throw new MemoryLimitException( 957 e.getMemoryNeeded() + indexMemoryUsage, 958 memoryLimit + indexMemoryUsage); 959 } catch (IndexIndicatorException e) { 960 // It cannot be Index so the file must be corrupt. 961 throw new CorruptedInputException(); 962 } 963 } 964 } 965