1 /* 2 * SingleXZInputStream 3 * 4 * Author: Lasse Collin <lasse.collin@tukaani.org> 5 * 6 * This file has been put into the public domain. 7 * You can do whatever you want with this file. 8 */ 9 10 package org.tukaani.xz; 11 12 import java.io.InputStream; 13 import java.io.DataInputStream; 14 import java.io.IOException; 15 import java.io.EOFException; 16 import org.tukaani.xz.common.DecoderUtil; 17 import org.tukaani.xz.common.StreamFlags; 18 import org.tukaani.xz.index.IndexHash; 19 import org.tukaani.xz.check.Check; 20 21 /** 22 * Decompresses exactly one XZ Stream in streamed mode (no seeking). 23 * The decompression stops after the first XZ Stream has been decompressed, 24 * and the read position in the input stream is left at the first byte 25 * after the end of the XZ Stream. This can be useful when XZ data has 26 * been stored inside some other file format or protocol. 27 * <p> 28 * Unless you know what you are doing, don't use this class to decompress 29 * standalone .xz files. For that purpose, use <code>XZInputStream</code>. 30 * 31 * <h4>When uncompressed size is known beforehand</h4> 32 * <p> 33 * If you are decompressing complete XZ streams and your application knows 34 * exactly how much uncompressed data there should be, it is good to try 35 * reading one more byte by calling <code>read()</code> and checking 36 * that it returns <code>-1</code>. This way the decompressor will parse the 37 * file footers and verify the integrity checks, giving the caller more 38 * confidence that the uncompressed data is valid. 39 * 40 * @see XZInputStream 41 */ 42 public class SingleXZInputStream extends InputStream { 43 private InputStream in; 44 private final ArrayCache arrayCache; 45 private final int memoryLimit; 46 private final StreamFlags streamHeaderFlags; 47 private final Check check; 48 private final boolean verifyCheck; 49 private BlockInputStream blockDecoder = null; 50 private final IndexHash indexHash = new IndexHash(); 51 private boolean endReached = false; 52 private IOException exception = null; 53 54 private final byte[] tempBuf = new byte[1]; 55 56 /** 57 * Reads the Stream Header into a buffer. 58 * This is a helper function for the constructors. 59 */ readStreamHeader(InputStream in)60 private static byte[] readStreamHeader(InputStream in) throws IOException { 61 byte[] streamHeader = new byte[DecoderUtil.STREAM_HEADER_SIZE]; 62 new DataInputStream(in).readFully(streamHeader); 63 return streamHeader; 64 } 65 66 /** 67 * Creates a new XZ decompressor that decompresses exactly one 68 * XZ Stream from <code>in</code> without a memory usage limit. 69 * <p> 70 * This constructor reads and parses the XZ Stream Header (12 bytes) 71 * from <code>in</code>. The header of the first Block is not read 72 * until <code>read</code> is called. 73 * 74 * @param in input stream from which XZ-compressed 75 * data is read 76 * 77 * @throws XZFormatException 78 * input is not in the XZ format 79 * 80 * @throws CorruptedInputException 81 * XZ header CRC32 doesn't match 82 * 83 * @throws UnsupportedOptionsException 84 * XZ header is valid but specifies options 85 * not supported by this implementation 86 * 87 * @throws EOFException 88 * less than 12 bytes of input was available 89 * from <code>in</code> 90 * 91 * @throws IOException may be thrown by <code>in</code> 92 */ SingleXZInputStream(InputStream in)93 public SingleXZInputStream(InputStream in) throws IOException { 94 this(in, -1); 95 } 96 97 /** 98 * Creates a new XZ decompressor that decompresses exactly one 99 * XZ Stream from <code>in</code> without a memory usage limit. 100 * <p> 101 * This is identical to <code>SingleXZInputStream(InputStream)</code> 102 * except that this also takes the <code>arrayCache</code> argument. 103 * 104 * @param in input stream from which XZ-compressed 105 * data is read 106 * 107 * @param arrayCache cache to be used for allocating large arrays 108 * 109 * @throws XZFormatException 110 * input is not in the XZ format 111 * 112 * @throws CorruptedInputException 113 * XZ header CRC32 doesn't match 114 * 115 * @throws UnsupportedOptionsException 116 * XZ header is valid but specifies options 117 * not supported by this implementation 118 * 119 * @throws EOFException 120 * less than 12 bytes of input was available 121 * from <code>in</code> 122 * 123 * @throws IOException may be thrown by <code>in</code> 124 * 125 * @since 1.7 126 */ SingleXZInputStream(InputStream in, ArrayCache arrayCache)127 public SingleXZInputStream(InputStream in, ArrayCache arrayCache) 128 throws IOException { 129 this(in, -1, arrayCache); 130 } 131 132 /** 133 * Creates a new XZ decompressor that decompresses exactly one 134 * XZ Stream from <code>in</code> with an optional memory usage limit. 135 * <p> 136 * This is identical to <code>SingleXZInputStream(InputStream)</code> 137 * except that this also takes the <code>memoryLimit</code> argument. 138 * 139 * @param in input stream from which XZ-compressed 140 * data is read 141 * 142 * @param memoryLimit memory usage limit in kibibytes (KiB) 143 * or <code>-1</code> to impose no 144 * memory usage limit 145 * 146 * @throws XZFormatException 147 * input is not in the XZ format 148 * 149 * @throws CorruptedInputException 150 * XZ header CRC32 doesn't match 151 * 152 * @throws UnsupportedOptionsException 153 * XZ header is valid but specifies options 154 * not supported by this implementation 155 * 156 * @throws EOFException 157 * less than 12 bytes of input was available 158 * from <code>in</code> 159 * 160 * @throws IOException may be thrown by <code>in</code> 161 */ SingleXZInputStream(InputStream in, int memoryLimit)162 public SingleXZInputStream(InputStream in, int memoryLimit) 163 throws IOException { 164 this(in, memoryLimit, true); 165 } 166 167 /** 168 * Creates a new XZ decompressor that decompresses exactly one 169 * XZ Stream from <code>in</code> with an optional memory usage limit. 170 * <p> 171 * This is identical to <code>SingleXZInputStream(InputStream)</code> 172 * except that this also takes the <code>memoryLimit</code> and 173 * <code>arrayCache</code> arguments. 174 * 175 * @param in input stream from which XZ-compressed 176 * data is read 177 * 178 * @param memoryLimit memory usage limit in kibibytes (KiB) 179 * or <code>-1</code> to impose no 180 * memory usage limit 181 * 182 * @param arrayCache cache to be used for allocating large arrays 183 * 184 * @throws XZFormatException 185 * input is not in the XZ format 186 * 187 * @throws CorruptedInputException 188 * XZ header CRC32 doesn't match 189 * 190 * @throws UnsupportedOptionsException 191 * XZ header is valid but specifies options 192 * not supported by this implementation 193 * 194 * @throws EOFException 195 * less than 12 bytes of input was available 196 * from <code>in</code> 197 * 198 * @throws IOException may be thrown by <code>in</code> 199 * 200 * @since 1.7 201 */ SingleXZInputStream(InputStream in, int memoryLimit, ArrayCache arrayCache)202 public SingleXZInputStream(InputStream in, int memoryLimit, 203 ArrayCache arrayCache) throws IOException { 204 this(in, memoryLimit, true, arrayCache); 205 } 206 207 /** 208 * Creates a new XZ decompressor that decompresses exactly one 209 * XZ Stream from <code>in</code> with an optional memory usage limit 210 * and ability to disable verification of integrity checks. 211 * <p> 212 * This is identical to <code>SingleXZInputStream(InputStream,int)</code> 213 * except that this also takes the <code>verifyCheck</code> argument. 214 * <p> 215 * Note that integrity check verification should almost never be disabled. 216 * Possible reasons to disable integrity check verification: 217 * <ul> 218 * <li>Trying to recover data from a corrupt .xz file.</li> 219 * <li>Speeding up decompression. This matters mostly with SHA-256 220 * or with files that have compressed extremely well. It's recommended 221 * that integrity checking isn't disabled for performance reasons 222 * unless the file integrity is verified externally in some other 223 * way.</li> 224 * </ul> 225 * <p> 226 * <code>verifyCheck</code> only affects the integrity check of 227 * the actual compressed data. The CRC32 fields in the headers 228 * are always verified. 229 * 230 * @param in input stream from which XZ-compressed 231 * data is read 232 * 233 * @param memoryLimit memory usage limit in kibibytes (KiB) 234 * or <code>-1</code> to impose no 235 * memory usage limit 236 * 237 * @param verifyCheck if <code>true</code>, the integrity checks 238 * will be verified; this should almost never 239 * be set to <code>false</code> 240 * 241 * @throws XZFormatException 242 * input is not in the XZ format 243 * 244 * @throws CorruptedInputException 245 * XZ header CRC32 doesn't match 246 * 247 * @throws UnsupportedOptionsException 248 * XZ header is valid but specifies options 249 * not supported by this implementation 250 * 251 * @throws EOFException 252 * less than 12 bytes of input was available 253 * from <code>in</code> 254 * 255 * @throws IOException may be thrown by <code>in</code> 256 * 257 * @since 1.6 258 */ SingleXZInputStream(InputStream in, int memoryLimit, boolean verifyCheck)259 public SingleXZInputStream(InputStream in, int memoryLimit, 260 boolean verifyCheck) throws IOException { 261 this(in, memoryLimit, verifyCheck, ArrayCache.getDefaultCache()); 262 } 263 264 /** 265 * Creates a new XZ decompressor that decompresses exactly one 266 * XZ Stream from <code>in</code> with an optional memory usage limit 267 * and ability to disable verification of integrity checks. 268 * <p> 269 * This is identical to 270 * <code>SingleXZInputStream(InputStream,int,boolean)</code> 271 * except that this also takes the <code>arrayCache</code> argument. 272 * 273 * @param in input stream from which XZ-compressed 274 * data is read 275 * 276 * @param memoryLimit memory usage limit in kibibytes (KiB) 277 * or <code>-1</code> to impose no 278 * memory usage limit 279 * 280 * @param verifyCheck if <code>true</code>, the integrity checks 281 * will be verified; this should almost never 282 * be set to <code>false</code> 283 * 284 * @param arrayCache cache to be used for allocating large arrays 285 * 286 * @throws XZFormatException 287 * input is not in the XZ format 288 * 289 * @throws CorruptedInputException 290 * XZ header CRC32 doesn't match 291 * 292 * @throws UnsupportedOptionsException 293 * XZ header is valid but specifies options 294 * not supported by this implementation 295 * 296 * @throws EOFException 297 * less than 12 bytes of input was available 298 * from <code>in</code> 299 * 300 * @throws IOException may be thrown by <code>in</code> 301 * 302 * @since 1.7 303 */ SingleXZInputStream(InputStream in, int memoryLimit, boolean verifyCheck, ArrayCache arrayCache)304 public SingleXZInputStream(InputStream in, int memoryLimit, 305 boolean verifyCheck, ArrayCache arrayCache) 306 throws IOException { 307 this(in, memoryLimit, verifyCheck, readStreamHeader(in), arrayCache); 308 } 309 SingleXZInputStream(InputStream in, int memoryLimit, boolean verifyCheck, byte[] streamHeader, ArrayCache arrayCache)310 SingleXZInputStream(InputStream in, int memoryLimit, boolean verifyCheck, 311 byte[] streamHeader, ArrayCache arrayCache) 312 throws IOException { 313 this.arrayCache = arrayCache; 314 this.in = in; 315 this.memoryLimit = memoryLimit; 316 this.verifyCheck = verifyCheck; 317 streamHeaderFlags = DecoderUtil.decodeStreamHeader(streamHeader); 318 check = Check.getInstance(streamHeaderFlags.checkType); 319 } 320 321 /** 322 * Gets the ID of the integrity check used in this XZ Stream. 323 * 324 * @return the Check ID specified in the XZ Stream Header 325 */ getCheckType()326 public int getCheckType() { 327 return streamHeaderFlags.checkType; 328 } 329 330 /** 331 * Gets the name of the integrity check used in this XZ Stream. 332 * 333 * @return the name of the check specified in the XZ Stream Header 334 */ getCheckName()335 public String getCheckName() { 336 return check.getName(); 337 } 338 339 /** 340 * Decompresses the next byte from this input stream. 341 * <p> 342 * Reading lots of data with <code>read()</code> from this input stream 343 * may be inefficient. Wrap it in {@link java.io.BufferedInputStream} 344 * if you need to read lots of data one byte at a time. 345 * 346 * @return the next decompressed byte, or <code>-1</code> 347 * to indicate the end of the compressed stream 348 * 349 * @throws CorruptedInputException 350 * @throws UnsupportedOptionsException 351 * @throws MemoryLimitException 352 * 353 * @throws XZIOException if the stream has been closed 354 * 355 * @throws EOFException 356 * compressed input is truncated or corrupt 357 * 358 * @throws IOException may be thrown by <code>in</code> 359 */ read()360 public int read() throws IOException { 361 return read(tempBuf, 0, 1) == -1 ? -1 : (tempBuf[0] & 0xFF); 362 } 363 364 /** 365 * Decompresses into an array of bytes. 366 * <p> 367 * If <code>len</code> is zero, no bytes are read and <code>0</code> 368 * is returned. Otherwise this will try to decompress <code>len</code> 369 * bytes of uncompressed data. Less than <code>len</code> bytes may 370 * be read only in the following situations: 371 * <ul> 372 * <li>The end of the compressed data was reached successfully.</li> 373 * <li>An error is detected after at least one but less <code>len</code> 374 * bytes have already been successfully decompressed. 375 * The next call with non-zero <code>len</code> will immediately 376 * throw the pending exception.</li> 377 * <li>An exception is thrown.</li> 378 * </ul> 379 * 380 * @param buf target buffer for uncompressed data 381 * @param off start offset in <code>buf</code> 382 * @param len maximum number of uncompressed bytes to read 383 * 384 * @return number of bytes read, or <code>-1</code> to indicate 385 * the end of the compressed stream 386 * 387 * @throws CorruptedInputException 388 * @throws UnsupportedOptionsException 389 * @throws MemoryLimitException 390 * 391 * @throws XZIOException if the stream has been closed 392 * 393 * @throws EOFException 394 * compressed input is truncated or corrupt 395 * 396 * @throws IOException may be thrown by <code>in</code> 397 */ read(byte[] buf, int off, int len)398 public int read(byte[] buf, int off, int len) throws IOException { 399 if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length) 400 throw new IndexOutOfBoundsException(); 401 402 if (len == 0) 403 return 0; 404 405 if (in == null) 406 throw new XZIOException("Stream closed"); 407 408 if (exception != null) 409 throw exception; 410 411 if (endReached) 412 return -1; 413 414 int size = 0; 415 416 try { 417 while (len > 0) { 418 if (blockDecoder == null) { 419 try { 420 blockDecoder = new BlockInputStream( 421 in, check, verifyCheck, memoryLimit, -1, -1, 422 arrayCache); 423 } catch (IndexIndicatorException e) { 424 indexHash.validate(in); 425 validateStreamFooter(); 426 endReached = true; 427 return size > 0 ? size : -1; 428 } 429 } 430 431 int ret = blockDecoder.read(buf, off, len); 432 433 if (ret > 0) { 434 size += ret; 435 off += ret; 436 len -= ret; 437 } else if (ret == -1) { 438 indexHash.add(blockDecoder.getUnpaddedSize(), 439 blockDecoder.getUncompressedSize()); 440 blockDecoder = null; 441 } 442 } 443 } catch (IOException e) { 444 exception = e; 445 if (size == 0) 446 throw e; 447 } 448 449 return size; 450 } 451 validateStreamFooter()452 private void validateStreamFooter() throws IOException { 453 byte[] buf = new byte[DecoderUtil.STREAM_HEADER_SIZE]; 454 new DataInputStream(in).readFully(buf); 455 StreamFlags streamFooterFlags = DecoderUtil.decodeStreamFooter(buf); 456 457 if (!DecoderUtil.areStreamFlagsEqual(streamHeaderFlags, 458 streamFooterFlags) 459 || indexHash.getIndexSize() != streamFooterFlags.backwardSize) 460 throw new CorruptedInputException( 461 "XZ Stream Footer does not match Stream Header"); 462 } 463 464 /** 465 * Returns the number of uncompressed bytes that can be read 466 * without blocking. The value is returned with an assumption 467 * that the compressed input data will be valid. If the compressed 468 * data is corrupt, <code>CorruptedInputException</code> may get 469 * thrown before the number of bytes claimed to be available have 470 * been read from this input stream. 471 * 472 * @return the number of uncompressed bytes that can be read 473 * without blocking 474 */ available()475 public int available() throws IOException { 476 if (in == null) 477 throw new XZIOException("Stream closed"); 478 479 if (exception != null) 480 throw exception; 481 482 return blockDecoder == null ? 0 : blockDecoder.available(); 483 } 484 485 /** 486 * Closes the stream and calls <code>in.close()</code>. 487 * If the stream was already closed, this does nothing. 488 * <p> 489 * This is equivalent to <code>close(true)</code>. 490 * 491 * @throws IOException if thrown by <code>in.close()</code> 492 */ close()493 public void close() throws IOException { 494 close(true); 495 } 496 497 /** 498 * Closes the stream and optionally calls <code>in.close()</code>. 499 * If the stream was already closed, this does nothing. 500 * If <code>close(false)</code> has been called, a further 501 * call of <code>close(true)</code> does nothing (it doesn't call 502 * <code>in.close()</code>). 503 * <p> 504 * If you don't want to close the underlying <code>InputStream</code>, 505 * there is usually no need to worry about closing this stream either; 506 * it's fine to do nothing and let the garbage collector handle it. 507 * However, if you are using {@link ArrayCache}, <code>close(false)</code> 508 * can be useful to put the allocated arrays back to the cache without 509 * closing the underlying <code>InputStream</code>. 510 * <p> 511 * Note that if you successfully reach the end of the stream 512 * (<code>read</code> returns <code>-1</code>), the arrays are 513 * automatically put back to the cache by that <code>read</code> call. In 514 * this situation <code>close(false)</code> is redundant (but harmless). 515 * 516 * @throws IOException if thrown by <code>in.close()</code> 517 * 518 * @since 1.7 519 */ close(boolean closeInput)520 public void close(boolean closeInput) throws IOException { 521 if (in != null) { 522 if (blockDecoder != null) { 523 blockDecoder.close(); 524 blockDecoder = null; 525 } 526 527 try { 528 if (closeInput) 529 in.close(); 530 } finally { 531 in = null; 532 } 533 } 534 } 535 } 536