1 /* 2 * LZMAInputStream 3 * 4 * Authors: Lasse Collin <lasse.collin@tukaani.org> 5 * Igor Pavlov <http://7-zip.org/> 6 * 7 * This file has been put into the public domain. 8 * You can do whatever you want with this file. 9 */ 10 11 package org.tukaani.xz; 12 13 import java.io.InputStream; 14 import java.io.DataInputStream; 15 import java.io.IOException; 16 import java.io.EOFException; 17 import org.tukaani.xz.lz.LZDecoder; 18 import org.tukaani.xz.rangecoder.RangeDecoderFromStream; 19 import org.tukaani.xz.lzma.LZMADecoder; 20 21 /** 22 * Decompresses legacy .lzma files and raw LZMA streams (no .lzma header). 23 * <p> 24 * <b>IMPORTANT:</b> In contrast to other classes in this package, this class 25 * reads data from its input stream one byte at a time. If the input stream 26 * is for example {@link java.io.FileInputStream}, wrapping it into 27 * {@link java.io.BufferedInputStream} tends to improve performance a lot. 28 * This is not automatically done by this class because there may be use 29 * cases where it is desired that this class won't read any bytes past 30 * the end of the LZMA stream. 31 * <p> 32 * Even when using <code>BufferedInputStream</code>, the performance tends 33 * to be worse (maybe 10-20 % slower) than with {@link LZMA2InputStream} 34 * or {@link XZInputStream} (when the .xz file contains LZMA2-compressed data). 35 * 36 * @since 1.4 37 */ 38 public class LZMAInputStream extends InputStream { 39 /** 40 * Largest dictionary size supported by this implementation. 41 * <p> 42 * LZMA allows dictionaries up to one byte less than 4 GiB. This 43 * implementation supports only 16 bytes less than 2 GiB. This 44 * limitation is due to Java using signed 32-bit integers for array 45 * indexing. The limitation shouldn't matter much in practice since so 46 * huge dictionaries are not normally used. 47 */ 48 public static final int DICT_SIZE_MAX = Integer.MAX_VALUE & ~15; 49 50 private InputStream in; 51 private ArrayCache arrayCache; 52 private LZDecoder lz; 53 private RangeDecoderFromStream rc; 54 private LZMADecoder lzma; 55 56 private boolean endReached = false; 57 private boolean relaxedEndCondition = false; 58 59 private final byte[] tempBuf = new byte[1]; 60 61 /** 62 * Number of uncompressed bytes left to be decompressed, or -1 if 63 * the end marker is used. 64 */ 65 private long remainingSize; 66 67 private IOException exception = null; 68 69 /** 70 * Gets approximate decompressor memory requirements as kibibytes for 71 * the given dictionary size and LZMA properties byte (lc, lp, and pb). 72 * 73 * @param dictSize LZMA dictionary size as bytes, should be 74 * in the range [<code>0</code>, 75 * <code>DICT_SIZE_MAX</code>] 76 * 77 * @param propsByte LZMA properties byte that encodes the values 78 * of lc, lp, and pb 79 * 80 * @return approximate memory requirements as kibibytes (KiB) 81 * 82 * @throws UnsupportedOptionsException 83 * if <code>dictSize</code> is outside 84 * the range [<code>0</code>, 85 * <code>DICT_SIZE_MAX</code>] 86 * 87 * @throws CorruptedInputException 88 * if <code>propsByte</code> is invalid 89 */ getMemoryUsage(int dictSize, byte propsByte)90 public static int getMemoryUsage(int dictSize, byte propsByte) 91 throws UnsupportedOptionsException, CorruptedInputException { 92 if (dictSize < 0 || dictSize > DICT_SIZE_MAX) 93 throw new UnsupportedOptionsException( 94 "LZMA dictionary is too big for this implementation"); 95 96 int props = propsByte & 0xFF; 97 if (props > (4 * 5 + 4) * 9 + 8) 98 throw new CorruptedInputException("Invalid LZMA properties byte"); 99 100 props %= 9 * 5; 101 int lp = props / 9; 102 int lc = props - lp * 9; 103 104 return getMemoryUsage(dictSize, lc, lp); 105 } 106 107 /** 108 * Gets approximate decompressor memory requirements as kibibytes for 109 * the given dictionary size, lc, and lp. Note that pb isn't needed. 110 * 111 * @param dictSize LZMA dictionary size as bytes, must be 112 * in the range [<code>0</code>, 113 * <code>DICT_SIZE_MAX</code>] 114 * 115 * @param lc number of literal context bits, must be 116 * in the range [0, 8] 117 * 118 * @param lp number of literal position bits, must be 119 * in the range [0, 4] 120 * 121 * @return approximate memory requirements as kibibytes (KiB) 122 */ getMemoryUsage(int dictSize, int lc, int lp)123 public static int getMemoryUsage(int dictSize, int lc, int lp) { 124 if (lc < 0 || lc > 8 || lp < 0 || lp > 4) 125 throw new IllegalArgumentException("Invalid lc or lp"); 126 127 // Probability variables have the type "short". There are 128 // 0x300 (768) probability variables in each literal subcoder. 129 // The number of literal subcoders is 2^(lc + lp). 130 // 131 // Roughly 10 KiB for the base state + LZ decoder's dictionary buffer 132 // + sizeof(short) * number probability variables per literal subcoder 133 // * number of literal subcoders 134 return 10 + getDictSize(dictSize) / 1024 135 + ((2 * 0x300) << (lc + lp)) / 1024; 136 } 137 getDictSize(int dictSize)138 private static int getDictSize(int dictSize) { 139 if (dictSize < 0 || dictSize > DICT_SIZE_MAX) 140 throw new IllegalArgumentException( 141 "LZMA dictionary is too big for this implementation"); 142 143 // For performance reasons, use a 4 KiB dictionary if something 144 // smaller was requested. It's a rare situation and the performance 145 // difference isn't huge, and it starts to matter mostly when the 146 // dictionary is just a few bytes. But we need to handle the special 147 // case of dictSize == 0 anyway, which is an allowed value but in 148 // practice means one-byte dictionary. 149 // 150 // Note that using a dictionary bigger than specified in the headers 151 // can hide errors if there is a reference to data beyond the original 152 // dictionary size but is still within 4 KiB. 153 if (dictSize < 4096) 154 dictSize = 4096; 155 156 // Round dictionary size upward to a multiple of 16. This way LZMA 157 // can use LZDecoder.getPos() for calculating LZMA's posMask. 158 return (dictSize + 15) & ~15; 159 } 160 161 /** 162 * Creates a new .lzma file format decompressor without 163 * a memory usage limit. 164 * 165 * @param in input stream from which .lzma data is read; 166 * it might be a good idea to wrap it in 167 * <code>BufferedInputStream</code>, see the 168 * note at the top of this page 169 * 170 * @throws CorruptedInputException 171 * file is corrupt or perhaps not in 172 * the .lzma format at all 173 * 174 * @throws UnsupportedOptionsException 175 * dictionary size or uncompressed size is too 176 * big for this implementation 177 * 178 * @throws EOFException 179 * file is truncated or perhaps not in 180 * the .lzma format at all 181 * 182 * @throws IOException may be thrown by <code>in</code> 183 */ LZMAInputStream(InputStream in)184 public LZMAInputStream(InputStream in) throws IOException { 185 this(in, -1); 186 } 187 188 /** 189 * Creates a new .lzma file format decompressor without 190 * a memory usage limit. 191 * <p> 192 * This is identical to <code>LZMAInputStream(InputStream)</code> 193 * except that this also takes the <code>arrayCache</code> argument. 194 * 195 * @param in input stream from which .lzma data is read; 196 * it might be a good idea to wrap it in 197 * <code>BufferedInputStream</code>, see the 198 * note at the top of this page 199 * 200 * 201 * @param arrayCache cache to be used for allocating large arrays 202 * 203 * @throws CorruptedInputException 204 * file is corrupt or perhaps not in 205 * the .lzma format at all 206 * 207 * @throws UnsupportedOptionsException 208 * dictionary size or uncompressed size is too 209 * big for this implementation 210 * 211 * @throws EOFException 212 * file is truncated or perhaps not in 213 * the .lzma format at all 214 * 215 * @throws IOException may be thrown by <code>in</code> 216 * 217 * @since 1.7 218 */ LZMAInputStream(InputStream in, ArrayCache arrayCache)219 public LZMAInputStream(InputStream in, ArrayCache arrayCache) 220 throws IOException { 221 this(in, -1, arrayCache); 222 } 223 224 /** 225 * Creates a new .lzma file format decompressor with an optional 226 * memory usage limit. 227 * 228 * @param in input stream from which .lzma data is read; 229 * it might be a good idea to wrap it in 230 * <code>BufferedInputStream</code>, see the 231 * note at the top of this page 232 * 233 * @param memoryLimit memory usage limit in kibibytes (KiB) 234 * or <code>-1</code> to impose no 235 * memory usage limit 236 * 237 * @throws CorruptedInputException 238 * file is corrupt or perhaps not in 239 * the .lzma format at all 240 * 241 * @throws UnsupportedOptionsException 242 * dictionary size or uncompressed size is too 243 * big for this implementation 244 * 245 * @throws MemoryLimitException 246 * memory usage limit was exceeded 247 * 248 * @throws EOFException 249 * file is truncated or perhaps not in 250 * the .lzma format at all 251 * 252 * @throws IOException may be thrown by <code>in</code> 253 */ LZMAInputStream(InputStream in, int memoryLimit)254 public LZMAInputStream(InputStream in, int memoryLimit) 255 throws IOException { 256 this(in, memoryLimit, ArrayCache.getDefaultCache()); 257 } 258 259 /** 260 * Creates a new .lzma file format decompressor with an optional 261 * memory usage limit. 262 * <p> 263 * This is identical to <code>LZMAInputStream(InputStream, int)</code> 264 * except that this also takes the <code>arrayCache</code> argument. 265 * 266 * @param in input stream from which .lzma data is read; 267 * it might be a good idea to wrap it in 268 * <code>BufferedInputStream</code>, see the 269 * note at the top of this page 270 * 271 * @param memoryLimit memory usage limit in kibibytes (KiB) 272 * or <code>-1</code> to impose no 273 * memory usage limit 274 * 275 * @param arrayCache cache to be used for allocating large arrays 276 * 277 * @throws CorruptedInputException 278 * file is corrupt or perhaps not in 279 * the .lzma format at all 280 * 281 * @throws UnsupportedOptionsException 282 * dictionary size or uncompressed size is too 283 * big for this implementation 284 * 285 * @throws MemoryLimitException 286 * memory usage limit was exceeded 287 * 288 * @throws EOFException 289 * file is truncated or perhaps not in 290 * the .lzma format at all 291 * 292 * @throws IOException may be thrown by <code>in</code> 293 * 294 * @since 1.7 295 */ LZMAInputStream(InputStream in, int memoryLimit, ArrayCache arrayCache)296 public LZMAInputStream(InputStream in, int memoryLimit, 297 ArrayCache arrayCache) throws IOException { 298 DataInputStream inData = new DataInputStream(in); 299 300 // Properties byte (lc, lp, and pb) 301 byte propsByte = inData.readByte(); 302 303 // Dictionary size is an unsigned 32-bit little endian integer. 304 int dictSize = 0; 305 for (int i = 0; i < 4; ++i) 306 dictSize |= inData.readUnsignedByte() << (8 * i); 307 308 // Uncompressed size is an unsigned 64-bit little endian integer. 309 // The maximum 64-bit value is a special case (becomes -1 here) 310 // which indicates that the end marker is used instead of knowing 311 // the uncompressed size beforehand. 312 long uncompSize = 0; 313 for (int i = 0; i < 8; ++i) 314 uncompSize |= (long)inData.readUnsignedByte() << (8 * i); 315 316 // Check the memory usage limit. 317 int memoryNeeded = getMemoryUsage(dictSize, propsByte); 318 if (memoryLimit != -1 && memoryNeeded > memoryLimit) 319 throw new MemoryLimitException(memoryNeeded, memoryLimit); 320 321 initialize(in, uncompSize, propsByte, dictSize, null, arrayCache); 322 } 323 324 /** 325 * Creates a new input stream that decompresses raw LZMA data (no .lzma 326 * header) from <code>in</code>. 327 * <p> 328 * The caller needs to know if the "end of payload marker (EOPM)" alias 329 * "end of stream marker (EOS marker)" alias "end marker" present. 330 * If the end marker isn't used, the caller must know the exact 331 * uncompressed size of the stream. 332 * <p> 333 * The caller also needs to provide the LZMA properties byte that encodes 334 * the number of literal context bits (lc), literal position bits (lp), 335 * and position bits (pb). 336 * <p> 337 * The dictionary size used when compressing is also needed. Specifying 338 * a too small dictionary size will prevent decompressing the stream. 339 * Specifying a too big dictionary is waste of memory but decompression 340 * will work. 341 * <p> 342 * There is no need to specify a dictionary bigger than 343 * the uncompressed size of the data even if a bigger dictionary 344 * was used when compressing. If you know the uncompressed size 345 * of the data, this might allow saving some memory. 346 * 347 * @param in input stream from which compressed 348 * data is read 349 * 350 * @param uncompSize uncompressed size of the LZMA stream or -1 351 * if the end marker is used in the LZMA stream 352 * 353 * @param propsByte LZMA properties byte that has the encoded 354 * values for literal context bits (lc), literal 355 * position bits (lp), and position bits (pb) 356 * 357 * @param dictSize dictionary size as bytes, must be in the range 358 * [<code>0</code>, <code>DICT_SIZE_MAX</code>] 359 * 360 * @throws CorruptedInputException 361 * if <code>propsByte</code> is invalid or 362 * the first input byte is not 0x00 363 * 364 * @throws UnsupportedOptionsException 365 * dictionary size or uncompressed size is too 366 * big for this implementation 367 * 368 * 369 */ LZMAInputStream(InputStream in, long uncompSize, byte propsByte, int dictSize)370 public LZMAInputStream(InputStream in, long uncompSize, byte propsByte, 371 int dictSize) throws IOException { 372 initialize(in, uncompSize, propsByte, dictSize, null, 373 ArrayCache.getDefaultCache()); 374 } 375 376 /** 377 * Creates a new input stream that decompresses raw LZMA data (no .lzma 378 * header) from <code>in</code> optionally with a preset dictionary. 379 * 380 * @param in input stream from which LZMA-compressed 381 * data is read 382 * 383 * @param uncompSize uncompressed size of the LZMA stream or -1 384 * if the end marker is used in the LZMA stream 385 * 386 * @param propsByte LZMA properties byte that has the encoded 387 * values for literal context bits (lc), literal 388 * position bits (lp), and position bits (pb) 389 * 390 * @param dictSize dictionary size as bytes, must be in the range 391 * [<code>0</code>, <code>DICT_SIZE_MAX</code>] 392 * 393 * @param presetDict preset dictionary or <code>null</code> 394 * to use no preset dictionary 395 * 396 * @throws CorruptedInputException 397 * if <code>propsByte</code> is invalid or 398 * the first input byte is not 0x00 399 * 400 * @throws UnsupportedOptionsException 401 * dictionary size or uncompressed size is too 402 * big for this implementation 403 * 404 * @throws EOFException file is truncated or corrupt 405 * 406 * @throws IOException may be thrown by <code>in</code> 407 */ LZMAInputStream(InputStream in, long uncompSize, byte propsByte, int dictSize, byte[] presetDict)408 public LZMAInputStream(InputStream in, long uncompSize, byte propsByte, 409 int dictSize, byte[] presetDict) 410 throws IOException { 411 initialize(in, uncompSize, propsByte, dictSize, presetDict, 412 ArrayCache.getDefaultCache()); 413 } 414 415 /** 416 * Creates a new input stream that decompresses raw LZMA data (no .lzma 417 * header) from <code>in</code> optionally with a preset dictionary. 418 * <p> 419 * This is identical to <code>LZMAInputStream(InputStream, long, byte, int, 420 * byte[])</code> except that this also takes the <code>arrayCache</code> 421 * argument. 422 * 423 * @param in input stream from which LZMA-compressed 424 * data is read 425 * 426 * @param uncompSize uncompressed size of the LZMA stream or -1 427 * if the end marker is used in the LZMA stream 428 * 429 * @param propsByte LZMA properties byte that has the encoded 430 * values for literal context bits (lc), literal 431 * position bits (lp), and position bits (pb) 432 * 433 * @param dictSize dictionary size as bytes, must be in the range 434 * [<code>0</code>, <code>DICT_SIZE_MAX</code>] 435 * 436 * @param presetDict preset dictionary or <code>null</code> 437 * to use no preset dictionary 438 * 439 * @param arrayCache cache to be used for allocating large arrays 440 * 441 * @throws CorruptedInputException 442 * if <code>propsByte</code> is invalid or 443 * the first input byte is not 0x00 444 * 445 * @throws UnsupportedOptionsException 446 * dictionary size or uncompressed size is too 447 * big for this implementation 448 * 449 * @throws EOFException file is truncated or corrupt 450 * 451 * @throws IOException may be thrown by <code>in</code> 452 * 453 * @since 1.7 454 */ LZMAInputStream(InputStream in, long uncompSize, byte propsByte, int dictSize, byte[] presetDict, ArrayCache arrayCache)455 public LZMAInputStream(InputStream in, long uncompSize, byte propsByte, 456 int dictSize, byte[] presetDict, 457 ArrayCache arrayCache) 458 throws IOException { 459 initialize(in, uncompSize, propsByte, dictSize, presetDict, 460 arrayCache); 461 } 462 463 /** 464 * Creates a new input stream that decompresses raw LZMA data (no .lzma 465 * header) from <code>in</code> optionally with a preset dictionary. 466 * 467 * @param in input stream from which LZMA-compressed 468 * data is read 469 * 470 * @param uncompSize uncompressed size of the LZMA stream or -1 471 * if the end marker is used in the LZMA stream 472 * 473 * @param lc number of literal context bits, must be 474 * in the range [0, 8] 475 * 476 * @param lp number of literal position bits, must be 477 * in the range [0, 4] 478 * 479 * @param pb number position bits, must be 480 * in the range [0, 4] 481 * 482 * @param dictSize dictionary size as bytes, must be in the range 483 * [<code>0</code>, <code>DICT_SIZE_MAX</code>] 484 * 485 * @param presetDict preset dictionary or <code>null</code> 486 * to use no preset dictionary 487 * 488 * @throws CorruptedInputException 489 * if the first input byte is not 0x00 490 * 491 * @throws EOFException file is truncated or corrupt 492 * 493 * @throws IOException may be thrown by <code>in</code> 494 */ LZMAInputStream(InputStream in, long uncompSize, int lc, int lp, int pb, int dictSize, byte[] presetDict)495 public LZMAInputStream(InputStream in, long uncompSize, 496 int lc, int lp, int pb, 497 int dictSize, byte[] presetDict) 498 throws IOException { 499 initialize(in, uncompSize, lc, lp, pb, dictSize, presetDict, 500 ArrayCache.getDefaultCache()); 501 } 502 503 /** 504 * Creates a new input stream that decompresses raw LZMA data (no .lzma 505 * header) from <code>in</code> optionally with a preset dictionary. 506 * <p> 507 * This is identical to <code>LZMAInputStream(InputStream, long, int, int, 508 * int, int, byte[])</code> except that this also takes the 509 * <code>arrayCache</code> argument. 510 * 511 * @param in input stream from which LZMA-compressed 512 * data is read 513 * 514 * @param uncompSize uncompressed size of the LZMA stream or -1 515 * if the end marker is used in the LZMA stream 516 * 517 * @param lc number of literal context bits, must be 518 * in the range [0, 8] 519 * 520 * @param lp number of literal position bits, must be 521 * in the range [0, 4] 522 * 523 * @param pb number position bits, must be 524 * in the range [0, 4] 525 * 526 * @param dictSize dictionary size as bytes, must be in the range 527 * [<code>0</code>, <code>DICT_SIZE_MAX</code>] 528 * 529 * @param presetDict preset dictionary or <code>null</code> 530 * to use no preset dictionary 531 * 532 * @param arrayCache cache to be used for allocating large arrays 533 * 534 * @throws CorruptedInputException 535 * if the first input byte is not 0x00 536 * 537 * @throws EOFException file is truncated or corrupt 538 * 539 * @throws IOException may be thrown by <code>in</code> 540 * 541 * @since 1.7 542 */ LZMAInputStream(InputStream in, long uncompSize, int lc, int lp, int pb, int dictSize, byte[] presetDict, ArrayCache arrayCache)543 public LZMAInputStream(InputStream in, long uncompSize, 544 int lc, int lp, int pb, 545 int dictSize, byte[] presetDict, 546 ArrayCache arrayCache) 547 throws IOException { 548 initialize(in, uncompSize, lc, lp, pb, dictSize, presetDict, 549 arrayCache); 550 } 551 initialize(InputStream in, long uncompSize, byte propsByte, int dictSize, byte[] presetDict, ArrayCache arrayCache)552 private void initialize(InputStream in, long uncompSize, byte propsByte, 553 int dictSize, byte[] presetDict, 554 ArrayCache arrayCache) 555 throws IOException { 556 // Validate the uncompressed size since the other "initialize" throws 557 // IllegalArgumentException if uncompSize < -1. 558 if (uncompSize < -1) 559 throw new UnsupportedOptionsException( 560 "Uncompressed size is too big"); 561 562 // Decode the properties byte. In contrast to LZMA2, there is no 563 // limit of lc + lp <= 4. 564 int props = propsByte & 0xFF; 565 if (props > (4 * 5 + 4) * 9 + 8) 566 throw new CorruptedInputException("Invalid LZMA properties byte"); 567 568 int pb = props / (9 * 5); 569 props -= pb * 9 * 5; 570 int lp = props / 9; 571 int lc = props - lp * 9; 572 573 // Validate the dictionary size since the other "initialize" throws 574 // IllegalArgumentException if dictSize is not supported. 575 if (dictSize < 0 || dictSize > DICT_SIZE_MAX) 576 throw new UnsupportedOptionsException( 577 "LZMA dictionary is too big for this implementation"); 578 579 initialize(in, uncompSize, lc, lp, pb, dictSize, presetDict, 580 arrayCache); 581 } 582 initialize(InputStream in, long uncompSize, int lc, int lp, int pb, int dictSize, byte[] presetDict, ArrayCache arrayCache)583 private void initialize(InputStream in, long uncompSize, 584 int lc, int lp, int pb, 585 int dictSize, byte[] presetDict, 586 ArrayCache arrayCache) 587 throws IOException { 588 // getDictSize validates dictSize and gives a message in 589 // the exception too, so skip validating dictSize here. 590 if (uncompSize < -1 || lc < 0 || lc > 8 || lp < 0 || lp > 4 591 || pb < 0 || pb > 4) 592 throw new IllegalArgumentException(); 593 594 this.in = in; 595 this.arrayCache = arrayCache; 596 597 // If uncompressed size is known, use it to avoid wasting memory for 598 // a uselessly large dictionary buffer. 599 dictSize = getDictSize(dictSize); 600 if (uncompSize >= 0 && dictSize > uncompSize) 601 dictSize = getDictSize((int)uncompSize); 602 603 lz = new LZDecoder(getDictSize(dictSize), presetDict, arrayCache); 604 rc = new RangeDecoderFromStream(in); 605 lzma = new LZMADecoder(lz, rc, lc, lp, pb); 606 607 remainingSize = uncompSize; 608 } 609 610 /** 611 * Enables relaxed end-of-stream condition when uncompressed size is known. 612 * This is useful if uncompressed size is known but it is unknown if 613 * the end of stream (EOS) marker is present. After calling this function, 614 * both are allowed. 615 * <p> 616 * Note that this doesn't actually check if the EOS marker is present. 617 * This introduces a few minor downsides: 618 * <ul> 619 * <li>Some (not all!) streams that would have more data than 620 * the specified uncompressed size, for example due to data corruption, 621 * will be accepted as valid.</li> 622 * <li>After <code>read</code> has returned <code>-1</code> the 623 * input position might not be at the end of the stream (too little 624 * input may have been read).</li> 625 * </ul> 626 * <p> 627 * This should be called after the constructor before reading any data 628 * from the stream. This is a separate function because adding even more 629 * constructors to this class didn't look like a good alternative. 630 * 631 * @since 1.9 632 */ enableRelaxedEndCondition()633 public void enableRelaxedEndCondition() { 634 relaxedEndCondition = true; 635 } 636 637 /** 638 * Decompresses the next byte from this input stream. 639 * <p> 640 * Reading lots of data with <code>read()</code> from this input stream 641 * may be inefficient. Wrap it in <code>java.io.BufferedInputStream</code> 642 * if you need to read lots of data one byte at a time. 643 * 644 * @return the next decompressed byte, or <code>-1</code> 645 * to indicate the end of the compressed stream 646 * 647 * @throws CorruptedInputException 648 * 649 * @throws XZIOException if the stream has been closed 650 * 651 * @throws EOFException 652 * compressed input is truncated or corrupt 653 * 654 * @throws IOException may be thrown by <code>in</code> 655 */ read()656 public int read() throws IOException { 657 return read(tempBuf, 0, 1) == -1 ? -1 : (tempBuf[0] & 0xFF); 658 } 659 660 /** 661 * Decompresses into an array of bytes. 662 * <p> 663 * If <code>len</code> is zero, no bytes are read and <code>0</code> 664 * is returned. Otherwise this will block until <code>len</code> 665 * bytes have been decompressed, the end of the LZMA stream is reached, 666 * or an exception is thrown. 667 * 668 * @param buf target buffer for uncompressed data 669 * @param off start offset in <code>buf</code> 670 * @param len maximum number of uncompressed bytes to read 671 * 672 * @return number of bytes read, or <code>-1</code> to indicate 673 * the end of the compressed stream 674 * 675 * @throws CorruptedInputException 676 * 677 * @throws XZIOException if the stream has been closed 678 * 679 * @throws EOFException compressed input is truncated or corrupt 680 * 681 * @throws IOException may be thrown by <code>in</code> 682 */ read(byte[] buf, int off, int len)683 public int read(byte[] buf, int off, int len) throws IOException { 684 if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length) 685 throw new IndexOutOfBoundsException(); 686 687 if (len == 0) 688 return 0; 689 690 if (in == null) 691 throw new XZIOException("Stream closed"); 692 693 if (exception != null) 694 throw exception; 695 696 if (endReached) 697 return -1; 698 699 try { 700 int size = 0; 701 702 while (len > 0) { 703 // If uncompressed size is known and thus no end marker will 704 // be present, set the limit so that the uncompressed size 705 // won't be exceeded. 706 int copySizeMax = len; 707 if (remainingSize >= 0 && remainingSize < len) 708 copySizeMax = (int)remainingSize; 709 710 lz.setLimit(copySizeMax); 711 712 // Decode into the dictionary buffer. 713 try { 714 lzma.decode(); 715 } catch (CorruptedInputException e) { 716 // The end marker is encoded with a LZMA symbol that 717 // indicates maximum match distance. This is larger 718 // than any supported dictionary and thus causes 719 // CorruptedInputException from LZDecoder.repeat. 720 if (remainingSize != -1 || !lzma.endMarkerDetected()) 721 throw e; 722 723 endReached = true; 724 725 // The exception makes lzma.decode() miss the last range 726 // decoder normalization, so do it here. This might 727 // cause an IOException if it needs to read a byte 728 // from the input stream. 729 rc.normalize(); 730 } 731 732 // Copy from the dictionary to buf. 733 int copiedSize = lz.flush(buf, off); 734 off += copiedSize; 735 len -= copiedSize; 736 size += copiedSize; 737 738 if (remainingSize >= 0) { 739 // Update the number of bytes left to be decompressed. 740 remainingSize -= copiedSize; 741 assert remainingSize >= 0; 742 743 if (remainingSize == 0) 744 endReached = true; 745 } 746 747 if (endReached) { 748 // Checking these helps a lot when catching corrupt 749 // or truncated .lzma files. LZMA Utils doesn't do 750 // the second check and thus it accepts many invalid 751 // files that this implementation and XZ Utils don't. 752 if (lz.hasPending() || (!relaxedEndCondition 753 && !rc.isFinished())) 754 throw new CorruptedInputException(); 755 756 putArraysToCache(); 757 return size == 0 ? -1 : size; 758 } 759 } 760 761 return size; 762 763 } catch (IOException e) { 764 exception = e; 765 throw e; 766 } 767 } 768 putArraysToCache()769 private void putArraysToCache() { 770 if (lz != null) { 771 lz.putArraysToCache(arrayCache); 772 lz = null; 773 } 774 } 775 776 /** 777 * Closes the stream and calls <code>in.close()</code>. 778 * If the stream was already closed, this does nothing. 779 * 780 * @throws IOException if thrown by <code>in.close()</code> 781 */ close()782 public void close() throws IOException { 783 if (in != null) { 784 putArraysToCache(); 785 786 try { 787 in.close(); 788 } finally { 789 in = null; 790 } 791 } 792 } 793 } 794