1 /* 2 * LZMAInputStream 3 * 4 * Authors: Lasse Collin <lasse.collin@tukaani.org> 5 * Igor Pavlov <http://7-zip.org/> 6 * 7 * This file has been put into the public domain. 8 * You can do whatever you want with this file. 9 */ 10 11 package org.tukaani.xz; 12 13 import java.io.InputStream; 14 import java.io.DataInputStream; 15 import java.io.IOException; 16 import org.tukaani.xz.lz.LZDecoder; 17 import org.tukaani.xz.rangecoder.RangeDecoderFromStream; 18 import org.tukaani.xz.lzma.LZMADecoder; 19 20 /** 21 * Decompresses legacy .lzma files and raw LZMA streams (no .lzma header). 22 * <p> 23 * <b>IMPORTANT:</b> In contrast to other classes in this package, this class 24 * reads data from its input stream one byte at a time. If the input stream 25 * is for example {@link java.io.FileInputStream}, wrapping it into 26 * {@link java.io.BufferedInputStream} tends to improve performance a lot. 27 * This is not automatically done by this class because there may be use 28 * cases where it is desired that this class won't read any bytes past 29 * the end of the LZMA stream. 30 * <p> 31 * Even when using <code>BufferedInputStream</code>, the performance tends 32 * to be worse (maybe 10-20 % slower) than with {@link LZMA2InputStream} 33 * or {@link XZInputStream} (when the .xz file contains LZMA2-compressed data). 34 * 35 * @since 1.4 36 */ 37 public class LZMAInputStream extends InputStream { 38 /** 39 * Largest dictionary size supported by this implementation. 40 * <p> 41 * LZMA allows dictionaries up to one byte less than 4 GiB. This 42 * implementation supports only 16 bytes less than 2 GiB. This 43 * limitation is due to Java using signed 32-bit integers for array 44 * indexing. The limitation shouldn't matter much in practice since so 45 * huge dictionaries are not normally used. 46 */ 47 public static final int DICT_SIZE_MAX = Integer.MAX_VALUE & ~15; 48 49 private InputStream in; 50 private ArrayCache arrayCache; 51 private LZDecoder lz; 52 private RangeDecoderFromStream rc; 53 private LZMADecoder lzma; 54 55 private boolean endReached = false; 56 57 private final byte[] tempBuf = new byte[1]; 58 59 /** 60 * Number of uncompressed bytes left to be decompressed, or -1 if 61 * the end marker is used. 62 */ 63 private long remainingSize; 64 65 private IOException exception = null; 66 67 /** 68 * Gets approximate decompressor memory requirements as kibibytes for 69 * the given dictionary size and LZMA properties byte (lc, lp, and pb). 70 * 71 * @param dictSize LZMA dictionary size as bytes, should be 72 * in the range [<code>0</code>, 73 * <code>DICT_SIZE_MAX</code>] 74 * 75 * @param propsByte LZMA properties byte that encodes the values 76 * of lc, lp, and pb 77 * 78 * @return approximate memory requirements as kibibytes (KiB) 79 * 80 * @throws UnsupportedOptionsException 81 * if <code>dictSize</code> is outside 82 * the range [<code>0</code>, 83 * <code>DICT_SIZE_MAX</code>] 84 * 85 * @throws CorruptedInputException 86 * if <code>propsByte</code> is invalid 87 */ getMemoryUsage(int dictSize, byte propsByte)88 public static int getMemoryUsage(int dictSize, byte propsByte) 89 throws UnsupportedOptionsException, CorruptedInputException { 90 if (dictSize < 0 || dictSize > DICT_SIZE_MAX) 91 throw new UnsupportedOptionsException( 92 "LZMA dictionary is too big for this implementation"); 93 94 int props = propsByte & 0xFF; 95 if (props > (4 * 5 + 4) * 9 + 8) 96 throw new CorruptedInputException("Invalid LZMA properties byte"); 97 98 props %= 9 * 5; 99 int lp = props / 9; 100 int lc = props - lp * 9; 101 102 return getMemoryUsage(dictSize, lc, lp); 103 } 104 105 /** 106 * Gets approximate decompressor memory requirements as kibibytes for 107 * the given dictionary size, lc, and lp. Note that pb isn't needed. 108 * 109 * @param dictSize LZMA dictionary size as bytes, must be 110 * in the range [<code>0</code>, 111 * <code>DICT_SIZE_MAX</code>] 112 * 113 * @param lc number of literal context bits, must be 114 * in the range [0, 8] 115 * 116 * @param lp number of literal position bits, must be 117 * in the range [0, 4] 118 * 119 * @return approximate memory requirements as kibibytes (KiB) 120 */ getMemoryUsage(int dictSize, int lc, int lp)121 public static int getMemoryUsage(int dictSize, int lc, int lp) { 122 if (lc < 0 || lc > 8 || lp < 0 || lp > 4) 123 throw new IllegalArgumentException("Invalid lc or lp"); 124 125 // Probability variables have the type "short". There are 126 // 0x300 (768) probability variables in each literal subcoder. 127 // The number of literal subcoders is 2^(lc + lp). 128 // 129 // Roughly 10 KiB for the base state + LZ decoder's dictionary buffer 130 // + sizeof(short) * number probability variables per literal subcoder 131 // * number of literal subcoders 132 return 10 + getDictSize(dictSize) / 1024 133 + ((2 * 0x300) << (lc + lp)) / 1024; 134 } 135 getDictSize(int dictSize)136 private static int getDictSize(int dictSize) { 137 if (dictSize < 0 || dictSize > DICT_SIZE_MAX) 138 throw new IllegalArgumentException( 139 "LZMA dictionary is too big for this implementation"); 140 141 // For performance reasons, use a 4 KiB dictionary if something 142 // smaller was requested. It's a rare situation and the performance 143 // difference isn't huge, and it starts to matter mostly when the 144 // dictionary is just a few bytes. But we need to handle the special 145 // case of dictSize == 0 anyway, which is an allowed value but in 146 // practice means one-byte dictionary. 147 // 148 // Note that using a dictionary bigger than specified in the headers 149 // can hide errors if there is a reference to data beyond the original 150 // dictionary size but is still within 4 KiB. 151 if (dictSize < 4096) 152 dictSize = 4096; 153 154 // Round dictionary size upward to a multiple of 16. This way LZMA 155 // can use LZDecoder.getPos() for calculating LZMA's posMask. 156 return (dictSize + 15) & ~15; 157 } 158 159 /** 160 * Creates a new .lzma file format decompressor without 161 * a memory usage limit. 162 * 163 * @param in input stream from which .lzma data is read; 164 * it might be a good idea to wrap it in 165 * <code>BufferedInputStream</code>, see the 166 * note at the top of this page 167 * 168 * @throws CorruptedInputException 169 * file is corrupt or perhaps not in 170 * the .lzma format at all 171 * 172 * @throws UnsupportedOptionsException 173 * dictionary size or uncompressed size is too 174 * big for this implementation 175 * 176 * @throws EOFException 177 * file is truncated or perhaps not in 178 * the .lzma format at all 179 * 180 * @throws IOException may be thrown by <code>in</code> 181 */ LZMAInputStream(InputStream in)182 public LZMAInputStream(InputStream in) throws IOException { 183 this(in, -1); 184 } 185 186 /** 187 * Creates a new .lzma file format decompressor without 188 * a memory usage limit. 189 * <p> 190 * This is identical to <code>LZMAInputStream(InputStream)</code> 191 * except that this also takes the <code>arrayCache</code> argument. 192 * 193 * @param in input stream from which .lzma data is read; 194 * it might be a good idea to wrap it in 195 * <code>BufferedInputStream</code>, see the 196 * note at the top of this page 197 * 198 * 199 * @param arrayCache cache to be used for allocating large arrays 200 * 201 * @throws CorruptedInputException 202 * file is corrupt or perhaps not in 203 * the .lzma format at all 204 * 205 * @throws UnsupportedOptionsException 206 * dictionary size or uncompressed size is too 207 * big for this implementation 208 * 209 * @throws EOFException 210 * file is truncated or perhaps not in 211 * the .lzma format at all 212 * 213 * @throws IOException may be thrown by <code>in</code> 214 * 215 * @since 1.7 216 */ LZMAInputStream(InputStream in, ArrayCache arrayCache)217 public LZMAInputStream(InputStream in, ArrayCache arrayCache) 218 throws IOException { 219 this(in, -1, arrayCache); 220 } 221 222 /** 223 * Creates a new .lzma file format decompressor with an optional 224 * memory usage limit. 225 * 226 * @param in input stream from which .lzma data is read; 227 * it might be a good idea to wrap it in 228 * <code>BufferedInputStream</code>, see the 229 * note at the top of this page 230 * 231 * @param memoryLimit memory usage limit in kibibytes (KiB) 232 * or <code>-1</code> to impose no 233 * memory usage limit 234 * 235 * @throws CorruptedInputException 236 * file is corrupt or perhaps not in 237 * the .lzma format at all 238 * 239 * @throws UnsupportedOptionsException 240 * dictionary size or uncompressed size is too 241 * big for this implementation 242 * 243 * @throws MemoryLimitException 244 * memory usage limit was exceeded 245 * 246 * @throws EOFException 247 * file is truncated or perhaps not in 248 * the .lzma format at all 249 * 250 * @throws IOException may be thrown by <code>in</code> 251 */ LZMAInputStream(InputStream in, int memoryLimit)252 public LZMAInputStream(InputStream in, int memoryLimit) 253 throws IOException { 254 this(in, memoryLimit, ArrayCache.getDefaultCache()); 255 } 256 257 /** 258 * Creates a new .lzma file format decompressor with an optional 259 * memory usage limit. 260 * <p> 261 * This is identical to <code>LZMAInputStream(InputStream, int)</code> 262 * except that this also takes the <code>arrayCache</code> argument. 263 * 264 * @param in input stream from which .lzma data is read; 265 * it might be a good idea to wrap it in 266 * <code>BufferedInputStream</code>, see the 267 * note at the top of this page 268 * 269 * @param memoryLimit memory usage limit in kibibytes (KiB) 270 * or <code>-1</code> to impose no 271 * memory usage limit 272 * 273 * @param arrayCache cache to be used for allocating large arrays 274 * 275 * @throws CorruptedInputException 276 * file is corrupt or perhaps not in 277 * the .lzma format at all 278 * 279 * @throws UnsupportedOptionsException 280 * dictionary size or uncompressed size is too 281 * big for this implementation 282 * 283 * @throws MemoryLimitException 284 * memory usage limit was exceeded 285 * 286 * @throws EOFException 287 * file is truncated or perhaps not in 288 * the .lzma format at all 289 * 290 * @throws IOException may be thrown by <code>in</code> 291 * 292 * @since 1.7 293 */ LZMAInputStream(InputStream in, int memoryLimit, ArrayCache arrayCache)294 public LZMAInputStream(InputStream in, int memoryLimit, 295 ArrayCache arrayCache) throws IOException { 296 DataInputStream inData = new DataInputStream(in); 297 298 // Properties byte (lc, lp, and pb) 299 byte propsByte = inData.readByte(); 300 301 // Dictionary size is an unsigned 32-bit little endian integer. 302 int dictSize = 0; 303 for (int i = 0; i < 4; ++i) 304 dictSize |= inData.readUnsignedByte() << (8 * i); 305 306 // Uncompressed size is an unsigned 64-bit little endian integer. 307 // The maximum 64-bit value is a special case (becomes -1 here) 308 // which indicates that the end marker is used instead of knowing 309 // the uncompressed size beforehand. 310 long uncompSize = 0; 311 for (int i = 0; i < 8; ++i) 312 uncompSize |= (long)inData.readUnsignedByte() << (8 * i); 313 314 // Check the memory usage limit. 315 int memoryNeeded = getMemoryUsage(dictSize, propsByte); 316 if (memoryLimit != -1 && memoryNeeded > memoryLimit) 317 throw new MemoryLimitException(memoryNeeded, memoryLimit); 318 319 initialize(in, uncompSize, propsByte, dictSize, null, arrayCache); 320 } 321 322 /** 323 * Creates a new input stream that decompresses raw LZMA data (no .lzma 324 * header) from <code>in</code>. 325 * <p> 326 * The caller needs to know if the "end of payload marker (EOPM)" alias 327 * "end of stream marker (EOS marker)" alias "end marker" present. 328 * If the end marker isn't used, the caller must know the exact 329 * uncompressed size of the stream. 330 * <p> 331 * The caller also needs to provide the LZMA properties byte that encodes 332 * the number of literal context bits (lc), literal position bits (lp), 333 * and position bits (pb). 334 * <p> 335 * The dictionary size used when compressing is also needed. Specifying 336 * a too small dictionary size will prevent decompressing the stream. 337 * Specifying a too big dictionary is waste of memory but decompression 338 * will work. 339 * <p> 340 * There is no need to specify a dictionary bigger than 341 * the uncompressed size of the data even if a bigger dictionary 342 * was used when compressing. If you know the uncompressed size 343 * of the data, this might allow saving some memory. 344 * 345 * @param in input stream from which compressed 346 * data is read 347 * 348 * @param uncompSize uncompressed size of the LZMA stream or -1 349 * if the end marker is used in the LZMA stream 350 * 351 * @param propsByte LZMA properties byte that has the encoded 352 * values for literal context bits (lc), literal 353 * position bits (lp), and position bits (pb) 354 * 355 * @param dictSize dictionary size as bytes, must be in the range 356 * [<code>0</code>, <code>DICT_SIZE_MAX</code>] 357 * 358 * @throws CorruptedInputException 359 * if <code>propsByte</code> is invalid or 360 * the first input byte is not 0x00 361 * 362 * @throws UnsupportedOptionsException 363 * dictionary size or uncompressed size is too 364 * big for this implementation 365 * 366 * 367 */ LZMAInputStream(InputStream in, long uncompSize, byte propsByte, int dictSize)368 public LZMAInputStream(InputStream in, long uncompSize, byte propsByte, 369 int dictSize) throws IOException { 370 initialize(in, uncompSize, propsByte, dictSize, null, 371 ArrayCache.getDefaultCache()); 372 } 373 374 /** 375 * Creates a new input stream that decompresses raw LZMA data (no .lzma 376 * header) from <code>in</code> optionally with a preset dictionary. 377 * 378 * @param in input stream from which LZMA-compressed 379 * data is read 380 * 381 * @param uncompSize uncompressed size of the LZMA stream or -1 382 * if the end marker is used in the LZMA stream 383 * 384 * @param propsByte LZMA properties byte that has the encoded 385 * values for literal context bits (lc), literal 386 * position bits (lp), and position bits (pb) 387 * 388 * @param dictSize dictionary size as bytes, must be in the range 389 * [<code>0</code>, <code>DICT_SIZE_MAX</code>] 390 * 391 * @param presetDict preset dictionary or <code>null</code> 392 * to use no preset dictionary 393 * 394 * @throws CorruptedInputException 395 * if <code>propsByte</code> is invalid or 396 * the first input byte is not 0x00 397 * 398 * @throws UnsupportedOptionsException 399 * dictionary size or uncompressed size is too 400 * big for this implementation 401 * 402 * @throws EOFException file is truncated or corrupt 403 * 404 * @throws IOException may be thrown by <code>in</code> 405 */ LZMAInputStream(InputStream in, long uncompSize, byte propsByte, int dictSize, byte[] presetDict)406 public LZMAInputStream(InputStream in, long uncompSize, byte propsByte, 407 int dictSize, byte[] presetDict) 408 throws IOException { 409 initialize(in, uncompSize, propsByte, dictSize, presetDict, 410 ArrayCache.getDefaultCache()); 411 } 412 413 /** 414 * Creates a new input stream that decompresses raw LZMA data (no .lzma 415 * header) from <code>in</code> optionally with a preset dictionary. 416 * <p> 417 * This is identical to <code>LZMAInputStream(InputStream, long, byte, int, 418 * byte[])</code> except that this also takes the <code>arrayCache</code> 419 * argument. 420 * 421 * @param in input stream from which LZMA-compressed 422 * data is read 423 * 424 * @param uncompSize uncompressed size of the LZMA stream or -1 425 * if the end marker is used in the LZMA stream 426 * 427 * @param propsByte LZMA properties byte that has the encoded 428 * values for literal context bits (lc), literal 429 * position bits (lp), and position bits (pb) 430 * 431 * @param dictSize dictionary size as bytes, must be in the range 432 * [<code>0</code>, <code>DICT_SIZE_MAX</code>] 433 * 434 * @param presetDict preset dictionary or <code>null</code> 435 * to use no preset dictionary 436 * 437 * @param arrayCache cache to be used for allocating large arrays 438 * 439 * @throws CorruptedInputException 440 * if <code>propsByte</code> is invalid or 441 * the first input byte is not 0x00 442 * 443 * @throws UnsupportedOptionsException 444 * dictionary size or uncompressed size is too 445 * big for this implementation 446 * 447 * @throws EOFException file is truncated or corrupt 448 * 449 * @throws IOException may be thrown by <code>in</code> 450 * 451 * @since 1.7 452 */ LZMAInputStream(InputStream in, long uncompSize, byte propsByte, int dictSize, byte[] presetDict, ArrayCache arrayCache)453 public LZMAInputStream(InputStream in, long uncompSize, byte propsByte, 454 int dictSize, byte[] presetDict, 455 ArrayCache arrayCache) 456 throws IOException { 457 initialize(in, uncompSize, propsByte, dictSize, presetDict, 458 arrayCache); 459 } 460 461 /** 462 * Creates a new input stream that decompresses raw LZMA data (no .lzma 463 * header) from <code>in</code> optionally with a preset dictionary. 464 * 465 * @param in input stream from which LZMA-compressed 466 * data is read 467 * 468 * @param uncompSize uncompressed size of the LZMA stream or -1 469 * if the end marker is used in the LZMA stream 470 * 471 * @param lc number of literal context bits, must be 472 * in the range [0, 8] 473 * 474 * @param lp number of literal position bits, must be 475 * in the range [0, 4] 476 * 477 * @param pb number position bits, must be 478 * in the range [0, 4] 479 * 480 * @param dictSize dictionary size as bytes, must be in the range 481 * [<code>0</code>, <code>DICT_SIZE_MAX</code>] 482 * 483 * @param presetDict preset dictionary or <code>null</code> 484 * to use no preset dictionary 485 * 486 * @throws CorruptedInputException 487 * if the first input byte is not 0x00 488 * 489 * @throws EOFException file is truncated or corrupt 490 * 491 * @throws IOException may be thrown by <code>in</code> 492 */ LZMAInputStream(InputStream in, long uncompSize, int lc, int lp, int pb, int dictSize, byte[] presetDict)493 public LZMAInputStream(InputStream in, long uncompSize, 494 int lc, int lp, int pb, 495 int dictSize, byte[] presetDict) 496 throws IOException { 497 initialize(in, uncompSize, lc, lp, pb, dictSize, presetDict, 498 ArrayCache.getDefaultCache()); 499 } 500 501 /** 502 * Creates a new input stream that decompresses raw LZMA data (no .lzma 503 * header) from <code>in</code> optionally with a preset dictionary. 504 * <p> 505 * This is identical to <code>LZMAInputStream(InputStream, long, int, int, 506 * int, int, byte[])</code> except that this also takes the 507 * <code>arrayCache</code> argument. 508 * 509 * @param in input stream from which LZMA-compressed 510 * data is read 511 * 512 * @param uncompSize uncompressed size of the LZMA stream or -1 513 * if the end marker is used in the LZMA stream 514 * 515 * @param lc number of literal context bits, must be 516 * in the range [0, 8] 517 * 518 * @param lp number of literal position bits, must be 519 * in the range [0, 4] 520 * 521 * @param pb number position bits, must be 522 * in the range [0, 4] 523 * 524 * @param dictSize dictionary size as bytes, must be in the range 525 * [<code>0</code>, <code>DICT_SIZE_MAX</code>] 526 * 527 * @param presetDict preset dictionary or <code>null</code> 528 * to use no preset dictionary 529 * 530 * @param arrayCache cache to be used for allocating large arrays 531 * 532 * @throws CorruptedInputException 533 * if the first input byte is not 0x00 534 * 535 * @throws EOFException file is truncated or corrupt 536 * 537 * @throws IOException may be thrown by <code>in</code> 538 * 539 * @since 1.7 540 */ LZMAInputStream(InputStream in, long uncompSize, int lc, int lp, int pb, int dictSize, byte[] presetDict, ArrayCache arrayCache)541 public LZMAInputStream(InputStream in, long uncompSize, 542 int lc, int lp, int pb, 543 int dictSize, byte[] presetDict, 544 ArrayCache arrayCache) 545 throws IOException { 546 initialize(in, uncompSize, lc, lp, pb, dictSize, presetDict, 547 arrayCache); 548 } 549 initialize(InputStream in, long uncompSize, byte propsByte, int dictSize, byte[] presetDict, ArrayCache arrayCache)550 private void initialize(InputStream in, long uncompSize, byte propsByte, 551 int dictSize, byte[] presetDict, 552 ArrayCache arrayCache) 553 throws IOException { 554 // Validate the uncompressed size since the other "initialize" throws 555 // IllegalArgumentException if uncompSize < -1. 556 if (uncompSize < -1) 557 throw new UnsupportedOptionsException( 558 "Uncompressed size is too big"); 559 560 // Decode the properties byte. In contrast to LZMA2, there is no 561 // limit of lc + lp <= 4. 562 int props = propsByte & 0xFF; 563 if (props > (4 * 5 + 4) * 9 + 8) 564 throw new CorruptedInputException("Invalid LZMA properties byte"); 565 566 int pb = props / (9 * 5); 567 props -= pb * 9 * 5; 568 int lp = props / 9; 569 int lc = props - lp * 9; 570 571 // Validate the dictionary size since the other "initialize" throws 572 // IllegalArgumentException if dictSize is not supported. 573 if (dictSize < 0 || dictSize > DICT_SIZE_MAX) 574 throw new UnsupportedOptionsException( 575 "LZMA dictionary is too big for this implementation"); 576 577 initialize(in, uncompSize, lc, lp, pb, dictSize, presetDict, 578 arrayCache); 579 } 580 initialize(InputStream in, long uncompSize, int lc, int lp, int pb, int dictSize, byte[] presetDict, ArrayCache arrayCache)581 private void initialize(InputStream in, long uncompSize, 582 int lc, int lp, int pb, 583 int dictSize, byte[] presetDict, 584 ArrayCache arrayCache) 585 throws IOException { 586 // getDictSize validates dictSize and gives a message in 587 // the exception too, so skip validating dictSize here. 588 if (uncompSize < -1 || lc < 0 || lc > 8 || lp < 0 || lp > 4 589 || pb < 0 || pb > 4) 590 throw new IllegalArgumentException(); 591 592 this.in = in; 593 this.arrayCache = arrayCache; 594 595 // If uncompressed size is known, use it to avoid wasting memory for 596 // a uselessly large dictionary buffer. 597 dictSize = getDictSize(dictSize); 598 if (uncompSize >= 0 && dictSize > uncompSize) 599 dictSize = getDictSize((int)uncompSize); 600 601 lz = new LZDecoder(getDictSize(dictSize), presetDict, arrayCache); 602 rc = new RangeDecoderFromStream(in); 603 lzma = new LZMADecoder(lz, rc, lc, lp, pb); 604 605 remainingSize = uncompSize; 606 } 607 608 /** 609 * Decompresses the next byte from this input stream. 610 * <p> 611 * Reading lots of data with <code>read()</code> from this input stream 612 * may be inefficient. Wrap it in <code>java.io.BufferedInputStream</code> 613 * if you need to read lots of data one byte at a time. 614 * 615 * @return the next decompressed byte, or <code>-1</code> 616 * to indicate the end of the compressed stream 617 * 618 * @throws CorruptedInputException 619 * 620 * @throws XZIOException if the stream has been closed 621 * 622 * @throws EOFException 623 * compressed input is truncated or corrupt 624 * 625 * @throws IOException may be thrown by <code>in</code> 626 */ read()627 public int read() throws IOException { 628 return read(tempBuf, 0, 1) == -1 ? -1 : (tempBuf[0] & 0xFF); 629 } 630 631 /** 632 * Decompresses into an array of bytes. 633 * <p> 634 * If <code>len</code> is zero, no bytes are read and <code>0</code> 635 * is returned. Otherwise this will block until <code>len</code> 636 * bytes have been decompressed, the end of the LZMA stream is reached, 637 * or an exception is thrown. 638 * 639 * @param buf target buffer for uncompressed data 640 * @param off start offset in <code>buf</code> 641 * @param len maximum number of uncompressed bytes to read 642 * 643 * @return number of bytes read, or <code>-1</code> to indicate 644 * the end of the compressed stream 645 * 646 * @throws CorruptedInputException 647 * 648 * @throws XZIOException if the stream has been closed 649 * 650 * @throws EOFException compressed input is truncated or corrupt 651 * 652 * @throws IOException may be thrown by <code>in</code> 653 */ read(byte[] buf, int off, int len)654 public int read(byte[] buf, int off, int len) throws IOException { 655 if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length) 656 throw new IndexOutOfBoundsException(); 657 658 if (len == 0) 659 return 0; 660 661 if (in == null) 662 throw new XZIOException("Stream closed"); 663 664 if (exception != null) 665 throw exception; 666 667 if (endReached) 668 return -1; 669 670 try { 671 int size = 0; 672 673 while (len > 0) { 674 // If uncompressed size is known and thus no end marker will 675 // be present, set the limit so that the uncompressed size 676 // won't be exceeded. 677 int copySizeMax = len; 678 if (remainingSize >= 0 && remainingSize < len) 679 copySizeMax = (int)remainingSize; 680 681 lz.setLimit(copySizeMax); 682 683 // Decode into the dictionary buffer. 684 try { 685 lzma.decode(); 686 } catch (CorruptedInputException e) { 687 // The end marker is encoded with a LZMA symbol that 688 // indicates maximum match distance. This is larger 689 // than any supported dictionary and thus causes 690 // CorruptedInputException from LZDecoder.repeat. 691 if (remainingSize != -1 || !lzma.endMarkerDetected()) 692 throw e; 693 694 endReached = true; 695 696 // The exception makes lzma.decode() miss the last range 697 // decoder normalization, so do it here. This might 698 // cause an IOException if it needs to read a byte 699 // from the input stream. 700 rc.normalize(); 701 } 702 703 // Copy from the dictionary to buf. 704 int copiedSize = lz.flush(buf, off); 705 off += copiedSize; 706 len -= copiedSize; 707 size += copiedSize; 708 709 if (remainingSize >= 0) { 710 // Update the number of bytes left to be decompressed. 711 remainingSize -= copiedSize; 712 assert remainingSize >= 0; 713 714 if (remainingSize == 0) 715 endReached = true; 716 } 717 718 if (endReached) { 719 // Checking these helps a lot when catching corrupt 720 // or truncated .lzma files. LZMA Utils doesn't do 721 // the first check and thus it accepts many invalid 722 // files that this implementation and XZ Utils don't. 723 if (!rc.isFinished() || lz.hasPending()) 724 throw new CorruptedInputException(); 725 726 putArraysToCache(); 727 return size == 0 ? -1 : size; 728 } 729 } 730 731 return size; 732 733 } catch (IOException e) { 734 exception = e; 735 throw e; 736 } 737 } 738 putArraysToCache()739 private void putArraysToCache() { 740 if (lz != null) { 741 lz.putArraysToCache(arrayCache); 742 lz = null; 743 } 744 } 745 746 /** 747 * Closes the stream and calls <code>in.close()</code>. 748 * If the stream was already closed, this does nothing. 749 * 750 * @throws IOException if thrown by <code>in.close()</code> 751 */ close()752 public void close() throws IOException { 753 if (in != null) { 754 putArraysToCache(); 755 756 try { 757 in.close(); 758 } finally { 759 in = null; 760 } 761 } 762 } 763 } 764