• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * LZMAInputStream
3  *
4  * Authors: Lasse Collin <lasse.collin@tukaani.org>
5  *          Igor Pavlov <http://7-zip.org/>
6  *
7  * This file has been put into the public domain.
8  * You can do whatever you want with this file.
9  */
10 
11 package org.tukaani.xz;
12 
13 import java.io.InputStream;
14 import java.io.DataInputStream;
15 import java.io.IOException;
16 import java.io.EOFException;
17 import org.tukaani.xz.lz.LZDecoder;
18 import org.tukaani.xz.rangecoder.RangeDecoderFromStream;
19 import org.tukaani.xz.lzma.LZMADecoder;
20 
21 /**
22  * Decompresses legacy .lzma files and raw LZMA streams (no .lzma header).
23  * <p>
24  * <b>IMPORTANT:</b> In contrast to other classes in this package, this class
25  * reads data from its input stream one byte at a time. If the input stream
26  * is for example {@link java.io.FileInputStream}, wrapping it into
27  * {@link java.io.BufferedInputStream} tends to improve performance a lot.
28  * This is not automatically done by this class because there may be use
29  * cases where it is desired that this class won't read any bytes past
30  * the end of the LZMA stream.
31  * <p>
32  * Even when using <code>BufferedInputStream</code>, the performance tends
33  * to be worse (maybe 10-20&nbsp;% slower) than with {@link LZMA2InputStream}
34  * or {@link XZInputStream} (when the .xz file contains LZMA2-compressed data).
35  *
36  * @since 1.4
37  */
38 public class LZMAInputStream extends InputStream {
39     /**
40      * Largest dictionary size supported by this implementation.
41      * <p>
42      * LZMA allows dictionaries up to one byte less than 4 GiB. This
43      * implementation supports only 16 bytes less than 2 GiB. This
44      * limitation is due to Java using signed 32-bit integers for array
45      * indexing. The limitation shouldn't matter much in practice since such
46      * huge dictionaries are not normally used.
47      */
48     public static final int DICT_SIZE_MAX = Integer.MAX_VALUE & ~15;
49 
50     private InputStream in;
51     private ArrayCache arrayCache;
52     private LZDecoder lz;
53     private RangeDecoderFromStream rc;
54     private LZMADecoder lzma;
55 
56     private boolean endReached = false;
57     private boolean relaxedEndCondition = false;
58 
59     private final byte[] tempBuf = new byte[1];
60 
61     /**
62      * Number of uncompressed bytes left to be decompressed, or -1 if
63      * the end marker is used.
64      */
65     private long remainingSize;
66 
67     private IOException exception = null;
68 
69     /**
70      * Gets approximate decompressor memory requirements as kibibytes for
71      * the given dictionary size and LZMA properties byte (lc, lp, and pb).
72      *
73      * @param       dictSize    LZMA dictionary size as bytes, should be
74      *                          in the range [<code>0</code>,
75      *                          <code>DICT_SIZE_MAX</code>]
76      *
77      * @param       propsByte   LZMA properties byte that encodes the values
78      *                          of lc, lp, and pb
79      *
80      * @return      approximate memory requirements as kibibytes (KiB)
81      *
82      * @throws      UnsupportedOptionsException
83      *                          if <code>dictSize</code> is outside
84      *                          the range [<code>0</code>,
85      *                          <code>DICT_SIZE_MAX</code>]
86      *
87      * @throws      CorruptedInputException
88      *                          if <code>propsByte</code> is invalid
89      */
getMemoryUsage(int dictSize, byte propsByte)90     public static int getMemoryUsage(int dictSize, byte propsByte)
91             throws UnsupportedOptionsException, CorruptedInputException {
92         if (dictSize < 0 || dictSize > DICT_SIZE_MAX)
93             throw new UnsupportedOptionsException(
94                     "LZMA dictionary is too big for this implementation");
95 
96         int props = propsByte & 0xFF;
97         if (props > (4 * 5 + 4) * 9 + 8)
98             throw new CorruptedInputException("Invalid LZMA properties byte");
99 
100         props %= 9 * 5;
101         int lp = props / 9;
102         int lc = props - lp * 9;
103 
104         return getMemoryUsage(dictSize, lc, lp);
105     }
106 
107     /**
108      * Gets approximate decompressor memory requirements as kibibytes for
109      * the given dictionary size, lc, and lp. Note that pb isn't needed.
110      *
111      * @param       dictSize    LZMA dictionary size as bytes, must be
112      *                          in the range [<code>0</code>,
113      *                          <code>DICT_SIZE_MAX</code>]
114      *
115      * @param       lc          number of literal context bits, must be
116      *                          in the range [0, 8]
117      *
118      * @param       lp          number of literal position bits, must be
119      *                          in the range [0, 4]
120      *
121      * @return      approximate memory requirements as kibibytes (KiB)
122      */
getMemoryUsage(int dictSize, int lc, int lp)123     public static int getMemoryUsage(int dictSize, int lc, int lp) {
124         if (lc < 0 || lc > 8 || lp < 0 || lp > 4)
125             throw new IllegalArgumentException("Invalid lc or lp");
126 
127         // Probability variables have the type "short". There are
128         // 0x300 (768) probability variables in each literal subcoder.
129         // The number of literal subcoders is 2^(lc + lp).
130         //
131         // Roughly 10 KiB for the base state + LZ decoder's dictionary buffer
132         // + sizeof(short) * number probability variables per literal subcoder
133         //   * number of literal subcoders
134         return 10 + getDictSize(dictSize) / 1024
135                + ((2 * 0x300) << (lc + lp)) / 1024;
136     }
137 
getDictSize(int dictSize)138     private static int getDictSize(int dictSize) {
139         if (dictSize < 0 || dictSize > DICT_SIZE_MAX)
140             throw new IllegalArgumentException(
141                     "LZMA dictionary is too big for this implementation");
142 
143         // For performance reasons, use a 4 KiB dictionary if something
144         // smaller was requested. It's a rare situation and the performance
145         // difference isn't huge, and it starts to matter mostly when the
146         // dictionary is just a few bytes. But we need to handle the special
147         // case of dictSize == 0 anyway, which is an allowed value but in
148         // practice means one-byte dictionary.
149         //
150         // Note that using a dictionary bigger than specified in the headers
151         // can hide errors if there is a reference to data beyond the original
152         // dictionary size but is still within 4 KiB.
153         if (dictSize < 4096)
154             dictSize = 4096;
155 
156         // Round dictionary size upward to a multiple of 16. This way LZMA
157         // can use LZDecoder.getPos() for calculating LZMA's posMask.
158         return (dictSize + 15) & ~15;
159     }
160 
161     /**
162      * Creates a new .lzma file format decompressor without
163      * a memory usage limit.
164      *
165      * @param       in          input stream from which .lzma data is read;
166      *                          it might be a good idea to wrap it in
167      *                          <code>BufferedInputStream</code>, see the
168      *                          note at the top of this page
169      *
170      * @throws      CorruptedInputException
171      *                          file is corrupt or perhaps not in
172      *                          the .lzma format at all
173      *
174      * @throws      UnsupportedOptionsException
175      *                          dictionary size or uncompressed size is too
176      *                          big for this implementation
177      *
178      * @throws      EOFException
179      *                          file is truncated or perhaps not in
180      *                          the .lzma format at all
181      *
182      * @throws      IOException may be thrown by <code>in</code>
183      */
public LZMAInputStream(InputStream in) throws IOException {
    // -1 means "no memory usage limit".
    this(in, -1);
}
187 
188     /**
189      * Creates a new .lzma file format decompressor without
190      * a memory usage limit.
191      * <p>
192      * This is identical to <code>LZMAInputStream(InputStream)</code>
193      * except that this also takes the <code>arrayCache</code> argument.
194      *
195      * @param       in          input stream from which .lzma data is read;
196      *                          it might be a good idea to wrap it in
197      *                          <code>BufferedInputStream</code>, see the
198      *                          note at the top of this page
199      *
200      *
201      * @param       arrayCache  cache to be used for allocating large arrays
202      *
203      * @throws      CorruptedInputException
204      *                          file is corrupt or perhaps not in
205      *                          the .lzma format at all
206      *
207      * @throws      UnsupportedOptionsException
208      *                          dictionary size or uncompressed size is too
209      *                          big for this implementation
210      *
211      * @throws      EOFException
212      *                          file is truncated or perhaps not in
213      *                          the .lzma format at all
214      *
215      * @throws      IOException may be thrown by <code>in</code>
216      *
217      * @since 1.7
218      */
public LZMAInputStream(InputStream in, ArrayCache arrayCache)
        throws IOException {
    // -1 means "no memory usage limit".
    this(in, -1, arrayCache);
}
223 
224     /**
225      * Creates a new .lzma file format decompressor with an optional
226      * memory usage limit.
227      *
228      * @param       in          input stream from which .lzma data is read;
229      *                          it might be a good idea to wrap it in
230      *                          <code>BufferedInputStream</code>, see the
231      *                          note at the top of this page
232      *
233      * @param       memoryLimit memory usage limit in kibibytes (KiB)
234      *                          or <code>-1</code> to impose no
235      *                          memory usage limit
236      *
237      * @throws      CorruptedInputException
238      *                          file is corrupt or perhaps not in
239      *                          the .lzma format at all
240      *
241      * @throws      UnsupportedOptionsException
242      *                          dictionary size or uncompressed size is too
243      *                          big for this implementation
244      *
245      * @throws      MemoryLimitException
246      *                          memory usage limit was exceeded
247      *
248      * @throws      EOFException
249      *                          file is truncated or perhaps not in
250      *                          the .lzma format at all
251      *
252      * @throws      IOException may be thrown by <code>in</code>
253      */
public LZMAInputStream(InputStream in, int memoryLimit)
        throws IOException {
    this(in, memoryLimit, ArrayCache.getDefaultCache());
}
258 
259     /**
260      * Creates a new .lzma file format decompressor with an optional
261      * memory usage limit.
262      * <p>
263      * This is identical to <code>LZMAInputStream(InputStream, int)</code>
264      * except that this also takes the <code>arrayCache</code> argument.
265      *
266      * @param       in          input stream from which .lzma data is read;
267      *                          it might be a good idea to wrap it in
268      *                          <code>BufferedInputStream</code>, see the
269      *                          note at the top of this page
270      *
271      * @param       memoryLimit memory usage limit in kibibytes (KiB)
272      *                          or <code>-1</code> to impose no
273      *                          memory usage limit
274      *
275      * @param       arrayCache  cache to be used for allocating large arrays
276      *
277      * @throws      CorruptedInputException
278      *                          file is corrupt or perhaps not in
279      *                          the .lzma format at all
280      *
281      * @throws      UnsupportedOptionsException
282      *                          dictionary size or uncompressed size is too
283      *                          big for this implementation
284      *
285      * @throws      MemoryLimitException
286      *                          memory usage limit was exceeded
287      *
288      * @throws      EOFException
289      *                          file is truncated or perhaps not in
290      *                          the .lzma format at all
291      *
292      * @throws      IOException may be thrown by <code>in</code>
293      *
294      * @since 1.7
295      */
LZMAInputStream(InputStream in, int memoryLimit, ArrayCache arrayCache)296     public LZMAInputStream(InputStream in, int memoryLimit,
297                            ArrayCache arrayCache) throws IOException {
298         DataInputStream inData = new DataInputStream(in);
299 
300         // Properties byte (lc, lp, and pb)
301         byte propsByte = inData.readByte();
302 
303         // Dictionary size is an unsigned 32-bit little endian integer.
304         int dictSize = 0;
305         for (int i = 0; i < 4; ++i)
306             dictSize |= inData.readUnsignedByte() << (8 * i);
307 
308         // Uncompressed size is an unsigned 64-bit little endian integer.
309         // The maximum 64-bit value is a special case (becomes -1 here)
310         // which indicates that the end marker is used instead of knowing
311         // the uncompressed size beforehand.
312         long uncompSize = 0;
313         for (int i = 0; i < 8; ++i)
314             uncompSize |= (long)inData.readUnsignedByte() << (8 * i);
315 
316         // Check the memory usage limit.
317         int memoryNeeded = getMemoryUsage(dictSize, propsByte);
318         if (memoryLimit != -1 && memoryNeeded > memoryLimit)
319             throw new MemoryLimitException(memoryNeeded, memoryLimit);
320 
321         initialize(in, uncompSize, propsByte, dictSize, null, arrayCache);
322     }
323 
324     /**
325      * Creates a new input stream that decompresses raw LZMA data (no .lzma
326      * header) from <code>in</code>.
327      * <p>
328      * The caller needs to know if the "end of payload marker (EOPM)" alias
329      * "end of stream marker (EOS marker)" alias "end marker" is present.
330      * If the end marker isn't used, the caller must know the exact
331      * uncompressed size of the stream.
332      * <p>
333      * The caller also needs to provide the LZMA properties byte that encodes
334      * the number of literal context bits (lc), literal position bits (lp),
335      * and position bits (pb).
336      * <p>
337      * The dictionary size used when compressing is also needed. Specifying
338      * a too small dictionary size will prevent decompressing the stream.
339      * Specifying a too big dictionary is a waste of memory but decompression
340      * will work.
341      * <p>
342      * There is no need to specify a dictionary bigger than
343      * the uncompressed size of the data even if a bigger dictionary
344      * was used when compressing. If you know the uncompressed size
345      * of the data, this might allow saving some memory.
346      *
347      * @param       in          input stream from which compressed
348      *                          data is read
349      *
350      * @param       uncompSize  uncompressed size of the LZMA stream or -1
351      *                          if the end marker is used in the LZMA stream
352      *
353      * @param       propsByte   LZMA properties byte that has the encoded
354      *                          values for literal context bits (lc), literal
355      *                          position bits (lp), and position bits (pb)
356      *
357      * @param       dictSize    dictionary size as bytes, must be in the range
358      *                          [<code>0</code>, <code>DICT_SIZE_MAX</code>]
359      *
360      * @throws      CorruptedInputException
361      *                          if <code>propsByte</code> is invalid or
362      *                          the first input byte is not 0x00
363      *
364      * @throws      UnsupportedOptionsException
365      *                          dictionary size or uncompressed size is too
366      *                          big for this implementation
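367      * @throws      EOFException file is truncated or corrupt
368      * @throws      IOException may be thrown by <code>in</code>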
369      */
public LZMAInputStream(InputStream in, long uncompSize, byte propsByte,
                       int dictSize) throws IOException {
    // No preset dictionary; arrays come from the default cache.
    initialize(in, uncompSize, propsByte, dictSize, null,
               ArrayCache.getDefaultCache());
}
375 
376     /**
377      * Creates a new input stream that decompresses raw LZMA data (no .lzma
378      * header) from <code>in</code> optionally with a preset dictionary.
379      *
380      * @param       in          input stream from which LZMA-compressed
381      *                          data is read
382      *
383      * @param       uncompSize  uncompressed size of the LZMA stream or -1
384      *                          if the end marker is used in the LZMA stream
385      *
386      * @param       propsByte   LZMA properties byte that has the encoded
387      *                          values for literal context bits (lc), literal
388      *                          position bits (lp), and position bits (pb)
389      *
390      * @param       dictSize    dictionary size as bytes, must be in the range
391      *                          [<code>0</code>, <code>DICT_SIZE_MAX</code>]
392      *
393      * @param       presetDict  preset dictionary or <code>null</code>
394      *                          to use no preset dictionary
395      *
396      * @throws      CorruptedInputException
397      *                          if <code>propsByte</code> is invalid or
398      *                          the first input byte is not 0x00
399      *
400      * @throws      UnsupportedOptionsException
401      *                          dictionary size or uncompressed size is too
402      *                          big for this implementation
403      *
404      * @throws      EOFException file is truncated or corrupt
405      *
406      * @throws      IOException may be thrown by <code>in</code>
407      */
public LZMAInputStream(InputStream in, long uncompSize, byte propsByte,
                       int dictSize, byte[] presetDict)
        throws IOException {
    // Arrays come from the default cache.
    initialize(in, uncompSize, propsByte, dictSize, presetDict,
               ArrayCache.getDefaultCache());
}
414 
415     /**
416      * Creates a new input stream that decompresses raw LZMA data (no .lzma
417      * header) from <code>in</code> optionally with a preset dictionary.
418      * <p>
419      * This is identical to <code>LZMAInputStream(InputStream, long, byte, int,
420      * byte[])</code> except that this also takes the <code>arrayCache</code>
421      * argument.
422      *
423      * @param       in          input stream from which LZMA-compressed
424      *                          data is read
425      *
426      * @param       uncompSize  uncompressed size of the LZMA stream or -1
427      *                          if the end marker is used in the LZMA stream
428      *
429      * @param       propsByte   LZMA properties byte that has the encoded
430      *                          values for literal context bits (lc), literal
431      *                          position bits (lp), and position bits (pb)
432      *
433      * @param       dictSize    dictionary size as bytes, must be in the range
434      *                          [<code>0</code>, <code>DICT_SIZE_MAX</code>]
435      *
436      * @param       presetDict  preset dictionary or <code>null</code>
437      *                          to use no preset dictionary
438      *
439      * @param       arrayCache  cache to be used for allocating large arrays
440      *
441      * @throws      CorruptedInputException
442      *                          if <code>propsByte</code> is invalid or
443      *                          the first input byte is not 0x00
444      *
445      * @throws      UnsupportedOptionsException
446      *                          dictionary size or uncompressed size is too
447      *                          big for this implementation
448      *
449      * @throws      EOFException file is truncated or corrupt
450      *
451      * @throws      IOException may be thrown by <code>in</code>
452      *
453      * @since 1.7
454      */
public LZMAInputStream(InputStream in, long uncompSize, byte propsByte,
                       int dictSize, byte[] presetDict,
                       ArrayCache arrayCache)
        throws IOException {
    // All validation and decoder setup happens in initialize().
    initialize(in, uncompSize, propsByte, dictSize, presetDict,
               arrayCache);
}
462 
463     /**
464      * Creates a new input stream that decompresses raw LZMA data (no .lzma
465      * header) from <code>in</code> optionally with a preset dictionary.
466      *
467      * @param       in          input stream from which LZMA-compressed
468      *                          data is read
469      *
470      * @param       uncompSize  uncompressed size of the LZMA stream or -1
471      *                          if the end marker is used in the LZMA stream
472      *
473      * @param       lc          number of literal context bits, must be
474      *                          in the range [0, 8]
475      *
476      * @param       lp          number of literal position bits, must be
477      *                          in the range [0, 4]
478      *
479      * @param       pb          number of position bits, must be
480      *                          in the range [0, 4]
481      *
482      * @param       dictSize    dictionary size as bytes, must be in the range
483      *                          [<code>0</code>, <code>DICT_SIZE_MAX</code>]
484      *
485      * @param       presetDict  preset dictionary or <code>null</code>
486      *                          to use no preset dictionary
487      *
488      * @throws      CorruptedInputException
489      *                          if the first input byte is not 0x00
490      *
491      * @throws      EOFException file is truncated or corrupt
492      *
493      * @throws      IOException may be thrown by <code>in</code>
494      */
public LZMAInputStream(InputStream in, long uncompSize,
                       int lc, int lp, int pb,
                       int dictSize, byte[] presetDict)
        throws IOException {
    // Arrays come from the default cache. Invalid arguments are
    // rejected by initialize() with IllegalArgumentException.
    initialize(in, uncompSize, lc, lp, pb, dictSize, presetDict,
               ArrayCache.getDefaultCache());
}
502 
503     /**
504      * Creates a new input stream that decompresses raw LZMA data (no .lzma
505      * header) from <code>in</code> optionally with a preset dictionary.
506      * <p>
507      * This is identical to <code>LZMAInputStream(InputStream, long, int, int,
508      * int, int, byte[])</code> except that this also takes the
509      * <code>arrayCache</code> argument.
510      *
511      * @param       in          input stream from which LZMA-compressed
512      *                          data is read
513      *
514      * @param       uncompSize  uncompressed size of the LZMA stream or -1
515      *                          if the end marker is used in the LZMA stream
516      *
517      * @param       lc          number of literal context bits, must be
518      *                          in the range [0, 8]
519      *
520      * @param       lp          number of literal position bits, must be
521      *                          in the range [0, 4]
522      *
523      * @param       pb          number of position bits, must be
524      *                          in the range [0, 4]
525      *
526      * @param       dictSize    dictionary size as bytes, must be in the range
527      *                          [<code>0</code>, <code>DICT_SIZE_MAX</code>]
528      *
529      * @param       presetDict  preset dictionary or <code>null</code>
530      *                          to use no preset dictionary
531      *
532      * @param       arrayCache  cache to be used for allocating large arrays
533      *
534      * @throws      CorruptedInputException
535      *                          if the first input byte is not 0x00
536      *
537      * @throws      EOFException file is truncated or corrupt
538      *
539      * @throws      IOException may be thrown by <code>in</code>
540      *
541      * @since 1.7
542      */
public LZMAInputStream(InputStream in, long uncompSize,
                       int lc, int lp, int pb,
                       int dictSize, byte[] presetDict,
                       ArrayCache arrayCache)
        throws IOException {
    // Invalid arguments are rejected by initialize() with
    // IllegalArgumentException.
    initialize(in, uncompSize, lc, lp, pb, dictSize, presetDict,
               arrayCache);
}
551 
initialize(InputStream in, long uncompSize, byte propsByte, int dictSize, byte[] presetDict, ArrayCache arrayCache)552     private void initialize(InputStream in, long uncompSize, byte propsByte,
553                             int dictSize, byte[] presetDict,
554                             ArrayCache arrayCache)
555             throws IOException {
556         // Validate the uncompressed size since the other "initialize" throws
557         // IllegalArgumentException if uncompSize < -1.
558         if (uncompSize < -1)
559             throw new UnsupportedOptionsException(
560                     "Uncompressed size is too big");
561 
562         // Decode the properties byte. In contrast to LZMA2, there is no
563         // limit of lc + lp <= 4.
564         int props = propsByte & 0xFF;
565         if (props > (4 * 5 + 4) * 9 + 8)
566             throw new CorruptedInputException("Invalid LZMA properties byte");
567 
568         int pb = props / (9 * 5);
569         props -= pb * 9 * 5;
570         int lp = props / 9;
571         int lc = props - lp * 9;
572 
573         // Validate the dictionary size since the other "initialize" throws
574         // IllegalArgumentException if dictSize is not supported.
575         if (dictSize < 0 || dictSize > DICT_SIZE_MAX)
576             throw new UnsupportedOptionsException(
577                     "LZMA dictionary is too big for this implementation");
578 
579         initialize(in, uncompSize, lc, lp, pb, dictSize, presetDict,
580                    arrayCache);
581     }
582 
initialize(InputStream in, long uncompSize, int lc, int lp, int pb, int dictSize, byte[] presetDict, ArrayCache arrayCache)583     private void initialize(InputStream in, long uncompSize,
584                             int lc, int lp, int pb,
585                             int dictSize, byte[] presetDict,
586                             ArrayCache arrayCache)
587             throws IOException {
588         // getDictSize validates dictSize and gives a message in
589         // the exception too, so skip validating dictSize here.
590         if (uncompSize < -1 || lc < 0 || lc > 8 || lp < 0 || lp > 4
591                 || pb < 0 || pb > 4)
592             throw new IllegalArgumentException();
593 
594         this.in = in;
595         this.arrayCache = arrayCache;
596 
597         // If uncompressed size is known, use it to avoid wasting memory for
598         // a uselessly large dictionary buffer.
599         dictSize = getDictSize(dictSize);
600         if (uncompSize >= 0 && dictSize > uncompSize)
601             dictSize = getDictSize((int)uncompSize);
602 
603         lz = new LZDecoder(getDictSize(dictSize), presetDict, arrayCache);
604         rc = new RangeDecoderFromStream(in);
605         lzma = new LZMADecoder(lz, rc, lc, lp, pb);
606 
607         remainingSize = uncompSize;
608     }
609 
610     /**
611      * Enables relaxed end-of-stream condition when uncompressed size is known.
612      * This is useful if uncompressed size is known but it is unknown if
613      * the end of stream (EOS) marker is present. After calling this function,
614      * both are allowed.
615      * <p>
616      * Note that this doesn't actually check if the EOS marker is present.
617      * This introduces a few minor downsides:
618      * <ul>
619      *   <li>Some (not all!) streams that would have more data than
620      *   the specified uncompressed size, for example due to data corruption,
621      *   will be accepted as valid.</li>
622      *   <li>After <code>read</code> has returned <code>-1</code> the
623      *   input position might not be at the end of the stream (too little
624      *   input may have been read).</li>
625      * </ul>
626      * <p>
627      * This should be called after the constructor before reading any data
628      * from the stream. This is a separate function because adding even more
629      * constructors to this class didn't look like a good alternative.
630      *
631      * @since 1.9
632      */
enableRelaxedEndCondition()633     public void enableRelaxedEndCondition() {
634         relaxedEndCondition = true;
635     }
636 
637     /**
638      * Decompresses the next byte from this input stream.
639      * <p>
640      * Reading lots of data with <code>read()</code> from this input stream
641      * may be inefficient. Wrap it in <code>java.io.BufferedInputStream</code>
642      * if you need to read lots of data one byte at a time.
643      *
644      * @return      the next decompressed byte, or <code>-1</code>
645      *              to indicate the end of the compressed stream
646      *
647      * @throws      CorruptedInputException
648      *
649      * @throws      XZIOException if the stream has been closed
650      *
651      * @throws      EOFException
652      *                          compressed input is truncated or corrupt
653      *
654      * @throws      IOException may be thrown by <code>in</code>
655      */
read()656     public int read() throws IOException {
657         return read(tempBuf, 0, 1) == -1 ? -1 : (tempBuf[0] & 0xFF);
658     }
659 
660     /**
661      * Decompresses into an array of bytes.
662      * <p>
663      * If <code>len</code> is zero, no bytes are read and <code>0</code>
664      * is returned. Otherwise this will block until <code>len</code>
665      * bytes have been decompressed, the end of the LZMA stream is reached,
666      * or an exception is thrown.
667      *
668      * @param       buf         target buffer for uncompressed data
669      * @param       off         start offset in <code>buf</code>
670      * @param       len         maximum number of uncompressed bytes to read
671      *
672      * @return      number of bytes read, or <code>-1</code> to indicate
673      *              the end of the compressed stream
674      *
675      * @throws      CorruptedInputException
676      *
677      * @throws      XZIOException if the stream has been closed
678      *
679      * @throws      EOFException compressed input is truncated or corrupt
680      *
681      * @throws      IOException may be thrown by <code>in</code>
682      */
read(byte[] buf, int off, int len)683     public int read(byte[] buf, int off, int len) throws IOException {
684         if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length)
685             throw new IndexOutOfBoundsException();
686 
687         if (len == 0)
688             return 0;
689 
690         if (in == null)
691             throw new XZIOException("Stream closed");
692 
693         if (exception != null)
694             throw exception;
695 
696         if (endReached)
697             return -1;
698 
699         try {
700             int size = 0;
701 
702             while (len > 0) {
703                 // If uncompressed size is known and thus no end marker will
704                 // be present, set the limit so that the uncompressed size
705                 // won't be exceeded.
706                 int copySizeMax = len;
707                 if (remainingSize >= 0 && remainingSize < len)
708                     copySizeMax = (int)remainingSize;
709 
710                 lz.setLimit(copySizeMax);
711 
712                 // Decode into the dictionary buffer.
713                 try {
714                     lzma.decode();
715                 } catch (CorruptedInputException e) {
716                     // The end marker is encoded with a LZMA symbol that
717                     // indicates maximum match distance. This is larger
718                     // than any supported dictionary and thus causes
719                     // CorruptedInputException from LZDecoder.repeat.
720                     if (remainingSize != -1 || !lzma.endMarkerDetected())
721                         throw e;
722 
723                     endReached = true;
724 
725                     // The exception makes lzma.decode() miss the last range
726                     // decoder normalization, so do it here. This might
727                     // cause an IOException if it needs to read a byte
728                     // from the input stream.
729                     rc.normalize();
730                 }
731 
732                 // Copy from the dictionary to buf.
733                 int copiedSize = lz.flush(buf, off);
734                 off += copiedSize;
735                 len -= copiedSize;
736                 size += copiedSize;
737 
738                 if (remainingSize >= 0) {
739                     // Update the number of bytes left to be decompressed.
740                     remainingSize -= copiedSize;
741                     assert remainingSize >= 0;
742 
743                     if (remainingSize == 0)
744                         endReached = true;
745                 }
746 
747                 if (endReached) {
748                     // Checking these helps a lot when catching corrupt
749                     // or truncated .lzma files. LZMA Utils doesn't do
750                     // the second check and thus it accepts many invalid
751                     // files that this implementation and XZ Utils don't.
752                     if (lz.hasPending() || (!relaxedEndCondition
753                                             && !rc.isFinished()))
754                         throw new CorruptedInputException();
755 
756                     putArraysToCache();
757                     return size == 0 ? -1 : size;
758                 }
759             }
760 
761             return size;
762 
763         } catch (IOException e) {
764             exception = e;
765             throw e;
766         }
767     }
768 
putArraysToCache()769     private void putArraysToCache() {
770         if (lz != null) {
771             lz.putArraysToCache(arrayCache);
772             lz = null;
773         }
774     }
775 
776     /**
777      * Closes the stream and calls <code>in.close()</code>.
778      * If the stream was already closed, this does nothing.
779      *
780      * @throws  IOException if thrown by <code>in.close()</code>
781      */
close()782     public void close() throws IOException {
783         if (in != null) {
784             putArraysToCache();
785 
786             try {
787                 in.close();
788             } finally {
789                 in = null;
790             }
791         }
792     }
793 }
794