• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
 * LZMA2InputStream
 *
 * Authors: Lasse Collin <lasse.collin@tukaani.org>
 *          Igor Pavlov <http://7-zip.org/>
 *
 * This file has been put into the public domain.
 * You can do whatever you want with this file.
 */
10 
11 package org.tukaani.xz;
12 
13 import java.io.InputStream;
14 import java.io.DataInputStream;
15 import java.io.IOException;
16 import java.io.EOFException;
17 import org.tukaani.xz.lz.LZDecoder;
18 import org.tukaani.xz.rangecoder.RangeDecoderFromBuffer;
19 import org.tukaani.xz.lzma.LZMADecoder;
20 
21 /**
22  * Decompresses a raw LZMA2 stream (no XZ headers).
23  */
24 public class LZMA2InputStream extends InputStream {
25     /**
26      * Smallest valid LZMA2 dictionary size.
27      * <p>
28      * Very tiny dictionaries would be a performance problem, so
29      * the minimum is 4 KiB.
30      */
31     public static final int DICT_SIZE_MIN = 4096;
32 
33     /**
34      * Largest dictionary size supported by this implementation.
35      * <p>
36      * The LZMA2 algorithm allows dictionaries up to one byte less than 4 GiB.
37      * This implementation supports only 16 bytes less than 2 GiB for raw
38      * LZMA2 streams, and for .xz files the maximum is 1.5 GiB. This
39      * limitation is due to Java using signed 32-bit integers for array
40      * indexing. The limitation shouldn't matter much in practice since so
41      * huge dictionaries are not normally used.
42      */
43     public static final int DICT_SIZE_MAX = Integer.MAX_VALUE & ~15;
44 
45     private static final int COMPRESSED_SIZE_MAX = 1 << 16;
46 
47     private final ArrayCache arrayCache;
48     private DataInputStream in;
49 
50     private LZDecoder lz;
51     private RangeDecoderFromBuffer rc;
52     private LZMADecoder lzma;
53 
54     private int uncompressedSize = 0;
55     private boolean isLZMAChunk = false;
56 
57     private boolean needDictReset = true;
58     private boolean needProps = true;
59     private boolean endReached = false;
60 
61     private IOException exception = null;
62 
63     private final byte[] tempBuf = new byte[1];
64 
65     /**
66      * Gets approximate decompressor memory requirements as kibibytes for
67      * the given dictionary size.
68      *
69      * @param       dictSize    LZMA2 dictionary size as bytes, must be
70      *                          in the range [<code>DICT_SIZE_MIN</code>,
71      *                          <code>DICT_SIZE_MAX</code>]
72      *
73      * @return      approximate memory requirements as kibibytes (KiB)
74      */
getMemoryUsage(int dictSize)75     public static int getMemoryUsage(int dictSize) {
76         // The base state is around 30-40 KiB (probabilities etc.),
77         // range decoder needs COMPRESSED_SIZE_MAX bytes for buffering,
78         // and LZ decoder needs a dictionary buffer.
79         return 40 + COMPRESSED_SIZE_MAX / 1024 + getDictSize(dictSize) / 1024;
80     }
81 
getDictSize(int dictSize)82     private static int getDictSize(int dictSize) {
83         if (dictSize < DICT_SIZE_MIN || dictSize > DICT_SIZE_MAX)
84             throw new IllegalArgumentException(
85                     "Unsupported dictionary size " + dictSize);
86 
87         // Round dictionary size upward to a multiple of 16. This way LZMA
88         // can use LZDecoder.getPos() for calculating LZMA's posMask.
89         // Note that this check is needed only for raw LZMA2 streams; it is
90         // redundant with .xz.
91         return (dictSize + 15) & ~15;
92     }
93 
94     /**
95      * Creates a new input stream that decompresses raw LZMA2 data
96      * from <code>in</code>.
97      * <p>
98      * The caller needs to know the dictionary size used when compressing;
99      * the dictionary size isn't stored as part of a raw LZMA2 stream.
100      * <p>
101      * Specifying a too small dictionary size will prevent decompressing
102      * the stream. Specifying a too big dictionary is waste of memory but
103      * decompression will work.
104      * <p>
105      * There is no need to specify a dictionary bigger than
106      * the uncompressed size of the data even if a bigger dictionary
107      * was used when compressing. If you know the uncompressed size
108      * of the data, this might allow saving some memory.
109      *
110      * @param       in          input stream from which LZMA2-compressed
111      *                          data is read
112      *
113      * @param       dictSize    LZMA2 dictionary size as bytes, must be
114      *                          in the range [<code>DICT_SIZE_MIN</code>,
115      *                          <code>DICT_SIZE_MAX</code>]
116      */
LZMA2InputStream(InputStream in, int dictSize)117     public LZMA2InputStream(InputStream in, int dictSize) {
118         this(in, dictSize, null);
119     }
120 
121     /**
122      * Creates a new LZMA2 decompressor using a preset dictionary.
123      * <p>
124      * This is like <code>LZMA2InputStream(InputStream, int)</code> except
125      * that the dictionary may be initialized using a preset dictionary.
126      * If a preset dictionary was used when compressing the data, the
127      * same preset dictionary must be provided when decompressing.
128      *
129      * @param       in          input stream from which LZMA2-compressed
130      *                          data is read
131      *
132      * @param       dictSize    LZMA2 dictionary size as bytes, must be
133      *                          in the range [<code>DICT_SIZE_MIN</code>,
134      *                          <code>DICT_SIZE_MAX</code>]
135      *
136      * @param       presetDict  preset dictionary or <code>null</code>
137      *                          to use no preset dictionary
138      */
LZMA2InputStream(InputStream in, int dictSize, byte[] presetDict)139     public LZMA2InputStream(InputStream in, int dictSize, byte[] presetDict) {
140         this(in, dictSize, presetDict, ArrayCache.getDefaultCache());
141     }
142 
143     /**
144      * Creates a new LZMA2 decompressor using a preset dictionary
145      * and array cache.
146      * <p>
147      * This is like <code>LZMA2InputStream(InputStream, int, byte[])</code>
148      * except that this also takes the <code>arrayCache</code> argument.
149      *
150      * @param       in          input stream from which LZMA2-compressed
151      *                          data is read
152      *
153      * @param       dictSize    LZMA2 dictionary size as bytes, must be
154      *                          in the range [<code>DICT_SIZE_MIN</code>,
155      *                          <code>DICT_SIZE_MAX</code>]
156      *
157      * @param       presetDict  preset dictionary or <code>null</code>
158      *                          to use no preset dictionary
159      *
160      * @param       arrayCache  cache to be used for allocating large arrays
161      *
162      * @since 1.7
163      */
LZMA2InputStream(InputStream in, int dictSize, byte[] presetDict, ArrayCache arrayCache)164     LZMA2InputStream(InputStream in, int dictSize, byte[] presetDict,
165                      ArrayCache arrayCache) {
166         // Check for null because otherwise null isn't detect
167         // in this constructor.
168         if (in == null)
169             throw new NullPointerException();
170 
171         this.arrayCache = arrayCache;
172         this.in = new DataInputStream(in);
173         this.rc = new RangeDecoderFromBuffer(COMPRESSED_SIZE_MAX, arrayCache);
174         this.lz = new LZDecoder(getDictSize(dictSize), presetDict, arrayCache);
175 
176         if (presetDict != null && presetDict.length > 0)
177             needDictReset = false;
178     }
179 
180     /**
181      * Decompresses the next byte from this input stream.
182      * <p>
183      * Reading lots of data with <code>read()</code> from this input stream
184      * may be inefficient. Wrap it in <code>java.io.BufferedInputStream</code>
185      * if you need to read lots of data one byte at a time.
186      *
187      * @return      the next decompressed byte, or <code>-1</code>
188      *              to indicate the end of the compressed stream
189      *
190      * @throws      CorruptedInputException
191      *
192      * @throws      XZIOException if the stream has been closed
193      *
194      * @throws      EOFException
195      *                          compressed input is truncated or corrupt
196      *
197      * @throws      IOException may be thrown by <code>in</code>
198      */
read()199     public int read() throws IOException {
200         return read(tempBuf, 0, 1) == -1 ? -1 : (tempBuf[0] & 0xFF);
201     }
202 
203     /**
204      * Decompresses into an array of bytes.
205      * <p>
206      * If <code>len</code> is zero, no bytes are read and <code>0</code>
207      * is returned. Otherwise this will block until <code>len</code>
208      * bytes have been decompressed, the end of the LZMA2 stream is reached,
209      * or an exception is thrown.
210      *
211      * @param       buf         target buffer for uncompressed data
212      * @param       off         start offset in <code>buf</code>
213      * @param       len         maximum number of uncompressed bytes to read
214      *
215      * @return      number of bytes read, or <code>-1</code> to indicate
216      *              the end of the compressed stream
217      *
218      * @throws      CorruptedInputException
219      *
220      * @throws      XZIOException if the stream has been closed
221      *
222      * @throws      EOFException
223      *                          compressed input is truncated or corrupt
224      *
225      * @throws      IOException may be thrown by <code>in</code>
226      */
read(byte[] buf, int off, int len)227     public int read(byte[] buf, int off, int len) throws IOException {
228         if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length)
229             throw new IndexOutOfBoundsException();
230 
231         if (len == 0)
232             return 0;
233 
234         if (in == null)
235             throw new XZIOException("Stream closed");
236 
237         if (exception != null)
238             throw exception;
239 
240         if (endReached)
241             return -1;
242 
243         try {
244             int size = 0;
245 
246             while (len > 0) {
247                 if (uncompressedSize == 0) {
248                     decodeChunkHeader();
249                     if (endReached)
250                         return size == 0 ? -1 : size;
251                 }
252 
253                 int copySizeMax = Math.min(uncompressedSize, len);
254 
255                 if (!isLZMAChunk) {
256                     lz.copyUncompressed(in, copySizeMax);
257                 } else {
258                     lz.setLimit(copySizeMax);
259                     lzma.decode();
260                 }
261 
262                 int copiedSize = lz.flush(buf, off);
263                 off += copiedSize;
264                 len -= copiedSize;
265                 size += copiedSize;
266                 uncompressedSize -= copiedSize;
267 
268                 if (uncompressedSize == 0)
269                     if (!rc.isFinished() || lz.hasPending())
270                         throw new CorruptedInputException();
271             }
272 
273             return size;
274 
275         } catch (IOException e) {
276             exception = e;
277             throw e;
278         }
279     }
280 
decodeChunkHeader()281     private void decodeChunkHeader() throws IOException {
282         int control = in.readUnsignedByte();
283 
284         if (control == 0x00) {
285             endReached = true;
286             putArraysToCache();
287             return;
288         }
289 
290         if (control >= 0xE0 || control == 0x01) {
291             needProps = true;
292             needDictReset = false;
293             lz.reset();
294         } else if (needDictReset) {
295             throw new CorruptedInputException();
296         }
297 
298         if (control >= 0x80) {
299             isLZMAChunk = true;
300 
301             uncompressedSize = (control & 0x1F) << 16;
302             uncompressedSize += in.readUnsignedShort() + 1;
303 
304             int compressedSize = in.readUnsignedShort() + 1;
305 
306             if (control >= 0xC0) {
307                 needProps = false;
308                 decodeProps();
309 
310             } else if (needProps) {
311                 throw new CorruptedInputException();
312 
313             } else if (control >= 0xA0) {
314                 lzma.reset();
315             }
316 
317             rc.prepareInputBuffer(in, compressedSize);
318 
319         } else if (control > 0x02) {
320             throw new CorruptedInputException();
321 
322         } else {
323             isLZMAChunk = false;
324             uncompressedSize = in.readUnsignedShort() + 1;
325         }
326     }
327 
decodeProps()328     private void decodeProps() throws IOException {
329         int props = in.readUnsignedByte();
330 
331         if (props > (4 * 5 + 4) * 9 + 8)
332             throw new CorruptedInputException();
333 
334         int pb = props / (9 * 5);
335         props -= pb * 9 * 5;
336         int lp = props / 9;
337         int lc = props - lp * 9;
338 
339         if (lc + lp > 4)
340             throw new CorruptedInputException();
341 
342         lzma = new LZMADecoder(lz, rc, lc, lp, pb);
343     }
344 
345     /**
346      * Returns the number of uncompressed bytes that can be read
347      * without blocking. The value is returned with an assumption
348      * that the compressed input data will be valid. If the compressed
349      * data is corrupt, <code>CorruptedInputException</code> may get
350      * thrown before the number of bytes claimed to be available have
351      * been read from this input stream.
352      * <p>
353      * In LZMA2InputStream, the return value will be non-zero when the
354      * decompressor is in the middle of an LZMA2 chunk. The return value
355      * will then be the number of uncompressed bytes remaining from that
356      * chunk. The return value can also be non-zero in the middle of
357      * an uncompressed chunk, but then the return value depends also on
358      * the <code>available()</code> method of the underlying InputStream.
359      *
360      * @return      the number of uncompressed bytes that can be read
361      *              without blocking
362      */
available()363     public int available() throws IOException {
364         if (in == null)
365             throw new XZIOException("Stream closed");
366 
367         if (exception != null)
368             throw exception;
369 
370         return isLZMAChunk ? uncompressedSize
371                            : Math.min(uncompressedSize, in.available());
372     }
373 
putArraysToCache()374     private void putArraysToCache() {
375         if (lz != null) {
376             lz.putArraysToCache(arrayCache);
377             lz = null;
378 
379             rc.putArraysToCache(arrayCache);
380             rc = null;
381         }
382     }
383 
384     /**
385      * Closes the stream and calls <code>in.close()</code>.
386      * If the stream was already closed, this does nothing.
387      *
388      * @throws  IOException if thrown by <code>in.close()</code>
389      */
close()390     public void close() throws IOException {
391         if (in != null) {
392             putArraysToCache();
393 
394             try {
395                 in.close();
396             } finally {
397                 in = null;
398             }
399         }
400     }
401 }
402