• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
 * XZInputStream
 *
 * Author: Lasse Collin <lasse.collin@tukaani.org>
 *
 * This file has been put into the public domain.
 * You can do whatever you want with this file.
 */
9 
10 package org.tukaani.xz;
11 
12 import java.io.InputStream;
13 import java.io.DataInputStream;
14 import java.io.IOException;
15 import java.io.EOFException;
16 import org.tukaani.xz.common.DecoderUtil;
17 
18 /**
19  * Decompresses a .xz file in streamed mode (no seeking).
20  * <p>
21  * Use this to decompress regular standalone .xz files. This reads from
22  * its input stream until the end of the input or until an error occurs.
23  * This supports decompressing concatenated .xz files.
24  *
25  * <h4>Typical use cases</h4>
26  * <p>
27  * Getting an input stream to decompress a .xz file:
28  * <p><blockquote><pre>
29  * InputStream infile = new FileInputStream("foo.xz");
30  * XZInputStream inxz = new XZInputStream(infile);
31  * </pre></blockquote>
32  * <p>
33  * It's important to keep in mind that decompressor memory usage depends
34  * on the settings used to compress the file. The worst-case memory usage
35  * of XZInputStream is currently 1.5&nbsp;GiB. Still, very few files will
36  * require more than about 65&nbsp;MiB because that's how much decompressing
37  * a file created with the highest preset level will need, and only a few
38  * people use settings other than the predefined presets.
39  * <p>
40  * It is possible to specify a memory usage limit for
41  * <code>XZInputStream</code>. If decompression requires more memory than
42  * the specified limit, MemoryLimitException will be thrown when reading
43  * from the stream. For example, the following sets the memory usage limit
44  * to 100&nbsp;MiB:
45  * <p><blockquote><pre>
46  * InputStream infile = new FileInputStream("foo.xz");
47  * XZInputStream inxz = new XZInputStream(infile, 100 * 1024);
48  * </pre></blockquote>
49  *
50  * <h4>When uncompressed size is known beforehand</h4>
51  * <p>
52  * If you are decompressing complete files and your application knows
53  * exactly how much uncompressed data there should be, it is good to try
54  * reading one more byte by calling <code>read()</code> and checking
55  * that it returns <code>-1</code>. This way the decompressor will parse the
56  * file footers and verify the integrity checks, giving the caller more
57  * confidence that the uncompressed data is valid. (This advice seems to
58  * apply to
59  * {@link java.util.zip.GZIPInputStream java.util.zip.GZIPInputStream} too.)
60  *
61  * @see SingleXZInputStream
62  */
63 public class XZInputStream extends InputStream {
64     private final int memoryLimit;
65     private InputStream in;
66     private SingleXZInputStream xzIn;
67     private final boolean verifyCheck;
68     private boolean endReached = false;
69     private IOException exception = null;
70 
71     private final byte[] tempBuf = new byte[1];
72 
73     /**
74      * Creates a new XZ decompressor without a memory usage limit.
75      * <p>
76      * This constructor reads and parses the XZ Stream Header (12 bytes)
77      * from <code>in</code>. The header of the first Block is not read
78      * until <code>read</code> is called.
79      *
80      * @param       in          input stream from which XZ-compressed
81      *                          data is read
82      *
83      * @throws      XZFormatException
84      *                          input is not in the XZ format
85      *
86      * @throws      CorruptedInputException
87      *                          XZ header CRC32 doesn't match
88      *
89      * @throws      UnsupportedOptionsException
90      *                          XZ header is valid but specifies options
91      *                          not supported by this implementation
92      *
93      * @throws      EOFException
94      *                          less than 12 bytes of input was available
95      *                          from <code>in</code>
96      *
97      * @throws      IOException may be thrown by <code>in</code>
98      */
XZInputStream(InputStream in)99     public XZInputStream(InputStream in) throws IOException {
100         this(in, -1);
101     }
102 
103     /**
104      * Creates a new XZ decompressor with an optional memory usage limit.
105      * <p>
106      * This is identical to <code>XZInputStream(InputStream)</code> except
107      * that this takes also the <code>memoryLimit</code> argument.
108      *
109      * @param       in          input stream from which XZ-compressed
110      *                          data is read
111      *
112      * @param       memoryLimit memory usage limit in kibibytes (KiB)
113      *                          or <code>-1</code> to impose no
114      *                          memory usage limit
115      *
116      * @throws      XZFormatException
117      *                          input is not in the XZ format
118      *
119      * @throws      CorruptedInputException
120      *                          XZ header CRC32 doesn't match
121      *
122      * @throws      UnsupportedOptionsException
123      *                          XZ header is valid but specifies options
124      *                          not supported by this implementation
125      *
126      * @throws      EOFException
127      *                          less than 12 bytes of input was available
128      *                          from <code>in</code>
129      *
130      * @throws      IOException may be thrown by <code>in</code>
131      */
XZInputStream(InputStream in, int memoryLimit)132     public XZInputStream(InputStream in, int memoryLimit) throws IOException {
133         this(in, memoryLimit, true);
134     }
135 
136     /**
137      * Creates a new XZ decompressor with an optional memory usage limit
138      * and ability to disable verification of integrity checks.
139      * <p>
140      * This is identical to <code>XZInputStream(InputStream,int)</code> except
141      * that this takes also the <code>verifyCheck</code> argument.
142      * <p>
143      * Note that integrity check verification should almost never be disabled.
144      * Possible reasons to disable integrity check verification:
145      * <ul>
146      *   <li>Trying to recover data from a corrupt .xz file.</li>
147      *   <li>Speeding up decompression. This matters mostly with SHA-256
148      *   or with files that have compressed extremely well. It's recommended
149      *   that integrity checking isn't disabled for performance reasons
150      *   unless the file integrity is verified externally in some other
151      *   way.</li>
152      * </ul>
153      * <p>
154      * <code>verifyCheck</code> only affects the integrity check of
155      * the actual compressed data. The CRC32 fields in the headers
156      * are always verified.
157      *
158      * @param       in          input stream from which XZ-compressed
159      *                          data is read
160      *
161      * @param       memoryLimit memory usage limit in kibibytes (KiB)
162      *                          or <code>-1</code> to impose no
163      *                          memory usage limit
164      *
165      * @param       verifyCheck if <code>true</code>, the integrity checks
166      *                          will be verified; this should almost never
167      *                          be set to <code>false</code>
168      *
169      * @throws      XZFormatException
170      *                          input is not in the XZ format
171      *
172      * @throws      CorruptedInputException
173      *                          XZ header CRC32 doesn't match
174      *
175      * @throws      UnsupportedOptionsException
176      *                          XZ header is valid but specifies options
177      *                          not supported by this implementation
178      *
179      * @throws      EOFException
180      *                          less than 12 bytes of input was available
181      *                          from <code>in</code>
182      *
183      * @throws      IOException may be thrown by <code>in</code>
184      *
185      * @since 1.6
186      */
XZInputStream(InputStream in, int memoryLimit, boolean verifyCheck)187     public XZInputStream(InputStream in, int memoryLimit, boolean verifyCheck)
188             throws IOException {
189         this.in = in;
190         this.memoryLimit = memoryLimit;
191         this.verifyCheck = verifyCheck;
192         this.xzIn = new SingleXZInputStream(in, memoryLimit, verifyCheck);
193     }
194 
195     /**
196      * Decompresses the next byte from this input stream.
197      * <p>
198      * Reading lots of data with <code>read()</code> from this input stream
199      * may be inefficient. Wrap it in {@link java.io.BufferedInputStream}
200      * if you need to read lots of data one byte at a time.
201      *
202      * @return      the next decompressed byte, or <code>-1</code>
203      *              to indicate the end of the compressed stream
204      *
205      * @throws      CorruptedInputException
206      * @throws      UnsupportedOptionsException
207      * @throws      MemoryLimitException
208      *
209      * @throws      XZIOException if the stream has been closed
210      *
211      * @throws      EOFException
212      *                          compressed input is truncated or corrupt
213      *
214      * @throws      IOException may be thrown by <code>in</code>
215      */
read()216     public int read() throws IOException {
217         return read(tempBuf, 0, 1) == -1 ? -1 : (tempBuf[0] & 0xFF);
218     }
219 
220     /**
221      * Decompresses into an array of bytes.
222      * <p>
223      * If <code>len</code> is zero, no bytes are read and <code>0</code>
224      * is returned. Otherwise this will try to decompress <code>len</code>
225      * bytes of uncompressed data. Less than <code>len</code> bytes may
226      * be read only in the following situations:
227      * <ul>
228      *   <li>The end of the compressed data was reached successfully.</li>
229      *   <li>An error is detected after at least one but less <code>len</code>
230      *       bytes have already been successfully decompressed.
231      *       The next call with non-zero <code>len</code> will immediately
232      *       throw the pending exception.</li>
233      *   <li>An exception is thrown.</li>
234      * </ul>
235      *
236      * @param       buf         target buffer for uncompressed data
237      * @param       off         start offset in <code>buf</code>
238      * @param       len         maximum number of uncompressed bytes to read
239      *
240      * @return      number of bytes read, or <code>-1</code> to indicate
241      *              the end of the compressed stream
242      *
243      * @throws      CorruptedInputException
244      * @throws      UnsupportedOptionsException
245      * @throws      MemoryLimitException
246      *
247      * @throws      XZIOException if the stream has been closed
248      *
249      * @throws      EOFException
250      *                          compressed input is truncated or corrupt
251      *
252      * @throws      IOException may be thrown by <code>in</code>
253      */
read(byte[] buf, int off, int len)254     public int read(byte[] buf, int off, int len) throws IOException {
255         if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length)
256             throw new IndexOutOfBoundsException();
257 
258         if (len == 0)
259             return 0;
260 
261         if (in == null)
262             throw new XZIOException("Stream closed");
263 
264         if (exception != null)
265             throw exception;
266 
267         if (endReached)
268             return -1;
269 
270         int size = 0;
271 
272         try {
273             while (len > 0) {
274                 if (xzIn == null) {
275                     prepareNextStream();
276                     if (endReached)
277                         return size == 0 ? -1 : size;
278                 }
279 
280                 int ret = xzIn.read(buf, off, len);
281 
282                 if (ret > 0) {
283                     size += ret;
284                     off += ret;
285                     len -= ret;
286                 } else if (ret == -1) {
287                     xzIn = null;
288                 }
289             }
290         } catch (IOException e) {
291             exception = e;
292             if (size == 0)
293                 throw e;
294         }
295 
296         return size;
297     }
298 
prepareNextStream()299     private void prepareNextStream() throws IOException {
300         DataInputStream inData = new DataInputStream(in);
301         byte[] buf = new byte[DecoderUtil.STREAM_HEADER_SIZE];
302 
303         // The size of Stream Padding must be a multiple of four bytes,
304         // all bytes zero.
305         do {
306             // First try to read one byte to see if we have reached the end
307             // of the file.
308             int ret = inData.read(buf, 0, 1);
309             if (ret == -1) {
310                 endReached = true;
311                 return;
312             }
313 
314             // Since we got one byte of input, there must be at least
315             // three more available in a valid file.
316             inData.readFully(buf, 1, 3);
317 
318         } while (buf[0] == 0 && buf[1] == 0 && buf[2] == 0 && buf[3] == 0);
319 
320         // Not all bytes are zero. In a valid Stream it indicates the
321         // beginning of the next Stream. Read the rest of the Stream Header
322         // and initialize the XZ decoder.
323         inData.readFully(buf, 4, DecoderUtil.STREAM_HEADER_SIZE - 4);
324 
325         try {
326             xzIn = new SingleXZInputStream(in, memoryLimit, verifyCheck, buf);
327         } catch (XZFormatException e) {
328             // Since this isn't the first .xz Stream, it is more
329             // logical to tell that the data is corrupt.
330             throw new CorruptedInputException(
331                     "Garbage after a valid XZ Stream");
332         }
333     }
334 
335     /**
336      * Returns the number of uncompressed bytes that can be read
337      * without blocking. The value is returned with an assumption
338      * that the compressed input data will be valid. If the compressed
339      * data is corrupt, <code>CorruptedInputException</code> may get
340      * thrown before the number of bytes claimed to be available have
341      * been read from this input stream.
342      *
343      * @return      the number of uncompressed bytes that can be read
344      *              without blocking
345      */
available()346     public int available() throws IOException {
347         if (in == null)
348             throw new XZIOException("Stream closed");
349 
350         if (exception != null)
351             throw exception;
352 
353         return xzIn == null ? 0 : xzIn.available();
354     }
355 
356     /**
357      * Closes the stream and calls <code>in.close()</code>.
358      * If the stream was already closed, this does nothing.
359      *
360      * @throws  IOException if thrown by <code>in.close()</code>
361      */
close()362     public void close() throws IOException {
363         if (in != null) {
364             try {
365                 in.close();
366             } finally {
367                 in = null;
368             }
369         }
370     }
371 }
372