1 /* 2 * LZMA2InputStream 3 * 4 * Authors: Lasse Collin <lasse.collin@tukaani.org> 5 * Igor Pavlov <http://7-zip.org/> 6 * 7 * This file has been put into the public domain. 8 * You can do whatever you want with this file. 9 */ 10 11 package org.tukaani.xz; 12 13 import java.io.InputStream; 14 import java.io.DataInputStream; 15 import java.io.IOException; 16 import java.io.EOFException; 17 import org.tukaani.xz.lz.LZDecoder; 18 import org.tukaani.xz.rangecoder.RangeDecoderFromBuffer; 19 import org.tukaani.xz.lzma.LZMADecoder; 20 21 /** 22 * Decompresses a raw LZMA2 stream (no XZ headers). 23 */ 24 public class LZMA2InputStream extends InputStream { 25 /** 26 * Smallest valid LZMA2 dictionary size. 27 * <p> 28 * Very tiny dictionaries would be a performance problem, so 29 * the minimum is 4 KiB. 30 */ 31 public static final int DICT_SIZE_MIN = 4096; 32 33 /** 34 * Largest dictionary size supported by this implementation. 35 * <p> 36 * The LZMA2 algorithm allows dictionaries up to one byte less than 4 GiB. 37 * This implementation supports only 16 bytes less than 2 GiB for raw 38 * LZMA2 streams, and for .xz files the maximum is 1.5 GiB. This 39 * limitation is due to Java using signed 32-bit integers for array 40 * indexing. The limitation shouldn't matter much in practice since so 41 * huge dictionaries are not normally used. 42 */ 43 public static final int DICT_SIZE_MAX = Integer.MAX_VALUE & ~15; 44 45 private static final int COMPRESSED_SIZE_MAX = 1 << 16; 46 47 private final ArrayCache arrayCache; 48 private DataInputStream in; 49 50 private LZDecoder lz; 51 private RangeDecoderFromBuffer rc; 52 private LZMADecoder lzma; 53 54 private int uncompressedSize = 0; 55 private boolean isLZMAChunk = false; 56 57 private boolean needDictReset = true; 58 private boolean needProps = true; 59 private boolean endReached = false; 60 61 private IOException exception = null; 62 63 private final byte[] tempBuf = new byte[1]; 64 65 /** 66 * Gets approximate decompressor memory requirements as kibibytes for 67 * the given dictionary size. 68 * 69 * @param dictSize LZMA2 dictionary size as bytes, must be 70 * in the range [<code>DICT_SIZE_MIN</code>, 71 * <code>DICT_SIZE_MAX</code>] 72 * 73 * @return approximate memory requirements as kibibytes (KiB) 74 */ getMemoryUsage(int dictSize)75 public static int getMemoryUsage(int dictSize) { 76 // The base state is around 30-40 KiB (probabilities etc.), 77 // range decoder needs COMPRESSED_SIZE_MAX bytes for buffering, 78 // and LZ decoder needs a dictionary buffer. 79 return 40 + COMPRESSED_SIZE_MAX / 1024 + getDictSize(dictSize) / 1024; 80 } 81 getDictSize(int dictSize)82 private static int getDictSize(int dictSize) { 83 if (dictSize < DICT_SIZE_MIN || dictSize > DICT_SIZE_MAX) 84 throw new IllegalArgumentException( 85 "Unsupported dictionary size " + dictSize); 86 87 // Round dictionary size upward to a multiple of 16. This way LZMA 88 // can use LZDecoder.getPos() for calculating LZMA's posMask. 89 // Note that this check is needed only for raw LZMA2 streams; it is 90 // redundant with .xz. 91 return (dictSize + 15) & ~15; 92 } 93 94 /** 95 * Creates a new input stream that decompresses raw LZMA2 data 96 * from <code>in</code>. 97 * <p> 98 * The caller needs to know the dictionary size used when compressing; 99 * the dictionary size isn't stored as part of a raw LZMA2 stream. 100 * <p> 101 * Specifying a too small dictionary size will prevent decompressing 102 * the stream. Specifying a too big dictionary is waste of memory but 103 * decompression will work. 104 * <p> 105 * There is no need to specify a dictionary bigger than 106 * the uncompressed size of the data even if a bigger dictionary 107 * was used when compressing. If you know the uncompressed size 108 * of the data, this might allow saving some memory. 109 * 110 * @param in input stream from which LZMA2-compressed 111 * data is read 112 * 113 * @param dictSize LZMA2 dictionary size as bytes, must be 114 * in the range [<code>DICT_SIZE_MIN</code>, 115 * <code>DICT_SIZE_MAX</code>] 116 */ LZMA2InputStream(InputStream in, int dictSize)117 public LZMA2InputStream(InputStream in, int dictSize) { 118 this(in, dictSize, null); 119 } 120 121 /** 122 * Creates a new LZMA2 decompressor using a preset dictionary. 123 * <p> 124 * This is like <code>LZMA2InputStream(InputStream, int)</code> except 125 * that the dictionary may be initialized using a preset dictionary. 126 * If a preset dictionary was used when compressing the data, the 127 * same preset dictionary must be provided when decompressing. 128 * 129 * @param in input stream from which LZMA2-compressed 130 * data is read 131 * 132 * @param dictSize LZMA2 dictionary size as bytes, must be 133 * in the range [<code>DICT_SIZE_MIN</code>, 134 * <code>DICT_SIZE_MAX</code>] 135 * 136 * @param presetDict preset dictionary or <code>null</code> 137 * to use no preset dictionary 138 */ LZMA2InputStream(InputStream in, int dictSize, byte[] presetDict)139 public LZMA2InputStream(InputStream in, int dictSize, byte[] presetDict) { 140 this(in, dictSize, presetDict, ArrayCache.getDefaultCache()); 141 } 142 143 /** 144 * Creates a new LZMA2 decompressor using a preset dictionary 145 * and array cache. 146 * <p> 147 * This is like <code>LZMA2InputStream(InputStream, int, byte[])</code> 148 * except that this also takes the <code>arrayCache</code> argument. 149 * 150 * @param in input stream from which LZMA2-compressed 151 * data is read 152 * 153 * @param dictSize LZMA2 dictionary size as bytes, must be 154 * in the range [<code>DICT_SIZE_MIN</code>, 155 * <code>DICT_SIZE_MAX</code>] 156 * 157 * @param presetDict preset dictionary or <code>null</code> 158 * to use no preset dictionary 159 * 160 * @param arrayCache cache to be used for allocating large arrays 161 * 162 * @since 1.7 163 */ LZMA2InputStream(InputStream in, int dictSize, byte[] presetDict, ArrayCache arrayCache)164 LZMA2InputStream(InputStream in, int dictSize, byte[] presetDict, 165 ArrayCache arrayCache) { 166 // Check for null because otherwise null isn't detect 167 // in this constructor. 168 if (in == null) 169 throw new NullPointerException(); 170 171 this.arrayCache = arrayCache; 172 this.in = new DataInputStream(in); 173 this.rc = new RangeDecoderFromBuffer(COMPRESSED_SIZE_MAX, arrayCache); 174 this.lz = new LZDecoder(getDictSize(dictSize), presetDict, arrayCache); 175 176 if (presetDict != null && presetDict.length > 0) 177 needDictReset = false; 178 } 179 180 /** 181 * Decompresses the next byte from this input stream. 182 * <p> 183 * Reading lots of data with <code>read()</code> from this input stream 184 * may be inefficient. Wrap it in <code>java.io.BufferedInputStream</code> 185 * if you need to read lots of data one byte at a time. 186 * 187 * @return the next decompressed byte, or <code>-1</code> 188 * to indicate the end of the compressed stream 189 * 190 * @throws CorruptedInputException 191 * 192 * @throws XZIOException if the stream has been closed 193 * 194 * @throws EOFException 195 * compressed input is truncated or corrupt 196 * 197 * @throws IOException may be thrown by <code>in</code> 198 */ read()199 public int read() throws IOException { 200 return read(tempBuf, 0, 1) == -1 ? -1 : (tempBuf[0] & 0xFF); 201 } 202 203 /** 204 * Decompresses into an array of bytes. 205 * <p> 206 * If <code>len</code> is zero, no bytes are read and <code>0</code> 207 * is returned. Otherwise this will block until <code>len</code> 208 * bytes have been decompressed, the end of the LZMA2 stream is reached, 209 * or an exception is thrown. 210 * 211 * @param buf target buffer for uncompressed data 212 * @param off start offset in <code>buf</code> 213 * @param len maximum number of uncompressed bytes to read 214 * 215 * @return number of bytes read, or <code>-1</code> to indicate 216 * the end of the compressed stream 217 * 218 * @throws CorruptedInputException 219 * 220 * @throws XZIOException if the stream has been closed 221 * 222 * @throws EOFException 223 * compressed input is truncated or corrupt 224 * 225 * @throws IOException may be thrown by <code>in</code> 226 */ read(byte[] buf, int off, int len)227 public int read(byte[] buf, int off, int len) throws IOException { 228 if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length) 229 throw new IndexOutOfBoundsException(); 230 231 if (len == 0) 232 return 0; 233 234 if (in == null) 235 throw new XZIOException("Stream closed"); 236 237 if (exception != null) 238 throw exception; 239 240 if (endReached) 241 return -1; 242 243 try { 244 int size = 0; 245 246 while (len > 0) { 247 if (uncompressedSize == 0) { 248 decodeChunkHeader(); 249 if (endReached) 250 return size == 0 ? -1 : size; 251 } 252 253 int copySizeMax = Math.min(uncompressedSize, len); 254 255 if (!isLZMAChunk) { 256 lz.copyUncompressed(in, copySizeMax); 257 } else { 258 lz.setLimit(copySizeMax); 259 lzma.decode(); 260 } 261 262 int copiedSize = lz.flush(buf, off); 263 off += copiedSize; 264 len -= copiedSize; 265 size += copiedSize; 266 uncompressedSize -= copiedSize; 267 268 if (uncompressedSize == 0) 269 if (!rc.isFinished() || lz.hasPending()) 270 throw new CorruptedInputException(); 271 } 272 273 return size; 274 275 } catch (IOException e) { 276 exception = e; 277 throw e; 278 } 279 } 280 decodeChunkHeader()281 private void decodeChunkHeader() throws IOException { 282 int control = in.readUnsignedByte(); 283 284 if (control == 0x00) { 285 endReached = true; 286 putArraysToCache(); 287 return; 288 } 289 290 if (control >= 0xE0 || control == 0x01) { 291 needProps = true; 292 needDictReset = false; 293 lz.reset(); 294 } else if (needDictReset) { 295 throw new CorruptedInputException(); 296 } 297 298 if (control >= 0x80) { 299 isLZMAChunk = true; 300 301 uncompressedSize = (control & 0x1F) << 16; 302 uncompressedSize += in.readUnsignedShort() + 1; 303 304 int compressedSize = in.readUnsignedShort() + 1; 305 306 if (control >= 0xC0) { 307 needProps = false; 308 decodeProps(); 309 310 } else if (needProps) { 311 throw new CorruptedInputException(); 312 313 } else if (control >= 0xA0) { 314 lzma.reset(); 315 } 316 317 rc.prepareInputBuffer(in, compressedSize); 318 319 } else if (control > 0x02) { 320 throw new CorruptedInputException(); 321 322 } else { 323 isLZMAChunk = false; 324 uncompressedSize = in.readUnsignedShort() + 1; 325 } 326 } 327 decodeProps()328 private void decodeProps() throws IOException { 329 int props = in.readUnsignedByte(); 330 331 if (props > (4 * 5 + 4) * 9 + 8) 332 throw new CorruptedInputException(); 333 334 int pb = props / (9 * 5); 335 props -= pb * 9 * 5; 336 int lp = props / 9; 337 int lc = props - lp * 9; 338 339 if (lc + lp > 4) 340 throw new CorruptedInputException(); 341 342 lzma = new LZMADecoder(lz, rc, lc, lp, pb); 343 } 344 345 /** 346 * Returns the number of uncompressed bytes that can be read 347 * without blocking. The value is returned with an assumption 348 * that the compressed input data will be valid. If the compressed 349 * data is corrupt, <code>CorruptedInputException</code> may get 350 * thrown before the number of bytes claimed to be available have 351 * been read from this input stream. 352 * <p> 353 * In LZMA2InputStream, the return value will be non-zero when the 354 * decompressor is in the middle of an LZMA2 chunk. The return value 355 * will then be the number of uncompressed bytes remaining from that 356 * chunk. The return value can also be non-zero in the middle of 357 * an uncompressed chunk, but then the return value depends also on 358 * the <code>available()</code> method of the underlying InputStream. 359 * 360 * @return the number of uncompressed bytes that can be read 361 * without blocking 362 */ available()363 public int available() throws IOException { 364 if (in == null) 365 throw new XZIOException("Stream closed"); 366 367 if (exception != null) 368 throw exception; 369 370 return isLZMAChunk ? uncompressedSize 371 : Math.min(uncompressedSize, in.available()); 372 } 373 putArraysToCache()374 private void putArraysToCache() { 375 if (lz != null) { 376 lz.putArraysToCache(arrayCache); 377 lz = null; 378 379 rc.putArraysToCache(arrayCache); 380 rc = null; 381 } 382 } 383 384 /** 385 * Closes the stream and calls <code>in.close()</code>. 386 * If the stream was already closed, this does nothing. 387 * 388 * @throws IOException if thrown by <code>in.close()</code> 389 */ close()390 public void close() throws IOException { 391 if (in != null) { 392 putArraysToCache(); 393 394 try { 395 in.close(); 396 } finally { 397 in = null; 398 } 399 } 400 } 401 } 402