1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one 3 * or more contributor license agreements. See the NOTICE file 4 * distributed with this work for additional information 5 * regarding copyright ownership. The ASF licenses this file 6 * to you under the Apache License, Version 2.0 (the 7 * "License"); you may not use this file except in compliance 8 * with the License. You may obtain a copy of the License at 9 * 10 * http://www.apache.org/licenses/LICENSE-2.0 11 * 12 * Unless required by applicable law or agreed to in writing, 13 * software distributed under the License is distributed on an 14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 * KIND, either express or implied. See the License for the 16 * specific language governing permissions and limitations 17 * under the License. 18 */ 19 package org.apache.commons.compress.compressors.lzw; 20 21 import java.io.IOException; 22 import java.io.InputStream; 23 import java.nio.ByteOrder; 24 25 import org.apache.commons.compress.MemoryLimitException; 26 import org.apache.commons.compress.compressors.CompressorInputStream; 27 import org.apache.commons.compress.utils.BitInputStream; 28 import org.apache.commons.compress.utils.InputStreamStatistics; 29 30 /** 31 * <p>Generic LZW implementation. It is used internally for 32 * the Z decompressor and the Unshrinking Zip file compression method, 33 * but may be useful for third-party projects in implementing their own LZW variations.</p> 34 * 35 * @NotThreadSafe 36 * @since 1.10 37 */ 38 public abstract class LZWInputStream extends CompressorInputStream implements InputStreamStatistics { 39 protected static final int DEFAULT_CODE_SIZE = 9; 40 protected static final int UNUSED_PREFIX = -1; 41 42 private final byte[] oneByte = new byte[1]; 43 44 protected final BitInputStream in; 45 private int clearCode = -1; 46 private int codeSize = DEFAULT_CODE_SIZE; 47 private byte previousCodeFirstChar; 48 private int previousCode = UNUSED_PREFIX; 49 private int tableSize; 50 private int[] prefixes; 51 private byte[] characters; 52 private byte[] outputStack; 53 private int outputStackLocation; 54 LZWInputStream(final InputStream inputStream, final ByteOrder byteOrder)55 protected LZWInputStream(final InputStream inputStream, final ByteOrder byteOrder) { 56 this.in = new BitInputStream(inputStream, byteOrder); 57 } 58 59 @Override close()60 public void close() throws IOException { 61 in.close(); 62 } 63 64 @Override read()65 public int read() throws IOException { 66 final int ret = read(oneByte); 67 if (ret < 0) { 68 return ret; 69 } 70 return 0xff & oneByte[0]; 71 } 72 73 @Override read(final byte[] b, final int off, final int len)74 public int read(final byte[] b, final int off, final int len) throws IOException { 75 int bytesRead = readFromStack(b, off, len); 76 while (len - bytesRead > 0) { 77 final int result = decompressNextSymbol(); 78 if (result < 0) { 79 if (bytesRead > 0) { 80 count(bytesRead); 81 return bytesRead; 82 } 83 return result; 84 } 85 bytesRead += readFromStack(b, off + bytesRead, len - bytesRead); 86 } 87 count(bytesRead); 88 return bytesRead; 89 } 90 91 /** 92 * @since 1.17 93 */ 94 @Override getCompressedCount()95 public long getCompressedCount() { 96 return in.getBytesRead(); 97 } 98 99 /** 100 * Read the next code and expand it. 101 * @return the expanded next code 102 * @throws IOException on error 103 */ decompressNextSymbol()104 protected abstract int decompressNextSymbol() throws IOException; 105 106 /** 107 * Add a new entry to the dictionary. 108 * @param previousCode the previous code 109 * @param character the next character to append 110 * @return the new code 111 * @throws IOException on error 112 */ addEntry(int previousCode, byte character)113 protected abstract int addEntry(int previousCode, byte character) 114 throws IOException; 115 116 /** 117 * Sets the clear code based on the code size. 118 * @param codeSize code size 119 */ setClearCode(final int codeSize)120 protected void setClearCode(final int codeSize) { 121 clearCode = (1 << (codeSize - 1)); 122 } 123 124 /** 125 * Initializes the arrays based on the maximum code size. 126 * First checks that the estimated memory usage is below memoryLimitInKb 127 * 128 * @param maxCodeSize maximum code size 129 * @param memoryLimitInKb maximum allowed estimated memory usage in Kb 130 * @throws MemoryLimitException if estimated memory usage is greater than memoryLimitInKb 131 */ initializeTables(final int maxCodeSize, final int memoryLimitInKb)132 protected void initializeTables(final int maxCodeSize, final int memoryLimitInKb) 133 throws MemoryLimitException { 134 135 if (memoryLimitInKb > -1) { 136 final int maxTableSize = 1 << maxCodeSize; 137 //account for potential overflow 138 long memoryUsageInBytes = (long) maxTableSize * 6;//(4 (prefixes) + 1 (characters) +1 (outputStack)) 139 long memoryUsageInKb = memoryUsageInBytes >> 10; 140 141 if (memoryUsageInKb > memoryLimitInKb) { 142 throw new MemoryLimitException(memoryUsageInKb, memoryLimitInKb); 143 } 144 } 145 initializeTables(maxCodeSize); 146 } 147 148 /** 149 * Initializes the arrays based on the maximum code size. 150 * @param maxCodeSize maximum code size 151 */ initializeTables(final int maxCodeSize)152 protected void initializeTables(final int maxCodeSize) { 153 final int maxTableSize = 1 << maxCodeSize; 154 prefixes = new int[maxTableSize]; 155 characters = new byte[maxTableSize]; 156 outputStack = new byte[maxTableSize]; 157 outputStackLocation = maxTableSize; 158 final int max = 1 << 8; 159 for (int i = 0; i < max; i++) { 160 prefixes[i] = -1; 161 characters[i] = (byte) i; 162 } 163 } 164 165 /** 166 * Reads the next code from the stream. 167 * @return the next code 168 * @throws IOException on error 169 */ readNextCode()170 protected int readNextCode() throws IOException { 171 if (codeSize > 31) { 172 throw new IllegalArgumentException("code size must not be bigger than 31"); 173 } 174 return (int) in.readBits(codeSize); 175 } 176 177 /** 178 * Adds a new entry if the maximum table size hasn't been exceeded 179 * and returns the new index. 180 * @param previousCode the previous code 181 * @param character the character to append 182 * @param maxTableSize the maximum table size 183 * @return the new code 184 */ addEntry(final int previousCode, final byte character, final int maxTableSize)185 protected int addEntry(final int previousCode, final byte character, final int maxTableSize) { 186 if (tableSize < maxTableSize) { 187 prefixes[tableSize] = previousCode; 188 characters[tableSize] = character; 189 return tableSize++; 190 } 191 return -1; 192 } 193 194 /** 195 * Add entry for repeat of previousCode we haven't added, yet. 196 * @return new code for a repeat of the previous code 197 * @throws IOException on error 198 */ addRepeatOfPreviousCode()199 protected int addRepeatOfPreviousCode() throws IOException { 200 if (previousCode == -1) { 201 // can't have a repeat for the very first code 202 throw new IOException("The first code can't be a reference to its preceding code"); 203 } 204 return addEntry(previousCode, previousCodeFirstChar); 205 } 206 207 /** 208 * Expands the entry with index code to the output stack and may 209 * create a new entry 210 * @param code the code 211 * @param addedUnfinishedEntry whether unfinished entries have been added 212 * @return the new location of the output stack 213 * @throws IOException on error 214 */ expandCodeToOutputStack(final int code, final boolean addedUnfinishedEntry)215 protected int expandCodeToOutputStack(final int code, final boolean addedUnfinishedEntry) 216 throws IOException { 217 for (int entry = code; entry >= 0; entry = prefixes[entry]) { 218 outputStack[--outputStackLocation] = characters[entry]; 219 } 220 if (previousCode != -1 && !addedUnfinishedEntry) { 221 addEntry(previousCode, outputStack[outputStackLocation]); 222 } 223 previousCode = code; 224 previousCodeFirstChar = outputStack[outputStackLocation]; 225 return outputStackLocation; 226 } 227 readFromStack(final byte[] b, final int off, final int len)228 private int readFromStack(final byte[] b, final int off, final int len) { 229 final int remainingInStack = outputStack.length - outputStackLocation; 230 if (remainingInStack > 0) { 231 final int maxLength = Math.min(remainingInStack, len); 232 System.arraycopy(outputStack, outputStackLocation, b, off, maxLength); 233 outputStackLocation += maxLength; 234 return maxLength; 235 } 236 return 0; 237 } 238 getCodeSize()239 protected int getCodeSize() { 240 return codeSize; 241 } 242 resetCodeSize()243 protected void resetCodeSize() { 244 setCodeSize(DEFAULT_CODE_SIZE); 245 } 246 setCodeSize(final int cs)247 protected void setCodeSize(final int cs) { 248 this.codeSize = cs; 249 } 250 incrementCodeSize()251 protected void incrementCodeSize() { 252 codeSize++; 253 } 254 resetPreviousCode()255 protected void resetPreviousCode() { 256 this.previousCode = -1; 257 } 258 getPrefix(final int offset)259 protected int getPrefix(final int offset) { 260 return prefixes[offset]; 261 } 262 setPrefix(final int offset, final int value)263 protected void setPrefix(final int offset, final int value) { 264 prefixes[offset] = value; 265 } 266 getPrefixesLength()267 protected int getPrefixesLength() { 268 return prefixes.length; 269 } 270 getClearCode()271 protected int getClearCode() { 272 return clearCode; 273 } 274 getTableSize()275 protected int getTableSize() { 276 return tableSize; 277 } 278 setTableSize(final int newSize)279 protected void setTableSize(final int newSize) { 280 tableSize = newSize; 281 } 282 283 } 284