1 /* 2 * LZEncoder 3 * 4 * Authors: Lasse Collin <lasse.collin@tukaani.org> 5 * Igor Pavlov <http://7-zip.org/> 6 * 7 * This file has been put into the public domain. 8 * You can do whatever you want with this file. 9 */ 10 11 package org.tukaani.xz.lz; 12 13 import java.io.OutputStream; 14 import java.io.IOException; 15 16 public abstract class LZEncoder { 17 public static final int MF_HC4 = 0x04; 18 public static final int MF_BT4 = 0x14; 19 20 /** 21 * Number of bytes to keep available before the current byte 22 * when moving the LZ window. 23 */ 24 private final int keepSizeBefore; 25 26 /** 27 * Number of bytes that must be available, the current byte included, 28 * to make hasEnoughData return true. Flushing and finishing are 29 * naturally exceptions to this since there cannot be any data after 30 * the end of the uncompressed input. 31 */ 32 private final int keepSizeAfter; 33 34 final int matchLenMax; 35 final int niceLen; 36 37 final byte[] buf; 38 39 int readPos = -1; 40 private int readLimit = -1; 41 private boolean finishing = false; 42 private int writePos = 0; 43 private int pendingSize = 0; 44 normalize(int[] positions, int normalizationOffset)45 static void normalize(int[] positions, int normalizationOffset) { 46 for (int i = 0; i < positions.length; ++i) { 47 if (positions[i] <= normalizationOffset) 48 positions[i] = 0; 49 else 50 positions[i] -= normalizationOffset; 51 } 52 } 53 54 /** 55 * Gets the size of the LZ window buffer that needs to be allocated. 56 */ getBufSize( int dictSize, int extraSizeBefore, int extraSizeAfter, int matchLenMax)57 private static int getBufSize( 58 int dictSize, int extraSizeBefore, int extraSizeAfter, 59 int matchLenMax) { 60 int keepSizeBefore = extraSizeBefore + dictSize; 61 int keepSizeAfter = extraSizeAfter + matchLenMax; 62 int reserveSize = Math.min(dictSize / 2 + (256 << 10), 512 << 20); 63 return keepSizeBefore + keepSizeAfter + reserveSize; 64 } 65 66 /** 67 * Gets approximate memory usage of the LZEncoder base structure and 68 * the match finder as kibibytes. 69 */ getMemoryUsage( int dictSize, int extraSizeBefore, int extraSizeAfter, int matchLenMax, int mf)70 public static int getMemoryUsage( 71 int dictSize, int extraSizeBefore, int extraSizeAfter, 72 int matchLenMax, int mf) { 73 // Buffer size + a little extra 74 int m = getBufSize(dictSize, extraSizeBefore, extraSizeAfter, 75 matchLenMax) / 1024 + 10; 76 77 switch (mf) { 78 case MF_HC4: 79 m += HC4.getMemoryUsage(dictSize); 80 break; 81 82 case MF_BT4: 83 m += BT4.getMemoryUsage(dictSize); 84 break; 85 86 default: 87 throw new IllegalArgumentException(); 88 } 89 90 return m; 91 } 92 93 /** 94 * Creates a new LZEncoder. 95 * <p> 96 * @param dictSize dictionary size 97 * 98 * @param extraSizeBefore 99 * number of bytes to keep available in the 100 * history in addition to dictSize 101 * 102 * @param extraSizeAfter 103 * number of bytes that must be available 104 * after current position + matchLenMax 105 * 106 * @param niceLen if a match of at least <code>niceLen</code> 107 * bytes is found, be happy with it and don't 108 * stop looking for longer matches 109 * 110 * @param matchLenMax don't test for matches longer than 111 * <code>matchLenMax</code> bytes 112 * 113 * @param mf match finder ID 114 * 115 * @param depthLimit match finder search depth limit 116 */ getInstance( int dictSize, int extraSizeBefore, int extraSizeAfter, int niceLen, int matchLenMax, int mf, int depthLimit)117 public static LZEncoder getInstance( 118 int dictSize, int extraSizeBefore, int extraSizeAfter, 119 int niceLen, int matchLenMax, int mf, int depthLimit) { 120 switch (mf) { 121 case MF_HC4: 122 return new HC4(dictSize, extraSizeBefore, extraSizeAfter, 123 niceLen, matchLenMax, depthLimit); 124 125 case MF_BT4: 126 return new BT4(dictSize, extraSizeBefore, extraSizeAfter, 127 niceLen, matchLenMax, depthLimit); 128 } 129 130 throw new IllegalArgumentException(); 131 } 132 133 /** 134 * Creates a new LZEncoder. See <code>getInstance</code>. 135 */ LZEncoder(int dictSize, int extraSizeBefore, int extraSizeAfter, int niceLen, int matchLenMax)136 LZEncoder(int dictSize, int extraSizeBefore, int extraSizeAfter, 137 int niceLen, int matchLenMax) { 138 buf = new byte[getBufSize(dictSize, extraSizeBefore, extraSizeAfter, 139 matchLenMax)]; 140 141 keepSizeBefore = extraSizeBefore + dictSize; 142 keepSizeAfter = extraSizeAfter + matchLenMax; 143 144 this.matchLenMax = matchLenMax; 145 this.niceLen = niceLen; 146 } 147 148 /** 149 * Sets a preset dictionary. If a preset dictionary is wanted, this 150 * function must be called immediately after creating the LZEncoder 151 * before any data has been encoded. 152 */ setPresetDict(int dictSize, byte[] presetDict)153 public void setPresetDict(int dictSize, byte[] presetDict) { 154 assert !isStarted(); 155 assert writePos == 0; 156 157 if (presetDict != null) { 158 // If the preset dictionary buffer is bigger than the dictionary 159 // size, copy only the tail of the preset dictionary. 160 int copySize = Math.min(presetDict.length, dictSize); 161 int offset = presetDict.length - copySize; 162 System.arraycopy(presetDict, offset, buf, 0, copySize); 163 writePos += copySize; 164 skip(copySize); 165 } 166 } 167 168 /** 169 * Moves data from the end of the buffer to the beginning, discarding 170 * old data and making space for new input. 171 */ moveWindow()172 private void moveWindow() { 173 // Align the move to a multiple of 16 bytes. LZMA2 needs this 174 // because it uses the lowest bits from readPos to get the 175 // alignment of the uncompressed data. 176 int moveOffset = (readPos + 1 - keepSizeBefore) & ~15; 177 int moveSize = writePos - moveOffset; 178 System.arraycopy(buf, moveOffset, buf, 0, moveSize); 179 180 readPos -= moveOffset; 181 readLimit -= moveOffset; 182 writePos -= moveOffset; 183 } 184 185 /** 186 * Copies new data into the LZEncoder's buffer. 187 */ fillWindow(byte[] in, int off, int len)188 public int fillWindow(byte[] in, int off, int len) { 189 assert !finishing; 190 191 // Move the sliding window if needed. 192 if (readPos >= buf.length - keepSizeAfter) 193 moveWindow(); 194 195 // Try to fill the dictionary buffer. If it becomes full, 196 // some of the input bytes may be left unused. 197 if (len > buf.length - writePos) 198 len = buf.length - writePos; 199 200 System.arraycopy(in, off, buf, writePos, len); 201 writePos += len; 202 203 // Set the new readLimit but only if there's enough data to allow 204 // encoding of at least one more byte. 205 if (writePos >= keepSizeAfter) 206 readLimit = writePos - keepSizeAfter; 207 208 processPendingBytes(); 209 210 // Tell the caller how much input we actually copied into 211 // the dictionary. 212 return len; 213 } 214 215 /** 216 * Process pending bytes remaining from preset dictionary initialization 217 * or encoder flush operation. 218 */ processPendingBytes()219 private void processPendingBytes() { 220 // After flushing or setting a preset dictionary there will be 221 // pending data that hasn't been ran through the match finder yet. 222 // Run it through the match finder now if there is enough new data 223 // available (readPos < readLimit) that the encoder may encode at 224 // least one more input byte. This way we don't waste any time 225 // looping in the match finder (and marking the same bytes as 226 // pending again) if the application provides very little new data 227 // per write call. 228 if (pendingSize > 0 && readPos < readLimit) { 229 readPos -= pendingSize; 230 int oldPendingSize = pendingSize; 231 pendingSize = 0; 232 skip(oldPendingSize); 233 assert pendingSize < oldPendingSize; 234 } 235 } 236 237 /** 238 * Returns true if at least one byte has already been run through 239 * the match finder. 240 */ 241 public boolean isStarted() { 242 return readPos != -1; 243 } 244 245 /** 246 * Marks that all the input needs to be made available in 247 * the encoded output. 248 */ 249 public void setFlushing() { 250 readLimit = writePos - 1; 251 processPendingBytes(); 252 } 253 254 /** 255 * Marks that there is no more input remaining. The read position 256 * can be advanced until the end of the data. 257 */ 258 public void setFinishing() { 259 readLimit = writePos - 1; 260 finishing = true; 261 processPendingBytes(); 262 } 263 264 /** 265 * Tests if there is enough input available to let the caller encode 266 * at least one more byte. 267 */ 268 public boolean hasEnoughData(int alreadyReadLen) { 269 return readPos - alreadyReadLen < readLimit; 270 } 271 272 public void copyUncompressed(OutputStream out, int backward, int len) 273 throws IOException { 274 out.write(buf, readPos + 1 - backward, len); 275 } 276 277 /** 278 * Get the number of bytes available, including the current byte. 279 * <p> 280 * Note that the result is undefined if <code>getMatches</code> or 281 * <code>skip</code> hasn't been called yet and no preset dictionary 282 * is being used. 283 */ 284 public int getAvail() { 285 assert isStarted(); 286 return writePos - readPos; 287 } 288 289 /** 290 * Gets the lowest four bits of the absolute offset of the current byte. 291 * Bits other than the lowest four are undefined. 292 */ 293 public int getPos() { 294 return readPos; 295 } 296 297 /** 298 * Gets the byte from the given backward offset. 299 * <p> 300 * The current byte is at <code>0</code>, the previous byte 301 * at <code>1</code> etc. To get a byte at zero-based distance, 302 * use <code>getByte(dist + 1)<code>. 303 * <p> 304 * This function is equivalent to <code>getByte(0, backward)</code>. 305 */ 306 public int getByte(int backward) { 307 return buf[readPos - backward] & 0xFF; 308 } 309 310 /** 311 * Gets the byte from the given forward minus backward offset. 312 * The forward offset is added to the current position. This lets 313 * one read bytes ahead of the current byte. 314 */ 315 public int getByte(int forward, int backward) { 316 return buf[readPos + forward - backward] & 0xFF; 317 } 318 319 /** 320 * Get the length of a match at the given distance. 321 * 322 * @param dist zero-based distance of the match to test 323 * @param lenLimit don't test for a match longer than this 324 * 325 * @return length of the match; it is in the range [0, lenLimit] 326 */ 327 public int getMatchLen(int dist, int lenLimit) { 328 int backPos = readPos - dist - 1; 329 int len = 0; 330 331 while (len < lenLimit && buf[readPos + len] == buf[backPos + len]) 332 ++len; 333 334 return len; 335 } 336 337 /** 338 * Get the length of a match at the given distance and forward offset. 339 * 340 * @param forward forward offset 341 * @param dist zero-based distance of the match to test 342 * @param lenLimit don't test for a match longer than this 343 * 344 * @return length of the match; it is in the range [0, lenLimit] 345 */ 346 public int getMatchLen(int forward, int dist, int lenLimit) { 347 int curPos = readPos + forward; 348 int backPos = curPos - dist - 1; 349 int len = 0; 350 351 while (len < lenLimit && buf[curPos + len] == buf[backPos + len]) 352 ++len; 353 354 return len; 355 } 356 357 /** 358 * Verifies that the matches returned by the match finder are valid. 359 * This is meant to be used in an assert statement. This is totally 360 * useless for actual encoding since match finder's results should 361 * naturally always be valid if it isn't broken. 362 * 363 * @param matches return value from <code>getMatches</code> 364 * 365 * @return true if matches are valid, false if match finder is broken 366 */ 367 public boolean verifyMatches(Matches matches) { 368 int lenLimit = Math.min(getAvail(), matchLenMax); 369 370 for (int i = 0; i < matches.count; ++i) 371 if (getMatchLen(matches.dist[i], lenLimit) != matches.len[i]) 372 return false; 373 374 return true; 375 } 376 377 /** 378 * Moves to the next byte, checks if there is enough input available, 379 * and returns the amount of input available. 380 * 381 * @param requiredForFlushing 382 * minimum number of available bytes when 383 * flushing; encoding may be continued with 384 * new input after flushing 385 * @param requiredForFinishing 386 * minimum number of available bytes when 387 * finishing; encoding must not be continued 388 * after finishing or the match finder state 389 * may be corrupt 390 * 391 * @return the number of bytes available or zero if there 392 * is not enough input available 393 */ 394 int movePos(int requiredForFlushing, int requiredForFinishing) { 395 assert requiredForFlushing >= requiredForFinishing; 396 397 ++readPos; 398 int avail = writePos - readPos; 399 400 if (avail < requiredForFlushing) { 401 if (avail < requiredForFinishing || !finishing) { 402 ++pendingSize; 403 avail = 0; 404 } 405 } 406 407 return avail; 408 } 409 410 /** 411 * Runs match finder for the next byte and returns the matches found. 412 */ 413 public abstract Matches getMatches(); 414 415 /** 416 * Skips the given number of bytes in the match finder. 417 */ 418 public abstract void skip(int len); 419 } 420