1 /* 2 * LZMAEncoderNormal 3 * 4 * Authors: Lasse Collin <lasse.collin@tukaani.org> 5 * Igor Pavlov <http://7-zip.org/> 6 * 7 * This file has been put into the public domain. 8 * You can do whatever you want with this file. 9 */ 10 11 package org.tukaani.xz.lzma; 12 13 import org.tukaani.xz.lz.LZEncoder; 14 import org.tukaani.xz.lz.Matches; 15 import org.tukaani.xz.rangecoder.RangeEncoder; 16 17 final class LZMAEncoderNormal extends LZMAEncoder { 18 private static final int OPTS = 4096; 19 20 private static final int EXTRA_SIZE_BEFORE = OPTS; 21 private static final int EXTRA_SIZE_AFTER = OPTS; 22 23 private final Optimum[] opts = new Optimum[OPTS]; 24 private int optCur = 0; 25 private int optEnd = 0; 26 27 private Matches matches; 28 29 // These are fields solely to avoid allocating the objects again and 30 // again on each function call. 31 private final int[] repLens = new int[REPS]; 32 private final State nextState = new State(); 33 getMemoryUsage(int dictSize, int extraSizeBefore, int mf)34 static int getMemoryUsage(int dictSize, int extraSizeBefore, int mf) { 35 return LZEncoder.getMemoryUsage(dictSize, 36 Math.max(extraSizeBefore, EXTRA_SIZE_BEFORE), 37 EXTRA_SIZE_AFTER, MATCH_LEN_MAX, mf) 38 + OPTS * 64 / 1024; 39 } 40 LZMAEncoderNormal(RangeEncoder rc, int lc, int lp, int pb, int dictSize, int extraSizeBefore, int niceLen, int mf, int depthLimit)41 LZMAEncoderNormal(RangeEncoder rc, int lc, int lp, int pb, 42 int dictSize, int extraSizeBefore, 43 int niceLen, int mf, int depthLimit) { 44 super(rc, LZEncoder.getInstance(dictSize, 45 Math.max(extraSizeBefore, 46 EXTRA_SIZE_BEFORE), 47 EXTRA_SIZE_AFTER, 48 niceLen, MATCH_LEN_MAX, 49 mf, depthLimit), 50 lc, lp, pb, dictSize, niceLen); 51 52 for (int i = 0; i < OPTS; ++i) 53 opts[i] = new Optimum(); 54 } 55 reset()56 public void reset() { 57 optCur = 0; 58 optEnd = 0; 59 super.reset(); 60 } 61 62 /** 63 * Converts the opts array from backward indexes to forward indexes. 64 * Then it will be simple to get the next symbol from the array 65 * in later calls to <code>getNextSymbol()</code>. 66 */ convertOpts()67 private int convertOpts() { 68 optEnd = optCur; 69 70 int optPrev = opts[optCur].optPrev; 71 72 do { 73 Optimum opt = opts[optCur]; 74 75 if (opt.prev1IsLiteral) { 76 opts[optPrev].optPrev = optCur; 77 opts[optPrev].backPrev = -1; 78 optCur = optPrev--; 79 80 if (opt.hasPrev2) { 81 opts[optPrev].optPrev = optPrev + 1; 82 opts[optPrev].backPrev = opt.backPrev2; 83 optCur = optPrev; 84 optPrev = opt.optPrev2; 85 } 86 } 87 88 int temp = opts[optPrev].optPrev; 89 opts[optPrev].optPrev = optCur; 90 optCur = optPrev; 91 optPrev = temp; 92 } while (optCur > 0); 93 94 optCur = opts[0].optPrev; 95 back = opts[optCur].backPrev; 96 return optCur; 97 } 98 getNextSymbol()99 int getNextSymbol() { 100 // If there are pending symbols from an earlier call to this 101 // function, return those symbols first. 102 if (optCur < optEnd) { 103 int len = opts[optCur].optPrev - optCur; 104 optCur = opts[optCur].optPrev; 105 back = opts[optCur].backPrev; 106 return len; 107 } 108 109 assert optCur == optEnd; 110 optCur = 0; 111 optEnd = 0; 112 back = -1; 113 114 if (readAhead == -1) 115 matches = getMatches(); 116 117 // Get the number of bytes available in the dictionary, but 118 // not more than the maximum match length. If there aren't 119 // enough bytes remaining to encode a match at all, return 120 // immediately to encode this byte as a literal. 121 int avail = Math.min(lz.getAvail(), MATCH_LEN_MAX); 122 if (avail < MATCH_LEN_MIN) 123 return 1; 124 125 // Get the lengths of repeated matches. 126 int repBest = 0; 127 for (int rep = 0; rep < REPS; ++rep) { 128 repLens[rep] = lz.getMatchLen(reps[rep], avail); 129 130 if (repLens[rep] < MATCH_LEN_MIN) { 131 repLens[rep] = 0; 132 continue; 133 } 134 135 if (repLens[rep] > repLens[repBest]) 136 repBest = rep; 137 } 138 139 // Return if the best repeated match is at least niceLen bytes long. 140 if (repLens[repBest] >= niceLen) { 141 back = repBest; 142 skip(repLens[repBest] - 1); 143 return repLens[repBest]; 144 } 145 146 // Initialize mainLen and mainDist to the longest match found 147 // by the match finder. 148 int mainLen = 0; 149 int mainDist = 0; 150 if (matches.count > 0) { 151 mainLen = matches.len[matches.count - 1]; 152 mainDist = matches.dist[matches.count - 1]; 153 154 // Return if it is at least niceLen bytes long. 155 if (mainLen >= niceLen) { 156 back = mainDist + REPS; 157 skip(mainLen - 1); 158 return mainLen; 159 } 160 } 161 162 int curByte = lz.getByte(0); 163 int matchByte = lz.getByte(reps[0] + 1); 164 165 // If the match finder found no matches and this byte cannot be 166 // encoded as a repeated match (short or long), we must be return 167 // to have the byte encoded as a literal. 168 if (mainLen < MATCH_LEN_MIN && curByte != matchByte 169 && repLens[repBest] < MATCH_LEN_MIN) 170 return 1; 171 172 173 int pos = lz.getPos(); 174 int posState = pos & posMask; 175 176 // Calculate the price of encoding the current byte as a literal. 177 { 178 int prevByte = lz.getByte(1); 179 int literalPrice = literalEncoder.getPrice(curByte, matchByte, 180 prevByte, pos, state); 181 opts[1].set1(literalPrice, 0, -1); 182 } 183 184 int anyMatchPrice = getAnyMatchPrice(state, posState); 185 int anyRepPrice = getAnyRepPrice(anyMatchPrice, state); 186 187 // If it is possible to encode this byte as a short rep, see if 188 // it is cheaper than encoding it as a literal. 189 if (matchByte == curByte) { 190 int shortRepPrice = getShortRepPrice(anyRepPrice, 191 state, posState); 192 if (shortRepPrice < opts[1].price) 193 opts[1].set1(shortRepPrice, 0, 0); 194 } 195 196 // Return if there is neither normal nor long repeated match. Use 197 // a short match instead of a literal if is is possible and cheaper. 198 optEnd = Math.max(mainLen, repLens[repBest]); 199 if (optEnd < MATCH_LEN_MIN) { 200 assert optEnd == 0 : optEnd; 201 back = opts[1].backPrev; 202 return 1; 203 } 204 205 206 // Update the lookup tables for distances and lengths before using 207 // those price calculation functions. (The price function above 208 // don't need these tables.) 209 updatePrices(); 210 211 // Initialize the state and reps of this position in opts[]. 212 // updateOptStateAndReps() will need these to get the new 213 // state and reps for the next byte. 214 opts[0].state.set(state); 215 System.arraycopy(reps, 0, opts[0].reps, 0, REPS); 216 217 // Initialize the prices for latter opts that will be used below. 218 for (int i = optEnd; i >= MATCH_LEN_MIN; --i) 219 opts[i].reset(); 220 221 // Calculate the prices of repeated matches of all lengths. 222 for (int rep = 0; rep < REPS; ++rep) { 223 int repLen = repLens[rep]; 224 if (repLen < MATCH_LEN_MIN) 225 continue; 226 227 int longRepPrice = getLongRepPrice(anyRepPrice, rep, 228 state, posState); 229 do { 230 int price = longRepPrice + repLenEncoder.getPrice(repLen, 231 posState); 232 if (price < opts[repLen].price) 233 opts[repLen].set1(price, 0, rep); 234 } while (--repLen >= MATCH_LEN_MIN); 235 } 236 237 // Calculate the prices of normal matches that are longer than rep0. 238 { 239 int len = Math.max(repLens[0] + 1, MATCH_LEN_MIN); 240 if (len <= mainLen) { 241 int normalMatchPrice = getNormalMatchPrice(anyMatchPrice, 242 state); 243 244 // Set i to the index of the shortest match that is 245 // at least len bytes long. 246 int i = 0; 247 while (len > matches.len[i]) 248 ++i; 249 250 while (true) { 251 int dist = matches.dist[i]; 252 int price = getMatchAndLenPrice(normalMatchPrice, 253 dist, len, posState); 254 if (price < opts[len].price) 255 opts[len].set1(price, 0, dist + REPS); 256 257 if (len == matches.len[i]) 258 if (++i == matches.count) 259 break; 260 261 ++len; 262 } 263 } 264 } 265 266 267 avail = Math.min(lz.getAvail(), OPTS - 1); 268 269 // Get matches for later bytes and optimize the use of LZMA symbols 270 // by calculating the prices and picking the cheapest symbol 271 // combinations. 272 while (++optCur < optEnd) { 273 matches = getMatches(); 274 if (matches.count > 0 275 && matches.len[matches.count - 1] >= niceLen) 276 break; 277 278 --avail; 279 ++pos; 280 posState = pos & posMask; 281 282 updateOptStateAndReps(); 283 anyMatchPrice = opts[optCur].price 284 + getAnyMatchPrice(opts[optCur].state, posState); 285 anyRepPrice = getAnyRepPrice(anyMatchPrice, opts[optCur].state); 286 287 calc1BytePrices(pos, posState, avail, anyRepPrice); 288 289 if (avail >= MATCH_LEN_MIN) { 290 int startLen = calcLongRepPrices(pos, posState, 291 avail, anyRepPrice); 292 if (matches.count > 0) 293 calcNormalMatchPrices(pos, posState, avail, 294 anyMatchPrice, startLen); 295 } 296 } 297 298 return convertOpts(); 299 } 300 301 /** 302 * Updates the state and reps for the current byte in the opts array. 303 */ updateOptStateAndReps()304 private void updateOptStateAndReps() { 305 int optPrev = opts[optCur].optPrev; 306 assert optPrev < optCur; 307 308 if (opts[optCur].prev1IsLiteral) { 309 --optPrev; 310 311 if (opts[optCur].hasPrev2) { 312 opts[optCur].state.set(opts[opts[optCur].optPrev2].state); 313 if (opts[optCur].backPrev2 < REPS) 314 opts[optCur].state.updateLongRep(); 315 else 316 opts[optCur].state.updateMatch(); 317 } else { 318 opts[optCur].state.set(opts[optPrev].state); 319 } 320 321 opts[optCur].state.updateLiteral(); 322 } else { 323 opts[optCur].state.set(opts[optPrev].state); 324 } 325 326 if (optPrev == optCur - 1) { 327 // Must be either a short rep or a literal. 328 assert opts[optCur].backPrev == 0 || opts[optCur].backPrev == -1; 329 330 if (opts[optCur].backPrev == 0) 331 opts[optCur].state.updateShortRep(); 332 else 333 opts[optCur].state.updateLiteral(); 334 335 System.arraycopy(opts[optPrev].reps, 0, 336 opts[optCur].reps, 0, REPS); 337 } else { 338 int back; 339 if (opts[optCur].prev1IsLiteral && opts[optCur].hasPrev2) { 340 optPrev = opts[optCur].optPrev2; 341 back = opts[optCur].backPrev2; 342 opts[optCur].state.updateLongRep(); 343 } else { 344 back = opts[optCur].backPrev; 345 if (back < REPS) 346 opts[optCur].state.updateLongRep(); 347 else 348 opts[optCur].state.updateMatch(); 349 } 350 351 if (back < REPS) { 352 opts[optCur].reps[0] = opts[optPrev].reps[back]; 353 354 int rep; 355 for (rep = 1; rep <= back; ++rep) 356 opts[optCur].reps[rep] = opts[optPrev].reps[rep - 1]; 357 358 for (; rep < REPS; ++rep) 359 opts[optCur].reps[rep] = opts[optPrev].reps[rep]; 360 } else { 361 opts[optCur].reps[0] = back - REPS; 362 System.arraycopy(opts[optPrev].reps, 0, 363 opts[optCur].reps, 1, REPS - 1); 364 } 365 } 366 } 367 368 /** 369 * Calculates prices of a literal, a short rep, and literal + rep0. 370 */ 371 private void calc1BytePrices(int pos, int posState, 372 int avail, int anyRepPrice) { 373 // This will be set to true if using a literal or a short rep. 374 boolean nextIsByte = false; 375 376 int curByte = lz.getByte(0); 377 int matchByte = lz.getByte(opts[optCur].reps[0] + 1); 378 379 // Try a literal. 380 int literalPrice = opts[optCur].price 381 + literalEncoder.getPrice(curByte, matchByte, lz.getByte(1), 382 pos, opts[optCur].state); 383 if (literalPrice < opts[optCur + 1].price) { 384 opts[optCur + 1].set1(literalPrice, optCur, -1); 385 nextIsByte = true; 386 } 387 388 // Try a short rep. 389 if (matchByte == curByte && (opts[optCur + 1].optPrev == optCur 390 || opts[optCur + 1].backPrev != 0)) { 391 int shortRepPrice = getShortRepPrice(anyRepPrice, 392 opts[optCur].state, 393 posState); 394 if (shortRepPrice <= opts[optCur + 1].price) { 395 opts[optCur + 1].set1(shortRepPrice, optCur, 0); 396 nextIsByte = true; 397 } 398 } 399 400 // If neither a literal nor a short rep was the cheapest choice, 401 // try literal + long rep0. 402 if (!nextIsByte && matchByte != curByte && avail > MATCH_LEN_MIN) { 403 int lenLimit = Math.min(niceLen, avail - 1); 404 int len = lz.getMatchLen(1, opts[optCur].reps[0], lenLimit); 405 406 if (len >= MATCH_LEN_MIN) { 407 nextState.set(opts[optCur].state); 408 nextState.updateLiteral(); 409 int nextPosState = (pos + 1) & posMask; 410 int price = literalPrice 411 + getLongRepAndLenPrice(0, len, 412 nextState, nextPosState); 413 414 int i = optCur + 1 + len; 415 while (optEnd < i) 416 opts[++optEnd].reset(); 417 418 if (price < opts[i].price) 419 opts[i].set2(price, optCur, 0); 420 } 421 } 422 } 423 424 /** 425 * Calculates prices of long rep and long rep + literal + rep0. 426 */ 427 private int calcLongRepPrices(int pos, int posState, 428 int avail, int anyRepPrice) { 429 int startLen = MATCH_LEN_MIN; 430 int lenLimit = Math.min(avail, niceLen); 431 432 for (int rep = 0; rep < REPS; ++rep) { 433 int len = lz.getMatchLen(opts[optCur].reps[rep], lenLimit); 434 if (len < MATCH_LEN_MIN) 435 continue; 436 437 while (optEnd < optCur + len) 438 opts[++optEnd].reset(); 439 440 int longRepPrice = getLongRepPrice(anyRepPrice, rep, 441 opts[optCur].state, posState); 442 443 for (int i = len; i >= MATCH_LEN_MIN; --i) { 444 int price = longRepPrice 445 + repLenEncoder.getPrice(i, posState); 446 if (price < opts[optCur + i].price) 447 opts[optCur + i].set1(price, optCur, rep); 448 } 449 450 if (rep == 0) 451 startLen = len + 1; 452 453 int len2Limit = Math.min(niceLen, avail - len - 1); 454 int len2 = lz.getMatchLen(len + 1, opts[optCur].reps[rep], 455 len2Limit); 456 457 if (len2 >= MATCH_LEN_MIN) { 458 // Rep 459 int price = longRepPrice 460 + repLenEncoder.getPrice(len, posState); 461 nextState.set(opts[optCur].state); 462 nextState.updateLongRep(); 463 464 // Literal 465 int curByte = lz.getByte(len, 0); 466 int matchByte = lz.getByte(0); // lz.getByte(len, len) 467 int prevByte = lz.getByte(len, 1); 468 price += literalEncoder.getPrice(curByte, matchByte, prevByte, 469 pos + len, nextState); 470 nextState.updateLiteral(); 471 472 // Rep0 473 int nextPosState = (pos + len + 1) & posMask; 474 price += getLongRepAndLenPrice(0, len2, 475 nextState, nextPosState); 476 477 int i = optCur + len + 1 + len2; 478 while (optEnd < i) 479 opts[++optEnd].reset(); 480 481 if (price < opts[i].price) 482 opts[i].set3(price, optCur, rep, len, 0); 483 } 484 } 485 486 return startLen; 487 } 488 489 /** 490 * Calculates prices of a normal match and normal match + literal + rep0. 491 */ 492 private void calcNormalMatchPrices(int pos, int posState, int avail, 493 int anyMatchPrice, int startLen) { 494 // If the longest match is so long that it would not fit into 495 // the opts array, shorten the matches. 496 if (matches.len[matches.count - 1] > avail) { 497 matches.count = 0; 498 while (matches.len[matches.count] < avail) 499 ++matches.count; 500 501 matches.len[matches.count++] = avail; 502 } 503 504 if (matches.len[matches.count - 1] < startLen) 505 return; 506 507 while (optEnd < optCur + matches.len[matches.count - 1]) 508 opts[++optEnd].reset(); 509 510 int normalMatchPrice = getNormalMatchPrice(anyMatchPrice, 511 opts[optCur].state); 512 513 int match = 0; 514 while (startLen > matches.len[match]) 515 ++match; 516 517 for (int len = startLen; ; ++len) { 518 int dist = matches.dist[match]; 519 520 // Calculate the price of a match of len bytes from the nearest 521 // possible distance. 522 int matchAndLenPrice = getMatchAndLenPrice(normalMatchPrice, 523 dist, len, posState); 524 if (matchAndLenPrice < opts[optCur + len].price) 525 opts[optCur + len].set1(matchAndLenPrice, 526 optCur, dist + REPS); 527 528 if (len != matches.len[match]) 529 continue; 530 531 // Try match + literal + rep0. First get the length of the rep0. 532 int len2Limit = Math.min(niceLen, avail - len - 1); 533 int len2 = lz.getMatchLen(len + 1, dist, len2Limit); 534 535 if (len2 >= MATCH_LEN_MIN) { 536 nextState.set(opts[optCur].state); 537 nextState.updateMatch(); 538 539 // Literal 540 int curByte = lz.getByte(len, 0); 541 int matchByte = lz.getByte(0); // lz.getByte(len, len) 542 int prevByte = lz.getByte(len, 1); 543 int price = matchAndLenPrice 544 + literalEncoder.getPrice(curByte, matchByte, 545 prevByte, pos + len, 546 nextState); 547 nextState.updateLiteral(); 548 549 // Rep0 550 int nextPosState = (pos + len + 1) & posMask; 551 price += getLongRepAndLenPrice(0, len2, 552 nextState, nextPosState); 553 554 int i = optCur + len + 1 + len2; 555 while (optEnd < i) 556 opts[++optEnd].reset(); 557 558 if (price < opts[i].price) 559 opts[i].set3(price, optCur, dist + REPS, len, 0); 560 } 561 562 if (++match == matches.count) 563 break; 564 } 565 } 566 } 567