• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * LZMAEncoderNormal
3  *
4  * Authors: Lasse Collin <lasse.collin@tukaani.org>
5  *          Igor Pavlov <http://7-zip.org/>
6  *
7  * This file has been put into the public domain.
8  * You can do whatever you want with this file.
9  */
10 
11 package org.tukaani.xz.lzma;
12 
13 import org.tukaani.xz.lz.LZEncoder;
14 import org.tukaani.xz.lz.Matches;
15 import org.tukaani.xz.rangecoder.RangeEncoder;
16 
17 final class LZMAEncoderNormal extends LZMAEncoder {
18     private static final int OPTS = 4096;
19 
20     private static final int EXTRA_SIZE_BEFORE = OPTS;
21     private static final int EXTRA_SIZE_AFTER = OPTS;
22 
23     private final Optimum[] opts = new Optimum[OPTS];
24     private int optCur = 0;
25     private int optEnd = 0;
26 
27     private Matches matches;
28 
29     // These are fields solely to avoid allocating the objects again and
30     // again on each function call.
31     private final int[] repLens = new int[REPS];
32     private final State nextState = new State();
33 
getMemoryUsage(int dictSize, int extraSizeBefore, int mf)34     static int getMemoryUsage(int dictSize, int extraSizeBefore, int mf) {
35         return LZEncoder.getMemoryUsage(dictSize,
36                    Math.max(extraSizeBefore, EXTRA_SIZE_BEFORE),
37                    EXTRA_SIZE_AFTER, MATCH_LEN_MAX, mf)
38                + OPTS * 64 / 1024;
39     }
40 
LZMAEncoderNormal(RangeEncoder rc, int lc, int lp, int pb, int dictSize, int extraSizeBefore, int niceLen, int mf, int depthLimit)41     LZMAEncoderNormal(RangeEncoder rc, int lc, int lp, int pb,
42                              int dictSize, int extraSizeBefore,
43                              int niceLen, int mf, int depthLimit) {
44         super(rc, LZEncoder.getInstance(dictSize,
45                                         Math.max(extraSizeBefore,
46                                                  EXTRA_SIZE_BEFORE),
47                                         EXTRA_SIZE_AFTER,
48                                         niceLen, MATCH_LEN_MAX,
49                                         mf, depthLimit),
50               lc, lp, pb, dictSize, niceLen);
51 
52         for (int i = 0; i < OPTS; ++i)
53             opts[i] = new Optimum();
54     }
55 
reset()56     public void reset() {
57         optCur = 0;
58         optEnd = 0;
59         super.reset();
60     }
61 
62     /**
63      * Converts the opts array from backward indexes to forward indexes.
64      * Then it will be simple to get the next symbol from the array
65      * in later calls to <code>getNextSymbol()</code>.
66      */
convertOpts()67     private int convertOpts() {
68         optEnd = optCur;
69 
70         int optPrev = opts[optCur].optPrev;
71 
72         do {
73             Optimum opt = opts[optCur];
74 
75             if (opt.prev1IsLiteral) {
76                 opts[optPrev].optPrev = optCur;
77                 opts[optPrev].backPrev = -1;
78                 optCur = optPrev--;
79 
80                 if (opt.hasPrev2) {
81                     opts[optPrev].optPrev = optPrev + 1;
82                     opts[optPrev].backPrev = opt.backPrev2;
83                     optCur = optPrev;
84                     optPrev = opt.optPrev2;
85                 }
86             }
87 
88             int temp = opts[optPrev].optPrev;
89             opts[optPrev].optPrev = optCur;
90             optCur = optPrev;
91             optPrev = temp;
92         } while (optCur > 0);
93 
94         optCur = opts[0].optPrev;
95         back = opts[optCur].backPrev;
96         return optCur;
97     }
98 
getNextSymbol()99     int getNextSymbol() {
100         // If there are pending symbols from an earlier call to this
101         // function, return those symbols first.
102         if (optCur < optEnd) {
103             int len = opts[optCur].optPrev - optCur;
104             optCur = opts[optCur].optPrev;
105             back = opts[optCur].backPrev;
106             return len;
107         }
108 
109         assert optCur == optEnd;
110         optCur = 0;
111         optEnd = 0;
112         back = -1;
113 
114         if (readAhead == -1)
115             matches = getMatches();
116 
117         // Get the number of bytes available in the dictionary, but
118         // not more than the maximum match length. If there aren't
119         // enough bytes remaining to encode a match at all, return
120         // immediately to encode this byte as a literal.
121         int avail = Math.min(lz.getAvail(), MATCH_LEN_MAX);
122         if (avail < MATCH_LEN_MIN)
123             return 1;
124 
125         // Get the lengths of repeated matches.
126         int repBest = 0;
127         for (int rep = 0; rep < REPS; ++rep) {
128             repLens[rep] = lz.getMatchLen(reps[rep], avail);
129 
130             if (repLens[rep] < MATCH_LEN_MIN) {
131                 repLens[rep] = 0;
132                 continue;
133             }
134 
135             if (repLens[rep] > repLens[repBest])
136                 repBest = rep;
137         }
138 
139         // Return if the best repeated match is at least niceLen bytes long.
140         if (repLens[repBest] >= niceLen) {
141             back = repBest;
142             skip(repLens[repBest] - 1);
143             return repLens[repBest];
144         }
145 
146         // Initialize mainLen and mainDist to the longest match found
147         // by the match finder.
148         int mainLen = 0;
149         int mainDist = 0;
150         if (matches.count > 0) {
151             mainLen = matches.len[matches.count - 1];
152             mainDist = matches.dist[matches.count - 1];
153 
154             // Return if it is at least niceLen bytes long.
155             if (mainLen >= niceLen) {
156                 back = mainDist + REPS;
157                 skip(mainLen - 1);
158                 return mainLen;
159             }
160         }
161 
162         int curByte = lz.getByte(0);
163         int matchByte = lz.getByte(reps[0] + 1);
164 
165         // If the match finder found no matches and this byte cannot be
166         // encoded as a repeated match (short or long), we must be return
167         // to have the byte encoded as a literal.
168         if (mainLen < MATCH_LEN_MIN && curByte != matchByte
169                 && repLens[repBest] < MATCH_LEN_MIN)
170             return 1;
171 
172 
173         int pos = lz.getPos();
174         int posState = pos & posMask;
175 
176         // Calculate the price of encoding the current byte as a literal.
177         {
178             int prevByte = lz.getByte(1);
179             int literalPrice = literalEncoder.getPrice(curByte, matchByte,
180                                                        prevByte, pos, state);
181             opts[1].set1(literalPrice, 0, -1);
182         }
183 
184         int anyMatchPrice = getAnyMatchPrice(state, posState);
185         int anyRepPrice = getAnyRepPrice(anyMatchPrice, state);
186 
187         // If it is possible to encode this byte as a short rep, see if
188         // it is cheaper than encoding it as a literal.
189         if (matchByte == curByte) {
190             int shortRepPrice = getShortRepPrice(anyRepPrice,
191                                                  state, posState);
192             if (shortRepPrice < opts[1].price)
193                 opts[1].set1(shortRepPrice, 0, 0);
194         }
195 
196         // Return if there is neither normal nor long repeated match. Use
197         // a short match instead of a literal if is is possible and cheaper.
198         optEnd = Math.max(mainLen, repLens[repBest]);
199         if (optEnd < MATCH_LEN_MIN) {
200             assert optEnd == 0 : optEnd;
201             back = opts[1].backPrev;
202             return 1;
203         }
204 
205 
206         // Update the lookup tables for distances and lengths before using
207         // those price calculation functions. (The price function above
208         // don't need these tables.)
209         updatePrices();
210 
211         // Initialize the state and reps of this position in opts[].
212         // updateOptStateAndReps() will need these to get the new
213         // state and reps for the next byte.
214         opts[0].state.set(state);
215         System.arraycopy(reps, 0, opts[0].reps, 0, REPS);
216 
217         // Initialize the prices for latter opts that will be used below.
218         for (int i = optEnd; i >= MATCH_LEN_MIN; --i)
219             opts[i].reset();
220 
221         // Calculate the prices of repeated matches of all lengths.
222         for (int rep = 0; rep < REPS; ++rep) {
223             int repLen = repLens[rep];
224             if (repLen < MATCH_LEN_MIN)
225                 continue;
226 
227             int longRepPrice = getLongRepPrice(anyRepPrice, rep,
228                                                state, posState);
229             do {
230                 int price = longRepPrice + repLenEncoder.getPrice(repLen,
231                                                                   posState);
232                 if (price < opts[repLen].price)
233                     opts[repLen].set1(price, 0, rep);
234             } while (--repLen >= MATCH_LEN_MIN);
235         }
236 
237         // Calculate the prices of normal matches that are longer than rep0.
238         {
239             int len = Math.max(repLens[0] + 1, MATCH_LEN_MIN);
240             if (len <= mainLen) {
241                 int normalMatchPrice = getNormalMatchPrice(anyMatchPrice,
242                                                            state);
243 
244                 // Set i to the index of the shortest match that is
245                 // at least len bytes long.
246                 int i = 0;
247                 while (len > matches.len[i])
248                     ++i;
249 
250                 while (true) {
251                     int dist = matches.dist[i];
252                     int price = getMatchAndLenPrice(normalMatchPrice,
253                                                     dist, len, posState);
254                     if (price < opts[len].price)
255                         opts[len].set1(price, 0, dist + REPS);
256 
257                     if (len == matches.len[i])
258                         if (++i == matches.count)
259                             break;
260 
261                     ++len;
262                 }
263             }
264         }
265 
266 
267         avail = Math.min(lz.getAvail(), OPTS - 1);
268 
269         // Get matches for later bytes and optimize the use of LZMA symbols
270         // by calculating the prices and picking the cheapest symbol
271         // combinations.
272         while (++optCur < optEnd) {
273             matches = getMatches();
274             if (matches.count > 0
275                     && matches.len[matches.count - 1] >= niceLen)
276                 break;
277 
278             --avail;
279             ++pos;
280             posState = pos & posMask;
281 
282             updateOptStateAndReps();
283             anyMatchPrice = opts[optCur].price
284                             + getAnyMatchPrice(opts[optCur].state, posState);
285             anyRepPrice = getAnyRepPrice(anyMatchPrice, opts[optCur].state);
286 
287             calc1BytePrices(pos, posState, avail, anyRepPrice);
288 
289             if (avail >= MATCH_LEN_MIN) {
290                 int startLen = calcLongRepPrices(pos, posState,
291                                                  avail, anyRepPrice);
292                 if (matches.count > 0)
293                     calcNormalMatchPrices(pos, posState, avail,
294                                           anyMatchPrice, startLen);
295             }
296         }
297 
298         return convertOpts();
299     }
300 
301     /**
302      * Updates the state and reps for the current byte in the opts array.
303      */
updateOptStateAndReps()304     private void updateOptStateAndReps() {
305         int optPrev = opts[optCur].optPrev;
306         assert optPrev < optCur;
307 
308         if (opts[optCur].prev1IsLiteral) {
309             --optPrev;
310 
311             if (opts[optCur].hasPrev2) {
312                 opts[optCur].state.set(opts[opts[optCur].optPrev2].state);
313                 if (opts[optCur].backPrev2 < REPS)
314                     opts[optCur].state.updateLongRep();
315                 else
316                     opts[optCur].state.updateMatch();
317             } else {
318                 opts[optCur].state.set(opts[optPrev].state);
319             }
320 
321             opts[optCur].state.updateLiteral();
322         } else {
323             opts[optCur].state.set(opts[optPrev].state);
324         }
325 
326         if (optPrev == optCur - 1) {
327             // Must be either a short rep or a literal.
328             assert opts[optCur].backPrev == 0 || opts[optCur].backPrev == -1;
329 
330             if (opts[optCur].backPrev == 0)
331                 opts[optCur].state.updateShortRep();
332             else
333                 opts[optCur].state.updateLiteral();
334 
335             System.arraycopy(opts[optPrev].reps, 0,
336                              opts[optCur].reps, 0, REPS);
337         } else {
338             int back;
339             if (opts[optCur].prev1IsLiteral && opts[optCur].hasPrev2) {
340                 optPrev = opts[optCur].optPrev2;
341                 back = opts[optCur].backPrev2;
342                 opts[optCur].state.updateLongRep();
343             } else {
344                 back = opts[optCur].backPrev;
345                 if (back < REPS)
346                     opts[optCur].state.updateLongRep();
347                 else
348                     opts[optCur].state.updateMatch();
349             }
350 
351             if (back < REPS) {
352                 opts[optCur].reps[0] = opts[optPrev].reps[back];
353 
354                 int rep;
355                 for (rep = 1; rep <= back; ++rep)
356                     opts[optCur].reps[rep] = opts[optPrev].reps[rep - 1];
357 
358                 for (; rep < REPS; ++rep)
359                     opts[optCur].reps[rep] = opts[optPrev].reps[rep];
360             } else {
361                 opts[optCur].reps[0] = back - REPS;
362                 System.arraycopy(opts[optPrev].reps, 0,
363                                  opts[optCur].reps, 1, REPS - 1);
364             }
365         }
366     }
367 
368     /**
369      * Calculates prices of a literal, a short rep, and literal + rep0.
370      */
371     private void calc1BytePrices(int pos, int posState,
372                                  int avail, int anyRepPrice) {
373         // This will be set to true if using a literal or a short rep.
374         boolean nextIsByte = false;
375 
376         int curByte = lz.getByte(0);
377         int matchByte = lz.getByte(opts[optCur].reps[0] + 1);
378 
379         // Try a literal.
380         int literalPrice = opts[optCur].price
381                 + literalEncoder.getPrice(curByte, matchByte, lz.getByte(1),
382                                           pos, opts[optCur].state);
383         if (literalPrice < opts[optCur + 1].price) {
384             opts[optCur + 1].set1(literalPrice, optCur, -1);
385             nextIsByte = true;
386         }
387 
388         // Try a short rep.
389         if (matchByte == curByte && (opts[optCur + 1].optPrev == optCur
390                                       || opts[optCur + 1].backPrev != 0)) {
391             int shortRepPrice = getShortRepPrice(anyRepPrice,
392                                                  opts[optCur].state,
393                                                  posState);
394             if (shortRepPrice <= opts[optCur + 1].price) {
395                 opts[optCur + 1].set1(shortRepPrice, optCur, 0);
396                 nextIsByte = true;
397             }
398         }
399 
400         // If neither a literal nor a short rep was the cheapest choice,
401         // try literal + long rep0.
402         if (!nextIsByte && matchByte != curByte && avail > MATCH_LEN_MIN) {
403             int lenLimit = Math.min(niceLen, avail - 1);
404             int len = lz.getMatchLen(1, opts[optCur].reps[0], lenLimit);
405 
406             if (len >= MATCH_LEN_MIN) {
407                 nextState.set(opts[optCur].state);
408                 nextState.updateLiteral();
409                 int nextPosState = (pos + 1) & posMask;
410                 int price = literalPrice
411                             + getLongRepAndLenPrice(0, len,
412                                                     nextState, nextPosState);
413 
414                 int i = optCur + 1 + len;
415                 while (optEnd < i)
416                     opts[++optEnd].reset();
417 
418                 if (price < opts[i].price)
419                     opts[i].set2(price, optCur, 0);
420             }
421         }
422     }
423 
424     /**
425      * Calculates prices of long rep and long rep + literal + rep0.
426      */
427     private int calcLongRepPrices(int pos, int posState,
428                                   int avail, int anyRepPrice) {
429         int startLen = MATCH_LEN_MIN;
430         int lenLimit = Math.min(avail, niceLen);
431 
432         for (int rep = 0; rep < REPS; ++rep) {
433             int len = lz.getMatchLen(opts[optCur].reps[rep], lenLimit);
434             if (len < MATCH_LEN_MIN)
435                 continue;
436 
437             while (optEnd < optCur + len)
438                 opts[++optEnd].reset();
439 
440             int longRepPrice = getLongRepPrice(anyRepPrice, rep,
441                                                opts[optCur].state, posState);
442 
443             for (int i = len; i >= MATCH_LEN_MIN; --i) {
444                 int price = longRepPrice
445                             + repLenEncoder.getPrice(i, posState);
446                 if (price < opts[optCur + i].price)
447                     opts[optCur + i].set1(price, optCur, rep);
448             }
449 
450             if (rep == 0)
451                 startLen = len + 1;
452 
453             int len2Limit = Math.min(niceLen, avail - len - 1);
454             int len2 = lz.getMatchLen(len + 1, opts[optCur].reps[rep],
455                                       len2Limit);
456 
457             if (len2 >= MATCH_LEN_MIN) {
458                 // Rep
459                 int price = longRepPrice
460                             + repLenEncoder.getPrice(len, posState);
461                 nextState.set(opts[optCur].state);
462                 nextState.updateLongRep();
463 
464                 // Literal
465                 int curByte = lz.getByte(len, 0);
466                 int matchByte = lz.getByte(0); // lz.getByte(len, len)
467                 int prevByte = lz.getByte(len, 1);
468                 price += literalEncoder.getPrice(curByte, matchByte, prevByte,
469                                                  pos + len, nextState);
470                 nextState.updateLiteral();
471 
472                 // Rep0
473                 int nextPosState = (pos + len + 1) & posMask;
474                 price += getLongRepAndLenPrice(0, len2,
475                                                nextState, nextPosState);
476 
477                 int i = optCur + len + 1 + len2;
478                 while (optEnd < i)
479                     opts[++optEnd].reset();
480 
481                 if (price < opts[i].price)
482                     opts[i].set3(price, optCur, rep, len, 0);
483             }
484         }
485 
486         return startLen;
487     }
488 
489     /**
490      * Calculates prices of a normal match and normal match + literal + rep0.
491      */
492     private void calcNormalMatchPrices(int pos, int posState, int avail,
493                                        int anyMatchPrice, int startLen) {
494         // If the longest match is so long that it would not fit into
495         // the opts array, shorten the matches.
496         if (matches.len[matches.count - 1] > avail) {
497             matches.count = 0;
498             while (matches.len[matches.count] < avail)
499                 ++matches.count;
500 
501             matches.len[matches.count++] = avail;
502         }
503 
504         if (matches.len[matches.count - 1] < startLen)
505             return;
506 
507         while (optEnd < optCur + matches.len[matches.count - 1])
508             opts[++optEnd].reset();
509 
510         int normalMatchPrice = getNormalMatchPrice(anyMatchPrice,
511                                                    opts[optCur].state);
512 
513         int match = 0;
514         while (startLen > matches.len[match])
515             ++match;
516 
517         for (int len = startLen; ; ++len) {
518             int dist = matches.dist[match];
519 
520             // Calculate the price of a match of len bytes from the nearest
521             // possible distance.
522             int matchAndLenPrice = getMatchAndLenPrice(normalMatchPrice,
523                                                        dist, len, posState);
524             if (matchAndLenPrice < opts[optCur + len].price)
525                 opts[optCur + len].set1(matchAndLenPrice,
526                                         optCur, dist + REPS);
527 
528             if (len != matches.len[match])
529                 continue;
530 
531             // Try match + literal + rep0. First get the length of the rep0.
532             int len2Limit = Math.min(niceLen, avail - len - 1);
533             int len2 = lz.getMatchLen(len + 1, dist, len2Limit);
534 
535             if (len2 >= MATCH_LEN_MIN) {
536                 nextState.set(opts[optCur].state);
537                 nextState.updateMatch();
538 
539                 // Literal
540                 int curByte = lz.getByte(len, 0);
541                 int matchByte = lz.getByte(0); // lz.getByte(len, len)
542                 int prevByte = lz.getByte(len, 1);
543                 int price = matchAndLenPrice
544                         + literalEncoder.getPrice(curByte, matchByte,
545                                                   prevByte, pos + len,
546                                                   nextState);
547                 nextState.updateLiteral();
548 
549                 // Rep0
550                 int nextPosState = (pos + len + 1) & posMask;
551                 price += getLongRepAndLenPrice(0, len2,
552                                                nextState, nextPosState);
553 
554                 int i = optCur + len + 1 + len2;
555                 while (optEnd < i)
556                     opts[++optEnd].reset();
557 
558                 if (price < opts[i].price)
559                     opts[i].set3(price, optCur, dist + REPS, len, 0);
560             }
561 
562             if (++match == matches.count)
563                 break;
564         }
565     }
566 }
567